From 97c2b5cba2044f1c0bc3f14d7102176dbcf81af0 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 21 May 2018 10:59:54 +0100
Subject: mfd: madera: Add register definitions for Cirrus Logic Madera codecs

This patch adds a header file of register definitions for Cirrus
Logic "Madera" class codecs. These codecs are all based off a common
set of hardware IP so have a common register map (with a few minor
device-to-device variations).

The registers.h file is tool-generated directly from the hardware design
but has been manually stripped down to reduce size (full register
map is >44000 lines). All names are kept the same as datasheet names
so that they can be cross-referenced between source and datasheet without
confusion.

The register map layout is kept fully-defined rather than factored into
macros and/or block-indexing code. The major reasons for this are:

 - #1 is that it makes the source highly greppable, which is important.
   "What does the driver do with register bits XYZ" or "Where does it use
   register bits XYZ" are commonly types of questions. These can be quickly
   answered by a grep. Squashing definitions into generator macros or block-
   indexing code is a way of defeating grep.

 - most of the register definitions are used in tables, so a constant value
   is required. Using generator macros make the table definition clunky and
   obscure.

 - the code is clearer when it's there in the source exactly what register
   and field it is using

 - it is easier to diff the register map of a new (unsupported) codec against
   what is already supported and merge in differences

 - it makes the register map available in source for maintenance/debugging
   instead of having to refer back to the datasheet for a register map

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/registers.h | 3917 ++++++++++++++++++++++++++++++++++
 1 file changed, 3917 insertions(+)
 create mode 100644 include/linux/mfd/madera/registers.h

(limited to 'include')

diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h
new file mode 100644
index 000000000000..a6b7c4222c5e
--- /dev/null
+++ b/include/linux/mfd/madera/registers.h
@@ -0,0 +1,3917 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Madera register definitions
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_REGISTERS_H
+#define MADERA_REGISTERS_H
+
+/*
+ * Register Addresses.
+ */
+#define MADERA_SOFTWARE_RESET				0x00
+#define MADERA_HARDWARE_REVISION			0x01
+#define MADERA_CTRL_IF_CFG_1				0x08
+#define MADERA_CTRL_IF_CFG_2				0x09
+#define MADERA_CTRL_IF_CFG_3				0x0A
+#define MADERA_WRITE_SEQUENCER_CTRL_0			0x16
+#define MADERA_WRITE_SEQUENCER_CTRL_1			0x17
+#define MADERA_WRITE_SEQUENCER_CTRL_2			0x18
+#define MADERA_TONE_GENERATOR_1				0x20
+#define MADERA_TONE_GENERATOR_2				0x21
+#define MADERA_TONE_GENERATOR_3				0x22
+#define MADERA_TONE_GENERATOR_4				0x23
+#define MADERA_TONE_GENERATOR_5				0x24
+#define MADERA_PWM_DRIVE_1				0x30
+#define MADERA_PWM_DRIVE_2				0x31
+#define MADERA_PWM_DRIVE_3				0x32
+#define MADERA_SEQUENCE_CONTROL				0x41
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_1		0x61
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_2		0x62
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_3		0x63
+#define MADERA_SAMPLE_RATE_SEQUENCE_SELECT_4		0x64
+#define MADERA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_1	0x66
+#define MADERA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2	0x67
+#define MADERA_HAPTICS_CONTROL_1			0x90
+#define MADERA_HAPTICS_CONTROL_2			0x91
+#define MADERA_HAPTICS_PHASE_1_INTENSITY		0x92
+#define MADERA_HAPTICS_PHASE_1_DURATION			0x93
+#define MADERA_HAPTICS_PHASE_2_INTENSITY		0x94
+#define MADERA_HAPTICS_PHASE_2_DURATION			0x95
+#define MADERA_HAPTICS_PHASE_3_INTENSITY		0x96
+#define MADERA_HAPTICS_PHASE_3_DURATION			0x97
+#define MADERA_HAPTICS_STATUS				0x98
+#define MADERA_COMFORT_NOISE_GENERATOR			0xA0
+#define MADERA_CLOCK_32K_1				0x100
+#define MADERA_SYSTEM_CLOCK_1				0x101
+#define MADERA_SAMPLE_RATE_1				0x102
+#define MADERA_SAMPLE_RATE_2				0x103
+#define MADERA_SAMPLE_RATE_3				0x104
+#define MADERA_SAMPLE_RATE_1_STATUS			0x10A
+#define MADERA_SAMPLE_RATE_2_STATUS			0x10B
+#define MADERA_SAMPLE_RATE_3_STATUS			0x10C
+#define MADERA_ASYNC_CLOCK_1				0x112
+#define MADERA_ASYNC_SAMPLE_RATE_1			0x113
+#define MADERA_ASYNC_SAMPLE_RATE_2			0x114
+#define MADERA_ASYNC_SAMPLE_RATE_1_STATUS		0x11B
+#define MADERA_ASYNC_SAMPLE_RATE_2_STATUS		0x11C
+#define MADERA_DSP_CLOCK_1				0x120
+#define MADERA_DSP_CLOCK_2				0x122
+#define MADERA_OUTPUT_SYSTEM_CLOCK			0x149
+#define MADERA_OUTPUT_ASYNC_CLOCK			0x14A
+#define MADERA_RATE_ESTIMATOR_1				0x152
+#define MADERA_RATE_ESTIMATOR_2				0x153
+#define MADERA_RATE_ESTIMATOR_3				0x154
+#define MADERA_RATE_ESTIMATOR_4				0x155
+#define MADERA_RATE_ESTIMATOR_5				0x156
+#define MADERA_FLL1_CONTROL_1				0x171
+#define MADERA_FLL1_CONTROL_2				0x172
+#define MADERA_FLL1_CONTROL_3				0x173
+#define MADERA_FLL1_CONTROL_4				0x174
+#define MADERA_FLL1_CONTROL_5				0x175
+#define MADERA_FLL1_CONTROL_6				0x176
+#define MADERA_FLL1_LOOP_FILTER_TEST_1			0x177
+#define MADERA_FLL1_NCO_TEST_0				0x178
+#define MADERA_FLL1_CONTROL_7				0x179
+#define MADERA_FLL1_EFS_2				0x17A
+#define CS47L35_FLL1_SYNCHRONISER_1			0x17F
+#define CS47L35_FLL1_SYNCHRONISER_2			0x180
+#define CS47L35_FLL1_SYNCHRONISER_3			0x181
+#define CS47L35_FLL1_SYNCHRONISER_4			0x182
+#define CS47L35_FLL1_SYNCHRONISER_5			0x183
+#define CS47L35_FLL1_SYNCHRONISER_6			0x184
+#define CS47L35_FLL1_SYNCHRONISER_7			0x185
+#define CS47L35_FLL1_SPREAD_SPECTRUM			0x187
+#define CS47L35_FLL1_GPIO_CLOCK				0x188
+#define MADERA_FLL1_SYNCHRONISER_1			0x181
+#define MADERA_FLL1_SYNCHRONISER_2			0x182
+#define MADERA_FLL1_SYNCHRONISER_3			0x183
+#define MADERA_FLL1_SYNCHRONISER_4			0x184
+#define MADERA_FLL1_SYNCHRONISER_5			0x185
+#define MADERA_FLL1_SYNCHRONISER_6			0x186
+#define MADERA_FLL1_SYNCHRONISER_7			0x187
+#define MADERA_FLL1_SPREAD_SPECTRUM			0x189
+#define MADERA_FLL1_GPIO_CLOCK				0x18A
+#define MADERA_FLL2_CONTROL_1				0x191
+#define MADERA_FLL2_CONTROL_2				0x192
+#define MADERA_FLL2_CONTROL_3				0x193
+#define MADERA_FLL2_CONTROL_4				0x194
+#define MADERA_FLL2_CONTROL_5				0x195
+#define MADERA_FLL2_CONTROL_6				0x196
+#define MADERA_FLL2_LOOP_FILTER_TEST_1			0x197
+#define MADERA_FLL2_NCO_TEST_0				0x198
+#define MADERA_FLL2_CONTROL_7				0x199
+#define MADERA_FLL2_EFS_2				0x19A
+#define MADERA_FLL2_SYNCHRONISER_1			0x1A1
+#define MADERA_FLL2_SYNCHRONISER_2			0x1A2
+#define MADERA_FLL2_SYNCHRONISER_3			0x1A3
+#define MADERA_FLL2_SYNCHRONISER_4			0x1A4
+#define MADERA_FLL2_SYNCHRONISER_5			0x1A5
+#define MADERA_FLL2_SYNCHRONISER_6			0x1A6
+#define MADERA_FLL2_SYNCHRONISER_7			0x1A7
+#define MADERA_FLL2_SPREAD_SPECTRUM			0x1A9
+#define MADERA_FLL2_GPIO_CLOCK				0x1AA
+#define MADERA_FLL3_CONTROL_1				0x1B1
+#define MADERA_FLL3_CONTROL_2				0x1B2
+#define MADERA_FLL3_CONTROL_3				0x1B3
+#define MADERA_FLL3_CONTROL_4				0x1B4
+#define MADERA_FLL3_CONTROL_5				0x1B5
+#define MADERA_FLL3_CONTROL_6				0x1B6
+#define MADERA_FLL3_LOOP_FILTER_TEST_1			0x1B7
+#define MADERA_FLL3_NCO_TEST_0				0x1B8
+#define MADERA_FLL3_CONTROL_7				0x1B9
+#define MADERA_FLL3_SYNCHRONISER_1			0x1C1
+#define MADERA_FLL3_SYNCHRONISER_2			0x1C2
+#define MADERA_FLL3_SYNCHRONISER_3			0x1C3
+#define MADERA_FLL3_SYNCHRONISER_4			0x1C4
+#define MADERA_FLL3_SYNCHRONISER_5			0x1C5
+#define MADERA_FLL3_SYNCHRONISER_6			0x1C6
+#define MADERA_FLL3_SYNCHRONISER_7			0x1C7
+#define MADERA_FLL3_SPREAD_SPECTRUM			0x1C9
+#define MADERA_FLL3_GPIO_CLOCK				0x1CA
+#define MADERA_FLLAO_CONTROL_1				0x1D1
+#define MADERA_FLLAO_CONTROL_2				0x1D2
+#define MADERA_FLLAO_CONTROL_3				0x1D3
+#define MADERA_FLLAO_CONTROL_4				0x1D4
+#define MADERA_FLLAO_CONTROL_5				0x1D5
+#define MADERA_FLLAO_CONTROL_6				0x1D6
+#define MADERA_FLLAO_CONTROL_7				0x1D8
+#define MADERA_FLLAO_CONTROL_8				0x1DA
+#define MADERA_FLLAO_CONTROL_9				0x1DB
+#define MADERA_FLLAO_CONTROL_10				0x1DC
+#define MADERA_FLLAO_CONTROL_11				0x1DD
+#define MADERA_MIC_CHARGE_PUMP_1			0x200
+#define MADERA_HP_CHARGE_PUMP_8				0x20B
+#define MADERA_LDO1_CONTROL_1				0x210
+#define MADERA_LDO2_CONTROL_1				0x213
+#define MADERA_MIC_BIAS_CTRL_1				0x218
+#define MADERA_MIC_BIAS_CTRL_2				0x219
+#define MADERA_MIC_BIAS_CTRL_3				0x21A
+#define MADERA_MIC_BIAS_CTRL_4				0x21B
+#define MADERA_MIC_BIAS_CTRL_5				0x21C
+#define MADERA_MIC_BIAS_CTRL_6				0x21E
+#define MADERA_HP_CTRL_1L				0x225
+#define MADERA_HP_CTRL_1R				0x226
+#define MADERA_HP_CTRL_2L				0x227
+#define MADERA_HP_CTRL_2R				0x228
+#define MADERA_HP_CTRL_3L				0x229
+#define MADERA_HP_CTRL_3R				0x22A
+#define MADERA_DCS_HP1L_CONTROL				0x232
+#define MADERA_DCS_HP1R_CONTROL				0x238
+#define MADERA_EDRE_HP_STEREO_CONTROL			0x27E
+#define MADERA_ACCESSORY_DETECT_MODE_1			0x293
+#define MADERA_HEADPHONE_DETECT_0			0x299
+#define MADERA_HEADPHONE_DETECT_1			0x29B
+#define MADERA_HEADPHONE_DETECT_2			0x29C
+#define MADERA_HEADPHONE_DETECT_3			0x29D
+#define MADERA_HEADPHONE_DETECT_4			0x29E
+#define MADERA_HEADPHONE_DETECT_5			0x29F
+#define MADERA_MIC_DETECT_1_CONTROL_0			0x2A2
+#define MADERA_MIC_DETECT_1_CONTROL_1			0x2A3
+#define MADERA_MIC_DETECT_1_CONTROL_2			0x2A4
+#define MADERA_MIC_DETECT_1_CONTROL_3			0x2A5
+#define MADERA_MIC_DETECT_1_LEVEL_1			0x2A6
+#define MADERA_MIC_DETECT_1_LEVEL_2			0x2A7
+#define MADERA_MIC_DETECT_1_LEVEL_3			0x2A8
+#define MADERA_MIC_DETECT_1_LEVEL_4			0x2A9
+#define MADERA_MIC_DETECT_1_CONTROL_4			0x2AB
+#define MADERA_MIC_DETECT_2_CONTROL_0			0x2B2
+#define MADERA_MIC_DETECT_2_CONTROL_1			0x2B3
+#define MADERA_MIC_DETECT_2_CONTROL_2			0x2B4
+#define MADERA_MIC_DETECT_2_CONTROL_3			0x2B5
+#define MADERA_MIC_DETECT_2_LEVEL_1			0x2B6
+#define MADERA_MIC_DETECT_2_LEVEL_2			0x2B7
+#define MADERA_MIC_DETECT_2_LEVEL_3			0x2B8
+#define MADERA_MIC_DETECT_2_LEVEL_4			0x2B9
+#define MADERA_MIC_DETECT_2_CONTROL_4			0x2BB
+#define MADERA_MICD_CLAMP_CONTROL			0x2C6
+#define MADERA_GP_SWITCH_1				0x2C8
+#define MADERA_JACK_DETECT_ANALOGUE			0x2D3
+#define MADERA_INPUT_ENABLES				0x300
+#define MADERA_INPUT_ENABLES_STATUS			0x301
+#define MADERA_INPUT_RATE				0x308
+#define MADERA_INPUT_VOLUME_RAMP			0x309
+#define MADERA_HPF_CONTROL				0x30C
+#define MADERA_IN1L_CONTROL				0x310
+#define MADERA_ADC_DIGITAL_VOLUME_1L			0x311
+#define MADERA_DMIC1L_CONTROL				0x312
+#define MADERA_IN1L_RATE_CONTROL			0x313
+#define MADERA_IN1R_CONTROL				0x314
+#define MADERA_ADC_DIGITAL_VOLUME_1R			0x315
+#define MADERA_DMIC1R_CONTROL				0x316
+#define MADERA_IN1R_RATE_CONTROL			0x317
+#define MADERA_IN2L_CONTROL				0x318
+#define MADERA_ADC_DIGITAL_VOLUME_2L			0x319
+#define MADERA_DMIC2L_CONTROL				0x31A
+#define MADERA_IN2L_RATE_CONTROL			0x31B
+#define MADERA_IN2R_CONTROL				0x31C
+#define MADERA_ADC_DIGITAL_VOLUME_2R			0x31D
+#define MADERA_DMIC2R_CONTROL				0x31E
+#define MADERA_IN2R_RATE_CONTROL			0x31F
+#define MADERA_IN3L_CONTROL				0x320
+#define MADERA_ADC_DIGITAL_VOLUME_3L			0x321
+#define MADERA_DMIC3L_CONTROL				0x322
+#define MADERA_IN3L_RATE_CONTROL			0x323
+#define MADERA_IN3R_CONTROL				0x324
+#define MADERA_ADC_DIGITAL_VOLUME_3R			0x325
+#define MADERA_DMIC3R_CONTROL				0x326
+#define MADERA_IN3R_RATE_CONTROL			0x327
+#define MADERA_IN4L_CONTROL				0x328
+#define MADERA_ADC_DIGITAL_VOLUME_4L			0x329
+#define MADERA_DMIC4L_CONTROL				0x32A
+#define MADERA_IN4L_RATE_CONTROL			0x32B
+#define MADERA_IN4R_CONTROL				0x32C
+#define MADERA_ADC_DIGITAL_VOLUME_4R			0x32D
+#define MADERA_DMIC4R_CONTROL				0x32E
+#define MADERA_IN4R_RATE_CONTROL			0x32F
+#define MADERA_IN5L_CONTROL				0x330
+#define MADERA_ADC_DIGITAL_VOLUME_5L			0x331
+#define MADERA_DMIC5L_CONTROL				0x332
+#define MADERA_IN5L_RATE_CONTROL			0x333
+#define MADERA_IN5R_CONTROL				0x334
+#define MADERA_ADC_DIGITAL_VOLUME_5R			0x335
+#define MADERA_DMIC5R_CONTROL				0x336
+#define MADERA_IN5R_RATE_CONTROL			0x337
+#define MADERA_IN6L_CONTROL				0x338
+#define MADERA_ADC_DIGITAL_VOLUME_6L			0x339
+#define MADERA_DMIC6L_CONTROL				0x33A
+#define MADERA_IN6R_CONTROL				0x33C
+#define MADERA_ADC_DIGITAL_VOLUME_6R			0x33D
+#define MADERA_DMIC6R_CONTROL				0x33E
+#define MADERA_OUTPUT_ENABLES_1				0x400
+#define MADERA_OUTPUT_STATUS_1				0x401
+#define MADERA_RAW_OUTPUT_STATUS_1			0x406
+#define MADERA_OUTPUT_RATE_1				0x408
+#define MADERA_OUTPUT_VOLUME_RAMP			0x409
+#define MADERA_OUTPUT_PATH_CONFIG_1L			0x410
+#define MADERA_DAC_DIGITAL_VOLUME_1L			0x411
+#define MADERA_OUTPUT_PATH_CONFIG_1			0x412
+#define MADERA_NOISE_GATE_SELECT_1L			0x413
+#define MADERA_OUTPUT_PATH_CONFIG_1R			0x414
+#define MADERA_DAC_DIGITAL_VOLUME_1R			0x415
+#define MADERA_NOISE_GATE_SELECT_1R			0x417
+#define MADERA_OUTPUT_PATH_CONFIG_2L			0x418
+#define MADERA_DAC_DIGITAL_VOLUME_2L			0x419
+#define MADERA_OUTPUT_PATH_CONFIG_2			0x41A
+#define MADERA_NOISE_GATE_SELECT_2L			0x41B
+#define MADERA_OUTPUT_PATH_CONFIG_2R			0x41C
+#define MADERA_DAC_DIGITAL_VOLUME_2R			0x41D
+#define MADERA_NOISE_GATE_SELECT_2R			0x41F
+#define MADERA_OUTPUT_PATH_CONFIG_3L			0x420
+#define MADERA_DAC_DIGITAL_VOLUME_3L			0x421
+#define MADERA_NOISE_GATE_SELECT_3L			0x423
+#define MADERA_OUTPUT_PATH_CONFIG_3R			0x424
+#define MADERA_DAC_DIGITAL_VOLUME_3R			0x425
+#define MADERA_NOISE_GATE_SELECT_3R			0x427
+#define MADERA_OUTPUT_PATH_CONFIG_4L			0x428
+#define MADERA_DAC_DIGITAL_VOLUME_4L			0x429
+#define MADERA_NOISE_GATE_SELECT_4L			0x42B
+#define MADERA_OUTPUT_PATH_CONFIG_4R			0x42C
+#define MADERA_DAC_DIGITAL_VOLUME_4R			0x42D
+#define MADERA_NOISE_GATE_SELECT_4R			0x42F
+#define MADERA_OUTPUT_PATH_CONFIG_5L			0x430
+#define MADERA_DAC_DIGITAL_VOLUME_5L			0x431
+#define MADERA_NOISE_GATE_SELECT_5L			0x433
+#define MADERA_OUTPUT_PATH_CONFIG_5R			0x434
+#define MADERA_DAC_DIGITAL_VOLUME_5R			0x435
+#define MADERA_NOISE_GATE_SELECT_5R			0x437
+#define MADERA_OUTPUT_PATH_CONFIG_6L			0x438
+#define MADERA_DAC_DIGITAL_VOLUME_6L			0x439
+#define MADERA_NOISE_GATE_SELECT_6L			0x43B
+#define MADERA_OUTPUT_PATH_CONFIG_6R			0x43C
+#define MADERA_DAC_DIGITAL_VOLUME_6R			0x43D
+#define MADERA_NOISE_GATE_SELECT_6R			0x43F
+#define MADERA_DRE_ENABLE				0x440
+#define MADERA_EDRE_ENABLE				0x448
+#define MADERA_EDRE_MANUAL				0x44A
+#define MADERA_DAC_AEC_CONTROL_1			0x450
+#define MADERA_DAC_AEC_CONTROL_2			0x451
+#define MADERA_NOISE_GATE_CONTROL			0x458
+#define MADERA_PDM_SPK1_CTRL_1				0x490
+#define MADERA_PDM_SPK1_CTRL_2				0x491
+#define MADERA_PDM_SPK2_CTRL_1				0x492
+#define MADERA_PDM_SPK2_CTRL_2				0x493
+#define MADERA_HP1_SHORT_CIRCUIT_CTRL			0x4A0
+#define MADERA_HP2_SHORT_CIRCUIT_CTRL			0x4A1
+#define MADERA_HP3_SHORT_CIRCUIT_CTRL			0x4A2
+#define MADERA_HP_TEST_CTRL_1				0x4A4
+#define MADERA_HP_TEST_CTRL_5				0x4A8
+#define MADERA_HP_TEST_CTRL_6				0x4A9
+#define MADERA_AIF1_BCLK_CTRL				0x500
+#define MADERA_AIF1_TX_PIN_CTRL				0x501
+#define MADERA_AIF1_RX_PIN_CTRL				0x502
+#define MADERA_AIF1_RATE_CTRL				0x503
+#define MADERA_AIF1_FORMAT				0x504
+#define MADERA_AIF1_RX_BCLK_RATE			0x506
+#define MADERA_AIF1_FRAME_CTRL_1			0x507
+#define MADERA_AIF1_FRAME_CTRL_2			0x508
+#define MADERA_AIF1_FRAME_CTRL_3			0x509
+#define MADERA_AIF1_FRAME_CTRL_4			0x50A
+#define MADERA_AIF1_FRAME_CTRL_5			0x50B
+#define MADERA_AIF1_FRAME_CTRL_6			0x50C
+#define MADERA_AIF1_FRAME_CTRL_7			0x50D
+#define MADERA_AIF1_FRAME_CTRL_8			0x50E
+#define MADERA_AIF1_FRAME_CTRL_9			0x50F
+#define MADERA_AIF1_FRAME_CTRL_10			0x510
+#define MADERA_AIF1_FRAME_CTRL_11			0x511
+#define MADERA_AIF1_FRAME_CTRL_12			0x512
+#define MADERA_AIF1_FRAME_CTRL_13			0x513
+#define MADERA_AIF1_FRAME_CTRL_14			0x514
+#define MADERA_AIF1_FRAME_CTRL_15			0x515
+#define MADERA_AIF1_FRAME_CTRL_16			0x516
+#define MADERA_AIF1_FRAME_CTRL_17			0x517
+#define MADERA_AIF1_FRAME_CTRL_18			0x518
+#define MADERA_AIF1_TX_ENABLES				0x519
+#define MADERA_AIF1_RX_ENABLES				0x51A
+#define MADERA_AIF1_FORCE_WRITE				0x51B
+#define MADERA_AIF2_BCLK_CTRL				0x540
+#define MADERA_AIF2_TX_PIN_CTRL				0x541
+#define MADERA_AIF2_RX_PIN_CTRL				0x542
+#define MADERA_AIF2_RATE_CTRL				0x543
+#define MADERA_AIF2_FORMAT				0x544
+#define MADERA_AIF2_RX_BCLK_RATE			0x546
+#define MADERA_AIF2_FRAME_CTRL_1			0x547
+#define MADERA_AIF2_FRAME_CTRL_2			0x548
+#define MADERA_AIF2_FRAME_CTRL_3			0x549
+#define MADERA_AIF2_FRAME_CTRL_4			0x54A
+#define MADERA_AIF2_FRAME_CTRL_5			0x54B
+#define MADERA_AIF2_FRAME_CTRL_6			0x54C
+#define MADERA_AIF2_FRAME_CTRL_7			0x54D
+#define MADERA_AIF2_FRAME_CTRL_8			0x54E
+#define MADERA_AIF2_FRAME_CTRL_9			0x54F
+#define MADERA_AIF2_FRAME_CTRL_10			0x550
+#define MADERA_AIF2_FRAME_CTRL_11			0x551
+#define MADERA_AIF2_FRAME_CTRL_12			0x552
+#define MADERA_AIF2_FRAME_CTRL_13			0x553
+#define MADERA_AIF2_FRAME_CTRL_14			0x554
+#define MADERA_AIF2_FRAME_CTRL_15			0x555
+#define MADERA_AIF2_FRAME_CTRL_16			0x556
+#define MADERA_AIF2_FRAME_CTRL_17			0x557
+#define MADERA_AIF2_FRAME_CTRL_18			0x558
+#define MADERA_AIF2_TX_ENABLES				0x559
+#define MADERA_AIF2_RX_ENABLES				0x55A
+#define MADERA_AIF2_FORCE_WRITE				0x55B
+#define MADERA_AIF3_BCLK_CTRL				0x580
+#define MADERA_AIF3_TX_PIN_CTRL				0x581
+#define MADERA_AIF3_RX_PIN_CTRL				0x582
+#define MADERA_AIF3_RATE_CTRL				0x583
+#define MADERA_AIF3_FORMAT				0x584
+#define MADERA_AIF3_RX_BCLK_RATE			0x586
+#define MADERA_AIF3_FRAME_CTRL_1			0x587
+#define MADERA_AIF3_FRAME_CTRL_2			0x588
+#define MADERA_AIF3_FRAME_CTRL_3			0x589
+#define MADERA_AIF3_FRAME_CTRL_4			0x58A
+#define MADERA_AIF3_FRAME_CTRL_11			0x591
+#define MADERA_AIF3_FRAME_CTRL_12			0x592
+#define MADERA_AIF3_TX_ENABLES				0x599
+#define MADERA_AIF3_RX_ENABLES				0x59A
+#define MADERA_AIF3_FORCE_WRITE				0x59B
+#define MADERA_AIF4_BCLK_CTRL				0x5A0
+#define MADERA_AIF4_TX_PIN_CTRL				0x5A1
+#define MADERA_AIF4_RX_PIN_CTRL				0x5A2
+#define MADERA_AIF4_RATE_CTRL				0x5A3
+#define MADERA_AIF4_FORMAT				0x5A4
+#define MADERA_AIF4_RX_BCLK_RATE			0x5A6
+#define MADERA_AIF4_FRAME_CTRL_1			0x5A7
+#define MADERA_AIF4_FRAME_CTRL_2			0x5A8
+#define MADERA_AIF4_FRAME_CTRL_3			0x5A9
+#define MADERA_AIF4_FRAME_CTRL_4			0x5AA
+#define MADERA_AIF4_FRAME_CTRL_11			0x5B1
+#define MADERA_AIF4_FRAME_CTRL_12			0x5B2
+#define MADERA_AIF4_TX_ENABLES				0x5B9
+#define MADERA_AIF4_RX_ENABLES				0x5BA
+#define MADERA_AIF4_FORCE_WRITE				0x5BB
+#define MADERA_SPD1_TX_CONTROL				0x5C2
+#define MADERA_SPD1_TX_CHANNEL_STATUS_1			0x5C3
+#define MADERA_SPD1_TX_CHANNEL_STATUS_2			0x5C4
+#define MADERA_SPD1_TX_CHANNEL_STATUS_3			0x5C5
+#define MADERA_SLIMBUS_FRAMER_REF_GEAR			0x5E3
+#define MADERA_SLIMBUS_RATES_1				0x5E5
+#define MADERA_SLIMBUS_RATES_2				0x5E6
+#define MADERA_SLIMBUS_RATES_3				0x5E7
+#define MADERA_SLIMBUS_RATES_4				0x5E8
+#define MADERA_SLIMBUS_RATES_5				0x5E9
+#define MADERA_SLIMBUS_RATES_6				0x5EA
+#define MADERA_SLIMBUS_RATES_7				0x5EB
+#define MADERA_SLIMBUS_RATES_8				0x5EC
+#define MADERA_SLIMBUS_RX_CHANNEL_ENABLE		0x5F5
+#define MADERA_SLIMBUS_TX_CHANNEL_ENABLE		0x5F6
+#define MADERA_SLIMBUS_RX_PORT_STATUS			0x5F7
+#define MADERA_SLIMBUS_TX_PORT_STATUS			0x5F8
+#define MADERA_PWM1MIX_INPUT_1_SOURCE			0x640
+#define MADERA_PWM1MIX_INPUT_1_VOLUME			0x641
+#define MADERA_PWM1MIX_INPUT_2_SOURCE			0x642
+#define MADERA_PWM1MIX_INPUT_2_VOLUME			0x643
+#define MADERA_PWM1MIX_INPUT_3_SOURCE			0x644
+#define MADERA_PWM1MIX_INPUT_3_VOLUME			0x645
+#define MADERA_PWM1MIX_INPUT_4_SOURCE			0x646
+#define MADERA_PWM1MIX_INPUT_4_VOLUME			0x647
+#define MADERA_PWM2MIX_INPUT_1_SOURCE			0x648
+#define MADERA_PWM2MIX_INPUT_1_VOLUME			0x649
+#define MADERA_PWM2MIX_INPUT_2_SOURCE			0x64A
+#define MADERA_PWM2MIX_INPUT_2_VOLUME			0x64B
+#define MADERA_PWM2MIX_INPUT_3_SOURCE			0x64C
+#define MADERA_PWM2MIX_INPUT_3_VOLUME			0x64D
+#define MADERA_PWM2MIX_INPUT_4_SOURCE			0x64E
+#define MADERA_PWM2MIX_INPUT_4_VOLUME			0x64F
+#define MADERA_OUT1LMIX_INPUT_1_SOURCE			0x680
+#define MADERA_OUT1LMIX_INPUT_1_VOLUME			0x681
+#define MADERA_OUT1LMIX_INPUT_2_SOURCE			0x682
+#define MADERA_OUT1LMIX_INPUT_2_VOLUME			0x683
+#define MADERA_OUT1LMIX_INPUT_3_SOURCE			0x684
+#define MADERA_OUT1LMIX_INPUT_3_VOLUME			0x685
+#define MADERA_OUT1LMIX_INPUT_4_SOURCE			0x686
+#define MADERA_OUT1LMIX_INPUT_4_VOLUME			0x687
+#define MADERA_OUT1RMIX_INPUT_1_SOURCE			0x688
+#define MADERA_OUT1RMIX_INPUT_1_VOLUME			0x689
+#define MADERA_OUT1RMIX_INPUT_2_SOURCE			0x68A
+#define MADERA_OUT1RMIX_INPUT_2_VOLUME			0x68B
+#define MADERA_OUT1RMIX_INPUT_3_SOURCE			0x68C
+#define MADERA_OUT1RMIX_INPUT_3_VOLUME			0x68D
+#define MADERA_OUT1RMIX_INPUT_4_SOURCE			0x68E
+#define MADERA_OUT1RMIX_INPUT_4_VOLUME			0x68F
+#define MADERA_OUT2LMIX_INPUT_1_SOURCE			0x690
+#define MADERA_OUT2LMIX_INPUT_1_VOLUME			0x691
+#define MADERA_OUT2LMIX_INPUT_2_SOURCE			0x692
+#define MADERA_OUT2LMIX_INPUT_2_VOLUME			0x693
+#define MADERA_OUT2LMIX_INPUT_3_SOURCE			0x694
+#define MADERA_OUT2LMIX_INPUT_3_VOLUME			0x695
+#define MADERA_OUT2LMIX_INPUT_4_SOURCE			0x696
+#define MADERA_OUT2LMIX_INPUT_4_VOLUME			0x697
+#define MADERA_OUT2RMIX_INPUT_1_SOURCE			0x698
+#define MADERA_OUT2RMIX_INPUT_1_VOLUME			0x699
+#define MADERA_OUT2RMIX_INPUT_2_SOURCE			0x69A
+#define MADERA_OUT2RMIX_INPUT_2_VOLUME			0x69B
+#define MADERA_OUT2RMIX_INPUT_3_SOURCE			0x69C
+#define MADERA_OUT2RMIX_INPUT_3_VOLUME			0x69D
+#define MADERA_OUT2RMIX_INPUT_4_SOURCE			0x69E
+#define MADERA_OUT2RMIX_INPUT_4_VOLUME			0x69F
+#define MADERA_OUT3LMIX_INPUT_1_SOURCE			0x6A0
+#define MADERA_OUT3LMIX_INPUT_1_VOLUME			0x6A1
+#define MADERA_OUT3LMIX_INPUT_2_SOURCE			0x6A2
+#define MADERA_OUT3LMIX_INPUT_2_VOLUME			0x6A3
+#define MADERA_OUT3LMIX_INPUT_3_SOURCE			0x6A4
+#define MADERA_OUT3LMIX_INPUT_3_VOLUME			0x6A5
+#define MADERA_OUT3LMIX_INPUT_4_SOURCE			0x6A6
+#define MADERA_OUT3LMIX_INPUT_4_VOLUME			0x6A7
+#define MADERA_OUT3RMIX_INPUT_1_SOURCE			0x6A8
+#define MADERA_OUT3RMIX_INPUT_1_VOLUME			0x6A9
+#define MADERA_OUT3RMIX_INPUT_2_SOURCE			0x6AA
+#define MADERA_OUT3RMIX_INPUT_2_VOLUME			0x6AB
+#define MADERA_OUT3RMIX_INPUT_3_SOURCE			0x6AC
+#define MADERA_OUT3RMIX_INPUT_3_VOLUME			0x6AD
+#define MADERA_OUT3RMIX_INPUT_4_SOURCE			0x6AE
+#define MADERA_OUT3RMIX_INPUT_4_VOLUME			0x6AF
+#define MADERA_OUT4LMIX_INPUT_1_SOURCE			0x6B0
+#define MADERA_OUT4LMIX_INPUT_1_VOLUME			0x6B1
+#define MADERA_OUT4LMIX_INPUT_2_SOURCE			0x6B2
+#define MADERA_OUT4LMIX_INPUT_2_VOLUME			0x6B3
+#define MADERA_OUT4LMIX_INPUT_3_SOURCE			0x6B4
+#define MADERA_OUT4LMIX_INPUT_3_VOLUME			0x6B5
+#define MADERA_OUT4LMIX_INPUT_4_SOURCE			0x6B6
+#define MADERA_OUT4LMIX_INPUT_4_VOLUME			0x6B7
+#define MADERA_OUT4RMIX_INPUT_1_SOURCE			0x6B8
+#define MADERA_OUT4RMIX_INPUT_1_VOLUME			0x6B9
+#define MADERA_OUT4RMIX_INPUT_2_SOURCE			0x6BA
+#define MADERA_OUT4RMIX_INPUT_2_VOLUME			0x6BB
+#define MADERA_OUT4RMIX_INPUT_3_SOURCE			0x6BC
+#define MADERA_OUT4RMIX_INPUT_3_VOLUME			0x6BD
+#define MADERA_OUT4RMIX_INPUT_4_SOURCE			0x6BE
+#define MADERA_OUT4RMIX_INPUT_4_VOLUME			0x6BF
+#define MADERA_OUT5LMIX_INPUT_1_SOURCE			0x6C0
+#define MADERA_OUT5LMIX_INPUT_1_VOLUME			0x6C1
+#define MADERA_OUT5LMIX_INPUT_2_SOURCE			0x6C2
+#define MADERA_OUT5LMIX_INPUT_2_VOLUME			0x6C3
+#define MADERA_OUT5LMIX_INPUT_3_SOURCE			0x6C4
+#define MADERA_OUT5LMIX_INPUT_3_VOLUME			0x6C5
+#define MADERA_OUT5LMIX_INPUT_4_SOURCE			0x6C6
+#define MADERA_OUT5LMIX_INPUT_4_VOLUME			0x6C7
+#define MADERA_OUT5RMIX_INPUT_1_SOURCE			0x6C8
+#define MADERA_OUT5RMIX_INPUT_1_VOLUME			0x6C9
+#define MADERA_OUT5RMIX_INPUT_2_SOURCE			0x6CA
+#define MADERA_OUT5RMIX_INPUT_2_VOLUME			0x6CB
+#define MADERA_OUT5RMIX_INPUT_3_SOURCE			0x6CC
+#define MADERA_OUT5RMIX_INPUT_3_VOLUME			0x6CD
+#define MADERA_OUT5RMIX_INPUT_4_SOURCE			0x6CE
+#define MADERA_OUT5RMIX_INPUT_4_VOLUME			0x6CF
+#define MADERA_OUT6LMIX_INPUT_1_SOURCE			0x6D0
+#define MADERA_OUT6LMIX_INPUT_1_VOLUME			0x6D1
+#define MADERA_OUT6LMIX_INPUT_2_SOURCE			0x6D2
+#define MADERA_OUT6LMIX_INPUT_2_VOLUME			0x6D3
+#define MADERA_OUT6LMIX_INPUT_3_SOURCE			0x6D4
+#define MADERA_OUT6LMIX_INPUT_3_VOLUME			0x6D5
+#define MADERA_OUT6LMIX_INPUT_4_SOURCE			0x6D6
+#define MADERA_OUT6LMIX_INPUT_4_VOLUME			0x6D7
+#define MADERA_OUT6RMIX_INPUT_1_SOURCE			0x6D8
+#define MADERA_OUT6RMIX_INPUT_1_VOLUME			0x6D9
+#define MADERA_OUT6RMIX_INPUT_2_SOURCE			0x6DA
+#define MADERA_OUT6RMIX_INPUT_2_VOLUME			0x6DB
+#define MADERA_OUT6RMIX_INPUT_3_SOURCE			0x6DC
+#define MADERA_OUT6RMIX_INPUT_3_VOLUME			0x6DD
+#define MADERA_OUT6RMIX_INPUT_4_SOURCE			0x6DE
+#define MADERA_OUT6RMIX_INPUT_4_VOLUME			0x6DF
+#define MADERA_AIF1TX1MIX_INPUT_1_SOURCE		0x700
+#define MADERA_AIF1TX1MIX_INPUT_1_VOLUME		0x701
+#define MADERA_AIF1TX1MIX_INPUT_2_SOURCE		0x702
+#define MADERA_AIF1TX1MIX_INPUT_2_VOLUME		0x703
+#define MADERA_AIF1TX1MIX_INPUT_3_SOURCE		0x704
+#define MADERA_AIF1TX1MIX_INPUT_3_VOLUME		0x705
+#define MADERA_AIF1TX1MIX_INPUT_4_SOURCE		0x706
+#define MADERA_AIF1TX1MIX_INPUT_4_VOLUME		0x707
+#define MADERA_AIF1TX2MIX_INPUT_1_SOURCE		0x708
+#define MADERA_AIF1TX2MIX_INPUT_1_VOLUME		0x709
+#define MADERA_AIF1TX2MIX_INPUT_2_SOURCE		0x70A
+#define MADERA_AIF1TX2MIX_INPUT_2_VOLUME		0x70B
+#define MADERA_AIF1TX2MIX_INPUT_3_SOURCE		0x70C
+#define MADERA_AIF1TX2MIX_INPUT_3_VOLUME		0x70D
+#define MADERA_AIF1TX2MIX_INPUT_4_SOURCE		0x70E
+#define MADERA_AIF1TX2MIX_INPUT_4_VOLUME		0x70F
+#define MADERA_AIF1TX3MIX_INPUT_1_SOURCE		0x710
+#define MADERA_AIF1TX3MIX_INPUT_1_VOLUME		0x711
+#define MADERA_AIF1TX3MIX_INPUT_2_SOURCE		0x712
+#define MADERA_AIF1TX3MIX_INPUT_2_VOLUME		0x713
+#define MADERA_AIF1TX3MIX_INPUT_3_SOURCE		0x714
+#define MADERA_AIF1TX3MIX_INPUT_3_VOLUME		0x715
+#define MADERA_AIF1TX3MIX_INPUT_4_SOURCE		0x716
+#define MADERA_AIF1TX3MIX_INPUT_4_VOLUME		0x717
+#define MADERA_AIF1TX4MIX_INPUT_1_SOURCE		0x718
+#define MADERA_AIF1TX4MIX_INPUT_1_VOLUME		0x719
+#define MADERA_AIF1TX4MIX_INPUT_2_SOURCE		0x71A
+#define MADERA_AIF1TX4MIX_INPUT_2_VOLUME		0x71B
+#define MADERA_AIF1TX4MIX_INPUT_3_SOURCE		0x71C
+#define MADERA_AIF1TX4MIX_INPUT_3_VOLUME		0x71D
+#define MADERA_AIF1TX4MIX_INPUT_4_SOURCE		0x71E
+#define MADERA_AIF1TX4MIX_INPUT_4_VOLUME		0x71F
+#define MADERA_AIF1TX5MIX_INPUT_1_SOURCE		0x720
+#define MADERA_AIF1TX5MIX_INPUT_1_VOLUME		0x721
+#define MADERA_AIF1TX5MIX_INPUT_2_SOURCE		0x722
+#define MADERA_AIF1TX5MIX_INPUT_2_VOLUME		0x723
+#define MADERA_AIF1TX5MIX_INPUT_3_SOURCE		0x724
+#define MADERA_AIF1TX5MIX_INPUT_3_VOLUME		0x725
+#define MADERA_AIF1TX5MIX_INPUT_4_SOURCE		0x726
+#define MADERA_AIF1TX5MIX_INPUT_4_VOLUME		0x727
+#define MADERA_AIF1TX6MIX_INPUT_1_SOURCE		0x728
+#define MADERA_AIF1TX6MIX_INPUT_1_VOLUME		0x729
+#define MADERA_AIF1TX6MIX_INPUT_2_SOURCE		0x72A
+#define MADERA_AIF1TX6MIX_INPUT_2_VOLUME		0x72B
+#define MADERA_AIF1TX6MIX_INPUT_3_SOURCE		0x72C
+#define MADERA_AIF1TX6MIX_INPUT_3_VOLUME		0x72D
+#define MADERA_AIF1TX6MIX_INPUT_4_SOURCE		0x72E
+#define MADERA_AIF1TX6MIX_INPUT_4_VOLUME		0x72F
+#define MADERA_AIF1TX7MIX_INPUT_1_SOURCE		0x730
+#define MADERA_AIF1TX7MIX_INPUT_1_VOLUME		0x731
+#define MADERA_AIF1TX7MIX_INPUT_2_SOURCE		0x732
+#define MADERA_AIF1TX7MIX_INPUT_2_VOLUME		0x733
+#define MADERA_AIF1TX7MIX_INPUT_3_SOURCE		0x734
+#define MADERA_AIF1TX7MIX_INPUT_3_VOLUME		0x735
+#define MADERA_AIF1TX7MIX_INPUT_4_SOURCE		0x736
+#define MADERA_AIF1TX7MIX_INPUT_4_VOLUME		0x737
+#define MADERA_AIF1TX8MIX_INPUT_1_SOURCE		0x738
+#define MADERA_AIF1TX8MIX_INPUT_1_VOLUME		0x739
+#define MADERA_AIF1TX8MIX_INPUT_2_SOURCE		0x73A
+#define MADERA_AIF1TX8MIX_INPUT_2_VOLUME		0x73B
+#define MADERA_AIF1TX8MIX_INPUT_3_SOURCE		0x73C
+#define MADERA_AIF1TX8MIX_INPUT_3_VOLUME		0x73D
+#define MADERA_AIF1TX8MIX_INPUT_4_SOURCE		0x73E
+#define MADERA_AIF1TX8MIX_INPUT_4_VOLUME		0x73F
+#define MADERA_AIF2TX1MIX_INPUT_1_SOURCE		0x740
+#define MADERA_AIF2TX1MIX_INPUT_1_VOLUME		0x741
+#define MADERA_AIF2TX1MIX_INPUT_2_SOURCE		0x742
+#define MADERA_AIF2TX1MIX_INPUT_2_VOLUME		0x743
+#define MADERA_AIF2TX1MIX_INPUT_3_SOURCE		0x744
+#define MADERA_AIF2TX1MIX_INPUT_3_VOLUME		0x745
+#define MADERA_AIF2TX1MIX_INPUT_4_SOURCE		0x746
+#define MADERA_AIF2TX1MIX_INPUT_4_VOLUME		0x747
+#define MADERA_AIF2TX2MIX_INPUT_1_SOURCE		0x748
+#define MADERA_AIF2TX2MIX_INPUT_1_VOLUME		0x749
+#define MADERA_AIF2TX2MIX_INPUT_2_SOURCE		0x74A
+#define MADERA_AIF2TX2MIX_INPUT_2_VOLUME		0x74B
+#define MADERA_AIF2TX2MIX_INPUT_3_SOURCE		0x74C
+#define MADERA_AIF2TX2MIX_INPUT_3_VOLUME		0x74D
+#define MADERA_AIF2TX2MIX_INPUT_4_SOURCE		0x74E
+#define MADERA_AIF2TX2MIX_INPUT_4_VOLUME		0x74F
+#define MADERA_AIF2TX3MIX_INPUT_1_SOURCE		0x750
+#define MADERA_AIF2TX3MIX_INPUT_1_VOLUME		0x751
+#define MADERA_AIF2TX3MIX_INPUT_2_SOURCE		0x752
+#define MADERA_AIF2TX3MIX_INPUT_2_VOLUME		0x753
+#define MADERA_AIF2TX3MIX_INPUT_3_SOURCE		0x754
+#define MADERA_AIF2TX3MIX_INPUT_3_VOLUME		0x755
+#define MADERA_AIF2TX3MIX_INPUT_4_SOURCE		0x756
+#define MADERA_AIF2TX3MIX_INPUT_4_VOLUME		0x757
+#define MADERA_AIF2TX4MIX_INPUT_1_SOURCE		0x758
+#define MADERA_AIF2TX4MIX_INPUT_1_VOLUME		0x759
+#define MADERA_AIF2TX4MIX_INPUT_2_SOURCE		0x75A
+#define MADERA_AIF2TX4MIX_INPUT_2_VOLUME		0x75B
+#define MADERA_AIF2TX4MIX_INPUT_3_SOURCE		0x75C
+#define MADERA_AIF2TX4MIX_INPUT_3_VOLUME		0x75D
+#define MADERA_AIF2TX4MIX_INPUT_4_SOURCE		0x75E
+#define MADERA_AIF2TX4MIX_INPUT_4_VOLUME		0x75F
+#define MADERA_AIF2TX5MIX_INPUT_1_SOURCE		0x760
+#define MADERA_AIF2TX5MIX_INPUT_1_VOLUME		0x761
+#define MADERA_AIF2TX5MIX_INPUT_2_SOURCE		0x762
+#define MADERA_AIF2TX5MIX_INPUT_2_VOLUME		0x763
+#define MADERA_AIF2TX5MIX_INPUT_3_SOURCE		0x764
+#define MADERA_AIF2TX5MIX_INPUT_3_VOLUME		0x765
+#define MADERA_AIF2TX5MIX_INPUT_4_SOURCE		0x766
+#define MADERA_AIF2TX5MIX_INPUT_4_VOLUME		0x767
+#define MADERA_AIF2TX6MIX_INPUT_1_SOURCE		0x768
+#define MADERA_AIF2TX6MIX_INPUT_1_VOLUME		0x769
+#define MADERA_AIF2TX6MIX_INPUT_2_SOURCE		0x76A
+#define MADERA_AIF2TX6MIX_INPUT_2_VOLUME		0x76B
+#define MADERA_AIF2TX6MIX_INPUT_3_SOURCE		0x76C
+#define MADERA_AIF2TX6MIX_INPUT_3_VOLUME		0x76D
+#define MADERA_AIF2TX6MIX_INPUT_4_SOURCE		0x76E
+#define MADERA_AIF2TX6MIX_INPUT_4_VOLUME		0x76F
+#define MADERA_AIF2TX7MIX_INPUT_1_SOURCE		0x770
+#define MADERA_AIF2TX7MIX_INPUT_1_VOLUME		0x771
+#define MADERA_AIF2TX7MIX_INPUT_2_SOURCE		0x772
+#define MADERA_AIF2TX7MIX_INPUT_2_VOLUME		0x773
+#define MADERA_AIF2TX7MIX_INPUT_3_SOURCE		0x774
+#define MADERA_AIF2TX7MIX_INPUT_3_VOLUME		0x775
+#define MADERA_AIF2TX7MIX_INPUT_4_SOURCE		0x776
+#define MADERA_AIF2TX7MIX_INPUT_4_VOLUME		0x777
+#define MADERA_AIF2TX8MIX_INPUT_1_SOURCE		0x778
+#define MADERA_AIF2TX8MIX_INPUT_1_VOLUME		0x779
+#define MADERA_AIF2TX8MIX_INPUT_2_SOURCE		0x77A
+#define MADERA_AIF2TX8MIX_INPUT_2_VOLUME		0x77B
+#define MADERA_AIF2TX8MIX_INPUT_3_SOURCE		0x77C
+#define MADERA_AIF2TX8MIX_INPUT_3_VOLUME		0x77D
+#define MADERA_AIF2TX8MIX_INPUT_4_SOURCE		0x77E
+#define MADERA_AIF2TX8MIX_INPUT_4_VOLUME		0x77F
+#define MADERA_AIF3TX1MIX_INPUT_1_SOURCE		0x780
+#define MADERA_AIF3TX1MIX_INPUT_1_VOLUME		0x781
+#define MADERA_AIF3TX1MIX_INPUT_2_SOURCE		0x782
+#define MADERA_AIF3TX1MIX_INPUT_2_VOLUME		0x783
+#define MADERA_AIF3TX1MIX_INPUT_3_SOURCE		0x784
+#define MADERA_AIF3TX1MIX_INPUT_3_VOLUME		0x785
+#define MADERA_AIF3TX1MIX_INPUT_4_SOURCE		0x786
+#define MADERA_AIF3TX1MIX_INPUT_4_VOLUME		0x787
+#define MADERA_AIF3TX2MIX_INPUT_1_SOURCE		0x788
+#define MADERA_AIF3TX2MIX_INPUT_1_VOLUME		0x789
+#define MADERA_AIF3TX2MIX_INPUT_2_SOURCE		0x78A
+#define MADERA_AIF3TX2MIX_INPUT_2_VOLUME		0x78B
+#define MADERA_AIF3TX2MIX_INPUT_3_SOURCE		0x78C
+#define MADERA_AIF3TX2MIX_INPUT_3_VOLUME		0x78D
+#define MADERA_AIF3TX2MIX_INPUT_4_SOURCE		0x78E
+#define MADERA_AIF3TX2MIX_INPUT_4_VOLUME		0x78F
+#define MADERA_AIF4TX1MIX_INPUT_1_SOURCE		0x7A0
+#define MADERA_AIF4TX1MIX_INPUT_1_VOLUME		0x7A1
+#define MADERA_AIF4TX1MIX_INPUT_2_SOURCE		0x7A2
+#define MADERA_AIF4TX1MIX_INPUT_2_VOLUME		0x7A3
+#define MADERA_AIF4TX1MIX_INPUT_3_SOURCE		0x7A4
+#define MADERA_AIF4TX1MIX_INPUT_3_VOLUME		0x7A5
+#define MADERA_AIF4TX1MIX_INPUT_4_SOURCE		0x7A6
+#define MADERA_AIF4TX1MIX_INPUT_4_VOLUME		0x7A7
+#define MADERA_AIF4TX2MIX_INPUT_1_SOURCE		0x7A8
+#define MADERA_AIF4TX2MIX_INPUT_1_VOLUME		0x7A9
+#define MADERA_AIF4TX2MIX_INPUT_2_SOURCE		0x7AA
+#define MADERA_AIF4TX2MIX_INPUT_2_VOLUME		0x7AB
+#define MADERA_AIF4TX2MIX_INPUT_3_SOURCE		0x7AC
+#define MADERA_AIF4TX2MIX_INPUT_3_VOLUME		0x7AD
+#define MADERA_AIF4TX2MIX_INPUT_4_SOURCE		0x7AE
+#define MADERA_AIF4TX2MIX_INPUT_4_VOLUME		0x7AF
+#define MADERA_SLIMTX1MIX_INPUT_1_SOURCE		0x7C0
+#define MADERA_SLIMTX1MIX_INPUT_1_VOLUME		0x7C1
+#define MADERA_SLIMTX1MIX_INPUT_2_SOURCE		0x7C2
+#define MADERA_SLIMTX1MIX_INPUT_2_VOLUME		0x7C3
+#define MADERA_SLIMTX1MIX_INPUT_3_SOURCE		0x7C4
+#define MADERA_SLIMTX1MIX_INPUT_3_VOLUME		0x7C5
+#define MADERA_SLIMTX1MIX_INPUT_4_SOURCE		0x7C6
+#define MADERA_SLIMTX1MIX_INPUT_4_VOLUME		0x7C7
+#define MADERA_SLIMTX2MIX_INPUT_1_SOURCE		0x7C8
+#define MADERA_SLIMTX2MIX_INPUT_1_VOLUME		0x7C9
+#define MADERA_SLIMTX2MIX_INPUT_2_SOURCE		0x7CA
+#define MADERA_SLIMTX2MIX_INPUT_2_VOLUME		0x7CB
+#define MADERA_SLIMTX2MIX_INPUT_3_SOURCE		0x7CC
+#define MADERA_SLIMTX2MIX_INPUT_3_VOLUME		0x7CD
+#define MADERA_SLIMTX2MIX_INPUT_4_SOURCE		0x7CE
+#define MADERA_SLIMTX2MIX_INPUT_4_VOLUME		0x7CF
+#define MADERA_SLIMTX3MIX_INPUT_1_SOURCE		0x7D0
+#define MADERA_SLIMTX3MIX_INPUT_1_VOLUME		0x7D1
+#define MADERA_SLIMTX3MIX_INPUT_2_SOURCE		0x7D2
+#define MADERA_SLIMTX3MIX_INPUT_2_VOLUME		0x7D3
+#define MADERA_SLIMTX3MIX_INPUT_3_SOURCE		0x7D4
+#define MADERA_SLIMTX3MIX_INPUT_3_VOLUME		0x7D5
+#define MADERA_SLIMTX3MIX_INPUT_4_SOURCE		0x7D6
+#define MADERA_SLIMTX3MIX_INPUT_4_VOLUME		0x7D7
+#define MADERA_SLIMTX4MIX_INPUT_1_SOURCE		0x7D8
+#define MADERA_SLIMTX4MIX_INPUT_1_VOLUME		0x7D9
+#define MADERA_SLIMTX4MIX_INPUT_2_SOURCE		0x7DA
+#define MADERA_SLIMTX4MIX_INPUT_2_VOLUME		0x7DB
+#define MADERA_SLIMTX4MIX_INPUT_3_SOURCE		0x7DC
+#define MADERA_SLIMTX4MIX_INPUT_3_VOLUME		0x7DD
+#define MADERA_SLIMTX4MIX_INPUT_4_SOURCE		0x7DE
+#define MADERA_SLIMTX4MIX_INPUT_4_VOLUME		0x7DF
+#define MADERA_SLIMTX5MIX_INPUT_1_SOURCE		0x7E0
+#define MADERA_SLIMTX5MIX_INPUT_1_VOLUME		0x7E1
+#define MADERA_SLIMTX5MIX_INPUT_2_SOURCE		0x7E2
+#define MADERA_SLIMTX5MIX_INPUT_2_VOLUME		0x7E3
+#define MADERA_SLIMTX5MIX_INPUT_3_SOURCE		0x7E4
+#define MADERA_SLIMTX5MIX_INPUT_3_VOLUME		0x7E5
+#define MADERA_SLIMTX5MIX_INPUT_4_SOURCE		0x7E6
+#define MADERA_SLIMTX5MIX_INPUT_4_VOLUME		0x7E7
+#define MADERA_SLIMTX6MIX_INPUT_1_SOURCE		0x7E8
+#define MADERA_SLIMTX6MIX_INPUT_1_VOLUME		0x7E9
+#define MADERA_SLIMTX6MIX_INPUT_2_SOURCE		0x7EA
+#define MADERA_SLIMTX6MIX_INPUT_2_VOLUME		0x7EB
+#define MADERA_SLIMTX6MIX_INPUT_3_SOURCE		0x7EC
+#define MADERA_SLIMTX6MIX_INPUT_3_VOLUME		0x7ED
+#define MADERA_SLIMTX6MIX_INPUT_4_SOURCE		0x7EE
+#define MADERA_SLIMTX6MIX_INPUT_4_VOLUME		0x7EF
+#define MADERA_SLIMTX7MIX_INPUT_1_SOURCE		0x7F0
+#define MADERA_SLIMTX7MIX_INPUT_1_VOLUME		0x7F1
+#define MADERA_SLIMTX7MIX_INPUT_2_SOURCE		0x7F2
+#define MADERA_SLIMTX7MIX_INPUT_2_VOLUME		0x7F3
+#define MADERA_SLIMTX7MIX_INPUT_3_SOURCE		0x7F4
+#define MADERA_SLIMTX7MIX_INPUT_3_VOLUME		0x7F5
+#define MADERA_SLIMTX7MIX_INPUT_4_SOURCE		0x7F6
+#define MADERA_SLIMTX7MIX_INPUT_4_VOLUME		0x7F7
+#define MADERA_SLIMTX8MIX_INPUT_1_SOURCE		0x7F8
+#define MADERA_SLIMTX8MIX_INPUT_1_VOLUME		0x7F9
+#define MADERA_SLIMTX8MIX_INPUT_2_SOURCE		0x7FA
+#define MADERA_SLIMTX8MIX_INPUT_2_VOLUME		0x7FB
+#define MADERA_SLIMTX8MIX_INPUT_3_SOURCE		0x7FC
+#define MADERA_SLIMTX8MIX_INPUT_3_VOLUME		0x7FD
+#define MADERA_SLIMTX8MIX_INPUT_4_SOURCE		0x7FE
+#define MADERA_SLIMTX8MIX_INPUT_4_VOLUME		0x7FF
+#define MADERA_SPDIF1TX1MIX_INPUT_1_SOURCE		0x800
+#define MADERA_SPDIF1TX1MIX_INPUT_1_VOLUME		0x801
+#define MADERA_SPDIF1TX2MIX_INPUT_1_SOURCE		0x808
+#define MADERA_SPDIF1TX2MIX_INPUT_1_VOLUME		0x809
+#define MADERA_EQ1MIX_INPUT_1_SOURCE			0x880
+#define MADERA_EQ1MIX_INPUT_1_VOLUME			0x881
+#define MADERA_EQ1MIX_INPUT_2_SOURCE			0x882
+#define MADERA_EQ1MIX_INPUT_2_VOLUME			0x883
+#define MADERA_EQ1MIX_INPUT_3_SOURCE			0x884
+#define MADERA_EQ1MIX_INPUT_3_VOLUME			0x885
+#define MADERA_EQ1MIX_INPUT_4_SOURCE			0x886
+#define MADERA_EQ1MIX_INPUT_4_VOLUME			0x887
+#define MADERA_EQ2MIX_INPUT_1_SOURCE			0x888
+#define MADERA_EQ2MIX_INPUT_1_VOLUME			0x889
+#define MADERA_EQ2MIX_INPUT_2_SOURCE			0x88A
+#define MADERA_EQ2MIX_INPUT_2_VOLUME			0x88B
+#define MADERA_EQ2MIX_INPUT_3_SOURCE			0x88C
+#define MADERA_EQ2MIX_INPUT_3_VOLUME			0x88D
+#define MADERA_EQ2MIX_INPUT_4_SOURCE			0x88E
+#define MADERA_EQ2MIX_INPUT_4_VOLUME			0x88F
+#define MADERA_EQ3MIX_INPUT_1_SOURCE			0x890
+#define MADERA_EQ3MIX_INPUT_1_VOLUME			0x891
+#define MADERA_EQ3MIX_INPUT_2_SOURCE			0x892
+#define MADERA_EQ3MIX_INPUT_2_VOLUME			0x893
+#define MADERA_EQ3MIX_INPUT_3_SOURCE			0x894
+#define MADERA_EQ3MIX_INPUT_3_VOLUME			0x895
+#define MADERA_EQ3MIX_INPUT_4_SOURCE			0x896
+#define MADERA_EQ3MIX_INPUT_4_VOLUME			0x897
+#define MADERA_EQ4MIX_INPUT_1_SOURCE			0x898
+#define MADERA_EQ4MIX_INPUT_1_VOLUME			0x899
+#define MADERA_EQ4MIX_INPUT_2_SOURCE			0x89A
+#define MADERA_EQ4MIX_INPUT_2_VOLUME			0x89B
+#define MADERA_EQ4MIX_INPUT_3_SOURCE			0x89C
+#define MADERA_EQ4MIX_INPUT_3_VOLUME			0x89D
+#define MADERA_EQ4MIX_INPUT_4_SOURCE			0x89E
+#define MADERA_EQ4MIX_INPUT_4_VOLUME			0x89F
+#define MADERA_DRC1LMIX_INPUT_1_SOURCE			0x8C0
+#define MADERA_DRC1LMIX_INPUT_1_VOLUME			0x8C1
+#define MADERA_DRC1LMIX_INPUT_2_SOURCE			0x8C2
+#define MADERA_DRC1LMIX_INPUT_2_VOLUME			0x8C3
+#define MADERA_DRC1LMIX_INPUT_3_SOURCE			0x8C4
+#define MADERA_DRC1LMIX_INPUT_3_VOLUME			0x8C5
+#define MADERA_DRC1LMIX_INPUT_4_SOURCE			0x8C6
+#define MADERA_DRC1LMIX_INPUT_4_VOLUME			0x8C7
+#define MADERA_DRC1RMIX_INPUT_1_SOURCE			0x8C8
+#define MADERA_DRC1RMIX_INPUT_1_VOLUME			0x8C9
+#define MADERA_DRC1RMIX_INPUT_2_SOURCE			0x8CA
+#define MADERA_DRC1RMIX_INPUT_2_VOLUME			0x8CB
+#define MADERA_DRC1RMIX_INPUT_3_SOURCE			0x8CC
+#define MADERA_DRC1RMIX_INPUT_3_VOLUME			0x8CD
+#define MADERA_DRC1RMIX_INPUT_4_SOURCE			0x8CE
+#define MADERA_DRC1RMIX_INPUT_4_VOLUME			0x8CF
+#define MADERA_DRC2LMIX_INPUT_1_SOURCE			0x8D0
+#define MADERA_DRC2LMIX_INPUT_1_VOLUME			0x8D1
+#define MADERA_DRC2LMIX_INPUT_2_SOURCE			0x8D2
+#define MADERA_DRC2LMIX_INPUT_2_VOLUME			0x8D3
+#define MADERA_DRC2LMIX_INPUT_3_SOURCE			0x8D4
+#define MADERA_DRC2LMIX_INPUT_3_VOLUME			0x8D5
+#define MADERA_DRC2LMIX_INPUT_4_SOURCE			0x8D6
+#define MADERA_DRC2LMIX_INPUT_4_VOLUME			0x8D7
+#define MADERA_DRC2RMIX_INPUT_1_SOURCE			0x8D8
+#define MADERA_DRC2RMIX_INPUT_1_VOLUME			0x8D9
+#define MADERA_DRC2RMIX_INPUT_2_SOURCE			0x8DA
+#define MADERA_DRC2RMIX_INPUT_2_VOLUME			0x8DB
+#define MADERA_DRC2RMIX_INPUT_3_SOURCE			0x8DC
+#define MADERA_DRC2RMIX_INPUT_3_VOLUME			0x8DD
+#define MADERA_DRC2RMIX_INPUT_4_SOURCE			0x8DE
+#define MADERA_DRC2RMIX_INPUT_4_VOLUME			0x8DF
+#define MADERA_HPLP1MIX_INPUT_1_SOURCE			0x900
+#define MADERA_HPLP1MIX_INPUT_1_VOLUME			0x901
+#define MADERA_HPLP1MIX_INPUT_2_SOURCE			0x902
+#define MADERA_HPLP1MIX_INPUT_2_VOLUME			0x903
+#define MADERA_HPLP1MIX_INPUT_3_SOURCE			0x904
+#define MADERA_HPLP1MIX_INPUT_3_VOLUME			0x905
+#define MADERA_HPLP1MIX_INPUT_4_SOURCE			0x906
+#define MADERA_HPLP1MIX_INPUT_4_VOLUME			0x907
+#define MADERA_HPLP2MIX_INPUT_1_SOURCE			0x908
+#define MADERA_HPLP2MIX_INPUT_1_VOLUME			0x909
+#define MADERA_HPLP2MIX_INPUT_2_SOURCE			0x90A
+#define MADERA_HPLP2MIX_INPUT_2_VOLUME			0x90B
+#define MADERA_HPLP2MIX_INPUT_3_SOURCE			0x90C
+#define MADERA_HPLP2MIX_INPUT_3_VOLUME			0x90D
+#define MADERA_HPLP2MIX_INPUT_4_SOURCE			0x90E
+#define MADERA_HPLP2MIX_INPUT_4_VOLUME			0x90F
+#define MADERA_HPLP3MIX_INPUT_1_SOURCE			0x910
+#define MADERA_HPLP3MIX_INPUT_1_VOLUME			0x911
+#define MADERA_HPLP3MIX_INPUT_2_SOURCE			0x912
+#define MADERA_HPLP3MIX_INPUT_2_VOLUME			0x913
+#define MADERA_HPLP3MIX_INPUT_3_SOURCE			0x914
+#define MADERA_HPLP3MIX_INPUT_3_VOLUME			0x915
+#define MADERA_HPLP3MIX_INPUT_4_SOURCE			0x916
+#define MADERA_HPLP3MIX_INPUT_4_VOLUME			0x917
+#define MADERA_HPLP4MIX_INPUT_1_SOURCE			0x918
+#define MADERA_HPLP4MIX_INPUT_1_VOLUME			0x919
+#define MADERA_HPLP4MIX_INPUT_2_SOURCE			0x91A
+#define MADERA_HPLP4MIX_INPUT_2_VOLUME			0x91B
+#define MADERA_HPLP4MIX_INPUT_3_SOURCE			0x91C
+#define MADERA_HPLP4MIX_INPUT_3_VOLUME			0x91D
+#define MADERA_HPLP4MIX_INPUT_4_SOURCE			0x91E
+#define MADERA_HPLP4MIX_INPUT_4_VOLUME			0x91F
+#define MADERA_DSP1LMIX_INPUT_1_SOURCE			0x940
+#define MADERA_DSP1LMIX_INPUT_1_VOLUME			0x941
+#define MADERA_DSP1LMIX_INPUT_2_SOURCE			0x942
+#define MADERA_DSP1LMIX_INPUT_2_VOLUME			0x943
+#define MADERA_DSP1LMIX_INPUT_3_SOURCE			0x944
+#define MADERA_DSP1LMIX_INPUT_3_VOLUME			0x945
+#define MADERA_DSP1LMIX_INPUT_4_SOURCE			0x946
+#define MADERA_DSP1LMIX_INPUT_4_VOLUME			0x947
+#define MADERA_DSP1RMIX_INPUT_1_SOURCE			0x948
+#define MADERA_DSP1RMIX_INPUT_1_VOLUME			0x949
+#define MADERA_DSP1RMIX_INPUT_2_SOURCE			0x94A
+#define MADERA_DSP1RMIX_INPUT_2_VOLUME			0x94B
+#define MADERA_DSP1RMIX_INPUT_3_SOURCE			0x94C
+#define MADERA_DSP1RMIX_INPUT_3_VOLUME			0x94D
+#define MADERA_DSP1RMIX_INPUT_4_SOURCE			0x94E
+#define MADERA_DSP1RMIX_INPUT_4_VOLUME			0x94F
+#define MADERA_DSP1AUX1MIX_INPUT_1_SOURCE		0x950
+#define MADERA_DSP1AUX2MIX_INPUT_1_SOURCE		0x958
+#define MADERA_DSP1AUX3MIX_INPUT_1_SOURCE		0x960
+#define MADERA_DSP1AUX4MIX_INPUT_1_SOURCE		0x968
+#define MADERA_DSP1AUX5MIX_INPUT_1_SOURCE		0x970
+#define MADERA_DSP1AUX6MIX_INPUT_1_SOURCE		0x978
+#define MADERA_DSP2LMIX_INPUT_1_SOURCE			0x980
+#define MADERA_DSP2LMIX_INPUT_1_VOLUME			0x981
+#define MADERA_DSP2LMIX_INPUT_2_SOURCE			0x982
+#define MADERA_DSP2LMIX_INPUT_2_VOLUME			0x983
+#define MADERA_DSP2LMIX_INPUT_3_SOURCE			0x984
+#define MADERA_DSP2LMIX_INPUT_3_VOLUME			0x985
+#define MADERA_DSP2LMIX_INPUT_4_SOURCE			0x986
+#define MADERA_DSP2LMIX_INPUT_4_VOLUME			0x987
+#define MADERA_DSP2RMIX_INPUT_1_SOURCE			0x988
+#define MADERA_DSP2RMIX_INPUT_1_VOLUME			0x989
+#define MADERA_DSP2RMIX_INPUT_2_SOURCE			0x98A
+#define MADERA_DSP2RMIX_INPUT_2_VOLUME			0x98B
+#define MADERA_DSP2RMIX_INPUT_3_SOURCE			0x98C
+#define MADERA_DSP2RMIX_INPUT_3_VOLUME			0x98D
+#define MADERA_DSP2RMIX_INPUT_4_SOURCE			0x98E
+#define MADERA_DSP2RMIX_INPUT_4_VOLUME			0x98F
+#define MADERA_DSP2AUX1MIX_INPUT_1_SOURCE		0x990
+#define MADERA_DSP2AUX2MIX_INPUT_1_SOURCE		0x998
+#define MADERA_DSP2AUX3MIX_INPUT_1_SOURCE		0x9A0
+#define MADERA_DSP2AUX4MIX_INPUT_1_SOURCE		0x9A8
+#define MADERA_DSP2AUX5MIX_INPUT_1_SOURCE		0x9B0
+#define MADERA_DSP2AUX6MIX_INPUT_1_SOURCE		0x9B8
+#define MADERA_DSP3LMIX_INPUT_1_SOURCE			0x9C0
+#define MADERA_DSP3LMIX_INPUT_1_VOLUME			0x9C1
+#define MADERA_DSP3LMIX_INPUT_2_SOURCE			0x9C2
+#define MADERA_DSP3LMIX_INPUT_2_VOLUME			0x9C3
+#define MADERA_DSP3LMIX_INPUT_3_SOURCE			0x9C4
+#define MADERA_DSP3LMIX_INPUT_3_VOLUME			0x9C5
+#define MADERA_DSP3LMIX_INPUT_4_SOURCE			0x9C6
+#define MADERA_DSP3LMIX_INPUT_4_VOLUME			0x9C7
+#define MADERA_DSP3RMIX_INPUT_1_SOURCE			0x9C8
+#define MADERA_DSP3RMIX_INPUT_1_VOLUME			0x9C9
+#define MADERA_DSP3RMIX_INPUT_2_SOURCE			0x9CA
+#define MADERA_DSP3RMIX_INPUT_2_VOLUME			0x9CB
+#define MADERA_DSP3RMIX_INPUT_3_SOURCE			0x9CC
+#define MADERA_DSP3RMIX_INPUT_3_VOLUME			0x9CD
+#define MADERA_DSP3RMIX_INPUT_4_SOURCE			0x9CE
+#define MADERA_DSP3RMIX_INPUT_4_VOLUME			0x9CF
+#define MADERA_DSP3AUX1MIX_INPUT_1_SOURCE		0x9D0
+#define MADERA_DSP3AUX2MIX_INPUT_1_SOURCE		0x9D8
+#define MADERA_DSP3AUX3MIX_INPUT_1_SOURCE		0x9E0
+#define MADERA_DSP3AUX4MIX_INPUT_1_SOURCE		0x9E8
+#define MADERA_DSP3AUX5MIX_INPUT_1_SOURCE		0x9F0
+#define MADERA_DSP3AUX6MIX_INPUT_1_SOURCE		0x9F8
+#define MADERA_DSP4LMIX_INPUT_1_SOURCE			0xA00
+#define MADERA_DSP4LMIX_INPUT_1_VOLUME			0xA01
+#define MADERA_DSP4LMIX_INPUT_2_SOURCE			0xA02
+#define MADERA_DSP4LMIX_INPUT_2_VOLUME			0xA03
+#define MADERA_DSP4LMIX_INPUT_3_SOURCE			0xA04
+#define MADERA_DSP4LMIX_INPUT_3_VOLUME			0xA05
+#define MADERA_DSP4LMIX_INPUT_4_SOURCE			0xA06
+#define MADERA_DSP4LMIX_INPUT_4_VOLUME			0xA07
+#define MADERA_DSP4RMIX_INPUT_1_SOURCE			0xA08
+#define MADERA_DSP4RMIX_INPUT_1_VOLUME			0xA09
+#define MADERA_DSP4RMIX_INPUT_2_SOURCE			0xA0A
+#define MADERA_DSP4RMIX_INPUT_2_VOLUME			0xA0B
+#define MADERA_DSP4RMIX_INPUT_3_SOURCE			0xA0C
+#define MADERA_DSP4RMIX_INPUT_3_VOLUME			0xA0D
+#define MADERA_DSP4RMIX_INPUT_4_SOURCE			0xA0E
+#define MADERA_DSP4RMIX_INPUT_4_VOLUME			0xA0F
+#define MADERA_DSP4AUX1MIX_INPUT_1_SOURCE		0xA10
+#define MADERA_DSP4AUX2MIX_INPUT_1_SOURCE		0xA18
+#define MADERA_DSP4AUX3MIX_INPUT_1_SOURCE		0xA20
+#define MADERA_DSP4AUX4MIX_INPUT_1_SOURCE		0xA28
+#define MADERA_DSP4AUX5MIX_INPUT_1_SOURCE		0xA30
+#define MADERA_DSP4AUX6MIX_INPUT_1_SOURCE		0xA38
+#define MADERA_DSP5LMIX_INPUT_1_SOURCE			0xA40
+#define MADERA_DSP5LMIX_INPUT_1_VOLUME			0xA41
+#define MADERA_DSP5LMIX_INPUT_2_SOURCE			0xA42
+#define MADERA_DSP5LMIX_INPUT_2_VOLUME			0xA43
+#define MADERA_DSP5LMIX_INPUT_3_SOURCE			0xA44
+#define MADERA_DSP5LMIX_INPUT_3_VOLUME			0xA45
+#define MADERA_DSP5LMIX_INPUT_4_SOURCE			0xA46
+#define MADERA_DSP5LMIX_INPUT_4_VOLUME			0xA47
+#define MADERA_DSP5RMIX_INPUT_1_SOURCE			0xA48
+#define MADERA_DSP5RMIX_INPUT_1_VOLUME			0xA49
+#define MADERA_DSP5RMIX_INPUT_2_SOURCE			0xA4A
+#define MADERA_DSP5RMIX_INPUT_2_VOLUME			0xA4B
+#define MADERA_DSP5RMIX_INPUT_3_SOURCE			0xA4C
+#define MADERA_DSP5RMIX_INPUT_3_VOLUME			0xA4D
+#define MADERA_DSP5RMIX_INPUT_4_SOURCE			0xA4E
+#define MADERA_DSP5RMIX_INPUT_4_VOLUME			0xA4F
+#define MADERA_DSP5AUX1MIX_INPUT_1_SOURCE		0xA50
+#define MADERA_DSP5AUX2MIX_INPUT_1_SOURCE		0xA58
+#define MADERA_DSP5AUX3MIX_INPUT_1_SOURCE		0xA60
+#define MADERA_DSP5AUX4MIX_INPUT_1_SOURCE		0xA68
+#define MADERA_DSP5AUX5MIX_INPUT_1_SOURCE		0xA70
+#define MADERA_DSP5AUX6MIX_INPUT_1_SOURCE		0xA78
+#define MADERA_ASRC1_1LMIX_INPUT_1_SOURCE		0xA80
+#define MADERA_ASRC1_1RMIX_INPUT_1_SOURCE		0xA88
+#define MADERA_ASRC1_2LMIX_INPUT_1_SOURCE		0xA90
+#define MADERA_ASRC1_2RMIX_INPUT_1_SOURCE		0xA98
+#define MADERA_ASRC2_1LMIX_INPUT_1_SOURCE		0xAA0
+#define MADERA_ASRC2_1RMIX_INPUT_1_SOURCE		0xAA8
+#define MADERA_ASRC2_2LMIX_INPUT_1_SOURCE		0xAB0
+#define MADERA_ASRC2_2RMIX_INPUT_1_SOURCE		0xAB8
+#define MADERA_ISRC1DEC1MIX_INPUT_1_SOURCE		0xB00
+#define MADERA_ISRC1DEC2MIX_INPUT_1_SOURCE		0xB08
+#define MADERA_ISRC1DEC3MIX_INPUT_1_SOURCE		0xB10
+#define MADERA_ISRC1DEC4MIX_INPUT_1_SOURCE		0xB18
+#define MADERA_ISRC1INT1MIX_INPUT_1_SOURCE		0xB20
+#define MADERA_ISRC1INT2MIX_INPUT_1_SOURCE		0xB28
+#define MADERA_ISRC1INT3MIX_INPUT_1_SOURCE		0xB30
+#define MADERA_ISRC1INT4MIX_INPUT_1_SOURCE		0xB38
+#define MADERA_ISRC2DEC1MIX_INPUT_1_SOURCE		0xB40
+#define MADERA_ISRC2DEC2MIX_INPUT_1_SOURCE		0xB48
+#define MADERA_ISRC2DEC3MIX_INPUT_1_SOURCE		0xB50
+#define MADERA_ISRC2DEC4MIX_INPUT_1_SOURCE		0xB58
+#define MADERA_ISRC2INT1MIX_INPUT_1_SOURCE		0xB60
+#define MADERA_ISRC2INT2MIX_INPUT_1_SOURCE		0xB68
+#define MADERA_ISRC2INT3MIX_INPUT_1_SOURCE		0xB70
+#define MADERA_ISRC2INT4MIX_INPUT_1_SOURCE		0xB78
+#define MADERA_ISRC3DEC1MIX_INPUT_1_SOURCE		0xB80
+#define MADERA_ISRC3DEC2MIX_INPUT_1_SOURCE		0xB88
+#define MADERA_ISRC3DEC3MIX_INPUT_1_SOURCE		0xB90
+#define MADERA_ISRC3DEC4MIX_INPUT_1_SOURCE		0xB98
+#define MADERA_ISRC3INT1MIX_INPUT_1_SOURCE		0xBA0
+#define MADERA_ISRC3INT2MIX_INPUT_1_SOURCE		0xBA8
+#define MADERA_ISRC3INT3MIX_INPUT_1_SOURCE		0xBB0
+#define MADERA_ISRC3INT4MIX_INPUT_1_SOURCE		0xBB8
+#define MADERA_ISRC4DEC1MIX_INPUT_1_SOURCE		0xBC0
+#define MADERA_ISRC4DEC2MIX_INPUT_1_SOURCE		0xBC8
+#define MADERA_ISRC4INT1MIX_INPUT_1_SOURCE		0xBE0
+#define MADERA_ISRC4INT2MIX_INPUT_1_SOURCE		0xBE8
+#define MADERA_DSP6LMIX_INPUT_1_SOURCE			0xC00
+#define MADERA_DSP6LMIX_INPUT_1_VOLUME			0xC01
+#define MADERA_DSP6LMIX_INPUT_2_SOURCE			0xC02
+#define MADERA_DSP6LMIX_INPUT_2_VOLUME			0xC03
+#define MADERA_DSP6LMIX_INPUT_3_SOURCE			0xC04
+#define MADERA_DSP6LMIX_INPUT_3_VOLUME			0xC05
+#define MADERA_DSP6LMIX_INPUT_4_SOURCE			0xC06
+#define MADERA_DSP6LMIX_INPUT_4_VOLUME			0xC07
+#define MADERA_DSP6RMIX_INPUT_1_SOURCE			0xC08
+#define MADERA_DSP6RMIX_INPUT_1_VOLUME			0xC09
+#define MADERA_DSP6RMIX_INPUT_2_SOURCE			0xC0A
+#define MADERA_DSP6RMIX_INPUT_2_VOLUME			0xC0B
+#define MADERA_DSP6RMIX_INPUT_3_SOURCE			0xC0C
+#define MADERA_DSP6RMIX_INPUT_3_VOLUME			0xC0D
+#define MADERA_DSP6RMIX_INPUT_4_SOURCE			0xC0E
+#define MADERA_DSP6RMIX_INPUT_4_VOLUME			0xC0F
+#define MADERA_DSP6AUX1MIX_INPUT_1_SOURCE		0xC10
+#define MADERA_DSP6AUX2MIX_INPUT_1_SOURCE		0xC18
+#define MADERA_DSP6AUX3MIX_INPUT_1_SOURCE		0xC20
+#define MADERA_DSP6AUX4MIX_INPUT_1_SOURCE		0xC28
+#define MADERA_DSP6AUX5MIX_INPUT_1_SOURCE		0xC30
+#define MADERA_DSP6AUX6MIX_INPUT_1_SOURCE		0xC38
+#define MADERA_DSP7LMIX_INPUT_1_SOURCE			0xC40
+#define MADERA_DSP7LMIX_INPUT_1_VOLUME			0xC41
+#define MADERA_DSP7LMIX_INPUT_2_SOURCE			0xC42
+#define MADERA_DSP7LMIX_INPUT_2_VOLUME			0xC43
+#define MADERA_DSP7LMIX_INPUT_3_SOURCE			0xC44
+#define MADERA_DSP7LMIX_INPUT_3_VOLUME			0xC45
+#define MADERA_DSP7LMIX_INPUT_4_SOURCE			0xC46
+#define MADERA_DSP7LMIX_INPUT_4_VOLUME			0xC47
+#define MADERA_DSP7RMIX_INPUT_1_SOURCE			0xC48
+#define MADERA_DSP7RMIX_INPUT_1_VOLUME			0xC49
+#define MADERA_DSP7RMIX_INPUT_2_SOURCE			0xC4A
+#define MADERA_DSP7RMIX_INPUT_2_VOLUME			0xC4B
+#define MADERA_DSP7RMIX_INPUT_3_SOURCE			0xC4C
+#define MADERA_DSP7RMIX_INPUT_3_VOLUME			0xC4D
+#define MADERA_DSP7RMIX_INPUT_4_SOURCE			0xC4E
+#define MADERA_DSP7RMIX_INPUT_4_VOLUME			0xC4F
+#define MADERA_DSP7AUX1MIX_INPUT_1_SOURCE		0xC50
+#define MADERA_DSP7AUX2MIX_INPUT_1_SOURCE		0xC58
+#define MADERA_DSP7AUX3MIX_INPUT_1_SOURCE		0xC60
+#define MADERA_DSP7AUX4MIX_INPUT_1_SOURCE		0xC68
+#define MADERA_DSP7AUX5MIX_INPUT_1_SOURCE		0xC70
+#define MADERA_DSP7AUX6MIX_INPUT_1_SOURCE		0xC78
+#define MADERA_DFC1MIX_INPUT_1_SOURCE			0xDC0
+#define MADERA_DFC2MIX_INPUT_1_SOURCE			0xDC8
+#define MADERA_DFC3MIX_INPUT_1_SOURCE			0xDD0
+#define MADERA_DFC4MIX_INPUT_1_SOURCE			0xDD8
+#define MADERA_DFC5MIX_INPUT_1_SOURCE			0xDE0
+#define MADERA_DFC6MIX_INPUT_1_SOURCE			0xDE8
+#define MADERA_DFC7MIX_INPUT_1_SOURCE			0xDF0
+#define MADERA_DFC8MIX_INPUT_1_SOURCE			0xDF8
+#define MADERA_FX_CTRL1					0xE00
+#define MADERA_FX_CTRL2					0xE01
+#define MADERA_EQ1_1					0xE10
+#define MADERA_EQ1_2					0xE11
+#define MADERA_EQ1_21					0xE24
+#define MADERA_EQ2_1					0xE26
+#define MADERA_EQ2_2					0xE27
+#define MADERA_EQ2_21					0xE3A
+#define MADERA_EQ3_1					0xE3C
+#define MADERA_EQ3_2					0xE3D
+#define MADERA_EQ3_21					0xE50
+#define MADERA_EQ4_1					0xE52
+#define MADERA_EQ4_2					0xE53
+#define MADERA_EQ4_21					0xE66
+#define MADERA_DRC1_CTRL1				0xE80
+#define MADERA_DRC1_CTRL2				0xE81
+#define MADERA_DRC1_CTRL3				0xE82
+#define MADERA_DRC1_CTRL4				0xE83
+#define MADERA_DRC1_CTRL5				0xE84
+#define MADERA_DRC2_CTRL1				0xE88
+#define MADERA_DRC2_CTRL2				0xE89
+#define MADERA_DRC2_CTRL3				0xE8A
+#define MADERA_DRC2_CTRL4				0xE8B
+#define MADERA_DRC2_CTRL5				0xE8C
+#define MADERA_HPLPF1_1					0xEC0
+#define MADERA_HPLPF1_2					0xEC1
+#define MADERA_HPLPF2_1					0xEC4
+#define MADERA_HPLPF2_2					0xEC5
+#define MADERA_HPLPF3_1					0xEC8
+#define MADERA_HPLPF3_2					0xEC9
+#define MADERA_HPLPF4_1					0xECC
+#define MADERA_HPLPF4_2					0xECD
+#define MADERA_ASRC2_ENABLE				0xED0
+#define MADERA_ASRC2_STATUS				0xED1
+#define MADERA_ASRC2_RATE1				0xED2
+#define MADERA_ASRC2_RATE2				0xED3
+#define MADERA_ASRC1_ENABLE				0xEE0
+#define MADERA_ASRC1_STATUS				0xEE1
+#define MADERA_ASRC1_RATE1				0xEE2
+#define MADERA_ASRC1_RATE2				0xEE3
+#define MADERA_ISRC_1_CTRL_1				0xEF0
+#define MADERA_ISRC_1_CTRL_2				0xEF1
+#define MADERA_ISRC_1_CTRL_3				0xEF2
+#define MADERA_ISRC_2_CTRL_1				0xEF3
+#define MADERA_ISRC_2_CTRL_2				0xEF4
+#define MADERA_ISRC_2_CTRL_3				0xEF5
+#define MADERA_ISRC_3_CTRL_1				0xEF6
+#define MADERA_ISRC_3_CTRL_2				0xEF7
+#define MADERA_ISRC_3_CTRL_3				0xEF8
+#define MADERA_ISRC_4_CTRL_1				0xEF9
+#define MADERA_ISRC_4_CTRL_2				0xEFA
+#define MADERA_ISRC_4_CTRL_3				0xEFB
+#define MADERA_CLOCK_CONTROL				0xF00
+#define MADERA_ANC_SRC					0xF01
+#define MADERA_DSP_STATUS				0xF02
+#define MADERA_ANC_COEFF_START				0xF08
+#define MADERA_ANC_COEFF_END				0xF12
+#define MADERA_FCL_FILTER_CONTROL			0xF15
+#define MADERA_FCL_ADC_REFORMATTER_CONTROL		0xF17
+#define MADERA_FCL_COEFF_START				0xF18
+#define MADERA_FCL_COEFF_END				0xF69
+#define MADERA_FCR_FILTER_CONTROL			0xF71
+#define MADERA_FCR_ADC_REFORMATTER_CONTROL		0xF73
+#define MADERA_FCR_COEFF_START				0xF74
+#define MADERA_FCR_COEFF_END				0xFC5
+#define MADERA_DAC_COMP_1				0x1300
+#define MADERA_DAC_COMP_2				0x1302
+#define MADERA_FRF_COEFFICIENT_1L_1			0x1380
+#define MADERA_FRF_COEFFICIENT_1L_2			0x1381
+#define MADERA_FRF_COEFFICIENT_1L_3			0x1382
+#define MADERA_FRF_COEFFICIENT_1L_4			0x1383
+#define MADERA_FRF_COEFFICIENT_1R_1			0x1390
+#define MADERA_FRF_COEFFICIENT_1R_2			0x1391
+#define MADERA_FRF_COEFFICIENT_1R_3			0x1392
+#define MADERA_FRF_COEFFICIENT_1R_4			0x1393
+#define MADERA_FRF_COEFFICIENT_2L_1			0x13A0
+#define MADERA_FRF_COEFFICIENT_2L_2			0x13A1
+#define MADERA_FRF_COEFFICIENT_2L_3			0x13A2
+#define MADERA_FRF_COEFFICIENT_2L_4			0x13A3
+#define MADERA_FRF_COEFFICIENT_2R_1			0x13B0
+#define MADERA_FRF_COEFFICIENT_2R_2			0x13B1
+#define MADERA_FRF_COEFFICIENT_2R_3			0x13B2
+#define MADERA_FRF_COEFFICIENT_2R_4			0x13B3
+#define MADERA_FRF_COEFFICIENT_3L_1			0x13C0
+#define MADERA_FRF_COEFFICIENT_3L_2			0x13C1
+#define MADERA_FRF_COEFFICIENT_3L_3			0x13C2
+#define MADERA_FRF_COEFFICIENT_3L_4			0x13C3
+#define MADERA_FRF_COEFFICIENT_3R_1			0x13D0
+#define MADERA_FRF_COEFFICIENT_3R_2			0x13D1
+#define MADERA_FRF_COEFFICIENT_3R_3			0x13D2
+#define MADERA_FRF_COEFFICIENT_3R_4			0x13D3
+#define MADERA_FRF_COEFFICIENT_4L_1			0x13E0
+#define MADERA_FRF_COEFFICIENT_4L_2			0x13E1
+#define MADERA_FRF_COEFFICIENT_4L_3			0x13E2
+#define MADERA_FRF_COEFFICIENT_4L_4			0x13E3
+#define MADERA_FRF_COEFFICIENT_4R_1			0x13F0
+#define MADERA_FRF_COEFFICIENT_4R_2			0x13F1
+#define MADERA_FRF_COEFFICIENT_4R_3			0x13F2
+#define MADERA_FRF_COEFFICIENT_4R_4			0x13F3
+#define CS47L35_FRF_COEFFICIENT_4L_1			0x13A0
+#define CS47L35_FRF_COEFFICIENT_4L_2			0x13A1
+#define CS47L35_FRF_COEFFICIENT_4L_3			0x13A2
+#define CS47L35_FRF_COEFFICIENT_4L_4			0x13A3
+#define CS47L35_FRF_COEFFICIENT_5L_1			0x13B0
+#define CS47L35_FRF_COEFFICIENT_5L_2			0x13B1
+#define CS47L35_FRF_COEFFICIENT_5L_3			0x13B2
+#define CS47L35_FRF_COEFFICIENT_5L_4			0x13B3
+#define CS47L35_FRF_COEFFICIENT_5R_1			0x13C0
+#define CS47L35_FRF_COEFFICIENT_5R_2			0x13C1
+#define CS47L35_FRF_COEFFICIENT_5R_3			0x13C2
+#define CS47L35_FRF_COEFFICIENT_5R_4			0x13C3
+#define MADERA_FRF_COEFFICIENT_5L_1			0x1400
+#define MADERA_FRF_COEFFICIENT_5L_2			0x1401
+#define MADERA_FRF_COEFFICIENT_5L_3			0x1402
+#define MADERA_FRF_COEFFICIENT_5L_4			0x1403
+#define MADERA_FRF_COEFFICIENT_5R_1			0x1410
+#define MADERA_FRF_COEFFICIENT_5R_2			0x1411
+#define MADERA_FRF_COEFFICIENT_5R_3			0x1412
+#define MADERA_FRF_COEFFICIENT_5R_4			0x1413
+#define MADERA_FRF_COEFFICIENT_6L_1			0x1420
+#define MADERA_FRF_COEFFICIENT_6L_2			0x1421
+#define MADERA_FRF_COEFFICIENT_6L_3			0x1422
+#define MADERA_FRF_COEFFICIENT_6L_4			0x1423
+#define MADERA_FRF_COEFFICIENT_6R_1			0x1430
+#define MADERA_FRF_COEFFICIENT_6R_2			0x1431
+#define MADERA_FRF_COEFFICIENT_6R_3			0x1432
+#define MADERA_FRF_COEFFICIENT_6R_4			0x1433
+#define MADERA_DFC1_CTRL				0x1480
+#define MADERA_DFC1_RX					0x1482
+#define MADERA_DFC1_TX					0x1484
+#define MADERA_DFC2_CTRL				0x1486
+#define MADERA_DFC2_RX					0x1488
+#define MADERA_DFC2_TX					0x148A
+#define MADERA_DFC3_CTRL				0x148C
+#define MADERA_DFC3_RX					0x148E
+#define MADERA_DFC3_TX					0x1490
+#define MADERA_DFC4_CTRL				0x1492
+#define MADERA_DFC4_RX					0x1494
+#define MADERA_DFC4_TX					0x1496
+#define MADERA_DFC5_CTRL				0x1498
+#define MADERA_DFC5_RX					0x149A
+#define MADERA_DFC5_TX					0x149C
+#define MADERA_DFC6_CTRL				0x149E
+#define MADERA_DFC6_RX					0x14A0
+#define MADERA_DFC6_TX					0x14A2
+#define MADERA_DFC7_CTRL				0x14A4
+#define MADERA_DFC7_RX					0x14A6
+#define MADERA_DFC7_TX					0x14A8
+#define MADERA_DFC8_CTRL				0x14AA
+#define MADERA_DFC8_RX					0x14AC
+#define MADERA_DFC8_TX					0x14AE
+#define MADERA_DFC_STATUS				0x14B6
+#define MADERA_ADSP2_IRQ0				0x1600
+#define MADERA_ADSP2_IRQ1				0x1601
+#define MADERA_ADSP2_IRQ2				0x1602
+#define MADERA_ADSP2_IRQ3				0x1603
+#define MADERA_ADSP2_IRQ4				0x1604
+#define MADERA_ADSP2_IRQ5				0x1605
+#define MADERA_ADSP2_IRQ6				0x1606
+#define MADERA_ADSP2_IRQ7				0x1607
+#define MADERA_GPIO1_CTRL_1				0x1700
+#define MADERA_GPIO1_CTRL_2				0x1701
+#define MADERA_GPIO2_CTRL_1				0x1702
+#define MADERA_GPIO2_CTRL_2				0x1703
+#define MADERA_GPIO16_CTRL_1				0x171E
+#define MADERA_GPIO16_CTRL_2				0x171F
+#define MADERA_GPIO38_CTRL_1				0x174A
+#define MADERA_GPIO38_CTRL_2				0x174B
+#define MADERA_GPIO40_CTRL_1				0x174E
+#define MADERA_GPIO40_CTRL_2				0x174F
+#define MADERA_IRQ1_STATUS_1				0x1800
+#define MADERA_IRQ1_STATUS_2				0x1801
+#define MADERA_IRQ1_STATUS_6				0x1805
+#define MADERA_IRQ1_STATUS_7				0x1806
+#define MADERA_IRQ1_STATUS_9				0x1808
+#define MADERA_IRQ1_STATUS_11				0x180A
+#define MADERA_IRQ1_STATUS_12				0x180B
+#define MADERA_IRQ1_STATUS_15				0x180E
+#define MADERA_IRQ1_STATUS_33				0x1820
+#define MADERA_IRQ1_MASK_1				0x1840
+#define MADERA_IRQ1_MASK_2				0x1841
+#define MADERA_IRQ1_MASK_33				0x1860
+#define MADERA_IRQ1_RAW_STATUS_1			0x1880
+#define MADERA_IRQ1_RAW_STATUS_2			0x1881
+#define MADERA_IRQ1_RAW_STATUS_15			0x188E
+#define MADERA_IRQ1_RAW_STATUS_33			0x18A0
+#define MADERA_INTERRUPT_DEBOUNCE_7			0x1A06
+#define MADERA_INTERRUPT_DEBOUNCE_15			0x1A0E
+#define MADERA_IRQ1_CTRL				0x1A80
+#define MADERA_IRQ2_CTRL				0x1A82
+#define MADERA_INTERRUPT_RAW_STATUS_1			0x1AA0
+#define MADERA_WSEQ_SEQUENCE_1				0x3000
+#define MADERA_WSEQ_SEQUENCE_252			0x31F6
+#define CS47L35_OTP_HPDET_CAL_1				0x31F8
+#define CS47L35_OTP_HPDET_CAL_2				0x31FA
+#define MADERA_WSEQ_SEQUENCE_508			0x33F6
+#define CS47L85_OTP_HPDET_CAL_1				0x33F8
+#define CS47L85_OTP_HPDET_CAL_2				0x33FA
+#define MADERA_OTP_HPDET_CAL_1				0x20004
+#define MADERA_OTP_HPDET_CAL_2				0x20006
+#define MADERA_DSP1_CONFIG_1				0x0FFE00
+#define MADERA_DSP1_CONFIG_2				0x0FFE02
+#define MADERA_DSP1_SCRATCH_1				0x0FFE40
+#define MADERA_DSP1_SCRATCH_2				0x0FFE42
+#define MADERA_DSP1_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0xFFE7C
+#define MADERA_DSP2_CONFIG_1				0x17FE00
+#define MADERA_DSP2_CONFIG_2				0x17FE02
+#define MADERA_DSP2_SCRATCH_1				0x17FE40
+#define MADERA_DSP2_SCRATCH_2				0x17FE42
+#define MADERA_DSP2_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x17FE7C
+#define MADERA_DSP3_CONFIG_1				0x1FFE00
+#define MADERA_DSP3_CONFIG_2				0x1FFE02
+#define MADERA_DSP3_SCRATCH_1				0x1FFE40
+#define MADERA_DSP3_SCRATCH_2				0x1FFE42
+#define MADERA_DSP3_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x1FFE7C
+#define MADERA_DSP4_CONFIG_1				0x27FE00
+#define MADERA_DSP4_CONFIG_2				0x27FE02
+#define MADERA_DSP4_SCRATCH_1				0x27FE40
+#define MADERA_DSP4_SCRATCH_2				0x27FE42
+#define MADERA_DSP4_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x27FE7C
+#define MADERA_DSP5_CONFIG_1				0x2FFE00
+#define MADERA_DSP5_CONFIG_2				0x2FFE02
+#define MADERA_DSP5_SCRATCH_1				0x2FFE40
+#define MADERA_DSP5_SCRATCH_2				0x2FFE42
+#define MADERA_DSP5_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x2FFE7C
+#define MADERA_DSP6_CONFIG_1				0x37FE00
+#define MADERA_DSP6_CONFIG_2				0x37FE02
+#define MADERA_DSP6_SCRATCH_1				0x37FE40
+#define MADERA_DSP6_SCRATCH_2				0x37FE42
+#define MADERA_DSP6_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x37FE7C
+#define MADERA_DSP7_CONFIG_1				0x3FFE00
+#define MADERA_DSP7_CONFIG_2				0x3FFE02
+#define MADERA_DSP7_SCRATCH_1				0x3FFE40
+#define MADERA_DSP7_SCRATCH_2				0x3FFE42
+#define MADERA_DSP7_PMEM_ERR_ADDR___XMEM_ERR_ADDR	0x3FFE7C
+
+/* (0x0000)  Software_Reset */
+#define MADERA_SW_RST_DEV_ID1_MASK			0xFFFF
+#define MADERA_SW_RST_DEV_ID1_SHIFT			     0
+#define MADERA_SW_RST_DEV_ID1_WIDTH			    16
+
+/* (0x0001)  Hardware_Revision */
+#define MADERA_HW_REVISION_MASK				0x00FF
+#define MADERA_HW_REVISION_SHIFT			     0
+#define MADERA_HW_REVISION_WIDTH			     8
+
+/* (0x0020)  Tone_Generator_1 */
+#define MADERA_TONE2_ENA				0x0002
+#define MADERA_TONE2_ENA_MASK				0x0002
+#define MADERA_TONE2_ENA_SHIFT				     1
+#define MADERA_TONE2_ENA_WIDTH				     1
+#define MADERA_TONE1_ENA				0x0001
+#define MADERA_TONE1_ENA_MASK				0x0001
+#define MADERA_TONE1_ENA_SHIFT				     0
+#define MADERA_TONE1_ENA_WIDTH				     1
+
+/* (0x0021)  Tone_Generator_2 */
+#define MADERA_TONE1_LVL_0_MASK				0xFFFF
+#define MADERA_TONE1_LVL_0_SHIFT			     0
+#define MADERA_TONE1_LVL_0_WIDTH			    16
+
+/* (0x0022)  Tone_Generator_3 */
+#define MADERA_TONE1_LVL_MASK				0x00FF
+#define MADERA_TONE1_LVL_SHIFT				     0
+#define MADERA_TONE1_LVL_WIDTH				     8
+
+/* (0x0023)  Tone_Generator_4 */
+#define MADERA_TONE2_LVL_0_MASK				0xFFFF
+#define MADERA_TONE2_LVL_0_SHIFT			     0
+#define MADERA_TONE2_LVL_0_WIDTH			    16
+
+/* (0x0024)  Tone_Generator_5 */
+#define MADERA_TONE2_LVL_MASK				0x00FF
+#define MADERA_TONE2_LVL_SHIFT				     0
+#define MADERA_TONE2_LVL_WIDTH				     8
+
+/* (0x0030)  PWM_Drive_1 */
+#define MADERA_PWM2_ENA					0x0002
+#define MADERA_PWM2_ENA_MASK				0x0002
+#define MADERA_PWM2_ENA_SHIFT				     1
+#define MADERA_PWM2_ENA_WIDTH				     1
+#define MADERA_PWM1_ENA					0x0001
+#define MADERA_PWM1_ENA_MASK				0x0001
+#define MADERA_PWM1_ENA_SHIFT				     0
+#define MADERA_PWM1_ENA_WIDTH				     1
+
+/* (0x00A0)  Comfort_Noise_Generator */
+#define MADERA_NOISE_GEN_ENA				0x0020
+#define MADERA_NOISE_GEN_ENA_MASK			0x0020
+#define MADERA_NOISE_GEN_ENA_SHIFT			     5
+#define MADERA_NOISE_GEN_ENA_WIDTH			     1
+#define MADERA_NOISE_GEN_GAIN_MASK			0x001F
+#define MADERA_NOISE_GEN_GAIN_SHIFT			     0
+#define MADERA_NOISE_GEN_GAIN_WIDTH			     5
+
+/* (0x0100)  Clock_32k_1 */
+#define MADERA_CLK_32K_ENA				0x0040
+#define MADERA_CLK_32K_ENA_MASK				0x0040
+#define MADERA_CLK_32K_ENA_SHIFT			     6
+#define MADERA_CLK_32K_ENA_WIDTH			     1
+#define MADERA_CLK_32K_SRC_MASK				0x0003
+#define MADERA_CLK_32K_SRC_SHIFT			     0
+#define MADERA_CLK_32K_SRC_WIDTH			     2
+
+/* (0x0101)  System_Clock_1 */
+#define MADERA_SYSCLK_FRAC				0x8000
+#define MADERA_SYSCLK_FRAC_MASK				0x8000
+#define MADERA_SYSCLK_FRAC_SHIFT			    15
+#define MADERA_SYSCLK_FRAC_WIDTH			     1
+#define MADERA_SYSCLK_FREQ_MASK				0x0700
+#define MADERA_SYSCLK_FREQ_SHIFT			     8
+#define MADERA_SYSCLK_FREQ_WIDTH			     3
+#define MADERA_SYSCLK_ENA				0x0040
+#define MADERA_SYSCLK_ENA_MASK				0x0040
+#define MADERA_SYSCLK_ENA_SHIFT				     6
+#define MADERA_SYSCLK_ENA_WIDTH				     1
+#define MADERA_SYSCLK_SRC_MASK				0x000F
+#define MADERA_SYSCLK_SRC_SHIFT				     0
+#define MADERA_SYSCLK_SRC_WIDTH				     4
+
+/* (0x0102)  Sample_rate_1 */
+#define MADERA_SAMPLE_RATE_1_MASK			0x001F
+#define MADERA_SAMPLE_RATE_1_SHIFT			     0
+#define MADERA_SAMPLE_RATE_1_WIDTH			     5
+
+/* (0x0103)  Sample_rate_2 */
+#define MADERA_SAMPLE_RATE_2_MASK			0x001F
+#define MADERA_SAMPLE_RATE_2_SHIFT			     0
+#define MADERA_SAMPLE_RATE_2_WIDTH			     5
+
+/* (0x0104)  Sample_rate_3 */
+#define MADERA_SAMPLE_RATE_3_MASK			0x001F
+#define MADERA_SAMPLE_RATE_3_SHIFT			     0
+#define MADERA_SAMPLE_RATE_3_WIDTH			     5
+
+/* (0x0112)  Async_clock_1 */
+#define MADERA_ASYNC_CLK_FREQ_MASK			0x0700
+#define MADERA_ASYNC_CLK_FREQ_SHIFT			     8
+#define MADERA_ASYNC_CLK_FREQ_WIDTH			     3
+#define MADERA_ASYNC_CLK_ENA				0x0040
+#define MADERA_ASYNC_CLK_ENA_MASK			0x0040
+#define MADERA_ASYNC_CLK_ENA_SHIFT			     6
+#define MADERA_ASYNC_CLK_ENA_WIDTH			     1
+#define MADERA_ASYNC_CLK_SRC_MASK			0x000F
+#define MADERA_ASYNC_CLK_SRC_SHIFT			     0
+#define MADERA_ASYNC_CLK_SRC_WIDTH			     4
+
+/* (0x0113)  Async_sample_rate_1 */
+#define MADERA_ASYNC_SAMPLE_RATE_1_MASK			0x001F
+#define MADERA_ASYNC_SAMPLE_RATE_1_SHIFT		     0
+#define MADERA_ASYNC_SAMPLE_RATE_1_WIDTH		     5
+
+/* (0x0114)  Async_sample_rate_2 */
+#define MADERA_ASYNC_SAMPLE_RATE_2_MASK			0x001F
+#define MADERA_ASYNC_SAMPLE_RATE_2_SHIFT		     0
+#define MADERA_ASYNC_SAMPLE_RATE_2_WIDTH		     5
+
+/* (0x0120)  DSP_Clock_1 */
+#define MADERA_DSP_CLK_FREQ_LEGACY			0x0700
+#define MADERA_DSP_CLK_FREQ_LEGACY_MASK			0x0700
+#define MADERA_DSP_CLK_FREQ_LEGACY_SHIFT		     8
+#define MADERA_DSP_CLK_FREQ_LEGACY_WIDTH		     3
+#define MADERA_DSP_CLK_ENA				0x0040
+#define MADERA_DSP_CLK_ENA_MASK				0x0040
+#define MADERA_DSP_CLK_ENA_SHIFT			     6
+#define MADERA_DSP_CLK_ENA_WIDTH			     1
+#define MADERA_DSP_CLK_SRC				0x000F
+#define MADERA_DSP_CLK_SRC_MASK				0x000F
+#define MADERA_DSP_CLK_SRC_SHIFT			     0
+#define MADERA_DSP_CLK_SRC_WIDTH			     4
+
+/* (0x0122)  DSP_Clock_2 */
+#define MADERA_DSP_CLK_FREQ_MASK			0x03FF
+#define MADERA_DSP_CLK_FREQ_SHIFT			     0
+#define MADERA_DSP_CLK_FREQ_WIDTH			    10
+
+/* (0x0149)  Output_system_clock */
+#define MADERA_OPCLK_ENA				0x8000
+#define MADERA_OPCLK_ENA_MASK				0x8000
+#define MADERA_OPCLK_ENA_SHIFT				    15
+#define MADERA_OPCLK_ENA_WIDTH				     1
+#define MADERA_OPCLK_DIV_MASK				0x00F8
+#define MADERA_OPCLK_DIV_SHIFT				     3
+#define MADERA_OPCLK_DIV_WIDTH				     5
+#define MADERA_OPCLK_SEL_MASK				0x0007
+#define MADERA_OPCLK_SEL_SHIFT				     0
+#define MADERA_OPCLK_SEL_WIDTH				     3
+
+/* (0x014A)  Output_async_clock */
+#define MADERA_OPCLK_ASYNC_ENA				0x8000
+#define MADERA_OPCLK_ASYNC_ENA_MASK			0x8000
+#define MADERA_OPCLK_ASYNC_ENA_SHIFT			    15
+#define MADERA_OPCLK_ASYNC_ENA_WIDTH			     1
+#define MADERA_OPCLK_ASYNC_DIV_MASK			0x00F8
+#define MADERA_OPCLK_ASYNC_DIV_SHIFT			     3
+#define MADERA_OPCLK_ASYNC_DIV_WIDTH			     5
+#define MADERA_OPCLK_ASYNC_SEL_MASK			0x0007
+#define MADERA_OPCLK_ASYNC_SEL_SHIFT			     0
+#define MADERA_OPCLK_ASYNC_SEL_WIDTH			     3
+
+/* (0x0171)  FLL1_Control_1 */
+#define MADERA_FLL1_FREERUN				0x0002
+#define MADERA_FLL1_FREERUN_MASK			0x0002
+#define MADERA_FLL1_FREERUN_SHIFT			     1
+#define MADERA_FLL1_FREERUN_WIDTH			     1
+#define MADERA_FLL1_ENA					0x0001
+#define MADERA_FLL1_ENA_MASK				0x0001
+#define MADERA_FLL1_ENA_SHIFT				     0
+#define MADERA_FLL1_ENA_WIDTH				     1
+
+/* (0x0172)  FLL1_Control_2 */
+#define MADERA_FLL1_CTRL_UPD				0x8000
+#define MADERA_FLL1_CTRL_UPD_MASK			0x8000
+#define MADERA_FLL1_CTRL_UPD_SHIFT			    15
+#define MADERA_FLL1_CTRL_UPD_WIDTH			     1
+#define MADERA_FLL1_N_MASK				0x03FF
+#define MADERA_FLL1_N_SHIFT				     0
+#define MADERA_FLL1_N_WIDTH				    10
+
+/* (0x0173)  FLL1_Control_3 */
+#define MADERA_FLL1_THETA_MASK				0xFFFF
+#define MADERA_FLL1_THETA_SHIFT				     0
+#define MADERA_FLL1_THETA_WIDTH				    16
+
+/* (0x0174)  FLL1_Control_4 */
+#define MADERA_FLL1_LAMBDA_MASK				0xFFFF
+#define MADERA_FLL1_LAMBDA_SHIFT			     0
+#define MADERA_FLL1_LAMBDA_WIDTH			    16
+
+/* (0x0175)  FLL1_Control_5 */
+#define MADERA_FLL1_FRATIO_MASK				0x0F00
+#define MADERA_FLL1_FRATIO_SHIFT			     8
+#define MADERA_FLL1_FRATIO_WIDTH			     4
+
+/* (0x0176)  FLL1_Control_6 */
+#define MADERA_FLL1_REFCLK_DIV_MASK			0x00C0
+#define MADERA_FLL1_REFCLK_DIV_SHIFT			     6
+#define MADERA_FLL1_REFCLK_DIV_WIDTH			     2
+#define MADERA_FLL1_REFCLK_SRC_MASK			0x000F
+#define MADERA_FLL1_REFCLK_SRC_SHIFT			     0
+#define MADERA_FLL1_REFCLK_SRC_WIDTH			     4
+
+/* (0x0177)  FLL1_Loop_Filter_Test_1 */
+#define MADERA_FLL1_FRC_INTEG_UPD			0x8000
+#define MADERA_FLL1_FRC_INTEG_UPD_MASK			0x8000
+#define MADERA_FLL1_FRC_INTEG_UPD_SHIFT			    15
+#define MADERA_FLL1_FRC_INTEG_UPD_WIDTH			     1
+#define MADERA_FLL1_FRC_INTEG_VAL_MASK			0x0FFF
+#define MADERA_FLL1_FRC_INTEG_VAL_SHIFT			     0
+#define MADERA_FLL1_FRC_INTEG_VAL_WIDTH			    12
+
+/* (0x0179)  FLL1_Control_7 */
+#define MADERA_FLL1_GAIN_MASK				0x003c
+#define MADERA_FLL1_GAIN_SHIFT				     2
+#define MADERA_FLL1_GAIN_WIDTH				     4
+
+/* (0x017A)  FLL1_EFS_2 */
+#define MADERA_FLL1_PHASE_GAIN_MASK			0xF000
+#define MADERA_FLL1_PHASE_GAIN_SHIFT			    12
+#define MADERA_FLL1_PHASE_GAIN_WIDTH			     4
+#define MADERA_FLL1_PHASE_ENA_MASK			0x0800
+#define MADERA_FLL1_PHASE_ENA_SHIFT			    11
+#define MADERA_FLL1_PHASE_ENA_WIDTH			     1
+
+/* (0x0181)  FLL1_Synchroniser_1 */
+#define MADERA_FLL1_SYNC_ENA				0x0001
+#define MADERA_FLL1_SYNC_ENA_MASK			0x0001
+#define MADERA_FLL1_SYNC_ENA_SHIFT			     0
+#define MADERA_FLL1_SYNC_ENA_WIDTH			     1
+
+/* (0x0182)  FLL1_Synchroniser_2 */
+#define MADERA_FLL1_SYNC_N_MASK				0x03FF
+#define MADERA_FLL1_SYNC_N_SHIFT			     0
+#define MADERA_FLL1_SYNC_N_WIDTH			    10
+
+/* (0x0183)  FLL1_Synchroniser_3 */
+#define MADERA_FLL1_SYNC_THETA_MASK			0xFFFF
+#define MADERA_FLL1_SYNC_THETA_SHIFT			     0
+#define MADERA_FLL1_SYNC_THETA_WIDTH			    16
+
+/* (0x0184)  FLL1_Synchroniser_4 */
+#define MADERA_FLL1_SYNC_LAMBDA_MASK			0xFFFF
+#define MADERA_FLL1_SYNC_LAMBDA_SHIFT			     0
+#define MADERA_FLL1_SYNC_LAMBDA_WIDTH			    16
+
+/* (0x0185)  FLL1_Synchroniser_5 */
+#define MADERA_FLL1_SYNC_FRATIO_MASK			0x0700
+#define MADERA_FLL1_SYNC_FRATIO_SHIFT			     8
+#define MADERA_FLL1_SYNC_FRATIO_WIDTH			     3
+
+/* (0x0186)  FLL1_Synchroniser_6 */
+#define MADERA_FLL1_SYNCCLK_DIV_MASK			0x00C0
+#define MADERA_FLL1_SYNCCLK_DIV_SHIFT			     6
+#define MADERA_FLL1_SYNCCLK_DIV_WIDTH			     2
+#define MADERA_FLL1_SYNCCLK_SRC_MASK			0x000F
+#define MADERA_FLL1_SYNCCLK_SRC_SHIFT			     0
+#define MADERA_FLL1_SYNCCLK_SRC_WIDTH			     4
+
+/* (0x0187)  FLL1_Synchroniser_7 */
+#define MADERA_FLL1_SYNC_GAIN_MASK			0x003c
+#define MADERA_FLL1_SYNC_GAIN_SHIFT			     2
+#define MADERA_FLL1_SYNC_GAIN_WIDTH			     4
+#define MADERA_FLL1_SYNC_DFSAT				0x0001
+#define MADERA_FLL1_SYNC_DFSAT_MASK			0x0001
+#define MADERA_FLL1_SYNC_DFSAT_SHIFT			     0
+#define MADERA_FLL1_SYNC_DFSAT_WIDTH			     1
+
+/* (0x01D1)  FLL_AO_Control_1 */
+#define MADERA_FLL_AO_HOLD				0x0004
+#define MADERA_FLL_AO_HOLD_MASK				0x0004
+#define MADERA_FLL_AO_HOLD_SHIFT			     2
+#define MADERA_FLL_AO_HOLD_WIDTH			     1
+#define MADERA_FLL_AO_FREERUN				0x0002
+#define MADERA_FLL_AO_FREERUN_MASK			0x0002
+#define MADERA_FLL_AO_FREERUN_SHIFT			     1
+#define MADERA_FLL_AO_FREERUN_WIDTH			     1
+#define MADERA_FLL_AO_ENA				0x0001
+#define MADERA_FLL_AO_ENA_MASK				0x0001
+#define MADERA_FLL_AO_ENA_SHIFT				     0
+#define MADERA_FLL_AO_ENA_WIDTH				     1
+
+/* (0x01D2)  FLL_AO_Control_2 */
+#define MADERA_FLL_AO_CTRL_UPD				0x8000
+#define MADERA_FLL_AO_CTRL_UPD_MASK			0x8000
+#define MADERA_FLL_AO_CTRL_UPD_SHIFT			    15
+#define MADERA_FLL_AO_CTRL_UPD_WIDTH			     1
+
+/* (0x01D6)  FLL_AO_Control_6 */
+#define MADERA_FLL_AO_REFCLK_SRC_MASK			0x000F
+#define MADERA_FLL_AO_REFCLK_SRC_SHIFT			     0
+#define MADERA_FLL_AO_REFCLK_SRC_WIDTH			     4
+
+/* (0x0200)  Mic_Charge_Pump_1 */
+#define MADERA_CPMIC_BYPASS				0x0002
+#define MADERA_CPMIC_BYPASS_MASK			0x0002
+#define MADERA_CPMIC_BYPASS_SHIFT			     1
+#define MADERA_CPMIC_BYPASS_WIDTH			     1
+#define MADERA_CPMIC_ENA				0x0001
+#define MADERA_CPMIC_ENA_MASK				0x0001
+#define MADERA_CPMIC_ENA_SHIFT				     0
+#define MADERA_CPMIC_ENA_WIDTH				     1
+
+/* (0x0210)  LDO1_Control_1 */
+#define MADERA_LDO1_VSEL_MASK				0x07E0
+#define MADERA_LDO1_VSEL_SHIFT				     5
+#define MADERA_LDO1_VSEL_WIDTH				     6
+#define MADERA_LDO1_FAST				0x0010
+#define MADERA_LDO1_FAST_MASK				0x0010
+#define MADERA_LDO1_FAST_SHIFT				     4
+#define MADERA_LDO1_FAST_WIDTH				     1
+#define MADERA_LDO1_DISCH				0x0004
+#define MADERA_LDO1_DISCH_MASK				0x0004
+#define MADERA_LDO1_DISCH_SHIFT				     2
+#define MADERA_LDO1_DISCH_WIDTH				     1
+#define MADERA_LDO1_BYPASS				0x0002
+#define MADERA_LDO1_BYPASS_MASK				0x0002
+#define MADERA_LDO1_BYPASS_SHIFT			     1
+#define MADERA_LDO1_BYPASS_WIDTH			     1
+#define MADERA_LDO1_ENA					0x0001
+#define MADERA_LDO1_ENA_MASK				0x0001
+#define MADERA_LDO1_ENA_SHIFT				     0
+#define MADERA_LDO1_ENA_WIDTH				     1
+
+/* (0x0213)  LDO2_Control_1 */
+#define MADERA_LDO2_VSEL_MASK				0x07E0
+#define MADERA_LDO2_VSEL_SHIFT				     5
+#define MADERA_LDO2_VSEL_WIDTH				     6
+#define MADERA_LDO2_FAST				0x0010
+#define MADERA_LDO2_FAST_MASK				0x0010
+#define MADERA_LDO2_FAST_SHIFT				     4
+#define MADERA_LDO2_FAST_WIDTH				     1
+#define MADERA_LDO2_DISCH				0x0004
+#define MADERA_LDO2_DISCH_MASK				0x0004
+#define MADERA_LDO2_DISCH_SHIFT				     2
+#define MADERA_LDO2_DISCH_WIDTH				     1
+#define MADERA_LDO2_BYPASS				0x0002
+#define MADERA_LDO2_BYPASS_MASK				0x0002
+#define MADERA_LDO2_BYPASS_SHIFT			     1
+#define MADERA_LDO2_BYPASS_WIDTH			     1
+#define MADERA_LDO2_ENA					0x0001
+#define MADERA_LDO2_ENA_MASK				0x0001
+#define MADERA_LDO2_ENA_SHIFT				     0
+#define MADERA_LDO2_ENA_WIDTH				     1
+
+/* (0x0218)  Mic_Bias_Ctrl_1 */
+#define MADERA_MICB1_ENA				0x0001
+#define MADERA_MICB1_ENA_MASK				0x0001
+#define MADERA_MICB1_ENA_SHIFT				     0
+#define MADERA_MICB1_ENA_WIDTH				     1
+
+/* (0x021C)  Mic_Bias_Ctrl_5 */
+#define MADERA_MICB1D_ENA				0x1000
+#define MADERA_MICB1D_ENA_MASK				0x1000
+#define MADERA_MICB1D_ENA_SHIFT				    12
+#define MADERA_MICB1D_ENA_WIDTH				     1
+#define MADERA_MICB1C_ENA				0x0100
+#define MADERA_MICB1C_ENA_MASK				0x0100
+#define MADERA_MICB1C_ENA_SHIFT				     8
+#define MADERA_MICB1C_ENA_WIDTH				     1
+#define MADERA_MICB1B_ENA				0x0010
+#define MADERA_MICB1B_ENA_MASK				0x0010
+#define MADERA_MICB1B_ENA_SHIFT				     4
+#define MADERA_MICB1B_ENA_WIDTH				     1
+#define MADERA_MICB1A_ENA				0x0001
+#define MADERA_MICB1A_ENA_MASK				0x0001
+#define MADERA_MICB1A_ENA_SHIFT				     0
+#define MADERA_MICB1A_ENA_WIDTH				     1
+
+/* (0x021E)  Mic_Bias_Ctrl_6 */
+#define MADERA_MICB2D_ENA				0x1000
+#define MADERA_MICB2D_ENA_MASK				0x1000
+#define MADERA_MICB2D_ENA_SHIFT				    12
+#define MADERA_MICB2D_ENA_WIDTH				     1
+#define MADERA_MICB2C_ENA				0x0100
+#define MADERA_MICB2C_ENA_MASK				0x0100
+#define MADERA_MICB2C_ENA_SHIFT				     8
+#define MADERA_MICB2C_ENA_WIDTH				     1
+#define MADERA_MICB2B_ENA				0x0010
+#define MADERA_MICB2B_ENA_MASK				0x0010
+#define MADERA_MICB2B_ENA_SHIFT				     4
+#define MADERA_MICB2B_ENA_WIDTH				     1
+#define MADERA_MICB2A_ENA				0x0001
+#define MADERA_MICB2A_ENA_MASK				0x0001
+#define MADERA_MICB2A_ENA_SHIFT				     0
+#define MADERA_MICB2A_ENA_WIDTH				     1
+
+/* (0x0293)  Accessory_Detect_Mode_1 */
+#define MADERA_ACCDET_SRC				0x2000
+#define MADERA_ACCDET_SRC_MASK				0x2000
+#define MADERA_ACCDET_SRC_SHIFT				    13
+#define MADERA_ACCDET_SRC_WIDTH				     1
+#define MADERA_ACCDET_POLARITY_INV_ENA			0x0080
+#define MADERA_ACCDET_POLARITY_INV_ENA_MASK		0x0080
+#define MADERA_ACCDET_POLARITY_INV_ENA_SHIFT		     7
+#define MADERA_ACCDET_POLARITY_INV_ENA_WIDTH		     1
+#define MADERA_ACCDET_MODE_MASK				0x0007
+#define MADERA_ACCDET_MODE_SHIFT			     0
+#define MADERA_ACCDET_MODE_WIDTH			     3
+
+/* (0x0299)  Headphone_Detect_0 */
+#define MADERA_HPD_GND_SEL				0x0007
+#define MADERA_HPD_GND_SEL_MASK				0x0007
+#define MADERA_HPD_GND_SEL_SHIFT			     0
+#define MADERA_HPD_GND_SEL_WIDTH			     3
+#define MADERA_HPD_SENSE_SEL				0x00F0
+#define MADERA_HPD_SENSE_SEL_MASK			0x00F0
+#define MADERA_HPD_SENSE_SEL_SHIFT			     4
+#define MADERA_HPD_SENSE_SEL_WIDTH			     4
+#define MADERA_HPD_FRC_SEL				0x0F00
+#define MADERA_HPD_FRC_SEL_MASK				0x0F00
+#define MADERA_HPD_FRC_SEL_SHIFT			     8
+#define MADERA_HPD_FRC_SEL_WIDTH			     4
+#define MADERA_HPD_OUT_SEL				0x7000
+#define MADERA_HPD_OUT_SEL_MASK				0x7000
+#define MADERA_HPD_OUT_SEL_SHIFT			    12
+#define MADERA_HPD_OUT_SEL_WIDTH			     3
+#define MADERA_HPD_OVD_ENA_SEL				0x8000
+#define MADERA_HPD_OVD_ENA_SEL_MASK			0x8000
+#define MADERA_HPD_OVD_ENA_SEL_SHIFT			    15
+#define MADERA_HPD_OVD_ENA_SEL_WIDTH			     1
+
+/* (0x029B)  Headphone_Detect_1 */
+#define MADERA_HP_IMPEDANCE_RANGE_MASK			0x0600
+#define MADERA_HP_IMPEDANCE_RANGE_SHIFT			     9
+#define MADERA_HP_IMPEDANCE_RANGE_WIDTH			     2
+#define MADERA_HP_STEP_SIZE				0x0100
+#define MADERA_HP_STEP_SIZE_MASK			0x0100
+#define MADERA_HP_STEP_SIZE_SHIFT			     8
+#define MADERA_HP_STEP_SIZE_WIDTH			     1
+#define MADERA_HP_CLK_DIV_MASK				0x0018
+#define MADERA_HP_CLK_DIV_SHIFT				     3
+#define MADERA_HP_CLK_DIV_WIDTH				     2
+#define MADERA_HP_RATE_MASK				0x0006
+#define MADERA_HP_RATE_SHIFT				     1
+#define MADERA_HP_RATE_WIDTH				     2
+#define MADERA_HP_POLL					0x0001
+#define MADERA_HP_POLL_MASK				0x0001
+#define MADERA_HP_POLL_SHIFT				     0
+#define MADERA_HP_POLL_WIDTH				     1
+
+/* (0x029C)  Headphone_Detect_2 */
+#define MADERA_HP_DONE_MASK				0x8000
+#define MADERA_HP_DONE_SHIFT				    15
+#define MADERA_HP_DONE_WIDTH				     1
+#define MADERA_HP_LVL_MASK				0x7FFF
+#define MADERA_HP_LVL_SHIFT				     0
+#define MADERA_HP_LVL_WIDTH				    15
+
+/* (0x029D)  Headphone_Detect_3 */
+#define MADERA_HP_DACVAL_MASK				0x03FF
+#define MADERA_HP_DACVAL_SHIFT				     0
+#define MADERA_HP_DACVAL_WIDTH				    10
+
+/* (0x029F) - Headphone Detect 5 */
+#define MADERA_HP_DACVAL_DOWN_MASK			0x03FF
+#define MADERA_HP_DACVAL_DOWN_SHIFT			     0
+#define MADERA_HP_DACVAL_DOWN_WIDTH			    10
+
+/* (0x02A2)  Mic_Detect_1_Control_0 */
+#define MADERA_MICD1_GND_MASK				0x0007
+#define MADERA_MICD1_GND_SHIFT				     0
+#define MADERA_MICD1_GND_WIDTH				     3
+#define MADERA_MICD1_SENSE_MASK				0x00F0
+#define MADERA_MICD1_SENSE_SHIFT			     4
+#define MADERA_MICD1_SENSE_WIDTH			     4
+#define MADERA_MICD1_ADC_MODE_MASK			0x8000
+#define MADERA_MICD1_ADC_MODE_SHIFT			    15
+#define MADERA_MICD1_ADC_MODE_WIDTH			     1
+
+/* (0x02A3)  Mic_Detect_1_Control_1 */
+#define MADERA_MICD_BIAS_STARTTIME_MASK			0xF000
+#define MADERA_MICD_BIAS_STARTTIME_SHIFT		    12
+#define MADERA_MICD_BIAS_STARTTIME_WIDTH		     4
+#define MADERA_MICD_RATE_MASK				0x0F00
+#define MADERA_MICD_RATE_SHIFT				     8
+#define MADERA_MICD_RATE_WIDTH				     4
+#define MADERA_MICD_BIAS_SRC_MASK			0x00F0
+#define MADERA_MICD_BIAS_SRC_SHIFT			     4
+#define MADERA_MICD_BIAS_SRC_WIDTH			     4
+#define MADERA_MICD_DBTIME				0x0002
+#define MADERA_MICD_DBTIME_MASK				0x0002
+#define MADERA_MICD_DBTIME_SHIFT			     1
+#define MADERA_MICD_DBTIME_WIDTH			     1
+#define MADERA_MICD_ENA					0x0001
+#define MADERA_MICD_ENA_MASK				0x0001
+#define MADERA_MICD_ENA_SHIFT				     0
+#define MADERA_MICD_ENA_WIDTH				     1
+
+/* (0x02A4)  Mic_Detect_1_Control_2 */
+#define MADERA_MICD_LVL_SEL_MASK			0x00FF
+#define MADERA_MICD_LVL_SEL_SHIFT			     0
+#define MADERA_MICD_LVL_SEL_WIDTH			     8
+
+/* (0x02A5)  Mic_Detect_1_Control_3 */
+#define MADERA_MICD_LVL_0				0x0004
+#define MADERA_MICD_LVL_1				0x0008
+#define MADERA_MICD_LVL_2				0x0010
+#define MADERA_MICD_LVL_3				0x0020
+#define MADERA_MICD_LVL_4				0x0040
+#define MADERA_MICD_LVL_5				0x0080
+#define MADERA_MICD_LVL_6				0x0100
+#define MADERA_MICD_LVL_7				0x0200
+#define MADERA_MICD_LVL_8				0x0400
+#define MADERA_MICD_LVL_MASK				0x07FC
+#define MADERA_MICD_LVL_SHIFT				     2
+#define MADERA_MICD_LVL_WIDTH				     9
+#define MADERA_MICD_VALID				0x0002
+#define MADERA_MICD_VALID_MASK				0x0002
+#define MADERA_MICD_VALID_SHIFT				     1
+#define MADERA_MICD_VALID_WIDTH				     1
+#define MADERA_MICD_STS					0x0001
+#define MADERA_MICD_STS_MASK				0x0001
+#define MADERA_MICD_STS_SHIFT				     0
+#define MADERA_MICD_STS_WIDTH				     1
+
+/* (0x02AB)  Mic_Detect_1_Control_4 */
+#define MADERA_MICDET_ADCVAL_DIFF_MASK			0xFF00
+#define MADERA_MICDET_ADCVAL_DIFF_SHIFT			     8
+#define MADERA_MICDET_ADCVAL_DIFF_WIDTH			     8
+#define MADERA_MICDET_ADCVAL_MASK			0x007F
+#define MADERA_MICDET_ADCVAL_SHIFT			     0
+#define MADERA_MICDET_ADCVAL_WIDTH			     7
+
+/* (0x02C6)  Micd_Clamp_control */
+#define MADERA_MICD_CLAMP_OVD				0x0010
+#define MADERA_MICD_CLAMP_OVD_MASK			0x0010
+#define MADERA_MICD_CLAMP_OVD_SHIFT			     4
+#define MADERA_MICD_CLAMP_OVD_WIDTH			     1
+#define MADERA_MICD_CLAMP_MODE_MASK			0x000F
+#define MADERA_MICD_CLAMP_MODE_SHIFT			     0
+#define MADERA_MICD_CLAMP_MODE_WIDTH			     4
+
+/* (0x02C8)  GP_Switch_1 */
+#define MADERA_SW2_MODE_MASK				0x000C
+#define MADERA_SW2_MODE_SHIFT				     2
+#define MADERA_SW2_MODE_WIDTH				     2
+#define MADERA_SW1_MODE_MASK				0x0003
+#define MADERA_SW1_MODE_SHIFT				     0
+#define MADERA_SW1_MODE_WIDTH				     2
+
+/* (0x02D3)  Jack_detect_analogue */
+#define MADERA_JD2_ENA					0x0002
+#define MADERA_JD2_ENA_MASK				0x0002
+#define MADERA_JD2_ENA_SHIFT				     1
+#define MADERA_JD2_ENA_WIDTH				     1
+#define MADERA_JD1_ENA					0x0001
+#define MADERA_JD1_ENA_MASK				0x0001
+#define MADERA_JD1_ENA_SHIFT				     0
+#define MADERA_JD1_ENA_WIDTH				     1
+
+/* (0x0300)  Input_Enables */
+#define MADERA_IN6L_ENA					0x0800
+#define MADERA_IN6L_ENA_MASK				0x0800
+#define MADERA_IN6L_ENA_SHIFT				    11
+#define MADERA_IN6L_ENA_WIDTH				     1
+#define MADERA_IN6R_ENA					0x0400
+#define MADERA_IN6R_ENA_MASK				0x0400
+#define MADERA_IN6R_ENA_SHIFT				    10
+#define MADERA_IN6R_ENA_WIDTH				     1
+#define MADERA_IN5L_ENA					0x0200
+#define MADERA_IN5L_ENA_MASK				0x0200
+#define MADERA_IN5L_ENA_SHIFT				     9
+#define MADERA_IN5L_ENA_WIDTH				     1
+#define MADERA_IN5R_ENA					0x0100
+#define MADERA_IN5R_ENA_MASK				0x0100
+#define MADERA_IN5R_ENA_SHIFT				     8
+#define MADERA_IN5R_ENA_WIDTH				     1
+#define MADERA_IN4L_ENA					0x0080
+#define MADERA_IN4L_ENA_MASK				0x0080
+#define MADERA_IN4L_ENA_SHIFT				     7
+#define MADERA_IN4L_ENA_WIDTH				     1
+#define MADERA_IN4R_ENA					0x0040
+#define MADERA_IN4R_ENA_MASK				0x0040
+#define MADERA_IN4R_ENA_SHIFT				     6
+#define MADERA_IN4R_ENA_WIDTH				     1
+#define MADERA_IN3L_ENA					0x0020
+#define MADERA_IN3L_ENA_MASK				0x0020
+#define MADERA_IN3L_ENA_SHIFT				     5
+#define MADERA_IN3L_ENA_WIDTH				     1
+#define MADERA_IN3R_ENA					0x0010
+#define MADERA_IN3R_ENA_MASK				0x0010
+#define MADERA_IN3R_ENA_SHIFT				     4
+#define MADERA_IN3R_ENA_WIDTH				     1
+#define MADERA_IN2L_ENA					0x0008
+#define MADERA_IN2L_ENA_MASK				0x0008
+#define MADERA_IN2L_ENA_SHIFT				     3
+#define MADERA_IN2L_ENA_WIDTH				     1
+#define MADERA_IN2R_ENA					0x0004
+#define MADERA_IN2R_ENA_MASK				0x0004
+#define MADERA_IN2R_ENA_SHIFT				     2
+#define MADERA_IN2R_ENA_WIDTH				     1
+#define MADERA_IN1L_ENA					0x0002
+#define MADERA_IN1L_ENA_MASK				0x0002
+#define MADERA_IN1L_ENA_SHIFT				     1
+#define MADERA_IN1L_ENA_WIDTH				     1
+#define MADERA_IN1R_ENA					0x0001
+#define MADERA_IN1R_ENA_MASK				0x0001
+#define MADERA_IN1R_ENA_SHIFT				     0
+#define MADERA_IN1R_ENA_WIDTH				     1
+
+/* (0x0308)  Input_Rate */
+#define MADERA_IN_RATE_MASK				0xF800
+#define MADERA_IN_RATE_SHIFT				    11
+#define MADERA_IN_RATE_WIDTH				     5
+#define MADERA_IN_MODE_MASK				0x0400
+#define MADERA_IN_MODE_SHIFT				    10
+#define MADERA_IN_MODE_WIDTH				     1
+
+/* (0x0309)  Input_Volume_Ramp */
+#define MADERA_IN_VD_RAMP_MASK				0x0070
+#define MADERA_IN_VD_RAMP_SHIFT				     4
+#define MADERA_IN_VD_RAMP_WIDTH				     3
+#define MADERA_IN_VI_RAMP_MASK				0x0007
+#define MADERA_IN_VI_RAMP_SHIFT				     0
+#define MADERA_IN_VI_RAMP_WIDTH				     3
+
+/* (0x030C)  HPF_Control */
+#define MADERA_IN_HPF_CUT_MASK				0x0007
+#define MADERA_IN_HPF_CUT_SHIFT				     0
+#define MADERA_IN_HPF_CUT_WIDTH				     3
+
+/* (0x0310)  IN1L_Control */
+#define MADERA_IN1L_HPF_MASK				0x8000
+#define MADERA_IN1L_HPF_SHIFT				    15
+#define MADERA_IN1L_HPF_WIDTH				     1
+#define MADERA_IN1_DMIC_SUP_MASK			0x1800
+#define MADERA_IN1_DMIC_SUP_SHIFT			    11
+#define MADERA_IN1_DMIC_SUP_WIDTH			     2
+#define MADERA_IN1_MODE_MASK				0x0400
+#define MADERA_IN1_MODE_SHIFT				    10
+#define MADERA_IN1_MODE_WIDTH				     1
+#define MADERA_IN1L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN1L_PGA_VOL_SHIFT			     1
+#define MADERA_IN1L_PGA_VOL_WIDTH			     7
+
+/* (0x0311)  ADC_Digital_Volume_1L */
+#define MADERA_IN1L_SRC_MASK				0x4000
+#define MADERA_IN1L_SRC_SHIFT				    14
+#define MADERA_IN1L_SRC_WIDTH				     1
+#define MADERA_IN1L_SRC_SE_MASK				0x2000
+#define MADERA_IN1L_SRC_SE_SHIFT			    13
+#define MADERA_IN1L_SRC_SE_WIDTH			     1
+#define MADERA_IN1L_LP_MODE				0x0800
+#define MADERA_IN1L_LP_MODE_MASK			0x0800
+#define MADERA_IN1L_LP_MODE_SHIFT			    11
+#define MADERA_IN1L_LP_MODE_WIDTH			     1
+#define MADERA_IN_VU					0x0200
+#define MADERA_IN_VU_MASK				0x0200
+#define MADERA_IN_VU_SHIFT				     9
+#define MADERA_IN_VU_WIDTH				     1
+#define MADERA_IN1L_MUTE				0x0100
+#define MADERA_IN1L_MUTE_MASK				0x0100
+#define MADERA_IN1L_MUTE_SHIFT				     8
+#define MADERA_IN1L_MUTE_WIDTH				     1
+#define MADERA_IN1L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN1L_DIG_VOL_SHIFT			     0
+#define MADERA_IN1L_DIG_VOL_WIDTH			     8
+
+/* (0x0312)  DMIC1L_Control */
+#define MADERA_IN1_OSR_MASK				0x0700
+#define MADERA_IN1_OSR_SHIFT				     8
+#define MADERA_IN1_OSR_WIDTH				     3
+
+/* (0x0313)  IN1L_Rate_Control */
+#define MADERA_IN1L_RATE_MASK				0xF800
+#define MADERA_IN1L_RATE_SHIFT				    11
+#define MADERA_IN1L_RATE_WIDTH				     5
+
+/* (0x0314)  IN1R_Control */
+#define MADERA_IN1R_HPF_MASK				0x8000
+#define MADERA_IN1R_HPF_SHIFT				    15
+#define MADERA_IN1R_HPF_WIDTH				     1
+#define MADERA_IN1R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN1R_PGA_VOL_SHIFT			     1
+#define MADERA_IN1R_PGA_VOL_WIDTH			     7
+#define MADERA_IN1_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN1_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN1_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0315)  ADC_Digital_Volume_1R */
+#define MADERA_IN1R_SRC_MASK				0x4000
+#define MADERA_IN1R_SRC_SHIFT				    14
+#define MADERA_IN1R_SRC_WIDTH				     1
+#define MADERA_IN1R_SRC_SE_MASK				0x2000
+#define MADERA_IN1R_SRC_SE_SHIFT			    13
+#define MADERA_IN1R_SRC_SE_WIDTH			     1
+#define MADERA_IN1R_LP_MODE				0x0800
+#define MADERA_IN1R_LP_MODE_MASK			0x0800
+#define MADERA_IN1R_LP_MODE_SHIFT			    11
+#define MADERA_IN1R_LP_MODE_WIDTH			     1
+#define MADERA_IN1R_MUTE				0x0100
+#define MADERA_IN1R_MUTE_MASK				0x0100
+#define MADERA_IN1R_MUTE_SHIFT				     8
+#define MADERA_IN1R_MUTE_WIDTH				     1
+#define MADERA_IN1R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN1R_DIG_VOL_SHIFT			     0
+#define MADERA_IN1R_DIG_VOL_WIDTH			     8
+
+/* (0x0317)  IN1R_Rate_Control */
+#define MADERA_IN1R_RATE_MASK				0xF800
+#define MADERA_IN1R_RATE_SHIFT				    11
+#define MADERA_IN1R_RATE_WIDTH				     5
+
+/* (0x0318)  IN2L_Control */
+#define MADERA_IN2L_HPF_MASK				0x8000
+#define MADERA_IN2L_HPF_SHIFT				    15
+#define MADERA_IN2L_HPF_WIDTH				     1
+#define MADERA_IN2_DMIC_SUP_MASK			0x1800
+#define MADERA_IN2_DMIC_SUP_SHIFT			    11
+#define MADERA_IN2_DMIC_SUP_WIDTH			     2
+#define MADERA_IN2_MODE_MASK				0x0400
+#define MADERA_IN2_MODE_SHIFT				    10
+#define MADERA_IN2_MODE_WIDTH				     1
+#define MADERA_IN2L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN2L_PGA_VOL_SHIFT			     1
+#define MADERA_IN2L_PGA_VOL_WIDTH			     7
+
+/* (0x0319)  ADC_Digital_Volume_2L */
+#define MADERA_IN2L_SRC_MASK				0x4000
+#define MADERA_IN2L_SRC_SHIFT				    14
+#define MADERA_IN2L_SRC_WIDTH				     1
+#define MADERA_IN2L_SRC_SE_MASK				0x2000
+#define MADERA_IN2L_SRC_SE_SHIFT			    13
+#define MADERA_IN2L_SRC_SE_WIDTH			     1
+#define MADERA_IN2L_LP_MODE				0x0800
+#define MADERA_IN2L_LP_MODE_MASK			0x0800
+#define MADERA_IN2L_LP_MODE_SHIFT			    11
+#define MADERA_IN2L_LP_MODE_WIDTH			     1
+#define MADERA_IN2L_MUTE				0x0100
+#define MADERA_IN2L_MUTE_MASK				0x0100
+#define MADERA_IN2L_MUTE_SHIFT				     8
+#define MADERA_IN2L_MUTE_WIDTH				     1
+#define MADERA_IN2L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN2L_DIG_VOL_SHIFT			     0
+#define MADERA_IN2L_DIG_VOL_WIDTH			     8
+
+/* (0x031A)  DMIC2L_Control */
+#define MADERA_IN2_OSR_MASK				0x0700
+#define MADERA_IN2_OSR_SHIFT				     8
+#define MADERA_IN2_OSR_WIDTH				     3
+
+/* (0x031C)  IN2R_Control */
+#define MADERA_IN2R_HPF_MASK				0x8000
+#define MADERA_IN2R_HPF_SHIFT				    15
+#define MADERA_IN2R_HPF_WIDTH				     1
+#define MADERA_IN2R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN2R_PGA_VOL_SHIFT			     1
+#define MADERA_IN2R_PGA_VOL_WIDTH			     7
+#define MADERA_IN2_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN2_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN2_DMICCLK_SRC_WIDTH			     2
+
+/* (0x031D)  ADC_Digital_Volume_2R */
+#define MADERA_IN2R_SRC_MASK				0x4000
+#define MADERA_IN2R_SRC_SHIFT				    14
+#define MADERA_IN2R_SRC_WIDTH				     1
+#define MADERA_IN2R_SRC_SE_MASK				0x2000
+#define MADERA_IN2R_SRC_SE_SHIFT			    13
+#define MADERA_IN2R_SRC_SE_WIDTH			     1
+#define MADERA_IN2R_LP_MODE				0x0800
+#define MADERA_IN2R_LP_MODE_MASK			0x0800
+#define MADERA_IN2R_LP_MODE_SHIFT			    11
+#define MADERA_IN2R_LP_MODE_WIDTH			     1
+#define MADERA_IN2R_MUTE				0x0100
+#define MADERA_IN2R_MUTE_MASK				0x0100
+#define MADERA_IN2R_MUTE_SHIFT				     8
+#define MADERA_IN2R_MUTE_WIDTH				     1
+#define MADERA_IN2R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN2R_DIG_VOL_SHIFT			     0
+#define MADERA_IN2R_DIG_VOL_WIDTH			     8
+
+/* (0x0320)  IN3L_Control */
+#define MADERA_IN3L_HPF_MASK				0x8000
+#define MADERA_IN3L_HPF_SHIFT				    15
+#define MADERA_IN3L_HPF_WIDTH				     1
+#define MADERA_IN3_DMIC_SUP_MASK			0x1800
+#define MADERA_IN3_DMIC_SUP_SHIFT			    11
+#define MADERA_IN3_DMIC_SUP_WIDTH			     2
+#define MADERA_IN3_MODE_MASK				0x0400
+#define MADERA_IN3_MODE_SHIFT				    10
+#define MADERA_IN3_MODE_WIDTH				     1
+#define MADERA_IN3L_PGA_VOL_MASK			0x00FE
+#define MADERA_IN3L_PGA_VOL_SHIFT			     1
+#define MADERA_IN3L_PGA_VOL_WIDTH			     7
+
+/* (0x0321)  ADC_Digital_Volume_3L */
+#define MADERA_IN3L_MUTE				0x0100
+#define MADERA_IN3L_MUTE_MASK				0x0100
+#define MADERA_IN3L_MUTE_SHIFT				     8
+#define MADERA_IN3L_MUTE_WIDTH				     1
+#define MADERA_IN3L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN3L_DIG_VOL_SHIFT			     0
+#define MADERA_IN3L_DIG_VOL_WIDTH			     8
+
+/* (0x0322)  DMIC3L_Control */
+#define MADERA_IN3_OSR_MASK				0x0700
+#define MADERA_IN3_OSR_SHIFT				     8
+#define MADERA_IN3_OSR_WIDTH				     3
+
+/* (0x0324)  IN3R_Control */
+#define MADERA_IN3R_HPF_MASK				0x8000
+#define MADERA_IN3R_HPF_SHIFT				    15
+#define MADERA_IN3R_HPF_WIDTH				     1
+#define MADERA_IN3R_PGA_VOL_MASK			0x00FE
+#define MADERA_IN3R_PGA_VOL_SHIFT			     1
+#define MADERA_IN3R_PGA_VOL_WIDTH			     7
+#define MADERA_IN3_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN3_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN3_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0325)  ADC_Digital_Volume_3R */
+#define MADERA_IN3R_MUTE				0x0100
+#define MADERA_IN3R_MUTE_MASK				0x0100
+#define MADERA_IN3R_MUTE_SHIFT				     8
+#define MADERA_IN3R_MUTE_WIDTH				     1
+#define MADERA_IN3R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN3R_DIG_VOL_SHIFT			     0
+#define MADERA_IN3R_DIG_VOL_WIDTH			     8
+
+/* (0x0328)  IN4L_Control */
+#define MADERA_IN4L_HPF_MASK				0x8000
+#define MADERA_IN4L_HPF_SHIFT				    15
+#define MADERA_IN4L_HPF_WIDTH				     1
+#define MADERA_IN4_DMIC_SUP_MASK			0x1800
+#define MADERA_IN4_DMIC_SUP_SHIFT			    11
+#define MADERA_IN4_DMIC_SUP_WIDTH			     2
+
+/* (0x0329)  ADC_Digital_Volume_4L */
+#define MADERA_IN4L_MUTE				0x0100
+#define MADERA_IN4L_MUTE_MASK				0x0100
+#define MADERA_IN4L_MUTE_SHIFT				     8
+#define MADERA_IN4L_MUTE_WIDTH				     1
+#define MADERA_IN4L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN4L_DIG_VOL_SHIFT			     0
+#define MADERA_IN4L_DIG_VOL_WIDTH			     8
+
+/* (0x032A)  DMIC4L_Control */
+#define MADERA_IN4_OSR_MASK				0x0700
+#define MADERA_IN4_OSR_SHIFT				     8
+#define MADERA_IN4_OSR_WIDTH				     3
+
+/* (0x032C)  IN4R_Control */
+#define MADERA_IN4R_HPF_MASK				0x8000
+#define MADERA_IN4R_HPF_SHIFT				    15
+#define MADERA_IN4R_HPF_WIDTH				     1
+#define MADERA_IN4_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN4_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN4_DMICCLK_SRC_WIDTH			     2
+
+/* (0x032D)  ADC_Digital_Volume_4R */
+#define MADERA_IN4R_MUTE				0x0100
+#define MADERA_IN4R_MUTE_MASK				0x0100
+#define MADERA_IN4R_MUTE_SHIFT				     8
+#define MADERA_IN4R_MUTE_WIDTH				     1
+#define MADERA_IN4R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN4R_DIG_VOL_SHIFT			     0
+#define MADERA_IN4R_DIG_VOL_WIDTH			     8
+
+/* (0x0330)  IN5L_Control */
+#define MADERA_IN5L_HPF_MASK				0x8000
+#define MADERA_IN5L_HPF_SHIFT				    15
+#define MADERA_IN5L_HPF_WIDTH				     1
+#define MADERA_IN5_DMIC_SUP_MASK			0x1800
+#define MADERA_IN5_DMIC_SUP_SHIFT			    11
+#define MADERA_IN5_DMIC_SUP_WIDTH			     2
+
+/* (0x0331)  ADC_Digital_Volume_5L */
+#define MADERA_IN5L_MUTE				0x0100
+#define MADERA_IN5L_MUTE_MASK				0x0100
+#define MADERA_IN5L_MUTE_SHIFT				     8
+#define MADERA_IN5L_MUTE_WIDTH				     1
+#define MADERA_IN5L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN5L_DIG_VOL_SHIFT			     0
+#define MADERA_IN5L_DIG_VOL_WIDTH			     8
+
+/* (0x0332)  DMIC5L_Control */
+#define MADERA_IN5_OSR_MASK				0x0700
+#define MADERA_IN5_OSR_SHIFT				     8
+#define MADERA_IN5_OSR_WIDTH				     3
+
+/* (0x0334)  IN5R_Control */
+#define MADERA_IN5R_HPF_MASK				0x8000
+#define MADERA_IN5R_HPF_SHIFT				    15
+#define MADERA_IN5R_HPF_WIDTH				     1
+#define MADERA_IN5_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN5_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN5_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0335)  ADC_Digital_Volume_5R */
+#define MADERA_IN5R_MUTE				0x0100
+#define MADERA_IN5R_MUTE_MASK				0x0100
+#define MADERA_IN5R_MUTE_SHIFT				     8
+#define MADERA_IN5R_MUTE_WIDTH				     1
+#define MADERA_IN5R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN5R_DIG_VOL_SHIFT			     0
+#define MADERA_IN5R_DIG_VOL_WIDTH			     8
+
+/* (0x0338)  IN6L_Control */
+#define MADERA_IN6L_HPF_MASK				0x8000
+#define MADERA_IN6L_HPF_SHIFT				    15
+#define MADERA_IN6L_HPF_WIDTH				     1
+#define MADERA_IN6_DMIC_SUP_MASK			0x1800
+#define MADERA_IN6_DMIC_SUP_SHIFT			    11
+#define MADERA_IN6_DMIC_SUP_WIDTH			     2
+
+/* (0x0339)  ADC_Digital_Volume_6L */
+#define MADERA_IN6L_MUTE				0x0100
+#define MADERA_IN6L_MUTE_MASK				0x0100
+#define MADERA_IN6L_MUTE_SHIFT				     8
+#define MADERA_IN6L_MUTE_WIDTH				     1
+#define MADERA_IN6L_DIG_VOL_MASK			0x00FF
+#define MADERA_IN6L_DIG_VOL_SHIFT			     0
+#define MADERA_IN6L_DIG_VOL_WIDTH			     8
+
+/* (0x033A)  DMIC6L_Control */
+#define MADERA_IN6_OSR_MASK				0x0700
+#define MADERA_IN6_OSR_SHIFT				     8
+#define MADERA_IN6_OSR_WIDTH				     3
+
+/* (0x033C)  IN6R_Control */
+#define MADERA_IN6R_HPF_MASK				0x8000
+#define MADERA_IN6R_HPF_SHIFT				    15
+#define MADERA_IN6R_HPF_WIDTH				     1
+
+/* (0x033D)  ADC_Digital_Volume_6R */
+#define MADERA_IN6R_MUTE				0x0100
+#define MADERA_IN6R_MUTE_MASK				0x0100
+#define MADERA_IN6R_MUTE_SHIFT				     8
+#define MADERA_IN6R_MUTE_WIDTH				     1
+#define MADERA_IN6R_DIG_VOL_MASK			0x00FF
+#define MADERA_IN6R_DIG_VOL_SHIFT			     0
+#define MADERA_IN6R_DIG_VOL_WIDTH			     8
+
+/* (0x033E)  DMIC6R_Control */
+#define MADERA_IN6_DMICCLK_SRC_MASK			0x1800
+#define MADERA_IN6_DMICCLK_SRC_SHIFT			    11
+#define MADERA_IN6_DMICCLK_SRC_WIDTH			     2
+
+/* (0x0400)  Output_Enables_1 */
+#define MADERA_EP_SEL					0x8000
+#define MADERA_EP_SEL_MASK				0x8000
+#define MADERA_EP_SEL_SHIFT				    15
+#define MADERA_EP_SEL_WIDTH				     1
+#define MADERA_OUT6L_ENA				0x0800
+#define MADERA_OUT6L_ENA_MASK				0x0800
+#define MADERA_OUT6L_ENA_SHIFT				    11
+#define MADERA_OUT6L_ENA_WIDTH				     1
+#define MADERA_OUT6R_ENA				0x0400
+#define MADERA_OUT6R_ENA_MASK				0x0400
+#define MADERA_OUT6R_ENA_SHIFT				    10
+#define MADERA_OUT6R_ENA_WIDTH				     1
+#define MADERA_OUT5L_ENA				0x0200
+#define MADERA_OUT5L_ENA_MASK				0x0200
+#define MADERA_OUT5L_ENA_SHIFT				     9
+#define MADERA_OUT5L_ENA_WIDTH				     1
+#define MADERA_OUT5R_ENA				0x0100
+#define MADERA_OUT5R_ENA_MASK				0x0100
+#define MADERA_OUT5R_ENA_SHIFT				     8
+#define MADERA_OUT5R_ENA_WIDTH				     1
+#define MADERA_OUT4L_ENA				0x0080
+#define MADERA_OUT4L_ENA_MASK				0x0080
+#define MADERA_OUT4L_ENA_SHIFT				     7
+#define MADERA_OUT4L_ENA_WIDTH				     1
+#define MADERA_OUT4R_ENA				0x0040
+#define MADERA_OUT4R_ENA_MASK				0x0040
+#define MADERA_OUT4R_ENA_SHIFT				     6
+#define MADERA_OUT4R_ENA_WIDTH				     1
+#define MADERA_OUT3L_ENA				0x0020
+#define MADERA_OUT3L_ENA_MASK				0x0020
+#define MADERA_OUT3L_ENA_SHIFT				     5
+#define MADERA_OUT3L_ENA_WIDTH				     1
+#define MADERA_OUT3R_ENA				0x0010
+#define MADERA_OUT3R_ENA_MASK				0x0010
+#define MADERA_OUT3R_ENA_SHIFT				     4
+#define MADERA_OUT3R_ENA_WIDTH				     1
+#define MADERA_OUT2L_ENA				0x0008
+#define MADERA_OUT2L_ENA_MASK				0x0008
+#define MADERA_OUT2L_ENA_SHIFT				     3
+#define MADERA_OUT2L_ENA_WIDTH				     1
+#define MADERA_OUT2R_ENA				0x0004
+#define MADERA_OUT2R_ENA_MASK				0x0004
+#define MADERA_OUT2R_ENA_SHIFT				     2
+#define MADERA_OUT2R_ENA_WIDTH				     1
+#define MADERA_OUT1L_ENA				0x0002
+#define MADERA_OUT1L_ENA_MASK				0x0002
+#define MADERA_OUT1L_ENA_SHIFT				     1
+#define MADERA_OUT1L_ENA_WIDTH				     1
+#define MADERA_OUT1R_ENA				0x0001
+#define MADERA_OUT1R_ENA_MASK				0x0001
+#define MADERA_OUT1R_ENA_SHIFT				     0
+#define MADERA_OUT1R_ENA_WIDTH				     1
+
+/* (0x0409)  Output_Volume_Ramp */
+#define MADERA_OUT_VD_RAMP_MASK				0x0070
+#define MADERA_OUT_VD_RAMP_SHIFT			     4
+#define MADERA_OUT_VD_RAMP_WIDTH			     3
+#define MADERA_OUT_VI_RAMP_MASK				0x0007
+#define MADERA_OUT_VI_RAMP_SHIFT			     0
+#define MADERA_OUT_VI_RAMP_WIDTH			     3
+
+/* (0x0410)  Output_Path_Config_1L */
+#define MADERA_OUT1_MONO				0x1000
+#define MADERA_OUT1_MONO_MASK				0x1000
+#define MADERA_OUT1_MONO_SHIFT				    12
+#define MADERA_OUT1_MONO_WIDTH				     1
+#define MADERA_OUT1L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT1L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT1L_ANC_SRC_WIDTH			     2
+
+/* (0x0411)  DAC_Digital_Volume_1L */
+#define MADERA_OUT1L_VU					0x0200
+#define MADERA_OUT1L_VU_MASK				0x0200
+#define MADERA_OUT1L_VU_SHIFT				     9
+#define MADERA_OUT1L_VU_WIDTH				     1
+#define MADERA_OUT1L_MUTE				0x0100
+#define MADERA_OUT1L_MUTE_MASK				0x0100
+#define MADERA_OUT1L_MUTE_SHIFT				     8
+#define MADERA_OUT1L_MUTE_WIDTH				     1
+#define MADERA_OUT1L_VOL_MASK				0x00FF
+#define MADERA_OUT1L_VOL_SHIFT				     0
+#define MADERA_OUT1L_VOL_WIDTH				     8
+
+/* (0x0412)  Output_Path_Config_1 */
+#define MADERA_HP1_GND_SEL_MASK				0x0007
+#define MADERA_HP1_GND_SEL_SHIFT			     0
+#define MADERA_HP1_GND_SEL_WIDTH			     3
+
+/* (0x0414)  Output_Path_Config_1R */
+#define MADERA_OUT1R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT1R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT1R_ANC_SRC_WIDTH			     2
+
+/* (0x0415)  DAC_Digital_Volume_1R */
+#define MADERA_OUT1R_MUTE				0x0100
+#define MADERA_OUT1R_MUTE_MASK				0x0100
+#define MADERA_OUT1R_MUTE_SHIFT				     8
+#define MADERA_OUT1R_MUTE_WIDTH				     1
+#define MADERA_OUT1R_VOL_MASK				0x00FF
+#define MADERA_OUT1R_VOL_SHIFT				     0
+#define MADERA_OUT1R_VOL_WIDTH				     8
+
+/* (0x0418)  Output_Path_Config_2L */
+#define MADERA_OUT2L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT2L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT2L_ANC_SRC_WIDTH			     2
+
+/* (0x0419)  DAC_Digital_Volume_2L */
+#define MADERA_OUT2L_MUTE				0x0100
+#define MADERA_OUT2L_MUTE_MASK				0x0100
+#define MADERA_OUT2L_MUTE_SHIFT				     8
+#define MADERA_OUT2L_MUTE_WIDTH				     1
+#define MADERA_OUT2L_VOL_MASK				0x00FF
+#define MADERA_OUT2L_VOL_SHIFT				     0
+#define MADERA_OUT2L_VOL_WIDTH				     8
+
+/* (0x041A)  Output_Path_Config_2 */
+#define MADERA_HP2_GND_SEL_MASK				0x0007
+#define MADERA_HP2_GND_SEL_SHIFT			     0
+#define MADERA_HP2_GND_SEL_WIDTH			     3
+
+/* (0x041C)  Output_Path_Config_2R */
+#define MADERA_OUT2R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT2R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT2R_ANC_SRC_WIDTH			     2
+
+/* (0x041D)  DAC_Digital_Volume_2R */
+#define MADERA_OUT2R_MUTE				0x0100
+#define MADERA_OUT2R_MUTE_MASK				0x0100
+#define MADERA_OUT2R_MUTE_SHIFT				     8
+#define MADERA_OUT2R_MUTE_WIDTH				     1
+#define MADERA_OUT2R_VOL_MASK				0x00FF
+#define MADERA_OUT2R_VOL_SHIFT				     0
+#define MADERA_OUT2R_VOL_WIDTH				     8
+
+/* (0x0420)  Output_Path_Config_3L */
+#define MADERA_OUT3L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT3L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT3L_ANC_SRC_WIDTH			     2
+
+/* (0x0421)  DAC_Digital_Volume_3L */
+#define MADERA_OUT3L_MUTE				0x0100
+#define MADERA_OUT3L_MUTE_MASK				0x0100
+#define MADERA_OUT3L_MUTE_SHIFT				     8
+#define MADERA_OUT3L_MUTE_WIDTH				     1
+#define MADERA_OUT3L_VOL_MASK				0x00FF
+#define MADERA_OUT3L_VOL_SHIFT				     0
+#define MADERA_OUT3L_VOL_WIDTH				     8
+
+/* (0x0424)  Output_Path_Config_3R */
+#define MADERA_OUT3R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT3R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT3R_ANC_SRC_WIDTH			     2
+
+/* (0x0425)  DAC_Digital_Volume_3R */
+#define MADERA_OUT3R_MUTE				0x0100
+#define MADERA_OUT3R_MUTE_MASK				0x0100
+#define MADERA_OUT3R_MUTE_SHIFT				     8
+#define MADERA_OUT3R_MUTE_WIDTH				     1
+#define MADERA_OUT3R_VOL_MASK				0x00FF
+#define MADERA_OUT3R_VOL_SHIFT				     0
+#define MADERA_OUT3R_VOL_WIDTH				     8
+
+/* (0x0428)  Output_Path_Config_4L */
+#define MADERA_OUT4L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT4L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT4L_ANC_SRC_WIDTH			     2
+
+/* (0x0429)  DAC_Digital_Volume_4L */
+#define MADERA_OUT4L_MUTE				0x0100
+#define MADERA_OUT4L_MUTE_MASK				0x0100
+#define MADERA_OUT4L_MUTE_SHIFT				     8
+#define MADERA_OUT4L_MUTE_WIDTH				     1
+#define MADERA_OUT4L_VOL_MASK				0x00FF
+#define MADERA_OUT4L_VOL_SHIFT				     0
+#define MADERA_OUT4L_VOL_WIDTH				     8
+
+/* (0x042C)  Output_Path_Config_4R */
+#define MADERA_OUT4R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT4R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT4R_ANC_SRC_WIDTH			     2
+
+/* (0x042D)  DAC_Digital_Volume_4R */
+#define MADERA_OUT4R_MUTE				0x0100
+#define MADERA_OUT4R_MUTE_MASK				0x0100
+#define MADERA_OUT4R_MUTE_SHIFT				     8
+#define MADERA_OUT4R_MUTE_WIDTH				     1
+#define MADERA_OUT4R_VOL_MASK				0x00FF
+#define MADERA_OUT4R_VOL_SHIFT				     0
+#define MADERA_OUT4R_VOL_WIDTH				     8
+
+/* (0x0430)  Output_Path_Config_5L */
+#define MADERA_OUT5_OSR					0x2000
+#define MADERA_OUT5_OSR_MASK				0x2000
+#define MADERA_OUT5_OSR_SHIFT				    13
+#define MADERA_OUT5_OSR_WIDTH				     1
+#define MADERA_OUT5L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT5L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT5L_ANC_SRC_WIDTH			     2
+
+/* (0x0431)  DAC_Digital_Volume_5L */
+#define MADERA_OUT5L_MUTE				0x0100
+#define MADERA_OUT5L_MUTE_MASK				0x0100
+#define MADERA_OUT5L_MUTE_SHIFT				     8
+#define MADERA_OUT5L_MUTE_WIDTH				     1
+#define MADERA_OUT5L_VOL_MASK				0x00FF
+#define MADERA_OUT5L_VOL_SHIFT				     0
+#define MADERA_OUT5L_VOL_WIDTH				     8
+
+/* (0x0434)  Output_Path_Config_5R */
+#define MADERA_OUT5R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT5R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT5R_ANC_SRC_WIDTH			     2
+
+/* (0x0435)  DAC_Digital_Volume_5R */
+#define MADERA_OUT5R_MUTE				0x0100
+#define MADERA_OUT5R_MUTE_MASK				0x0100
+#define MADERA_OUT5R_MUTE_SHIFT				     8
+#define MADERA_OUT5R_MUTE_WIDTH				     1
+#define MADERA_OUT5R_VOL_MASK				0x00FF
+#define MADERA_OUT5R_VOL_SHIFT				     0
+#define MADERA_OUT5R_VOL_WIDTH				     8
+
+/* (0x0438)  Output_Path_Config_6L */
+#define MADERA_OUT6_OSR					0x2000
+#define MADERA_OUT6_OSR_MASK				0x2000
+#define MADERA_OUT6_OSR_SHIFT				    13
+#define MADERA_OUT6_OSR_WIDTH				     1
+#define MADERA_OUT6L_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT6L_ANC_SRC_SHIFT			    10
+#define MADERA_OUT6L_ANC_SRC_WIDTH			     2
+
+/* (0x0439)  DAC_Digital_Volume_6L */
+#define MADERA_OUT6L_MUTE				0x0100
+#define MADERA_OUT6L_MUTE_MASK				0x0100
+#define MADERA_OUT6L_MUTE_SHIFT				     8
+#define MADERA_OUT6L_MUTE_WIDTH				     1
+#define MADERA_OUT6L_VOL_MASK				0x00FF
+#define MADERA_OUT6L_VOL_SHIFT				     0
+#define MADERA_OUT6L_VOL_WIDTH				     8
+
+/* (0x043C)  Output_Path_Config_6R */
+#define MADERA_OUT6R_ANC_SRC_MASK			0x0C00
+#define MADERA_OUT6R_ANC_SRC_SHIFT			    10
+#define MADERA_OUT6R_ANC_SRC_WIDTH			     2
+
+/* (0x043D)  DAC_Digital_Volume_6R */
+#define MADERA_OUT6R_MUTE				0x0100
+#define MADERA_OUT6R_MUTE_MASK				0x0100
+#define MADERA_OUT6R_MUTE_SHIFT				     8
+#define MADERA_OUT6R_MUTE_WIDTH				     1
+#define MADERA_OUT6R_VOL_MASK				0x00FF
+#define MADERA_OUT6R_VOL_SHIFT				     0
+#define MADERA_OUT6R_VOL_WIDTH				     8
+
+/* (0x0450) - DAC AEC Control 1 */
+#define MADERA_AEC1_LOOPBACK_SRC_MASK			0x003C
+#define MADERA_AEC1_LOOPBACK_SRC_SHIFT			     2
+#define MADERA_AEC1_LOOPBACK_SRC_WIDTH			     4
+#define MADERA_AEC1_ENA_STS				0x0002
+#define MADERA_AEC1_ENA_STS_MASK			0x0002
+#define MADERA_AEC1_ENA_STS_SHIFT			     1
+#define MADERA_AEC1_ENA_STS_WIDTH			     1
+#define MADERA_AEC1_LOOPBACK_ENA			0x0001
+#define MADERA_AEC1_LOOPBACK_ENA_MASK			0x0001
+#define MADERA_AEC1_LOOPBACK_ENA_SHIFT			     0
+#define MADERA_AEC1_LOOPBACK_ENA_WIDTH			     1
+
+/* (0x0451)  DAC_AEC_Control_2 */
+#define MADERA_AEC2_LOOPBACK_SRC_MASK			0x003C
+#define MADERA_AEC2_LOOPBACK_SRC_SHIFT			     2
+#define MADERA_AEC2_LOOPBACK_SRC_WIDTH			     4
+#define MADERA_AEC2_ENA_STS				0x0002
+#define MADERA_AEC2_ENA_STS_MASK			0x0002
+#define MADERA_AEC2_ENA_STS_SHIFT			     1
+#define MADERA_AEC2_ENA_STS_WIDTH			     1
+#define MADERA_AEC2_LOOPBACK_ENA			0x0001
+#define MADERA_AEC2_LOOPBACK_ENA_MASK			0x0001
+#define MADERA_AEC2_LOOPBACK_ENA_SHIFT			     0
+#define MADERA_AEC2_LOOPBACK_ENA_WIDTH			     1
+
+/* (0x0458)  Noise_Gate_Control */
+#define MADERA_NGATE_HOLD_MASK				0x0030
+#define MADERA_NGATE_HOLD_SHIFT				     4
+#define MADERA_NGATE_HOLD_WIDTH				     2
+#define MADERA_NGATE_THR_MASK				0x000E
+#define MADERA_NGATE_THR_SHIFT				     1
+#define MADERA_NGATE_THR_WIDTH				     3
+#define MADERA_NGATE_ENA				0x0001
+#define MADERA_NGATE_ENA_MASK				0x0001
+#define MADERA_NGATE_ENA_SHIFT				     0
+#define MADERA_NGATE_ENA_WIDTH				     1
+
+/* (0x0490)  PDM_SPK1_CTRL_1 */
+#define MADERA_SPK1R_MUTE				0x2000
+#define MADERA_SPK1R_MUTE_MASK				0x2000
+#define MADERA_SPK1R_MUTE_SHIFT				    13
+#define MADERA_SPK1R_MUTE_WIDTH				     1
+#define MADERA_SPK1L_MUTE				0x1000
+#define MADERA_SPK1L_MUTE_MASK				0x1000
+#define MADERA_SPK1L_MUTE_SHIFT				    12
+#define MADERA_SPK1L_MUTE_WIDTH				     1
+#define MADERA_SPK1_MUTE_ENDIAN				0x0100
+#define MADERA_SPK1_MUTE_ENDIAN_MASK			0x0100
+#define MADERA_SPK1_MUTE_ENDIAN_SHIFT			     8
+#define MADERA_SPK1_MUTE_ENDIAN_WIDTH			     1
+#define MADERA_SPK1_MUTE_SEQ1_MASK			0x00FF
+#define MADERA_SPK1_MUTE_SEQ1_SHIFT			     0
+#define MADERA_SPK1_MUTE_SEQ1_WIDTH			     8
+
+/* (0x0491)  PDM_SPK1_CTRL_2 */
+#define MADERA_SPK1_FMT					0x0001
+#define MADERA_SPK1_FMT_MASK				0x0001
+#define MADERA_SPK1_FMT_SHIFT				     0
+#define MADERA_SPK1_FMT_WIDTH				     1
+
+/* (0x0492)  PDM_SPK2_CTRL_1 */
+#define MADERA_SPK2R_MUTE				0x2000
+#define MADERA_SPK2R_MUTE_MASK				0x2000
+#define MADERA_SPK2R_MUTE_SHIFT				    13
+#define MADERA_SPK2R_MUTE_WIDTH				     1
+#define MADERA_SPK2L_MUTE				0x1000
+#define MADERA_SPK2L_MUTE_MASK				0x1000
+#define MADERA_SPK2L_MUTE_SHIFT				    12
+#define MADERA_SPK2L_MUTE_WIDTH				     1
+
+/* (0x04A0) - HP1 Short Circuit Ctrl */
+#define MADERA_HP1_SC_ENA				0x1000
+#define MADERA_HP1_SC_ENA_MASK				0x1000
+#define MADERA_HP1_SC_ENA_SHIFT				    12
+#define MADERA_HP1_SC_ENA_WIDTH				     1
+
+/* (0x04A1) - HP2 Short Circuit Ctrl */
+#define MADERA_HP2_SC_ENA				0x1000
+#define MADERA_HP2_SC_ENA_MASK				0x1000
+#define MADERA_HP2_SC_ENA_SHIFT				    12
+#define MADERA_HP2_SC_ENA_WIDTH				     1
+
+/* (0x04A2) - HP3 Short Circuit Ctrl */
+#define MADERA_HP3_SC_ENA				0x1000
+#define MADERA_HP3_SC_ENA_MASK				0x1000
+#define MADERA_HP3_SC_ENA_SHIFT				    12
+#define MADERA_HP3_SC_ENA_WIDTH				     1
+
+/* (0x04A8) - HP_Test_Ctrl_5 */
+#define MADERA_HP1L_ONEFLT				0x0100
+#define MADERA_HP1L_ONEFLT_MASK				0x0100
+#define MADERA_HP1L_ONEFLT_SHIFT			     8
+#define MADERA_HP1L_ONEFLT_WIDTH			     1
+
+/* (0x04A9) - HP_Test_Ctrl_6 */
+#define MADERA_HP1R_ONEFLT				0x0100
+#define MADERA_HP1R_ONEFLT_MASK				0x0100
+#define MADERA_HP1R_ONEFLT_SHIFT			     8
+#define MADERA_HP1R_ONEFLT_WIDTH			     1
+
+/* (0x0500)  AIF1_BCLK_Ctrl */
+#define MADERA_AIF1_BCLK_INV				0x0080
+#define MADERA_AIF1_BCLK_INV_MASK			0x0080
+#define MADERA_AIF1_BCLK_INV_SHIFT			     7
+#define MADERA_AIF1_BCLK_INV_WIDTH			     1
+#define MADERA_AIF1_BCLK_MSTR				0x0020
+#define MADERA_AIF1_BCLK_MSTR_MASK			0x0020
+#define MADERA_AIF1_BCLK_MSTR_SHIFT			     5
+#define MADERA_AIF1_BCLK_MSTR_WIDTH			     1
+#define MADERA_AIF1_BCLK_FREQ_MASK			0x001F
+#define MADERA_AIF1_BCLK_FREQ_SHIFT			     0
+#define MADERA_AIF1_BCLK_FREQ_WIDTH			     5
+
+/* (0x0501)  AIF1_Tx_Pin_Ctrl */
+#define MADERA_AIF1TX_LRCLK_SRC				0x0008
+#define MADERA_AIF1TX_LRCLK_SRC_MASK			0x0008
+#define MADERA_AIF1TX_LRCLK_SRC_SHIFT			     3
+#define MADERA_AIF1TX_LRCLK_SRC_WIDTH			     1
+#define MADERA_AIF1TX_LRCLK_INV				0x0004
+#define MADERA_AIF1TX_LRCLK_INV_MASK			0x0004
+#define MADERA_AIF1TX_LRCLK_INV_SHIFT			     2
+#define MADERA_AIF1TX_LRCLK_INV_WIDTH			     1
+#define MADERA_AIF1TX_LRCLK_MSTR			0x0001
+#define MADERA_AIF1TX_LRCLK_MSTR_MASK			0x0001
+#define MADERA_AIF1TX_LRCLK_MSTR_SHIFT			     0
+#define MADERA_AIF1TX_LRCLK_MSTR_WIDTH			     1
+
+/* (0x0502)  AIF1_Rx_Pin_Ctrl */
+#define MADERA_AIF1RX_LRCLK_INV				0x0004
+#define MADERA_AIF1RX_LRCLK_INV_MASK			0x0004
+#define MADERA_AIF1RX_LRCLK_INV_SHIFT			     2
+#define MADERA_AIF1RX_LRCLK_INV_WIDTH			     1
+#define MADERA_AIF1RX_LRCLK_FRC				0x0002
+#define MADERA_AIF1RX_LRCLK_FRC_MASK			0x0002
+#define MADERA_AIF1RX_LRCLK_FRC_SHIFT			     1
+#define MADERA_AIF1RX_LRCLK_FRC_WIDTH			     1
+#define MADERA_AIF1RX_LRCLK_MSTR			0x0001
+#define MADERA_AIF1RX_LRCLK_MSTR_MASK			0x0001
+#define MADERA_AIF1RX_LRCLK_MSTR_SHIFT			     0
+#define MADERA_AIF1RX_LRCLK_MSTR_WIDTH			     1
+
+/* (0x0503)  AIF1_Rate_Ctrl */
+#define MADERA_AIF1_RATE_MASK				0xF800
+#define MADERA_AIF1_RATE_SHIFT				    11
+#define MADERA_AIF1_RATE_WIDTH				     5
+#define MADERA_AIF1_TRI					0x0040
+#define MADERA_AIF1_TRI_MASK				0x0040
+#define MADERA_AIF1_TRI_SHIFT				     6
+#define MADERA_AIF1_TRI_WIDTH				     1
+
+/* (0x0504)  AIF1_Format */
+#define MADERA_AIF1_FMT_MASK				0x0007
+#define MADERA_AIF1_FMT_SHIFT				     0
+#define MADERA_AIF1_FMT_WIDTH				     3
+
+/* (0x0506)  AIF1_Rx_BCLK_Rate */
+#define MADERA_AIF1RX_BCPF_MASK				0x1FFF
+#define MADERA_AIF1RX_BCPF_SHIFT			     0
+#define MADERA_AIF1RX_BCPF_WIDTH			    13
+
+/* (0x0507)  AIF1_Frame_Ctrl_1 */
+#define MADERA_AIF1TX_WL_MASK				0x3F00
+#define MADERA_AIF1TX_WL_SHIFT				     8
+#define MADERA_AIF1TX_WL_WIDTH				     6
+#define MADERA_AIF1TX_SLOT_LEN_MASK			0x00FF
+#define MADERA_AIF1TX_SLOT_LEN_SHIFT			     0
+#define MADERA_AIF1TX_SLOT_LEN_WIDTH			     8
+
+/* (0x0508)  AIF1_Frame_Ctrl_2 */
+#define MADERA_AIF1RX_WL_MASK				0x3F00
+#define MADERA_AIF1RX_WL_SHIFT				     8
+#define MADERA_AIF1RX_WL_WIDTH				     6
+#define MADERA_AIF1RX_SLOT_LEN_MASK			0x00FF
+#define MADERA_AIF1RX_SLOT_LEN_SHIFT			     0
+#define MADERA_AIF1RX_SLOT_LEN_WIDTH			     8
+
+/* (0x0509)  AIF1_Frame_Ctrl_3 */
+#define MADERA_AIF1TX1_SLOT_MASK			0x003F
+#define MADERA_AIF1TX1_SLOT_SHIFT			     0
+#define MADERA_AIF1TX1_SLOT_WIDTH			     6
+
+/* (0x0519)  AIF1_Tx_Enables */
+#define MADERA_AIF1TX8_ENA				0x0080
+#define MADERA_AIF1TX8_ENA_MASK				0x0080
+#define MADERA_AIF1TX8_ENA_SHIFT			     7
+#define MADERA_AIF1TX8_ENA_WIDTH			     1
+#define MADERA_AIF1TX7_ENA				0x0040
+#define MADERA_AIF1TX7_ENA_MASK				0x0040
+#define MADERA_AIF1TX7_ENA_SHIFT			     6
+#define MADERA_AIF1TX7_ENA_WIDTH			     1
+#define MADERA_AIF1TX6_ENA				0x0020
+#define MADERA_AIF1TX6_ENA_MASK				0x0020
+#define MADERA_AIF1TX6_ENA_SHIFT			     5
+#define MADERA_AIF1TX6_ENA_WIDTH			     1
+#define MADERA_AIF1TX5_ENA				0x0010
+#define MADERA_AIF1TX5_ENA_MASK				0x0010
+#define MADERA_AIF1TX5_ENA_SHIFT			     4
+#define MADERA_AIF1TX5_ENA_WIDTH			     1
+#define MADERA_AIF1TX4_ENA				0x0008
+#define MADERA_AIF1TX4_ENA_MASK				0x0008
+#define MADERA_AIF1TX4_ENA_SHIFT			     3
+#define MADERA_AIF1TX4_ENA_WIDTH			     1
+#define MADERA_AIF1TX3_ENA				0x0004
+#define MADERA_AIF1TX3_ENA_MASK				0x0004
+#define MADERA_AIF1TX3_ENA_SHIFT			     2
+#define MADERA_AIF1TX3_ENA_WIDTH			     1
+#define MADERA_AIF1TX2_ENA				0x0002
+#define MADERA_AIF1TX2_ENA_MASK				0x0002
+#define MADERA_AIF1TX2_ENA_SHIFT			     1
+#define MADERA_AIF1TX2_ENA_WIDTH			     1
+#define MADERA_AIF1TX1_ENA				0x0001
+#define MADERA_AIF1TX1_ENA_MASK				0x0001
+#define MADERA_AIF1TX1_ENA_SHIFT			     0
+#define MADERA_AIF1TX1_ENA_WIDTH			     1
+
+/* (0x051A)  AIF1_Rx_Enables */
+#define MADERA_AIF1RX8_ENA				0x0080
+#define MADERA_AIF1RX8_ENA_MASK				0x0080
+#define MADERA_AIF1RX8_ENA_SHIFT			     7
+#define MADERA_AIF1RX8_ENA_WIDTH			     1
+#define MADERA_AIF1RX7_ENA				0x0040
+#define MADERA_AIF1RX7_ENA_MASK				0x0040
+#define MADERA_AIF1RX7_ENA_SHIFT			     6
+#define MADERA_AIF1RX7_ENA_WIDTH			     1
+#define MADERA_AIF1RX6_ENA				0x0020
+#define MADERA_AIF1RX6_ENA_MASK				0x0020
+#define MADERA_AIF1RX6_ENA_SHIFT			     5
+#define MADERA_AIF1RX6_ENA_WIDTH			     1
+#define MADERA_AIF1RX5_ENA				0x0010
+#define MADERA_AIF1RX5_ENA_MASK				0x0010
+#define MADERA_AIF1RX5_ENA_SHIFT			     4
+#define MADERA_AIF1RX5_ENA_WIDTH			     1
+#define MADERA_AIF1RX4_ENA				0x0008
+#define MADERA_AIF1RX4_ENA_MASK				0x0008
+#define MADERA_AIF1RX4_ENA_SHIFT			     3
+#define MADERA_AIF1RX4_ENA_WIDTH			     1
+#define MADERA_AIF1RX3_ENA				0x0004
+#define MADERA_AIF1RX3_ENA_MASK				0x0004
+#define MADERA_AIF1RX3_ENA_SHIFT			     2
+#define MADERA_AIF1RX3_ENA_WIDTH			     1
+#define MADERA_AIF1RX2_ENA				0x0002
+#define MADERA_AIF1RX2_ENA_MASK				0x0002
+#define MADERA_AIF1RX2_ENA_SHIFT			     1
+#define MADERA_AIF1RX2_ENA_WIDTH			     1
+#define MADERA_AIF1RX1_ENA				0x0001
+#define MADERA_AIF1RX1_ENA_MASK				0x0001
+#define MADERA_AIF1RX1_ENA_SHIFT			     0
+#define MADERA_AIF1RX1_ENA_WIDTH			     1
+
+/* (0x0559)  AIF2_Tx_Enables */
+#define MADERA_AIF2TX8_ENA				0x0080
+#define MADERA_AIF2TX8_ENA_MASK				0x0080
+#define MADERA_AIF2TX8_ENA_SHIFT			     7
+#define MADERA_AIF2TX8_ENA_WIDTH			     1
+#define MADERA_AIF2TX7_ENA				0x0040
+#define MADERA_AIF2TX7_ENA_MASK				0x0040
+#define MADERA_AIF2TX7_ENA_SHIFT			     6
+#define MADERA_AIF2TX7_ENA_WIDTH			     1
+#define MADERA_AIF2TX6_ENA				0x0020
+#define MADERA_AIF2TX6_ENA_MASK				0x0020
+#define MADERA_AIF2TX6_ENA_SHIFT			     5
+#define MADERA_AIF2TX6_ENA_WIDTH			     1
+#define MADERA_AIF2TX5_ENA				0x0010
+#define MADERA_AIF2TX5_ENA_MASK				0x0010
+#define MADERA_AIF2TX5_ENA_SHIFT			     4
+#define MADERA_AIF2TX5_ENA_WIDTH			     1
+#define MADERA_AIF2TX4_ENA				0x0008
+#define MADERA_AIF2TX4_ENA_MASK				0x0008
+#define MADERA_AIF2TX4_ENA_SHIFT			     3
+#define MADERA_AIF2TX4_ENA_WIDTH			     1
+#define MADERA_AIF2TX3_ENA				0x0004
+#define MADERA_AIF2TX3_ENA_MASK				0x0004
+#define MADERA_AIF2TX3_ENA_SHIFT			     2
+#define MADERA_AIF2TX3_ENA_WIDTH			     1
+#define MADERA_AIF2TX2_ENA				0x0002
+#define MADERA_AIF2TX2_ENA_MASK				0x0002
+#define MADERA_AIF2TX2_ENA_SHIFT			     1
+#define MADERA_AIF2TX2_ENA_WIDTH			     1
+#define MADERA_AIF2TX1_ENA				0x0001
+#define MADERA_AIF2TX1_ENA_MASK				0x0001
+#define MADERA_AIF2TX1_ENA_SHIFT			     0
+#define MADERA_AIF2TX1_ENA_WIDTH			     1
+
+/* (0x055A)  AIF2_Rx_Enables */
+#define MADERA_AIF2RX8_ENA				0x0080
+#define MADERA_AIF2RX8_ENA_MASK				0x0080
+#define MADERA_AIF2RX8_ENA_SHIFT			     7
+#define MADERA_AIF2RX8_ENA_WIDTH			     1
+#define MADERA_AIF2RX7_ENA				0x0040
+#define MADERA_AIF2RX7_ENA_MASK				0x0040
+#define MADERA_AIF2RX7_ENA_SHIFT			     6
+#define MADERA_AIF2RX7_ENA_WIDTH			     1
+#define MADERA_AIF2RX6_ENA				0x0020
+#define MADERA_AIF2RX6_ENA_MASK				0x0020
+#define MADERA_AIF2RX6_ENA_SHIFT			     5
+#define MADERA_AIF2RX6_ENA_WIDTH			     1
+#define MADERA_AIF2RX5_ENA				0x0010
+#define MADERA_AIF2RX5_ENA_MASK				0x0010
+#define MADERA_AIF2RX5_ENA_SHIFT			     4
+#define MADERA_AIF2RX5_ENA_WIDTH			     1
+#define MADERA_AIF2RX4_ENA				0x0008
+#define MADERA_AIF2RX4_ENA_MASK				0x0008
+#define MADERA_AIF2RX4_ENA_SHIFT			     3
+#define MADERA_AIF2RX4_ENA_WIDTH			     1
+#define MADERA_AIF2RX3_ENA				0x0004
+#define MADERA_AIF2RX3_ENA_MASK				0x0004
+#define MADERA_AIF2RX3_ENA_SHIFT			     2
+#define MADERA_AIF2RX3_ENA_WIDTH			     1
+#define MADERA_AIF2RX2_ENA				0x0002
+#define MADERA_AIF2RX2_ENA_MASK				0x0002
+#define MADERA_AIF2RX2_ENA_SHIFT			     1
+#define MADERA_AIF2RX2_ENA_WIDTH			     1
+#define MADERA_AIF2RX1_ENA				0x0001
+#define MADERA_AIF2RX1_ENA_MASK				0x0001
+#define MADERA_AIF2RX1_ENA_SHIFT			     0
+#define MADERA_AIF2RX1_ENA_WIDTH			     1
+
+/* (0x0599)  AIF3_Tx_Enables */
+#define MADERA_AIF3TX2_ENA				0x0002
+#define MADERA_AIF3TX2_ENA_MASK				0x0002
+#define MADERA_AIF3TX2_ENA_SHIFT			     1
+#define MADERA_AIF3TX2_ENA_WIDTH			     1
+#define MADERA_AIF3TX1_ENA				0x0001
+#define MADERA_AIF3TX1_ENA_MASK				0x0001
+#define MADERA_AIF3TX1_ENA_SHIFT			     0
+#define MADERA_AIF3TX1_ENA_WIDTH			     1
+
+/* (0x059A)  AIF3_Rx_Enables */
+#define MADERA_AIF3RX2_ENA				0x0002
+#define MADERA_AIF3RX2_ENA_MASK				0x0002
+#define MADERA_AIF3RX2_ENA_SHIFT			     1
+#define MADERA_AIF3RX2_ENA_WIDTH			     1
+#define MADERA_AIF3RX1_ENA				0x0001
+#define MADERA_AIF3RX1_ENA_MASK				0x0001
+#define MADERA_AIF3RX1_ENA_SHIFT			     0
+#define MADERA_AIF3RX1_ENA_WIDTH			     1
+
+/* (0x05B9)  AIF4_Tx_Enables */
+#define MADERA_AIF4TX2_ENA				0x0002
+#define MADERA_AIF4TX2_ENA_MASK				0x0002
+#define MADERA_AIF4TX2_ENA_SHIFT			     1
+#define MADERA_AIF4TX2_ENA_WIDTH			     1
+#define MADERA_AIF4TX1_ENA				0x0001
+#define MADERA_AIF4TX1_ENA_MASK				0x0001
+#define MADERA_AIF4TX1_ENA_SHIFT			     0
+#define MADERA_AIF4TX1_ENA_WIDTH			     1
+
+/* (0x05BA)  AIF4_Rx_Enables */
+#define MADERA_AIF4RX2_ENA				0x0002
+#define MADERA_AIF4RX2_ENA_MASK				0x0002
+#define MADERA_AIF4RX2_ENA_SHIFT			     1
+#define MADERA_AIF4RX2_ENA_WIDTH			     1
+#define MADERA_AIF4RX1_ENA				0x0001
+#define MADERA_AIF4RX1_ENA_MASK				0x0001
+#define MADERA_AIF4RX1_ENA_SHIFT			     0
+#define MADERA_AIF4RX1_ENA_WIDTH			     1
+
+/* (0x05C2)  SPD1_TX_Control */
+#define MADERA_SPD1_VAL2				0x2000
+#define MADERA_SPD1_VAL2_MASK				0x2000
+#define MADERA_SPD1_VAL2_SHIFT				    13
+#define MADERA_SPD1_VAL2_WIDTH				     1
+#define MADERA_SPD1_VAL1				0x1000
+#define MADERA_SPD1_VAL1_MASK				0x1000
+#define MADERA_SPD1_VAL1_SHIFT				    12
+#define MADERA_SPD1_VAL1_WIDTH				     1
+#define MADERA_SPD1_RATE_MASK				0x00F0
+#define MADERA_SPD1_RATE_SHIFT				     4
+#define MADERA_SPD1_RATE_WIDTH				     4
+#define MADERA_SPD1_ENA					0x0001
+#define MADERA_SPD1_ENA_MASK				0x0001
+#define MADERA_SPD1_ENA_SHIFT				     0
+#define MADERA_SPD1_ENA_WIDTH				     1
+
+/* (0x05F5)  SLIMbus_RX_Channel_Enable */
+#define MADERA_SLIMRX8_ENA				0x0080
+#define MADERA_SLIMRX8_ENA_MASK				0x0080
+#define MADERA_SLIMRX8_ENA_SHIFT			     7
+#define MADERA_SLIMRX8_ENA_WIDTH			     1
+#define MADERA_SLIMRX7_ENA				0x0040
+#define MADERA_SLIMRX7_ENA_MASK				0x0040
+#define MADERA_SLIMRX7_ENA_SHIFT			     6
+#define MADERA_SLIMRX7_ENA_WIDTH			     1
+#define MADERA_SLIMRX6_ENA				0x0020
+#define MADERA_SLIMRX6_ENA_MASK				0x0020
+#define MADERA_SLIMRX6_ENA_SHIFT			     5
+#define MADERA_SLIMRX6_ENA_WIDTH			     1
+#define MADERA_SLIMRX5_ENA				0x0010
+#define MADERA_SLIMRX5_ENA_MASK				0x0010
+#define MADERA_SLIMRX5_ENA_SHIFT			     4
+#define MADERA_SLIMRX5_ENA_WIDTH			     1
+#define MADERA_SLIMRX4_ENA				0x0008
+#define MADERA_SLIMRX4_ENA_MASK				0x0008
+#define MADERA_SLIMRX4_ENA_SHIFT			     3
+#define MADERA_SLIMRX4_ENA_WIDTH			     1
+#define MADERA_SLIMRX3_ENA				0x0004
+#define MADERA_SLIMRX3_ENA_MASK				0x0004
+#define MADERA_SLIMRX3_ENA_SHIFT			     2
+#define MADERA_SLIMRX3_ENA_WIDTH			     1
+#define MADERA_SLIMRX2_ENA				0x0002
+#define MADERA_SLIMRX2_ENA_MASK				0x0002
+#define MADERA_SLIMRX2_ENA_SHIFT			     1
+#define MADERA_SLIMRX2_ENA_WIDTH			     1
+#define MADERA_SLIMRX1_ENA				0x0001
+#define MADERA_SLIMRX1_ENA_MASK				0x0001
+#define MADERA_SLIMRX1_ENA_SHIFT			     0
+#define MADERA_SLIMRX1_ENA_WIDTH			     1
+
+/* (0x05F6)  SLIMbus_TX_Channel_Enable */
+#define MADERA_SLIMTX8_ENA				0x0080
+#define MADERA_SLIMTX8_ENA_MASK				0x0080
+#define MADERA_SLIMTX8_ENA_SHIFT			     7
+#define MADERA_SLIMTX8_ENA_WIDTH			     1
+#define MADERA_SLIMTX7_ENA				0x0040
+#define MADERA_SLIMTX7_ENA_MASK				0x0040
+#define MADERA_SLIMTX7_ENA_SHIFT			     6
+#define MADERA_SLIMTX7_ENA_WIDTH			     1
+#define MADERA_SLIMTX6_ENA				0x0020
+#define MADERA_SLIMTX6_ENA_MASK				0x0020
+#define MADERA_SLIMTX6_ENA_SHIFT			     5
+#define MADERA_SLIMTX6_ENA_WIDTH			     1
+#define MADERA_SLIMTX5_ENA				0x0010
+#define MADERA_SLIMTX5_ENA_MASK				0x0010
+#define MADERA_SLIMTX5_ENA_SHIFT			     4
+#define MADERA_SLIMTX5_ENA_WIDTH			     1
+#define MADERA_SLIMTX4_ENA				0x0008
+#define MADERA_SLIMTX4_ENA_MASK				0x0008
+#define MADERA_SLIMTX4_ENA_SHIFT			     3
+#define MADERA_SLIMTX4_ENA_WIDTH			     1
+#define MADERA_SLIMTX3_ENA				0x0004
+#define MADERA_SLIMTX3_ENA_MASK				0x0004
+#define MADERA_SLIMTX3_ENA_SHIFT			     2
+#define MADERA_SLIMTX3_ENA_WIDTH			     1
+#define MADERA_SLIMTX2_ENA				0x0002
+#define MADERA_SLIMTX2_ENA_MASK				0x0002
+#define MADERA_SLIMTX2_ENA_SHIFT			     1
+#define MADERA_SLIMTX2_ENA_WIDTH			     1
+#define MADERA_SLIMTX1_ENA				0x0001
+#define MADERA_SLIMTX1_ENA_MASK				0x0001
+#define MADERA_SLIMTX1_ENA_SHIFT			     0
+#define MADERA_SLIMTX1_ENA_WIDTH			     1
+
+/* (0x0E10)  EQ1_1 */
+#define MADERA_EQ1_B1_GAIN_MASK				0xF800
+#define MADERA_EQ1_B1_GAIN_SHIFT			    11
+#define MADERA_EQ1_B1_GAIN_WIDTH			     5
+#define MADERA_EQ1_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ1_B2_GAIN_SHIFT			     6
+#define MADERA_EQ1_B2_GAIN_WIDTH			     5
+#define MADERA_EQ1_B3_GAIN_MASK				0x003E
+#define MADERA_EQ1_B3_GAIN_SHIFT			     1
+#define MADERA_EQ1_B3_GAIN_WIDTH			     5
+#define MADERA_EQ1_ENA					0x0001
+#define MADERA_EQ1_ENA_MASK				0x0001
+#define MADERA_EQ1_ENA_SHIFT				     0
+#define MADERA_EQ1_ENA_WIDTH				     1
+
+/* (0x0E11)  EQ1_2 */
+#define MADERA_EQ1_B4_GAIN_MASK				0xF800
+#define MADERA_EQ1_B4_GAIN_SHIFT			    11
+#define MADERA_EQ1_B4_GAIN_WIDTH			     5
+#define MADERA_EQ1_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ1_B5_GAIN_SHIFT			     6
+#define MADERA_EQ1_B5_GAIN_WIDTH			     5
+#define MADERA_EQ1_B1_MODE				0x0001
+#define MADERA_EQ1_B1_MODE_MASK				0x0001
+#define MADERA_EQ1_B1_MODE_SHIFT			     0
+#define MADERA_EQ1_B1_MODE_WIDTH			     1
+
+/* (0x0E26)  EQ2_1 */
+#define MADERA_EQ2_B1_GAIN_MASK				0xF800
+#define MADERA_EQ2_B1_GAIN_SHIFT			    11
+#define MADERA_EQ2_B1_GAIN_WIDTH			     5
+#define MADERA_EQ2_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ2_B2_GAIN_SHIFT			     6
+#define MADERA_EQ2_B2_GAIN_WIDTH			     5
+#define MADERA_EQ2_B3_GAIN_MASK				0x003E
+#define MADERA_EQ2_B3_GAIN_SHIFT			     1
+#define MADERA_EQ2_B3_GAIN_WIDTH			     5
+#define MADERA_EQ2_ENA					0x0001
+#define MADERA_EQ2_ENA_MASK				0x0001
+#define MADERA_EQ2_ENA_SHIFT				     0
+#define MADERA_EQ2_ENA_WIDTH				     1
+
+/* (0x0E27)  EQ2_2 */
+#define MADERA_EQ2_B4_GAIN_MASK				0xF800
+#define MADERA_EQ2_B4_GAIN_SHIFT			    11
+#define MADERA_EQ2_B4_GAIN_WIDTH			     5
+#define MADERA_EQ2_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ2_B5_GAIN_SHIFT			     6
+#define MADERA_EQ2_B5_GAIN_WIDTH			     5
+#define MADERA_EQ2_B1_MODE				0x0001
+#define MADERA_EQ2_B1_MODE_MASK				0x0001
+#define MADERA_EQ2_B1_MODE_SHIFT			     0
+#define MADERA_EQ2_B1_MODE_WIDTH			     1
+
+/* (0x0E3C)  EQ3_1 */
+#define MADERA_EQ3_B1_GAIN_MASK				0xF800
+#define MADERA_EQ3_B1_GAIN_SHIFT			    11
+#define MADERA_EQ3_B1_GAIN_WIDTH			     5
+#define MADERA_EQ3_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ3_B2_GAIN_SHIFT			     6
+#define MADERA_EQ3_B2_GAIN_WIDTH			     5
+#define MADERA_EQ3_B3_GAIN_MASK				0x003E
+#define MADERA_EQ3_B3_GAIN_SHIFT			     1
+#define MADERA_EQ3_B3_GAIN_WIDTH			     5
+#define MADERA_EQ3_ENA					0x0001
+#define MADERA_EQ3_ENA_MASK				0x0001
+#define MADERA_EQ3_ENA_SHIFT				     0
+#define MADERA_EQ3_ENA_WIDTH				     1
+
+/* (0x0E3D)  EQ3_2 */
+#define MADERA_EQ3_B4_GAIN_MASK				0xF800
+#define MADERA_EQ3_B4_GAIN_SHIFT			    11
+#define MADERA_EQ3_B4_GAIN_WIDTH			     5
+#define MADERA_EQ3_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ3_B5_GAIN_SHIFT			     6
+#define MADERA_EQ3_B5_GAIN_WIDTH			     5
+#define MADERA_EQ3_B1_MODE				0x0001
+#define MADERA_EQ3_B1_MODE_MASK				0x0001
+#define MADERA_EQ3_B1_MODE_SHIFT			     0
+#define MADERA_EQ3_B1_MODE_WIDTH			     1
+
+/* (0x0E52)  EQ4_1 */
+#define MADERA_EQ4_B1_GAIN_MASK				0xF800
+#define MADERA_EQ4_B1_GAIN_SHIFT			    11
+#define MADERA_EQ4_B1_GAIN_WIDTH			     5
+#define MADERA_EQ4_B2_GAIN_MASK				0x07C0
+#define MADERA_EQ4_B2_GAIN_SHIFT			     6
+#define MADERA_EQ4_B2_GAIN_WIDTH			     5
+#define MADERA_EQ4_B3_GAIN_MASK				0x003E
+#define MADERA_EQ4_B3_GAIN_SHIFT			     1
+#define MADERA_EQ4_B3_GAIN_WIDTH			     5
+#define MADERA_EQ4_ENA					0x0001
+#define MADERA_EQ4_ENA_MASK				0x0001
+#define MADERA_EQ4_ENA_SHIFT				     0
+#define MADERA_EQ4_ENA_WIDTH				     1
+
+/* (0x0E53)  EQ4_2 */
+#define MADERA_EQ4_B4_GAIN_MASK				0xF800
+#define MADERA_EQ4_B4_GAIN_SHIFT			    11
+#define MADERA_EQ4_B4_GAIN_WIDTH			     5
+#define MADERA_EQ4_B5_GAIN_MASK				0x07C0
+#define MADERA_EQ4_B5_GAIN_SHIFT			     6
+#define MADERA_EQ4_B5_GAIN_WIDTH			     5
+#define MADERA_EQ4_B1_MODE				0x0001
+#define MADERA_EQ4_B1_MODE_MASK				0x0001
+#define MADERA_EQ4_B1_MODE_SHIFT			     0
+#define MADERA_EQ4_B1_MODE_WIDTH			     1
+
+/* (0x0E80)  DRC1_ctrl1 */
+#define MADERA_DRC1L_ENA				0x0002
+#define MADERA_DRC1L_ENA_MASK				0x0002
+#define MADERA_DRC1L_ENA_SHIFT				     1
+#define MADERA_DRC1L_ENA_WIDTH				     1
+#define MADERA_DRC1R_ENA				0x0001
+#define MADERA_DRC1R_ENA_MASK				0x0001
+#define MADERA_DRC1R_ENA_SHIFT				     0
+#define MADERA_DRC1R_ENA_WIDTH				     1
+
+/* (0x0E88)  DRC2_ctrl1 */
+#define MADERA_DRC2L_ENA				0x0002
+#define MADERA_DRC2L_ENA_MASK				0x0002
+#define MADERA_DRC2L_ENA_SHIFT				     1
+#define MADERA_DRC2L_ENA_WIDTH				     1
+#define MADERA_DRC2R_ENA				0x0001
+#define MADERA_DRC2R_ENA_MASK				0x0001
+#define MADERA_DRC2R_ENA_SHIFT				     0
+#define MADERA_DRC2R_ENA_WIDTH				     1
+
+/* (0x0EC0)  HPLPF1_1 */
+#define MADERA_LHPF1_MODE				0x0002
+#define MADERA_LHPF1_MODE_MASK				0x0002
+#define MADERA_LHPF1_MODE_SHIFT				     1
+#define MADERA_LHPF1_MODE_WIDTH				     1
+#define MADERA_LHPF1_ENA				0x0001
+#define MADERA_LHPF1_ENA_MASK				0x0001
+#define MADERA_LHPF1_ENA_SHIFT				     0
+#define MADERA_LHPF1_ENA_WIDTH				     1
+
+/* (0x0EC1)  HPLPF1_2 */
+#define MADERA_LHPF1_COEFF_MASK				0xFFFF
+#define MADERA_LHPF1_COEFF_SHIFT			     0
+#define MADERA_LHPF1_COEFF_WIDTH			    16
+
+/* (0x0EC4)  HPLPF2_1 */
+#define MADERA_LHPF2_MODE				0x0002
+#define MADERA_LHPF2_MODE_MASK				0x0002
+#define MADERA_LHPF2_MODE_SHIFT				     1
+#define MADERA_LHPF2_MODE_WIDTH				     1
+#define MADERA_LHPF2_ENA				0x0001
+#define MADERA_LHPF2_ENA_MASK				0x0001
+#define MADERA_LHPF2_ENA_SHIFT				     0
+#define MADERA_LHPF2_ENA_WIDTH				     1
+
+/* (0x0EC5)  HPLPF2_2 */
+#define MADERA_LHPF2_COEFF_MASK				0xFFFF
+#define MADERA_LHPF2_COEFF_SHIFT			     0
+#define MADERA_LHPF2_COEFF_WIDTH			    16
+
+/* (0x0EC8)  HPLPF3_1 */
+#define MADERA_LHPF3_MODE				0x0002
+#define MADERA_LHPF3_MODE_MASK				0x0002
+#define MADERA_LHPF3_MODE_SHIFT				     1
+#define MADERA_LHPF3_MODE_WIDTH				     1
+#define MADERA_LHPF3_ENA				0x0001
+#define MADERA_LHPF3_ENA_MASK				0x0001
+#define MADERA_LHPF3_ENA_SHIFT				     0
+#define MADERA_LHPF3_ENA_WIDTH				     1
+
+/* (0x0EC9)  HPLPF3_2 */
+#define MADERA_LHPF3_COEFF_MASK				0xFFFF
+#define MADERA_LHPF3_COEFF_SHIFT			     0
+#define MADERA_LHPF3_COEFF_WIDTH			    16
+
+/* (0x0ECC)  HPLPF4_1 */
+#define MADERA_LHPF4_MODE				0x0002
+#define MADERA_LHPF4_MODE_MASK				0x0002
+#define MADERA_LHPF4_MODE_SHIFT				     1
+#define MADERA_LHPF4_MODE_WIDTH				     1
+#define MADERA_LHPF4_ENA				0x0001
+#define MADERA_LHPF4_ENA_MASK				0x0001
+#define MADERA_LHPF4_ENA_SHIFT				     0
+#define MADERA_LHPF4_ENA_WIDTH				     1
+
+/* (0x0ECD)  HPLPF4_2 */
+#define MADERA_LHPF4_COEFF_MASK				0xFFFF
+#define MADERA_LHPF4_COEFF_SHIFT			     0
+#define MADERA_LHPF4_COEFF_WIDTH			    16
+
+/* (0x0ED0)  ASRC2_ENABLE */
+#define MADERA_ASRC2_IN2L_ENA				0x0008
+#define MADERA_ASRC2_IN2L_ENA_MASK			0x0008
+#define MADERA_ASRC2_IN2L_ENA_SHIFT			     3
+#define MADERA_ASRC2_IN2L_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN2R_ENA				0x0004
+#define MADERA_ASRC2_IN2R_ENA_MASK			0x0004
+#define MADERA_ASRC2_IN2R_ENA_SHIFT			     2
+#define MADERA_ASRC2_IN2R_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN1L_ENA				0x0002
+#define MADERA_ASRC2_IN1L_ENA_MASK			0x0002
+#define MADERA_ASRC2_IN1L_ENA_SHIFT			     1
+#define MADERA_ASRC2_IN1L_ENA_WIDTH			     1
+#define MADERA_ASRC2_IN1R_ENA				0x0001
+#define MADERA_ASRC2_IN1R_ENA_MASK			0x0001
+#define MADERA_ASRC2_IN1R_ENA_SHIFT			     0
+#define MADERA_ASRC2_IN1R_ENA_WIDTH			     1
+
+/* (0x0ED2)  ASRC2_RATE1 */
+#define MADERA_ASRC2_RATE1_MASK				0xF800
+#define MADERA_ASRC2_RATE1_SHIFT			    11
+#define MADERA_ASRC2_RATE1_WIDTH			     5
+
+/* (0x0ED3)  ASRC2_RATE2 */
+#define MADERA_ASRC2_RATE2_MASK				0xF800
+#define MADERA_ASRC2_RATE2_SHIFT			    11
+#define MADERA_ASRC2_RATE2_WIDTH			     5
+
+/* (0x0EE0)  ASRC1_ENABLE */
+#define MADERA_ASRC1_IN2L_ENA				0x0008
+#define MADERA_ASRC1_IN2L_ENA_MASK			0x0008
+#define MADERA_ASRC1_IN2L_ENA_SHIFT			     3
+#define MADERA_ASRC1_IN2L_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN2R_ENA				0x0004
+#define MADERA_ASRC1_IN2R_ENA_MASK			0x0004
+#define MADERA_ASRC1_IN2R_ENA_SHIFT			     2
+#define MADERA_ASRC1_IN2R_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN1L_ENA				0x0002
+#define MADERA_ASRC1_IN1L_ENA_MASK			0x0002
+#define MADERA_ASRC1_IN1L_ENA_SHIFT			     1
+#define MADERA_ASRC1_IN1L_ENA_WIDTH			     1
+#define MADERA_ASRC1_IN1R_ENA				0x0001
+#define MADERA_ASRC1_IN1R_ENA_MASK			0x0001
+#define MADERA_ASRC1_IN1R_ENA_SHIFT			     0
+#define MADERA_ASRC1_IN1R_ENA_WIDTH			     1
+
+/* (0x0EE2)  ASRC1_RATE1 */
+#define MADERA_ASRC1_RATE1_MASK				0xF800
+#define MADERA_ASRC1_RATE1_SHIFT			    11
+#define MADERA_ASRC1_RATE1_WIDTH			     5
+
+/* (0x0EE3)  ASRC1_RATE2 */
+#define MADERA_ASRC1_RATE2_MASK				0xF800
+#define MADERA_ASRC1_RATE2_SHIFT			    11
+#define MADERA_ASRC1_RATE2_WIDTH			     5
+
+/* (0x0EF0) - ISRC1 CTRL 1 */
+#define MADERA_ISRC1_FSH_MASK				0xF800
+#define MADERA_ISRC1_FSH_SHIFT				    11
+#define MADERA_ISRC1_FSH_WIDTH				     5
+#define MADERA_ISRC1_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC1_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC1_CLK_SEL_WIDTH			     3
+
+/* (0x0EF1)  ISRC1_CTRL_2 */
+#define MADERA_ISRC1_FSL_MASK				0xF800
+#define MADERA_ISRC1_FSL_SHIFT				    11
+#define MADERA_ISRC1_FSL_WIDTH				     5
+
+/* (0x0EF2)  ISRC1_CTRL_3 */
+#define MADERA_ISRC1_INT1_ENA				0x8000
+#define MADERA_ISRC1_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC1_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC1_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT2_ENA				0x4000
+#define MADERA_ISRC1_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC1_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC1_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT3_ENA				0x2000
+#define MADERA_ISRC1_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC1_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC1_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC1_INT4_ENA				0x1000
+#define MADERA_ISRC1_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC1_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC1_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC1_ENA				0x0200
+#define MADERA_ISRC1_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC1_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC1_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC2_ENA				0x0100
+#define MADERA_ISRC1_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC1_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC1_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC3_ENA				0x0080
+#define MADERA_ISRC1_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC1_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC1_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC1_DEC4_ENA				0x0040
+#define MADERA_ISRC1_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC1_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC1_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC1_NOTCH_ENA				0x0001
+#define MADERA_ISRC1_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC1_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC1_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF3)  ISRC2_CTRL_1 */
+#define MADERA_ISRC2_FSH_MASK				0xF800
+#define MADERA_ISRC2_FSH_SHIFT				    11
+#define MADERA_ISRC2_FSH_WIDTH				     5
+#define MADERA_ISRC2_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC2_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC2_CLK_SEL_WIDTH			     3
+
+/* (0x0EF4)  ISRC2_CTRL_2 */
+#define MADERA_ISRC2_FSL_MASK				0xF800
+#define MADERA_ISRC2_FSL_SHIFT				    11
+#define MADERA_ISRC2_FSL_WIDTH				     5
+
+/* (0x0EF5)  ISRC2_CTRL_3 */
+#define MADERA_ISRC2_INT1_ENA				0x8000
+#define MADERA_ISRC2_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC2_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC2_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT2_ENA				0x4000
+#define MADERA_ISRC2_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC2_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC2_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT3_ENA				0x2000
+#define MADERA_ISRC2_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC2_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC2_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC2_INT4_ENA				0x1000
+#define MADERA_ISRC2_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC2_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC2_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC1_ENA				0x0200
+#define MADERA_ISRC2_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC2_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC2_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC2_ENA				0x0100
+#define MADERA_ISRC2_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC2_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC2_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC3_ENA				0x0080
+#define MADERA_ISRC2_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC2_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC2_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC2_DEC4_ENA				0x0040
+#define MADERA_ISRC2_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC2_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC2_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC2_NOTCH_ENA				0x0001
+#define MADERA_ISRC2_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC2_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC2_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF6)  ISRC3_CTRL_1 */
+#define MADERA_ISRC3_FSH_MASK				0xF800
+#define MADERA_ISRC3_FSH_SHIFT				    11
+#define MADERA_ISRC3_FSH_WIDTH				     5
+#define MADERA_ISRC3_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC3_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC3_CLK_SEL_WIDTH			     3
+
+/* (0x0EF7)  ISRC3_CTRL_2 */
+#define MADERA_ISRC3_FSL_MASK				0xF800
+#define MADERA_ISRC3_FSL_SHIFT				    11
+#define MADERA_ISRC3_FSL_WIDTH				     5
+
+/* (0x0EF8)  ISRC3_CTRL_3 */
+#define MADERA_ISRC3_INT1_ENA				0x8000
+#define MADERA_ISRC3_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC3_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC3_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT2_ENA				0x4000
+#define MADERA_ISRC3_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC3_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC3_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT3_ENA				0x2000
+#define MADERA_ISRC3_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC3_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC3_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC3_INT4_ENA				0x1000
+#define MADERA_ISRC3_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC3_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC3_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC1_ENA				0x0200
+#define MADERA_ISRC3_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC3_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC3_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC2_ENA				0x0100
+#define MADERA_ISRC3_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC3_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC3_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC3_ENA				0x0080
+#define MADERA_ISRC3_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC3_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC3_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC3_DEC4_ENA				0x0040
+#define MADERA_ISRC3_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC3_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC3_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC3_NOTCH_ENA				0x0001
+#define MADERA_ISRC3_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC3_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC3_NOTCH_ENA_WIDTH			     1
+
+/* (0x0EF9)  ISRC4_CTRL_1 */
+#define MADERA_ISRC4_FSH_MASK				0xF800
+#define MADERA_ISRC4_FSH_SHIFT				    11
+#define MADERA_ISRC4_FSH_WIDTH				     5
+#define MADERA_ISRC4_CLK_SEL_MASK			0x0700
+#define MADERA_ISRC4_CLK_SEL_SHIFT			     8
+#define MADERA_ISRC4_CLK_SEL_WIDTH			     3
+
+/* (0x0EFA)  ISRC4_CTRL_2 */
+#define MADERA_ISRC4_FSL_MASK				0xF800
+#define MADERA_ISRC4_FSL_SHIFT				    11
+#define MADERA_ISRC4_FSL_WIDTH				     5
+
+/* (0x0EFB)  ISRC4_CTRL_3 */
+#define MADERA_ISRC4_INT1_ENA				0x8000
+#define MADERA_ISRC4_INT1_ENA_MASK			0x8000
+#define MADERA_ISRC4_INT1_ENA_SHIFT			    15
+#define MADERA_ISRC4_INT1_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT2_ENA				0x4000
+#define MADERA_ISRC4_INT2_ENA_MASK			0x4000
+#define MADERA_ISRC4_INT2_ENA_SHIFT			    14
+#define MADERA_ISRC4_INT2_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT3_ENA				0x2000
+#define MADERA_ISRC4_INT3_ENA_MASK			0x2000
+#define MADERA_ISRC4_INT3_ENA_SHIFT			    13
+#define MADERA_ISRC4_INT3_ENA_WIDTH			     1
+#define MADERA_ISRC4_INT4_ENA				0x1000
+#define MADERA_ISRC4_INT4_ENA_MASK			0x1000
+#define MADERA_ISRC4_INT4_ENA_SHIFT			    12
+#define MADERA_ISRC4_INT4_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC1_ENA				0x0200
+#define MADERA_ISRC4_DEC1_ENA_MASK			0x0200
+#define MADERA_ISRC4_DEC1_ENA_SHIFT			     9
+#define MADERA_ISRC4_DEC1_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC2_ENA				0x0100
+#define MADERA_ISRC4_DEC2_ENA_MASK			0x0100
+#define MADERA_ISRC4_DEC2_ENA_SHIFT			     8
+#define MADERA_ISRC4_DEC2_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC3_ENA				0x0080
+#define MADERA_ISRC4_DEC3_ENA_MASK			0x0080
+#define MADERA_ISRC4_DEC3_ENA_SHIFT			     7
+#define MADERA_ISRC4_DEC3_ENA_WIDTH			     1
+#define MADERA_ISRC4_DEC4_ENA				0x0040
+#define MADERA_ISRC4_DEC4_ENA_MASK			0x0040
+#define MADERA_ISRC4_DEC4_ENA_SHIFT			     6
+#define MADERA_ISRC4_DEC4_ENA_WIDTH			     1
+#define MADERA_ISRC4_NOTCH_ENA				0x0001
+#define MADERA_ISRC4_NOTCH_ENA_MASK			0x0001
+#define MADERA_ISRC4_NOTCH_ENA_SHIFT			     0
+#define MADERA_ISRC4_NOTCH_ENA_WIDTH			     1
+
+/* (0x0F00)  Clock_Control */
+#define MADERA_EXT_NG_SEL_CLR				0x0080
+#define MADERA_EXT_NG_SEL_CLR_MASK			0x0080
+#define MADERA_EXT_NG_SEL_CLR_SHIFT			     7
+#define MADERA_EXT_NG_SEL_CLR_WIDTH			     1
+#define MADERA_EXT_NG_SEL_SET				0x0040
+#define MADERA_EXT_NG_SEL_SET_MASK			0x0040
+#define MADERA_EXT_NG_SEL_SET_SHIFT			     6
+#define MADERA_EXT_NG_SEL_SET_WIDTH			     1
+#define MADERA_CLK_R_ENA_CLR				0x0020
+#define MADERA_CLK_R_ENA_CLR_MASK			0x0020
+#define MADERA_CLK_R_ENA_CLR_SHIFT			     5
+#define MADERA_CLK_R_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_R_ENA_SET				0x0010
+#define MADERA_CLK_R_ENA_SET_MASK			0x0010
+#define MADERA_CLK_R_ENA_SET_SHIFT			     4
+#define MADERA_CLK_R_ENA_SET_WIDTH			     1
+#define MADERA_CLK_NG_ENA_CLR				0x0008
+#define MADERA_CLK_NG_ENA_CLR_MASK			0x0008
+#define MADERA_CLK_NG_ENA_CLR_SHIFT			     3
+#define MADERA_CLK_NG_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_NG_ENA_SET				0x0004
+#define MADERA_CLK_NG_ENA_SET_MASK			0x0004
+#define MADERA_CLK_NG_ENA_SET_SHIFT			     2
+#define MADERA_CLK_NG_ENA_SET_WIDTH			     1
+#define MADERA_CLK_L_ENA_CLR				0x0002
+#define MADERA_CLK_L_ENA_CLR_MASK			0x0002
+#define MADERA_CLK_L_ENA_CLR_SHIFT			     1
+#define MADERA_CLK_L_ENA_CLR_WIDTH			     1
+#define MADERA_CLK_L_ENA_SET				0x0001
+#define MADERA_CLK_L_ENA_SET_MASK			0x0001
+#define MADERA_CLK_L_ENA_SET_SHIFT			     0
+#define MADERA_CLK_L_ENA_SET_WIDTH			     1
+
+/* (0x0F01)  ANC_SRC */
+#define MADERA_IN_RXANCR_SEL_MASK			0x0070
+#define MADERA_IN_RXANCR_SEL_SHIFT			     4
+#define MADERA_IN_RXANCR_SEL_WIDTH			     3
+#define MADERA_IN_RXANCL_SEL_MASK			0x0007
+#define MADERA_IN_RXANCL_SEL_SHIFT			     0
+#define MADERA_IN_RXANCL_SEL_WIDTH			     3
+
+/* (0x0F17)  FCL_ADC_reformatter_control */
+#define MADERA_FCL_MIC_MODE_SEL				0x000C
+#define MADERA_FCL_MIC_MODE_SEL_SHIFT			     2
+#define MADERA_FCL_MIC_MODE_SEL_WIDTH			     2
+
+/* (0x0F73)  FCR_ADC_reformatter_control */
+#define MADERA_FCR_MIC_MODE_SEL				0x000C
+#define MADERA_FCR_MIC_MODE_SEL_SHIFT			     2
+#define MADERA_FCR_MIC_MODE_SEL_WIDTH			     2
+
+/* (0x1480)  DFC1_CTRL_W0 */
+#define MADERA_DFC1_RATE_MASK				0x007C
+#define MADERA_DFC1_RATE_SHIFT				     2
+#define MADERA_DFC1_RATE_WIDTH				     5
+#define MADERA_DFC1_DITH_ENA				0x0002
+#define MADERA_DFC1_DITH_ENA_MASK			0x0002
+#define MADERA_DFC1_DITH_ENA_SHIFT			     1
+#define MADERA_DFC1_DITH_ENA_WIDTH			     1
+#define MADERA_DFC1_ENA					0x0001
+#define MADERA_DFC1_ENA_MASK				0x0001
+#define MADERA_DFC1_ENA_SHIFT				     0
+#define MADERA_DFC1_ENA_WIDTH				     1
+
+/* (0x1482)  DFC1_RX_W0 */
+#define MADERA_DFC1_RX_DATA_WIDTH_MASK			0x1F00
+#define MADERA_DFC1_RX_DATA_WIDTH_SHIFT			     8
+#define MADERA_DFC1_RX_DATA_WIDTH_WIDTH			     5
+
+#define MADERA_DFC1_RX_DATA_TYPE_MASK			0x0007
+#define MADERA_DFC1_RX_DATA_TYPE_SHIFT			     0
+#define MADERA_DFC1_RX_DATA_TYPE_WIDTH			     3
+
+/* (0x1484)  DFC1_TX_W0 */
+#define MADERA_DFC1_TX_DATA_WIDTH_MASK			0x1F00
+#define MADERA_DFC1_TX_DATA_WIDTH_SHIFT			     8
+#define MADERA_DFC1_TX_DATA_WIDTH_WIDTH			     5
+
+#define MADERA_DFC1_TX_DATA_TYPE_MASK			0x0007
+#define MADERA_DFC1_TX_DATA_TYPE_SHIFT			     0
+#define MADERA_DFC1_TX_DATA_TYPE_WIDTH			     3
+
+/* (0x1600)  ADSP2_IRQ0 */
+#define MADERA_DSP_IRQ2					0x0002
+#define MADERA_DSP_IRQ1					0x0001
+
+/* (0x1601)  ADSP2_IRQ1 */
+#define MADERA_DSP_IRQ4					0x0002
+#define MADERA_DSP_IRQ3					0x0001
+
+/* (0x1602)  ADSP2_IRQ2 */
+#define MADERA_DSP_IRQ6					0x0002
+#define MADERA_DSP_IRQ5					0x0001
+
+/* (0x1603)  ADSP2_IRQ3 */
+#define MADERA_DSP_IRQ8					0x0002
+#define MADERA_DSP_IRQ7					0x0001
+
+/* (0x1604)  ADSP2_IRQ4 */
+#define MADERA_DSP_IRQ10				0x0002
+#define MADERA_DSP_IRQ9					0x0001
+
+/* (0x1605)  ADSP2_IRQ5 */
+#define MADERA_DSP_IRQ12				0x0002
+#define MADERA_DSP_IRQ11				0x0001
+
+/* (0x1606)  ADSP2_IRQ6 */
+#define MADERA_DSP_IRQ14				0x0002
+#define MADERA_DSP_IRQ13				0x0001
+
+/* (0x1607)  ADSP2_IRQ7 */
+#define MADERA_DSP_IRQ16				0x0002
+#define MADERA_DSP_IRQ15				0x0001
+
+/* (0x1700)  GPIO1_CTRL_1 */
+#define MADERA_GP1_LVL					0x8000
+#define MADERA_GP1_LVL_MASK				0x8000
+#define MADERA_GP1_LVL_SHIFT				    15
+#define MADERA_GP1_LVL_WIDTH				     1
+#define MADERA_GP1_OP_CFG				0x4000
+#define MADERA_GP1_OP_CFG_MASK				0x4000
+#define MADERA_GP1_OP_CFG_SHIFT				    14
+#define MADERA_GP1_OP_CFG_WIDTH				     1
+#define MADERA_GP1_DB					0x2000
+#define MADERA_GP1_DB_MASK				0x2000
+#define MADERA_GP1_DB_SHIFT				    13
+#define MADERA_GP1_DB_WIDTH				     1
+#define MADERA_GP1_POL					0x1000
+#define MADERA_GP1_POL_MASK				0x1000
+#define MADERA_GP1_POL_SHIFT				    12
+#define MADERA_GP1_POL_WIDTH				     1
+#define MADERA_GP1_IP_CFG				0x0800
+#define MADERA_GP1_IP_CFG_MASK				0x0800
+#define MADERA_GP1_IP_CFG_SHIFT				    11
+#define MADERA_GP1_IP_CFG_WIDTH				     1
+#define MADERA_GP1_FN_MASK				0x03FF
+#define MADERA_GP1_FN_SHIFT				     0
+#define MADERA_GP1_FN_WIDTH				    10
+
+/* (0x1701)  GPIO1_CTRL_2 */
+#define MADERA_GP1_DIR					0x8000
+#define MADERA_GP1_DIR_MASK				0x8000
+#define MADERA_GP1_DIR_SHIFT				    15
+#define MADERA_GP1_DIR_WIDTH				     1
+#define MADERA_GP1_PU					0x4000
+#define MADERA_GP1_PU_MASK				0x4000
+#define MADERA_GP1_PU_SHIFT				    14
+#define MADERA_GP1_PU_WIDTH				     1
+#define MADERA_GP1_PD					0x2000
+#define MADERA_GP1_PD_MASK				0x2000
+#define MADERA_GP1_PD_SHIFT				    13
+#define MADERA_GP1_PD_WIDTH				     1
+#define MADERA_GP1_DRV_STR_MASK				0x1800
+#define MADERA_GP1_DRV_STR_SHIFT			    11
+#define MADERA_GP1_DRV_STR_WIDTH			     2
+
+/* (0x1800)  IRQ1_Status_1 */
+#define MADERA_CTRLIF_ERR_EINT1				0x1000
+#define MADERA_CTRLIF_ERR_EINT1_MASK			0x1000
+#define MADERA_CTRLIF_ERR_EINT1_SHIFT			    12
+#define MADERA_CTRLIF_ERR_EINT1_WIDTH			     1
+#define MADERA_SYSCLK_FAIL_EINT1			0x0200
+#define MADERA_SYSCLK_FAIL_EINT1_MASK			0x0200
+#define MADERA_SYSCLK_FAIL_EINT1_SHIFT			     9
+#define MADERA_SYSCLK_FAIL_EINT1_WIDTH			     1
+#define MADERA_CLOCK_DETECT_EINT1			0x0100
+#define MADERA_CLOCK_DETECT_EINT1_MASK			0x0100
+#define MADERA_CLOCK_DETECT_EINT1_SHIFT			     8
+#define MADERA_CLOCK_DETECT_EINT1_WIDTH			     1
+#define MADERA_BOOT_DONE_EINT1				0x0080
+#define MADERA_BOOT_DONE_EINT1_MASK			0x0080
+#define MADERA_BOOT_DONE_EINT1_SHIFT			     7
+#define MADERA_BOOT_DONE_EINT1_WIDTH			     1
+
+/* (0x1801)  IRQ1_Status_2 */
+#define MADERA_FLLAO_LOCK_EINT1				0x0800
+#define MADERA_FLLAO_LOCK_EINT1_MASK			0x0800
+#define MADERA_FLLAO_LOCK_EINT1_SHIFT			    11
+#define MADERA_FLLAO_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL3_LOCK_EINT1				0x0400
+#define MADERA_FLL3_LOCK_EINT1_MASK			0x0400
+#define MADERA_FLL3_LOCK_EINT1_SHIFT			    10
+#define MADERA_FLL3_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL2_LOCK_EINT1				0x0200
+#define MADERA_FLL2_LOCK_EINT1_MASK			0x0200
+#define MADERA_FLL2_LOCK_EINT1_SHIFT			     9
+#define MADERA_FLL2_LOCK_EINT1_WIDTH			     1
+#define MADERA_FLL1_LOCK_EINT1				0x0100
+#define MADERA_FLL1_LOCK_EINT1_MASK			0x0100
+#define MADERA_FLL1_LOCK_EINT1_SHIFT			     8
+#define MADERA_FLL1_LOCK_EINT1_WIDTH			     1
+
+/* (0x1805)  IRQ1_Status_6 */
+#define MADERA_MICDET2_EINT1				0x0200
+#define MADERA_MICDET2_EINT1_MASK			0x0200
+#define MADERA_MICDET2_EINT1_SHIFT			     9
+#define MADERA_MICDET2_EINT1_WIDTH			     1
+#define MADERA_MICDET1_EINT1				0x0100
+#define MADERA_MICDET1_EINT1_MASK			0x0100
+#define MADERA_MICDET1_EINT1_SHIFT			     8
+#define MADERA_MICDET1_EINT1_WIDTH			     1
+#define MADERA_HPDET_EINT1				0x0001
+#define MADERA_HPDET_EINT1_MASK				0x0001
+#define MADERA_HPDET_EINT1_SHIFT			     0
+#define MADERA_HPDET_EINT1_WIDTH			     1
+
+/* (0x1806)  IRQ1_Status_7 */
+#define MADERA_MICD_CLAMP_FALL_EINT1			0x0020
+#define MADERA_MICD_CLAMP_FALL_EINT1_MASK		0x0020
+#define MADERA_MICD_CLAMP_FALL_EINT1_SHIFT		     5
+#define MADERA_MICD_CLAMP_FALL_EINT1_WIDTH		     1
+#define MADERA_MICD_CLAMP_RISE_EINT1			0x0010
+#define MADERA_MICD_CLAMP_RISE_EINT1_MASK		0x0010
+#define MADERA_MICD_CLAMP_RISE_EINT1_SHIFT		     4
+#define MADERA_MICD_CLAMP_RISE_EINT1_WIDTH		     1
+#define MADERA_JD2_FALL_EINT1				0x0008
+#define MADERA_JD2_FALL_EINT1_MASK			0x0008
+#define MADERA_JD2_FALL_EINT1_SHIFT			     3
+#define MADERA_JD2_FALL_EINT1_WIDTH			     1
+#define MADERA_JD2_RISE_EINT1				0x0004
+#define MADERA_JD2_RISE_EINT1_MASK			0x0004
+#define MADERA_JD2_RISE_EINT1_SHIFT			     2
+#define MADERA_JD2_RISE_EINT1_WIDTH			     1
+#define MADERA_JD1_FALL_EINT1				0x0002
+#define MADERA_JD1_FALL_EINT1_MASK			0x0002
+#define MADERA_JD1_FALL_EINT1_SHIFT			     1
+#define MADERA_JD1_FALL_EINT1_WIDTH			     1
+#define MADERA_JD1_RISE_EINT1				0x0001
+#define MADERA_JD1_RISE_EINT1_MASK			0x0001
+#define MADERA_JD1_RISE_EINT1_SHIFT			     0
+#define MADERA_JD1_RISE_EINT1_WIDTH			     1
+
+/* (0x1808)  IRQ1_Status_9 */
+#define MADERA_ASRC2_IN2_LOCK_EINT1			0x0800
+#define MADERA_ASRC2_IN2_LOCK_EINT1_MASK		0x0800
+#define MADERA_ASRC2_IN2_LOCK_EINT1_SHIFT		    11
+#define MADERA_ASRC2_IN2_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC2_IN1_LOCK_EINT1			0x0400
+#define MADERA_ASRC2_IN1_LOCK_EINT1_MASK		0x0400
+#define MADERA_ASRC2_IN1_LOCK_EINT1_SHIFT		    10
+#define MADERA_ASRC2_IN1_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC1_IN2_LOCK_EINT1			0x0200
+#define MADERA_ASRC1_IN2_LOCK_EINT1_MASK		0x0200
+#define MADERA_ASRC1_IN2_LOCK_EINT1_SHIFT		     9
+#define MADERA_ASRC1_IN2_LOCK_EINT1_WIDTH		     1
+#define MADERA_ASRC1_IN1_LOCK_EINT1			0x0100
+#define MADERA_ASRC1_IN1_LOCK_EINT1_MASK		0x0100
+#define MADERA_ASRC1_IN1_LOCK_EINT1_SHIFT		     8
+#define MADERA_ASRC1_IN1_LOCK_EINT1_WIDTH		     1
+#define MADERA_DRC2_SIG_DET_EINT1			0x0002
+#define MADERA_DRC2_SIG_DET_EINT1_MASK			0x0002
+#define MADERA_DRC2_SIG_DET_EINT1_SHIFT			     1
+#define MADERA_DRC2_SIG_DET_EINT1_WIDTH			     1
+#define MADERA_DRC1_SIG_DET_EINT1			0x0001
+#define MADERA_DRC1_SIG_DET_EINT1_MASK			0x0001
+#define MADERA_DRC1_SIG_DET_EINT1_SHIFT			     0
+#define MADERA_DRC1_SIG_DET_EINT1_WIDTH			     1
+
+/* (0x180A)  IRQ1_Status_11 */
+#define MADERA_DSP_IRQ16_EINT1				0x8000
+#define MADERA_DSP_IRQ16_EINT1_MASK			0x8000
+#define MADERA_DSP_IRQ16_EINT1_SHIFT			    15
+#define MADERA_DSP_IRQ16_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ15_EINT1				0x4000
+#define MADERA_DSP_IRQ15_EINT1_MASK			0x4000
+#define MADERA_DSP_IRQ15_EINT1_SHIFT			    14
+#define MADERA_DSP_IRQ15_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ14_EINT1				0x2000
+#define MADERA_DSP_IRQ14_EINT1_MASK			0x2000
+#define MADERA_DSP_IRQ14_EINT1_SHIFT			    13
+#define MADERA_DSP_IRQ14_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ13_EINT1				0x1000
+#define MADERA_DSP_IRQ13_EINT1_MASK			0x1000
+#define MADERA_DSP_IRQ13_EINT1_SHIFT			    12
+#define MADERA_DSP_IRQ13_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ12_EINT1				0x0800
+#define MADERA_DSP_IRQ12_EINT1_MASK			0x0800
+#define MADERA_DSP_IRQ12_EINT1_SHIFT			    11
+#define MADERA_DSP_IRQ12_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ11_EINT1				0x0400
+#define MADERA_DSP_IRQ11_EINT1_MASK			0x0400
+#define MADERA_DSP_IRQ11_EINT1_SHIFT			    10
+#define MADERA_DSP_IRQ11_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ10_EINT1				0x0200
+#define MADERA_DSP_IRQ10_EINT1_MASK			0x0200
+#define MADERA_DSP_IRQ10_EINT1_SHIFT			     9
+#define MADERA_DSP_IRQ10_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ9_EINT1				0x0100
+#define MADERA_DSP_IRQ9_EINT1_MASK			0x0100
+#define MADERA_DSP_IRQ9_EINT1_SHIFT			     8
+#define MADERA_DSP_IRQ9_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ8_EINT1				0x0080
+#define MADERA_DSP_IRQ8_EINT1_MASK			0x0080
+#define MADERA_DSP_IRQ8_EINT1_SHIFT			     7
+#define MADERA_DSP_IRQ8_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ7_EINT1				0x0040
+#define MADERA_DSP_IRQ7_EINT1_MASK			0x0040
+#define MADERA_DSP_IRQ7_EINT1_SHIFT			     6
+#define MADERA_DSP_IRQ7_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ6_EINT1				0x0020
+#define MADERA_DSP_IRQ6_EINT1_MASK			0x0020
+#define MADERA_DSP_IRQ6_EINT1_SHIFT			     5
+#define MADERA_DSP_IRQ6_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ5_EINT1				0x0010
+#define MADERA_DSP_IRQ5_EINT1_MASK			0x0010
+#define MADERA_DSP_IRQ5_EINT1_SHIFT			     4
+#define MADERA_DSP_IRQ5_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ4_EINT1				0x0008
+#define MADERA_DSP_IRQ4_EINT1_MASK			0x0008
+#define MADERA_DSP_IRQ4_EINT1_SHIFT			     3
+#define MADERA_DSP_IRQ4_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ3_EINT1				0x0004
+#define MADERA_DSP_IRQ3_EINT1_MASK			0x0004
+#define MADERA_DSP_IRQ3_EINT1_SHIFT			     2
+#define MADERA_DSP_IRQ3_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ2_EINT1				0x0002
+#define MADERA_DSP_IRQ2_EINT1_MASK			0x0002
+#define MADERA_DSP_IRQ2_EINT1_SHIFT			     1
+#define MADERA_DSP_IRQ2_EINT1_WIDTH			     1
+#define MADERA_DSP_IRQ1_EINT1				0x0001
+#define MADERA_DSP_IRQ1_EINT1_MASK			0x0001
+#define MADERA_DSP_IRQ1_EINT1_SHIFT			     0
+#define MADERA_DSP_IRQ1_EINT1_WIDTH			     1
+
+/* (0x180B)  IRQ1_Status_12 */
+#define MADERA_SPKOUTR_SC_EINT1				0x0080
+#define MADERA_SPKOUTR_SC_EINT1_MASK			0x0080
+#define MADERA_SPKOUTR_SC_EINT1_SHIFT			     7
+#define MADERA_SPKOUTR_SC_EINT1_WIDTH			     1
+#define MADERA_SPKOUTL_SC_EINT1				0x0040
+#define MADERA_SPKOUTL_SC_EINT1_MASK			0x0040
+#define MADERA_SPKOUTL_SC_EINT1_SHIFT			     6
+#define MADERA_SPKOUTL_SC_EINT1_WIDTH			     1
+#define MADERA_HP3R_SC_EINT1				0x0020
+#define MADERA_HP3R_SC_EINT1_MASK			0x0020
+#define MADERA_HP3R_SC_EINT1_SHIFT			     5
+#define MADERA_HP3R_SC_EINT1_WIDTH			     1
+#define MADERA_HP3L_SC_EINT1				0x0010
+#define MADERA_HP3L_SC_EINT1_MASK			0x0010
+#define MADERA_HP3L_SC_EINT1_SHIFT			     4
+#define MADERA_HP3L_SC_EINT1_WIDTH			     1
+#define MADERA_HP2R_SC_EINT1				0x0008
+#define MADERA_HP2R_SC_EINT1_MASK			0x0008
+#define MADERA_HP2R_SC_EINT1_SHIFT			     3
+#define MADERA_HP2R_SC_EINT1_WIDTH			     1
+#define MADERA_HP2L_SC_EINT1				0x0004
+#define MADERA_HP2L_SC_EINT1_MASK			0x0004
+#define MADERA_HP2L_SC_EINT1_SHIFT			     2
+#define MADERA_HP2L_SC_EINT1_WIDTH			     1
+#define MADERA_HP1R_SC_EINT1				0x0002
+#define MADERA_HP1R_SC_EINT1_MASK			0x0002
+#define MADERA_HP1R_SC_EINT1_SHIFT			     1
+#define MADERA_HP1R_SC_EINT1_WIDTH			     1
+#define MADERA_HP1L_SC_EINT1				0x0001
+#define MADERA_HP1L_SC_EINT1_MASK			0x0001
+#define MADERA_HP1L_SC_EINT1_SHIFT			     0
+#define MADERA_HP1L_SC_EINT1_WIDTH			     1
+
+/* (0x180E)  IRQ1_Status_15 */
+#define MADERA_SPK_OVERHEAT_WARN_EINT1			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_EINT1_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_EINT1			0x0002
+#define MADERA_SPK_OVERHEAT_EINT1_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_EINT1_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_EINT1_WIDTH			     1
+#define MADERA_SPK_SHUTDOWN_EINT1			0x0001
+#define MADERA_SPK_SHUTDOWN_EINT1_MASK			0x0001
+#define MADERA_SPK_SHUTDOWN_EINT1_SHIFT			     0
+#define MADERA_SPK_SHUTDOWN_EINT1_WIDTH			     1
+
+/* (0x1820) - IRQ1 Status 33 */
+#define MADERA_DSP7_BUS_ERR_EINT1			0x0040
+#define MADERA_DSP7_BUS_ERR_EINT1_MASK			0x0040
+#define MADERA_DSP7_BUS_ERR_EINT1_SHIFT			     6
+#define MADERA_DSP7_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP6_BUS_ERR_EINT1			0x0020
+#define MADERA_DSP6_BUS_ERR_EINT1_MASK			0x0020
+#define MADERA_DSP6_BUS_ERR_EINT1_SHIFT			     5
+#define MADERA_DSP6_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP5_BUS_ERR_EINT1			0x0010
+#define MADERA_DSP5_BUS_ERR_EINT1_MASK			0x0010
+#define MADERA_DSP5_BUS_ERR_EINT1_SHIFT			     4
+#define MADERA_DSP5_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP4_BUS_ERR_EINT1			0x0008
+#define MADERA_DSP4_BUS_ERR_EINT1_MASK			0x0008
+#define MADERA_DSP4_BUS_ERR_EINT1_SHIFT			     3
+#define MADERA_DSP4_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP3_BUS_ERR_EINT1			0x0004
+#define MADERA_DSP3_BUS_ERR_EINT1_MASK			0x0004
+#define MADERA_DSP3_BUS_ERR_EINT1_SHIFT			     2
+#define MADERA_DSP3_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP2_BUS_ERR_EINT1			0x0002
+#define MADERA_DSP2_BUS_ERR_EINT1_MASK			0x0002
+#define MADERA_DSP2_BUS_ERR_EINT1_SHIFT			     1
+#define MADERA_DSP2_BUS_ERR_EINT1_WIDTH			     1
+#define MADERA_DSP1_BUS_ERR_EINT1			0x0001
+#define MADERA_DSP1_BUS_ERR_EINT1_MASK			0x0001
+#define MADERA_DSP1_BUS_ERR_EINT1_SHIFT			     0
+#define MADERA_DSP1_BUS_ERR_EINT1_WIDTH			     1
+
+/* (0x184E)  IRQ1_Mask_15 */
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1		0x0004
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_SHIFT		     2
+#define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_WIDTH		     1
+#define MADERA_IM_SPK_OVERHEAT_EINT1			0x0002
+#define MADERA_IM_SPK_OVERHEAT_EINT1_MASK		0x0002
+#define MADERA_IM_SPK_OVERHEAT_EINT1_SHIFT		     1
+#define MADERA_IM_SPK_OVERHEAT_EINT1_WIDTH		     1
+#define MADERA_IM_SPK_SHUTDOWN_EINT1			0x0001
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_MASK		0x0001
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_SHIFT		     0
+#define MADERA_IM_SPK_SHUTDOWN_EINT1_WIDTH		     1
+
+/* (0x1880) - IRQ1 Raw Status 1 */
+#define MADERA_CTRLIF_ERR_STS1				0x1000
+#define MADERA_CTRLIF_ERR_STS1_MASK			0x1000
+#define MADERA_CTRLIF_ERR_STS1_SHIFT			    12
+#define MADERA_CTRLIF_ERR_STS1_WIDTH			     1
+#define MADERA_SYSCLK_FAIL_STS1				0x0200
+#define MADERA_SYSCLK_FAIL_STS1_MASK			0x0200
+#define MADERA_SYSCLK_FAIL_STS1_SHIFT			     9
+#define MADERA_SYSCLK_FAIL_STS1_WIDTH			     1
+#define MADERA_CLOCK_DETECT_STS1			0x0100
+#define MADERA_CLOCK_DETECT_STS1_MASK			0x0100
+#define MADERA_CLOCK_DETECT_STS1_SHIFT			     8
+#define MADERA_CLOCK_DETECT_STS1_WIDTH			     1
+#define MADERA_BOOT_DONE_STS1				0x0080
+#define MADERA_BOOT_DONE_STS1_MASK			0x0080
+#define MADERA_BOOT_DONE_STS1_SHIFT			     7
+#define MADERA_BOOT_DONE_STS1_WIDTH			     1
+
+/* (0x1881) - IRQ1 Raw Status 2 */
+#define MADERA_FLL3_LOCK_STS1				0x0400
+#define MADERA_FLL3_LOCK_STS1_MASK			0x0400
+#define MADERA_FLL3_LOCK_STS1_SHIFT			    10
+#define MADERA_FLL3_LOCK_STS1_WIDTH			     1
+#define MADERA_FLL2_LOCK_STS1				0x0200
+#define MADERA_FLL2_LOCK_STS1_MASK			0x0200
+#define MADERA_FLL2_LOCK_STS1_SHIFT			     9
+#define MADERA_FLL2_LOCK_STS1_WIDTH			     1
+#define MADERA_FLL1_LOCK_STS1				0x0100
+#define MADERA_FLL1_LOCK_STS1_MASK			0x0100
+#define MADERA_FLL1_LOCK_STS1_SHIFT			     8
+#define MADERA_FLL1_LOCK_STS1_WIDTH			     1
+
+/* (0x1886) - IRQ1 Raw Status 7 */
+#define MADERA_MICD_CLAMP_FALL_STS1			0x0020
+#define MADERA_MICD_CLAMP_FALL_STS1_MASK		0x0020
+#define MADERA_MICD_CLAMP_FALL_STS1_SHIFT		     5
+#define MADERA_MICD_CLAMP_FALL_STS1_WIDTH		     1
+#define MADERA_MICD_CLAMP_RISE_STS1			0x0010
+#define MADERA_MICD_CLAMP_RISE_STS1_MASK		0x0010
+#define MADERA_MICD_CLAMP_RISE_STS1_SHIFT		     4
+#define MADERA_MICD_CLAMP_RISE_STS1_WIDTH		     1
+#define MADERA_JD2_FALL_STS1				0x0008
+#define MADERA_JD2_FALL_STS1_MASK			0x0008
+#define MADERA_JD2_FALL_STS1_SHIFT			     3
+#define MADERA_JD2_FALL_STS1_WIDTH			     1
+#define MADERA_JD2_RISE_STS1				0x0004
+#define MADERA_JD2_RISE_STS1_MASK			0x0004
+#define MADERA_JD2_RISE_STS1_SHIFT			     2
+#define MADERA_JD2_RISE_STS1_WIDTH			     1
+#define MADERA_JD1_FALL_STS1				0x0002
+#define MADERA_JD1_FALL_STS1_MASK			0x0002
+#define MADERA_JD1_FALL_STS1_SHIFT			     1
+#define MADERA_JD1_FALL_STS1_WIDTH			     1
+#define MADERA_JD1_RISE_STS1				0x0001
+#define MADERA_JD1_RISE_STS1_MASK			0x0001
+#define MADERA_JD1_RISE_STS1_SHIFT			     0
+#define MADERA_JD1_RISE_STS1_WIDTH			     1
+
+/* (0x188E) - IRQ1 Raw Status 15 */
+#define MADERA_SPK_OVERHEAT_WARN_STS1			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_STS1_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_STS1_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_STS1_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_STS1			0x0002
+#define MADERA_SPK_OVERHEAT_STS1_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_STS1_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_STS1_WIDTH			     1
+#define MADERA_SPK_SHUTDOWN_STS1			0x0001
+#define MADERA_SPK_SHUTDOWN_STS1_MASK			0x0001
+#define MADERA_SPK_SHUTDOWN_STS1_SHIFT			     0
+#define MADERA_SPK_SHUTDOWN_STS1_WIDTH			     1
+
+/* (0x1A06)  Interrupt_Debounce_7 */
+#define MADERA_MICD_CLAMP_DB				0x0010
+#define MADERA_MICD_CLAMP_DB_MASK			0x0010
+#define MADERA_MICD_CLAMP_DB_SHIFT			     4
+#define MADERA_MICD_CLAMP_DB_WIDTH			     1
+#define MADERA_JD2_DB					0x0004
+#define MADERA_JD2_DB_MASK				0x0004
+#define MADERA_JD2_DB_SHIFT				     2
+#define MADERA_JD2_DB_WIDTH				     1
+#define MADERA_JD1_DB					0x0001
+#define MADERA_JD1_DB_MASK				0x0001
+#define MADERA_JD1_DB_SHIFT				     0
+#define MADERA_JD1_DB_WIDTH				     1
+
+/* (0x1A0E)  Interrupt_Debounce_15 */
+#define MADERA_SPK_OVERHEAT_WARN_DB			0x0004
+#define MADERA_SPK_OVERHEAT_WARN_DB_MASK		0x0004
+#define MADERA_SPK_OVERHEAT_WARN_DB_SHIFT		     2
+#define MADERA_SPK_OVERHEAT_WARN_DB_WIDTH		     1
+#define MADERA_SPK_OVERHEAT_DB				0x0002
+#define MADERA_SPK_OVERHEAT_DB_MASK			0x0002
+#define MADERA_SPK_OVERHEAT_DB_SHIFT			     1
+#define MADERA_SPK_OVERHEAT_DB_WIDTH			     1
+
+/* (0x1A80)  IRQ1_CTRL */
+#define MADERA_IM_IRQ1					0x0800
+#define MADERA_IM_IRQ1_MASK				0x0800
+#define MADERA_IM_IRQ1_SHIFT				    11
+#define MADERA_IM_IRQ1_WIDTH				     1
+#define MADERA_IRQ_POL					0x0400
+#define MADERA_IRQ_POL_MASK				0x0400
+#define MADERA_IRQ_POL_SHIFT				    10
+#define MADERA_IRQ_POL_WIDTH				     1
+
+/* (0x20004)  OTP_HPDET_Cal_1 */
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11	    0xFF000000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_MASK	    0xFF000000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_SHIFT		    24
+#define MADERA_OTP_HPDET_CALIB_OFFSET_11_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10	    0x00FF0000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_MASK	    0x00FF0000
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_SHIFT		    16
+#define MADERA_OTP_HPDET_CALIB_OFFSET_10_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01	    0x0000FF00
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_MASK	    0x0000FF00
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_SHIFT		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_01_WIDTH		     8
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00	    0x000000FF
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_MASK	    0x000000FF
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_SHIFT		     0
+#define MADERA_OTP_HPDET_CALIB_OFFSET_00_WIDTH		     8
+
+/* (0x20006)  OTP_HPDET_Cal_2 */
+#define MADERA_OTP_HPDET_GRADIENT_1X		    0x0000FF00
+#define MADERA_OTP_HPDET_GRADIENT_1X_MASK	    0x0000FF00
+#define MADERA_OTP_HPDET_GRADIENT_1X_SHIFT		     8
+#define MADERA_OTP_HPDET_GRADIENT_1X_WIDTH		     8
+#define MADERA_OTP_HPDET_GRADIENT_0X		    0x000000FF
+#define MADERA_OTP_HPDET_GRADIENT_0X_MASK	    0x000000FF
+#define MADERA_OTP_HPDET_GRADIENT_0X_SHIFT		     0
+#define MADERA_OTP_HPDET_GRADIENT_0X_WIDTH		     8
+
+#endif
-- 
cgit v1.2.3


From 16b27467f46c1e0dbf093f53971aeb5decbaff4e Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 21 May 2018 10:59:56 +0100
Subject: mfd: madera: Add common support for Cirrus Logic Madera codecs

This adds the generic core support for Cirrus Logic "Madera" class codecs.
These are complex audio codec SoCs with a variety of digital and analogue
I/O, onboard audio processing and DSPs, and other features.

These codecs are all based off a common set of hardware IP so can be
supported by a core of common code (with a few minor device-to-device
variations).

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Nikesh Oswal <Nikesh.Oswal@cirrus.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 MAINTAINERS                      |   3 +
 drivers/mfd/Kconfig              |  29 ++
 drivers/mfd/Makefile             |   5 +
 drivers/mfd/madera-core.c        | 609 +++++++++++++++++++++++++++++++++++++++
 drivers/mfd/madera-i2c.c         | 140 +++++++++
 drivers/mfd/madera-spi.c         | 139 +++++++++
 drivers/mfd/madera.h             |  44 +++
 include/linux/mfd/madera/core.h  | 187 ++++++++++++
 include/linux/mfd/madera/pdata.h |  59 ++++
 9 files changed, 1215 insertions(+)
 create mode 100644 drivers/mfd/madera-core.c
 create mode 100644 drivers/mfd/madera-i2c.c
 create mode 100644 drivers/mfd/madera-spi.c
 create mode 100644 drivers/mfd/madera.h
 create mode 100644 include/linux/mfd/madera/core.h
 create mode 100644 include/linux/mfd/madera/pdata.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6e5bb62acea2..7b8e857d6b33 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3487,7 +3487,10 @@ L:	patches@opensource.cirrus.com
 T:	git https://github.com/CirrusLogic/linux-drivers.git
 W:	https://github.com/CirrusLogic/linux-drivers/wiki
 S:	Supported
+F:	Documentation/devicetree/bindings/mfd/madera.txt
 F:	include/linux/mfd/madera/*
+F:	drivers/mfd/madera*
+F:	drivers/mfd/cs47l*
 
 CLEANCACHE API
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b860eb5aa194..f5ca392f8bc2 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -232,6 +232,35 @@ config MFD_CROS_EC_CHARDEV
           If you have a supported Chromebook, choose Y or M here.
           The module will be called cros_ec_dev.
 
+config MFD_MADERA
+	tristate "Cirrus Logic Madera codecs"
+	select MFD_CORE
+	select REGMAP
+	select REGMAP_IRQ
+	select MADERA_IRQ
+	select PINCTRL
+	select PINCTRL_MADERA
+	help
+	  Support for the Cirrus Logic Madera platform audio codecs
+
+config MFD_MADERA_I2C
+	tristate "Cirrus Logic Madera codecs with I2C"
+	depends on MFD_MADERA
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Support for the Cirrus Logic Madera platform audio SoC
+	  core functionality controlled via I2C.
+
+config MFD_MADERA_SPI
+	tristate "Cirrus Logic Madera codecs with SPI"
+	depends on MFD_MADERA
+	depends on SPI_MASTER
+	select REGMAP_SPI
+	help
+	  Support for the Cirrus Logic Madera platform audio SoC
+	  core functionality controlled via SPI.
+
 config MFD_ASIC3
 	bool "Compaq ASIC3"
 	depends on GPIOLIB && ARM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index d9d2cf0d32ef..0a89a6a6d793 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -73,6 +73,11 @@ wm8994-objs			:= wm8994-core.o wm8994-irq.o wm8994-regmap.o
 obj-$(CONFIG_MFD_WM8994)	+= wm8994.o
 obj-$(CONFIG_MFD_WM97xx)	+= wm97xx-core.o
 
+madera-objs			:= madera-core.o
+obj-$(CONFIG_MFD_MADERA)	+= madera.o
+obj-$(CONFIG_MFD_MADERA_I2C)	+= madera-i2c.o
+obj-$(CONFIG_MFD_MADERA_SPI)	+= madera-spi.o
+
 obj-$(CONFIG_TPS6105X)		+= tps6105x.o
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_TPS6507X)		+= tps6507x.o
diff --git a/drivers/mfd/madera-core.c b/drivers/mfd/madera-core.c
new file mode 100644
index 000000000000..8cfea969b060
--- /dev/null
+++ b/drivers/mfd/madera-core.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Core MFD support for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#include <linux/mfd/madera/core.h>
+#include <linux/mfd/madera/registers.h>
+
+#include "madera.h"
+
+#define CS47L35_SILICON_ID	0x6360
+#define CS47L85_SILICON_ID	0x6338
+#define CS47L90_SILICON_ID	0x6364
+
+#define MADERA_32KZ_MCLK2	1
+
+static const char * const madera_core_supplies[] = {
+	"AVDD",
+	"DBVDD1",
+};
+
+static const struct mfd_cell madera_ldo1_devs[] = {
+	{ .name = "madera-ldo1" },
+};
+
+static const char * const cs47l35_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"CPVDD1",
+	"CPVDD2",
+	"SPKVDD",
+};
+
+static const struct mfd_cell cs47l35_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp", },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l35-codec",
+		.parent_supplies = cs47l35_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l35_supplies),
+	},
+};
+
+static const char * const cs47l85_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"DBVDD3",
+	"DBVDD4",
+	"CPVDD1",
+	"CPVDD2",
+	"SPKVDDL",
+	"SPKVDDR",
+};
+
+static const struct mfd_cell cs47l85_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp" },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l85-codec",
+		.parent_supplies = cs47l85_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l85_supplies),
+	},
+};
+
+static const char * const cs47l90_supplies[] = {
+	"MICVDD",
+	"DBVDD2",
+	"DBVDD3",
+	"DBVDD4",
+	"CPVDD1",
+	"CPVDD2",
+};
+
+static const struct mfd_cell cs47l90_devs[] = {
+	{ .name = "madera-pinctrl", },
+	{ .name = "madera-irq", },
+	{ .name = "madera-micsupp", },
+	{ .name = "madera-gpio", },
+	{ .name = "madera-extcon", },
+	{
+		.name = "cs47l90-codec",
+		.parent_supplies = cs47l90_supplies,
+		.num_parent_supplies = ARRAY_SIZE(cs47l90_supplies),
+	},
+};
+
+/* Used by madera-i2c and madera-spi drivers */
+const char *madera_name_from_type(enum madera_type type)
+{
+	switch (type) {
+	case CS47L35:
+		return "CS47L35";
+	case CS47L85:
+		return "CS47L85";
+	case CS47L90:
+		return "CS47L90";
+	case CS47L91:
+		return "CS47L91";
+	case WM1840:
+		return "WM1840";
+	default:
+		return "Unknown";
+	}
+}
+EXPORT_SYMBOL_GPL(madera_name_from_type);
+
+#define MADERA_BOOT_POLL_MAX_INTERVAL_US  5000
+#define MADERA_BOOT_POLL_TIMEOUT_US	 25000
+
+static int madera_wait_for_boot(struct madera *madera)
+{
+	unsigned int val;
+	int ret;
+
+	/*
+	 * We can't use an interrupt as we need to runtime resume to do so,
+	 * so we poll the status bit. This won't race with the interrupt
+	 * handler because it will be blocked on runtime resume.
+	 */
+	ret = regmap_read_poll_timeout(madera->regmap,
+				       MADERA_IRQ1_RAW_STATUS_1,
+				       val,
+				       (val & MADERA_BOOT_DONE_STS1),
+				       MADERA_BOOT_POLL_MAX_INTERVAL_US,
+				       MADERA_BOOT_POLL_TIMEOUT_US);
+
+	if (ret)
+		dev_err(madera->dev, "Polling BOOT_DONE_STS failed: %d\n", ret);
+
+	/*
+	 * BOOT_DONE defaults to unmasked on boot so we must ack it.
+	 * Do this unconditionally to avoid interrupt storms.
+	 */
+	regmap_write(madera->regmap, MADERA_IRQ1_STATUS_1,
+		     MADERA_BOOT_DONE_EINT1);
+
+	pm_runtime_mark_last_busy(madera->dev);
+
+	return ret;
+}
+
+static int madera_soft_reset(struct madera *madera)
+{
+	int ret;
+
+	ret = regmap_write(madera->regmap, MADERA_SOFTWARE_RESET, 0);
+	if (ret != 0) {
+		dev_err(madera->dev, "Failed to soft reset device: %d\n", ret);
+		return ret;
+	}
+
+	/* Allow time for internal clocks to startup after reset */
+	usleep_range(1000, 2000);
+
+	return 0;
+}
+
+static void madera_enable_hard_reset(struct madera *madera)
+{
+	if (!madera->pdata.reset)
+		return;
+
+	/*
+	 * There are many existing out-of-tree users of these codecs that we
+	 * can't break so preserve the expected behaviour of setting the line
+	 * low to assert reset.
+	 */
+	gpiod_set_raw_value_cansleep(madera->pdata.reset, 0);
+}
+
+static void madera_disable_hard_reset(struct madera *madera)
+{
+	if (!madera->pdata.reset)
+		return;
+
+	gpiod_set_raw_value_cansleep(madera->pdata.reset, 1);
+	usleep_range(1000, 2000);
+}
+
+static int __maybe_unused madera_runtime_resume(struct device *dev)
+{
+	struct madera *madera = dev_get_drvdata(dev);
+	int ret;
+
+	dev_dbg(dev, "Leaving sleep mode\n");
+
+	ret = regulator_enable(madera->dcvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable DCVDD: %d\n", ret);
+		return ret;
+	}
+
+	regcache_cache_only(madera->regmap, false);
+	regcache_cache_only(madera->regmap_32bit, false);
+
+	ret = madera_wait_for_boot(madera);
+	if (ret)
+		goto err;
+
+	ret = regcache_sync(madera->regmap);
+	if (ret) {
+		dev_err(dev, "Failed to restore 16-bit register cache\n");
+		goto err;
+	}
+
+	ret = regcache_sync(madera->regmap_32bit);
+	if (ret) {
+		dev_err(dev, "Failed to restore 32-bit register cache\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	regcache_cache_only(madera->regmap_32bit, true);
+	regcache_cache_only(madera->regmap, true);
+	regulator_disable(madera->dcvdd);
+
+	return ret;
+}
+
+static int __maybe_unused madera_runtime_suspend(struct device *dev)
+{
+	struct madera *madera = dev_get_drvdata(dev);
+
+	dev_dbg(madera->dev, "Entering sleep mode\n");
+
+	regcache_cache_only(madera->regmap, true);
+	regcache_mark_dirty(madera->regmap);
+	regcache_cache_only(madera->regmap_32bit, true);
+	regcache_mark_dirty(madera->regmap_32bit);
+
+	regulator_disable(madera->dcvdd);
+
+	return 0;
+}
+
+const struct dev_pm_ops madera_pm_ops = {
+	SET_RUNTIME_PM_OPS(madera_runtime_suspend,
+			   madera_runtime_resume,
+			   NULL)
+};
+EXPORT_SYMBOL_GPL(madera_pm_ops);
+
+const struct of_device_id madera_of_match[] = {
+	{ .compatible = "cirrus,cs47l35", .data = (void *)CS47L35 },
+	{ .compatible = "cirrus,cs47l85", .data = (void *)CS47L85 },
+	{ .compatible = "cirrus,cs47l90", .data = (void *)CS47L90 },
+	{ .compatible = "cirrus,cs47l91", .data = (void *)CS47L91 },
+	{ .compatible = "cirrus,wm1840", .data = (void *)WM1840 },
+	{}
+};
+EXPORT_SYMBOL_GPL(madera_of_match);
+
+static int madera_get_reset_gpio(struct madera *madera)
+{
+	struct gpio_desc *reset;
+	int ret;
+
+	if (madera->pdata.reset)
+		return 0;
+
+	reset = devm_gpiod_get_optional(madera->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(reset)) {
+		ret = PTR_ERR(reset);
+		if (ret != -EPROBE_DEFER)
+			dev_err(madera->dev, "Failed to request /RESET: %d\n",
+				ret);
+		return ret;
+	}
+
+	/*
+	 * A hard reset is needed for full reset of the chip. We allow running
+	 * without hard reset only because it can be useful for early
+	 * prototyping and some debugging, but we need to warn it's not ideal.
+	 */
+	if (!reset)
+		dev_warn(madera->dev,
+			 "Running without reset GPIO is not recommended\n");
+
+	madera->pdata.reset = reset;
+
+	return 0;
+}
+
+static void madera_set_micbias_info(struct madera *madera)
+{
+	/*
+	 * num_childbias is an array because future codecs can have different
+	 * childbiases for each micbias. Unspecified values default to 0.
+	 */
+	switch (madera->type) {
+	case CS47L35:
+		madera->num_micbias = 2;
+		madera->num_childbias[0] = 2;
+		madera->num_childbias[1] = 2;
+		return;
+	case CS47L85:
+	case WM1840:
+		madera->num_micbias = 4;
+		/* no child biases */
+		return;
+	case CS47L90:
+	case CS47L91:
+		madera->num_micbias = 2;
+		madera->num_childbias[0] = 4;
+		madera->num_childbias[1] = 4;
+		return;
+	default:
+		return;
+	}
+}
+
+int madera_dev_init(struct madera *madera)
+{
+	struct device *dev = madera->dev;
+	unsigned int hwid;
+	int (*patch_fn)(struct madera *) = NULL;
+	const struct mfd_cell *mfd_devs;
+	int n_devs = 0;
+	int i, ret;
+
+	dev_set_drvdata(madera->dev, madera);
+	BLOCKING_INIT_NOTIFIER_HEAD(&madera->notifier);
+	madera_set_micbias_info(madera);
+
+	/*
+	 * We need writable hw config info that all children can share.
+	 * Simplest to take one shared copy of pdata struct.
+	 */
+	if (dev_get_platdata(madera->dev)) {
+		memcpy(&madera->pdata, dev_get_platdata(madera->dev),
+		       sizeof(madera->pdata));
+	}
+
+	ret = madera_get_reset_gpio(madera);
+	if (ret)
+		return ret;
+
+	regcache_cache_only(madera->regmap, true);
+	regcache_cache_only(madera->regmap_32bit, true);
+
+	for (i = 0; i < ARRAY_SIZE(madera_core_supplies); i++)
+		madera->core_supplies[i].supply = madera_core_supplies[i];
+
+	madera->num_core_supplies = ARRAY_SIZE(madera_core_supplies);
+
+	/*
+	 * On some codecs DCVDD could be supplied by the internal LDO1.
+	 * For those we must add the LDO1 driver before requesting DCVDD
+	 * No devm_ because we need to control shutdown order of children.
+	 */
+	switch (madera->type) {
+	case CS47L35:
+	case CS47L90:
+	case CS47L91:
+		break;
+	case CS47L85:
+	case WM1840:
+		ret = mfd_add_devices(madera->dev, PLATFORM_DEVID_NONE,
+				      madera_ldo1_devs,
+				      ARRAY_SIZE(madera_ldo1_devs),
+				      NULL, 0, NULL);
+		if (ret) {
+			dev_err(dev, "Failed to add LDO1 child: %d\n", ret);
+			return ret;
+		}
+		break;
+	default:
+		/* No point continuing if the type is unknown */
+		dev_err(madera->dev, "Unknown device type %d\n", madera->type);
+		return -ENODEV;
+	}
+
+	ret = devm_regulator_bulk_get(dev, madera->num_core_supplies,
+				      madera->core_supplies);
+	if (ret) {
+		dev_err(dev, "Failed to request core supplies: %d\n", ret);
+		goto err_devs;
+	}
+
+	/*
+	 * Don't use devres here. If the regulator is one of our children it
+	 * will already have been removed before devres cleanup on this mfd
+	 * driver tries to call put() on it. We need control of shutdown order.
+	 */
+	madera->dcvdd = regulator_get(madera->dev, "DCVDD");
+	if (IS_ERR(madera->dcvdd)) {
+		ret = PTR_ERR(madera->dcvdd);
+		dev_err(dev, "Failed to request DCVDD: %d\n", ret);
+		goto err_devs;
+	}
+
+	ret = regulator_bulk_enable(madera->num_core_supplies,
+				    madera->core_supplies);
+	if (ret) {
+		dev_err(dev, "Failed to enable core supplies: %d\n", ret);
+		goto err_dcvdd;
+	}
+
+	ret = regulator_enable(madera->dcvdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable DCVDD: %d\n", ret);
+		goto err_enable;
+	}
+
+	madera_disable_hard_reset(madera);
+
+	regcache_cache_only(madera->regmap, false);
+	regcache_cache_only(madera->regmap_32bit, false);
+
+	/*
+	 * Now we can power up and verify that this is a chip we know about
+	 * before we start doing any writes to its registers.
+	 */
+	ret = regmap_read(madera->regmap, MADERA_SOFTWARE_RESET, &hwid);
+	if (ret) {
+		dev_err(dev, "Failed to read ID register: %d\n", ret);
+		goto err_reset;
+	}
+
+	switch (hwid) {
+	case CS47L35_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			switch (madera->type) {
+			case CS47L35:
+				patch_fn = cs47l35_patch;
+				mfd_devs = cs47l35_devs;
+				n_devs = ARRAY_SIZE(cs47l35_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	case CS47L85_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			switch (madera->type) {
+			case CS47L85:
+			case WM1840:
+				patch_fn = cs47l85_patch;
+				mfd_devs = cs47l85_devs;
+				n_devs = ARRAY_SIZE(cs47l85_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	case CS47L90_SILICON_ID:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			switch (madera->type) {
+			case CS47L90:
+			case CS47L91:
+				patch_fn = cs47l90_patch;
+				mfd_devs = cs47l90_devs;
+				n_devs = ARRAY_SIZE(cs47l90_devs);
+				break;
+			default:
+				break;
+			}
+		}
+		break;
+	default:
+		dev_err(madera->dev, "Unknown device ID: %x\n", hwid);
+		ret = -EINVAL;
+		goto err_reset;
+	}
+
+	if (!n_devs) {
+		dev_err(madera->dev, "Device ID 0x%x not a %s\n", hwid,
+			madera->type_name);
+		ret = -ENODEV;
+		goto err_reset;
+	}
+
+	/*
+	 * It looks like a device we support. If we don't have a hard reset
+	 * we can now attempt a soft reset.
+	 */
+	if (!madera->pdata.reset) {
+		ret = madera_soft_reset(madera);
+		if (ret)
+			goto err_reset;
+	}
+
+	ret = madera_wait_for_boot(madera);
+	if (ret) {
+		dev_err(madera->dev, "Device failed initial boot: %d\n", ret);
+		goto err_reset;
+	}
+
+	ret = regmap_read(madera->regmap, MADERA_HARDWARE_REVISION,
+			  &madera->rev);
+	if (ret) {
+		dev_err(dev, "Failed to read revision register: %d\n", ret);
+		goto err_reset;
+	}
+	madera->rev &= MADERA_HW_REVISION_MASK;
+
+	dev_info(dev, "%s silicon revision %d\n", madera->type_name,
+		 madera->rev);
+
+	/* Apply hardware patch */
+	if (patch_fn) {
+		ret = patch_fn(madera);
+		if (ret) {
+			dev_err(madera->dev, "Failed to apply patch %d\n", ret);
+			goto err_reset;
+		}
+	}
+
+	/* Init 32k clock sourced from MCLK2 */
+	ret = regmap_update_bits(madera->regmap,
+			MADERA_CLOCK_32K_1,
+			MADERA_CLK_32K_ENA_MASK | MADERA_CLK_32K_SRC_MASK,
+			MADERA_CLK_32K_ENA | MADERA_32KZ_MCLK2);
+	if (ret) {
+		dev_err(madera->dev, "Failed to init 32k clock: %d\n", ret);
+		goto err_reset;
+	}
+
+	pm_runtime_set_active(madera->dev);
+	pm_runtime_enable(madera->dev);
+	pm_runtime_set_autosuspend_delay(madera->dev, 100);
+	pm_runtime_use_autosuspend(madera->dev);
+
+	/* No devm_ because we need to control shutdown order of children */
+	ret = mfd_add_devices(madera->dev, PLATFORM_DEVID_NONE,
+			      mfd_devs, n_devs,
+			      NULL, 0, NULL);
+	if (ret) {
+		dev_err(madera->dev, "Failed to add subdevices: %d\n", ret);
+		goto err_pm_runtime;
+	}
+
+	return 0;
+
+err_pm_runtime:
+	pm_runtime_disable(madera->dev);
+err_reset:
+	madera_enable_hard_reset(madera);
+	regulator_disable(madera->dcvdd);
+err_enable:
+	regulator_bulk_disable(madera->num_core_supplies,
+			       madera->core_supplies);
+err_dcvdd:
+	regulator_put(madera->dcvdd);
+err_devs:
+	mfd_remove_devices(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(madera_dev_init);
+
+int madera_dev_exit(struct madera *madera)
+{
+	/* Prevent any IRQs being serviced while we clean up */
+	disable_irq(madera->irq);
+
+	/*
+	 * DCVDD could be supplied by a child node, we must disable it before
+	 * removing the children, and prevent PM runtime from turning it back on
+	 */
+	pm_runtime_disable(madera->dev);
+
+	regulator_disable(madera->dcvdd);
+	regulator_put(madera->dcvdd);
+
+	mfd_remove_devices(madera->dev);
+	madera_enable_hard_reset(madera);
+
+	regulator_bulk_disable(madera->num_core_supplies,
+			       madera->core_supplies);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(madera_dev_exit);
+
+MODULE_DESCRIPTION("Madera core MFD driver");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera-i2c.c b/drivers/mfd/madera-i2c.c
new file mode 100644
index 000000000000..05ae94be01d8
--- /dev/null
+++ b/drivers/mfd/madera-i2c.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I2C bus interface to Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/madera/core.h>
+
+#include "madera.h"
+
+static int madera_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct madera *madera;
+	const struct regmap_config *regmap_16bit_config = NULL;
+	const struct regmap_config *regmap_32bit_config = NULL;
+	const void *of_data;
+	unsigned long type;
+	const char *name;
+	int ret;
+
+	of_data = of_device_get_match_data(&i2c->dev);
+	if (of_data)
+		type = (unsigned long)of_data;
+	else
+		type = id->driver_data;
+
+	switch (type) {
+	case CS47L35:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			regmap_16bit_config = &cs47l35_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l35_32bit_i2c_regmap;
+		}
+		break;
+	case CS47L85:
+	case WM1840:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			regmap_16bit_config = &cs47l85_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l85_32bit_i2c_regmap;
+		}
+		break;
+	case CS47L90:
+	case CS47L91:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			regmap_16bit_config = &cs47l90_16bit_i2c_regmap;
+			regmap_32bit_config = &cs47l90_32bit_i2c_regmap;
+		}
+		break;
+	default:
+		dev_err(&i2c->dev,
+			"Unknown Madera I2C device type %ld\n", type);
+		return -EINVAL;
+	}
+
+	name = madera_name_from_type(type);
+
+	if (!regmap_16bit_config) {
+		/* it's polite to say which codec isn't built into the kernel */
+		dev_err(&i2c->dev,
+			"Kernel does not include support for %s\n", name);
+		return -EINVAL;
+	}
+
+	madera = devm_kzalloc(&i2c->dev, sizeof(*madera), GFP_KERNEL);
+	if (!madera)
+		return -ENOMEM;
+
+
+	madera->regmap = devm_regmap_init_i2c(i2c, regmap_16bit_config);
+	if (IS_ERR(madera->regmap)) {
+		ret = PTR_ERR(madera->regmap);
+		dev_err(&i2c->dev,
+			"Failed to allocate 16-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->regmap_32bit = devm_regmap_init_i2c(i2c, regmap_32bit_config);
+	if (IS_ERR(madera->regmap_32bit)) {
+		ret = PTR_ERR(madera->regmap_32bit);
+		dev_err(&i2c->dev,
+			"Failed to allocate 32-bit register map: %d\n", ret);
+		return ret;
+	}
+
+	madera->type = type;
+	madera->type_name = name;
+	madera->dev = &i2c->dev;
+	madera->irq = i2c->irq;
+
+	return madera_dev_init(madera);
+}
+
+static int madera_i2c_remove(struct i2c_client *i2c)
+{
+	struct madera *madera = dev_get_drvdata(&i2c->dev);
+
+	madera_dev_exit(madera);
+
+	return 0;
+}
+
+static const struct i2c_device_id madera_i2c_id[] = {
+	{ "cs47l35", CS47L35 },
+	{ "cs47l85", CS47L85 },
+	{ "cs47l90", CS47L90 },
+	{ "cs47l91", CS47L91 },
+	{ "wm1840", WM1840 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, madera_i2c_id);
+
+static struct i2c_driver madera_i2c_driver = {
+	.driver = {
+		.name	= "madera",
+		.pm	= &madera_pm_ops,
+		.of_match_table	= of_match_ptr(madera_of_match),
+	},
+	.probe		= madera_i2c_probe,
+	.remove		= madera_i2c_remove,
+	.id_table	= madera_i2c_id,
+};
+
+module_i2c_driver(madera_i2c_driver);
+
+MODULE_DESCRIPTION("Madera I2C bus interface");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera-spi.c b/drivers/mfd/madera-spi.c
new file mode 100644
index 000000000000..4c398b278bba
--- /dev/null
+++ b/drivers/mfd/madera-spi.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SPI bus interface to Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+
+#include <linux/mfd/madera/core.h>
+
+#include "madera.h"
+
+static int madera_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct madera *madera;
+	const struct regmap_config *regmap_16bit_config = NULL;
+	const struct regmap_config *regmap_32bit_config = NULL;
+	const void *of_data;
+	unsigned long type;
+	const char *name;
+	int ret;
+
+	of_data = of_device_get_match_data(&spi->dev);
+	if (of_data)
+		type = (unsigned long)of_data;
+	else
+		type = id->driver_data;
+
+	switch (type) {
+	case CS47L35:
+		if (IS_ENABLED(CONFIG_MFD_CS47L35)) {
+			regmap_16bit_config = &cs47l35_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l35_32bit_spi_regmap;
+		}
+		break;
+	case CS47L85:
+	case WM1840:
+		if (IS_ENABLED(CONFIG_MFD_CS47L85)) {
+			regmap_16bit_config = &cs47l85_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l85_32bit_spi_regmap;
+		}
+		break;
+	case CS47L90:
+	case CS47L91:
+		if (IS_ENABLED(CONFIG_MFD_CS47L90)) {
+			regmap_16bit_config = &cs47l90_16bit_spi_regmap;
+			regmap_32bit_config = &cs47l90_32bit_spi_regmap;
+		}
+		break;
+	default:
+		dev_err(&spi->dev,
+			"Unknown Madera SPI device type %ld\n", type);
+		return -EINVAL;
+	}
+
+	name = madera_name_from_type(type);
+
+	if (!regmap_16bit_config) {
+		/* it's polite to say which codec isn't built into the kernel */
+		dev_err(&spi->dev,
+			"Kernel does not include support for %s\n", name);
+		return -EINVAL;
+	}
+
+	madera = devm_kzalloc(&spi->dev, sizeof(*madera), GFP_KERNEL);
+	if (!madera)
+		return -ENOMEM;
+
+	madera->regmap = devm_regmap_init_spi(spi, regmap_16bit_config);
+	if (IS_ERR(madera->regmap)) {
+		ret = PTR_ERR(madera->regmap);
+		dev_err(&spi->dev,
+			"Failed to allocate 16-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->regmap_32bit = devm_regmap_init_spi(spi, regmap_32bit_config);
+	if (IS_ERR(madera->regmap_32bit)) {
+		ret = PTR_ERR(madera->regmap_32bit);
+		dev_err(&spi->dev,
+			"Failed to allocate 32-bit register map: %d\n",	ret);
+		return ret;
+	}
+
+	madera->type = type;
+	madera->type_name = name;
+	madera->dev = &spi->dev;
+	madera->irq = spi->irq;
+
+	return madera_dev_init(madera);
+}
+
+static int madera_spi_remove(struct spi_device *spi)
+{
+	struct madera *madera = spi_get_drvdata(spi);
+
+	madera_dev_exit(madera);
+
+	return 0;
+}
+
+static const struct spi_device_id madera_spi_ids[] = {
+	{ "cs47l35", CS47L35 },
+	{ "cs47l85", CS47L85 },
+	{ "cs47l90", CS47L90 },
+	{ "cs47l91", CS47L91 },
+	{ "wm1840", WM1840 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, madera_spi_ids);
+
+static struct spi_driver madera_spi_driver = {
+	.driver = {
+		.name	= "madera",
+		.pm	= &madera_pm_ops,
+		.of_match_table	= of_match_ptr(madera_of_match),
+	},
+	.probe		= madera_spi_probe,
+	.remove		= madera_spi_remove,
+	.id_table	= madera_spi_ids,
+};
+
+module_spi_driver(madera_spi_driver);
+
+MODULE_DESCRIPTION("Madera SPI bus interface");
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mfd/madera.h b/drivers/mfd/madera.h
new file mode 100644
index 000000000000..891b84efb9a7
--- /dev/null
+++ b/drivers/mfd/madera.h
@@ -0,0 +1,44 @@
+/*
+ * MFD internals for Cirrus Logic Madera codecs
+ *
+ * Copyright 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MADERA_MFD_H
+#define MADERA_MFD_H
+
+#include <linux/of.h>
+#include <linux/pm.h>
+
+struct madera;
+
+extern const struct dev_pm_ops madera_pm_ops;
+extern const struct of_device_id madera_of_match[];
+
+int madera_dev_init(struct madera *madera);
+int madera_dev_exit(struct madera *madera);
+
+const char *madera_name_from_type(enum madera_type type);
+
+extern const struct regmap_config cs47l35_16bit_spi_regmap;
+extern const struct regmap_config cs47l35_32bit_spi_regmap;
+extern const struct regmap_config cs47l35_16bit_i2c_regmap;
+extern const struct regmap_config cs47l35_32bit_i2c_regmap;
+int cs47l35_patch(struct madera *madera);
+
+extern const struct regmap_config cs47l85_16bit_spi_regmap;
+extern const struct regmap_config cs47l85_32bit_spi_regmap;
+extern const struct regmap_config cs47l85_16bit_i2c_regmap;
+extern const struct regmap_config cs47l85_32bit_i2c_regmap;
+int cs47l85_patch(struct madera *madera);
+
+extern const struct regmap_config cs47l90_16bit_spi_regmap;
+extern const struct regmap_config cs47l90_32bit_spi_regmap;
+extern const struct regmap_config cs47l90_16bit_i2c_regmap;
+extern const struct regmap_config cs47l90_32bit_i2c_regmap;
+int cs47l90_patch(struct madera *madera);
+#endif
diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h
new file mode 100644
index 000000000000..c332681848ef
--- /dev/null
+++ b/include/linux/mfd/madera/core.h
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MFD internals for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_CORE_H
+#define MADERA_CORE_H
+
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/madera/pdata.h>
+#include <linux/notifier.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+
+enum madera_type {
+	/* 0 is reserved for indicating failure to identify */
+	CS47L35 = 1,
+	CS47L85 = 2,
+	CS47L90 = 3,
+	CS47L91 = 4,
+	WM1840 = 7,
+};
+
+#define MADERA_MAX_CORE_SUPPLIES	2
+#define MADERA_MAX_GPIOS		40
+
+#define CS47L35_NUM_GPIOS		16
+#define CS47L85_NUM_GPIOS		40
+#define CS47L90_NUM_GPIOS		38
+
+#define MADERA_MAX_MICBIAS		4
+
+/* Notifier events */
+#define MADERA_NOTIFY_VOICE_TRIGGER	0x1
+#define MADERA_NOTIFY_HPDET		0x2
+#define MADERA_NOTIFY_MICDET		0x4
+
+/* GPIO Function Definitions */
+#define MADERA_GP_FN_ALTERNATE		0x00
+#define MADERA_GP_FN_GPIO		0x01
+#define MADERA_GP_FN_DSP_GPIO		0x02
+#define MADERA_GP_FN_IRQ1		0x03
+#define MADERA_GP_FN_IRQ2		0x04
+#define MADERA_GP_FN_FLL1_CLOCK		0x10
+#define MADERA_GP_FN_FLL2_CLOCK		0x11
+#define MADERA_GP_FN_FLL3_CLOCK		0x12
+#define MADERA_GP_FN_FLLAO_CLOCK	0x13
+#define MADERA_GP_FN_FLL1_LOCK		0x18
+#define MADERA_GP_FN_FLL2_LOCK		0x19
+#define MADERA_GP_FN_FLL3_LOCK		0x1A
+#define MADERA_GP_FN_FLLAO_LOCK		0x1B
+#define MADERA_GP_FN_OPCLK_OUT		0x40
+#define MADERA_GP_FN_OPCLK_ASYNC_OUT	0x41
+#define MADERA_GP_FN_PWM1		0x48
+#define MADERA_GP_FN_PWM2		0x49
+#define MADERA_GP_FN_SPDIF_OUT		0x4C
+#define MADERA_GP_FN_HEADPHONE_DET	0x50
+#define MADERA_GP_FN_MIC_DET		0x58
+#define MADERA_GP_FN_DRC1_SIGNAL_DETECT	0x80
+#define MADERA_GP_FN_DRC2_SIGNAL_DETECT	0x81
+#define MADERA_GP_FN_ASRC1_IN1_LOCK	0x88
+#define MADERA_GP_FN_ASRC1_IN2_LOCK	0x89
+#define MADERA_GP_FN_ASRC2_IN1_LOCK	0x8A
+#define MADERA_GP_FN_ASRC2_IN2_LOCK	0x8B
+#define MADERA_GP_FN_DSP_IRQ1		0xA0
+#define MADERA_GP_FN_DSP_IRQ2		0xA1
+#define MADERA_GP_FN_DSP_IRQ3		0xA2
+#define MADERA_GP_FN_DSP_IRQ4		0xA3
+#define MADERA_GP_FN_DSP_IRQ5		0xA4
+#define MADERA_GP_FN_DSP_IRQ6		0xA5
+#define MADERA_GP_FN_DSP_IRQ7		0xA6
+#define MADERA_GP_FN_DSP_IRQ8		0xA7
+#define MADERA_GP_FN_DSP_IRQ9		0xA8
+#define MADERA_GP_FN_DSP_IRQ10		0xA9
+#define MADERA_GP_FN_DSP_IRQ11		0xAA
+#define MADERA_GP_FN_DSP_IRQ12		0xAB
+#define MADERA_GP_FN_DSP_IRQ13		0xAC
+#define MADERA_GP_FN_DSP_IRQ14		0xAD
+#define MADERA_GP_FN_DSP_IRQ15		0xAE
+#define MADERA_GP_FN_DSP_IRQ16		0xAF
+#define MADERA_GP_FN_HPOUT1L_SC		0xB0
+#define MADERA_GP_FN_HPOUT1R_SC		0xB1
+#define MADERA_GP_FN_HPOUT2L_SC		0xB2
+#define MADERA_GP_FN_HPOUT2R_SC		0xB3
+#define MADERA_GP_FN_HPOUT3L_SC		0xB4
+#define MADERA_GP_FN_HPOUT4R_SC		0xB5
+#define MADERA_GP_FN_SPKOUTL_SC		0xB6
+#define MADERA_GP_FN_SPKOUTR_SC		0xB7
+#define MADERA_GP_FN_HPOUT1L_ENA	0xC0
+#define MADERA_GP_FN_HPOUT1R_ENA	0xC1
+#define MADERA_GP_FN_HPOUT2L_ENA	0xC2
+#define MADERA_GP_FN_HPOUT2R_ENA	0xC3
+#define MADERA_GP_FN_HPOUT3L_ENA	0xC4
+#define MADERA_GP_FN_HPOUT4R_ENA	0xC5
+#define MADERA_GP_FN_SPKOUTL_ENA	0xC6
+#define MADERA_GP_FN_SPKOUTR_ENA	0xC7
+#define MADERA_GP_FN_HPOUT1L_DIS	0xD0
+#define MADERA_GP_FN_HPOUT1R_DIS	0xD1
+#define MADERA_GP_FN_HPOUT2L_DIS	0xD2
+#define MADERA_GP_FN_HPOUT2R_DIS	0xD3
+#define MADERA_GP_FN_HPOUT3L_DIS	0xD4
+#define MADERA_GP_FN_HPOUT4R_DIS	0xD5
+#define MADERA_GP_FN_SPKOUTL_DIS	0xD6
+#define MADERA_GP_FN_SPKOUTR_DIS	0xD7
+#define MADERA_GP_FN_SPK_SHUTDOWN	0xE0
+#define MADERA_GP_FN_SPK_OVH_SHUTDOWN	0xE1
+#define MADERA_GP_FN_SPK_OVH_WARN	0xE2
+#define MADERA_GP_FN_TIMER1_STATUS	0x140
+#define MADERA_GP_FN_TIMER2_STATUS	0x141
+#define MADERA_GP_FN_TIMER3_STATUS	0x142
+#define MADERA_GP_FN_TIMER4_STATUS	0x143
+#define MADERA_GP_FN_TIMER5_STATUS	0x144
+#define MADERA_GP_FN_TIMER6_STATUS	0x145
+#define MADERA_GP_FN_TIMER7_STATUS	0x146
+#define MADERA_GP_FN_TIMER8_STATUS	0x147
+#define MADERA_GP_FN_EVENTLOG1_FIFO_STS	0x150
+#define MADERA_GP_FN_EVENTLOG2_FIFO_STS	0x151
+#define MADERA_GP_FN_EVENTLOG3_FIFO_STS	0x152
+#define MADERA_GP_FN_EVENTLOG4_FIFO_STS	0x153
+#define MADERA_GP_FN_EVENTLOG5_FIFO_STS	0x154
+#define MADERA_GP_FN_EVENTLOG6_FIFO_STS	0x155
+#define MADERA_GP_FN_EVENTLOG7_FIFO_STS	0x156
+#define MADERA_GP_FN_EVENTLOG8_FIFO_STS	0x157
+
+struct snd_soc_dapm_context;
+
+/*
+ * struct madera - internal data shared by the set of Madera drivers
+ *
+ * This should not be used by anything except child drivers of the Madera MFD
+ *
+ * @regmap:		pointer to the regmap instance for 16-bit registers
+ * @regmap_32bit:	pointer to the regmap instance for 32-bit registers
+ * @dev:		pointer to the MFD device
+ * @type:		type of codec
+ * @rev:		silicon revision
+ * @type_name:		display name of this codec
+ * @num_core_supplies:	number of core supply regulators
+ * @core_supplies:	list of core supplies that are always required
+ * @dcvdd:		pointer to DCVDD regulator
+ * @internal_dcvdd:	true if DCVDD is supplied from the internal LDO1
+ * @pdata:		our pdata
+ * @irq_dev:		the irqchip child driver device
+ * @irq:		host irq number from SPI or I2C configuration
+ * @out_clamp:		indicates output clamp state for each analogue output
+ * @out_shorted:	indicates short circuit state for each analogue output
+ * @hp_ena:		bitflags of enable state for the headphone outputs
+ * @num_micbias:	number of MICBIAS outputs
+ * @num_childbias:	number of child biases for each MICBIAS
+ * @dapm:		pointer to codec driver DAPM context
+ * @notifier:		notifier for signalling events to ASoC machine driver
+ */
+struct madera {
+	struct regmap *regmap;
+	struct regmap *regmap_32bit;
+
+	struct device *dev;
+
+	enum madera_type type;
+	unsigned int rev;
+	const char *type_name;
+
+	int num_core_supplies;
+	struct regulator_bulk_data core_supplies[MADERA_MAX_CORE_SUPPLIES];
+	struct regulator *dcvdd;
+	bool internal_dcvdd;
+
+	struct madera_pdata pdata;
+
+	struct device *irq_dev;
+	int irq;
+
+	unsigned int num_micbias;
+	unsigned int num_childbias[MADERA_MAX_MICBIAS];
+
+	struct snd_soc_dapm_context *dapm;
+
+	struct blocking_notifier_head notifier;
+};
+#endif
diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h
new file mode 100644
index 000000000000..0b311f39c8f4
--- /dev/null
+++ b/include/linux/mfd/madera/pdata.h
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Platform data for Cirrus Logic Madera codecs
+ *
+ * Copyright (C) 2015-2018 Cirrus Logic
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ */
+
+#ifndef MADERA_PDATA_H
+#define MADERA_PDATA_H
+
+#include <linux/kernel.h>
+#include <linux/regulator/arizona-ldo1.h>
+#include <linux/regulator/arizona-micsupp.h>
+#include <linux/regulator/machine.h>
+
+#define MADERA_MAX_MICBIAS		4
+#define MADERA_MAX_CHILD_MICBIAS	4
+
+#define MADERA_MAX_GPSW			2
+
+struct gpio_desc;
+struct pinctrl_map;
+struct madera_irqchip_pdata;
+struct madera_codec_pdata;
+
+/**
+ * struct madera_pdata - Configuration data for Madera devices
+ *
+ * @reset:	    GPIO controlling /RESET (NULL = none)
+ * @ldo1:	    Substruct of pdata for the LDO1 regulator
+ * @micvdd:	    Substruct of pdata for the MICVDD regulator
+ * @irq_flags:	    Mode for primary IRQ (defaults to active low)
+ * @gpio_base:	    Base GPIO number
+ * @gpio_configs:   Array of GPIO configurations (See Documentation/pinctrl.txt)
+ * @n_gpio_configs: Number of entries in gpio_configs
+ * @gpsw:	    General purpose switch mode setting. Depends on the external
+ *		    hardware connected to the switch. (See the SW1_MODE field
+ *		    in the datasheet for the available values for your codec)
+ */
+struct madera_pdata {
+	struct gpio_desc *reset;
+
+	struct arizona_ldo1_pdata ldo1;
+	struct arizona_micsupp_pdata micvdd;
+
+	unsigned int irq_flags;
+	int gpio_base;
+
+	const struct pinctrl_map *gpio_configs;
+	int n_gpio_configs;
+
+	u32 gpsw[MADERA_MAX_GPSW];
+};
+
+#endif
-- 
cgit v1.2.3


From 3055a6cfa04ba4288589778925e8838261e56078 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 22 May 2018 10:52:32 +0300
Subject: iio: Add channel for Position Relative

Add new channel type for relative position on a pad.

These type of analog sensor offers the position of a pen
on a touchpad, and is represented as a voltage, which can be
converted to a position on X and Y axis on the pad.
The channel will hand the relative position on the pad in both directions.

The channel can then be consumed by a touchscreen driver or
read as-is for a raw indication of the touchpen on a touchpad.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 12 ++++++++++++
 drivers/iio/industrialio-core.c         |  1 +
 include/uapi/linux/iio/types.h          |  1 +
 tools/iio/iio_event_monitor.c           |  2 ++
 4 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 731146c3b138..c7353030670a 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -197,6 +197,18 @@ Description:
 		Angle of rotation. Units after application of scale and offset
 		are radians.
 
+What:		/sys/bus/iio/devices/iio:deviceX/in_positionrelative_x_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_positionrelative_y_raw
+KernelVersion:	4.18
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Relative position in direction x or y on a pad (may be
+		arbitrarily assigned but should match other such assignments on
+		device).
+		Units after application of scale and offset are milli percents
+		from the pad's size in both directions. Should be calibrated by
+		the consumer.
+
 What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_x_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_y_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_z_raw
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 19bdf3d2962a..14bf3d243cb1 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -85,6 +85,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_COUNT] = "count",
 	[IIO_INDEX] = "index",
 	[IIO_GRAVITY]  = "gravity",
+	[IIO_POSITIONRELATIVE]  = "positionrelative",
 };
 
 static const char * const iio_modifier_names[] = {
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 4213cdf88e3c..033c7d28924e 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -44,6 +44,7 @@ enum iio_chan_type {
 	IIO_COUNT,
 	IIO_INDEX,
 	IIO_GRAVITY,
+	IIO_POSITIONRELATIVE,
 };
 
 enum iio_modifier {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index b61245e1181d..148f69dfae75 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -58,6 +58,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_PH] = "ph",
 	[IIO_UVINDEX] = "uvindex",
 	[IIO_GRAVITY] = "gravity",
+	[IIO_POSITIONRELATIVE] = "positionrelative",
 };
 
 static const char * const iio_ev_type_text[] = {
@@ -151,6 +152,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_PH:
 	case IIO_UVINDEX:
 	case IIO_GRAVITY:
+	case IIO_POSITIONRELATIVE:
 		break;
 	default:
 		return false;
-- 
cgit v1.2.3


From a591525f43e7d17dc5f94f314d30f6e4da753d7b Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 22 May 2018 10:52:37 +0300
Subject: dt-bindings: iio: adc: at91-sama5d2_adc: add channel specific
 consumer info

Added defines for channel consumer device-tree binding

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/at91-sama5d2_adc.txt     |  9 +++++++++
 include/dt-bindings/iio/adc/at91-sama5d2_adc.h           | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 include/dt-bindings/iio/adc/at91-sama5d2_adc.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt b/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
index 6469a4cd2a6d..4a3c1d496e1a 100644
--- a/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/at91-sama5d2_adc.txt
@@ -21,6 +21,14 @@ Optional properties:
   - dmas: Phandle to dma channel for the ADC.
   - dma-names: Must be "rx" when dmas property is being used.
   See ../../dma/dma.txt for details.
+  - #io-channel-cells: in case consumer drivers are attached, this must be 1.
+  See <Documentation/devicetree/bindings/iio/iio-bindings.txt> for details.
+
+Properties for consumer drivers:
+  - Consumer drivers can be connected to this producer device, as specified
+  in <Documentation/devicetree/bindings/iio/iio-bindings.txt>
+  - Channels exposed are specified in:
+  <dt-bindings/iio/adc/at91-sama5d2_adc.txt>
 
 Example:
 
@@ -38,4 +46,5 @@ adc: adc@fc030000 {
 	atmel,trigger-edge-type = <IRQ_TYPE_EDGE_BOTH>;
 	dmas = <&dma0 (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) | AT91_XDMAC_DT_PERID(25))>;
 	dma-names = "rx";
+	#io-channel-cells = <1>;
 }
diff --git a/include/dt-bindings/iio/adc/at91-sama5d2_adc.h b/include/dt-bindings/iio/adc/at91-sama5d2_adc.h
new file mode 100644
index 000000000000..70f99dbdbb42
--- /dev/null
+++ b/include/dt-bindings/iio/adc/at91-sama5d2_adc.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides constants for configuring the AT91 SAMA5D2 ADC
+ */
+
+#ifndef _DT_BINDINGS_IIO_ADC_AT91_SAMA5D2_ADC_H
+#define _DT_BINDINGS_IIO_ADC_AT91_SAMA5D2_ADC_H
+
+/* X relative position channel index */
+#define AT91_SAMA5D2_ADC_X_CHANNEL		24
+/* Y relative position channel index */
+#define AT91_SAMA5D2_ADC_Y_CHANNEL		25
+/* pressure channel index */
+#define AT91_SAMA5D2_ADC_P_CHANNEL		26
+
+#endif
-- 
cgit v1.2.3


From 918da95176beb743ff47b3bbfb0a4ee420658a95 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Tue, 20 Feb 2018 14:48:33 +1030
Subject: fsi: gpio: Trace busy count

An observation from trace output of the existing FSI tracepoints was
that the remote device was sometimes reporting as busy. Add a new
tracepoint reporting the busy count in order to get a better grip on how
often this is the case.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Acked-by: Eddie James <eajames@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
 drivers/fsi/fsi-master-gpio.c          |  3 +++
 include/trace/events/fsi_master_gpio.h | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c
index 3f487449a277..2a49b167effe 100644
--- a/drivers/fsi/fsi-master-gpio.c
+++ b/drivers/fsi/fsi-master-gpio.c
@@ -401,6 +401,9 @@ retry:
 		break;
 	}
 
+	if (busy_count > 0)
+		trace_fsi_master_gpio_poll_response_busy(master, busy_count);
+
 	/* Clock the slave enough to be ready for next operation */
 	clock_zeros(master, FSI_GPIO_PRIME_SLAVE_CLOCKS);
 	return rc;
diff --git a/include/trace/events/fsi_master_gpio.h b/include/trace/events/fsi_master_gpio.h
index f95cf3264bf9..33e928c5acf3 100644
--- a/include/trace/events/fsi_master_gpio.h
+++ b/include/trace/events/fsi_master_gpio.h
@@ -64,6 +64,22 @@ TRACE_EVENT(fsi_master_gpio_break,
 	)
 );
 
+
+TRACE_EVENT(fsi_master_gpio_poll_response_busy,
+	TP_PROTO(const struct fsi_master_gpio *master, int busy),
+	TP_ARGS(master, busy),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(int,	busy)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->busy = busy;
+	),
+	TP_printk("fsi-gpio%d: device reported busy %d times",
+		__entry->master_idx, __entry->busy)
+);
+
 #endif /* _TRACE_FSI_MASTER_GPIO_H */
 
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From 4e56828a5db19e2de8f8dc464c6df2e7e9ff4e13 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 15 May 2018 16:14:43 +1000
Subject: fsi/fsi-master-gpio: Implement CRC error recovery

The FSI protocol defines two modes of recovery from CRC errors,
this implements both:

 - If the device returns an ECRC (it detected a CRC error in the
   command), then we simply issue the command again.

 - If the master detects a CRC error in the response, we send
   an E_POLL command which requests a resend of the response
   without actually re-executing the command (which could otherwise
   have unwanted side effects such as dequeuing a FIFO twice).

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Christopher Bostic <cbostic@linux.vnet.ibm.com>
Tested-by: Joel Stanley <joel@jms.id.au>
---

Note: This was actually tested by removing some of my fixes, thus
causing us to hit occasional CRC errors during high LPC activity.
---
 drivers/fsi/fsi-master-gpio.c          | 90 +++++++++++++++++++++++++++-------
 include/trace/events/fsi_master_gpio.h | 27 ++++++++++
 2 files changed, 99 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c
index 0a6799bda294..351c12f2ac55 100644
--- a/drivers/fsi/fsi-master-gpio.c
+++ b/drivers/fsi/fsi-master-gpio.c
@@ -22,20 +22,23 @@
 #define	FSI_BREAK_CLOCKS	256	/* Number of clocks to issue break */
 #define	FSI_POST_BREAK_CLOCKS	16000	/* Number clocks to set up cfam */
 #define	FSI_INIT_CLOCKS		5000	/* Clock out any old data */
+#define	FSI_GPIO_DPOLL_CLOCKS	50      /* < 21 will cause slave to hang */
+#define	FSI_GPIO_EPOLL_CLOCKS	50      /* Number of clocks for E_POLL retry */
 #define	FSI_GPIO_STD_DELAY	10	/* Standard GPIO delay in nS */
 					/* todo: adjust down as low as */
 					/* possible or eliminate */
+#define FSI_CRC_ERR_RETRIES	10
+
 #define	FSI_GPIO_CMD_DPOLL      0x2
+#define	FSI_GPIO_CMD_EPOLL      0x3
 #define	FSI_GPIO_CMD_TERM	0x3f
 #define FSI_GPIO_CMD_ABS_AR	0x4
 #define FSI_GPIO_CMD_REL_AR	0x5
 #define FSI_GPIO_CMD_SAME_AR	0x3	/* but only a 2-bit opcode... */
 
-
-#define	FSI_GPIO_DPOLL_CLOCKS	50      /* < 21 will cause slave to hang */
-
-/* Bus errors */
-#define	FSI_GPIO_ERR_BUSY	1	/* Slave stuck in busy state */
+/* Slave responses */
+#define	FSI_GPIO_RESP_ACK	0	/* Success */
+#define	FSI_GPIO_RESP_BUSY	1	/* Slave busy */
 #define	FSI_GPIO_RESP_ERRA	2	/* Any (misc) Error */
 #define	FSI_GPIO_RESP_ERRC	3	/* Slave reports master CRC error */
 #define	FSI_GPIO_MTOE		4	/* Master time out error */
@@ -330,6 +333,16 @@ static void build_dpoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
 	msg_push_crc(cmd);
 }
 
+static void build_epoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
+{
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	msg_push_bits(cmd, slave_id, 2);
+	msg_push_bits(cmd, FSI_GPIO_CMD_EPOLL, 3);
+	msg_push_crc(cmd);
+}
+
 static void echo_delay(struct fsi_master_gpio *master)
 {
 	set_sda_output(master, 1);
@@ -355,6 +368,12 @@ static void fsi_master_gpio_error(struct fsi_master_gpio *master, int error)
 
 }
 
+/*
+ * Note: callers rely specifically on this returning -EAGAIN for
+ * a CRC error detected in the response. Use other error code
+ * for other situations. It will be converted to something else
+ * higher up the stack before it reaches userspace.
+ */
 static int read_one_response(struct fsi_master_gpio *master,
 		uint8_t data_size, struct fsi_gpio_msg *msgp, uint8_t *tagp)
 {
@@ -379,7 +398,7 @@ static int read_one_response(struct fsi_master_gpio *master,
 			"Master time out waiting for response\n");
 		fsi_master_gpio_error(master, FSI_GPIO_MTOE);
 		spin_unlock_irqrestore(&master->bit_lock, flags);
-		return -EIO;
+		return -ETIMEDOUT;
 	}
 
 	msg.bits = 0;
@@ -405,7 +424,7 @@ static int read_one_response(struct fsi_master_gpio *master,
 	if (crc) {
 		dev_dbg(master->dev, "ERR response CRC\n");
 		fsi_master_gpio_error(master, FSI_GPIO_CRC_INVAL);
-		return -EIO;
+		return -EAGAIN;
 	}
 
 	if (msgp)
@@ -451,11 +470,33 @@ static int poll_for_response(struct fsi_master_gpio *master,
 	unsigned long flags;
 	uint8_t tag;
 	uint8_t *data_byte = data;
-
+	int crc_err_retries = 0;
 retry:
 	rc = read_one_response(master, size, &response, &tag);
-	if (rc)
-		return rc;
+
+	/* Handle retries on CRC errors */
+	if (rc == -EAGAIN) {
+		/* Too many retries ? */
+		if (crc_err_retries++ > FSI_CRC_ERR_RETRIES) {
+			/*
+			 * Pass it up as a -EIO otherwise upper level will retry
+			 * the whole command which isn't what we want here.
+			 */
+			rc = -EIO;
+			goto fail;
+		}
+		dev_dbg(master->dev,
+			 "CRC error retry %d\n", crc_err_retries);
+		trace_fsi_master_gpio_crc_rsp_error(master);
+		build_epoll_command(&cmd, slave);
+		spin_lock_irqsave(&master->bit_lock, flags);
+		clock_zeros(master, FSI_GPIO_EPOLL_CLOCKS);
+		serial_out(master, &cmd);
+		echo_delay(master);
+		spin_unlock_irqrestore(&master->bit_lock, flags);
+		goto retry;
+	} else if (rc)
+		goto fail;
 
 	switch (tag) {
 	case FSI_GPIO_RESP_ACK:
@@ -496,18 +537,21 @@ retry:
 		break;
 
 	case FSI_GPIO_RESP_ERRA:
-	case FSI_GPIO_RESP_ERRC:
-		dev_dbg(master->dev, "ERR%c received: 0x%x\n",
-			tag == FSI_GPIO_RESP_ERRA ? 'A' : 'C',
-			(int)response.msg);
+		dev_dbg(master->dev, "ERRA received: 0x%x\n", (int)response.msg);
 		fsi_master_gpio_error(master, response.msg);
 		rc = -EIO;
 		break;
+	case FSI_GPIO_RESP_ERRC:
+		dev_dbg(master->dev, "ERRC received: 0x%x\n", (int)response.msg);
+		fsi_master_gpio_error(master, response.msg);
+		trace_fsi_master_gpio_crc_cmd_error(master);
+		rc = -EAGAIN;
+		break;
 	}
 
 	if (busy_count > 0)
 		trace_fsi_master_gpio_poll_response_busy(master, busy_count);
-
+ fail:
 	/* Clock the slave enough to be ready for next operation */
 	spin_lock_irqsave(&master->bit_lock, flags);
 	clock_zeros(master, FSI_GPIO_PRIME_SLAVE_CLOCKS);
@@ -536,11 +580,21 @@ static int send_request(struct fsi_master_gpio *master,
 static int fsi_master_gpio_xfer(struct fsi_master_gpio *master, uint8_t slave,
 		struct fsi_gpio_msg *cmd, size_t resp_len, void *resp)
 {
-	int rc;
+	int rc = -EAGAIN, retries = 0;
 
-	rc = send_request(master, cmd);
-	if (!rc)
+	while ((retries++) < FSI_CRC_ERR_RETRIES) {
+		rc = send_request(master, cmd);
+		if (rc)
+			break;
 		rc = poll_for_response(master, slave, resp_len, resp);
+		if (rc != -EAGAIN)
+			break;
+		rc = -EIO;
+		dev_warn(master->dev, "ECRC retry %d\n", retries);
+
+		/* Pace it a bit before retry */
+		msleep(1);
+	}
 
 	return rc;
 }
diff --git a/include/trace/events/fsi_master_gpio.h b/include/trace/events/fsi_master_gpio.h
index 33e928c5acf3..389082132433 100644
--- a/include/trace/events/fsi_master_gpio.h
+++ b/include/trace/events/fsi_master_gpio.h
@@ -64,6 +64,33 @@ TRACE_EVENT(fsi_master_gpio_break,
 	)
 );
 
+TRACE_EVENT(fsi_master_gpio_crc_cmd_error,
+	TP_PROTO(const struct fsi_master_gpio *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-gpio%d ----CRC command retry---",
+		__entry->master_idx
+	)
+);
+
+TRACE_EVENT(fsi_master_gpio_crc_rsp_error,
+	TP_PROTO(const struct fsi_master_gpio *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-gpio%d ----CRC response---",
+		__entry->master_idx
+	)
+);
 
 TRACE_EVENT(fsi_master_gpio_poll_response_busy,
 	TP_PROTO(const struct fsi_master_gpio *master, int busy),
-- 
cgit v1.2.3


From 9f4a8a2d7f9d71093f41c4bb0ef8707e8145bad3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 17 May 2018 15:58:00 +1000
Subject: fsi/sbefifo: Add driver for the SBE FIFO

This driver provides an in-kernel and a user API for accessing
the command FIFO of the SBE (Self Boot Engine) of the POWER9
processor, via the FSI bus.

It provides an in-kernel interface to submit command and receive
responses, along with a helper to locate and analyse the response
status block. It's a simple synchronous submit() type API.

The user interface uses the write/read interface that an earlier
version of this driver already provided, however it has some
specific limitations in order to keep the driver simple and
avoid using up a lot of kernel memory:

 - The user should perform a single write() with the command and
   a single read() to get the response (with a buffer big enough
   to hold the entire response).

 - On a write() the command is simply "stored" into a kernel buffer,
   it is submitted as one operation on the subsequent read(). This
   allows to have the code write directly from the FIFO into the user
   buffer and avoid hogging the SBE between the write() and read()
   syscall as it's critical that the SBE be freed asap to respond
   to the host. An extra write() will simply replace the previously
   written command.

 - A write of a single 4 bytes containing the value 0x52534554
   in big endian will trigger a reset request. No read is necessary,
   the write() call will return when the reset has been acknowledged
   or times out.

 - The command is limited to 4K bytes.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Joel Stanley <joel@jms.id.au>
---
---
 drivers/fsi/Kconfig         |    7 +
 drivers/fsi/Makefile        |    1 +
 drivers/fsi/fsi-sbefifo.c   | 1005 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsi-sbefifo.h |   33 ++
 4 files changed, 1046 insertions(+)
 create mode 100644 drivers/fsi/fsi-sbefifo.c
 create mode 100644 include/linux/fsi-sbefifo.h

(limited to 'include')

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index a326ed663d3c..24f84a96b8b9 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -32,4 +32,11 @@ config FSI_SCOM
 	---help---
 	This option enables an FSI based SCOM device driver.
 
+config FSI_SBEFIFO
+	tristate "SBEFIFO FSI client device driver"
+	---help---
+	This option enables an FSI based SBEFIFO device driver. The SBEFIFO is
+	a pipe-like FSI device for communicating with the self boot engine
+	(SBE) on POWER processors.
+
 endif
diff --git a/drivers/fsi/Makefile b/drivers/fsi/Makefile
index 65eb99dfafdb..851182e1cd9e 100644
--- a/drivers/fsi/Makefile
+++ b/drivers/fsi/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_FSI) += fsi-core.o
 obj-$(CONFIG_FSI_MASTER_HUB) += fsi-master-hub.o
 obj-$(CONFIG_FSI_MASTER_GPIO) += fsi-master-gpio.o
 obj-$(CONFIG_FSI_SCOM) += fsi-scom.o
+obj-$(CONFIG_FSI_SBEFIFO) += fsi-sbefifo.o
diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
new file mode 100644
index 000000000000..9b8b6b346af6
--- /dev/null
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -0,0 +1,1005 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) IBM Corporation 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERGCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fsi.h>
+#include <linux/fsi-sbefifo.h>
+#include <linux/idr.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/uio.h>
+#include <linux/vmalloc.h>
+
+/*
+ * The SBEFIFO is a pipe-like FSI device for communicating with
+ * the self boot engine on POWER processors.
+ */
+
+#define DEVICE_NAME		"sbefifo"
+#define FSI_ENGID_SBE		0x22
+
+/*
+ * Register layout
+ */
+
+/* Register banks */
+#define SBEFIFO_UP		0x00		/* FSI -> Host */
+#define SBEFIFO_DOWN		0x40		/* Host -> FSI */
+
+/* Per-bank registers */
+#define SBEFIFO_FIFO		0x00		/* The FIFO itself */
+#define SBEFIFO_STS		0x04		/* Status register */
+#define   SBEFIFO_STS_PARITY_ERR	0x20000000
+#define   SBEFIFO_STS_RESET_REQ		0x02000000
+#define   SBEFIFO_STS_GOT_EOT		0x00800000
+#define   SBEFIFO_STS_MAX_XFER_LIMIT	0x00400000
+#define   SBEFIFO_STS_FULL		0x00200000
+#define   SBEFIFO_STS_EMPTY		0x00100000
+#define   SBEFIFO_STS_ECNT_MASK		0x000f0000
+#define   SBEFIFO_STS_ECNT_SHIFT	16
+#define   SBEFIFO_STS_VALID_MASK	0x0000ff00
+#define   SBEFIFO_STS_VALID_SHIFT	8
+#define   SBEFIFO_STS_EOT_MASK		0x000000ff
+#define   SBEFIFO_STS_EOT_SHIFT		0
+#define SBEFIFO_EOT_RAISE	0x08		/* (Up only) Set End Of Transfer */
+#define SBEFIFO_REQ_RESET	0x0C		/* (Up only) Reset Request */
+#define SBEFIFO_PERFORM_RESET	0x10		/* (Down only) Perform Reset */
+#define SBEFIFO_EOT_ACK		0x14		/* (Down only) Acknowledge EOT */
+#define SBEFIFO_DOWN_MAX	0x18		/* (Down only) Max transfer */
+
+/* CFAM GP Mailbox SelfBoot Message register */
+#define CFAM_GP_MBOX_SBM_ADDR	0x2824	/* Converted 0x2809 */
+
+#define CFAM_SBM_SBE_BOOTED		0x80000000
+#define CFAM_SBM_SBE_ASYNC_FFDC		0x40000000
+#define CFAM_SBM_SBE_STATE_MASK		0x00f00000
+#define CFAM_SBM_SBE_STATE_SHIFT	20
+
+enum sbe_state
+{
+	SBE_STATE_UNKNOWN = 0x0, // Unkown, initial state
+	SBE_STATE_IPLING  = 0x1, // IPL'ing - autonomous mode (transient)
+	SBE_STATE_ISTEP   = 0x2, // ISTEP - Running IPL by steps (transient)
+	SBE_STATE_MPIPL   = 0x3, // MPIPL
+	SBE_STATE_RUNTIME = 0x4, // SBE Runtime
+	SBE_STATE_DMT     = 0x5, // Dead Man Timer State (transient)
+	SBE_STATE_DUMP    = 0x6, // Dumping
+	SBE_STATE_FAILURE = 0x7, // Internal SBE failure
+	SBE_STATE_QUIESCE = 0x8, // Final state - needs SBE reset to get out
+};
+
+/* FIFO depth */
+#define SBEFIFO_FIFO_DEPTH		8
+
+/* Helpers */
+#define sbefifo_empty(sts)	((sts) & SBEFIFO_STS_EMPTY)
+#define sbefifo_full(sts)	((sts) & SBEFIFO_STS_FULL)
+#define sbefifo_parity_err(sts)	((sts) & SBEFIFO_STS_PARITY_ERR)
+#define sbefifo_populated(sts)	(((sts) & SBEFIFO_STS_ECNT_MASK) >> SBEFIFO_STS_ECNT_SHIFT)
+#define sbefifo_vacant(sts)	(SBEFIFO_FIFO_DEPTH - sbefifo_populated(sts))
+#define sbefifo_eot_set(sts)	(((sts) & SBEFIFO_STS_EOT_MASK) >> SBEFIFO_STS_EOT_SHIFT)
+
+/* Reset request timeout in ms */
+#define SBEFIFO_RESET_TIMEOUT		10000
+
+/* Timeouts for commands in ms */
+#define SBEFIFO_TIMEOUT_START_CMD	10000
+#define SBEFIFO_TIMEOUT_IN_CMD		1000
+#define SBEFIFO_TIMEOUT_START_RSP	10000
+#define SBEFIFO_TIMEOUT_IN_RSP		1000
+
+/* Other constants */
+#define SBEFIFO_MAX_CMD_LEN		PAGE_SIZE
+#define SBEFIFO_RESET_MAGIC		0x52534554 /* "RSET" */
+
+struct sbefifo {
+	uint32_t		magic;
+#define SBEFIFO_MAGIC		0x53424546 /* "SBEF" */
+	struct fsi_device	*fsi_dev;
+	struct miscdevice	mdev;
+	struct mutex		lock;
+	char			name[32];
+	int			idx;
+	bool			broken;
+	bool			async_ffdc;
+};
+
+struct sbefifo_user {
+	struct sbefifo		*sbefifo;
+	struct mutex		file_lock;
+	void			*pending_cmd;
+	size_t			pending_len;
+};
+
+static DEFINE_IDA(sbefifo_ida);
+static DEFINE_MUTEX(sbefifo_ffdc_mutex);
+
+
+static void sbefifo_dump_ffdc(struct device *dev, const __be32 *ffdc,
+			      size_t ffdc_sz, bool internal)
+{
+	int pack = 0;
+#define FFDC_LSIZE	60
+	static char ffdc_line[FFDC_LSIZE];
+	char *p = ffdc_line;
+
+	mutex_lock(&sbefifo_ffdc_mutex);
+	while (ffdc_sz) {
+		u32 w0, w1, w2, i;
+		if (ffdc_sz < 3) {
+			dev_err(dev, "SBE invalid FFDC package size %zd\n", ffdc_sz);
+			return;
+		}
+		w0 = be32_to_cpu(*(ffdc++));
+		w1 = be32_to_cpu(*(ffdc++));
+		w2 = be32_to_cpu(*(ffdc++));
+		ffdc_sz -= 3;
+		if ((w0 >> 16) != 0xFFDC) {
+			dev_err(dev, "SBE invalid FFDC package signature %08x %08x %08x\n",
+				w0, w1, w2);
+			break;
+		}
+		w0 &= 0xffff;
+		if (w0 > ffdc_sz) {
+			dev_err(dev, "SBE FFDC package len %d words but only %zd remaining\n",
+				w0, ffdc_sz);
+			w0 = ffdc_sz;
+			break;
+		}
+		if (internal) {
+			dev_warn(dev, "+---- SBE FFDC package %d for async err -----+\n",
+				 pack++);
+		} else {
+			dev_warn(dev, "+---- SBE FFDC package %d for cmd %02x:%02x -----+\n",
+				 pack++, (w1 >> 8) & 0xff, w1 & 0xff);
+		}
+		dev_warn(dev, "| Response code: %08x                   |\n", w2);
+		dev_warn(dev, "|-------------------------------------------|\n");
+		for (i = 0; i < w0; i++) {
+			if ((i & 3) == 0) {
+				p = ffdc_line;
+				p += sprintf(p, "| %04x:", i << 4);
+			}
+			p += sprintf(p, " %08x", be32_to_cpu(*(ffdc++)));
+			ffdc_sz--;
+			if ((i & 3) == 3 || i == (w0 - 1)) {
+				while ((i & 3) < 3) {
+					p += sprintf(p, "         ");
+					i++;
+				}
+				dev_warn(dev, "%s |\n", ffdc_line);
+			}
+		}
+		dev_warn(dev, "+-------------------------------------------+\n");
+	}
+}
+
+int sbefifo_parse_status(struct device *dev, u16 cmd, __be32 *response,
+			 size_t resp_len, size_t *data_len)
+{
+	u32 dh, s0, s1;
+	size_t ffdc_sz;
+
+	if (resp_len < 3) {
+		pr_debug("sbefifo: cmd %04x, response too small: %zd\n",
+			 cmd, resp_len);
+		return -ENXIO;
+	}
+	dh = be32_to_cpu(response[resp_len - 1]);
+	if (dh > resp_len || dh < 3) {
+		dev_err(dev, "SBE cmd %02x:%02x status offset out of range: %d/%zd\n",
+			cmd >> 8, cmd & 0xff, dh, resp_len);
+		return -ENXIO;
+	}
+	s0 = be32_to_cpu(response[resp_len - dh]);
+	s1 = be32_to_cpu(response[resp_len - dh + 1]);
+	if (((s0 >> 16) != 0xC0DE) || ((s0 & 0xffff) != cmd)) {
+		dev_err(dev, "SBE cmd %02x:%02x, status signature invalid: 0x%08x 0x%08x\n",
+			cmd >> 8, cmd & 0xff, s0, s1);
+		return -ENXIO;
+	}
+	if (s1 != 0) {
+		ffdc_sz = dh - 3;
+		dev_warn(dev, "SBE error cmd %02x:%02x status=%04x:%04x\n",
+			 cmd >> 8, cmd & 0xff, s1 >> 16, s1 & 0xffff);
+		if (ffdc_sz)
+			sbefifo_dump_ffdc(dev, &response[resp_len - dh + 2],
+					  ffdc_sz, false);
+	}
+	if (data_len)
+		*data_len = resp_len - dh;
+
+	/*
+	 * Primary status don't have the top bit set, so can't be confused with
+	 * Linux negative error codes, so return the status word whole.
+	 */
+	return s1;
+}
+EXPORT_SYMBOL_GPL(sbefifo_parse_status);
+
+static int sbefifo_regr(struct sbefifo *sbefifo, int reg, u32 *word)
+{
+	__be32 raw_word;
+	int rc;
+
+	rc = fsi_device_read(sbefifo->fsi_dev, reg, &raw_word,
+			     sizeof(raw_word));
+	if (rc)
+		return rc;
+
+	*word = be32_to_cpu(raw_word);
+
+	return 0;
+}
+
+static int sbefifo_regw(struct sbefifo *sbefifo, int reg, u32 word)
+{
+	__be32 raw_word = cpu_to_be32(word);
+
+	return fsi_device_write(sbefifo->fsi_dev, reg, &raw_word,
+				sizeof(raw_word));
+}
+
+static int sbefifo_check_sbe_state(struct sbefifo *sbefifo)
+{
+	__be32 raw_word;
+	u32 sbm;
+	int rc;
+
+	rc = fsi_slave_read(sbefifo->fsi_dev->slave, CFAM_GP_MBOX_SBM_ADDR,
+			    &raw_word, sizeof(raw_word));
+	if (rc)
+		return rc;
+	sbm = be32_to_cpu(raw_word);
+
+	/* SBE booted at all ? */
+	if (!(sbm & CFAM_SBM_SBE_BOOTED))
+		return -ESHUTDOWN;
+
+	/* Check its state */
+	switch ((sbm & CFAM_SBM_SBE_STATE_MASK) >> CFAM_SBM_SBE_STATE_SHIFT) {
+	case SBE_STATE_UNKNOWN:
+		return -ESHUTDOWN;
+	case SBE_STATE_IPLING:
+	case SBE_STATE_ISTEP:
+	case SBE_STATE_MPIPL:
+	case SBE_STATE_DMT:
+		return -EBUSY;
+	case SBE_STATE_RUNTIME:
+	case SBE_STATE_DUMP: /* Not sure about that one */
+		break;
+	case SBE_STATE_FAILURE:
+	case SBE_STATE_QUIESCE:
+		return -ESHUTDOWN;
+	}
+
+	/* Is there async FFDC available ? Remember it */
+	if (sbm & CFAM_SBM_SBE_ASYNC_FFDC)
+		sbefifo->async_ffdc = true;
+
+	return 0;
+}
+
+/* Don't flip endianness of data to/from FIFO, just pass through. */
+static int sbefifo_down_read(struct sbefifo *sbefifo, __be32 *word)
+{
+	return fsi_device_read(sbefifo->fsi_dev, SBEFIFO_DOWN, word,
+			       sizeof(*word));
+}
+
+static int sbefifo_up_write(struct sbefifo *sbefifo, __be32 word)
+{
+	return fsi_device_write(sbefifo->fsi_dev, SBEFIFO_UP, &word,
+				sizeof(word));
+}
+
+static int sbefifo_request_reset(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 status, timeout;
+	int rc;
+
+	dev_dbg(dev, "Requesting FIFO reset\n");
+
+	/* Mark broken first, will be cleared if reset succeeds */
+	sbefifo->broken = true;
+
+	/* Send reset request */
+	rc = sbefifo_regw(sbefifo, SBEFIFO_UP | SBEFIFO_REQ_RESET, 1);
+	if (rc) {
+		dev_err(dev, "Sending reset request failed, rc=%d\n", rc);
+		return rc;
+	}
+
+	/* Wait for it to complete */
+	for (timeout = 0; timeout < SBEFIFO_RESET_TIMEOUT; timeout++) {
+		rc = sbefifo_regr(sbefifo, SBEFIFO_UP | SBEFIFO_STS, &status);
+		if (rc) {
+			dev_err(dev, "Failed to read UP fifo status during reset"
+				" , rc=%d\n", rc);
+			return rc;
+		}
+
+		if (!(status & SBEFIFO_STS_RESET_REQ)) {
+			dev_dbg(dev, "FIFO reset done\n");
+			sbefifo->broken = false;
+			return 0;
+		}
+
+		msleep(1);
+	}
+	dev_err(dev, "FIFO reset timed out\n");
+
+	return -ETIMEDOUT;
+}
+
+static int sbefifo_cleanup_hw(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 up_status, down_status;
+	bool need_reset = false;
+	int rc;
+
+	rc = sbefifo_check_sbe_state(sbefifo);
+	if (rc) {
+		dev_dbg(dev, "SBE state=%d\n", rc);
+		return rc;
+	}
+
+	/* If broken, we don't need to look at status, go straight to reset */
+	if (sbefifo->broken)
+		goto do_reset;
+
+	rc = sbefifo_regr(sbefifo, SBEFIFO_UP | SBEFIFO_STS, &up_status);
+	if (rc) {
+		dev_err(dev, "Cleanup: Reading UP status failed, rc=%d\n", rc);
+
+		/* Will try reset again on next attempt at using it */
+		sbefifo->broken = true;
+		return rc;
+	}
+
+	rc = sbefifo_regr(sbefifo, SBEFIFO_DOWN | SBEFIFO_STS, &down_status);
+	if (rc) {
+		dev_err(dev, "Cleanup: Reading DOWN status failed, rc=%d\n", rc);
+
+		/* Will try reset again on next attempt at using it */
+		sbefifo->broken = true;
+		return rc;
+	}
+
+	/* The FIFO already contains a reset request from the SBE ? */
+	if (down_status & SBEFIFO_STS_RESET_REQ) {
+		dev_info(dev, "Cleanup: FIFO reset request set, resetting\n");
+		rc = sbefifo_regw(sbefifo, SBEFIFO_UP, SBEFIFO_PERFORM_RESET);
+		if (rc) {
+			sbefifo->broken = true;
+			dev_err(dev, "Cleanup: Reset reg write failed, rc=%d\n", rc);
+			return rc;
+		}
+		sbefifo->broken = false;
+		return 0;
+	}
+
+	/* Parity error on either FIFO ? */
+	if ((up_status | down_status) & SBEFIFO_STS_PARITY_ERR)
+		need_reset = true;
+
+	/* Either FIFO not empty ? */
+	if (!((up_status & down_status) & SBEFIFO_STS_EMPTY))
+		need_reset = true;
+
+	if (!need_reset)
+		return 0;
+
+	dev_info(dev, "Cleanup: FIFO not clean (up=0x%08x down=0x%08x)\n",
+		 up_status, down_status);
+
+ do_reset:
+
+	/* Mark broken, will be cleared if/when reset succeeds */
+	return sbefifo_request_reset(sbefifo);
+}
+
+static int sbefifo_wait(struct sbefifo *sbefifo, bool up,
+			u32 *status, unsigned long timeout)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	unsigned long end_time;
+	bool ready = false;
+	u32 addr, sts = 0;
+	int rc;
+
+	dev_vdbg(dev, "Wait on %s fifo...\n", up ? "up" : "down");
+
+	addr = (up ? SBEFIFO_UP : SBEFIFO_DOWN) | SBEFIFO_STS;
+
+	end_time = jiffies + timeout;
+	while (!time_after(jiffies, end_time)) {
+		cond_resched();
+		rc = sbefifo_regr(sbefifo, addr, &sts);
+		if (rc < 0) {
+			dev_err(dev, "FSI error %d reading status register\n", rc);
+			return rc;
+		}
+		if (!up && sbefifo_parity_err(sts)) {
+			dev_err(dev, "Parity error in DOWN FIFO\n");
+			return -ENXIO;
+		}
+		ready = !(up ? sbefifo_full(sts) : sbefifo_empty(sts));
+		if (ready)
+			break;
+	}
+	if (!ready) {
+		dev_err(dev, "%s FIFO Timeout ! status=%08x\n", up ? "UP" : "DOWN", sts);
+		return -ETIMEDOUT;
+	}
+	dev_vdbg(dev, "End of wait status: %08x\n", sts);
+
+	*status = sts;
+
+	return 0;
+}
+
+static int sbefifo_send_command(struct sbefifo *sbefifo,
+				const __be32 *command, size_t cmd_len)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	size_t len, chunk, vacant = 0, remaining = cmd_len;
+	unsigned long timeout;
+	u32 status;
+	int rc;
+
+	dev_vdbg(dev, "sending command (%zd words, cmd=%04x)\n",
+		 cmd_len, be32_to_cpu(command[1]));
+
+	/* As long as there's something to send */
+	timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_START_CMD);
+	while (remaining) {
+		/* Wait for room in the FIFO */
+		rc = sbefifo_wait(sbefifo, true, &status, timeout);
+		if (rc < 0)
+			return rc;
+		timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_IN_CMD);
+
+		vacant = sbefifo_vacant(status);
+		len = chunk = min(vacant, remaining);
+
+		dev_vdbg(dev, "  status=%08x vacant=%zd chunk=%zd\n",
+			 status, vacant, chunk);
+
+		/* Write as much as we can */
+		while (len--) {
+			rc = sbefifo_up_write(sbefifo, *(command++));
+			if (rc) {
+				dev_err(dev, "FSI error %d writing UP FIFO\n", rc);
+				return rc;
+			}
+		}
+		remaining -= chunk;
+		vacant -= chunk;
+	}
+
+	/* If there's no room left, wait for some to write EOT */
+	if (!vacant) {
+		rc = sbefifo_wait(sbefifo, true, &status, timeout);
+		if (rc)
+			return rc;
+	}
+
+	/* Send an EOT */
+	rc = sbefifo_regw(sbefifo, SBEFIFO_UP | SBEFIFO_EOT_RAISE, 0);
+	if (rc)
+		dev_err(dev, "FSI error %d writing EOT\n", rc);
+	return rc;
+}
+
+static int sbefifo_read_response(struct sbefifo *sbefifo, struct iov_iter *response)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	u32 data, status, eot_set;
+	unsigned long timeout;
+	bool overflow = false;
+	size_t len;
+	int rc;
+
+	dev_vdbg(dev, "reading response, buflen = %zd\n", iov_iter_count(response));
+
+	timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_START_RSP);
+	for (;;) {
+		/* Grab FIFO status (this will handle parity errors) */
+		rc = sbefifo_wait(sbefifo, false, &status, timeout);
+		if (rc < 0)
+			return rc;
+		timeout = msecs_to_jiffies(SBEFIFO_TIMEOUT_IN_RSP);
+
+		/* Decode status */
+		len = sbefifo_populated(status);
+		eot_set = sbefifo_eot_set(status);
+
+		dev_vdbg(dev, "  chunk size %zd eot_set=0x%x\n", len, eot_set);
+
+		/* Go through the chunk */
+		while(len--) {
+			/* Read the data */
+			rc = sbefifo_down_read(sbefifo, &data);
+			if (rc < 0)
+				return rc;
+
+			/* Was it an EOT ? */
+			if (eot_set & 0x80) {
+				/*
+				 * There should be nothing else in the FIFO,
+				 * if there is, mark broken, this will force
+				 * a reset on next use, but don't fail the
+				 * command.
+				 */
+				if (len) {
+					dev_warn(dev, "FIFO read hit"
+						 " EOT with still %zd data\n",
+						 len);
+					sbefifo->broken = true;
+				}
+
+				/* We are done */
+				rc = sbefifo_regw(sbefifo,
+						  SBEFIFO_DOWN | SBEFIFO_EOT_ACK, 0);
+
+				/*
+				 * If that write fail, still complete the request but mark
+				 * the fifo as broken for subsequent reset (not much else
+				 * we can do here).
+				 */
+				if (rc) {
+					dev_err(dev, "FSI error %d ack'ing EOT\n", rc);
+					sbefifo->broken = true;
+				}
+
+				/* Tell whether we overflowed */
+				return overflow ? -EOVERFLOW : 0;
+			}
+
+			/* Store it if there is room */
+			if (iov_iter_count(response) >= sizeof(__be32)) {
+				if (copy_to_iter(&data, sizeof(__be32), response) < sizeof(__be32))
+					return -EFAULT;
+			} else {
+				dev_vdbg(dev, "Response overflowed !\n");
+
+				overflow = true;
+			}
+
+			/* Next EOT bit */
+			eot_set <<= 1;
+		}
+	}
+	/* Shouldn't happen */
+	return -EIO;
+}
+
+static int sbefifo_do_command(struct sbefifo *sbefifo,
+			      const __be32 *command, size_t cmd_len,
+			      struct iov_iter *response)
+{
+	/* Try sending the command */
+	int rc = sbefifo_send_command(sbefifo, command, cmd_len);
+	if (rc)
+		return rc;
+
+	/* Now, get the response */
+	return sbefifo_read_response(sbefifo, response);
+}
+
+static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+        struct iov_iter ffdc_iter;
+        struct kvec ffdc_iov;
+	__be32 *ffdc;
+	size_t ffdc_sz;
+	u32 cmd[2];
+	int rc;
+
+	sbefifo->async_ffdc = false;
+	ffdc = vmalloc(SBEFIFO_MAX_FFDC_SIZE);
+	if (!ffdc) {
+		dev_err(dev, "Failed to allocate SBE FFDC buffer\n");
+		return;
+	}
+        ffdc_iov.iov_base = ffdc;
+	ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;;
+        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+	cmd[0] = cpu_to_be32(2);
+	cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
+	rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
+	if (rc != 0) {
+		dev_err(dev, "Error %d retrieving SBE FFDC\n", rc);
+		goto bail;
+	}
+	ffdc_sz = SBEFIFO_MAX_FFDC_SIZE - iov_iter_count(&ffdc_iter);
+	ffdc_sz /= sizeof(__be32);
+	rc = sbefifo_parse_status(dev, SBEFIFO_CMD_GET_SBE_FFDC, ffdc,
+				  ffdc_sz, &ffdc_sz);
+	if (rc != 0) {
+		dev_err(dev, "Error %d decoding SBE FFDC\n", rc);
+		goto bail;
+	}
+	if (ffdc_sz > 0)
+		sbefifo_dump_ffdc(dev, ffdc, ffdc_sz, true);
+ bail:
+	vfree(ffdc);
+
+}
+
+static int __sbefifo_submit(struct sbefifo *sbefifo,
+			    const __be32 *command, size_t cmd_len,
+			    struct iov_iter *response)
+{
+	struct device *dev = &sbefifo->fsi_dev->dev;
+	int rc;
+
+	if (cmd_len < 2 || be32_to_cpu(command[0]) != cmd_len) {
+		dev_vdbg(dev, "Invalid command len %zd (header: %d)\n",
+			 cmd_len, be32_to_cpu(command[0]));
+		return -EINVAL;
+	}
+
+	/* First ensure the HW is in a clean state */
+	rc = sbefifo_cleanup_hw(sbefifo);
+	if (rc)
+		return rc;
+
+	/* Look for async FFDC first if any */
+	if (sbefifo->async_ffdc)
+		sbefifo_collect_async_ffdc(sbefifo);
+
+	rc = sbefifo_do_command(sbefifo, command, cmd_len, response);
+	if (rc != 0 && rc != -EOVERFLOW)
+		goto fail;
+	return rc;
+ fail:
+	/*
+	 * On failure, attempt a reset. Ignore the result, it will mark
+	 * the fifo broken if the reset fails
+	 */
+        sbefifo_request_reset(sbefifo);
+
+	/* Return original error */
+	return rc;
+}
+
+/**
+ * sbefifo_submit() - Submit and SBE fifo command and receive response
+ * @dev: The sbefifo device
+ * @command: The raw command data
+ * @cmd_len: The command size (in 32-bit words)
+ * @response: The output response buffer
+ * @resp_len: In: Response buffer size, Out: Response size
+ *
+ * This will perform the entire operation. If the reponse buffer
+ * overflows, returns -EOVERFLOW
+ */
+int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
+		   __be32 *response, size_t *resp_len)
+{
+	struct sbefifo *sbefifo = dev_get_drvdata(dev);
+        struct iov_iter resp_iter;
+        struct kvec resp_iov;
+	size_t rbytes;
+	int rc;
+
+	if (!dev || !sbefifo)
+		return -ENODEV;
+	if (WARN_ON_ONCE(sbefifo->magic != SBEFIFO_MAGIC))
+		return -ENODEV;
+	if (!resp_len || !command || !response || cmd_len > SBEFIFO_MAX_CMD_LEN)
+		return -EINVAL;
+
+	/* Prepare iov iterator */
+	rbytes = (*resp_len) * sizeof(__be32);
+	resp_iov.iov_base = response;
+	resp_iov.iov_len = rbytes;
+        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+
+	/* Perform the command */
+	mutex_lock(&sbefifo->lock);
+	rc = __sbefifo_submit(sbefifo, command, cmd_len, &resp_iter);
+	mutex_unlock(&sbefifo->lock);
+
+	/* Extract the response length */
+	rbytes -= iov_iter_count(&resp_iter);
+	*resp_len = rbytes / sizeof(__be32);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sbefifo_submit);
+
+/*
+ * Char device interface
+ */
+static int sbefifo_user_open(struct inode *inode, struct file *file)
+{
+	struct sbefifo *sbefifo = container_of(file->private_data,
+					       struct sbefifo, mdev);
+	struct sbefifo_user *user;
+
+	user = kzalloc(sizeof(struct sbefifo_user), GFP_KERNEL);
+	if (!user)
+		return -ENOMEM;
+
+	file->private_data = user;
+	user->sbefifo = sbefifo;
+	user->pending_cmd = (void *)__get_free_page(GFP_KERNEL);
+	if (!user->pending_cmd) {
+		kfree(user);
+		return -ENOMEM;
+	}
+	mutex_init(&user->file_lock);
+
+	return 0;
+}
+
+static ssize_t sbefifo_user_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *offset)
+{
+	struct sbefifo_user *user = file->private_data;
+	struct sbefifo *sbefifo;
+	struct iov_iter resp_iter;
+        struct iovec resp_iov;
+	size_t cmd_len;
+	int rc;
+
+	if (!user)
+		return -EINVAL;
+	sbefifo = user->sbefifo;
+	if (len & 3)
+		return -EINVAL;
+
+	mutex_lock(&user->file_lock);
+
+	/* Cronus relies on -EAGAIN after a short read */
+	if (user->pending_len == 0) {
+		rc = -EAGAIN;
+		goto bail;
+	}
+	if (user->pending_len < 8) {
+		rc = -EINVAL;
+		goto bail;
+	}
+	cmd_len = user->pending_len >> 2;
+
+	/* Prepare iov iterator */
+	resp_iov.iov_base = buf;
+	resp_iov.iov_len = len;
+	iov_iter_init(&resp_iter, WRITE, &resp_iov, 1, len);
+
+	/* Perform the command */
+	mutex_lock(&sbefifo->lock);
+	rc = __sbefifo_submit(sbefifo, user->pending_cmd, cmd_len, &resp_iter);
+	mutex_unlock(&sbefifo->lock);
+	if (rc < 0)
+		goto bail;
+
+	/* Extract the response length */
+	rc = len - iov_iter_count(&resp_iter);
+ bail:
+	user->pending_len = 0;
+	mutex_unlock(&user->file_lock);
+	return rc;
+}
+
+static ssize_t sbefifo_user_write(struct file *file, const char __user *buf,
+				  size_t len, loff_t *offset)
+{
+	struct sbefifo_user *user = file->private_data;
+	struct sbefifo *sbefifo;
+	int rc = len;
+
+	if (!user)
+		return -EINVAL;
+	sbefifo = user->sbefifo;
+	if (len > SBEFIFO_MAX_CMD_LEN)
+		return -EINVAL;
+	if (len & 3)
+		return -EINVAL;
+
+	mutex_lock(&user->file_lock);
+
+	/* Copy the command into the staging buffer */
+	if (copy_from_user(user->pending_cmd, buf, len)) {
+		rc = -EFAULT;
+		goto bail;
+	}
+
+	/* Check for the magic reset command */
+	if (len == 4 && be32_to_cpu(*(__be32 *)user->pending_cmd) ==
+	    SBEFIFO_RESET_MAGIC)  {
+
+		/* Clear out any pending command */
+		user->pending_len = 0;
+
+		/* Trigger reset request */
+		mutex_lock(&sbefifo->lock);
+		rc = sbefifo_request_reset(user->sbefifo);
+		mutex_unlock(&sbefifo->lock);
+		if (rc == 0)
+			rc = 4;
+		goto bail;
+	}
+
+	/* Update the staging buffer size */
+	user->pending_len = len;
+ bail:
+	mutex_unlock(&user->file_lock);
+
+	/* And that's it, we'll issue the command on a read */
+	return rc;
+}
+
+static int sbefifo_user_release(struct inode *inode, struct file *file)
+{
+	struct sbefifo_user *user = file->private_data;
+
+	if (!user)
+		return -EINVAL;
+
+	free_page((unsigned long)user->pending_cmd);
+	kfree(user);
+
+	return 0;
+}
+
+static const struct file_operations sbefifo_fops = {
+	.owner		= THIS_MODULE,
+	.open		= sbefifo_user_open,
+	.read		= sbefifo_user_read,
+	.write		= sbefifo_user_write,
+	.release	= sbefifo_user_release,
+};
+
+/*
+ * Probe/remove
+ */
+
+static int sbefifo_probe(struct device *dev)
+{
+	struct fsi_device *fsi_dev = to_fsi_dev(dev);
+	struct sbefifo *sbefifo;
+	struct device_node *np;
+	struct platform_device *child;
+	char child_name[32];
+	int rc, child_idx = 0;
+
+	dev_dbg(dev, "Found sbefifo device\n");
+
+	sbefifo = devm_kzalloc(dev, sizeof(*sbefifo), GFP_KERNEL);
+	if (!sbefifo)
+		return -ENOMEM;
+	sbefifo->magic = SBEFIFO_MAGIC;
+	sbefifo->fsi_dev = fsi_dev;
+	mutex_init(&sbefifo->lock);
+
+	/*
+	 * Try cleaning up the FIFO. If this fails, we still register the
+	 * driver and will try cleaning things up again on the next access.
+	 */
+	rc = sbefifo_cleanup_hw(sbefifo);
+	if (rc && rc != -ESHUTDOWN)
+		dev_err(dev, "Initial HW cleanup failed, will retry later\n");
+
+	sbefifo->idx = ida_simple_get(&sbefifo_ida, 1, INT_MAX, GFP_KERNEL);
+	snprintf(sbefifo->name, sizeof(sbefifo->name), "sbefifo%d",
+		 sbefifo->idx);
+
+	dev_set_drvdata(dev, sbefifo);
+
+	/* Create misc chardev for userspace access */
+	sbefifo->mdev.minor = MISC_DYNAMIC_MINOR;
+	sbefifo->mdev.fops = &sbefifo_fops;
+	sbefifo->mdev.name = sbefifo->name;
+	sbefifo->mdev.parent = dev;
+	rc = misc_register(&sbefifo->mdev);
+	if (rc) {
+		dev_err(dev, "Failed to register miscdevice: %d\n", rc);
+		ida_simple_remove(&sbefifo_ida, sbefifo->idx);
+		return rc;
+	}
+
+	/* Create platform devs for dts child nodes (occ, etc) */
+	for_each_available_child_of_node(dev->of_node, np) {
+		snprintf(child_name, sizeof(child_name), "%s-dev%d",
+			 sbefifo->name, child_idx++);
+		child = of_platform_device_create(np, child_name, dev);
+		if (!child)
+			dev_warn(dev, "failed to create child %s dev\n",
+				 child_name);
+	}
+
+	return 0;
+}
+
+static int sbefifo_unregister_child(struct device *dev, void *data)
+{
+	struct platform_device *child = to_platform_device(dev);
+
+	of_device_unregister(child);
+	if (dev->of_node)
+		of_node_clear_flag(dev->of_node, OF_POPULATED);
+
+	return 0;
+}
+
+static int sbefifo_remove(struct device *dev)
+{
+	struct sbefifo *sbefifo = dev_get_drvdata(dev);
+
+	dev_dbg(dev, "Removing sbefifo device...\n");
+
+	misc_deregister(&sbefifo->mdev);
+	device_for_each_child(dev, NULL, sbefifo_unregister_child);
+
+	ida_simple_remove(&sbefifo_ida, sbefifo->idx);
+
+	return 0;
+}
+
+static struct fsi_device_id sbefifo_ids[] = {
+	{
+		.engine_type = FSI_ENGID_SBE,
+		.version = FSI_VERSION_ANY,
+	},
+	{ 0 }
+};
+
+static struct fsi_driver sbefifo_drv = {
+	.id_table = sbefifo_ids,
+	.drv = {
+		.name = DEVICE_NAME,
+		.bus = &fsi_bus_type,
+		.probe = sbefifo_probe,
+		.remove = sbefifo_remove,
+	}
+};
+
+static int sbefifo_init(void)
+{
+	return fsi_driver_register(&sbefifo_drv);
+}
+
+static void sbefifo_exit(void)
+{
+	fsi_driver_unregister(&sbefifo_drv);
+
+	ida_destroy(&sbefifo_ida);
+}
+
+module_init(sbefifo_init);
+module_exit(sbefifo_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Brad Bishop <bradleyb@fuzziesquirrel.com>");
+MODULE_AUTHOR("Eddie James <eajames@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Andrew Jeffery <andrew@aj.id.au>");
+MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
+MODULE_DESCRIPTION("Linux device interface to the POWER Self Boot Engine");
diff --git a/include/linux/fsi-sbefifo.h b/include/linux/fsi-sbefifo.h
new file mode 100644
index 000000000000..13f9ebeaa25e
--- /dev/null
+++ b/include/linux/fsi-sbefifo.h
@@ -0,0 +1,33 @@
+/*
+ * SBEFIFO FSI Client device driver
+ *
+ * Copyright (C) IBM Corporation 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERGCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef LINUX_FSI_SBEFIFO_H
+#define LINUX_FSI_SBEFIFO_H
+
+#define SBEFIFO_CMD_PUT_OCC_SRAM	0xa404
+#define SBEFIFO_CMD_GET_OCC_SRAM	0xa403
+#define SBEFIFO_CMD_GET_SBE_FFDC	0xa801
+
+#define SBEFIFO_MAX_FFDC_SIZE		0x2000
+
+struct device;
+
+int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
+		   __be32 *response, size_t *resp_len);
+
+int sbefifo_parse_status(struct device *dev, u16 cmd, __be32 *response,
+			 size_t resp_len, size_t *data_len);
+
+#endif /* LINUX_FSI_SBEFIFO_H */
-- 
cgit v1.2.3


From ce63b2c89cc02d8acf7472272016ecd979fb08d5 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Tue, 12 Jun 2018 15:02:57 -0700
Subject: xen: Sync up with the canonical protocol definitions in Xen

This is the sync up with the canonical definitions of the input,
sound and display protocols in Xen.

Changes to kbdif:
1. Add missing string constants for {feature|request}-raw-pointer
   to align with the rest of the interface file.

2. Add new XenStore feature fields, so it is possible to individually
   control set of exposed virtual devices for each guest OS:
     - set feature-disable-keyboard to 1 if no keyboard device needs
       to be created
     - set feature-disable-pointer to 1 if no pointer device needs
       to be created

3. Move multi-touch device parameters to backend nodes: these are
    described as a part of frontend's XenBus configuration nodes
    while they belong to backend's configuration. Fix this by moving
    the parameters to the proper section.

Unique-id field:
1. Add unique-id XenBus entry for virtual input and display.

2. Change type of unique-id field to string for sndif to align with
display and input protocols.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/xen/interface/io/displif.h |  8 ++++
 include/xen/interface/io/kbdif.h   | 78 ++++++++++++++++++++++++++++----------
 include/xen/interface/io/sndif.h   | 10 ++---
 3 files changed, 71 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h
index 596578d9be3e..fdc279dc4a88 100644
--- a/include/xen/interface/io/displif.h
+++ b/include/xen/interface/io/displif.h
@@ -189,6 +189,13 @@
  *
  *----------------------------- Connector settings ----------------------------
  *
+ * unique-id
+ *      Values:         <string>
+ *
+ *      After device instance initialization each connector is assigned a
+ *      unique ID, so it can be identified by the backend by this ID.
+ *      This can be UUID or such.
+ *
  * resolution
  *      Values:         <width, uint32_t>x<height, uint32_t>
  *
@@ -368,6 +375,7 @@
 #define XENDISPL_FIELD_EVT_CHANNEL	"evt-event-channel"
 #define XENDISPL_FIELD_RESOLUTION	"resolution"
 #define XENDISPL_FIELD_BE_ALLOC		"be-alloc"
+#define XENDISPL_FIELD_UNIQUE_ID	"unique-id"
 
 /*
  ******************************************************************************
diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h
index 2a9510ade701..b4439cf48220 100644
--- a/include/xen/interface/io/kbdif.h
+++ b/include/xen/interface/io/kbdif.h
@@ -51,6 +51,18 @@
  * corresponding entries in XenStore and puts 1 as the value of the entry.
  * If a feature is not supported then 0 must be set or feature entry omitted.
  *
+ * feature-disable-keyboard
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual keyboard device by the
+ *      frontend then this must be set to 1.
+ *
+ * feature-disable-pointer
+ *      Values:         <uint>
+ *
+ *      If there is no need to expose a virtual pointer device by the
+ *      frontend then this must be set to 1.
+ *
  * feature-abs-pointer
  *      Values:         <uint>
  *
@@ -63,6 +75,22 @@
  *      Backends, which support reporting of multi-touch events
  *      should set this to 1.
  *
+ * feature-raw-pointer
+ *      Values:        <uint>
+ *
+ *      Backends, which support reporting raw (unscaled) absolute coordinates
+ *      for pointer devices should set this to 1. Raw (unscaled) values have
+ *      a range of [0, 0x7fff].
+ *
+ *-----------------------  Device Instance Parameters ------------------------
+ *
+ * unique-id
+ *      Values:         <string>
+ *
+ *      After device instance initialization it is assigned a unique ID,
+ *      so every instance of the frontend can be identified by the backend
+ *      by this ID. This can be UUID or such.
+ *
  *------------------------- Pointer Device Parameters ------------------------
  *
  * width
@@ -77,6 +105,25 @@
  *      Maximum Y coordinate (height) to be used by the frontend
  *      while reporting input events, pixels, [0; UINT32_MAX].
  *
+ *----------------------- Multi-touch Device Parameters ----------------------
+ *
+ * multi-touch-num-contacts
+ *      Values:         <uint>
+ *
+ *      Number of simultaneous touches reported.
+ *
+ * multi-touch-width
+ *      Values:         <uint>
+ *
+ *      Width of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
+ * multi-touch-height
+ *      Values:         <uint>
+ *
+ *      Height of the touch area to be used by the frontend
+ *      while reporting input events, pixels, [0; UINT32_MAX].
+ *
  *****************************************************************************
  *                            Frontend XenBus Nodes
  *****************************************************************************
@@ -98,6 +145,13 @@
  *
  *      Request backend to report multi-touch events.
  *
+ * request-raw-pointer
+ *      Values:         <uint>
+ *
+ *      Request backend to report raw unscaled absolute pointer coordinates.
+ *      This option is only valid if request-abs-pointer is also set.
+ *      Raw unscaled coordinates have the range [0, 0x7fff]
+ *
  *----------------------- Request Transport Parameters -----------------------
  *
  * event-channel
@@ -117,25 +171,6 @@
  *
  *      OBSOLETE, not recommended for use.
  *      PFN of the shared page.
- *
- *----------------------- Multi-touch Device Parameters -----------------------
- *
- * multi-touch-num-contacts
- *      Values:         <uint>
- *
- *      Number of simultaneous touches reported.
- *
- * multi-touch-width
- *      Values:         <uint>
- *
- *      Width of the touch area to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
- *
- * multi-touch-height
- *      Values:         <uint>
- *
- *      Height of the touch area to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
  */
 
 /*
@@ -163,9 +198,13 @@
 
 #define XENKBD_DRIVER_NAME		"vkbd"
 
+#define XENKBD_FIELD_FEAT_DSBL_KEYBRD	"feature-disable-keyboard"
+#define XENKBD_FIELD_FEAT_DSBL_POINTER	"feature-disable-pointer"
 #define XENKBD_FIELD_FEAT_ABS_POINTER	"feature-abs-pointer"
+#define XENKBD_FIELD_FEAT_RAW_POINTER	"feature-raw-pointer"
 #define XENKBD_FIELD_FEAT_MTOUCH	"feature-multi-touch"
 #define XENKBD_FIELD_REQ_ABS_POINTER	"request-abs-pointer"
+#define XENKBD_FIELD_REQ_RAW_POINTER	"request-raw-pointer"
 #define XENKBD_FIELD_REQ_MTOUCH		"request-multi-touch"
 #define XENKBD_FIELD_RING_GREF		"page-gref"
 #define XENKBD_FIELD_EVT_CHANNEL	"event-channel"
@@ -174,6 +213,7 @@
 #define XENKBD_FIELD_MT_WIDTH		"multi-touch-width"
 #define XENKBD_FIELD_MT_HEIGHT		"multi-touch-height"
 #define XENKBD_FIELD_MT_NUM_CONTACTS	"multi-touch-num-contacts"
+#define XENKBD_FIELD_UNIQUE_ID		"unique-id"
 
 /* OBSOLETE, not recommended for use */
 #define XENKBD_FIELD_RING_REF		"page-ref"
diff --git a/include/xen/interface/io/sndif.h b/include/xen/interface/io/sndif.h
index 78bb5d9f8d83..2aac8f73614c 100644
--- a/include/xen/interface/io/sndif.h
+++ b/include/xen/interface/io/sndif.h
@@ -278,13 +278,11 @@
  *      defined under the same device.
  *
  * unique-id
- *      Values:         <uint32_t>
+ *      Values:         <string>
  *
- *      After stream initialization it is assigned a unique ID (within the front
- *      driver), so every stream of the frontend can be identified by the
- *      backend by this ID. This is not equal to stream-idx as the later is
- *      zero based within the device, but this index is contigous within the
- *      driver.
+ *      After stream initialization it is assigned a unique ID, so every
+ *      stream of the frontend can be identified by the backend by this ID.
+ *      This can be UUID or such.
  *
  *-------------------- Stream Request Transport Parameters --------------------
  *
-- 
cgit v1.2.3


From 2e076f199097d670ce5e5492cea57f552b93bba9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 28 May 2018 15:47:40 +0200
Subject: nl80211: add scan features for improved scan privacy

Add the scan flags for randomized SN and minimized probe request
content for improved scan privacy.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/uapi/linux/nl80211.h | 15 +++++++++++++++
 net/wireless/nl80211.c       |  8 +++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 28b36545de24..49f718e821a3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5133,6 +5133,11 @@ enum nl80211_feature_flags {
  *	support to nl80211.
  * @NL80211_EXT_FEATURE_TXQS: Driver supports FQ-CoDel-enabled intermediate
  *      TXQs.
+ * @NL80211_EXT_FEATURE_SCAN_RANDOM_SN: Driver/device supports randomizing the
+ *	SN in probe request frames if requested by %NL80211_SCAN_FLAG_RANDOM_SN.
+ * @NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT: Driver/device can omit all data
+ *	except for supported rates from the probe request content if requested
+ *	by the %NL80211_SCAN_FLAG_MIN_PREQ_CONTENT flag.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -5167,6 +5172,8 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211,
 	NL80211_EXT_FEATURE_DATA_ACK_SIGNAL_SUPPORT,
 	NL80211_EXT_FEATURE_TXQS,
+	NL80211_EXT_FEATURE_SCAN_RANDOM_SN,
+	NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -5272,6 +5279,12 @@ enum nl80211_timeout_reason {
  *	possible scan results. This flag hints the driver to use the best
  *	possible scan configuration to improve the accuracy in scanning.
  *	Latency and power use may get impacted with this flag.
+ * @NL80211_SCAN_FLAG_RANDOM_SN: randomize the sequence number in probe
+ *	request frames from this scan to avoid correlation/tracking being
+ *	possible.
+ * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to
+ *	only have supported rates and no additional capabilities (unless
+ *	added by userspace explicitly.)
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -5285,6 +5298,8 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_SPAN				= 1<<8,
 	NL80211_SCAN_FLAG_LOW_POWER				= 1<<9,
 	NL80211_SCAN_FLAG_HIGH_ACCURACY				= 1<<10,
+	NL80211_SCAN_FLAG_RANDOM_SN				= 1<<11,
+	NL80211_SCAN_FLAG_MIN_PREQ_CONTENT			= 1<<12,
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6c3ded1223fb..d2677259e13e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6925,7 +6925,13 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) ||
 	    !nl80211_check_scan_feat(wiphy, *flags,
 				     NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE,
-				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_RANDOM_SN,
+				     NL80211_EXT_FEATURE_SCAN_RANDOM_SN) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_MIN_PREQ_CONTENT,
+				     NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT))
 		return -EOPNOTSUPP;
 
 	if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
-- 
cgit v1.2.3


From 2d4f545cb14fa2d9865124183f3100d46dc9b3b5 Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Mon, 4 Jun 2018 23:34:06 +0200
Subject: rfkill: Fixes and cleanup of kernel-doc in the header file

Fixes kerneldoc parameter names to match implementation,
rfkill_set_hw_state(), rfkill_set_sw_state().
Fix description of rfkill_resume_polling().
Fix typos in documentation of rfkill_find_type().
Consistently start kerneldoc description with uppercase
letter.

Signed-off-by: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/rfkill.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index e6a0031d1b1f..8ad2487a86d5 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -66,7 +66,7 @@ struct rfkill_ops {
 
 #if defined(CONFIG_RFKILL) || defined(CONFIG_RFKILL_MODULE)
 /**
- * rfkill_alloc - allocate rfkill structure
+ * rfkill_alloc - Allocate rfkill structure
  * @name: name of the struct -- the string is not copied internally
  * @parent: device that has rf switch on it
  * @type: type of the switch (RFKILL_TYPE_*)
@@ -112,7 +112,7 @@ void rfkill_pause_polling(struct rfkill *rfkill);
 /**
  * rfkill_resume_polling(struct rfkill *rfkill)
  *
- * Pause polling -- say transmitter is off for other reasons.
+ * Resume polling
  * NOTE: not necessary for suspend/resume -- in that case the
  * core stops polling anyway
  */
@@ -130,7 +130,7 @@ void rfkill_resume_polling(struct rfkill *rfkill);
 void rfkill_unregister(struct rfkill *rfkill);
 
 /**
- * rfkill_destroy - free rfkill structure
+ * rfkill_destroy - Free rfkill structure
  * @rfkill: rfkill structure to be destroyed
  *
  * Destroys the rfkill structure.
@@ -140,7 +140,7 @@ void rfkill_destroy(struct rfkill *rfkill);
 /**
  * rfkill_set_hw_state - Set the internal rfkill hardware block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current hardware block state to set
+ * @blocked: the current hardware block state to set
  *
  * rfkill drivers that get events when the hard-blocked state changes
  * use this function to notify the rfkill core (and through that also
@@ -161,7 +161,7 @@ bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_set_sw_state - Set the internal rfkill software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that get events when the soft-blocked state changes
  * (yes, some platforms directly act on input but allow changing again)
@@ -183,7 +183,7 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked);
 /**
  * rfkill_init_sw_state - Initialize persistent software block state
  * @rfkill: pointer to the rfkill class to modify.
- * @state: the current software block state to set
+ * @blocked: the current software block state to set
  *
  * rfkill drivers that preserve their software block state over power off
  * use this function to notify the rfkill core (and through that also
@@ -208,17 +208,17 @@ void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked);
 void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw);
 
 /**
- * rfkill_blocked - query rfkill block
+ * rfkill_blocked - Query rfkill block state
  *
  * @rfkill: rfkill struct to query
  */
 bool rfkill_blocked(struct rfkill *rfkill);
 
 /**
- * rfkill_find_type - Helpper for finding rfkill type by name
+ * rfkill_find_type - Helper for finding rfkill type by name
  * @name: the name of the type
  *
- * Returns enum rfkill_type that conrresponds the name.
+ * Returns enum rfkill_type that corresponds to the name.
  */
 enum rfkill_type rfkill_find_type(const char *name);
 
@@ -296,7 +296,7 @@ static inline enum rfkill_type rfkill_find_type(const char *name)
 const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
 
 /**
- * rfkill_set_led_trigger_name -- set the LED trigger name
+ * rfkill_set_led_trigger_name - Set the LED trigger name
  * @rfkill: rfkill struct
  * @name: LED trigger name
  *
-- 
cgit v1.2.3


From c4cbaf7973a794839af080f13748335976cf3f3f Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:42 +0300
Subject: cfg80211: Add support for HE

Add support for the HE in cfg80211 and also add userspace API to
nl80211 to send rate information out, conforming with P802.11ax_D2.0.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/linux/ieee80211.h    | 427 +++++++++++++++++++++++++++++++++++++++++++
 include/net/cfg80211.h       | 106 ++++++++++-
 include/uapi/linux/nl80211.h |  87 ++++++++-
 net/wireless/core.c          |  21 ++-
 net/wireless/nl80211.c       |  99 +++++++++-
 net/wireless/util.c          |  82 +++++++++
 6 files changed, 817 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8fe7e4306816..e6a6503bfa33 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1539,6 +1539,106 @@ struct ieee80211_vht_operation {
 	__le16 basic_mcs_set;
 } __packed;
 
+/**
+ * struct ieee80211_he_cap_elem - HE capabilities element
+ *
+ * This structure is the "HE capabilities element" fixed fields as
+ * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3
+ */
+struct ieee80211_he_cap_elem {
+	u8 mac_cap_info[5];
+	u8 phy_cap_info[9];
+} __packed;
+
+#define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN	5
+
+/**
+ * enum ieee80211_he_mcs_support - HE MCS support definitions
+ * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
+ *	number of streams
+ * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported
+ * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported
+ * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported
+ *
+ * These definitions are used in each 2-bit subfield of the rx_mcs_*
+ * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are
+ * both split into 8 subfields by number of streams. These values indicate
+ * which MCSes are supported for the number of streams the value appears
+ * for.
+ */
+enum ieee80211_he_mcs_support {
+	IEEE80211_HE_MCS_SUPPORT_0_7	= 0,
+	IEEE80211_HE_MCS_SUPPORT_0_9	= 1,
+	IEEE80211_HE_MCS_SUPPORT_0_11	= 2,
+	IEEE80211_HE_MCS_NOT_SUPPORTED	= 3,
+};
+
+/**
+ * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field
+ *
+ * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field
+ * described in P802.11ax_D2.0 section 9.4.2.237.4
+ *
+ * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     widths less than 80MHz.
+ * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel
+ *     width 160MHz.
+ * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for
+ *     channel width 80p80MHz.
+ */
+struct ieee80211_he_mcs_nss_supp {
+	__le16 rx_mcs_80;
+	__le16 tx_mcs_80;
+	__le16 rx_mcs_160;
+	__le16 tx_mcs_160;
+	__le16 rx_mcs_80p80;
+	__le16 tx_mcs_80p80;
+} __packed;
+
+/**
+ * struct ieee80211_he_operation - HE capabilities element
+ *
+ * This structure is the "HE operation element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.238
+ */
+struct ieee80211_he_operation {
+	__le32 he_oper_params;
+	__le16 he_mcs_nss_set;
+	/* Optional 0,1,3 or 4 bytes: depends on @he_oper_params */
+	u8 optional[0];
+} __packed;
+
+/**
+ * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
+ *
+ * This structure is the "MU AC Parameter Record" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_he_mu_edca_param_ac_rec {
+	u8 aifsn;
+	u8 ecw_min_max;
+	u8 mu_edca_timer;
+} __packed;
+
+/**
+ * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element
+ *
+ * This structure is the "MU EDCA Parameter Set element" fields as
+ * described in P802.11ax_D2.0 section 9.4.2.240
+ */
+struct ieee80211_mu_edca_param_set {
+	u8 mu_qos_info;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_be;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_bk;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_vi;
+	struct ieee80211_he_mu_edca_param_ac_rec ac_vo;
+} __packed;
 
 /* 802.11ac VHT Capabilities */
 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895			0x00000000
@@ -1577,6 +1677,328 @@ struct ieee80211_vht_operation {
 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
 
+/* 802.11ax HE MAC capabilities */
+#define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
+#define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
+#define IEEE80211_HE_MAC_CAP0_TWT_RES				0x04
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP		0x00
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1		0x08
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2		0x10
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3		0x18
+#define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK			0x18
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1		0x00
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2		0x20
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4		0x40
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8		0x60
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16		0x80
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32		0xa0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64		0xc0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED	0xe0
+#define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK		0xe0
+
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED		0x00
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128			0x01
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256			0x02
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512			0x03
+#define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK		0x03
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US		0x00
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US		0x04
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US		0x08
+#define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK		0x0c
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1		0x00
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2		0x10
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3		0x20
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4		0x30
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5		0x40
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6		0x50
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7		0x60
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK		0x70
+
+/* Link adaptation is split between byte HE_MAC_CAP1 and
+ * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
+ * in which case the following values apply:
+ * 0 = No feedback.
+ * 1 = reserved.
+ * 2 = Unsolicited feedback.
+ * 3 = both
+ */
+#define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION			0x80
+
+#define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION			0x01
+#define IEEE80211_HE_MAC_CAP2_ALL_ACK				0x02
+#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED			0x04
+#define IEEE80211_HE_MAC_CAP2_BSR				0x08
+#define IEEE80211_HE_MAC_CAP2_BCAST_TWT				0x10
+#define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP			0x20
+#define IEEE80211_HE_MAC_CAP2_MU_CASCADING			0x40
+#define IEEE80211_HE_MAC_CAP2_ACK_EN				0x80
+
+#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU	0x01
+#define IEEE80211_HE_MAC_CAP3_OMI_CONTROL			0x02
+#define IEEE80211_HE_MAC_CAP3_OFDMA_RA				0x04
+
+/* The maximum length of an A-MDPU is defined by the combination of the Maximum
+ * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the
+ * same field in the HE capabilities.
+ */
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT	0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1		0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2		0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED	0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK		0x18
+#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG			0x20
+#define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
+#define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
+
+#define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
+#define IEEE80211_HE_MAC_CAP4_QTP				0x02
+#define IEEE80211_HE_MAC_CAP4_BQR				0x04
+#define IEEE80211_HE_MAC_CAP4_SR_RESP				0x08
+#define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
+#define IEEE80211_HE_MAC_CAP4_OPS				0x20
+#define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
+
+/* 802.11ax HE PHY capabilities */
+#define IEEE80211_HE_PHY_CAP0_DUAL_BAND					0x01
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G		0x08
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G	0x10
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G	0x20
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G	0x40
+#define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK			0xfe
+
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ	0x01
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ	0x02
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ	0x04
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ	0x08
+#define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK			0x0f
+#define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A				0x10
+#define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD			0x20
+#define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US		0x40
+/* Midamble RX Max NSTS is split between byte #2 and byte #3 */
+#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS			0x80
+
+#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS			0x01
+#define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US			0x02
+#define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ			0x04
+#define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ			0x08
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_TX				0x10
+#define IEEE80211_HE_PHY_CAP2_DOPPLER_RX				0x20
+
+/* Note that the meaning of UL MU below is different between an AP and a non-AP
+ * sta, where in the AP case it indicates support for Rx and in the non-AP sta
+ * case it indicates support for Tx.
+ */
+#define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO			0x40
+#define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO			0x80
+
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM			0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK			0x01
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK			0x02
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM			0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK			0x03
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1				0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2				0x04
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM			0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK			0x08
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK			0x10
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM			0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK			0x18
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1				0x00
+#define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2				0x20
+#define IEEE80211_HE_PHY_CAP3_RX_HE_MU_PPDU_FROM_NON_AP_STA		0x40
+#define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER				0x80
+
+#define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE				0x01
+#define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER				0x02
+
+/* Minimal allowed value of Max STS under 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4		0x0c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5		0x10
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6		0x14
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7		0x18
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8		0x1c
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK	0x1c
+
+/* Minimal allowed value of Max STS above 80MHz is 3 */
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4		0x60
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5		0x80
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6		0xa0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7		0xc0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8		0xe0
+#define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK	0xe0
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1	0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2	0x01
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3	0x02
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4	0x03
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5	0x04
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6	0x05
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7	0x06
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8	0x07
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK	0x07
+
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1	0x00
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2	0x08
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3	0x10
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4	0x18
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5	0x20
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6	0x28
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7	0x30
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8	0x38
+#define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK	0x38
+
+#define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK				0x40
+#define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK				0x80
+
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU			0x01
+#define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU			0x02
+#define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB			0x04
+#define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB			0x08
+#define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB				0x10
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE			0x20
+#define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO		0x40
+#define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT			0x80
+
+#define IEEE80211_HE_PHY_CAP7_SRP_BASED_SR				0x01
+#define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR			0x02
+#define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI		0x04
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_1					0x08
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_2					0x10
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_3					0x18
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_4					0x20
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_5					0x28
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_6					0x30
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_7					0x38
+#define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK				0x38
+#define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ			0x40
+#define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ			0x80
+
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI		0x01
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G		0x02
+#define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU			0x04
+#define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU			0x08
+#define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI		0x10
+#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF			0x20
+
+/* 802.11ax HE TX/RX MCS NSS Support  */
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS			(6)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS			(11)
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK			0x07c0
+#define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK			0xf800
+
+/* TX/RX HE MCS Support field Highest MCS subfield encoding */
+enum ieee80211_he_highest_mcs_supported_subfield_enc {
+	HIGHEST_MCS_SUPPORTED_MCS7 = 0,
+	HIGHEST_MCS_SUPPORTED_MCS8,
+	HIGHEST_MCS_SUPPORTED_MCS9,
+	HIGHEST_MCS_SUPPORTED_MCS10,
+	HIGHEST_MCS_SUPPORTED_MCS11,
+};
+
+/* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */
+static inline u8
+ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap)
+{
+	u8 count = 4;
+
+	if (he_cap->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G)
+		count += 4;
+
+	if (he_cap->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
+		count += 4;
+
+	return count;
+}
+
+/* 802.11ax HE PPE Thresholds */
+#define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS			(1)
+#define IEEE80211_PPE_THRES_NSS_POS				(0)
+#define IEEE80211_PPE_THRES_NSS_MASK				(7)
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU	\
+	(BIT(5) | BIT(6))
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK		0x78
+#define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS		(3)
+#define IEEE80211_PPE_THRES_INFO_PPET_SIZE			(3)
+
+/*
+ * Calculate 802.11ax HE capabilities IE PPE field size
+ * Input: Header byte of ppe_thres (first byte), and HE capa IE's PHY cap u8*
+ */
+static inline u8
+ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
+{
+	u8 n;
+
+	if ((phy_cap_info[6] &
+	     IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+		return 0;
+
+	n = hweight8(ppe_thres_hdr &
+		     IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+	n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >>
+		   IEEE80211_PPE_THRES_NSS_POS));
+
+	/*
+	 * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+	 * total size.
+	 */
+	n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+	n = DIV_ROUND_UP(n, 8);
+
+	return n;
+}
+
+/* HE Operation defines */
+#define IEEE80211_HE_OPERATION_BSS_COLOR_MASK			0x0000003f
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK		0x000001c0
+#define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET		6
+#define IEEE80211_HE_OPERATION_TWT_REQUIRED			0x00000200
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK		0x000ffc00
+#define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET		10
+#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x000100000
+#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x000200000
+#define IEEE80211_HE_OPERATION_MULTI_BSSID_AP			0x10000000
+#define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR		0x20000000
+#define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED		0x40000000
+
+/*
+ * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
+ * @he_oper_ie: byte data of the He Operations IE, stating from the the byte
+ *	after the ext ID byte. It is assumed that he_oper_ie has at least
+ *	sizeof(struct ieee80211_he_operation) bytes, checked already in
+ *	ieee802_11_parse_elems_crc()
+ * @return the actual size of the IE data (not including header), or 0 on error
+ */
+static inline u8
+ieee80211_he_oper_size(const u8 *he_oper_ie)
+{
+	struct ieee80211_he_operation *he_oper = (void *)he_oper_ie;
+	u8 oper_len = sizeof(struct ieee80211_he_operation);
+	u32 he_oper_params;
+
+	/* Make sure the input is not NULL */
+	if (!he_oper_ie)
+		return 0;
+
+	/* Calc required length */
+	he_oper_params = le32_to_cpu(he_oper->he_oper_params);
+	if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO)
+		oper_len += 3;
+	if (he_oper_params & IEEE80211_HE_OPERATION_MULTI_BSSID_AP)
+		oper_len++;
+
+	/* Add the first byte (extension ID) to the total length */
+	oper_len++;
+
+	return oper_len;
+}
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -1992,6 +2414,11 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
 	WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
 	WLAN_EID_EXT_FILS_NONCE = 13,
+	WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14,
+	WLAN_EID_EXT_HE_CAPABILITY = 35,
+	WLAN_EID_EXT_HE_OPERATION = 36,
+	WLAN_EID_EXT_UORA = 37,
+	WLAN_EID_EXT_HE_MU_EDCA = 38,
 };
 
 /* Action category code */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5fbfe61f41c6..9ba1f289c439 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -285,6 +285,41 @@ struct ieee80211_sta_vht_cap {
 	struct ieee80211_vht_mcs_info vht_mcs;
 };
 
+#define IEEE80211_HE_PPE_THRES_MAX_LEN		25
+
+/**
+ * struct ieee80211_sta_he_cap - STA's HE capabilities
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ax HE capabilities for a STA.
+ *
+ * @has_he: true iff HE data is valid.
+ * @he_cap_elem: Fixed portion of the HE capabilities element.
+ * @he_mcs_nss_supp: The supported NSS/MCS combinations.
+ * @ppe_thres: Holds the PPE Thresholds data.
+ */
+struct ieee80211_sta_he_cap {
+	bool has_he;
+	struct ieee80211_he_cap_elem he_cap_elem;
+	struct ieee80211_he_mcs_nss_supp he_mcs_nss_supp;
+	u8 ppe_thres[IEEE80211_HE_PPE_THRES_MAX_LEN];
+};
+
+/**
+ * struct ieee80211_sband_iftype_data
+ *
+ * This structure encapsulates sband data that is relevant for the
+ * interface types defined in @types_mask.  Each type in the
+ * @types_mask must be unique across all instances of iftype_data.
+ *
+ * @types_mask: interface types mask
+ * @he_cap: holds the HE capabilities
+ */
+struct ieee80211_sband_iftype_data {
+	u16 types_mask;
+	struct ieee80211_sta_he_cap he_cap;
+};
+
 /**
  * struct ieee80211_supported_band - frequency band definition
  *
@@ -301,6 +336,11 @@ struct ieee80211_sta_vht_cap {
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
+ * @n_iftype_data: number of iftype data entries
+ * @iftype_data: interface type data entries.  Note that the bits in
+ *	@types_mask inside this structure cannot overlap (i.e. only
+ *	one occurrence of each type is allowed across all instances of
+ *	iftype_data).
  */
 struct ieee80211_supported_band {
 	struct ieee80211_channel *channels;
@@ -310,8 +350,55 @@ struct ieee80211_supported_band {
 	int n_bitrates;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
+	u16 n_iftype_data;
+	const struct ieee80211_sband_iftype_data *iftype_data;
 };
 
+/**
+ * ieee80211_get_sband_iftype_data - return sband data for a given iftype
+ * @sband: the sband to search for the STA on
+ * @iftype: enum nl80211_iftype
+ *
+ * Return: pointer to struct ieee80211_sband_iftype_data, or NULL is none found
+ */
+static inline const struct ieee80211_sband_iftype_data *
+ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
+				u8 iftype)
+{
+	int i;
+
+	if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
+		return NULL;
+
+	for (i = 0; i < sband->n_iftype_data; i++)  {
+		const struct ieee80211_sband_iftype_data *data =
+			&sband->iftype_data[i];
+
+		if (data->types_mask & BIT(iftype))
+			return data;
+	}
+
+	return NULL;
+}
+
+/**
+ * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA
+ * @sband: the sband to search for the STA on
+ *
+ * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found
+ */
+static inline const struct ieee80211_sta_he_cap *
+ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
+{
+	const struct ieee80211_sband_iftype_data *data =
+		ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION);
+
+	if (data && data->he_cap.has_he)
+		return &data->he_cap;
+
+	return NULL;
+}
+
 /**
  * wiphy_read_of_freq_limits - read frequency limits from device tree
  *
@@ -899,6 +986,8 @@ enum station_parameters_apply_mask {
  * @opmode_notif: operating mode field from Operating Mode Notification
  * @opmode_notif_used: information if operating mode field is used
  * @support_p2p_ps: information if station supports P2P PS mechanism
+ * @he_capa: HE capabilities of station
+ * @he_capa_len: the length of the HE capabilities
  */
 struct station_parameters {
 	const u8 *supported_rates;
@@ -926,6 +1015,8 @@ struct station_parameters {
 	u8 opmode_notif;
 	bool opmode_notif_used;
 	int support_p2p_ps;
+	const struct ieee80211_he_cap_elem *he_capa;
+	u8 he_capa_len;
 };
 
 /**
@@ -1000,12 +1091,14 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
  * @RATE_INFO_FLAGS_60G: 60GHz MCS
+ * @RATE_INFO_FLAGS_HE_MCS: HE MCS information
  */
 enum rate_info_flags {
 	RATE_INFO_FLAGS_MCS			= BIT(0),
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
 	RATE_INFO_FLAGS_SHORT_GI		= BIT(2),
 	RATE_INFO_FLAGS_60G			= BIT(3),
+	RATE_INFO_FLAGS_HE_MCS			= BIT(4),
 };
 
 /**
@@ -1019,6 +1112,7 @@ enum rate_info_flags {
  * @RATE_INFO_BW_40: 40 MHz bandwidth
  * @RATE_INFO_BW_80: 80 MHz bandwidth
  * @RATE_INFO_BW_160: 160 MHz bandwidth
+ * @RATE_INFO_BW_HE_RU: bandwidth determined by HE RU allocation
  */
 enum rate_info_bw {
 	RATE_INFO_BW_20 = 0,
@@ -1027,6 +1121,7 @@ enum rate_info_bw {
 	RATE_INFO_BW_40,
 	RATE_INFO_BW_80,
 	RATE_INFO_BW_160,
+	RATE_INFO_BW_HE_RU,
 };
 
 /**
@@ -1035,10 +1130,14 @@ enum rate_info_bw {
  * Information about a receiving or transmitting bitrate
  *
  * @flags: bitflag of flags from &enum rate_info_flags
- * @mcs: mcs index if struct describes a 802.11n bitrate
+ * @mcs: mcs index if struct describes an HT/VHT/HE rate
  * @legacy: bitrate in 100kbit/s for 802.11abg
- * @nss: number of streams (VHT only)
+ * @nss: number of streams (VHT & HE only)
  * @bw: bandwidth (from &enum rate_info_bw)
+ * @he_gi: HE guard interval (from &enum nl80211_he_gi)
+ * @he_dcm: HE DCM value
+ * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
+ *	only valid if bw is %RATE_INFO_BW_HE_RU)
  */
 struct rate_info {
 	u8 flags;
@@ -1046,6 +1145,9 @@ struct rate_info {
 	u16 legacy;
 	u8 nss;
 	u8 bw;
+	u8 he_gi;
+	u8 he_dcm;
+	u8 he_ru_alloc;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 49f718e821a3..f82ce3c89ab7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2237,6 +2237,9 @@ enum nl80211_commands {
  *      enforced.
  * @NL80211_ATTR_TXQ_QUANTUM: TXQ scheduler quantum (bytes). Number of bytes
  *      a flow is assigned on each round of the DRR scheduler.
+ * @NL80211_ATTR_HE_CAPABILITY: HE Capability information element (from
+ *	association request when used with NL80211_CMD_NEW_STATION). Can be set
+ *	only if %NL80211_STA_FLAG_WME is set.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2677,6 +2680,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_TXQ_MEMORY_LIMIT,
 	NL80211_ATTR_TXQ_QUANTUM,
 
+	NL80211_ATTR_HE_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2726,7 +2731,8 @@ enum nl80211_attrs {
 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY	24
 #define NL80211_HT_CAPABILITY_LEN		26
 #define NL80211_VHT_CAPABILITY_LEN		12
-
+#define NL80211_HE_MIN_CAPABILITY_LEN           16
+#define NL80211_HE_MAX_CAPABILITY_LEN           51
 #define NL80211_MAX_NR_CIPHER_SUITES		5
 #define NL80211_MAX_NR_AKM_SUITES		2
 
@@ -2853,6 +2859,38 @@ struct nl80211_sta_flag_update {
 	__u32 set;
 } __attribute__((packed));
 
+/**
+ * enum nl80211_he_gi - HE guard interval
+ * @NL80211_RATE_INFO_HE_GI_0_8: 0.8 usec
+ * @NL80211_RATE_INFO_HE_GI_1_6: 1.6 usec
+ * @NL80211_RATE_INFO_HE_GI_3_2: 3.2 usec
+ */
+enum nl80211_he_gi {
+	NL80211_RATE_INFO_HE_GI_0_8,
+	NL80211_RATE_INFO_HE_GI_1_6,
+	NL80211_RATE_INFO_HE_GI_3_2,
+};
+
+/**
+ * enum nl80211_he_ru_alloc - HE RU allocation values
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_26: 26-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_52: 52-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_106: 106-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_242: 242-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_484: 484-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_996: 996-tone RU allocation
+ * @NL80211_RATE_INFO_HE_RU_ALLOC_2x996: 2x996-tone RU allocation
+ */
+enum nl80211_he_ru_alloc {
+	NL80211_RATE_INFO_HE_RU_ALLOC_26,
+	NL80211_RATE_INFO_HE_RU_ALLOC_52,
+	NL80211_RATE_INFO_HE_RU_ALLOC_106,
+	NL80211_RATE_INFO_HE_RU_ALLOC_242,
+	NL80211_RATE_INFO_HE_RU_ALLOC_484,
+	NL80211_RATE_INFO_HE_RU_ALLOC_996,
+	NL80211_RATE_INFO_HE_RU_ALLOC_2x996,
+};
+
 /**
  * enum nl80211_rate_info - bitrate information
  *
@@ -2885,6 +2923,13 @@ struct nl80211_sta_flag_update {
  * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is
  *	a legacy rate and will be reported as the actual bitrate, i.e.
  *	a quarter of the base (20 MHz) rate
+ * @NL80211_RATE_INFO_HE_MCS: HE MCS index (u8, 0-11)
+ * @NL80211_RATE_INFO_HE_NSS: HE NSS value (u8, 1-8)
+ * @NL80211_RATE_INFO_HE_GI: HE guard interval identifier
+ *	(u8, see &enum nl80211_he_gi)
+ * @NL80211_RATE_INFO_HE_DCM: HE DCM value (u8, 0/1)
+ * @NL80211_RATE_INFO_RU_ALLOC: HE RU allocation, if not present then
+ *	non-OFDMA was used (u8, see &enum nl80211_he_ru_alloc)
  * @__NL80211_RATE_INFO_AFTER_LAST: internal use
  */
 enum nl80211_rate_info {
@@ -2901,6 +2946,11 @@ enum nl80211_rate_info {
 	NL80211_RATE_INFO_160_MHZ_WIDTH,
 	NL80211_RATE_INFO_10_MHZ_WIDTH,
 	NL80211_RATE_INFO_5_MHZ_WIDTH,
+	NL80211_RATE_INFO_HE_MCS,
+	NL80211_RATE_INFO_HE_NSS,
+	NL80211_RATE_INFO_HE_GI,
+	NL80211_RATE_INFO_HE_DCM,
+	NL80211_RATE_INFO_HE_RU_ALLOC,
 
 	/* keep last */
 	__NL80211_RATE_INFO_AFTER_LAST,
@@ -3166,6 +3216,38 @@ enum nl80211_mpath_info {
 	NL80211_MPATH_INFO_MAX = __NL80211_MPATH_INFO_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_band_iftype_attr - Interface type data attributes
+ *
+ * @__NL80211_BAND_IFTYPE_ATTR_INVALID: attribute number 0 is reserved
+ * @NL80211_BAND_IFTYPE_ATTR_IFTYPES: nested attribute containing a flag attribute
+ *     for each interface type that supports the band data
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC: HE MAC capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY: HE PHY capabilities as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET: HE supported NSS/MCS as in HE
+ *     capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE: HE PPE thresholds information as
+ *     defined in HE capabilities IE
+ * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
+ *     defined
+ * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
+ */
+enum nl80211_band_iftype_attr {
+	__NL80211_BAND_IFTYPE_ATTR_INVALID,
+
+	NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+
+	/* keep last */
+	__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
+	NL80211_BAND_IFTYPE_ATTR_MAX = __NL80211_BAND_IFTYPE_ATTR_AFTER_LAST - 1
+};
+
 /**
  * enum nl80211_band_attr - band attributes
  * @__NL80211_BAND_ATTR_INVALID: attribute number 0 is reserved
@@ -3181,6 +3263,8 @@ enum nl80211_mpath_info {
  * @NL80211_BAND_ATTR_VHT_MCS_SET: 32-byte attribute containing the MCS set as
  *	defined in 802.11ac
  * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
+ * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
+ *	attributes from &enum nl80211_band_iftype_attr
  * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
  * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
  */
@@ -3196,6 +3280,7 @@ enum nl80211_band_attr {
 
 	NL80211_BAND_ATTR_VHT_MCS_SET,
 	NL80211_BAND_ATTR_VHT_CAPA,
+	NL80211_BAND_ATTR_IFTYPE_DATA,
 
 	/* keep last */
 	__NL80211_BAND_ATTR_AFTER_LAST,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5fe35aafdd9c..d23abc619e77 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2015	Intel Deutschland GmbH
+ * Copyright 2015-2017	Intel Deutschland GmbH
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -744,6 +744,8 @@ int wiphy_register(struct wiphy *wiphy)
 
 	/* sanity check supported bands/channels */
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		u16 types = 0;
+
 		sband = wiphy->bands[band];
 		if (!sband)
 			continue;
@@ -788,6 +790,23 @@ int wiphy_register(struct wiphy *wiphy)
 			sband->channels[i].band = band;
 		}
 
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			const struct ieee80211_sband_iftype_data *iftd;
+
+			iftd = &sband->iftype_data[i];
+
+			if (WARN_ON(!iftd->types_mask))
+				return -EINVAL;
+			if (WARN_ON(types & iftd->types_mask))
+				return -EINVAL;
+
+			/* at least one piece of information must be present */
+			if (WARN_ON(!iftd->he_cap.has_he))
+				return -EINVAL;
+
+			types |= iftd->types_mask;
+		}
+
 		have_band = true;
 	}
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7b21914ae18b..0ccce338a66e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -428,6 +428,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+	[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
+					 .len = NL80211_HE_MAX_CAPABILITY_LEN },
 };
 
 /* policy for the key attributes */
@@ -1324,6 +1326,34 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
 	return 0;
 }
 
+static int
+nl80211_send_iftype_data(struct sk_buff *msg,
+			 const struct ieee80211_sband_iftype_data *iftdata)
+{
+	const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
+
+	if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+				iftdata->types_mask))
+		return -ENOBUFS;
+
+	if (he_cap->has_he) {
+		if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+			    sizeof(he_cap->he_cap_elem.mac_cap_info),
+			    he_cap->he_cap_elem.mac_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+			    sizeof(he_cap->he_cap_elem.phy_cap_info),
+			    he_cap->he_cap_elem.phy_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+			    sizeof(he_cap->he_mcs_nss_supp),
+			    &he_cap->he_mcs_nss_supp) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+			    sizeof(he_cap->ppe_thres), he_cap->ppe_thres))
+			return -ENOBUFS;
+	}
+
+	return 0;
+}
+
 static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 				      struct ieee80211_supported_band *sband)
 {
@@ -1353,6 +1383,32 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 			 sband->vht_cap.cap)))
 		return -ENOBUFS;
 
+	if (sband->n_iftype_data) {
+		struct nlattr *nl_iftype_data =
+			nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA);
+		int err;
+
+		if (!nl_iftype_data)
+			return -ENOBUFS;
+
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			struct nlattr *iftdata;
+
+			iftdata = nla_nest_start(msg, i + 1);
+			if (!iftdata)
+				return -ENOBUFS;
+
+			err = nl80211_send_iftype_data(msg,
+						       &sband->iftype_data[i]);
+			if (err)
+				return err;
+
+			nla_nest_end(msg, iftdata);
+		}
+
+		nla_nest_end(msg, nl_iftype_data);
+	}
+
 	/* add bitrates */
 	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
 	if (!nl_rates)
@@ -4472,6 +4528,9 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 	case RATE_INFO_BW_160:
 		rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
 		break;
+	case RATE_INFO_BW_HE_RU:
+		rate_flg = 0;
+		WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
 	}
 
 	if (rate_flg && nla_put_flag(msg, rate_flg))
@@ -4491,6 +4550,19 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
 			return false;
+	} else if (info->flags & RATE_INFO_FLAGS_HE_MCS) {
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm))
+			return false;
+		if (info->bw == RATE_INFO_BW_HE_RU &&
+		    nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
+			       info->he_ru_alloc))
+			return false;
 	}
 
 	nla_nest_end(msg, rate);
@@ -4887,7 +4959,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 			return -EINVAL;
 		if (params->supported_rates)
 			return -EINVAL;
-		if (params->ext_capab || params->ht_capa || params->vht_capa)
+		if (params->ext_capab || params->ht_capa || params->vht_capa ||
+		    params->he_capa)
 			return -EINVAL;
 	}
 
@@ -5093,6 +5166,15 @@ static int nl80211_set_station_tdls(struct genl_info *info,
 	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
 		params->vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params->he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params->he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
 
 	err = nl80211_parse_sta_channel_info(info, params);
 	if (err)
@@ -5320,6 +5402,17 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params.he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params.he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		/* max len is validated in nla policy */
+		if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
+
 	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
 		params.opmode_notif_used = true;
 		params.opmode_notif =
@@ -5352,6 +5445,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
 		params.ht_capa = NULL;
 		params.vht_capa = NULL;
+
+		/* HE requires WME */
+		if (params.he_capa_len)
+			return -EINVAL;
 	}
 
 	/* When you run into this, adjust the code below for the new flag */
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b91597a8baa2..4ed06b271f32 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -4,6 +4,7 @@
  *
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2017	Intel Deutschland GmbH
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -1142,6 +1143,85 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	return 0;
 }
 
+static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
+{
+#define SCALE 2048
+	u16 mcs_divisors[12] = {
+		34133, /* 16.666666... */
+		17067, /*  8.333333... */
+		11378, /*  5.555555... */
+		 8533, /*  4.166666... */
+		 5689, /*  2.777777... */
+		 4267, /*  2.083333... */
+		 3923, /*  1.851851... */
+		 3413, /*  1.666666... */
+		 2844, /*  1.388888... */
+		 2560, /*  1.250000... */
+		 2276, /*  1.111111... */
+		 2048, /*  1.000000... */
+	};
+	u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
+	u32 rates_969[3] =  { 480388888, 453700000, 408333333 };
+	u32 rates_484[3] =  { 229411111, 216666666, 195000000 };
+	u32 rates_242[3] =  { 114711111, 108333333,  97500000 };
+	u32 rates_106[3] =  {  40000000,  37777777,  34000000 };
+	u32 rates_52[3]  =  {  18820000,  17777777,  16000000 };
+	u32 rates_26[3]  =  {   9411111,   8888888,   8000000 };
+	u64 tmp;
+	u32 result;
+
+	if (WARN_ON_ONCE(rate->mcs > 11))
+		return 0;
+
+	if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2))
+		return 0;
+	if (WARN_ON_ONCE(rate->he_ru_alloc >
+			 NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
+		return 0;
+	if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
+		return 0;
+
+	if (rate->bw == RATE_INFO_BW_160)
+		result = rates_160M[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_80 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
+		result = rates_969[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_40 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
+		result = rates_484[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_20 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_242))
+		result = rates_242[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_106)
+		result = rates_106[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_52)
+		result = rates_52[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
+		result = rates_26[rate->he_gi];
+	else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+		      rate->bw, rate->he_ru_alloc))
+		return 0;
+
+	/* now scale to the appropriate MCS */
+	tmp = result;
+	tmp *= SCALE;
+	do_div(tmp, mcs_divisors[rate->mcs]);
+	result = tmp;
+
+	/* and take NSS, DCM into account */
+	result = (result * rate->nss) / 8;
+	if (rate->he_dcm)
+		result /= 2;
+
+	return result;
+}
+
 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
 	if (rate->flags & RATE_INFO_FLAGS_MCS)
@@ -1150,6 +1230,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 		return cfg80211_calculate_bitrate_60g(rate);
 	if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
 		return cfg80211_calculate_bitrate_vht(rate);
+	if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
+		return cfg80211_calculate_bitrate_he(rate);
 
 	return rate->legacy;
 }
-- 
cgit v1.2.3


From 95a28eeaf1491bcb8bf521bad4784683333705ee Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:43 +0300
Subject: radiotap: add structs for HE

Add radiotap structures for HE.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 include/net/ieee80211_radiotap.h | 123 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

(limited to 'include')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 960236fb1681..feef706e1158 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2017		Intel Deutschland GmbH
+ * Copyright (c) 2018		Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -72,6 +73,8 @@ enum ieee80211_radiotap_presence {
 	IEEE80211_RADIOTAP_AMPDU_STATUS = 20,
 	IEEE80211_RADIOTAP_VHT = 21,
 	IEEE80211_RADIOTAP_TIMESTAMP = 22,
+	IEEE80211_RADIOTAP_HE = 23,
+	IEEE80211_RADIOTAP_HE_MU = 24,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
 	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -202,6 +205,126 @@ enum ieee80211_radiotap_timestamp_flags {
 	IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY = 0x02,
 };
 
+struct ieee80211_radiotap_he {
+	__le16 data1, data2, data3, data4, data5, data6;
+};
+
+enum ieee80211_radiotap_he_bits {
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MASK		= 3,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU		= 0,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_EXT_SU	= 1,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_MU		= 2,
+	IEEE80211_RADIOTAP_HE_DATA1_FORMAT_TRIG		= 3,
+
+	IEEE80211_RADIOTAP_HE_DATA1_BSS_COLOR_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_DATA1_BEAM_CHANGE_KNOWN	= 0x0008,
+	IEEE80211_RADIOTAP_HE_DATA1_UL_DL_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN	= 0x0020,
+	IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN	= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA1_CODING_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA1_LDPC_XSYMSEG_KNOWN	= 0x0100,
+	IEEE80211_RADIOTAP_HE_DATA1_STBC_KNOWN		= 0x0200,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE_KNOWN	= 0x0400,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE2_KNOWN	= 0x0800,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE3_KNOWN	= 0x1000,
+	IEEE80211_RADIOTAP_HE_DATA1_SPTL_REUSE4_KNOWN	= 0x2000,
+	IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA1_DOPPLER_KNOWN	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_KNOWN	= 0x0001,
+	IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN		= 0x0002,
+	IEEE80211_RADIOTAP_HE_DATA2_NUM_LTF_SYMS_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_DATA2_PRE_FEC_PAD_KNOWN	= 0x0008,
+	IEEE80211_RADIOTAP_HE_DATA2_TXBF_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA2_PE_DISAMBIG_KNOWN	= 0x0020,
+	IEEE80211_RADIOTAP_HE_DATA2_TXOP_KNOWN		= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA2_MIDAMBLE_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET		= 0x3f00,
+	IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA3_BSS_COLOR		= 0x003f,
+	IEEE80211_RADIOTAP_HE_DATA3_BEAM_CHANGE		= 0x0040,
+	IEEE80211_RADIOTAP_HE_DATA3_UL_DL		= 0x0080,
+	IEEE80211_RADIOTAP_HE_DATA3_DATA_MCS		= 0x0f00,
+	IEEE80211_RADIOTAP_HE_DATA3_DATA_DCM		= 0x1000,
+	IEEE80211_RADIOTAP_HE_DATA3_CODING		= 0x2000,
+	IEEE80211_RADIOTAP_HE_DATA3_LDPC_XSYMSEG	= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA3_STBC		= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA4_SU_MU_SPTL_REUSE	= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA4_MU_STA_ID		= 0x7ff0,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE1	= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE2	= 0x00f0,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE3	= 0x0f00,
+	IEEE80211_RADIOTAP_HE_DATA4_TB_SPTL_REUSE4	= 0xf000,
+
+	IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC	= 0x000f,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ	= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ	= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ	= 2,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ	= 3,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_26T	= 4,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_52T	= 5,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_106T	= 6,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_242T	= 7,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_484T	= 8,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_996T	= 9,
+		IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_2x996T	= 10,
+
+	IEEE80211_RADIOTAP_HE_DATA5_GI			= 0x0030,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_0_8			= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_1_6			= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_GI_3_2			= 2,
+
+	IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE		= 0x00c0,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_UNKNOWN		= 0,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_1X			= 1,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_2X			= 2,
+		IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE_4X			= 3,
+	IEEE80211_RADIOTAP_HE_DATA5_NUM_LTF_SYMS	= 0x0700,
+	IEEE80211_RADIOTAP_HE_DATA5_PRE_FEC_PAD		= 0x3000,
+	IEEE80211_RADIOTAP_HE_DATA5_TXBF		= 0x4000,
+	IEEE80211_RADIOTAP_HE_DATA5_PE_DISAMBIG		= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_DATA6_NSTS		= 0x000f,
+	IEEE80211_RADIOTAP_HE_DATA6_DOPPLER		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA6_TXOP		= 0x7f00,
+	IEEE80211_RADIOTAP_HE_DATA6_MIDAMBLE_PDCTY	= 0x8000,
+};
+
+struct ieee80211_radiotap_he_mu {
+	__le16 flags1, flags2;
+	u8 ru_ch1[4];
+	u8 ru_ch2[4];
+};
+
+enum ieee80211_radiotap_he_mu_bits {
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS		= 0x000f,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_MCS_KNOWN		= 0x0010,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM		= 0x0020,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_DCM_KNOWN		= 0x0040,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_CTR_26T_RU_KNOWN	= 0x0080,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_RU_KNOWN		= 0x0100,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH2_RU_KNOWN		= 0x0200,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU_KNOWN	= 0x1000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_CH1_CTR_26T_RU		= 0x2000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_COMP_KNOWN	= 0x4000,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS1_SIG_B_SYMS_USERS_KNOWN	= 0x8000,
+
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW	= 0x0003,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_20MHZ	= 0x0000,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_40MHZ	= 0x0001,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_80MHZ	= 0x0002,
+		IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_160MHZ	= 0x0003,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_KNOWN	= 0x0004,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_COMP		= 0x0008,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_SIG_B_SYMS_USERS	= 0x00f0,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW	= 0x0300,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_PUNC_FROM_SIG_A_BW_KNOWN= 0x0400,
+	IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU		= 0x0800,
+};
+
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  */
-- 
cgit v1.2.3


From a85857633b04d57f4524cca0a2bfaf87b2543f9f Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 25 Apr 2018 13:26:23 -0400
Subject: nfsd4: support change_attr_type attribute

The change attribute is what is used by clients to revalidate their
caches.  Our server may use i_version or ctime for that purpose.  Those
choices behave slightly differently, and it may be useful to the client
to know which we're using.  This attribute tells the client that.  The
Linux client doesn't yet use this attribute yet, though.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c    | 10 ++++++++++
 fs/nfsd/nfsd.h       |  1 +
 include/linux/nfs4.h |  8 ++++++++
 3 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4161031ae14e..fb4991889f89 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2891,6 +2891,16 @@ out_acl:
 			goto out;
 	}
 
+	if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+		p = xdr_reserve_space(xdr, 4);
+		if (!p)
+			goto out_resource;
+		if (IS_I_VERSION(d_inode(dentry)))
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
+		else
+			*p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
+	}
+
 	if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
 		status = nfsd4_encode_security_label(xdr, rqstp, context,
 								contextlen);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 3fce905d0365..066899929863 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -360,6 +360,7 @@ void		nfsd_lockd_shutdown(void);
 
 #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
 	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	FATTR4_WORD2_CHANGE_ATTR_TYPE | \
 	FATTR4_WORD2_MODE_UMASK | \
 	NFSD4_2_SECURITY_ATTRS)
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..0877ed333733 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -374,6 +374,13 @@ enum lock_type4 {
 	NFS4_WRITEW_LT = 4
 };
 
+enum change_attr_type4 {
+	NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+	NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+	NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+	NFS4_CHANGE_TYPE_IS_UNDEFINED = 4
+};
 
 /* Mandatory Attributes */
 #define FATTR4_WORD0_SUPPORTED_ATTRS    (1UL << 0)
@@ -441,6 +448,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE	(1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE	(1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
 #define FATTR4_WORD2_MODE_UMASK		(1UL << 17)
 
-- 
cgit v1.2.3


From 6b293258cded9c8ee44cce4081d9170d6d1b5f5d Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 12 Jun 2018 15:19:11 +1000
Subject: fsi: scom: Major overhaul

This was too hard to split ... this adds a number of features
to the SCOM user interface:

 - Support for indirect SCOMs

 - read()/write() interface now handle errors and retries

 - New ioctl() "raw" interface for use by debuggers

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Eddie James <eajames@linux.vnet.ibm.com>
Reviewed-by: Alistair Popple <alistair@popple.id.au>
---
 drivers/fsi/fsi-scom.c   | 424 +++++++++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/fsi.h |  58 +++++++
 2 files changed, 452 insertions(+), 30 deletions(-)
 create mode 100644 include/uapi/linux/fsi.h

(limited to 'include')

diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c
index e98573ecdae1..39c74351f1bf 100644
--- a/drivers/fsi/fsi-scom.c
+++ b/drivers/fsi/fsi-scom.c
@@ -24,6 +24,8 @@
 #include <linux/list.h>
 #include <linux/idr.h>
 
+#include <uapi/linux/fsi.h>
+
 #define FSI_ENGID_SCOM		0x5
 
 /* SCOM engine register set */
@@ -41,14 +43,36 @@
 /* Status register bits */
 #define SCOM_STATUS_ERR_SUMMARY		0x80000000
 #define SCOM_STATUS_PROTECTION		0x01000000
+#define SCOM_STATUS_PARITY		0x04000000
 #define SCOM_STATUS_PIB_ABORT		0x00100000
 #define SCOM_STATUS_PIB_RESP_MASK	0x00007000
 #define SCOM_STATUS_PIB_RESP_SHIFT	12
 
 #define SCOM_STATUS_ANY_ERR		(SCOM_STATUS_ERR_SUMMARY | \
 					 SCOM_STATUS_PROTECTION | \
+					 SCOM_STATUS_PARITY |	  \
 					 SCOM_STATUS_PIB_ABORT | \
 					 SCOM_STATUS_PIB_RESP_MASK)
+/* SCOM address encodings */
+#define XSCOM_ADDR_IND_FLAG		BIT_ULL(63)
+#define XSCOM_ADDR_INF_FORM1		BIT_ULL(60)
+
+/* SCOM indirect stuff */
+#define XSCOM_ADDR_DIRECT_PART		0x7fffffffull
+#define XSCOM_ADDR_INDIRECT_PART	0x000fffff00000000ull
+#define XSCOM_DATA_IND_READ		BIT_ULL(63)
+#define XSCOM_DATA_IND_COMPLETE		BIT_ULL(31)
+#define XSCOM_DATA_IND_ERR_MASK		0x70000000ull
+#define XSCOM_DATA_IND_ERR_SHIFT	28
+#define XSCOM_DATA_IND_DATA		0x0000ffffull
+#define XSCOM_DATA_IND_FORM1_DATA	0x000fffffffffffffull
+#define XSCOM_ADDR_FORM1_LOW		0x000ffffffffull
+#define XSCOM_ADDR_FORM1_HI		0xfff00000000ull
+#define XSCOM_ADDR_FORM1_HI_SHIFT	20
+
+/* Retries */
+#define SCOM_MAX_RETRIES		100	/* Retries on busy */
+#define SCOM_MAX_IND_RETRIES		10	/* Retries indirect not ready */
 
 struct scom_device {
 	struct list_head link;
@@ -56,7 +80,7 @@ struct scom_device {
 	struct miscdevice mdev;
 	struct mutex lock;
 	char	name[32];
-	int idx;
+	int	idx;
 };
 
 #define to_scom_dev(x)		container_of((x), struct scom_device, mdev)
@@ -65,80 +89,304 @@ static struct list_head scom_devices;
 
 static DEFINE_IDA(scom_ida);
 
-static int put_scom(struct scom_device *scom_dev, uint64_t value,
-		    uint32_t addr)
+static int __put_scom(struct scom_device *scom_dev, uint64_t value,
+		      uint32_t addr, uint32_t *status)
 {
-	__be32 data;
+	__be32 data, raw_status;
 	int rc;
 
-	mutex_lock(&scom_dev->lock);
-
 	data = cpu_to_be32((value >> 32) & 0xffffffff);
 	rc = fsi_device_write(scom_dev->fsi_dev, SCOM_DATA0_REG, &data,
 				sizeof(uint32_t));
 	if (rc)
-		goto bail;
+		return rc;
 
 	data = cpu_to_be32(value & 0xffffffff);
 	rc = fsi_device_write(scom_dev->fsi_dev, SCOM_DATA1_REG, &data,
 				sizeof(uint32_t));
 	if (rc)
-		goto bail;
+		return rc;
 
 	data = cpu_to_be32(SCOM_WRITE_CMD | addr);
 	rc = fsi_device_write(scom_dev->fsi_dev, SCOM_CMD_REG, &data,
 				sizeof(uint32_t));
- bail:
-	mutex_unlock(&scom_dev->lock);
-	return rc;
+	if (rc)
+		return rc;
+	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_STATUS_REG, &raw_status,
+			     sizeof(uint32_t));
+	if (rc)
+		return rc;
+	*status = be32_to_cpu(raw_status);
+
+	return 0;
 }
 
-static int get_scom(struct scom_device *scom_dev, uint64_t *value,
-		    uint32_t addr)
+static int __get_scom(struct scom_device *scom_dev, uint64_t *value,
+		      uint32_t addr, uint32_t *status)
 {
-	__be32 result, data;
+	__be32 data, raw_status;
 	int rc;
 
 
-	mutex_lock(&scom_dev->lock);
 	*value = 0ULL;
 	data = cpu_to_be32(SCOM_READ_CMD | addr);
 	rc = fsi_device_write(scom_dev->fsi_dev, SCOM_CMD_REG, &data,
 				sizeof(uint32_t));
 	if (rc)
-		goto bail;
+		return rc;
+	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_STATUS_REG, &raw_status,
+			     sizeof(uint32_t));
+	if (rc)
+		return rc;
 
-	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_DATA0_REG, &result,
+	/*
+	 * Read the data registers even on error, so we don't have
+	 * to interpret the status register here.
+	 */
+	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_DATA0_REG, &data,
 				sizeof(uint32_t));
 	if (rc)
-		goto bail;
-
-	*value |= (uint64_t)be32_to_cpu(result) << 32;
-	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_DATA1_REG, &result,
+		return rc;
+	*value |= (uint64_t)be32_to_cpu(data) << 32;
+	rc = fsi_device_read(scom_dev->fsi_dev, SCOM_DATA1_REG, &data,
 				sizeof(uint32_t));
 	if (rc)
-		goto bail;
+		return rc;
+	*value |= be32_to_cpu(data);
+	*status = be32_to_cpu(raw_status);
+
+	return rc;
+}
+
+static int put_indirect_scom_form0(struct scom_device *scom, uint64_t value,
+				   uint64_t addr, uint32_t *status)
+{
+	uint64_t ind_data, ind_addr;
+	int rc, retries, err = 0;
+
+	if (value & ~XSCOM_DATA_IND_DATA)
+		return -EINVAL;
+
+	ind_addr = addr & XSCOM_ADDR_DIRECT_PART;
+	ind_data = (addr & XSCOM_ADDR_INDIRECT_PART) | value;
+	rc = __put_scom(scom, ind_data, ind_addr, status);
+	if (rc || (*status & SCOM_STATUS_ANY_ERR))
+		return rc;
+
+	for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) {
+		rc = __get_scom(scom, &ind_data, addr, status);
+		if (rc || (*status & SCOM_STATUS_ANY_ERR))
+			return rc;
+
+		err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
+		*status = err << SCOM_STATUS_PIB_RESP_SHIFT;
+		if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED))
+			return 0;
+
+		msleep(1);
+	}
+	return rc;
+}
+
+static int put_indirect_scom_form1(struct scom_device *scom, uint64_t value,
+				   uint64_t addr, uint32_t *status)
+{
+	uint64_t ind_data, ind_addr;
+
+	if (value & ~XSCOM_DATA_IND_FORM1_DATA)
+		return -EINVAL;
+
+	ind_addr = addr & XSCOM_ADDR_FORM1_LOW;
+	ind_data = value | (addr & XSCOM_ADDR_FORM1_HI) << XSCOM_ADDR_FORM1_HI_SHIFT;
+	return __put_scom(scom, ind_data, ind_addr, status);
+}
+
+static int get_indirect_scom_form0(struct scom_device *scom, uint64_t *value,
+				   uint64_t addr, uint32_t *status)
+{
+	uint64_t ind_data, ind_addr;
+	int rc, retries, err = 0;
+
+	ind_addr = addr & XSCOM_ADDR_DIRECT_PART;
+	ind_data = (addr & XSCOM_ADDR_INDIRECT_PART) | XSCOM_DATA_IND_READ;
+	rc = __put_scom(scom, ind_data, ind_addr, status);
+	if (rc || (*status & SCOM_STATUS_ANY_ERR))
+		return rc;
+
+	for (retries = 0; retries < SCOM_MAX_IND_RETRIES; retries++) {
+		rc = __get_scom(scom, &ind_data, addr, status);
+		if (rc || (*status & SCOM_STATUS_ANY_ERR))
+			return rc;
+
+		err = (ind_data & XSCOM_DATA_IND_ERR_MASK) >> XSCOM_DATA_IND_ERR_SHIFT;
+		*status = err << SCOM_STATUS_PIB_RESP_SHIFT;
+		*value = ind_data & XSCOM_DATA_IND_DATA;
+
+		if ((ind_data & XSCOM_DATA_IND_COMPLETE) || (err != SCOM_PIB_BLOCKED))
+			return 0;
+
+		msleep(1);
+	}
+	return rc;
+}
+
+static int raw_put_scom(struct scom_device *scom, uint64_t value,
+			uint64_t addr, uint32_t *status)
+{
+	if (addr & XSCOM_ADDR_IND_FLAG) {
+		if (addr & XSCOM_ADDR_INF_FORM1)
+			return put_indirect_scom_form1(scom, value, addr, status);
+		else
+			return put_indirect_scom_form0(scom, value, addr, status);
+	} else
+		return __put_scom(scom, value, addr, status);
+}
+
+static int raw_get_scom(struct scom_device *scom, uint64_t *value,
+			uint64_t addr, uint32_t *status)
+{
+	if (addr & XSCOM_ADDR_IND_FLAG) {
+		if (addr & XSCOM_ADDR_INF_FORM1)
+			return -ENXIO;
+		return get_indirect_scom_form0(scom, value, addr, status);
+	} else
+		return __get_scom(scom, value, addr, status);
+}
+
+static int handle_fsi2pib_status(struct scom_device *scom, uint32_t status)
+{
+	uint32_t dummy = -1;
+
+	if (status & SCOM_STATUS_PROTECTION)
+		return -EPERM;
+	if (status & SCOM_STATUS_PARITY) {
+		fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG, &dummy,
+				 sizeof(uint32_t));
+		return -EIO;
+	}
+	/* Return -EBUSY on PIB abort to force a retry */
+	if (status & SCOM_STATUS_PIB_ABORT)
+		return -EBUSY;
+	if (status & SCOM_STATUS_ERR_SUMMARY) {
+		fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG, &dummy,
+				 sizeof(uint32_t));
+		return -EIO;
+	}
+	return 0;
+}
+
+static int handle_pib_status(struct scom_device *scom, uint8_t status)
+{
+	uint32_t dummy = -1;
+
+	if (status == SCOM_PIB_SUCCESS)
+		return 0;
+	if (status == SCOM_PIB_BLOCKED)
+		return -EBUSY;
+
+	/* Reset the bridge */
+	fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG, &dummy,
+			 sizeof(uint32_t));
+
+	switch(status) {
+	case SCOM_PIB_OFFLINE:
+		return -ENODEV;
+	case SCOM_PIB_BAD_ADDR:
+		return -ENXIO;
+	case SCOM_PIB_TIMEOUT:
+		return -ETIMEDOUT;
+	case SCOM_PIB_PARTIAL:
+	case SCOM_PIB_CLK_ERR:
+	case SCOM_PIB_PARITY_ERR:
+	default:
+		return -EIO;
+	}
+}
 
-	*value |= be32_to_cpu(result);
- bail:
-	mutex_unlock(&scom_dev->lock);
+static int put_scom(struct scom_device *scom, uint64_t value,
+		    uint64_t addr)
+{
+	uint32_t status, dummy = -1;
+	int rc, retries;
+
+	for (retries = 0; retries < SCOM_MAX_RETRIES; retries++) {
+		rc = raw_put_scom(scom, value, addr, &status);
+		if (rc) {
+			/* Try resetting the bridge if FSI fails */
+			if (rc != -ENODEV && retries == 0) {
+				fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG,
+						 &dummy, sizeof(uint32_t));
+				rc = -EBUSY;
+			} else
+				return rc;
+		} else
+			rc = handle_fsi2pib_status(scom, status);
+		if (rc && rc != -EBUSY)
+			break;
+		if (rc == 0) {
+			rc = handle_pib_status(scom,
+					       (status & SCOM_STATUS_PIB_RESP_MASK)
+					       >> SCOM_STATUS_PIB_RESP_SHIFT);
+			if (rc && rc != -EBUSY)
+				break;
+		}
+		if (rc == 0)
+			break;
+		msleep(1);
+	}
+	return rc;
+}
+
+static int get_scom(struct scom_device *scom, uint64_t *value,
+		    uint64_t addr)
+{
+	uint32_t status, dummy = -1;
+	int rc, retries;
+
+	for (retries = 0; retries < SCOM_MAX_RETRIES; retries++) {
+		rc = raw_get_scom(scom, value, addr, &status);
+		if (rc) {
+			/* Try resetting the bridge if FSI fails */
+			if (rc != -ENODEV && retries == 0) {
+				fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG,
+						 &dummy, sizeof(uint32_t));
+				rc = -EBUSY;
+			} else
+				return rc;
+		} else
+			rc = handle_fsi2pib_status(scom, status);
+		if (rc && rc != -EBUSY)
+			break;
+		if (rc == 0) {
+			rc = handle_pib_status(scom,
+					       (status & SCOM_STATUS_PIB_RESP_MASK)
+					       >> SCOM_STATUS_PIB_RESP_SHIFT);
+			if (rc && rc != -EBUSY)
+				break;
+		}
+		if (rc == 0)
+			break;
+		msleep(1);
+	}
 	return rc;
 }
 
 static ssize_t scom_read(struct file *filep, char __user *buf, size_t len,
 			 loff_t *offset)
 {
-	int rc;
 	struct miscdevice *mdev =
 				(struct miscdevice *)filep->private_data;
 	struct scom_device *scom = to_scom_dev(mdev);
 	struct device *dev = &scom->fsi_dev->dev;
 	uint64_t val;
+	int rc;
 
 	if (len != sizeof(uint64_t))
 		return -EINVAL;
 
+	mutex_lock(&scom->lock);
 	rc = get_scom(scom, &val, *offset);
+	mutex_unlock(&scom->lock);
 	if (rc) {
 		dev_dbg(dev, "get_scom fail:%d\n", rc);
 		return rc;
@@ -169,7 +417,9 @@ static ssize_t scom_write(struct file *filep, const char __user *buf,
 		return -EINVAL;
 	}
 
+	mutex_lock(&scom->lock);
 	rc = put_scom(scom, val, *offset);
+	mutex_unlock(&scom->lock);
 	if (rc) {
 		dev_dbg(dev, "put_scom failed with:%d\n", rc);
 		return rc;
@@ -193,11 +443,125 @@ static loff_t scom_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static void raw_convert_status(struct scom_access *acc, uint32_t status)
+{
+	acc->pib_status = (status & SCOM_STATUS_PIB_RESP_MASK) >>
+		SCOM_STATUS_PIB_RESP_SHIFT;
+	acc->intf_errors = 0;
+
+	if (status & SCOM_STATUS_PROTECTION)
+		acc->intf_errors |= SCOM_INTF_ERR_PROTECTION;
+	else if (status & SCOM_STATUS_PARITY)
+		acc->intf_errors |= SCOM_INTF_ERR_PARITY;
+	else if (status & SCOM_STATUS_PIB_ABORT)
+		acc->intf_errors |= SCOM_INTF_ERR_ABORT;
+	else if (status & SCOM_STATUS_ERR_SUMMARY)
+		acc->intf_errors |= SCOM_INTF_ERR_UNKNOWN;
+}
+
+static int scom_raw_read(struct scom_device *scom, void __user *argp)
+{
+	struct scom_access acc;
+	uint32_t status;
+	int rc;
+
+	if (copy_from_user(&acc, argp, sizeof(struct scom_access)))
+		return -EFAULT;
+
+	rc = raw_get_scom(scom, &acc.data, acc.addr, &status);
+	if (rc)
+		return rc;
+	raw_convert_status(&acc, status);
+	if (copy_to_user(argp, &acc, sizeof(struct scom_access)))
+		return -EFAULT;
+	return 0;
+}
+
+static int scom_raw_write(struct scom_device *scom, void __user *argp)
+{
+	u64 prev_data, mask, data;
+	struct scom_access acc;
+	uint32_t status;
+	int rc;
+
+	if (copy_from_user(&acc, argp, sizeof(struct scom_access)))
+		return -EFAULT;
+
+	if (acc.mask) {
+		rc = raw_get_scom(scom, &prev_data, acc.addr, &status);
+		if (rc)
+			return rc;
+		if (status & SCOM_STATUS_ANY_ERR)
+			goto fail;
+		mask = acc.mask;
+	} else {
+		prev_data = mask = -1ull;
+	}
+	data = (prev_data & ~mask) | (acc.data & mask);
+	rc = raw_put_scom(scom, data, acc.addr, &status);
+	if (rc)
+		return rc;
+ fail:
+	raw_convert_status(&acc, status);
+	if (copy_to_user(argp, &acc, sizeof(struct scom_access)))
+		return -EFAULT;
+	return 0;
+}
+
+static int scom_reset(struct scom_device *scom, void __user *argp)
+{
+	uint32_t flags, dummy = -1;
+	int rc = 0;
+
+	if (get_user(flags, (__u32 __user *)argp))
+		return -EFAULT;
+	if (flags & SCOM_RESET_PIB)
+		rc = fsi_device_write(scom->fsi_dev, SCOM_PIB_RESET_REG, &dummy,
+				      sizeof(uint32_t));
+	if (!rc && (flags & (SCOM_RESET_PIB | SCOM_RESET_INTF)))
+		rc = fsi_device_write(scom->fsi_dev, SCOM_FSI2PIB_RESET_REG, &dummy,
+				      sizeof(uint32_t));
+	return rc;
+}
+
+static int scom_check(struct scom_device *scom, void __user *argp)
+{
+	/* Still need to find out how to get "protected" */
+	return put_user(SCOM_CHECK_SUPPORTED, (__u32 __user *)argp);
+}
+
+static long scom_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct miscdevice *mdev = file->private_data;
+	struct scom_device *scom = to_scom_dev(mdev);
+	void __user *argp = (void __user *)arg;
+	int rc = -ENOTTY;
+
+	mutex_lock(&scom->lock);
+	switch(cmd) {
+	case FSI_SCOM_CHECK:
+		rc = scom_check(scom, argp);
+		break;
+	case FSI_SCOM_READ:
+		rc = scom_raw_read(scom, argp);
+		break;
+	case FSI_SCOM_WRITE:
+		rc = scom_raw_write(scom, argp);
+		break;
+	case FSI_SCOM_RESET:
+		rc = scom_reset(scom, argp);
+		break;
+	}
+	mutex_unlock(&scom->lock);
+	return rc;
+}
+
 static const struct file_operations scom_fops = {
-	.owner	= THIS_MODULE,
-	.llseek	= scom_llseek,
-	.read	= scom_read,
-	.write	= scom_write,
+	.owner		= THIS_MODULE,
+	.llseek		= scom_llseek,
+	.read		= scom_read,
+	.write		= scom_write,
+	.unlocked_ioctl	= scom_ioctl,
 };
 
 static int scom_probe(struct device *dev)
diff --git a/include/uapi/linux/fsi.h b/include/uapi/linux/fsi.h
new file mode 100644
index 000000000000..da577ecd90e7
--- /dev/null
+++ b/include/uapi/linux/fsi.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FSI_H
+#define _UAPI_LINUX_FSI_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * /dev/scom "raw" ioctl interface
+ *
+ * The driver supports a high level "read/write" interface which
+ * handles retries and converts the status to Linux error codes,
+ * however low level tools an debugger need to access the "raw"
+ * HW status information and interpret it themselves, so this
+ * ioctl interface is also provided for their use case.
+ */
+
+/* Structure for SCOM read/write */
+struct scom_access {
+	__u64	addr;		/* SCOM address, supports indirect */
+	__u64	data;		/* SCOM data (in for write, out for read) */
+	__u64	mask;		/* Data mask for writes */
+	__u32	intf_errors;	/* Interface error flags */
+#define SCOM_INTF_ERR_PARITY		0x00000001 /* Parity error */
+#define SCOM_INTF_ERR_PROTECTION	0x00000002 /* Blocked by secure boot */
+#define SCOM_INTF_ERR_ABORT		0x00000004 /* PIB reset during access */
+#define SCOM_INTF_ERR_UNKNOWN		0x80000000 /* Unknown error */
+	/*
+	 * Note: Any other bit set in intf_errors need to be considered as an
+	 * error. Future implementations may define new error conditions. The
+	 * pib_status below is only valid if intf_errors is 0.
+	 */
+	__u8	pib_status;	/* 3-bit PIB status */
+#define SCOM_PIB_SUCCESS	0	/* Access successful */
+#define SCOM_PIB_BLOCKED	1	/* PIB blocked, pls retry */
+#define SCOM_PIB_OFFLINE	2	/* Chiplet offline */
+#define SCOM_PIB_PARTIAL	3	/* Partial good */
+#define SCOM_PIB_BAD_ADDR	4	/* Invalid address */
+#define SCOM_PIB_CLK_ERR	5	/* Clock error */
+#define SCOM_PIB_PARITY_ERR	6	/* Parity error on the PIB bus */
+#define SCOM_PIB_TIMEOUT	7	/* Bus timeout */
+	__u8	pad;
+};
+
+/* Flags for SCOM check */
+#define SCOM_CHECK_SUPPORTED	0x00000001	/* Interface supported */
+#define SCOM_CHECK_PROTECTED	0x00000002	/* Interface blocked by secure boot */
+
+/* Flags for SCOM reset */
+#define SCOM_RESET_INTF		0x00000001	/* Reset interface */
+#define SCOM_RESET_PIB		0x00000002	/* Reset PIB */
+
+#define FSI_SCOM_CHECK	_IOR('s', 0x00, __u32)
+#define FSI_SCOM_READ	_IOWR('s', 0x01, struct scom_access)
+#define FSI_SCOM_WRITE	_IOWR('s', 0x02, struct scom_access)
+#define FSI_SCOM_RESET	_IOW('s', 0x03, __u32)
+
+#endif /* _UAPI_LINUX_FSI_H */
-- 
cgit v1.2.3


From eb3744a2dd01cb07ce9f556d56d6fe451f0c313a Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 13 Jun 2018 09:10:37 +0530
Subject: gpio: davinci: Do not assume continuous IRQ numbering

Currently the driver assumes that the interrupts are continuous
and does platform_get_irq only once and assumes the rest are continuous,
instead call platform_get_irq for all the interrupts and store them
in an array for later use.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-davinci.c                | 63 ++++++++++++++++++++----------
 include/linux/platform_data/gpio-davinci.h |  3 +-
 2 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 423d37c95e6b..a5ece8ea79bc 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -55,7 +55,7 @@ static inline struct davinci_gpio_regs __iomem *irq2regs(struct irq_data *d)
 	return g;
 }
 
-static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq);
+static int davinci_gpio_irq_setup(struct platform_device *pdev);
 
 /*--------------------------------------------------------------------------*/
 
@@ -167,8 +167,8 @@ of_err:
 static int davinci_gpio_probe(struct platform_device *pdev)
 {
 	static int ctrl_num, bank_base;
-	int gpio, bank, bank_irq, ret = 0;
-	unsigned ngpio, nbank;
+	int gpio, bank, i, ret = 0;
+	unsigned int ngpio, nbank, nirq;
 	struct davinci_gpio_controller *chips;
 	struct davinci_gpio_platform_data *pdata;
 	struct device *dev = &pdev->dev;
@@ -197,6 +197,16 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	if (WARN_ON(ARCH_NR_GPIOS < ngpio))
 		ngpio = ARCH_NR_GPIOS;
 
+	/*
+	 * If there are unbanked interrupts then the number of
+	 * interrupts is equal to number of gpios else all are banked so
+	 * number of interrupts is equal to number of banks(each with 16 gpios)
+	 */
+	if (pdata->gpio_unbanked)
+		nirq = pdata->gpio_unbanked;
+	else
+		nirq = DIV_ROUND_UP(ngpio, 16);
+
 	nbank = DIV_ROUND_UP(ngpio, 32);
 	chips = devm_kcalloc(dev,
 			     nbank, sizeof(struct davinci_gpio_controller),
@@ -209,10 +219,13 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(gpio_base))
 		return PTR_ERR(gpio_base);
 
-	bank_irq = platform_get_irq(pdev, 0);
-	if (bank_irq < 0) {
-		dev_dbg(dev, "IRQ not populated\n");
-		return bank_irq;
+	for (i = 0; i < nirq; i++) {
+		chips->irqs[i] = platform_get_irq(pdev, i);
+		if (chips->irqs[i] < 0) {
+			dev_info(dev, "IRQ not populated, err = %d\n",
+				 chips->irqs[i]);
+			return chips->irqs[i];
+		}
 	}
 
 	snprintf(label, MAX_LABEL_SIZE, "davinci_gpio.%d", ctrl_num++);
@@ -249,7 +262,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 		goto err;
 
 	platform_set_drvdata(pdev, chips);
-	ret = davinci_gpio_irq_setup(pdev, bank_irq);
+	ret = davinci_gpio_irq_setup(pdev);
 	if (ret)
 		goto err;
 
@@ -383,7 +396,7 @@ static int gpio_to_irq_unbanked(struct gpio_chip *chip, unsigned offset)
 	 * can provide direct-mapped IRQs to AINTC (up to 32 GPIOs).
 	 */
 	if (offset < d->gpio_unbanked)
-		return d->base_irq + offset;
+		return d->irqs[offset];
 	else
 		return -ENODEV;
 }
@@ -392,11 +405,18 @@ static int gpio_irq_type_unbanked(struct irq_data *data, unsigned trigger)
 {
 	struct davinci_gpio_controller *d;
 	struct davinci_gpio_regs __iomem *g;
-	u32 mask;
+	u32 mask, i;
 
 	d = (struct davinci_gpio_controller *)irq_data_get_irq_handler_data(data);
 	g = (struct davinci_gpio_regs __iomem *)d->regs[0];
-	mask = __gpio_mask(data->irq - d->base_irq);
+	for (i = 0; i < MAX_INT_PER_BANK; i++)
+		if (data->irq == d->irqs[i])
+			break;
+
+	if (i == MAX_INT_PER_BANK)
+		return -EINVAL;
+
+	mask = __gpio_mask(i);
 
 	if (trigger & ~(IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING))
 		return -EINVAL;
@@ -458,7 +478,7 @@ static const struct of_device_id davinci_gpio_ids[];
  * (dm6446) can be set appropriately for GPIOV33 pins.
  */
 
-static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
+static int davinci_gpio_irq_setup(struct platform_device *pdev)
 {
 	unsigned	gpio, bank;
 	int		irq;
@@ -492,6 +512,7 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		dev_err(dev, "Error %ld getting gpio clock\n", PTR_ERR(clk));
 		return PTR_ERR(clk);
 	}
+
 	ret = clk_prepare_enable(clk);
 	if (ret)
 		return ret;
@@ -531,12 +552,11 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 	if (pdata->gpio_unbanked) {
 		/* pass "bank 0" GPIO IRQs to AINTC */
 		chips->chip.to_irq = gpio_to_irq_unbanked;
-		chips->base_irq = bank_irq;
 		chips->gpio_unbanked = pdata->gpio_unbanked;
 		binten = GENMASK(pdata->gpio_unbanked / 16, 0);
 
 		/* AINTC handles mask/unmask; GPIO handles triggering */
-		irq = bank_irq;
+		irq = chips->irqs[0];
 		irq_chip = gpio_get_irq_chip(irq);
 		irq_chip->name = "GPIO-AINTC";
 		irq_chip->irq_set_type = gpio_irq_type_unbanked;
@@ -547,10 +567,11 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		writel_relaxed(~0, &g->set_rising);
 
 		/* set the direct IRQs up to use that irqchip */
-		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++, irq++) {
-			irq_set_chip(irq, irq_chip);
-			irq_set_handler_data(irq, chips);
-			irq_set_status_flags(irq, IRQ_TYPE_EDGE_BOTH);
+		for (gpio = 0; gpio < pdata->gpio_unbanked; gpio++) {
+			irq_set_chip(chips->irqs[gpio], irq_chip);
+			irq_set_handler_data(chips->irqs[gpio], chips);
+			irq_set_status_flags(chips->irqs[gpio],
+					     IRQ_TYPE_EDGE_BOTH);
 		}
 
 		goto done;
@@ -560,7 +581,7 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 	 * Or, AINTC can handle IRQs for banks of 16 GPIO IRQs, which we
 	 * then chain through our own handler.
 	 */
-	for (gpio = 0, bank = 0; gpio < ngpio; bank++, bank_irq++, gpio += 16) {
+	for (gpio = 0, bank = 0; gpio < ngpio; bank++, gpio += 16) {
 		/* disabled by default, enabled only as needed
 		 * There are register sets for 32 GPIOs. 2 banks of 16
 		 * GPIOs are covered by each set of registers hence divide by 2
@@ -587,8 +608,8 @@ static int davinci_gpio_irq_setup(struct platform_device *pdev, int bank_irq)
 		irqdata->bank_num = bank;
 		irqdata->chip = chips;
 
-		irq_set_chained_handler_and_data(bank_irq, gpio_irq_handler,
-						 irqdata);
+		irq_set_chained_handler_and_data(chips->irqs[bank],
+						 gpio_irq_handler, irqdata);
 
 		binten |= BIT(bank);
 	}
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 90ae19ca828f..57a5a35e0073 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -22,6 +22,7 @@
 #include <asm-generic/gpio.h>
 
 #define MAX_REGS_BANKS		5
+#define MAX_INT_PER_BANK 32
 
 struct davinci_gpio_platform_data {
 	u32	ngpio;
@@ -41,7 +42,7 @@ struct davinci_gpio_controller {
 	spinlock_t		lock;
 	void __iomem		*regs[MAX_REGS_BANKS];
 	int			gpio_unbanked;
-	unsigned int		base_irq;
+	int			irqs[MAX_INT_PER_BANK];
 	unsigned int		base;
 };
 
-- 
cgit v1.2.3


From 90b39402e9f31c4aab48dc1a43d85a724065793f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 1 Jun 2018 13:21:27 +0200
Subject: gpio: Add API to explicitly name a consumer

The GPIO (descriptor) API registers a "label" naming what is
currently using the GPIO line. Typically this is taken from
things like the device tree node, so "reset-gpios" will result
in he line being labeled "reset".

The technical effect is pretty much zero: the use is for
debug and introspection, such as "lsgpio" and debugfs files.

However sometimes the user want this cuddly feeling of
listing all GPIO lines and seeing exactly what they are for
and it gives a very fulfilling sense of control. Especially
in the cases when the device tree node doesn't provide a
good name, or anonymous GPIO lines assigned just to
"gpios" in the device tree because the usage is implicit.

For these cases it may be nice to be able to label the
line directly and explicitly.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c        | 13 +++++++++++++
 include/linux/gpio/consumer.h |  7 +++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e11a3bb03820..c6f77e806cb8 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3193,6 +3193,19 @@ int gpiod_cansleep(const struct gpio_desc *desc)
 }
 EXPORT_SYMBOL_GPL(gpiod_cansleep);
 
+/**
+ * gpiod_set_consumer_name() - set the consumer name for the descriptor
+ * @desc: gpio to set the consumer name on
+ * @name: the new consumer name
+ */
+void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name)
+{
+	VALIDATE_DESC_VOID(desc);
+	/* Just overwrite whatever the previous name was */
+	desc->label = name;
+}
+EXPORT_SYMBOL_GPL(gpiod_set_consumer_name);
+
 /**
  * gpiod_to_irq() - return the IRQ corresponding to a GPIO
  * @desc: gpio whose IRQ will be returned (already requested)
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 243112c7fa7d..e8aaf34dd65d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -145,6 +145,7 @@ int gpiod_is_active_low(const struct gpio_desc *desc);
 int gpiod_cansleep(const struct gpio_desc *desc);
 
 int gpiod_to_irq(const struct gpio_desc *desc);
+void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name);
 
 /* Convert between the old gpio_ and new gpiod_ interfaces */
 struct gpio_desc *gpio_to_desc(unsigned gpio);
@@ -467,6 +468,12 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc)
 	return -EINVAL;
 }
 
+static inline void gpiod_set_consumer_name(struct gpio_desc *desc, const char *name)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
 static inline struct gpio_desc *gpio_to_desc(unsigned gpio)
 {
 	return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From f2b1d2f94af887c91bb8a0cfb495e546331bc5ed Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:13 +0200
Subject: soc: renesas: rcar-sysc: Provide helpers to power up/down CPUs

Provide helpers to control CPU power areas from platform code, taking
just a CPU index.  This will avoid having to pass full CPU power area
parameter blocks, and thus duplicating information already provided by
SoC-specific SYSC drivers.

This will be used on R-Car H1 only.
Later R-Car generations rely on APMU/RST for CPU power area control.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/soc/renesas/rcar-sysc.c       | 40 +++++++++++++++++++++++++++++++++++
 include/linux/soc/renesas/rcar-sysc.h |  2 ++
 2 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c
index 95120acc4d80..4ad6dcd19420 100644
--- a/drivers/soc/renesas/rcar-sysc.c
+++ b/drivers/soc/renesas/rcar-sysc.c
@@ -310,6 +310,8 @@ struct rcar_pm_domains {
 	struct generic_pm_domain *domains[RCAR_PD_ALWAYS_ON + 1];
 };
 
+static struct genpd_onecell_data *rcar_sysc_onecell_data;
+
 static int __init rcar_sysc_pd_init(void)
 {
 	const struct rcar_sysc_info *info;
@@ -356,6 +358,7 @@ static int __init rcar_sysc_pd_init(void)
 
 	domains->onecell_data.domains = domains->domains;
 	domains->onecell_data.num_domains = ARRAY_SIZE(domains->domains);
+	rcar_sysc_onecell_data = &domains->onecell_data;
 
 	for (i = 0, syscier = 0; i < info->num_areas; i++)
 		syscier |= BIT(info->areas[i].isr_bit);
@@ -449,3 +452,40 @@ void __init rcar_sysc_init(phys_addr_t base, u32 syscier)
 	pr_debug("%s: syscier = 0x%08x\n", __func__, syscier);
 	iowrite32(syscier, rcar_sysc_base + SYSCIER);
 }
+
+#ifdef CONFIG_ARCH_R8A7779
+static int rcar_sysc_power_cpu(unsigned int idx, bool on)
+{
+	struct generic_pm_domain *genpd;
+	struct rcar_sysc_pd *pd;
+	unsigned int i;
+
+	if (!rcar_sysc_onecell_data)
+		return -ENODEV;
+
+	for (i = 0; i < rcar_sysc_onecell_data->num_domains; i++) {
+		genpd = rcar_sysc_onecell_data->domains[i];
+		if (!genpd)
+			continue;
+
+		pd = to_rcar_pd(genpd);
+		if (!(pd->flags & PD_CPU) || pd->ch.chan_bit != idx)
+			continue;
+
+		return on ? rcar_sysc_power_up(&pd->ch)
+			  : rcar_sysc_power_down(&pd->ch);
+	}
+
+	return -ENOENT;
+}
+
+int rcar_sysc_power_down_cpu(unsigned int cpu)
+{
+	return rcar_sysc_power_cpu(cpu, false);
+}
+
+int rcar_sysc_power_up_cpu(unsigned int cpu)
+{
+	return rcar_sysc_power_cpu(cpu, true);
+}
+#endif /* CONFIG_ARCH_R8A7779 */
diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 8a6086d2e9c3..9020da2111fd 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -13,5 +13,7 @@ struct rcar_sysc_ch {
 int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
 int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
 void rcar_sysc_init(phys_addr_t base, u32 syscier);
+int rcar_sysc_power_down_cpu(unsigned int cpu);
+int rcar_sysc_power_up_cpu(unsigned int cpu);
 
 #endif /* __LINUX_SOC_RENESAS_RCAR_SYSC_H__ */
-- 
cgit v1.2.3


From 7e8a50df26f4e7003d09f7e8d1e57fbbb7ebb750 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 30 May 2018 17:25:16 +0200
Subject: soc: renesas: rcar-sysc: Drop legacy handling

Now the R-Car platform code no longer supports DTBs lacking a SYSC
device node in DT, all legacy handling can be dropped from the R-Car
SYSC driver:
  - Make rcar_sysc_ch private to the driver,
  - Make rcar_sysc_power_{down,up}() static (they have been replaced by
    rcar_sysc_power_{down,up}_cpu()),
  - Remove the legacy wrapper rcar_sysc_init(), and the check for double
    initialization (only the early_initcall is left).

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/soc/renesas/rcar-sysc.c       | 38 ++++++++---------------------------
 include/linux/soc/renesas/rcar-sysc.h | 11 ----------
 2 files changed, 8 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c
index 4ad6dcd19420..41af9c7b912f 100644
--- a/drivers/soc/renesas/rcar-sysc.c
+++ b/drivers/soc/renesas/rcar-sysc.c
@@ -58,6 +58,12 @@
 
 #define RCAR_PD_ALWAYS_ON	32	/* Always-on power area */
 
+struct rcar_sysc_ch {
+	u16 chan_offs;
+	u8 chan_bit;
+	u8 isr_bit;
+};
+
 static void __iomem *rcar_sysc_base;
 static DEFINE_SPINLOCK(rcar_sysc_lock); /* SMP CPUs + I/O devices */
 
@@ -143,12 +149,12 @@ static int rcar_sysc_power(const struct rcar_sysc_ch *sysc_ch, bool on)
 	return ret;
 }
 
-int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch)
+static int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch)
 {
 	return rcar_sysc_power(sysc_ch, false);
 }
 
-int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch)
+static int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch)
 {
 	return rcar_sysc_power(sysc_ch, true);
 }
@@ -323,9 +329,6 @@ static int __init rcar_sysc_pd_init(void)
 	unsigned int i;
 	int error;
 
-	if (rcar_sysc_base)
-		return 0;
-
 	np = of_find_matching_node_and_match(NULL, rcar_sysc_matches, &match);
 	if (!np)
 		return -ENODEV;
@@ -428,31 +431,6 @@ void __init rcar_sysc_nullify(struct rcar_sysc_area *areas,
 		}
 }
 
-void __init rcar_sysc_init(phys_addr_t base, u32 syscier)
-{
-	u32 syscimr;
-
-	if (!rcar_sysc_pd_init())
-		return;
-
-	rcar_sysc_base = ioremap_nocache(base, PAGE_SIZE);
-
-	/*
-	 * Mask all interrupt sources to prevent the CPU from receiving them.
-	 * Make sure not to clear reserved bits that were set before.
-	 */
-	syscimr = ioread32(rcar_sysc_base + SYSCIMR);
-	syscimr |= syscier;
-	pr_debug("%s: syscimr = 0x%08x\n", __func__, syscimr);
-	iowrite32(syscimr, rcar_sysc_base + SYSCIMR);
-
-	/*
-	 * SYSC needs all interrupt sources enabled to control power.
-	 */
-	pr_debug("%s: syscier = 0x%08x\n", __func__, syscier);
-	iowrite32(syscier, rcar_sysc_base + SYSCIER);
-}
-
 #ifdef CONFIG_ARCH_R8A7779
 static int rcar_sysc_power_cpu(unsigned int idx, bool on)
 {
diff --git a/include/linux/soc/renesas/rcar-sysc.h b/include/linux/soc/renesas/rcar-sysc.h
index 9020da2111fd..00fae6fd234d 100644
--- a/include/linux/soc/renesas/rcar-sysc.h
+++ b/include/linux/soc/renesas/rcar-sysc.h
@@ -2,17 +2,6 @@
 #ifndef __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 #define __LINUX_SOC_RENESAS_RCAR_SYSC_H__
 
-#include <linux/types.h>
-
-struct rcar_sysc_ch {
-	u16 chan_offs;
-	u8 chan_bit;
-	u8 isr_bit;
-};
-
-int rcar_sysc_power_down(const struct rcar_sysc_ch *sysc_ch);
-int rcar_sysc_power_up(const struct rcar_sysc_ch *sysc_ch);
-void rcar_sysc_init(phys_addr_t base, u32 syscier);
 int rcar_sysc_power_down_cpu(unsigned int cpu);
 int rcar_sysc_power_up_cpu(unsigned int cpu);
 
-- 
cgit v1.2.3


From c60b613a7097cff20fdd05e2891ce69542f0d5a3 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Date: Thu, 14 Jun 2018 20:50:37 +0100
Subject: ASoC: topology: Give more data to clients via callbacks

Give topology clients more access to the topology data by passing index,
pcm, link_config and dai_driver to clients. This allows clients to fully
instantiate and track topology objects.

The SOF driver is the first user of these new APIs and needs them to build
component topology driver and FW objects.

Signed-off-by: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-topology.h           | 23 ++++++++++++++---------
 sound/soc/intel/skylake/skl-pcm.c      |  7 ++++---
 sound/soc/intel/skylake/skl-topology.c |  5 +++--
 sound/soc/intel/skylake/skl-topology.h |  5 +++--
 sound/soc/soc-topology.c               | 31 ++++++++++++++++++-------------
 5 files changed, 42 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index f552c3f56368..e1f265e21ee1 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -30,6 +30,8 @@ struct snd_soc_dapm_context;
 struct snd_soc_card;
 struct snd_kcontrol_new;
 struct snd_soc_dai_link;
+struct snd_soc_dai_driver;
+struct snd_soc_dai;
 
 /* object scan be loaded and unloaded in groups with identfying indexes */
 #define SND_SOC_TPLG_INDEX_ALL	0	/* ID that matches all FW objects */
@@ -109,35 +111,38 @@ struct snd_soc_tplg_widget_events {
 struct snd_soc_tplg_ops {
 
 	/* external kcontrol init - used for any driver specific init */
-	int (*control_load)(struct snd_soc_component *,
+	int (*control_load)(struct snd_soc_component *, int index,
 		struct snd_kcontrol_new *, struct snd_soc_tplg_ctl_hdr *);
 	int (*control_unload)(struct snd_soc_component *,
 		struct snd_soc_dobj *);
 
 	/* external widget init - used for any driver specific init */
-	int (*widget_load)(struct snd_soc_component *,
+	int (*widget_load)(struct snd_soc_component *, int index,
 		struct snd_soc_dapm_widget *,
 		struct snd_soc_tplg_dapm_widget *);
-	int (*widget_ready)(struct snd_soc_component *,
+	int (*widget_ready)(struct snd_soc_component *, int index,
 		struct snd_soc_dapm_widget *,
 		struct snd_soc_tplg_dapm_widget *);
 	int (*widget_unload)(struct snd_soc_component *,
 		struct snd_soc_dobj *);
 
 	/* FE DAI - used for any driver specific init */
-	int (*dai_load)(struct snd_soc_component *,
-		struct snd_soc_dai_driver *dai_drv);
+	int (*dai_load)(struct snd_soc_component *, int index,
+		struct snd_soc_dai_driver *dai_drv,
+		struct snd_soc_tplg_pcm *pcm, struct snd_soc_dai *dai);
+
 	int (*dai_unload)(struct snd_soc_component *,
 		struct snd_soc_dobj *);
 
 	/* DAI link - used for any driver specific init */
-	int (*link_load)(struct snd_soc_component *,
-		struct snd_soc_dai_link *link);
+	int (*link_load)(struct snd_soc_component *, int index,
+		struct snd_soc_dai_link *link,
+		struct snd_soc_tplg_link_config *cfg);
 	int (*link_unload)(struct snd_soc_component *,
 		struct snd_soc_dobj *);
 
 	/* callback to handle vendor bespoke data */
-	int (*vendor_load)(struct snd_soc_component *,
+	int (*vendor_load)(struct snd_soc_component *, int index,
 		struct snd_soc_tplg_hdr *);
 	int (*vendor_unload)(struct snd_soc_component *,
 		struct snd_soc_tplg_hdr *);
@@ -146,7 +151,7 @@ struct snd_soc_tplg_ops {
 	void (*complete)(struct snd_soc_component *);
 
 	/* manifest - optional to inform component of manifest */
-	int (*manifest)(struct snd_soc_component *,
+	int (*manifest)(struct snd_soc_component *, int index,
 		struct snd_soc_tplg_manifest *);
 
 	/* vendor specific kcontrol handlers available for binding */
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index afa86b9e4dcf..1f4dd08d36c5 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -1017,10 +1017,11 @@ static struct snd_soc_dai_driver skl_platform_dai[] = {
 },
 };
 
-int skl_dai_load(struct snd_soc_component *cmp,
-		 struct snd_soc_dai_driver *pcm_dai)
+int skl_dai_load(struct snd_soc_component *cmp, int index,
+			struct snd_soc_dai_driver *dai_drv,
+			struct snd_soc_tplg_pcm *pcm, struct snd_soc_dai *dai)
 {
-	pcm_dai->ops = &skl_pcm_dai_ops;
+	dai_drv->ops = &skl_pcm_dai_ops;
 
 	return 0;
 }
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index fcdc716754b6..647e52aecdc3 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -3024,7 +3024,7 @@ void skl_cleanup_resources(struct skl *skl)
  * information to the driver about module and pipeline parameters which DSP
  * FW expects like ids, resource values, formats etc
  */
-static int skl_tplg_widget_load(struct snd_soc_component *cmpnt,
+static int skl_tplg_widget_load(struct snd_soc_component *cmpnt, int index,
 				struct snd_soc_dapm_widget *w,
 				struct snd_soc_tplg_dapm_widget *tplg_w)
 {
@@ -3131,6 +3131,7 @@ static int skl_init_enum_data(struct device *dev, struct soc_enum *se,
 }
 
 static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
+				int index,
 				struct snd_kcontrol_new *kctl,
 				struct snd_soc_tplg_ctl_hdr *hdr)
 {
@@ -3619,7 +3620,7 @@ static int skl_tplg_get_manifest_data(struct snd_soc_tplg_manifest *manifest,
 	return 0;
 }
 
-static int skl_manifest_load(struct snd_soc_component *cmpnt,
+static int skl_manifest_load(struct snd_soc_component *cmpnt, int index,
 				struct snd_soc_tplg_manifest *manifest)
 {
 	struct hdac_ext_bus *ebus = snd_soc_component_get_drvdata(cmpnt);
diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h
index 6d7e0569695f..af198ea0379e 100644
--- a/sound/soc/intel/skylake/skl-topology.h
+++ b/sound/soc/intel/skylake/skl-topology.h
@@ -512,8 +512,9 @@ int skl_pcm_host_dma_prepare(struct device *dev,
 int skl_pcm_link_dma_prepare(struct device *dev,
 			struct skl_pipe_params *params);
 
-int skl_dai_load(struct snd_soc_component *cmp,
-		 struct snd_soc_dai_driver *pcm_dai);
+int skl_dai_load(struct snd_soc_component *cmp, int index,
+		struct snd_soc_dai_driver *dai_drv,
+		struct snd_soc_tplg_pcm *pcm, struct snd_soc_dai *dai);
 void skl_tplg_add_moduleid_in_bind_params(struct skl *skl,
 				struct snd_soc_dapm_widget *w);
 #endif
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 53f121a50c97..9b33260fd537 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -259,7 +259,7 @@ static int soc_tplg_vendor_load_(struct soc_tplg *tplg,
 	int ret = 0;
 
 	if (tplg->comp && tplg->ops && tplg->ops->vendor_load)
-		ret = tplg->ops->vendor_load(tplg->comp, hdr);
+		ret = tplg->ops->vendor_load(tplg->comp, tplg->index, hdr);
 	else {
 		dev_err(tplg->dev, "ASoC: no vendor load callback for ID %d\n",
 			hdr->vendor_type);
@@ -291,7 +291,8 @@ static int soc_tplg_widget_load(struct soc_tplg *tplg,
 	struct snd_soc_dapm_widget *w, struct snd_soc_tplg_dapm_widget *tplg_w)
 {
 	if (tplg->comp && tplg->ops && tplg->ops->widget_load)
-		return tplg->ops->widget_load(tplg->comp, w, tplg_w);
+		return tplg->ops->widget_load(tplg->comp, tplg->index, w,
+			tplg_w);
 
 	return 0;
 }
@@ -302,27 +303,30 @@ static int soc_tplg_widget_ready(struct soc_tplg *tplg,
 	struct snd_soc_dapm_widget *w, struct snd_soc_tplg_dapm_widget *tplg_w)
 {
 	if (tplg->comp && tplg->ops && tplg->ops->widget_ready)
-		return tplg->ops->widget_ready(tplg->comp, w, tplg_w);
+		return tplg->ops->widget_ready(tplg->comp, tplg->index, w,
+			tplg_w);
 
 	return 0;
 }
 
 /* pass DAI configurations to component driver for extra initialization */
 static int soc_tplg_dai_load(struct soc_tplg *tplg,
-	struct snd_soc_dai_driver *dai_drv)
+	struct snd_soc_dai_driver *dai_drv,
+	struct snd_soc_tplg_pcm *pcm, struct snd_soc_dai *dai)
 {
 	if (tplg->comp && tplg->ops && tplg->ops->dai_load)
-		return tplg->ops->dai_load(tplg->comp, dai_drv);
+		return tplg->ops->dai_load(tplg->comp, tplg->index, dai_drv,
+			pcm, dai);
 
 	return 0;
 }
 
 /* pass link configurations to component driver for extra initialization */
 static int soc_tplg_dai_link_load(struct soc_tplg *tplg,
-	struct snd_soc_dai_link *link)
+	struct snd_soc_dai_link *link, struct snd_soc_tplg_link_config *cfg)
 {
 	if (tplg->comp && tplg->ops && tplg->ops->link_load)
-		return tplg->ops->link_load(tplg->comp, link);
+		return tplg->ops->link_load(tplg->comp, tplg->index, link, cfg);
 
 	return 0;
 }
@@ -643,7 +647,8 @@ static int soc_tplg_init_kcontrol(struct soc_tplg *tplg,
 	struct snd_kcontrol_new *k, struct snd_soc_tplg_ctl_hdr *hdr)
 {
 	if (tplg->comp && tplg->ops && tplg->ops->control_load)
-		return tplg->ops->control_load(tplg->comp, k, hdr);
+		return tplg->ops->control_load(tplg->comp, tplg->index, k,
+			hdr);
 
 	return 0;
 }
@@ -1702,7 +1707,7 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg,
 		dai_drv->compress_new = snd_soc_new_compress;
 
 	/* pass control to component driver for optional further init */
-	ret = soc_tplg_dai_load(tplg, dai_drv);
+	ret = soc_tplg_dai_load(tplg, dai_drv, pcm, NULL);
 	if (ret < 0) {
 		dev_err(tplg->comp->dev, "ASoC: DAI loading failed\n");
 		kfree(dai_drv);
@@ -1772,7 +1777,7 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg,
 		set_link_flags(link, pcm->flag_mask, pcm->flags);
 
 	/* pass control to component driver for optional further init */
-	ret = soc_tplg_dai_link_load(tplg, link);
+	ret = soc_tplg_dai_link_load(tplg, link, NULL);
 	if (ret < 0) {
 		dev_err(tplg->comp->dev, "ASoC: FE link loading failed\n");
 		kfree(link);
@@ -2080,7 +2085,7 @@ static int soc_tplg_link_config(struct soc_tplg *tplg,
 		set_link_flags(link, cfg->flag_mask, cfg->flags);
 
 	/* pass control to component driver for optional further init */
-	ret = soc_tplg_dai_link_load(tplg, link);
+	ret = soc_tplg_dai_link_load(tplg, link, cfg);
 	if (ret < 0) {
 		dev_err(tplg->dev, "ASoC: physical link loading failed\n");
 		return ret;
@@ -2202,7 +2207,7 @@ static int soc_tplg_dai_config(struct soc_tplg *tplg,
 		set_dai_flags(dai_drv, d->flag_mask, d->flags);
 
 	/* pass control to component driver for optional further init */
-	ret = soc_tplg_dai_load(tplg, dai_drv);
+	ret = soc_tplg_dai_load(tplg, dai_drv, NULL, dai);
 	if (ret < 0) {
 		dev_err(tplg->comp->dev, "ASoC: DAI loading failed\n");
 		return ret;
@@ -2311,7 +2316,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg,
 
 	/* pass control to component driver for optional further init */
 	if (tplg->comp && tplg->ops && tplg->ops->manifest)
-		return tplg->ops->manifest(tplg->comp, _manifest);
+		return tplg->ops->manifest(tplg->comp, tplg->index, _manifest);
 
 	if (!abi_match)	/* free the duplicated one */
 		kfree(_manifest);
-- 
cgit v1.2.3


From 503e79b793fea5de626db73accf8e8994bc4289d Mon Sep 17 00:00:00 2001
From: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Date: Thu, 14 Jun 2018 20:53:59 +0100
Subject: ASoC: topology: Add callback for DAPM route load/unload

Add a callback fro clients for notification about DAPM route loading and
unloading.

Signed-off-by: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-topology.h |  7 +++++++
 sound/soc/soc-topology.c     | 13 +++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index e1f265e21ee1..401ef2c45d6c 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -32,6 +32,7 @@ struct snd_kcontrol_new;
 struct snd_soc_dai_link;
 struct snd_soc_dai_driver;
 struct snd_soc_dai;
+struct snd_soc_dapm_route;
 
 /* object scan be loaded and unloaded in groups with identfying indexes */
 #define SND_SOC_TPLG_INDEX_ALL	0	/* ID that matches all FW objects */
@@ -116,6 +117,12 @@ struct snd_soc_tplg_ops {
 	int (*control_unload)(struct snd_soc_component *,
 		struct snd_soc_dobj *);
 
+	/* DAPM graph route element loading and unloading */
+	int (*dapm_route_load)(struct snd_soc_component *, int index,
+		struct snd_soc_dapm_route *route);
+	int (*dapm_route_unload)(struct snd_soc_component *,
+		struct snd_soc_dobj *);
+
 	/* external widget init - used for any driver specific init */
 	int (*widget_load)(struct snd_soc_component *, int index,
 		struct snd_soc_dapm_widget *,
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 9b33260fd537..05d177d689e2 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1105,6 +1105,17 @@ static int soc_tplg_kcontrol_elems_load(struct soc_tplg *tplg,
 	return 0;
 }
 
+/* optionally pass new dynamic kcontrol to component driver. */
+static int soc_tplg_add_route(struct soc_tplg *tplg,
+	struct snd_soc_dapm_route *route)
+{
+	if (tplg->comp && tplg->ops && tplg->ops->dapm_route_load)
+		return tplg->ops->dapm_route_load(tplg->comp, tplg->index,
+			route);
+
+	return 0;
+}
+
 static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
 	struct snd_soc_tplg_hdr *hdr)
 {
@@ -1153,6 +1164,8 @@ static int soc_tplg_dapm_graph_elems_load(struct soc_tplg *tplg,
 		else
 			route.control = elem->control;
 
+		soc_tplg_add_route(tplg, &route);
+
 		/* add route, but keep going if some fail */
 		snd_soc_dapm_add_routes(dapm, &route, 1);
 	}
-- 
cgit v1.2.3


From 297101ab85841319aac2c7843ca755d650c1964f Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 15 Jun 2018 13:44:53 +0200
Subject: regulator: pfuze100: add pfuze3001 support

This extends the pfuze100 driver with pfuze3001 support.

Latest datasheet:
https://www.nxp.com/docs/en/data-sheet/PF3001.pdf

Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig              |  4 +-
 drivers/regulator/pfuze100-regulator.c | 78 +++++++++++++++++++++++++++++++---
 include/linux/regulator/pfuze100.h     | 11 +++++
 3 files changed, 84 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 5dbccf5f3037..2964eaea94c0 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -633,12 +633,12 @@ config REGULATOR_PCF50633
 	 on PCF50633
 
 config REGULATOR_PFUZE100
-	tristate "Freescale PFUZE100/200/3000 regulator driver"
+	tristate "Freescale PFUZE100/200/3000/3001 regulator driver"
 	depends on I2C
 	select REGMAP_I2C
 	help
 	  Say y here to support the regulators found on the Freescale
-	  PFUZE100/200/3000 PMIC.
+	  PFUZE100/200/3000/3001 PMIC.
 
 config REGULATOR_PV88060
 	tristate "Powerventure Semiconductor PV88060 regulator"
diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index 8d9dbcc775ea..32f9af7f87c4 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -44,7 +44,7 @@
 #define PFUZE100_VGEN5VOL	0x70
 #define PFUZE100_VGEN6VOL	0x71
 
-enum chips { PFUZE100, PFUZE200, PFUZE3000 = 3 };
+enum chips { PFUZE100, PFUZE200, PFUZE3000 = 3, PFUZE3001 = 0x31, };
 
 struct pfuze_regulator {
 	struct regulator_desc desc;
@@ -92,6 +92,7 @@ static const struct i2c_device_id pfuze_device_id[] = {
 	{.name = "pfuze100", .driver_data = PFUZE100},
 	{.name = "pfuze200", .driver_data = PFUZE200},
 	{.name = "pfuze3000", .driver_data = PFUZE3000},
+	{.name = "pfuze3001", .driver_data = PFUZE3001},
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, pfuze_device_id);
@@ -100,6 +101,7 @@ static const struct of_device_id pfuze_dt_ids[] = {
 	{ .compatible = "fsl,pfuze100", .data = (void *)PFUZE100},
 	{ .compatible = "fsl,pfuze200", .data = (void *)PFUZE200},
 	{ .compatible = "fsl,pfuze3000", .data = (void *)PFUZE3000},
+	{ .compatible = "fsl,pfuze3001", .data = (void *)PFUZE3001},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pfuze_dt_ids);
@@ -108,10 +110,28 @@ static int pfuze100_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
 {
 	struct pfuze_chip *pfuze100 = rdev_get_drvdata(rdev);
 	int id = rdev_get_id(rdev);
+	bool reg_has_ramp_delay;
 	unsigned int ramp_bits;
 	int ret;
 
-	if (id < PFUZE100_SWBST) {
+	switch (pfuze100->chip_id) {
+	case PFUZE3001:
+		/* no dynamic voltage scaling for PF3001 */
+		reg_has_ramp_delay = false;
+		break;
+	case PFUZE3000:
+		reg_has_ramp_delay = (id < PFUZE3000_SWBST);
+		break;
+	case PFUZE200:
+		reg_has_ramp_delay = (id < PFUZE200_SWBST);
+		break;
+	case PFUZE100:
+	default:
+		reg_has_ramp_delay = (id < PFUZE100_SWBST);
+		break;
+	}
+
+	if (reg_has_ramp_delay) {
 		ramp_delay = 12500 / ramp_delay;
 		ramp_bits = (ramp_delay >> 1) - (ramp_delay >> 3);
 		ret = regmap_update_bits(pfuze100->regmap,
@@ -119,8 +139,9 @@ static int pfuze100_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
 					 0xc0, ramp_bits << 6);
 		if (ret < 0)
 			dev_err(pfuze100->dev, "ramp failed, err %d\n", ret);
-	} else
+	} else {
 		ret = -EACCES;
+	}
 
 	return ret;
 }
@@ -361,6 +382,19 @@ static struct pfuze_regulator pfuze3000_regulators[] = {
 	PFUZE100_VGEN_REG(PFUZE3000, VLDO4, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000),
 };
 
+static struct pfuze_regulator pfuze3001_regulators[] = {
+	PFUZE100_SWB_REG(PFUZE3001, SW1, PFUZE100_SW1ABVOL, 0x1f, pfuze3000_sw1a),
+	PFUZE100_SWB_REG(PFUZE3001, SW2, PFUZE100_SW2VOL, 0x7, pfuze3000_sw2lo),
+	PFUZE3000_SW3_REG(PFUZE3001, SW3, PFUZE100_SW3AVOL, 900000, 1650000, 50000),
+	PFUZE100_SWB_REG(PFUZE3001, VSNVS, PFUZE100_VSNVSVOL, 0x7, pfuze100_vsnvs),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO1, PFUZE100_VGEN1VOL, 1800000, 3300000, 100000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO2, PFUZE100_VGEN2VOL, 800000, 1550000, 50000),
+	PFUZE3000_VCC_REG(PFUZE3001, VCCSD, PFUZE100_VGEN3VOL, 2850000, 3300000, 150000),
+	PFUZE3000_VCC_REG(PFUZE3001, V33, PFUZE100_VGEN4VOL, 2850000, 3300000, 150000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO3, PFUZE100_VGEN5VOL, 1800000, 3300000, 100000),
+	PFUZE100_VGEN_REG(PFUZE3001, VLDO4, PFUZE100_VGEN6VOL, 1800000, 3300000, 100000),
+};
+
 #ifdef CONFIG_OF
 /* PFUZE100 */
 static struct of_regulator_match pfuze100_matches[] = {
@@ -418,6 +452,21 @@ static struct of_regulator_match pfuze3000_matches[] = {
 	{ .name = "vldo4",	},
 };
 
+/* PFUZE3001 */
+static struct of_regulator_match pfuze3001_matches[] = {
+
+	{ .name = "sw1",	},
+	{ .name = "sw2",	},
+	{ .name = "sw3",	},
+	{ .name = "vsnvs",	},
+	{ .name = "vldo1",	},
+	{ .name = "vldo2",	},
+	{ .name = "vccsd",	},
+	{ .name = "v33",	},
+	{ .name = "vldo3",	},
+	{ .name = "vldo4",	},
+};
+
 static struct of_regulator_match *pfuze_matches;
 
 static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
@@ -437,6 +486,11 @@ static int pfuze_parse_regulators_dt(struct pfuze_chip *chip)
 	}
 
 	switch (chip->chip_id) {
+	case PFUZE3001:
+		pfuze_matches = pfuze3001_matches;
+		ret = of_regulator_match(dev, parent, pfuze3001_matches,
+					 ARRAY_SIZE(pfuze3001_matches));
+		break;
 	case PFUZE3000:
 		pfuze_matches = pfuze3000_matches;
 		ret = of_regulator_match(dev, parent, pfuze3000_matches,
@@ -508,7 +562,8 @@ static int pfuze_identify(struct pfuze_chip *pfuze_chip)
 		 */
 		dev_info(pfuze_chip->dev, "Assuming misprogrammed ID=0x8");
 	} else if ((value & 0x0f) != pfuze_chip->chip_id &&
-		   (value & 0xf0) >> 4 != pfuze_chip->chip_id) {
+		   (value & 0xf0) >> 4 != pfuze_chip->chip_id &&
+		   (value != pfuze_chip->chip_id)) {
 		/* device id NOT match with your setting */
 		dev_warn(pfuze_chip->dev, "Illegal ID: %x\n", value);
 		return -ENODEV;
@@ -588,6 +643,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 
 	/* use the right regulators after identify the right device */
 	switch (pfuze_chip->chip_id) {
+	case PFUZE3001:
+		pfuze_chip->pfuze_regulators = pfuze3001_regulators;
+		regulator_num = ARRAY_SIZE(pfuze3001_regulators);
+		sw_check_start = PFUZE3001_SW2;
+		sw_check_end = PFUZE3001_SW2;
+		sw_hi = 1 << 3;
+		break;
 	case PFUZE3000:
 		pfuze_chip->pfuze_regulators = pfuze3000_regulators;
 		regulator_num = ARRAY_SIZE(pfuze3000_regulators);
@@ -611,7 +673,8 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 	}
 	dev_info(&client->dev, "pfuze%s found.\n",
 		(pfuze_chip->chip_id == PFUZE100) ? "100" :
-		((pfuze_chip->chip_id == PFUZE200) ? "200" : "3000"));
+		(((pfuze_chip->chip_id == PFUZE200) ? "200" :
+		((pfuze_chip->chip_id == PFUZE3000) ? "3000" : "3001"))));
 
 	memcpy(pfuze_chip->regulator_descs, pfuze_chip->pfuze_regulators,
 		sizeof(pfuze_chip->regulator_descs));
@@ -636,7 +699,8 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
 		if (i >= sw_check_start && i <= sw_check_end) {
 			regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val);
 			if (val & sw_hi) {
-				if (pfuze_chip->chip_id == PFUZE3000) {
+				if (pfuze_chip->chip_id == PFUZE3000 ||
+					pfuze_chip->chip_id == PFUZE3001) {
 					desc->volt_table = pfuze3000_sw2hi;
 					desc->n_voltages = ARRAY_SIZE(pfuze3000_sw2hi);
 				} else {
@@ -675,5 +739,5 @@ static struct i2c_driver pfuze_driver = {
 module_i2c_driver(pfuze_driver);
 
 MODULE_AUTHOR("Robin Gong <b38343@freescale.com>");
-MODULE_DESCRIPTION("Regulator Driver for Freescale PFUZE100/200/3000 PMIC");
+MODULE_DESCRIPTION("Regulator Driver for Freescale PFUZE100/200/3000/3001 PMIC");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h
index e0ccf46f66cf..cb5aecd40f07 100644
--- a/include/linux/regulator/pfuze100.h
+++ b/include/linux/regulator/pfuze100.h
@@ -64,6 +64,17 @@
 #define PFUZE3000_VLDO3		11
 #define PFUZE3000_VLDO4		12
 
+#define PFUZE3001_SW1		0
+#define PFUZE3001_SW2		1
+#define PFUZE3001_SW3		2
+#define PFUZE3001_VSNVS		3
+#define PFUZE3001_VLDO1		4
+#define PFUZE3001_VLDO2		5
+#define PFUZE3001_VCCSD		6
+#define PFUZE3001_V33		7
+#define PFUZE3001_VLDO3		8
+#define PFUZE3001_VLDO4		9
+
 struct regulator_init_data;
 
 struct pfuze_regulator_platform_data {
-- 
cgit v1.2.3


From 7bd0c7ba62e8a9840f15fc4ff0122b29fe1b6413 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Thu, 7 Jun 2018 16:51:38 -0700
Subject: regulator: Fix typo in comment of struct regulator_linear_range

regulator_map_linar_range() => regulator_map_linear_range()

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index fc2dc8df476f..dea96ee39fdc 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -46,7 +46,7 @@ enum regulator_status {
 /**
  * struct regulator_linear_range - specify linear voltage ranges
  *
- * Specify a range of voltages for regulator_map_linar_range() and
+ * Specify a range of voltages for regulator_map_linear_range() and
  * regulator_list_linear_range().
  *
  * @min_uV:  Lowest voltage in range
-- 
cgit v1.2.3


From 3e247b0fb540a9a503a71ef6b918a3443e49655f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:40:32 +0000
Subject: spi: remove unused adi_spi3.h header

include/linux/spi/adi_spi3.h is unused since commit 47838669de9d ("spi: remove blackfin related host drivers")
Finish the cleaning by removing it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/adi_spi3.h | 254 -------------------------------------------
 1 file changed, 254 deletions(-)
 delete mode 100644 include/linux/spi/adi_spi3.h

(limited to 'include')

diff --git a/include/linux/spi/adi_spi3.h b/include/linux/spi/adi_spi3.h
deleted file mode 100644
index c84123aa1d06..000000000000
--- a/include/linux/spi/adi_spi3.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Analog Devices SPI3 controller driver
- *
- * Copyright (c) 2014 Analog Devices Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef _ADI_SPI3_H_
-#define _ADI_SPI3_H_
-
-#include <linux/types.h>
-
-/* SPI_CONTROL */
-#define SPI_CTL_EN                  0x00000001    /* Enable */
-#define SPI_CTL_MSTR                0x00000002    /* Master/Slave */
-#define SPI_CTL_PSSE                0x00000004    /* controls modf error in master mode */
-#define SPI_CTL_ODM                 0x00000008    /* Open Drain Mode */
-#define SPI_CTL_CPHA                0x00000010    /* Clock Phase */
-#define SPI_CTL_CPOL                0x00000020    /* Clock Polarity */
-#define SPI_CTL_ASSEL               0x00000040    /* Slave Select Pin Control */
-#define SPI_CTL_SELST               0x00000080    /* Slave Select Polarity in-between transfers */
-#define SPI_CTL_EMISO               0x00000100    /* Enable MISO */
-#define SPI_CTL_SIZE                0x00000600    /* Word Transfer Size */
-#define SPI_CTL_SIZE08              0x00000000    /* SIZE: 8 bits */
-#define SPI_CTL_SIZE16              0x00000200    /* SIZE: 16 bits */
-#define SPI_CTL_SIZE32              0x00000400    /* SIZE: 32 bits */
-#define SPI_CTL_LSBF                0x00001000    /* LSB First */
-#define SPI_CTL_FCEN                0x00002000    /* Flow-Control Enable */
-#define SPI_CTL_FCCH                0x00004000    /* Flow-Control Channel Selection */
-#define SPI_CTL_FCPL                0x00008000    /* Flow-Control Polarity */
-#define SPI_CTL_FCWM                0x00030000    /* Flow-Control Water-Mark */
-#define SPI_CTL_FIFO0               0x00000000    /* FCWM: TFIFO empty or RFIFO Full */
-#define SPI_CTL_FIFO1               0x00010000    /* FCWM: TFIFO 75% or more empty or RFIFO 75% or more full */
-#define SPI_CTL_FIFO2               0x00020000    /* FCWM: TFIFO 50% or more empty or RFIFO 50% or more full */
-#define SPI_CTL_FMODE               0x00040000    /* Fast-mode Enable */
-#define SPI_CTL_MIOM                0x00300000    /* Multiple I/O Mode */
-#define SPI_CTL_MIO_DIS             0x00000000    /* MIOM: Disable */
-#define SPI_CTL_MIO_DUAL            0x00100000    /* MIOM: Enable DIOM (Dual I/O Mode) */
-#define SPI_CTL_MIO_QUAD            0x00200000    /* MIOM: Enable QUAD (Quad SPI Mode) */
-#define SPI_CTL_SOSI                0x00400000    /* Start on MOSI */
-/* SPI_RX_CONTROL */
-#define SPI_RXCTL_REN               0x00000001    /* Receive Channel Enable */
-#define SPI_RXCTL_RTI               0x00000004    /* Receive Transfer Initiate */
-#define SPI_RXCTL_RWCEN             0x00000008    /* Receive Word Counter Enable */
-#define SPI_RXCTL_RDR               0x00000070    /* Receive Data Request */
-#define SPI_RXCTL_RDR_DIS           0x00000000    /* RDR: Disabled */
-#define SPI_RXCTL_RDR_NE            0x00000010    /* RDR: RFIFO not empty */
-#define SPI_RXCTL_RDR_25            0x00000020    /* RDR: RFIFO 25% full */
-#define SPI_RXCTL_RDR_50            0x00000030    /* RDR: RFIFO 50% full */
-#define SPI_RXCTL_RDR_75            0x00000040    /* RDR: RFIFO 75% full */
-#define SPI_RXCTL_RDR_FULL          0x00000050    /* RDR: RFIFO full */
-#define SPI_RXCTL_RDO               0x00000100    /* Receive Data Over-Run */
-#define SPI_RXCTL_RRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_RXCTL_RWM_0             0x00000000    /* RRWM: RFIFO Empty */
-#define SPI_RXCTL_RWM_25            0x00001000    /* RRWM: RFIFO 25% full */
-#define SPI_RXCTL_RWM_50            0x00002000    /* RRWM: RFIFO 50% full */
-#define SPI_RXCTL_RWM_75            0x00003000    /* RRWM: RFIFO 75% full */
-#define SPI_RXCTL_RUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_RXCTL_UWM_DIS           0x00000000    /* RUWM: Disabled */
-#define SPI_RXCTL_UWM_25            0x00010000    /* RUWM: RFIFO 25% full */
-#define SPI_RXCTL_UWM_50            0x00020000    /* RUWM: RFIFO 50% full */
-#define SPI_RXCTL_UWM_75            0x00030000    /* RUWM: RFIFO 75% full */
-#define SPI_RXCTL_UWM_FULL          0x00040000    /* RUWM: RFIFO full */
-/* SPI_TX_CONTROL */
-#define SPI_TXCTL_TEN               0x00000001    /* Transmit Channel Enable */
-#define SPI_TXCTL_TTI               0x00000004    /* Transmit Transfer Initiate */
-#define SPI_TXCTL_TWCEN             0x00000008    /* Transmit Word Counter Enable */
-#define SPI_TXCTL_TDR               0x00000070    /* Transmit Data Request */
-#define SPI_TXCTL_TDR_DIS           0x00000000    /* TDR: Disabled */
-#define SPI_TXCTL_TDR_NF            0x00000010    /* TDR: TFIFO not full */
-#define SPI_TXCTL_TDR_25            0x00000020    /* TDR: TFIFO 25% empty */
-#define SPI_TXCTL_TDR_50            0x00000030    /* TDR: TFIFO 50% empty */
-#define SPI_TXCTL_TDR_75            0x00000040    /* TDR: TFIFO 75% empty */
-#define SPI_TXCTL_TDR_EMPTY         0x00000050    /* TDR: TFIFO empty */
-#define SPI_TXCTL_TDU               0x00000100    /* Transmit Data Under-Run */
-#define SPI_TXCTL_TRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_TXCTL_RWM_FULL          0x00000000    /* TRWM: TFIFO full */
-#define SPI_TXCTL_RWM_25            0x00001000    /* TRWM: TFIFO 25% empty */
-#define SPI_TXCTL_RWM_50            0x00002000    /* TRWM: TFIFO 50% empty */
-#define SPI_TXCTL_RWM_75            0x00003000    /* TRWM: TFIFO 75% empty */
-#define SPI_TXCTL_TUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_TXCTL_UWM_DIS           0x00000000    /* TUWM: Disabled */
-#define SPI_TXCTL_UWM_25            0x00010000    /* TUWM: TFIFO 25% empty */
-#define SPI_TXCTL_UWM_50            0x00020000    /* TUWM: TFIFO 50% empty */
-#define SPI_TXCTL_UWM_75            0x00030000    /* TUWM: TFIFO 75% empty */
-#define SPI_TXCTL_UWM_EMPTY         0x00040000    /* TUWM: TFIFO empty */
-/* SPI_CLOCK */
-#define SPI_CLK_BAUD                0x0000FFFF    /* Baud Rate */
-/* SPI_DELAY */
-#define SPI_DLY_STOP                0x000000FF    /* Transfer delay time in multiples of SCK period */
-#define SPI_DLY_LEADX               0x00000100    /* Extended (1 SCK) LEAD Control */
-#define SPI_DLY_LAGX                0x00000200    /* Extended (1 SCK) LAG control */
-/* SPI_SSEL */
-#define SPI_SLVSEL_SSE1             0x00000002    /* SPISSEL1 Enable */
-#define SPI_SLVSEL_SSE2             0x00000004    /* SPISSEL2 Enable */
-#define SPI_SLVSEL_SSE3             0x00000008    /* SPISSEL3 Enable */
-#define SPI_SLVSEL_SSE4             0x00000010    /* SPISSEL4 Enable */
-#define SPI_SLVSEL_SSE5             0x00000020    /* SPISSEL5 Enable */
-#define SPI_SLVSEL_SSE6             0x00000040    /* SPISSEL6 Enable */
-#define SPI_SLVSEL_SSE7             0x00000080    /* SPISSEL7 Enable */
-#define SPI_SLVSEL_SSEL1            0x00000200    /* SPISSEL1 Value */
-#define SPI_SLVSEL_SSEL2            0x00000400    /* SPISSEL2 Value */
-#define SPI_SLVSEL_SSEL3            0x00000800    /* SPISSEL3 Value */
-#define SPI_SLVSEL_SSEL4            0x00001000    /* SPISSEL4 Value */
-#define SPI_SLVSEL_SSEL5            0x00002000    /* SPISSEL5 Value */
-#define SPI_SLVSEL_SSEL6            0x00004000    /* SPISSEL6 Value */
-#define SPI_SLVSEL_SSEL7            0x00008000    /* SPISSEL7 Value */
-/* SPI_RWC */
-#define SPI_RWC_VALUE               0x0000FFFF    /* Received Word-Count */
-/* SPI_RWCR */
-#define SPI_RWCR_VALUE              0x0000FFFF    /* Received Word-Count Reload */
-/* SPI_TWC */
-#define SPI_TWC_VALUE               0x0000FFFF    /* Transmitted Word-Count */
-/* SPI_TWCR */
-#define SPI_TWCR_VALUE              0x0000FFFF    /* Transmitted Word-Count Reload */
-/* SPI_IMASK */
-#define SPI_IMSK_RUWM               0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_TUWM               0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_ROM                0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_TUM                0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_TCM                0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_MFM                0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_RSM                0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_TSM                0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_RFM                0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_TFM                0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKCL */
-#define SPI_IMSK_CLR_RUW            0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_CLR_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_CLR_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_CLR_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_CLR_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_CLR_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKST */
-#define SPI_IMSK_SET_RUWM           0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_SET_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_SET_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_SET_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_SET_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_SET_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_STATUS */
-#define SPI_STAT_SPIF               0x00000001    /* SPI Finished */
-#define SPI_STAT_RUWM               0x00000002    /* Receive Urgent Water-Mark Breached */
-#define SPI_STAT_TUWM               0x00000004    /* Transmit Urgent Water-Mark Breached */
-#define SPI_STAT_ROE                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_STAT_TUE                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_STAT_TCE                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_STAT_MODF               0x00000080    /* Mode Fault Error Indication */
-#define SPI_STAT_RS                 0x00000100    /* Receive Start Indication */
-#define SPI_STAT_TS                 0x00000200    /* Transmit Start Indication */
-#define SPI_STAT_RF                 0x00000400    /* Receive Finish Indication */
-#define SPI_STAT_TF                 0x00000800    /* Transmit Finish Indication */
-#define SPI_STAT_RFS                0x00007000    /* SPI_RFIFO status */
-#define SPI_STAT_RFIFO_EMPTY        0x00000000    /* RFS: RFIFO Empty */
-#define SPI_STAT_RFIFO_25           0x00001000    /* RFS: RFIFO 25% Full */
-#define SPI_STAT_RFIFO_50           0x00002000    /* RFS: RFIFO 50% Full */
-#define SPI_STAT_RFIFO_75           0x00003000    /* RFS: RFIFO 75% Full */
-#define SPI_STAT_RFIFO_FULL         0x00004000    /* RFS: RFIFO Full */
-#define SPI_STAT_TFS                0x00070000    /* SPI_TFIFO status */
-#define SPI_STAT_TFIFO_FULL         0x00000000    /* TFS: TFIFO full */
-#define SPI_STAT_TFIFO_25           0x00010000    /* TFS: TFIFO 25% empty */
-#define SPI_STAT_TFIFO_50           0x00020000    /* TFS: TFIFO 50% empty */
-#define SPI_STAT_TFIFO_75           0x00030000    /* TFS: TFIFO 75% empty */
-#define SPI_STAT_TFIFO_EMPTY        0x00040000    /* TFS: TFIFO empty */
-#define SPI_STAT_FCS                0x00100000    /* Flow-Control Stall Indication */
-#define SPI_STAT_RFE                0x00400000    /* SPI_RFIFO Empty */
-#define SPI_STAT_TFF                0x00800000    /* SPI_TFIFO Full */
-/* SPI_ILAT */
-#define SPI_ILAT_RUWMI              0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_TUWMI              0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_ROI                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_TUI                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_TCI                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_MFI                0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_RSI                0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_TSI                0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_RFI                0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_TFI                0x00000800    /* Transmit Finish Indication */
-/* SPI_ILATCL */
-#define SPI_ILAT_CLR_RUWMI          0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_TUWMI          0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_ROI            0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_CLR_TUI            0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_CLR_TCI            0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_CLR_MFI            0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_CLR_RSI            0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_CLR_TSI            0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_CLR_RFI            0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_CLR_TFI            0x00000800    /* Transmit Finish Indication */
-
-/*
- * adi spi3 registers layout
- */
-struct adi_spi_regs {
-	u32 revid;
-	u32 control;
-	u32 rx_control;
-	u32 tx_control;
-	u32 clock;
-	u32 delay;
-	u32 ssel;
-	u32 rwc;
-	u32 rwcr;
-	u32 twc;
-	u32 twcr;
-	u32 reserved0;
-	u32 emask;
-	u32 emaskcl;
-	u32 emaskst;
-	u32 reserved1;
-	u32 status;
-	u32 elat;
-	u32 elatcl;
-	u32 reserved2;
-	u32 rfifo;
-	u32 reserved3;
-	u32 tfifo;
-};
-
-#define MAX_CTRL_CS          8  /* cs in spi controller */
-
-/* device.platform_data for SSP controller devices */
-struct adi_spi3_master {
-	u16 num_chipselect;
-	u16 pin_req[7];
-};
-
-/* spi_board_info.controller_data for SPI slave devices,
- * copied to spi_device.platform_data ... mostly for dma tuning
- */
-struct adi_spi3_chip {
-	u32 control;
-	u16 cs_chg_udelay; /* Some devices require 16-bit delays */
-	u32 tx_dummy_val; /* tx value for rx only transfer */
-	bool enable_dma;
-};
-
-#endif /* _ADI_SPI3_H_ */
-- 
cgit v1.2.3


From 62c2c9fcac4341d306dda4cf400b77e7e124480a Mon Sep 17 00:00:00 2001
From: Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
Date: Mon, 11 Jun 2018 17:32:12 +0900
Subject: ASoC: simple-card-utils: move hp and mic detect gpios from
 simple-card

This patch moves headphone and microphone jack detection gpios from
simple-card driver. It is preparing for using this feature from other
drivers.

Signed-off-by: Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
Acked-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h     | 15 ++++++++
 sound/soc/generic/simple-card-utils.c | 59 ++++++++++++++++++++++++++++++++
 sound/soc/generic/simple-card.c       | 64 -----------------------------------
 3 files changed, 74 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 7e25afce6566..f82acef3b992 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -12,6 +12,11 @@
 
 #include <sound/soc.h>
 
+#define asoc_simple_card_init_hp(card, sjack, prefix) \
+	asoc_simple_card_init_jack(card, sjack, 1, prefix)
+#define asoc_simple_card_init_mic(card, sjack, prefix) \
+	asoc_simple_card_init_jack(card, sjack, 0, prefix)
+
 struct asoc_simple_dai {
 	const char *name;
 	unsigned int sysclk;
@@ -28,6 +33,12 @@ struct asoc_simple_card_data {
 	u32 convert_channels;
 };
 
+struct asoc_simple_jack {
+	struct snd_soc_jack jack;
+	struct snd_soc_jack_pin pin;
+	struct snd_soc_jack_gpio gpio;
+};
+
 int asoc_simple_card_parse_daifmt(struct device *dev,
 				  struct device_node *node,
 				  struct device_node *codec,
@@ -107,4 +118,8 @@ int asoc_simple_card_of_parse_routing(struct snd_soc_card *card,
 int asoc_simple_card_of_parse_widgets(struct snd_soc_card *card,
 				      char *prefix);
 
+int asoc_simple_card_init_jack(struct snd_soc_card *card,
+			       struct asoc_simple_jack *sjack,
+			       int is_hp, char *prefix);
+
 #endif /* __SIMPLE_CARD_UTILS_H */
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 3751a07de6aa..4398c9580929 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -8,9 +8,13 @@
  * published by the Free Software Foundation.
  */
 #include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_gpio.h>
 #include <linux/of_graph.h>
+#include <sound/jack.h>
 #include <sound/simple_card_utils.h>
 
 void asoc_simple_card_convert_fixup(struct asoc_simple_card_data *data,
@@ -419,6 +423,61 @@ int asoc_simple_card_of_parse_widgets(struct snd_soc_card *card,
 }
 EXPORT_SYMBOL_GPL(asoc_simple_card_of_parse_widgets);
 
+int asoc_simple_card_init_jack(struct snd_soc_card *card,
+			       struct asoc_simple_jack *sjack,
+			       int is_hp, char *prefix)
+{
+	struct device *dev = card->dev;
+	enum of_gpio_flags flags;
+	char prop[128];
+	char *pin_name;
+	char *gpio_name;
+	int mask;
+	int det;
+
+	if (!prefix)
+		prefix = "";
+
+	sjack->gpio.gpio = -ENOENT;
+
+	if (is_hp) {
+		snprintf(prop, sizeof(prop), "%shp-det-gpio", prefix);
+		pin_name	= "Headphones";
+		gpio_name	= "Headphone detection";
+		mask		= SND_JACK_HEADPHONE;
+	} else {
+		snprintf(prop, sizeof(prop), "%smic-det-gpio", prefix);
+		pin_name	= "Mic Jack";
+		gpio_name	= "Mic detection";
+		mask		= SND_JACK_MICROPHONE;
+	}
+
+	det = of_get_named_gpio_flags(dev->of_node, prop, 0, &flags);
+	if (det == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	if (gpio_is_valid(det)) {
+		sjack->pin.pin		= pin_name;
+		sjack->pin.mask		= mask;
+
+		sjack->gpio.name	= gpio_name;
+		sjack->gpio.report	= mask;
+		sjack->gpio.gpio	= det;
+		sjack->gpio.invert	= !!(flags & OF_GPIO_ACTIVE_LOW);
+		sjack->gpio.debounce_time = 150;
+
+		snd_soc_card_jack_new(card, pin_name, mask,
+				      &sjack->jack,
+				      &sjack->pin, 1);
+
+		snd_soc_jack_add_gpios(&sjack->jack, 1,
+				       &sjack->gpio);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(asoc_simple_card_init_jack);
+
 /* Module information */
 MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
 MODULE_DESCRIPTION("ALSA SoC Simple Card Utils");
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 8b374af86a6e..a6477a022156 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -10,23 +10,14 @@
  */
 #include <linux/clk.h>
 #include <linux/device.h>
-#include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 #include <linux/string.h>
-#include <sound/jack.h>
 #include <sound/simple_card.h>
 #include <sound/soc-dai.h>
 #include <sound/soc.h>
 
-struct asoc_simple_jack {
-	struct snd_soc_jack jack;
-	struct snd_soc_jack_pin pin;
-	struct snd_soc_jack_gpio gpio;
-};
-
 struct simple_card_data {
 	struct snd_soc_card snd_card;
 	struct simple_dai_props {
@@ -49,61 +40,6 @@ struct simple_card_data {
 #define CELL	"#sound-dai-cells"
 #define PREFIX	"simple-audio-card,"
 
-#define asoc_simple_card_init_hp(card, sjack, prefix)\
-	asoc_simple_card_init_jack(card, sjack, 1, prefix)
-#define asoc_simple_card_init_mic(card, sjack, prefix)\
-	asoc_simple_card_init_jack(card, sjack, 0, prefix)
-static int asoc_simple_card_init_jack(struct snd_soc_card *card,
-				      struct asoc_simple_jack *sjack,
-				      int is_hp, char *prefix)
-{
-	struct device *dev = card->dev;
-	enum of_gpio_flags flags;
-	char prop[128];
-	char *pin_name;
-	char *gpio_name;
-	int mask;
-	int det;
-
-	sjack->gpio.gpio = -ENOENT;
-
-	if (is_hp) {
-		snprintf(prop, sizeof(prop), "%shp-det-gpio", prefix);
-		pin_name	= "Headphones";
-		gpio_name	= "Headphone detection";
-		mask		= SND_JACK_HEADPHONE;
-	} else {
-		snprintf(prop, sizeof(prop), "%smic-det-gpio", prefix);
-		pin_name	= "Mic Jack";
-		gpio_name	= "Mic detection";
-		mask		= SND_JACK_MICROPHONE;
-	}
-
-	det = of_get_named_gpio_flags(dev->of_node, prop, 0, &flags);
-	if (det == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
-
-	if (gpio_is_valid(det)) {
-		sjack->pin.pin		= pin_name;
-		sjack->pin.mask		= mask;
-
-		sjack->gpio.name	= gpio_name;
-		sjack->gpio.report	= mask;
-		sjack->gpio.gpio	= det;
-		sjack->gpio.invert	= !!(flags & OF_GPIO_ACTIVE_LOW);
-		sjack->gpio.debounce_time = 150;
-
-		snd_soc_card_jack_new(card, pin_name, mask,
-				      &sjack->jack,
-				      &sjack->pin, 1);
-
-		snd_soc_jack_add_gpios(&sjack->jack, 1,
-				       &sjack->gpio);
-	}
-
-	return 0;
-}
-
 static int asoc_simple_card_startup(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-- 
cgit v1.2.3


From 0ddce71c21f03fd19867c4939d3ca710f37cdf1a Mon Sep 17 00:00:00 2001
From: Bard Liao <bardliao@realtek.com>
Date: Thu, 7 Jun 2018 16:37:38 +0800
Subject: ASoC: rt5682: add rt5682 codec driver

This is the initial codec driver for rt5682.

Signed-off-by: Bard Liao <bardliao@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/sound/rt5682.txt |   50 +
 include/sound/rt5682.h                             |   40 +
 sound/soc/codecs/Kconfig                           |    6 +
 sound/soc/codecs/Makefile                          |    2 +
 sound/soc/codecs/rt5682.c                          | 2682 ++++++++++++++++++++
 sound/soc/codecs/rt5682.h                          | 1324 ++++++++++
 6 files changed, 4104 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/rt5682.txt
 create mode 100644 include/sound/rt5682.h
 create mode 100644 sound/soc/codecs/rt5682.c
 create mode 100644 sound/soc/codecs/rt5682.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/sound/rt5682.txt b/Documentation/devicetree/bindings/sound/rt5682.txt
new file mode 100644
index 000000000000..312e9a129530
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt5682.txt
@@ -0,0 +1,50 @@
+RT5682 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt5682" or "realtek,rt5682i"
+
+- reg : The I2C address of the device.
+
+Optional properties:
+
+- interrupts : The CODEC's interrupt output.
+
+- realtek,dmic1-data-pin
+  0: dmic1 is not used
+  1: using GPIO2 pin as dmic1 data pin
+  2: using GPIO5 pin as dmic1 data pin
+
+- realtek,dmic1-clk-pin
+  0: using GPIO1 pin as dmic1 clock pin
+  1: using GPIO3 pin as dmic1 clock pin
+
+- realtek,jd-src
+  0: No JD is used
+  1: using JD1 as JD source
+
+- realtek,ldo1-en-gpios : The GPIO that controls the CODEC's LDO1_EN pin.
+
+Pins on the device (for linking into audio routes) for RT5682:
+
+  * DMIC L1
+  * DMIC R1
+  * IN1P
+  * HPOL
+  * HPOR
+
+Example:
+
+rt5682 {
+	compatible = "realtek,rt5682i";
+	reg = <0x1a>;
+	interrupt-parent = <&gpio>;
+	interrupts = <TEGRA_GPIO(U, 6) GPIO_ACTIVE_HIGH>;
+	realtek,ldo1-en-gpios =
+		<&gpio TEGRA_GPIO(R, 2) GPIO_ACTIVE_HIGH>;
+	realtek,dmic1-data-pin = <1>;
+	realtek,dmic1-clk-pin = <1>;
+	realtek,jd-src = <1>;
+};
diff --git a/include/sound/rt5682.h b/include/sound/rt5682.h
new file mode 100644
index 000000000000..0251797ab438
--- /dev/null
+++ b/include/sound/rt5682.h
@@ -0,0 +1,40 @@
+/*
+ * linux/sound/rt5682.h -- Platform data for RT5682
+ *
+ * Copyright 2018 Realtek Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_SND_RT5682_H
+#define __LINUX_SND_RT5682_H
+
+enum rt5682_dmic1_data_pin {
+	RT5682_DMIC1_NULL,
+	RT5682_DMIC1_DATA_GPIO2,
+	RT5682_DMIC1_DATA_GPIO5,
+};
+
+enum rt5682_dmic1_clk_pin {
+	RT5682_DMIC1_CLK_GPIO1,
+	RT5682_DMIC1_CLK_GPIO3,
+};
+
+enum rt5682_jd_src {
+	RT5682_JD_NULL,
+	RT5682_JD1,
+};
+
+struct rt5682_platform_data {
+
+	int ldo1_en; /* GPIO for LDO1_EN */
+
+	enum rt5682_dmic1_data_pin dmic1_data_pin;
+	enum rt5682_dmic1_clk_pin dmic1_clk_pin;
+	enum rt5682_jd_src jd_src;
+};
+
+#endif
+
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 63cf62e9c9aa..f6b8d4bf8796 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -141,6 +141,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_RT5668 if I2C
 	select SND_SOC_RT5670 if I2C
 	select SND_SOC_RT5677 if I2C && SPI_MASTER
+	select SND_SOC_RT5682 if I2C
 	select SND_SOC_SGTL5000 if I2C
 	select SND_SOC_SI476X if MFD_SI476X_CORE
 	select SND_SOC_SIRF_AUDIO_CODEC
@@ -778,6 +779,7 @@ config SND_SOC_RL6231
 	default y if SND_SOC_RT5668=y
 	default y if SND_SOC_RT5670=y
 	default y if SND_SOC_RT5677=y
+	default y if SND_SOC_RT5682=y
 	default y if SND_SOC_RT1305=y
 	default m if SND_SOC_RT5514=m
 	default m if SND_SOC_RT5616=m
@@ -791,6 +793,7 @@ config SND_SOC_RL6231
 	default m if SND_SOC_RT5668=m
 	default m if SND_SOC_RT5670=m
 	default m if SND_SOC_RT5677=m
+	default m if SND_SOC_RT5682=m
 	default m if SND_SOC_RT1305=m
 
 config SND_SOC_RL6347A
@@ -871,6 +874,9 @@ config SND_SOC_RT5677_SPI
 	tristate
 	default SND_SOC_RT5677 && SPI
 
+config SND_SOC_RT5682
+	tristate
+
 #Freescale sgtl5000 codec
 config SND_SOC_SGTL5000
 	tristate "Freescale SGTL5000 CODEC"
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index e023fdf85221..e43d99a039d3 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -146,6 +146,7 @@ snd-soc-rt5668-objs := rt5668.o
 snd-soc-rt5670-objs := rt5670.o
 snd-soc-rt5677-objs := rt5677.o
 snd-soc-rt5677-spi-objs := rt5677-spi.o
+snd-soc-rt5682-objs := rt5682.o
 snd-soc-sgtl5000-objs := sgtl5000.o
 snd-soc-alc5623-objs := alc5623.o
 snd-soc-alc5632-objs := alc5632.o
@@ -405,6 +406,7 @@ obj-$(CONFIG_SND_SOC_RT5668)	+= snd-soc-rt5668.o
 obj-$(CONFIG_SND_SOC_RT5670)	+= snd-soc-rt5670.o
 obj-$(CONFIG_SND_SOC_RT5677)	+= snd-soc-rt5677.o
 obj-$(CONFIG_SND_SOC_RT5677_SPI)	+= snd-soc-rt5677-spi.o
+obj-$(CONFIG_SND_SOC_RT5682)	+= snd-soc-rt5682.o
 obj-$(CONFIG_SND_SOC_SGTL5000)  += snd-soc-sgtl5000.o
 obj-$(CONFIG_SND_SOC_SIGMADSP)	+= snd-soc-sigmadsp.o
 obj-$(CONFIG_SND_SOC_SIGMADSP_I2C)	+= snd-soc-sigmadsp-i2c.o
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
new file mode 100644
index 000000000000..61a97301bcfa
--- /dev/null
+++ b/sound/soc/codecs/rt5682.c
@@ -0,0 +1,2682 @@
+/*
+ * rt5682.c  --  RT5682 ALSA SoC audio component driver
+ *
+ * Copyright 2018 Realtek Semiconductor Corp.
+ * Author: Bard Liao <bardliao@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/acpi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mutex.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/jack.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+#include <sound/rt5682.h>
+
+#include "rl6231.h"
+#include "rt5682.h"
+
+#define RT5682_NUM_SUPPLIES 3
+
+static const char *rt5682_supply_names[RT5682_NUM_SUPPLIES] = {
+	"AVDD",
+	"MICVDD",
+	"VBAT",
+};
+
+struct rt5682_priv {
+	struct snd_soc_component *component;
+	struct rt5682_platform_data pdata;
+	struct regmap *regmap;
+	struct snd_soc_jack *hs_jack;
+	struct regulator_bulk_data supplies[RT5682_NUM_SUPPLIES];
+	struct delayed_work jack_detect_work;
+	struct delayed_work jd_check_work;
+	struct mutex calibrate_mutex;
+
+	int sysclk;
+	int sysclk_src;
+	int lrck[RT5682_AIFS];
+	int bclk[RT5682_AIFS];
+	int master[RT5682_AIFS];
+
+	int pll_src;
+	int pll_in;
+	int pll_out;
+
+	int jack_type;
+};
+
+static const struct reg_sequence patch_list[] = {
+	{0x01c1, 0x1000},
+};
+
+static const struct reg_default rt5682_reg[] = {
+	{0x0002, 0x8080},
+	{0x0003, 0x8000},
+	{0x0005, 0x0000},
+	{0x0006, 0x0000},
+	{0x0008, 0x800f},
+	{0x000b, 0x0000},
+	{0x0010, 0x4040},
+	{0x0011, 0x0000},
+	{0x0012, 0x1404},
+	{0x0013, 0x1000},
+	{0x0014, 0xa00a},
+	{0x0015, 0x0404},
+	{0x0016, 0x0404},
+	{0x0019, 0xafaf},
+	{0x001c, 0x2f2f},
+	{0x001f, 0x0000},
+	{0x0022, 0x5757},
+	{0x0023, 0x0039},
+	{0x0024, 0x000b},
+	{0x0026, 0xc0c4},
+	{0x0029, 0x8080},
+	{0x002a, 0xa0a0},
+	{0x002b, 0x0300},
+	{0x0030, 0x0000},
+	{0x003c, 0x0080},
+	{0x0044, 0x0c0c},
+	{0x0049, 0x0000},
+	{0x0061, 0x0000},
+	{0x0062, 0x0000},
+	{0x0063, 0x003f},
+	{0x0064, 0x0000},
+	{0x0065, 0x0000},
+	{0x0066, 0x0030},
+	{0x0067, 0x0000},
+	{0x006b, 0x0000},
+	{0x006c, 0x0000},
+	{0x006d, 0x2200},
+	{0x006e, 0x0a10},
+	{0x0070, 0x8000},
+	{0x0071, 0x8000},
+	{0x0073, 0x0000},
+	{0x0074, 0x0000},
+	{0x0075, 0x0002},
+	{0x0076, 0x0001},
+	{0x0079, 0x0000},
+	{0x007a, 0x0000},
+	{0x007b, 0x0000},
+	{0x007c, 0x0100},
+	{0x007e, 0x0000},
+	{0x0080, 0x0000},
+	{0x0081, 0x0000},
+	{0x0082, 0x0000},
+	{0x0083, 0x0000},
+	{0x0084, 0x0000},
+	{0x0085, 0x0000},
+	{0x0086, 0x0005},
+	{0x0087, 0x0000},
+	{0x0088, 0x0000},
+	{0x008c, 0x0003},
+	{0x008d, 0x0000},
+	{0x008e, 0x0060},
+	{0x008f, 0x1000},
+	{0x0091, 0x0c26},
+	{0x0092, 0x0073},
+	{0x0093, 0x0000},
+	{0x0094, 0x0080},
+	{0x0098, 0x0000},
+	{0x009a, 0x0000},
+	{0x009b, 0x0000},
+	{0x009c, 0x0000},
+	{0x009d, 0x0000},
+	{0x009e, 0x100c},
+	{0x009f, 0x0000},
+	{0x00a0, 0x0000},
+	{0x00a3, 0x0002},
+	{0x00a4, 0x0001},
+	{0x00ae, 0x2040},
+	{0x00af, 0x0000},
+	{0x00b6, 0x0000},
+	{0x00b7, 0x0000},
+	{0x00b8, 0x0000},
+	{0x00b9, 0x0002},
+	{0x00be, 0x0000},
+	{0x00c0, 0x0160},
+	{0x00c1, 0x82a0},
+	{0x00c2, 0x0000},
+	{0x00d0, 0x0000},
+	{0x00d1, 0x2244},
+	{0x00d2, 0x3300},
+	{0x00d3, 0x2200},
+	{0x00d4, 0x0000},
+	{0x00d9, 0x0009},
+	{0x00da, 0x0000},
+	{0x00db, 0x0000},
+	{0x00dc, 0x00c0},
+	{0x00dd, 0x2220},
+	{0x00de, 0x3131},
+	{0x00df, 0x3131},
+	{0x00e0, 0x3131},
+	{0x00e2, 0x0000},
+	{0x00e3, 0x4000},
+	{0x00e4, 0x0aa0},
+	{0x00e5, 0x3131},
+	{0x00e6, 0x3131},
+	{0x00e7, 0x3131},
+	{0x00e8, 0x3131},
+	{0x00ea, 0xb320},
+	{0x00eb, 0x0000},
+	{0x00f0, 0x0000},
+	{0x00f1, 0x00d0},
+	{0x00f2, 0x00d0},
+	{0x00f6, 0x0000},
+	{0x00fa, 0x0000},
+	{0x00fb, 0x0000},
+	{0x00fc, 0x0000},
+	{0x00fd, 0x0000},
+	{0x00fe, 0x10ec},
+	{0x00ff, 0x6530},
+	{0x0100, 0xa0a0},
+	{0x010b, 0x0000},
+	{0x010c, 0xae00},
+	{0x010d, 0xaaa0},
+	{0x010e, 0x8aa2},
+	{0x010f, 0x02a2},
+	{0x0110, 0xc000},
+	{0x0111, 0x04a2},
+	{0x0112, 0x2800},
+	{0x0113, 0x0000},
+	{0x0117, 0x0100},
+	{0x0125, 0x0410},
+	{0x0132, 0x6026},
+	{0x0136, 0x5555},
+	{0x0138, 0x3700},
+	{0x013a, 0x2000},
+	{0x013b, 0x2000},
+	{0x013c, 0x2005},
+	{0x013f, 0x0000},
+	{0x0142, 0x0000},
+	{0x0145, 0x0002},
+	{0x0146, 0x0000},
+	{0x0147, 0x0000},
+	{0x0148, 0x0000},
+	{0x0149, 0x0000},
+	{0x0150, 0x79a1},
+	{0x0151, 0x0000},
+	{0x0160, 0x4ec0},
+	{0x0161, 0x0080},
+	{0x0162, 0x0200},
+	{0x0163, 0x0800},
+	{0x0164, 0x0000},
+	{0x0165, 0x0000},
+	{0x0166, 0x0000},
+	{0x0167, 0x000f},
+	{0x0168, 0x000f},
+	{0x0169, 0x0021},
+	{0x0190, 0x413d},
+	{0x0194, 0x0000},
+	{0x0195, 0x0000},
+	{0x0197, 0x0022},
+	{0x0198, 0x0000},
+	{0x0199, 0x0000},
+	{0x01af, 0x0000},
+	{0x01b0, 0x0400},
+	{0x01b1, 0x0000},
+	{0x01b2, 0x0000},
+	{0x01b3, 0x0000},
+	{0x01b4, 0x0000},
+	{0x01b5, 0x0000},
+	{0x01b6, 0x01c3},
+	{0x01b7, 0x02a0},
+	{0x01b8, 0x03e9},
+	{0x01b9, 0x1389},
+	{0x01ba, 0xc351},
+	{0x01bb, 0x0009},
+	{0x01bc, 0x0018},
+	{0x01bd, 0x002a},
+	{0x01be, 0x004c},
+	{0x01bf, 0x0097},
+	{0x01c0, 0x433d},
+	{0x01c2, 0x0000},
+	{0x01c3, 0x0000},
+	{0x01c4, 0x0000},
+	{0x01c5, 0x0000},
+	{0x01c6, 0x0000},
+	{0x01c7, 0x0000},
+	{0x01c8, 0x40af},
+	{0x01c9, 0x0702},
+	{0x01ca, 0x0000},
+	{0x01cb, 0x0000},
+	{0x01cc, 0x5757},
+	{0x01cd, 0x5757},
+	{0x01ce, 0x5757},
+	{0x01cf, 0x5757},
+	{0x01d0, 0x5757},
+	{0x01d1, 0x5757},
+	{0x01d2, 0x5757},
+	{0x01d3, 0x5757},
+	{0x01d4, 0x5757},
+	{0x01d5, 0x5757},
+	{0x01d6, 0x0000},
+	{0x01d7, 0x0008},
+	{0x01d8, 0x0029},
+	{0x01d9, 0x3333},
+	{0x01da, 0x0000},
+	{0x01db, 0x0004},
+	{0x01dc, 0x0000},
+	{0x01de, 0x7c00},
+	{0x01df, 0x0320},
+	{0x01e0, 0x06a1},
+	{0x01e1, 0x0000},
+	{0x01e2, 0x0000},
+	{0x01e3, 0x0000},
+	{0x01e4, 0x0000},
+	{0x01e6, 0x0001},
+	{0x01e7, 0x0000},
+	{0x01e8, 0x0000},
+	{0x01ea, 0x0000},
+	{0x01eb, 0x0000},
+	{0x01ec, 0x0000},
+	{0x01ed, 0x0000},
+	{0x01ee, 0x0000},
+	{0x01ef, 0x0000},
+	{0x01f0, 0x0000},
+	{0x01f1, 0x0000},
+	{0x01f2, 0x0000},
+	{0x01f3, 0x0000},
+	{0x01f4, 0x0000},
+	{0x0210, 0x6297},
+	{0x0211, 0xa005},
+	{0x0212, 0x824c},
+	{0x0213, 0xf7ff},
+	{0x0214, 0xf24c},
+	{0x0215, 0x0102},
+	{0x0216, 0x00a3},
+	{0x0217, 0x0048},
+	{0x0218, 0xa2c0},
+	{0x0219, 0x0400},
+	{0x021a, 0x00c8},
+	{0x021b, 0x00c0},
+	{0x021c, 0x0000},
+	{0x0250, 0x4500},
+	{0x0251, 0x40b3},
+	{0x0252, 0x0000},
+	{0x0253, 0x0000},
+	{0x0254, 0x0000},
+	{0x0255, 0x0000},
+	{0x0256, 0x0000},
+	{0x0257, 0x0000},
+	{0x0258, 0x0000},
+	{0x0259, 0x0000},
+	{0x025a, 0x0005},
+	{0x0270, 0x0000},
+	{0x02ff, 0x0110},
+	{0x0300, 0x001f},
+	{0x0301, 0x032c},
+	{0x0302, 0x5f21},
+	{0x0303, 0x4000},
+	{0x0304, 0x4000},
+	{0x0305, 0x06d5},
+	{0x0306, 0x8000},
+	{0x0307, 0x0700},
+	{0x0310, 0x4560},
+	{0x0311, 0xa4a8},
+	{0x0312, 0x7418},
+	{0x0313, 0x0000},
+	{0x0314, 0x0006},
+	{0x0315, 0xffff},
+	{0x0316, 0xc400},
+	{0x0317, 0x0000},
+	{0x03c0, 0x7e00},
+	{0x03c1, 0x8000},
+	{0x03c2, 0x8000},
+	{0x03c3, 0x8000},
+	{0x03c4, 0x8000},
+	{0x03c5, 0x8000},
+	{0x03c6, 0x8000},
+	{0x03c7, 0x8000},
+	{0x03c8, 0x8000},
+	{0x03c9, 0x8000},
+	{0x03ca, 0x8000},
+	{0x03cb, 0x8000},
+	{0x03cc, 0x8000},
+	{0x03d0, 0x0000},
+	{0x03d1, 0x0000},
+	{0x03d2, 0x0000},
+	{0x03d3, 0x0000},
+	{0x03d4, 0x2000},
+	{0x03d5, 0x2000},
+	{0x03d6, 0x0000},
+	{0x03d7, 0x0000},
+	{0x03d8, 0x2000},
+	{0x03d9, 0x2000},
+	{0x03da, 0x2000},
+	{0x03db, 0x2000},
+	{0x03dc, 0x0000},
+	{0x03dd, 0x0000},
+	{0x03de, 0x0000},
+	{0x03df, 0x2000},
+	{0x03e0, 0x0000},
+	{0x03e1, 0x0000},
+	{0x03e2, 0x0000},
+	{0x03e3, 0x0000},
+	{0x03e4, 0x0000},
+	{0x03e5, 0x0000},
+	{0x03e6, 0x0000},
+	{0x03e7, 0x0000},
+	{0x03e8, 0x0000},
+	{0x03e9, 0x0000},
+	{0x03ea, 0x0000},
+	{0x03eb, 0x0000},
+	{0x03ec, 0x0000},
+	{0x03ed, 0x0000},
+	{0x03ee, 0x0000},
+	{0x03ef, 0x0000},
+	{0x03f0, 0x0800},
+	{0x03f1, 0x0800},
+	{0x03f2, 0x0800},
+	{0x03f3, 0x0800},
+};
+
+static bool rt5682_volatile_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RT5682_RESET:
+	case RT5682_CBJ_CTRL_2:
+	case RT5682_INT_ST_1:
+	case RT5682_4BTN_IL_CMD_1:
+	case RT5682_AJD1_CTRL:
+	case RT5682_HP_CALIB_CTRL_1:
+	case RT5682_DEVICE_ID:
+	case RT5682_I2C_MODE:
+	case RT5682_HP_CALIB_CTRL_10:
+	case RT5682_EFUSE_CTRL_2:
+	case RT5682_JD_TOP_VC_VTRL:
+	case RT5682_HP_IMP_SENS_CTRL_19:
+	case RT5682_IL_CMD_1:
+	case RT5682_SAR_IL_CMD_2:
+	case RT5682_SAR_IL_CMD_4:
+	case RT5682_SAR_IL_CMD_10:
+	case RT5682_SAR_IL_CMD_11:
+	case RT5682_EFUSE_CTRL_6...RT5682_EFUSE_CTRL_11:
+	case RT5682_HP_CALIB_STA_1...RT5682_HP_CALIB_STA_11:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rt5682_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RT5682_RESET:
+	case RT5682_VERSION_ID:
+	case RT5682_VENDOR_ID:
+	case RT5682_DEVICE_ID:
+	case RT5682_HP_CTRL_1:
+	case RT5682_HP_CTRL_2:
+	case RT5682_HPL_GAIN:
+	case RT5682_HPR_GAIN:
+	case RT5682_I2C_CTRL:
+	case RT5682_CBJ_BST_CTRL:
+	case RT5682_CBJ_CTRL_1:
+	case RT5682_CBJ_CTRL_2:
+	case RT5682_CBJ_CTRL_3:
+	case RT5682_CBJ_CTRL_4:
+	case RT5682_CBJ_CTRL_5:
+	case RT5682_CBJ_CTRL_6:
+	case RT5682_CBJ_CTRL_7:
+	case RT5682_DAC1_DIG_VOL:
+	case RT5682_STO1_ADC_DIG_VOL:
+	case RT5682_STO1_ADC_BOOST:
+	case RT5682_HP_IMP_GAIN_1:
+	case RT5682_HP_IMP_GAIN_2:
+	case RT5682_SIDETONE_CTRL:
+	case RT5682_STO1_ADC_MIXER:
+	case RT5682_AD_DA_MIXER:
+	case RT5682_STO1_DAC_MIXER:
+	case RT5682_A_DAC1_MUX:
+	case RT5682_DIG_INF2_DATA:
+	case RT5682_REC_MIXER:
+	case RT5682_CAL_REC:
+	case RT5682_ALC_BACK_GAIN:
+	case RT5682_PWR_DIG_1:
+	case RT5682_PWR_DIG_2:
+	case RT5682_PWR_ANLG_1:
+	case RT5682_PWR_ANLG_2:
+	case RT5682_PWR_ANLG_3:
+	case RT5682_PWR_MIXER:
+	case RT5682_PWR_VOL:
+	case RT5682_CLK_DET:
+	case RT5682_RESET_LPF_CTRL:
+	case RT5682_RESET_HPF_CTRL:
+	case RT5682_DMIC_CTRL_1:
+	case RT5682_I2S1_SDP:
+	case RT5682_I2S2_SDP:
+	case RT5682_ADDA_CLK_1:
+	case RT5682_ADDA_CLK_2:
+	case RT5682_I2S1_F_DIV_CTRL_1:
+	case RT5682_I2S1_F_DIV_CTRL_2:
+	case RT5682_TDM_CTRL:
+	case RT5682_TDM_ADDA_CTRL_1:
+	case RT5682_TDM_ADDA_CTRL_2:
+	case RT5682_DATA_SEL_CTRL_1:
+	case RT5682_TDM_TCON_CTRL:
+	case RT5682_GLB_CLK:
+	case RT5682_PLL_CTRL_1:
+	case RT5682_PLL_CTRL_2:
+	case RT5682_PLL_TRACK_1:
+	case RT5682_PLL_TRACK_2:
+	case RT5682_PLL_TRACK_3:
+	case RT5682_PLL_TRACK_4:
+	case RT5682_PLL_TRACK_5:
+	case RT5682_PLL_TRACK_6:
+	case RT5682_PLL_TRACK_11:
+	case RT5682_SDW_REF_CLK:
+	case RT5682_DEPOP_1:
+	case RT5682_DEPOP_2:
+	case RT5682_HP_CHARGE_PUMP_1:
+	case RT5682_HP_CHARGE_PUMP_2:
+	case RT5682_MICBIAS_1:
+	case RT5682_MICBIAS_2:
+	case RT5682_PLL_TRACK_12:
+	case RT5682_PLL_TRACK_14:
+	case RT5682_PLL2_CTRL_1:
+	case RT5682_PLL2_CTRL_2:
+	case RT5682_PLL2_CTRL_3:
+	case RT5682_PLL2_CTRL_4:
+	case RT5682_RC_CLK_CTRL:
+	case RT5682_I2S_M_CLK_CTRL_1:
+	case RT5682_I2S2_F_DIV_CTRL_1:
+	case RT5682_I2S2_F_DIV_CTRL_2:
+	case RT5682_EQ_CTRL_1:
+	case RT5682_EQ_CTRL_2:
+	case RT5682_IRQ_CTRL_1:
+	case RT5682_IRQ_CTRL_2:
+	case RT5682_IRQ_CTRL_3:
+	case RT5682_IRQ_CTRL_4:
+	case RT5682_INT_ST_1:
+	case RT5682_GPIO_CTRL_1:
+	case RT5682_GPIO_CTRL_2:
+	case RT5682_GPIO_CTRL_3:
+	case RT5682_HP_AMP_DET_CTRL_1:
+	case RT5682_HP_AMP_DET_CTRL_2:
+	case RT5682_MID_HP_AMP_DET:
+	case RT5682_LOW_HP_AMP_DET:
+	case RT5682_DELAY_BUF_CTRL:
+	case RT5682_SV_ZCD_1:
+	case RT5682_SV_ZCD_2:
+	case RT5682_IL_CMD_1:
+	case RT5682_IL_CMD_2:
+	case RT5682_IL_CMD_3:
+	case RT5682_IL_CMD_4:
+	case RT5682_IL_CMD_5:
+	case RT5682_IL_CMD_6:
+	case RT5682_4BTN_IL_CMD_1:
+	case RT5682_4BTN_IL_CMD_2:
+	case RT5682_4BTN_IL_CMD_3:
+	case RT5682_4BTN_IL_CMD_4:
+	case RT5682_4BTN_IL_CMD_5:
+	case RT5682_4BTN_IL_CMD_6:
+	case RT5682_4BTN_IL_CMD_7:
+	case RT5682_ADC_STO1_HP_CTRL_1:
+	case RT5682_ADC_STO1_HP_CTRL_2:
+	case RT5682_AJD1_CTRL:
+	case RT5682_JD1_THD:
+	case RT5682_JD2_THD:
+	case RT5682_JD_CTRL_1:
+	case RT5682_DUMMY_1:
+	case RT5682_DUMMY_2:
+	case RT5682_DUMMY_3:
+	case RT5682_DAC_ADC_DIG_VOL1:
+	case RT5682_BIAS_CUR_CTRL_2:
+	case RT5682_BIAS_CUR_CTRL_3:
+	case RT5682_BIAS_CUR_CTRL_4:
+	case RT5682_BIAS_CUR_CTRL_5:
+	case RT5682_BIAS_CUR_CTRL_6:
+	case RT5682_BIAS_CUR_CTRL_7:
+	case RT5682_BIAS_CUR_CTRL_8:
+	case RT5682_BIAS_CUR_CTRL_9:
+	case RT5682_BIAS_CUR_CTRL_10:
+	case RT5682_VREF_REC_OP_FB_CAP_CTRL:
+	case RT5682_CHARGE_PUMP_1:
+	case RT5682_DIG_IN_CTRL_1:
+	case RT5682_PAD_DRIVING_CTRL:
+	case RT5682_SOFT_RAMP_DEPOP:
+	case RT5682_CHOP_DAC:
+	case RT5682_CHOP_ADC:
+	case RT5682_CALIB_ADC_CTRL:
+	case RT5682_VOL_TEST:
+	case RT5682_SPKVDD_DET_STA:
+	case RT5682_TEST_MODE_CTRL_1:
+	case RT5682_TEST_MODE_CTRL_2:
+	case RT5682_TEST_MODE_CTRL_3:
+	case RT5682_TEST_MODE_CTRL_4:
+	case RT5682_TEST_MODE_CTRL_5:
+	case RT5682_PLL1_INTERNAL:
+	case RT5682_PLL2_INTERNAL:
+	case RT5682_STO_NG2_CTRL_1:
+	case RT5682_STO_NG2_CTRL_2:
+	case RT5682_STO_NG2_CTRL_3:
+	case RT5682_STO_NG2_CTRL_4:
+	case RT5682_STO_NG2_CTRL_5:
+	case RT5682_STO_NG2_CTRL_6:
+	case RT5682_STO_NG2_CTRL_7:
+	case RT5682_STO_NG2_CTRL_8:
+	case RT5682_STO_NG2_CTRL_9:
+	case RT5682_STO_NG2_CTRL_10:
+	case RT5682_STO1_DAC_SIL_DET:
+	case RT5682_SIL_PSV_CTRL1:
+	case RT5682_SIL_PSV_CTRL2:
+	case RT5682_SIL_PSV_CTRL3:
+	case RT5682_SIL_PSV_CTRL4:
+	case RT5682_SIL_PSV_CTRL5:
+	case RT5682_HP_IMP_SENS_CTRL_01:
+	case RT5682_HP_IMP_SENS_CTRL_02:
+	case RT5682_HP_IMP_SENS_CTRL_03:
+	case RT5682_HP_IMP_SENS_CTRL_04:
+	case RT5682_HP_IMP_SENS_CTRL_05:
+	case RT5682_HP_IMP_SENS_CTRL_06:
+	case RT5682_HP_IMP_SENS_CTRL_07:
+	case RT5682_HP_IMP_SENS_CTRL_08:
+	case RT5682_HP_IMP_SENS_CTRL_09:
+	case RT5682_HP_IMP_SENS_CTRL_10:
+	case RT5682_HP_IMP_SENS_CTRL_11:
+	case RT5682_HP_IMP_SENS_CTRL_12:
+	case RT5682_HP_IMP_SENS_CTRL_13:
+	case RT5682_HP_IMP_SENS_CTRL_14:
+	case RT5682_HP_IMP_SENS_CTRL_15:
+	case RT5682_HP_IMP_SENS_CTRL_16:
+	case RT5682_HP_IMP_SENS_CTRL_17:
+	case RT5682_HP_IMP_SENS_CTRL_18:
+	case RT5682_HP_IMP_SENS_CTRL_19:
+	case RT5682_HP_IMP_SENS_CTRL_20:
+	case RT5682_HP_IMP_SENS_CTRL_21:
+	case RT5682_HP_IMP_SENS_CTRL_22:
+	case RT5682_HP_IMP_SENS_CTRL_23:
+	case RT5682_HP_IMP_SENS_CTRL_24:
+	case RT5682_HP_IMP_SENS_CTRL_25:
+	case RT5682_HP_IMP_SENS_CTRL_26:
+	case RT5682_HP_IMP_SENS_CTRL_27:
+	case RT5682_HP_IMP_SENS_CTRL_28:
+	case RT5682_HP_IMP_SENS_CTRL_29:
+	case RT5682_HP_IMP_SENS_CTRL_30:
+	case RT5682_HP_IMP_SENS_CTRL_31:
+	case RT5682_HP_IMP_SENS_CTRL_32:
+	case RT5682_HP_IMP_SENS_CTRL_33:
+	case RT5682_HP_IMP_SENS_CTRL_34:
+	case RT5682_HP_IMP_SENS_CTRL_35:
+	case RT5682_HP_IMP_SENS_CTRL_36:
+	case RT5682_HP_IMP_SENS_CTRL_37:
+	case RT5682_HP_IMP_SENS_CTRL_38:
+	case RT5682_HP_IMP_SENS_CTRL_39:
+	case RT5682_HP_IMP_SENS_CTRL_40:
+	case RT5682_HP_IMP_SENS_CTRL_41:
+	case RT5682_HP_IMP_SENS_CTRL_42:
+	case RT5682_HP_IMP_SENS_CTRL_43:
+	case RT5682_HP_LOGIC_CTRL_1:
+	case RT5682_HP_LOGIC_CTRL_2:
+	case RT5682_HP_LOGIC_CTRL_3:
+	case RT5682_HP_CALIB_CTRL_1:
+	case RT5682_HP_CALIB_CTRL_2:
+	case RT5682_HP_CALIB_CTRL_3:
+	case RT5682_HP_CALIB_CTRL_4:
+	case RT5682_HP_CALIB_CTRL_5:
+	case RT5682_HP_CALIB_CTRL_6:
+	case RT5682_HP_CALIB_CTRL_7:
+	case RT5682_HP_CALIB_CTRL_9:
+	case RT5682_HP_CALIB_CTRL_10:
+	case RT5682_HP_CALIB_CTRL_11:
+	case RT5682_HP_CALIB_STA_1:
+	case RT5682_HP_CALIB_STA_2:
+	case RT5682_HP_CALIB_STA_3:
+	case RT5682_HP_CALIB_STA_4:
+	case RT5682_HP_CALIB_STA_5:
+	case RT5682_HP_CALIB_STA_6:
+	case RT5682_HP_CALIB_STA_7:
+	case RT5682_HP_CALIB_STA_8:
+	case RT5682_HP_CALIB_STA_9:
+	case RT5682_HP_CALIB_STA_10:
+	case RT5682_HP_CALIB_STA_11:
+	case RT5682_SAR_IL_CMD_1:
+	case RT5682_SAR_IL_CMD_2:
+	case RT5682_SAR_IL_CMD_3:
+	case RT5682_SAR_IL_CMD_4:
+	case RT5682_SAR_IL_CMD_5:
+	case RT5682_SAR_IL_CMD_6:
+	case RT5682_SAR_IL_CMD_7:
+	case RT5682_SAR_IL_CMD_8:
+	case RT5682_SAR_IL_CMD_9:
+	case RT5682_SAR_IL_CMD_10:
+	case RT5682_SAR_IL_CMD_11:
+	case RT5682_SAR_IL_CMD_12:
+	case RT5682_SAR_IL_CMD_13:
+	case RT5682_EFUSE_CTRL_1:
+	case RT5682_EFUSE_CTRL_2:
+	case RT5682_EFUSE_CTRL_3:
+	case RT5682_EFUSE_CTRL_4:
+	case RT5682_EFUSE_CTRL_5:
+	case RT5682_EFUSE_CTRL_6:
+	case RT5682_EFUSE_CTRL_7:
+	case RT5682_EFUSE_CTRL_8:
+	case RT5682_EFUSE_CTRL_9:
+	case RT5682_EFUSE_CTRL_10:
+	case RT5682_EFUSE_CTRL_11:
+	case RT5682_JD_TOP_VC_VTRL:
+	case RT5682_DRC1_CTRL_0:
+	case RT5682_DRC1_CTRL_1:
+	case RT5682_DRC1_CTRL_2:
+	case RT5682_DRC1_CTRL_3:
+	case RT5682_DRC1_CTRL_4:
+	case RT5682_DRC1_CTRL_5:
+	case RT5682_DRC1_CTRL_6:
+	case RT5682_DRC1_HARD_LMT_CTRL_1:
+	case RT5682_DRC1_HARD_LMT_CTRL_2:
+	case RT5682_DRC1_PRIV_1:
+	case RT5682_DRC1_PRIV_2:
+	case RT5682_DRC1_PRIV_3:
+	case RT5682_DRC1_PRIV_4:
+	case RT5682_DRC1_PRIV_5:
+	case RT5682_DRC1_PRIV_6:
+	case RT5682_DRC1_PRIV_7:
+	case RT5682_DRC1_PRIV_8:
+	case RT5682_EQ_AUTO_RCV_CTRL1:
+	case RT5682_EQ_AUTO_RCV_CTRL2:
+	case RT5682_EQ_AUTO_RCV_CTRL3:
+	case RT5682_EQ_AUTO_RCV_CTRL4:
+	case RT5682_EQ_AUTO_RCV_CTRL5:
+	case RT5682_EQ_AUTO_RCV_CTRL6:
+	case RT5682_EQ_AUTO_RCV_CTRL7:
+	case RT5682_EQ_AUTO_RCV_CTRL8:
+	case RT5682_EQ_AUTO_RCV_CTRL9:
+	case RT5682_EQ_AUTO_RCV_CTRL10:
+	case RT5682_EQ_AUTO_RCV_CTRL11:
+	case RT5682_EQ_AUTO_RCV_CTRL12:
+	case RT5682_EQ_AUTO_RCV_CTRL13:
+	case RT5682_ADC_L_EQ_LPF1_A1:
+	case RT5682_R_EQ_LPF1_A1:
+	case RT5682_L_EQ_LPF1_H0:
+	case RT5682_R_EQ_LPF1_H0:
+	case RT5682_L_EQ_BPF1_A1:
+	case RT5682_R_EQ_BPF1_A1:
+	case RT5682_L_EQ_BPF1_A2:
+	case RT5682_R_EQ_BPF1_A2:
+	case RT5682_L_EQ_BPF1_H0:
+	case RT5682_R_EQ_BPF1_H0:
+	case RT5682_L_EQ_BPF2_A1:
+	case RT5682_R_EQ_BPF2_A1:
+	case RT5682_L_EQ_BPF2_A2:
+	case RT5682_R_EQ_BPF2_A2:
+	case RT5682_L_EQ_BPF2_H0:
+	case RT5682_R_EQ_BPF2_H0:
+	case RT5682_L_EQ_BPF3_A1:
+	case RT5682_R_EQ_BPF3_A1:
+	case RT5682_L_EQ_BPF3_A2:
+	case RT5682_R_EQ_BPF3_A2:
+	case RT5682_L_EQ_BPF3_H0:
+	case RT5682_R_EQ_BPF3_H0:
+	case RT5682_L_EQ_BPF4_A1:
+	case RT5682_R_EQ_BPF4_A1:
+	case RT5682_L_EQ_BPF4_A2:
+	case RT5682_R_EQ_BPF4_A2:
+	case RT5682_L_EQ_BPF4_H0:
+	case RT5682_R_EQ_BPF4_H0:
+	case RT5682_L_EQ_HPF1_A1:
+	case RT5682_R_EQ_HPF1_A1:
+	case RT5682_L_EQ_HPF1_H0:
+	case RT5682_R_EQ_HPF1_H0:
+	case RT5682_L_EQ_PRE_VOL:
+	case RT5682_R_EQ_PRE_VOL:
+	case RT5682_L_EQ_POST_VOL:
+	case RT5682_R_EQ_POST_VOL:
+	case RT5682_I2C_MODE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const DECLARE_TLV_DB_SCALE(hp_vol_tlv, -2250, 150, 0);
+static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0);
+
+/* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */
+static const DECLARE_TLV_DB_RANGE(bst_tlv,
+	0, 0, TLV_DB_SCALE_ITEM(0, 0, 0),
+	1, 1, TLV_DB_SCALE_ITEM(2000, 0, 0),
+	2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0),
+	3, 5, TLV_DB_SCALE_ITEM(3000, 500, 0),
+	6, 6, TLV_DB_SCALE_ITEM(4400, 0, 0),
+	7, 7, TLV_DB_SCALE_ITEM(5000, 0, 0),
+	8, 8, TLV_DB_SCALE_ITEM(5200, 0, 0)
+);
+
+/* Interface data select */
+static const char * const rt5682_data_select[] = {
+	"L/R", "R/L", "L/L", "R/R"
+};
+
+static SOC_ENUM_SINGLE_DECL(rt5682_if2_adc_enum,
+	RT5682_DIG_INF2_DATA, RT5682_IF2_ADC_SEL_SFT, rt5682_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5682_if1_01_adc_enum,
+	RT5682_TDM_ADDA_CTRL_1, RT5682_IF1_ADC1_SEL_SFT, rt5682_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5682_if1_23_adc_enum,
+	RT5682_TDM_ADDA_CTRL_1, RT5682_IF1_ADC2_SEL_SFT, rt5682_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5682_if1_45_adc_enum,
+	RT5682_TDM_ADDA_CTRL_1, RT5682_IF1_ADC3_SEL_SFT, rt5682_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5682_if1_67_adc_enum,
+	RT5682_TDM_ADDA_CTRL_1, RT5682_IF1_ADC4_SEL_SFT, rt5682_data_select);
+
+static const struct snd_kcontrol_new rt5682_if2_adc_swap_mux =
+	SOC_DAPM_ENUM("IF2 ADC Swap Mux", rt5682_if2_adc_enum);
+
+static const struct snd_kcontrol_new rt5682_if1_01_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 01 ADC Swap Mux", rt5682_if1_01_adc_enum);
+
+static const struct snd_kcontrol_new rt5682_if1_23_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 23 ADC Swap Mux", rt5682_if1_23_adc_enum);
+
+static const struct snd_kcontrol_new rt5682_if1_45_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 45 ADC Swap Mux", rt5682_if1_45_adc_enum);
+
+static const struct snd_kcontrol_new rt5682_if1_67_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 67 ADC Swap Mux", rt5682_if1_67_adc_enum);
+
+static void rt5682_reset(struct regmap *regmap)
+{
+	regmap_write(regmap, RT5682_RESET, 0);
+	regmap_write(regmap, RT5682_I2C_MODE, 1);
+}
+/**
+ * rt5682_sel_asrc_clk_src - select ASRC clock source for a set of filters
+ * @component: SoC audio component device.
+ * @filter_mask: mask of filters.
+ * @clk_src: clock source
+ *
+ * The ASRC function is for asynchronous MCLK and LRCK. Also, since RT5682 can
+ * only support standard 32fs or 64fs i2s format, ASRC should be enabled to
+ * support special i2s clock format such as Intel's 100fs(100 * sampling rate).
+ * ASRC function will track i2s clock and generate a corresponding system clock
+ * for codec. This function provides an API to select the clock source for a
+ * set of filters specified by the mask. And the component driver will turn on
+ * ASRC for these filters if ASRC is selected as their clock source.
+ */
+int rt5682_sel_asrc_clk_src(struct snd_soc_component *component,
+		unsigned int filter_mask, unsigned int clk_src)
+{
+
+	switch (clk_src) {
+	case RT5682_CLK_SEL_SYS:
+	case RT5682_CLK_SEL_I2S1_ASRC:
+	case RT5682_CLK_SEL_I2S2_ASRC:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (filter_mask & RT5682_DA_STEREO1_FILTER) {
+		snd_soc_component_update_bits(component, RT5682_PLL_TRACK_2,
+			RT5682_FILTER_CLK_SEL_MASK,
+			clk_src << RT5682_FILTER_CLK_SEL_SFT);
+	}
+
+	if (filter_mask & RT5682_AD_STEREO1_FILTER) {
+		snd_soc_component_update_bits(component, RT5682_PLL_TRACK_3,
+			RT5682_FILTER_CLK_SEL_MASK,
+			clk_src << RT5682_FILTER_CLK_SEL_SFT);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rt5682_sel_asrc_clk_src);
+
+static int rt5682_button_detect(struct snd_soc_component *component)
+{
+	int btn_type, val;
+
+	val = snd_soc_component_read32(component, RT5682_4BTN_IL_CMD_1);
+	btn_type = val & 0xfff0;
+	snd_soc_component_write(component, RT5682_4BTN_IL_CMD_1, val);
+	pr_debug("%s btn_type=%x\n", __func__, btn_type);
+
+	return btn_type;
+}
+
+static void rt5682_enable_push_button_irq(struct snd_soc_component *component,
+		bool enable)
+{
+	if (enable) {
+		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+			RT5682_SAR_BUTT_DET_MASK, RT5682_SAR_BUTT_DET_EN);
+		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_13,
+			RT5682_SAR_SOUR_MASK, RT5682_SAR_SOUR_BTN);
+		snd_soc_component_write(component, RT5682_IL_CMD_1, 0x0040);
+		snd_soc_component_update_bits(component, RT5682_4BTN_IL_CMD_2,
+			RT5682_4BTN_IL_MASK | RT5682_4BTN_IL_RST_MASK,
+			RT5682_4BTN_IL_EN | RT5682_4BTN_IL_NOR);
+		snd_soc_component_update_bits(component, RT5682_IRQ_CTRL_3,
+			RT5682_IL_IRQ_MASK, RT5682_IL_IRQ_EN);
+	} else {
+		snd_soc_component_update_bits(component, RT5682_IRQ_CTRL_3,
+			RT5682_IL_IRQ_MASK, RT5682_IL_IRQ_DIS);
+		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+			RT5682_SAR_BUTT_DET_MASK, RT5682_SAR_BUTT_DET_DIS);
+		snd_soc_component_update_bits(component, RT5682_4BTN_IL_CMD_2,
+			RT5682_4BTN_IL_MASK, RT5682_4BTN_IL_DIS);
+		snd_soc_component_update_bits(component, RT5682_4BTN_IL_CMD_2,
+			RT5682_4BTN_IL_RST_MASK, RT5682_4BTN_IL_RST);
+		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_13,
+			RT5682_SAR_SOUR_MASK, RT5682_SAR_SOUR_TYPE);
+	}
+}
+
+/**
+ * rt5682_headset_detect - Detect headset.
+ * @component: SoC audio component device.
+ * @jack_insert: Jack insert or not.
+ *
+ * Detect whether is headset or not when jack inserted.
+ *
+ * Returns detect status.
+ */
+static int rt5682_headset_detect(struct snd_soc_component *component,
+		int jack_insert)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	unsigned int val, count;
+
+	if (jack_insert) {
+		snd_soc_dapm_force_enable_pin(dapm, "CBJ Power");
+		snd_soc_dapm_sync(dapm);
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+			RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH);
+
+		count = 0;
+		val = snd_soc_component_read32(component, RT5682_CBJ_CTRL_2)
+			& RT5682_JACK_TYPE_MASK;
+		while (val == 0 && count < 50) {
+			usleep_range(10000, 15000);
+			val = snd_soc_component_read32(component,
+				RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK;
+			count++;
+		}
+
+		switch (val) {
+		case 0x1:
+		case 0x2:
+			rt5682->jack_type = SND_JACK_HEADSET;
+			rt5682_enable_push_button_irq(component, true);
+			break;
+		default:
+			rt5682->jack_type = SND_JACK_HEADPHONE;
+		}
+
+	} else {
+		rt5682_enable_push_button_irq(component, false);
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+			RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
+		snd_soc_dapm_disable_pin(dapm, "CBJ Power");
+		snd_soc_dapm_sync(dapm);
+
+		rt5682->jack_type = 0;
+	}
+
+	dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type);
+	return rt5682->jack_type;
+}
+
+static irqreturn_t rt5682_irq(int irq, void *data)
+{
+	struct rt5682_priv *rt5682 = data;
+
+	mod_delayed_work(system_power_efficient_wq,
+			&rt5682->jack_detect_work, msecs_to_jiffies(250));
+
+	return IRQ_HANDLED;
+}
+
+static void rt5682_jd_check_handler(struct work_struct *work)
+{
+	struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv,
+		jd_check_work.work);
+
+	if (snd_soc_component_read32(rt5682->component, RT5682_AJD1_CTRL)
+		& RT5682_JDH_RS_MASK) {
+		/* jack out */
+		rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0);
+
+		snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
+				SND_JACK_HEADSET |
+				SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+				SND_JACK_BTN_2 | SND_JACK_BTN_3);
+	} else {
+		schedule_delayed_work(&rt5682->jd_check_work, 500);
+	}
+}
+
+static int rt5682_set_jack_detect(struct snd_soc_component *component,
+	struct snd_soc_jack *hs_jack, void *data)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	switch (rt5682->pdata.jd_src) {
+	case RT5682_JD1:
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_2,
+			RT5682_EXT_JD_SRC, RT5682_EXT_JD_SRC_MANUAL);
+		snd_soc_component_write(component, RT5682_CBJ_CTRL_1, 0xd042);
+		snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_3,
+			RT5682_CBJ_IN_BUF_EN, RT5682_CBJ_IN_BUF_EN);
+		snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
+			RT5682_SAR_POW_MASK, RT5682_SAR_POW_EN);
+		regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+			RT5682_GP1_PIN_MASK, RT5682_GP1_PIN_IRQ);
+		regmap_update_bits(rt5682->regmap, RT5682_RC_CLK_CTRL,
+				RT5682_POW_IRQ | RT5682_POW_JDH |
+				RT5682_POW_ANA, RT5682_POW_IRQ |
+				RT5682_POW_JDH | RT5682_POW_ANA);
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_2,
+			RT5682_PWR_JDH | RT5682_PWR_JDL,
+			RT5682_PWR_JDH | RT5682_PWR_JDL);
+		regmap_update_bits(rt5682->regmap, RT5682_IRQ_CTRL_2,
+			RT5682_JD1_EN_MASK | RT5682_JD1_POL_MASK,
+			RT5682_JD1_EN | RT5682_JD1_POL_NOR);
+		mod_delayed_work(system_power_efficient_wq,
+			   &rt5682->jack_detect_work, msecs_to_jiffies(250));
+		break;
+
+	case RT5682_JD_NULL:
+		regmap_update_bits(rt5682->regmap, RT5682_IRQ_CTRL_2,
+			RT5682_JD1_EN_MASK, RT5682_JD1_DIS);
+		regmap_update_bits(rt5682->regmap, RT5682_RC_CLK_CTRL,
+				RT5682_POW_JDH | RT5682_POW_JDL, 0);
+		break;
+
+	default:
+		dev_warn(component->dev, "Wrong JD source\n");
+		break;
+	}
+
+	rt5682->hs_jack = hs_jack;
+
+	return 0;
+}
+
+static void rt5682_jack_detect_handler(struct work_struct *work)
+{
+	struct rt5682_priv *rt5682 =
+		container_of(work, struct rt5682_priv, jack_detect_work.work);
+	int val, btn_type;
+
+	while (!rt5682->component)
+		usleep_range(10000, 15000);
+
+	while (!rt5682->component->card->instantiated)
+		usleep_range(10000, 15000);
+
+	mutex_lock(&rt5682->calibrate_mutex);
+
+	val = snd_soc_component_read32(rt5682->component, RT5682_AJD1_CTRL)
+		& RT5682_JDH_RS_MASK;
+	if (!val) {
+		/* jack in */
+		if (rt5682->jack_type == 0) {
+			/* jack was out, report jack type */
+			rt5682->jack_type =
+				rt5682_headset_detect(rt5682->component, 1);
+		} else {
+			/* jack is already in, report button event */
+			rt5682->jack_type = SND_JACK_HEADSET;
+			btn_type = rt5682_button_detect(rt5682->component);
+			/**
+			 * rt5682 can report three kinds of button behavior,
+			 * one click, double click and hold. However,
+			 * currently we will report button pressed/released
+			 * event. So all the three button behaviors are
+			 * treated as button pressed.
+			 */
+			switch (btn_type) {
+			case 0x8000:
+			case 0x4000:
+			case 0x2000:
+				rt5682->jack_type |= SND_JACK_BTN_0;
+				break;
+			case 0x1000:
+			case 0x0800:
+			case 0x0400:
+				rt5682->jack_type |= SND_JACK_BTN_1;
+				break;
+			case 0x0200:
+			case 0x0100:
+			case 0x0080:
+				rt5682->jack_type |= SND_JACK_BTN_2;
+				break;
+			case 0x0040:
+			case 0x0020:
+			case 0x0010:
+				rt5682->jack_type |= SND_JACK_BTN_3;
+				break;
+			case 0x0000: /* unpressed */
+				break;
+			default:
+				btn_type = 0;
+				dev_err(rt5682->component->dev,
+					"Unexpected button code 0x%04x\n",
+					btn_type);
+				break;
+			}
+		}
+	} else {
+		/* jack out */
+		rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0);
+	}
+
+	snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type,
+			SND_JACK_HEADSET |
+			    SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+			    SND_JACK_BTN_2 | SND_JACK_BTN_3);
+
+	if (rt5682->jack_type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+		SND_JACK_BTN_2 | SND_JACK_BTN_3))
+		schedule_delayed_work(&rt5682->jd_check_work, 0);
+	else
+		cancel_delayed_work_sync(&rt5682->jd_check_work);
+
+	mutex_unlock(&rt5682->calibrate_mutex);
+}
+
+static const struct snd_kcontrol_new rt5682_snd_controls[] = {
+	/* Headphone Output Volume */
+	SOC_DOUBLE_R_TLV("Headphone Playback Volume", RT5682_HPL_GAIN,
+		RT5682_HPR_GAIN, RT5682_G_HP_SFT, 15, 1, hp_vol_tlv),
+
+	/* DAC Digital Volume */
+	SOC_DOUBLE_TLV("DAC1 Playback Volume", RT5682_DAC1_DIG_VOL,
+		RT5682_L_VOL_SFT, RT5682_R_VOL_SFT, 175, 0, dac_vol_tlv),
+
+	/* IN Boost Volume */
+	SOC_SINGLE_TLV("CBJ Boost Volume", RT5682_CBJ_BST_CTRL,
+		RT5682_BST_CBJ_SFT, 8, 0, bst_tlv),
+
+	/* ADC Digital Volume Control */
+	SOC_DOUBLE("STO1 ADC Capture Switch", RT5682_STO1_ADC_DIG_VOL,
+		RT5682_L_MUTE_SFT, RT5682_R_MUTE_SFT, 1, 1),
+	SOC_DOUBLE_TLV("STO1 ADC Capture Volume", RT5682_STO1_ADC_DIG_VOL,
+		RT5682_L_VOL_SFT, RT5682_R_VOL_SFT, 127, 0, adc_vol_tlv),
+
+	/* ADC Boost Volume Control */
+	SOC_DOUBLE_TLV("STO1 ADC Boost Gain Volume", RT5682_STO1_ADC_BOOST,
+		RT5682_STO1_ADC_L_BST_SFT, RT5682_STO1_ADC_R_BST_SFT,
+		3, 0, adc_bst_tlv),
+};
+
+
+static int rt5682_div_sel(struct rt5682_priv *rt5682,
+			  int target, const int div[], int size)
+{
+	int i;
+
+	if (rt5682->sysclk < target) {
+		pr_err("sysclk rate %d is too low\n",
+			rt5682->sysclk);
+		return 0;
+	}
+
+	for (i = 0; i < size - 1; i++) {
+		pr_info("div[%d]=%d\n", i, div[i]);
+		if (target * div[i] == rt5682->sysclk)
+			return i;
+		if (target * div[i + 1] > rt5682->sysclk) {
+			pr_err("can't find div for sysclk %d\n",
+				rt5682->sysclk);
+			return i;
+		}
+	}
+
+	if (target * div[i] < rt5682->sysclk)
+		pr_err("sysclk rate %d is too high\n",
+			rt5682->sysclk);
+
+	return size - 1;
+
+}
+
+/**
+ * set_dmic_clk - Set parameter of dmic.
+ *
+ * @w: DAPM widget.
+ * @kcontrol: The kcontrol of this widget.
+ * @event: Event id.
+ *
+ * Choose dmic clock between 1MHz and 3MHz.
+ * It is better for clock to approximate 3MHz.
+ */
+static int set_dmic_clk(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	int idx = -EINVAL;
+	static const int div[] = {2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128};
+
+	idx = rt5682_div_sel(rt5682, 1500000, div, ARRAY_SIZE(div));
+
+	snd_soc_component_update_bits(component, RT5682_DMIC_CTRL_1,
+		RT5682_DMIC_CLK_MASK, idx << RT5682_DMIC_CLK_SFT);
+
+	return 0;
+}
+
+static int set_filter_clk(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	int ref, val, reg, sft, mask, idx = -EINVAL;
+	static const int div_f[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48};
+	static const int div_o[] = {1, 2, 4, 6, 8, 12, 16, 24, 32, 48};
+
+	val = snd_soc_component_read32(component, RT5682_GPIO_CTRL_1) &
+		RT5682_GP4_PIN_MASK;
+	if (w->shift == RT5682_PWR_ADC_S1F_BIT &&
+		val == RT5682_GP4_PIN_ADCDAT2)
+		ref = 256 * rt5682->lrck[RT5682_AIF2];
+	else
+		ref = 256 * rt5682->lrck[RT5682_AIF1];
+
+	idx = rt5682_div_sel(rt5682, ref, div_f, ARRAY_SIZE(div_f));
+
+	if (w->shift == RT5682_PWR_ADC_S1F_BIT) {
+		reg = RT5682_PLL_TRACK_3;
+		sft = RT5682_ADC_OSR_SFT;
+		mask = RT5682_ADC_OSR_MASK;
+	} else {
+		reg = RT5682_PLL_TRACK_2;
+		sft = RT5682_DAC_OSR_SFT;
+		mask = RT5682_DAC_OSR_MASK;
+	}
+
+	snd_soc_component_update_bits(component, reg,
+		RT5682_FILTER_CLK_DIV_MASK, idx << RT5682_FILTER_CLK_DIV_SFT);
+
+	/* select over sample rate */
+	for (idx = 0; idx < ARRAY_SIZE(div_o); idx++) {
+		if (rt5682->sysclk <= 12288000 * div_o[idx])
+			break;
+	}
+
+	snd_soc_component_update_bits(component, RT5682_ADDA_CLK_1,
+		mask, idx << sft);
+
+	return 0;
+}
+
+static int is_sys_clk_from_pll1(struct snd_soc_dapm_widget *w,
+			 struct snd_soc_dapm_widget *sink)
+{
+	unsigned int val;
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	val = snd_soc_component_read32(component, RT5682_GLB_CLK);
+	val &= RT5682_SCLK_SRC_MASK;
+	if (val == RT5682_SCLK_SRC_PLL1)
+		return 1;
+	else
+		return 0;
+}
+
+static int is_using_asrc(struct snd_soc_dapm_widget *w,
+			 struct snd_soc_dapm_widget *sink)
+{
+	unsigned int reg, shift, val;
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (w->shift) {
+	case RT5682_ADC_STO1_ASRC_SFT:
+		reg = RT5682_PLL_TRACK_3;
+		shift = RT5682_FILTER_CLK_SEL_SFT;
+		break;
+	case RT5682_DAC_STO1_ASRC_SFT:
+		reg = RT5682_PLL_TRACK_2;
+		shift = RT5682_FILTER_CLK_SEL_SFT;
+		break;
+	default:
+		return 0;
+	}
+
+	val = (snd_soc_component_read32(component, reg) >> shift) & 0xf;
+	switch (val) {
+	case RT5682_CLK_SEL_I2S1_ASRC:
+	case RT5682_CLK_SEL_I2S2_ASRC:
+		return 1;
+	default:
+		return 0;
+	}
+
+}
+
+/* Digital Mixer */
+static const struct snd_kcontrol_new rt5682_sto1_adc_l_mix[] = {
+	SOC_DAPM_SINGLE("ADC1 Switch", RT5682_STO1_ADC_MIXER,
+			RT5682_M_STO1_ADC_L1_SFT, 1, 1),
+	SOC_DAPM_SINGLE("ADC2 Switch", RT5682_STO1_ADC_MIXER,
+			RT5682_M_STO1_ADC_L2_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5682_sto1_adc_r_mix[] = {
+	SOC_DAPM_SINGLE("ADC1 Switch", RT5682_STO1_ADC_MIXER,
+			RT5682_M_STO1_ADC_R1_SFT, 1, 1),
+	SOC_DAPM_SINGLE("ADC2 Switch", RT5682_STO1_ADC_MIXER,
+			RT5682_M_STO1_ADC_R2_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5682_dac_l_mix[] = {
+	SOC_DAPM_SINGLE("Stereo ADC Switch", RT5682_AD_DA_MIXER,
+			RT5682_M_ADCMIX_L_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC1 Switch", RT5682_AD_DA_MIXER,
+			RT5682_M_DAC1_L_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5682_dac_r_mix[] = {
+	SOC_DAPM_SINGLE("Stereo ADC Switch", RT5682_AD_DA_MIXER,
+			RT5682_M_ADCMIX_R_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC1 Switch", RT5682_AD_DA_MIXER,
+			RT5682_M_DAC1_R_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5682_sto1_dac_l_mix[] = {
+	SOC_DAPM_SINGLE("DAC L1 Switch", RT5682_STO1_DAC_MIXER,
+			RT5682_M_DAC_L1_STO_L_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC R1 Switch", RT5682_STO1_DAC_MIXER,
+			RT5682_M_DAC_R1_STO_L_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5682_sto1_dac_r_mix[] = {
+	SOC_DAPM_SINGLE("DAC L1 Switch", RT5682_STO1_DAC_MIXER,
+			RT5682_M_DAC_L1_STO_R_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC R1 Switch", RT5682_STO1_DAC_MIXER,
+			RT5682_M_DAC_R1_STO_R_SFT, 1, 1),
+};
+
+/* Analog Input Mixer */
+static const struct snd_kcontrol_new rt5682_rec1_l_mix[] = {
+	SOC_DAPM_SINGLE("CBJ Switch", RT5682_REC_MIXER,
+			RT5682_M_CBJ_RM1_L_SFT, 1, 1),
+};
+
+/* STO1 ADC1 Source */
+/* MX-26 [13] [5] */
+static const char * const rt5682_sto1_adc1_src[] = {
+	"DAC MIX", "ADC"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adc1l_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADC1L_SRC_SFT, rt5682_sto1_adc1_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adc1l_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC1L Source", rt5682_sto1_adc1l_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adc1r_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADC1R_SRC_SFT, rt5682_sto1_adc1_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adc1r_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC1L Source", rt5682_sto1_adc1r_enum);
+
+/* STO1 ADC Source */
+/* MX-26 [11:10] [3:2] */
+static const char * const rt5682_sto1_adc_src[] = {
+	"ADC1 L", "ADC1 R"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adcl_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADCL_SRC_SFT, rt5682_sto1_adc_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adcl_mux =
+	SOC_DAPM_ENUM("Stereo1 ADCL Source", rt5682_sto1_adcl_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adcr_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADCR_SRC_SFT, rt5682_sto1_adc_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adcr_mux =
+	SOC_DAPM_ENUM("Stereo1 ADCR Source", rt5682_sto1_adcr_enum);
+
+/* STO1 ADC2 Source */
+/* MX-26 [12] [4] */
+static const char * const rt5682_sto1_adc2_src[] = {
+	"DAC MIX", "DMIC"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adc2l_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADC2L_SRC_SFT, rt5682_sto1_adc2_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adc2l_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC2L Source", rt5682_sto1_adc2l_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_sto1_adc2r_enum, RT5682_STO1_ADC_MIXER,
+	RT5682_STO1_ADC2R_SRC_SFT, rt5682_sto1_adc2_src);
+
+static const struct snd_kcontrol_new rt5682_sto1_adc2r_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC2R Source", rt5682_sto1_adc2r_enum);
+
+/* MX-79 [6:4] I2S1 ADC data location */
+static const unsigned int rt5682_if1_adc_slot_values[] = {
+	0,
+	2,
+	4,
+	6,
+};
+
+static const char * const rt5682_if1_adc_slot_src[] = {
+	"Slot 0", "Slot 2", "Slot 4", "Slot 6"
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(rt5682_if1_adc_slot_enum,
+	RT5682_TDM_CTRL, RT5682_TDM_ADC_LCA_SFT, RT5682_TDM_ADC_LCA_MASK,
+	rt5682_if1_adc_slot_src, rt5682_if1_adc_slot_values);
+
+static const struct snd_kcontrol_new rt5682_if1_adc_slot_mux =
+	SOC_DAPM_ENUM("IF1 ADC Slot location", rt5682_if1_adc_slot_enum);
+
+/* Analog DAC L1 Source, Analog DAC R1 Source*/
+/* MX-2B [4], MX-2B [0]*/
+static const char * const rt5682_alg_dac1_src[] = {
+	"Stereo1 DAC Mixer", "DAC1"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_alg_dac_l1_enum, RT5682_A_DAC1_MUX,
+	RT5682_A_DACL1_SFT, rt5682_alg_dac1_src);
+
+static const struct snd_kcontrol_new rt5682_alg_dac_l1_mux =
+	SOC_DAPM_ENUM("Analog DAC L1 Source", rt5682_alg_dac_l1_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5682_alg_dac_r1_enum, RT5682_A_DAC1_MUX,
+	RT5682_A_DACR1_SFT, rt5682_alg_dac1_src);
+
+static const struct snd_kcontrol_new rt5682_alg_dac_r1_mux =
+	SOC_DAPM_ENUM("Analog DAC R1 Source", rt5682_alg_dac_r1_enum);
+
+/* Out Switch */
+static const struct snd_kcontrol_new hpol_switch =
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch", RT5682_HP_CTRL_1,
+					RT5682_L_MUTE_SFT, 1, 1);
+static const struct snd_kcontrol_new hpor_switch =
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch", RT5682_HP_CTRL_1,
+					RT5682_R_MUTE_SFT, 1, 1);
+
+static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		snd_soc_component_write(component,
+			RT5682_HP_LOGIC_CTRL_2, 0x0012);
+		snd_soc_component_write(component,
+			RT5682_HP_CTRL_2, 0x6000);
+		snd_soc_component_update_bits(component, RT5682_STO_NG2_CTRL_1,
+			RT5682_NG2_EN_MASK, RT5682_NG2_EN);
+		snd_soc_component_update_bits(component,
+			RT5682_DEPOP_1, 0x60, 0x60);
+		break;
+
+	case SND_SOC_DAPM_POST_PMD:
+		snd_soc_component_update_bits(component,
+			RT5682_DEPOP_1, 0x60, 0x0);
+		snd_soc_component_write(component,
+			RT5682_HP_CTRL_2, 0x0000);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+
+}
+
+static int set_dmic_power(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/*Add delay to avoid pop noise*/
+		msleep(150);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static int rt5655_set_verf(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		switch (w->shift) {
+		case RT5682_PWR_VREF1_BIT:
+			snd_soc_component_update_bits(component,
+				RT5682_PWR_ANLG_1, RT5682_PWR_FV1, 0);
+			break;
+
+		case RT5682_PWR_VREF2_BIT:
+			snd_soc_component_update_bits(component,
+				RT5682_PWR_ANLG_1, RT5682_PWR_FV2, 0);
+			break;
+
+		default:
+			break;
+		}
+		break;
+
+	case SND_SOC_DAPM_POST_PMU:
+		usleep_range(15000, 20000);
+		switch (w->shift) {
+		case RT5682_PWR_VREF1_BIT:
+			snd_soc_component_update_bits(component,
+				RT5682_PWR_ANLG_1, RT5682_PWR_FV1,
+				RT5682_PWR_FV1);
+			break;
+
+		case RT5682_PWR_VREF2_BIT:
+			snd_soc_component_update_bits(component,
+				RT5682_PWR_ANLG_1, RT5682_PWR_FV2,
+				RT5682_PWR_FV2);
+			break;
+
+		default:
+			break;
+		}
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static const unsigned int rt5682_adcdat_pin_values[] = {
+	1,
+	3,
+};
+
+static const char * const rt5682_adcdat_pin_select[] = {
+	"ADCDAT1",
+	"ADCDAT2",
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(rt5682_adcdat_pin_enum,
+	RT5682_GPIO_CTRL_1, RT5682_GP4_PIN_SFT, RT5682_GP4_PIN_MASK,
+	rt5682_adcdat_pin_select, rt5682_adcdat_pin_values);
+
+static const struct snd_kcontrol_new rt5682_adcdat_pin_ctrl =
+	SOC_DAPM_ENUM("ADCDAT", rt5682_adcdat_pin_enum);
+
+static const struct snd_soc_dapm_widget rt5682_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY("LDO2", RT5682_PWR_ANLG_3, RT5682_PWR_LDO2_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL1", RT5682_PWR_ANLG_3, RT5682_PWR_PLL_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL2B", RT5682_PWR_ANLG_3, RT5682_PWR_PLL2B_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL2F", RT5682_PWR_ANLG_3, RT5682_PWR_PLL2F_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Vref1", RT5682_PWR_ANLG_1, RT5682_PWR_VREF1_BIT, 0,
+		rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+	SND_SOC_DAPM_SUPPLY("Vref2", RT5682_PWR_ANLG_1, RT5682_PWR_VREF2_BIT, 0,
+		rt5655_set_verf, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+	/* ASRC */
+	SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5682_PLL_TRACK_1,
+		RT5682_DAC_STO1_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("ADC STO1 ASRC", 1, RT5682_PLL_TRACK_1,
+		RT5682_ADC_STO1_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("AD ASRC", 1, RT5682_PLL_TRACK_1,
+		RT5682_AD_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("DA ASRC", 1, RT5682_PLL_TRACK_1,
+		RT5682_DA_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("DMIC ASRC", 1, RT5682_PLL_TRACK_1,
+		RT5682_DMIC_ASRC_SFT, 0, NULL, 0),
+
+	/* Input Side */
+	SND_SOC_DAPM_SUPPLY("MICBIAS1", RT5682_PWR_ANLG_2, RT5682_PWR_MB1_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("MICBIAS2", RT5682_PWR_ANLG_2, RT5682_PWR_MB2_BIT,
+		0, NULL, 0),
+
+	/* Input Lines */
+	SND_SOC_DAPM_INPUT("DMIC L1"),
+	SND_SOC_DAPM_INPUT("DMIC R1"),
+
+	SND_SOC_DAPM_INPUT("IN1P"),
+
+	SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+		set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_SUPPLY("DMIC1 Power", RT5682_DMIC_CTRL_1,
+		RT5682_DMIC_1_EN_SFT, 0, set_dmic_power, SND_SOC_DAPM_POST_PMU),
+
+	/* Boost */
+	SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM,
+		0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("CBJ Power", RT5682_PWR_ANLG_3,
+		RT5682_PWR_CBJ_BIT, 0, NULL, 0),
+
+	/* REC Mixer */
+	SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5682_rec1_l_mix,
+		ARRAY_SIZE(rt5682_rec1_l_mix)),
+	SND_SOC_DAPM_SUPPLY("RECMIX1L Power", RT5682_PWR_ANLG_2,
+		RT5682_PWR_RM1_L_BIT, 0, NULL, 0),
+
+	/* ADCs */
+	SND_SOC_DAPM_ADC("ADC1 L", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC1 R", NULL, SND_SOC_NOPM, 0, 0),
+
+	SND_SOC_DAPM_SUPPLY("ADC1 L Power", RT5682_PWR_DIG_1,
+		RT5682_PWR_ADC_L1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC1 R Power", RT5682_PWR_DIG_1,
+		RT5682_PWR_ADC_R1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC1 clock", RT5682_CHOP_ADC,
+		RT5682_CKGEN_ADC1_SFT, 0, NULL, 0),
+
+	/* ADC Mux */
+	SND_SOC_DAPM_MUX("Stereo1 ADC L1 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adc1l_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R1 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adc1r_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC L2 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adc2l_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R2 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adc2r_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC L Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adcl_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_sto1_adcr_mux),
+	SND_SOC_DAPM_MUX("IF1_ADC Mux", SND_SOC_NOPM, 0, 0,
+		&rt5682_if1_adc_slot_mux),
+
+	/* ADC Mixer */
+	SND_SOC_DAPM_SUPPLY("ADC Stereo1 Filter", RT5682_PWR_DIG_2,
+		RT5682_PWR_ADC_S1F_BIT, 0, set_filter_clk,
+		SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_MIXER("Stereo1 ADC MIXL", RT5682_STO1_ADC_DIG_VOL,
+		RT5682_L_MUTE_SFT, 1, rt5682_sto1_adc_l_mix,
+		ARRAY_SIZE(rt5682_sto1_adc_l_mix)),
+	SND_SOC_DAPM_MIXER("Stereo1 ADC MIXR", RT5682_STO1_ADC_DIG_VOL,
+		RT5682_R_MUTE_SFT, 1, rt5682_sto1_adc_r_mix,
+		ARRAY_SIZE(rt5682_sto1_adc_r_mix)),
+
+	/* ADC PGA */
+	SND_SOC_DAPM_PGA("Stereo1 ADC MIX", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	/* Digital Interface */
+	SND_SOC_DAPM_SUPPLY("I2S1", RT5682_PWR_DIG_1, RT5682_PWR_I2S1_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("I2S2", RT5682_PWR_DIG_1, RT5682_PWR_I2S2_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1 L", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1 R", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	/* Digital Interface Select */
+	SND_SOC_DAPM_MUX("IF1 01 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_if1_01_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 23 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_if1_23_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 45 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_if1_45_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 67 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_if1_67_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF2 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_if2_adc_swap_mux),
+
+	SND_SOC_DAPM_MUX("ADCDAT Mux", SND_SOC_NOPM, 0, 0,
+			&rt5682_adcdat_pin_ctrl),
+
+	/* Audio Interface */
+	SND_SOC_DAPM_AIF_OUT("AIF1TX", "AIF1 Capture", 0,
+		RT5682_I2S1_SDP, RT5682_SEL_ADCDAT_SFT, 1),
+	SND_SOC_DAPM_AIF_OUT("AIF2TX", "AIF2 Capture", 0,
+		RT5682_I2S2_SDP, RT5682_I2S2_PIN_CFG_SFT, 1),
+	SND_SOC_DAPM_AIF_IN("AIF1RX", "AIF1 Playback", 0, SND_SOC_NOPM, 0, 0),
+
+	/* Output Side */
+	/* DAC mixer before sound effect  */
+	SND_SOC_DAPM_MIXER("DAC1 MIXL", SND_SOC_NOPM, 0, 0,
+		rt5682_dac_l_mix, ARRAY_SIZE(rt5682_dac_l_mix)),
+	SND_SOC_DAPM_MIXER("DAC1 MIXR", SND_SOC_NOPM, 0, 0,
+		rt5682_dac_r_mix, ARRAY_SIZE(rt5682_dac_r_mix)),
+
+	/* DAC channel Mux */
+	SND_SOC_DAPM_MUX("DAC L1 Source", SND_SOC_NOPM, 0, 0,
+		&rt5682_alg_dac_l1_mux),
+	SND_SOC_DAPM_MUX("DAC R1 Source", SND_SOC_NOPM, 0, 0,
+		&rt5682_alg_dac_r1_mux),
+
+	/* DAC Mixer */
+	SND_SOC_DAPM_SUPPLY("DAC Stereo1 Filter", RT5682_PWR_DIG_2,
+		RT5682_PWR_DAC_S1F_BIT, 0, set_filter_clk,
+		SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_MIXER("Stereo1 DAC MIXL", SND_SOC_NOPM, 0, 0,
+		rt5682_sto1_dac_l_mix, ARRAY_SIZE(rt5682_sto1_dac_l_mix)),
+	SND_SOC_DAPM_MIXER("Stereo1 DAC MIXR", SND_SOC_NOPM, 0, 0,
+		rt5682_sto1_dac_r_mix, ARRAY_SIZE(rt5682_sto1_dac_r_mix)),
+
+	/* DACs */
+	SND_SOC_DAPM_DAC("DAC L1", NULL, RT5682_PWR_DIG_1,
+		RT5682_PWR_DAC_L1_BIT, 0),
+	SND_SOC_DAPM_DAC("DAC R1", NULL, RT5682_PWR_DIG_1,
+		RT5682_PWR_DAC_R1_BIT, 0),
+	SND_SOC_DAPM_SUPPLY_S("DAC 1 Clock", 3, RT5682_CHOP_DAC,
+		RT5682_CKGEN_DAC1_SFT, 0, NULL, 0),
+
+	/* HPO */
+	SND_SOC_DAPM_PGA_S("HP Amp", 1, SND_SOC_NOPM, 0, 0, rt5682_hp_event,
+		SND_SOC_DAPM_POST_PMD | SND_SOC_DAPM_PRE_PMU),
+
+	SND_SOC_DAPM_SUPPLY("HP Amp L", RT5682_PWR_ANLG_1,
+		RT5682_PWR_HA_L_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("HP Amp R", RT5682_PWR_ANLG_1,
+		RT5682_PWR_HA_R_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("Charge Pump", 1, RT5682_DEPOP_1,
+		RT5682_PUMP_EN_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("Capless", 2, RT5682_DEPOP_1,
+		RT5682_CAPLESS_EN_SFT, 0, NULL, 0),
+
+	SND_SOC_DAPM_SWITCH("HPOL Playback", SND_SOC_NOPM, 0, 0,
+		&hpol_switch),
+	SND_SOC_DAPM_SWITCH("HPOR Playback", SND_SOC_NOPM, 0, 0,
+		&hpor_switch),
+
+	/* CLK DET */
+	SND_SOC_DAPM_SUPPLY("CLKDET SYS", RT5682_CLK_DET,
+		RT5682_SYS_CLK_DET_SFT,	0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET PLL1", RT5682_CLK_DET,
+		RT5682_PLL1_CLK_DET_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET PLL2", RT5682_CLK_DET,
+		RT5682_PLL2_CLK_DET_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET", RT5682_CLK_DET,
+		RT5682_POW_CLK_DET_SFT, 0, NULL, 0),
+
+	/* Output Lines */
+	SND_SOC_DAPM_OUTPUT("HPOL"),
+	SND_SOC_DAPM_OUTPUT("HPOR"),
+
+};
+
+static const struct snd_soc_dapm_route rt5682_dapm_routes[] = {
+	/*PLL*/
+	{"ADC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll1},
+	{"DAC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll1},
+
+	/*ASRC*/
+	{"ADC Stereo1 Filter", NULL, "ADC STO1 ASRC", is_using_asrc},
+	{"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc},
+	{"ADC STO1 ASRC", NULL, "AD ASRC"},
+	{"ADC STO1 ASRC", NULL, "CLKDET"},
+	{"DAC STO1 ASRC", NULL, "DA ASRC"},
+	{"DAC STO1 ASRC", NULL, "CLKDET"},
+
+	/*Vref*/
+	{"MICBIAS1", NULL, "Vref1"},
+	{"MICBIAS1", NULL, "Vref2"},
+	{"MICBIAS2", NULL, "Vref1"},
+	{"MICBIAS2", NULL, "Vref2"},
+
+	{"CLKDET SYS", NULL, "CLKDET"},
+
+	{"IN1P", NULL, "LDO2"},
+
+	{"BST1 CBJ", NULL, "IN1P"},
+	{"BST1 CBJ", NULL, "CBJ Power"},
+	{"CBJ Power", NULL, "Vref2"},
+
+	{"RECMIX1L", "CBJ Switch", "BST1 CBJ"},
+	{"RECMIX1L", NULL, "RECMIX1L Power"},
+
+	{"ADC1 L", NULL, "RECMIX1L"},
+	{"ADC1 L", NULL, "ADC1 L Power"},
+	{"ADC1 L", NULL, "ADC1 clock"},
+
+	{"DMIC L1", NULL, "DMIC CLK"},
+	{"DMIC L1", NULL, "DMIC1 Power"},
+	{"DMIC R1", NULL, "DMIC CLK"},
+	{"DMIC R1", NULL, "DMIC1 Power"},
+	{"DMIC CLK", NULL, "DMIC ASRC"},
+
+	{"Stereo1 ADC L Mux", "ADC1 L", "ADC1 L"},
+	{"Stereo1 ADC L Mux", "ADC1 R", "ADC1 R"},
+	{"Stereo1 ADC R Mux", "ADC1 L", "ADC1 L"},
+	{"Stereo1 ADC R Mux", "ADC1 R", "ADC1 R"},
+
+	{"Stereo1 ADC L1 Mux", "ADC", "Stereo1 ADC L Mux"},
+	{"Stereo1 ADC L1 Mux", "DAC MIX", "Stereo1 DAC MIXL"},
+	{"Stereo1 ADC L2 Mux", "DMIC", "DMIC L1"},
+	{"Stereo1 ADC L2 Mux", "DAC MIX", "Stereo1 DAC MIXL"},
+
+	{"Stereo1 ADC R1 Mux", "ADC", "Stereo1 ADC R Mux"},
+	{"Stereo1 ADC R1 Mux", "DAC MIX", "Stereo1 DAC MIXR"},
+	{"Stereo1 ADC R2 Mux", "DMIC", "DMIC R1"},
+	{"Stereo1 ADC R2 Mux", "DAC MIX", "Stereo1 DAC MIXR"},
+
+	{"Stereo1 ADC MIXL", "ADC1 Switch", "Stereo1 ADC L1 Mux"},
+	{"Stereo1 ADC MIXL", "ADC2 Switch", "Stereo1 ADC L2 Mux"},
+	{"Stereo1 ADC MIXL", NULL, "ADC Stereo1 Filter"},
+
+	{"Stereo1 ADC MIXR", "ADC1 Switch", "Stereo1 ADC R1 Mux"},
+	{"Stereo1 ADC MIXR", "ADC2 Switch", "Stereo1 ADC R2 Mux"},
+	{"Stereo1 ADC MIXR", NULL, "ADC Stereo1 Filter"},
+
+	{"Stereo1 ADC MIX", NULL, "Stereo1 ADC MIXL"},
+	{"Stereo1 ADC MIX", NULL, "Stereo1 ADC MIXR"},
+
+	{"IF1 01 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+
+	{"IF1_ADC Mux", "Slot 0", "IF1 01 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 2", "IF1 23 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 4", "IF1 45 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 6", "IF1 67 ADC Swap Mux"},
+	{"IF1_ADC Mux", NULL, "I2S1"},
+	{"ADCDAT Mux", "ADCDAT1", "IF1_ADC Mux"},
+	{"AIF1TX", NULL, "ADCDAT Mux"},
+	{"IF2 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"ADCDAT Mux", "ADCDAT2", "IF2 ADC Swap Mux"},
+	{"AIF2TX", NULL, "ADCDAT Mux"},
+
+	{"IF1 DAC1 L", NULL, "AIF1RX"},
+	{"IF1 DAC1 L", NULL, "I2S1"},
+	{"IF1 DAC1 L", NULL, "DAC Stereo1 Filter"},
+	{"IF1 DAC1 R", NULL, "AIF1RX"},
+	{"IF1 DAC1 R", NULL, "I2S1"},
+	{"IF1 DAC1 R", NULL, "DAC Stereo1 Filter"},
+
+	{"DAC1 MIXL", "Stereo ADC Switch", "Stereo1 ADC MIXL"},
+	{"DAC1 MIXL", "DAC1 Switch", "IF1 DAC1 L"},
+	{"DAC1 MIXR", "Stereo ADC Switch", "Stereo1 ADC MIXR"},
+	{"DAC1 MIXR", "DAC1 Switch", "IF1 DAC1 R"},
+
+	{"Stereo1 DAC MIXL", "DAC L1 Switch", "DAC1 MIXL"},
+	{"Stereo1 DAC MIXL", "DAC R1 Switch", "DAC1 MIXR"},
+
+	{"Stereo1 DAC MIXR", "DAC R1 Switch", "DAC1 MIXR"},
+	{"Stereo1 DAC MIXR", "DAC L1 Switch", "DAC1 MIXL"},
+
+	{"DAC L1 Source", "DAC1", "DAC1 MIXL"},
+	{"DAC L1 Source", "Stereo1 DAC Mixer", "Stereo1 DAC MIXL"},
+	{"DAC R1 Source", "DAC1", "DAC1 MIXR"},
+	{"DAC R1 Source", "Stereo1 DAC Mixer", "Stereo1 DAC MIXR"},
+
+	{"DAC L1", NULL, "DAC L1 Source"},
+	{"DAC R1", NULL, "DAC R1 Source"},
+
+	{"DAC L1", NULL, "DAC 1 Clock"},
+	{"DAC R1", NULL, "DAC 1 Clock"},
+
+	{"HP Amp", NULL, "DAC L1"},
+	{"HP Amp", NULL, "DAC R1"},
+	{"HP Amp", NULL, "HP Amp L"},
+	{"HP Amp", NULL, "HP Amp R"},
+	{"HP Amp", NULL, "Capless"},
+	{"HP Amp", NULL, "Charge Pump"},
+	{"HP Amp", NULL, "CLKDET SYS"},
+	{"HP Amp", NULL, "CBJ Power"},
+	{"HP Amp", NULL, "Vref2"},
+	{"HPOL Playback", "Switch", "HP Amp"},
+	{"HPOR Playback", "Switch", "HP Amp"},
+	{"HPOL", NULL, "HPOL Playback"},
+	{"HPOR", NULL, "HPOR Playback"},
+};
+
+static int rt5682_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+			unsigned int rx_mask, int slots, int slot_width)
+{
+	struct snd_soc_component *component = dai->component;
+	unsigned int cl, val = 0;
+
+	if (tx_mask || rx_mask)
+		snd_soc_component_update_bits(component, RT5682_TDM_ADDA_CTRL_2,
+			RT5682_TDM_EN, RT5682_TDM_EN);
+	else
+		snd_soc_component_update_bits(component, RT5682_TDM_ADDA_CTRL_2,
+			RT5682_TDM_EN, 0);
+
+	switch (slots) {
+	case 4:
+		val |= RT5682_TDM_TX_CH_4;
+		val |= RT5682_TDM_RX_CH_4;
+		break;
+	case 6:
+		val |= RT5682_TDM_TX_CH_6;
+		val |= RT5682_TDM_RX_CH_6;
+		break;
+	case 8:
+		val |= RT5682_TDM_TX_CH_8;
+		val |= RT5682_TDM_RX_CH_8;
+		break;
+	case 2:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	snd_soc_component_update_bits(component, RT5682_TDM_CTRL,
+		RT5682_TDM_TX_CH_MASK | RT5682_TDM_RX_CH_MASK, val);
+
+	switch (slot_width) {
+	case 8:
+		if (tx_mask || rx_mask)
+			return -EINVAL;
+		cl = RT5682_I2S1_TX_CHL_8 | RT5682_I2S1_RX_CHL_8;
+		break;
+	case 16:
+		val = RT5682_TDM_CL_16;
+		cl = RT5682_I2S1_TX_CHL_16 | RT5682_I2S1_RX_CHL_16;
+		break;
+	case 20:
+		val = RT5682_TDM_CL_20;
+		cl = RT5682_I2S1_TX_CHL_20 | RT5682_I2S1_RX_CHL_20;
+		break;
+	case 24:
+		val = RT5682_TDM_CL_24;
+		cl = RT5682_I2S1_TX_CHL_24 | RT5682_I2S1_RX_CHL_24;
+		break;
+	case 32:
+		val = RT5682_TDM_CL_32;
+		cl = RT5682_I2S1_TX_CHL_32 | RT5682_I2S1_RX_CHL_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	snd_soc_component_update_bits(component, RT5682_TDM_TCON_CTRL,
+		RT5682_TDM_CL_MASK, val);
+	snd_soc_component_update_bits(component, RT5682_I2S1_SDP,
+		RT5682_I2S1_TX_CHL_MASK | RT5682_I2S1_RX_CHL_MASK, cl);
+
+	return 0;
+}
+
+
+static int rt5682_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	unsigned int len_1 = 0, len_2 = 0;
+	int pre_div, frame_size;
+
+	rt5682->lrck[dai->id] = params_rate(params);
+	pre_div = rl6231_get_clk_info(rt5682->sysclk, rt5682->lrck[dai->id]);
+
+	frame_size = snd_soc_params_to_frame_size(params);
+	if (frame_size < 0) {
+		dev_err(component->dev, "Unsupported frame size: %d\n",
+			frame_size);
+		return -EINVAL;
+	}
+
+	dev_dbg(dai->dev, "lrck is %dHz and pre_div is %d for iis %d\n",
+				rt5682->lrck[dai->id], pre_div, dai->id);
+
+	switch (params_width(params)) {
+	case 16:
+		break;
+	case 20:
+		len_1 |= RT5682_I2S1_DL_20;
+		len_2 |= RT5682_I2S2_DL_20;
+		break;
+	case 24:
+		len_1 |= RT5682_I2S1_DL_24;
+		len_2 |= RT5682_I2S2_DL_24;
+		break;
+	case 32:
+		len_1 |= RT5682_I2S1_DL_32;
+		len_2 |= RT5682_I2S2_DL_24;
+		break;
+	case 8:
+		len_1 |= RT5682_I2S2_DL_8;
+		len_2 |= RT5682_I2S2_DL_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT5682_AIF1:
+		snd_soc_component_update_bits(component, RT5682_I2S1_SDP,
+			RT5682_I2S1_DL_MASK, len_1);
+		if (rt5682->master[RT5682_AIF1]) {
+			snd_soc_component_update_bits(component,
+				RT5682_ADDA_CLK_1, RT5682_I2S_M_DIV_MASK,
+				pre_div << RT5682_I2S_M_DIV_SFT);
+		}
+		if (params_channels(params) == 1) /* mono mode */
+			snd_soc_component_update_bits(component,
+				RT5682_I2S1_SDP, RT5682_I2S1_MONO_MASK,
+				RT5682_I2S1_MONO_EN);
+		else
+			snd_soc_component_update_bits(component,
+				RT5682_I2S1_SDP, RT5682_I2S1_MONO_MASK,
+				RT5682_I2S1_MONO_DIS);
+		break;
+	case RT5682_AIF2:
+		snd_soc_component_update_bits(component, RT5682_I2S2_SDP,
+			RT5682_I2S2_DL_MASK, len_2);
+		if (rt5682->master[RT5682_AIF2]) {
+			snd_soc_component_update_bits(component,
+				RT5682_I2S_M_CLK_CTRL_1, RT5682_I2S2_M_PD_MASK,
+				pre_div << RT5682_I2S2_M_PD_SFT);
+		}
+		if (params_channels(params) == 1) /* mono mode */
+			snd_soc_component_update_bits(component,
+				RT5682_I2S2_SDP, RT5682_I2S2_MONO_MASK,
+				RT5682_I2S2_MONO_EN);
+		else
+			snd_soc_component_update_bits(component,
+				RT5682_I2S2_SDP, RT5682_I2S2_MONO_MASK,
+				RT5682_I2S2_MONO_DIS);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rt5682_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0, tdm_ctrl = 0;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		rt5682->master[dai->id] = 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		rt5682->master[dai->id] = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		reg_val |= RT5682_I2S_BP_INV;
+		tdm_ctrl |= RT5682_TDM_S_BP_INV;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		if (dai->id == RT5682_AIF1)
+			tdm_ctrl |= RT5682_TDM_S_LP_INV | RT5682_TDM_M_BP_INV;
+		else
+			return -EINVAL;
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		if (dai->id == RT5682_AIF1)
+			tdm_ctrl |= RT5682_TDM_S_BP_INV | RT5682_TDM_S_LP_INV |
+				    RT5682_TDM_M_BP_INV | RT5682_TDM_M_LP_INV;
+		else
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		reg_val |= RT5682_I2S_DF_LEFT;
+		tdm_ctrl |= RT5682_TDM_DF_LEFT;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		reg_val |= RT5682_I2S_DF_PCM_A;
+		tdm_ctrl |= RT5682_TDM_DF_PCM_A;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		reg_val |= RT5682_I2S_DF_PCM_B;
+		tdm_ctrl |= RT5682_TDM_DF_PCM_B;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT5682_AIF1:
+		snd_soc_component_update_bits(component, RT5682_I2S1_SDP,
+			RT5682_I2S_DF_MASK, reg_val);
+		snd_soc_component_update_bits(component, RT5682_TDM_TCON_CTRL,
+			RT5682_TDM_MS_MASK | RT5682_TDM_S_BP_MASK |
+			RT5682_TDM_DF_MASK | RT5682_TDM_M_BP_MASK |
+			RT5682_TDM_M_LP_MASK | RT5682_TDM_S_LP_MASK,
+			tdm_ctrl | rt5682->master[dai->id]);
+		break;
+	case RT5682_AIF2:
+		if (rt5682->master[dai->id] == 0)
+			reg_val |= RT5682_I2S2_MS_S;
+		snd_soc_component_update_bits(component, RT5682_I2S2_SDP,
+			RT5682_I2S2_MS_MASK | RT5682_I2S_BP_MASK |
+			RT5682_I2S_DF_MASK, reg_val);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int rt5682_set_component_sysclk(struct snd_soc_component *component,
+		int clk_id, int source, unsigned int freq, int dir)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0, src = 0;
+
+	if (freq == rt5682->sysclk && clk_id == rt5682->sysclk_src)
+		return 0;
+
+	switch (clk_id) {
+	case RT5682_SCLK_S_MCLK:
+		reg_val |= RT5682_SCLK_SRC_MCLK;
+		src = RT5682_CLK_SRC_MCLK;
+		break;
+	case RT5682_SCLK_S_PLL1:
+		reg_val |= RT5682_SCLK_SRC_PLL1;
+		src = RT5682_CLK_SRC_PLL1;
+		break;
+	case RT5682_SCLK_S_PLL2:
+		reg_val |= RT5682_SCLK_SRC_PLL2;
+		src = RT5682_CLK_SRC_PLL2;
+		break;
+	case RT5682_SCLK_S_RCCLK:
+		reg_val |= RT5682_SCLK_SRC_RCCLK;
+		src = RT5682_CLK_SRC_RCCLK;
+		break;
+	default:
+		dev_err(component->dev, "Invalid clock id (%d)\n", clk_id);
+		return -EINVAL;
+	}
+	snd_soc_component_update_bits(component, RT5682_GLB_CLK,
+		RT5682_SCLK_SRC_MASK, reg_val);
+
+	if (rt5682->master[RT5682_AIF2]) {
+		snd_soc_component_update_bits(component,
+			RT5682_I2S_M_CLK_CTRL_1, RT5682_I2S2_SRC_MASK,
+			src << RT5682_I2S2_SRC_SFT);
+	}
+
+	rt5682->sysclk = freq;
+	rt5682->sysclk_src = clk_id;
+
+	dev_dbg(component->dev, "Sysclk is %dHz and clock id is %d\n",
+		freq, clk_id);
+
+	return 0;
+}
+
+static int rt5682_set_component_pll(struct snd_soc_component *component,
+		int pll_id, int source, unsigned int freq_in,
+		unsigned int freq_out)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+	struct rl6231_pll_code pll_code;
+	int ret;
+
+	if (source == rt5682->pll_src && freq_in == rt5682->pll_in &&
+	    freq_out == rt5682->pll_out)
+		return 0;
+
+	if (!freq_in || !freq_out) {
+		dev_dbg(component->dev, "PLL disabled\n");
+
+		rt5682->pll_in = 0;
+		rt5682->pll_out = 0;
+		snd_soc_component_update_bits(component, RT5682_GLB_CLK,
+			RT5682_SCLK_SRC_MASK, RT5682_SCLK_SRC_MCLK);
+		return 0;
+	}
+
+	switch (source) {
+	case RT5682_PLL1_S_MCLK:
+		snd_soc_component_update_bits(component, RT5682_GLB_CLK,
+			RT5682_PLL1_SRC_MASK, RT5682_PLL1_SRC_MCLK);
+		break;
+	case RT5682_PLL1_S_BCLK1:
+		snd_soc_component_update_bits(component, RT5682_GLB_CLK,
+				RT5682_PLL1_SRC_MASK, RT5682_PLL1_SRC_BCLK1);
+		break;
+	default:
+		dev_err(component->dev, "Unknown PLL Source %d\n", source);
+		return -EINVAL;
+	}
+
+	ret = rl6231_pll_calc(freq_in, freq_out, &pll_code);
+	if (ret < 0) {
+		dev_err(component->dev, "Unsupport input clock %d\n", freq_in);
+		return ret;
+	}
+
+	dev_dbg(component->dev, "bypass=%d m=%d n=%d k=%d\n",
+		pll_code.m_bp, (pll_code.m_bp ? 0 : pll_code.m_code),
+		pll_code.n_code, pll_code.k_code);
+
+	snd_soc_component_write(component, RT5682_PLL_CTRL_1,
+		pll_code.n_code << RT5682_PLL_N_SFT | pll_code.k_code);
+	snd_soc_component_write(component, RT5682_PLL_CTRL_2,
+		(pll_code.m_bp ? 0 : pll_code.m_code) << RT5682_PLL_M_SFT |
+		pll_code.m_bp << RT5682_PLL_M_BP_SFT | RT5682_PLL_RST);
+
+	rt5682->pll_in = freq_in;
+	rt5682->pll_out = freq_out;
+	rt5682->pll_src = source;
+
+	return 0;
+}
+
+static int rt5682_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	rt5682->bclk[dai->id] = ratio;
+
+	switch (ratio) {
+	case 64:
+		snd_soc_component_update_bits(component, RT5682_ADDA_CLK_2,
+			RT5682_I2S2_BCLK_MS2_MASK,
+			RT5682_I2S2_BCLK_MS2_64);
+		break;
+	case 32:
+		snd_soc_component_update_bits(component, RT5682_ADDA_CLK_2,
+			RT5682_I2S2_BCLK_MS2_MASK,
+			RT5682_I2S2_BCLK_MS2_32);
+		break;
+	default:
+		dev_err(dai->dev, "Invalid bclk ratio %d\n", ratio);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rt5682_set_bias_level(struct snd_soc_component *component,
+			enum snd_soc_bias_level level)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	switch (level) {
+	case SND_SOC_BIAS_PREPARE:
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
+			RT5682_PWR_MB | RT5682_PWR_BG,
+			RT5682_PWR_MB | RT5682_PWR_BG);
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
+			RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO,
+			RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
+			RT5682_PWR_MB, RT5682_PWR_MB);
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
+			RT5682_DIG_GATE_CTRL, RT5682_DIG_GATE_CTRL);
+		break;
+	case SND_SOC_BIAS_OFF:
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_DIG_1,
+			RT5682_DIG_GATE_CTRL | RT5682_PWR_LDO, 0);
+		regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
+			RT5682_PWR_MB | RT5682_PWR_BG, 0);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int rt5682_probe(struct snd_soc_component *component)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	rt5682->component = component;
+
+	return 0;
+}
+
+static void rt5682_remove(struct snd_soc_component *component)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	rt5682_reset(rt5682->regmap);
+}
+
+#ifdef CONFIG_PM
+static int rt5682_suspend(struct snd_soc_component *component)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt5682->regmap, true);
+	regcache_mark_dirty(rt5682->regmap);
+	return 0;
+}
+
+static int rt5682_resume(struct snd_soc_component *component)
+{
+	struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt5682->regmap, false);
+	regcache_sync(rt5682->regmap);
+
+	return 0;
+}
+#else
+#define rt5682_suspend NULL
+#define rt5682_resume NULL
+#endif
+
+#define RT5682_STEREO_RATES SNDRV_PCM_RATE_8000_192000
+#define RT5682_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+		SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S8)
+
+static const struct snd_soc_dai_ops rt5682_aif1_dai_ops = {
+	.hw_params = rt5682_hw_params,
+	.set_fmt = rt5682_set_dai_fmt,
+	.set_tdm_slot = rt5682_set_tdm_slot,
+};
+
+static const struct snd_soc_dai_ops rt5682_aif2_dai_ops = {
+	.hw_params = rt5682_hw_params,
+	.set_fmt = rt5682_set_dai_fmt,
+	.set_bclk_ratio = rt5682_set_bclk_ratio,
+};
+
+static struct snd_soc_dai_driver rt5682_dai[] = {
+	{
+		.name = "rt5682-aif1",
+		.id = RT5682_AIF1,
+		.playback = {
+			.stream_name = "AIF1 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5682_STEREO_RATES,
+			.formats = RT5682_FORMATS,
+		},
+		.capture = {
+			.stream_name = "AIF1 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5682_STEREO_RATES,
+			.formats = RT5682_FORMATS,
+		},
+		.ops = &rt5682_aif1_dai_ops,
+	},
+	{
+		.name = "rt5682-aif2",
+		.id = RT5682_AIF2,
+		.capture = {
+			.stream_name = "AIF2 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5682_STEREO_RATES,
+			.formats = RT5682_FORMATS,
+		},
+		.ops = &rt5682_aif2_dai_ops,
+	},
+};
+
+static const struct snd_soc_component_driver soc_component_dev_rt5682 = {
+	.probe = rt5682_probe,
+	.remove = rt5682_remove,
+	.suspend = rt5682_suspend,
+	.resume = rt5682_resume,
+	.set_bias_level = rt5682_set_bias_level,
+	.controls = rt5682_snd_controls,
+	.num_controls = ARRAY_SIZE(rt5682_snd_controls),
+	.dapm_widgets = rt5682_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(rt5682_dapm_widgets),
+	.dapm_routes = rt5682_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(rt5682_dapm_routes),
+	.set_sysclk = rt5682_set_component_sysclk,
+	.set_pll = rt5682_set_component_pll,
+	.set_jack = rt5682_set_jack_detect,
+	.use_pmdown_time	= 1,
+	.endianness		= 1,
+	.non_legacy_dai_naming	= 1,
+};
+
+static const struct regmap_config rt5682_regmap = {
+	.reg_bits = 16,
+	.val_bits = 16,
+	.max_register = RT5682_I2C_MODE,
+	.volatile_reg = rt5682_volatile_register,
+	.readable_reg = rt5682_readable_register,
+	.cache_type = REGCACHE_RBTREE,
+	.reg_defaults = rt5682_reg,
+	.num_reg_defaults = ARRAY_SIZE(rt5682_reg),
+	.use_single_rw = true,
+};
+
+static const struct i2c_device_id rt5682_i2c_id[] = {
+	{"rt5682", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, rt5682_i2c_id);
+
+static int rt5682_parse_dt(struct rt5682_priv *rt5682, struct device *dev)
+{
+
+	device_property_read_u32(dev, "realtek,dmic1-data-pin",
+		&rt5682->pdata.dmic1_data_pin);
+	device_property_read_u32(dev, "realtek,dmic1-clk-pin",
+		&rt5682->pdata.dmic1_clk_pin);
+	device_property_read_u32(dev, "realtek,jd-src",
+		&rt5682->pdata.jd_src);
+
+	rt5682->pdata.ldo1_en = of_get_named_gpio(dev->of_node,
+		"realtek,ldo1-en-gpios", 0);
+
+	return 0;
+}
+
+static void rt5682_calibrate(struct rt5682_priv *rt5682)
+{
+	int value, count;
+
+	mutex_lock(&rt5682->calibrate_mutex);
+
+	rt5682_reset(rt5682->regmap);
+	regmap_write(rt5682->regmap, RT5682_PWR_ANLG_1, 0xa2bf);
+	usleep_range(15000, 20000);
+	regmap_write(rt5682->regmap, RT5682_PWR_ANLG_1, 0xf2bf);
+	regmap_write(rt5682->regmap, RT5682_MICBIAS_2, 0x0380);
+	regmap_write(rt5682->regmap, RT5682_PWR_DIG_1, 0x8001);
+	regmap_write(rt5682->regmap, RT5682_TEST_MODE_CTRL_1, 0x0000);
+	regmap_write(rt5682->regmap, RT5682_STO1_DAC_MIXER, 0x2080);
+	regmap_write(rt5682->regmap, RT5682_STO1_ADC_MIXER, 0x4040);
+	regmap_write(rt5682->regmap, RT5682_DEPOP_1, 0x0069);
+	regmap_write(rt5682->regmap, RT5682_CHOP_DAC, 0x3000);
+	regmap_write(rt5682->regmap, RT5682_HP_CTRL_2, 0x6000);
+	regmap_write(rt5682->regmap, RT5682_HP_CHARGE_PUMP_1, 0x0f26);
+	regmap_write(rt5682->regmap, RT5682_CALIB_ADC_CTRL, 0x7f05);
+	regmap_write(rt5682->regmap, RT5682_STO1_ADC_MIXER, 0x686c);
+	regmap_write(rt5682->regmap, RT5682_CAL_REC, 0x0d0d);
+	regmap_write(rt5682->regmap, RT5682_HP_CALIB_CTRL_9, 0x000f);
+	regmap_write(rt5682->regmap, RT5682_PWR_DIG_1, 0x8d01);
+	regmap_write(rt5682->regmap, RT5682_HP_CALIB_CTRL_2, 0x0321);
+	regmap_write(rt5682->regmap, RT5682_HP_LOGIC_CTRL_2, 0x0004);
+	regmap_write(rt5682->regmap, RT5682_HP_CALIB_CTRL_1, 0x7c00);
+	regmap_write(rt5682->regmap, RT5682_HP_CALIB_CTRL_3, 0x06a1);
+	regmap_write(rt5682->regmap, RT5682_A_DAC1_MUX, 0x0311);
+	regmap_write(rt5682->regmap, RT5682_RESET_HPF_CTRL, 0x0000);
+	regmap_write(rt5682->regmap, RT5682_ADC_STO1_HP_CTRL_1, 0x3320);
+
+	regmap_write(rt5682->regmap, RT5682_HP_CALIB_CTRL_1, 0xfc00);
+
+	for (count = 0; count < 60; count++) {
+		regmap_read(rt5682->regmap, RT5682_HP_CALIB_STA_1, &value);
+		if (!(value & 0x8000))
+			break;
+
+		usleep_range(10000, 10005);
+	}
+
+	if (count >= 60)
+		pr_err("HP Calibration Failure\n");
+
+	/* restore settings */
+	regmap_write(rt5682->regmap, RT5682_STO1_ADC_MIXER, 0xc0c4);
+	regmap_write(rt5682->regmap, RT5682_PWR_DIG_1, 0x0000);
+
+	mutex_unlock(&rt5682->calibrate_mutex);
+
+}
+
+static int rt5682_i2c_probe(struct i2c_client *i2c,
+		    const struct i2c_device_id *id)
+{
+	struct rt5682_platform_data *pdata = dev_get_platdata(&i2c->dev);
+	struct rt5682_priv *rt5682;
+	int i, ret;
+	unsigned int val;
+
+	rt5682 = devm_kzalloc(&i2c->dev, sizeof(struct rt5682_priv),
+		GFP_KERNEL);
+
+	if (rt5682 == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rt5682);
+
+	if (pdata)
+		rt5682->pdata = *pdata;
+	else
+		rt5682_parse_dt(rt5682, &i2c->dev);
+
+	rt5682->regmap = devm_regmap_init_i2c(i2c, &rt5682_regmap);
+	if (IS_ERR(rt5682->regmap)) {
+		ret = PTR_ERR(rt5682->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		return ret;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(rt5682->supplies); i++)
+		rt5682->supplies[i].supply = rt5682_supply_names[i];
+
+	ret = devm_regulator_bulk_get(&i2c->dev, ARRAY_SIZE(rt5682->supplies),
+				      rt5682->supplies);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "Failed to request supplies: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(rt5682->supplies),
+				    rt5682->supplies);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "Failed to enable supplies: %d\n", ret);
+		return ret;
+	}
+
+	if (gpio_is_valid(rt5682->pdata.ldo1_en)) {
+		if (devm_gpio_request_one(&i2c->dev, rt5682->pdata.ldo1_en,
+					  GPIOF_OUT_INIT_HIGH, "rt5682"))
+			dev_err(&i2c->dev, "Fail gpio_request gpio_ldo\n");
+	}
+
+	/* Sleep for 300 ms miniumum */
+	usleep_range(300000, 350000);
+
+	regmap_write(rt5682->regmap, RT5682_I2C_MODE, 0x1);
+	usleep_range(10000, 15000);
+
+	regmap_read(rt5682->regmap, RT5682_DEVICE_ID, &val);
+	if (val != DEVICE_ID) {
+		pr_err("Device with ID register %x is not rt5682\n", val);
+		return -ENODEV;
+	}
+
+	rt5682_reset(rt5682->regmap);
+
+	rt5682_calibrate(rt5682);
+
+	ret = regmap_register_patch(rt5682->regmap, patch_list,
+				    ARRAY_SIZE(patch_list));
+	if (ret != 0)
+		dev_warn(&i2c->dev, "Failed to apply regmap patch: %d\n", ret);
+
+	regmap_write(rt5682->regmap, RT5682_DEPOP_1, 0x0000);
+
+	/* DMIC pin*/
+	if (rt5682->pdata.dmic1_data_pin != RT5682_DMIC1_NULL) {
+		switch (rt5682->pdata.dmic1_data_pin) {
+		case RT5682_DMIC1_DATA_GPIO2: /* share with LRCK2 */
+			regmap_update_bits(rt5682->regmap, RT5682_DMIC_CTRL_1,
+				RT5682_DMIC_1_DP_MASK, RT5682_DMIC_1_DP_GPIO2);
+			regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+				RT5682_GP2_PIN_MASK, RT5682_GP2_PIN_DMIC_SDA);
+			break;
+
+		case RT5682_DMIC1_DATA_GPIO5: /* share with DACDAT1 */
+			regmap_update_bits(rt5682->regmap, RT5682_DMIC_CTRL_1,
+				RT5682_DMIC_1_DP_MASK, RT5682_DMIC_1_DP_GPIO5);
+			regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+				RT5682_GP5_PIN_MASK, RT5682_GP5_PIN_DMIC_SDA);
+			break;
+
+		default:
+			dev_warn(&i2c->dev, "invalid DMIC_DAT pin\n");
+			break;
+		}
+
+		switch (rt5682->pdata.dmic1_clk_pin) {
+		case RT5682_DMIC1_CLK_GPIO1: /* share with IRQ */
+			regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+				RT5682_GP1_PIN_MASK, RT5682_GP1_PIN_DMIC_CLK);
+			break;
+
+		case RT5682_DMIC1_CLK_GPIO3: /* share with BCLK2 */
+			regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+				RT5682_GP3_PIN_MASK, RT5682_GP3_PIN_DMIC_CLK);
+			break;
+
+		default:
+			dev_warn(&i2c->dev, "invalid DMIC_CLK pin\n");
+			break;
+		}
+	}
+
+	regmap_update_bits(rt5682->regmap, RT5682_PWR_ANLG_1,
+			RT5682_LDO1_DVO_MASK | RT5682_HP_DRIVER_MASK,
+			RT5682_LDO1_DVO_12 | RT5682_HP_DRIVER_5X);
+	regmap_write(rt5682->regmap, RT5682_MICBIAS_2, 0x0380);
+	regmap_update_bits(rt5682->regmap, RT5682_GPIO_CTRL_1,
+			RT5682_GP4_PIN_MASK | RT5682_GP5_PIN_MASK,
+			RT5682_GP4_PIN_ADCDAT1 | RT5682_GP5_PIN_DACDAT1);
+	regmap_write(rt5682->regmap, RT5682_TEST_MODE_CTRL_1, 0x0000);
+
+	INIT_DELAYED_WORK(&rt5682->jack_detect_work,
+				rt5682_jack_detect_handler);
+	INIT_DELAYED_WORK(&rt5682->jd_check_work,
+				rt5682_jd_check_handler);
+
+	mutex_init(&rt5682->calibrate_mutex);
+
+	if (i2c->irq) {
+		ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+			rt5682_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+			| IRQF_ONESHOT, "rt5682", rt5682);
+		if (ret)
+			dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret);
+
+	}
+
+	return snd_soc_register_component(&i2c->dev, &soc_component_dev_rt5682,
+			rt5682_dai, ARRAY_SIZE(rt5682_dai));
+}
+
+static int rt5682_i2c_remove(struct i2c_client *i2c)
+{
+	snd_soc_unregister_component(&i2c->dev);
+
+	return 0;
+}
+
+static void rt5682_i2c_shutdown(struct i2c_client *client)
+{
+	struct rt5682_priv *rt5682 = i2c_get_clientdata(client);
+
+	rt5682_reset(rt5682->regmap);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rt5682_of_match[] = {
+	{.compatible = "realtek,rt5682i"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, rt5682_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rt5682_acpi_match[] = {
+	{"10EC5682", 0,},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, rt5682_acpi_match);
+#endif
+
+static struct i2c_driver rt5682_i2c_driver = {
+	.driver = {
+		.name = "rt5682",
+		.of_match_table = of_match_ptr(rt5682_of_match),
+		.acpi_match_table = ACPI_PTR(rt5682_acpi_match),
+	},
+	.probe = rt5682_i2c_probe,
+	.remove = rt5682_i2c_remove,
+	.shutdown = rt5682_i2c_shutdown,
+	.id_table = rt5682_i2c_id,
+};
+module_i2c_driver(rt5682_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC RT5682 driver");
+MODULE_AUTHOR("Bard Liao <bardliao@realtek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h
new file mode 100644
index 000000000000..8068140ebe3f
--- /dev/null
+++ b/sound/soc/codecs/rt5682.h
@@ -0,0 +1,1324 @@
+/*
+ * rt5682.h  --  RT5682/RT5658 ALSA SoC audio driver
+ *
+ * Copyright 2018 Realtek Microelectronics
+ * Author: Bard Liao <bardliao@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __RT5682_H__
+#define __RT5682_H__
+
+#include <sound/rt5682.h>
+
+#define DEVICE_ID 0x6530
+
+/* Info */
+#define RT5682_RESET				0x0000
+#define RT5682_VERSION_ID			0x00fd
+#define RT5682_VENDOR_ID			0x00fe
+#define RT5682_DEVICE_ID			0x00ff
+/*  I/O - Output */
+#define RT5682_HP_CTRL_1			0x0002
+#define RT5682_HP_CTRL_2			0x0003
+#define RT5682_HPL_GAIN				0x0005
+#define RT5682_HPR_GAIN				0x0006
+
+#define RT5682_I2C_CTRL				0x0008
+
+/* I/O - Input */
+#define RT5682_CBJ_BST_CTRL			0x000b
+#define RT5682_CBJ_CTRL_1			0x0010
+#define RT5682_CBJ_CTRL_2			0x0011
+#define RT5682_CBJ_CTRL_3			0x0012
+#define RT5682_CBJ_CTRL_4			0x0013
+#define RT5682_CBJ_CTRL_5			0x0014
+#define RT5682_CBJ_CTRL_6			0x0015
+#define RT5682_CBJ_CTRL_7			0x0016
+/* I/O - ADC/DAC/DMIC */
+#define RT5682_DAC1_DIG_VOL			0x0019
+#define RT5682_STO1_ADC_DIG_VOL			0x001c
+#define RT5682_STO1_ADC_BOOST			0x001f
+#define RT5682_HP_IMP_GAIN_1			0x0022
+#define RT5682_HP_IMP_GAIN_2			0x0023
+/* Mixer - D-D */
+#define RT5682_SIDETONE_CTRL			0x0024
+#define RT5682_STO1_ADC_MIXER			0x0026
+#define RT5682_AD_DA_MIXER			0x0029
+#define RT5682_STO1_DAC_MIXER			0x002a
+#define RT5682_A_DAC1_MUX			0x002b
+#define RT5682_DIG_INF2_DATA			0x0030
+/* Mixer - ADC */
+#define RT5682_REC_MIXER			0x003c
+#define RT5682_CAL_REC				0x0044
+#define RT5682_ALC_BACK_GAIN			0x0049
+/* Power */
+#define RT5682_PWR_DIG_1			0x0061
+#define RT5682_PWR_DIG_2			0x0062
+#define RT5682_PWR_ANLG_1			0x0063
+#define RT5682_PWR_ANLG_2			0x0064
+#define RT5682_PWR_ANLG_3			0x0065
+#define RT5682_PWR_MIXER			0x0066
+#define RT5682_PWR_VOL				0x0067
+/* Clock Detect */
+#define RT5682_CLK_DET				0x006b
+/* Filter Auto Reset */
+#define RT5682_RESET_LPF_CTRL			0x006c
+#define RT5682_RESET_HPF_CTRL			0x006d
+/* DMIC */
+#define RT5682_DMIC_CTRL_1			0x006e
+/* Format - ADC/DAC */
+#define RT5682_I2S1_SDP				0x0070
+#define RT5682_I2S2_SDP				0x0071
+#define RT5682_ADDA_CLK_1			0x0073
+#define RT5682_ADDA_CLK_2			0x0074
+#define RT5682_I2S1_F_DIV_CTRL_1		0x0075
+#define RT5682_I2S1_F_DIV_CTRL_2		0x0076
+/* Format - TDM Control */
+#define RT5682_TDM_CTRL				0x0079
+#define RT5682_TDM_ADDA_CTRL_1			0x007a
+#define RT5682_TDM_ADDA_CTRL_2			0x007b
+#define RT5682_DATA_SEL_CTRL_1			0x007c
+#define RT5682_TDM_TCON_CTRL			0x007e
+/* Function - Analog */
+#define RT5682_GLB_CLK				0x0080
+#define RT5682_PLL_CTRL_1			0x0081
+#define RT5682_PLL_CTRL_2			0x0082
+#define RT5682_PLL_TRACK_1			0x0083
+#define RT5682_PLL_TRACK_2			0x0084
+#define RT5682_PLL_TRACK_3			0x0085
+#define RT5682_PLL_TRACK_4			0x0086
+#define RT5682_PLL_TRACK_5			0x0087
+#define RT5682_PLL_TRACK_6			0x0088
+#define RT5682_PLL_TRACK_11			0x008c
+#define RT5682_SDW_REF_CLK			0x008d
+#define RT5682_DEPOP_1				0x008e
+#define RT5682_DEPOP_2				0x008f
+#define RT5682_HP_CHARGE_PUMP_1			0x0091
+#define RT5682_HP_CHARGE_PUMP_2			0x0092
+#define RT5682_MICBIAS_1			0x0093
+#define RT5682_MICBIAS_2			0x0094
+#define RT5682_PLL_TRACK_12			0x0098
+#define RT5682_PLL_TRACK_14			0x009a
+#define RT5682_PLL2_CTRL_1			0x009b
+#define RT5682_PLL2_CTRL_2			0x009c
+#define RT5682_PLL2_CTRL_3			0x009d
+#define RT5682_PLL2_CTRL_4			0x009e
+#define RT5682_RC_CLK_CTRL			0x009f
+#define RT5682_I2S_M_CLK_CTRL_1			0x00a0
+#define RT5682_I2S2_F_DIV_CTRL_1		0x00a3
+#define RT5682_I2S2_F_DIV_CTRL_2		0x00a4
+/* Function - Digital */
+#define RT5682_EQ_CTRL_1			0x00ae
+#define RT5682_EQ_CTRL_2			0x00af
+#define RT5682_IRQ_CTRL_1			0x00b6
+#define RT5682_IRQ_CTRL_2			0x00b7
+#define RT5682_IRQ_CTRL_3			0x00b8
+#define RT5682_IRQ_CTRL_4			0x00b9
+#define RT5682_INT_ST_1				0x00be
+#define RT5682_GPIO_CTRL_1			0x00c0
+#define RT5682_GPIO_CTRL_2			0x00c1
+#define RT5682_GPIO_CTRL_3			0x00c2
+#define RT5682_HP_AMP_DET_CTRL_1		0x00d0
+#define RT5682_HP_AMP_DET_CTRL_2		0x00d1
+#define RT5682_MID_HP_AMP_DET			0x00d2
+#define RT5682_LOW_HP_AMP_DET			0x00d3
+#define RT5682_DELAY_BUF_CTRL			0x00d4
+#define RT5682_SV_ZCD_1				0x00d9
+#define RT5682_SV_ZCD_2				0x00da
+#define RT5682_IL_CMD_1				0x00db
+#define RT5682_IL_CMD_2				0x00dc
+#define RT5682_IL_CMD_3				0x00dd
+#define RT5682_IL_CMD_4				0x00de
+#define RT5682_IL_CMD_5				0x00df
+#define RT5682_IL_CMD_6				0x00e0
+#define RT5682_4BTN_IL_CMD_1			0x00e2
+#define RT5682_4BTN_IL_CMD_2			0x00e3
+#define RT5682_4BTN_IL_CMD_3			0x00e4
+#define RT5682_4BTN_IL_CMD_4			0x00e5
+#define RT5682_4BTN_IL_CMD_5			0x00e6
+#define RT5682_4BTN_IL_CMD_6			0x00e7
+#define RT5682_4BTN_IL_CMD_7			0x00e8
+
+#define RT5682_ADC_STO1_HP_CTRL_1		0x00ea
+#define RT5682_ADC_STO1_HP_CTRL_2		0x00eb
+#define RT5682_AJD1_CTRL			0x00f0
+#define RT5682_JD1_THD				0x00f1
+#define RT5682_JD2_THD				0x00f2
+#define RT5682_JD_CTRL_1			0x00f6
+/* General Control */
+#define RT5682_DUMMY_1				0x00fa
+#define RT5682_DUMMY_2				0x00fb
+#define RT5682_DUMMY_3				0x00fc
+
+#define RT5682_DAC_ADC_DIG_VOL1			0x0100
+#define RT5682_BIAS_CUR_CTRL_2			0x010b
+#define RT5682_BIAS_CUR_CTRL_3			0x010c
+#define RT5682_BIAS_CUR_CTRL_4			0x010d
+#define RT5682_BIAS_CUR_CTRL_5			0x010e
+#define RT5682_BIAS_CUR_CTRL_6			0x010f
+#define RT5682_BIAS_CUR_CTRL_7			0x0110
+#define RT5682_BIAS_CUR_CTRL_8			0x0111
+#define RT5682_BIAS_CUR_CTRL_9			0x0112
+#define RT5682_BIAS_CUR_CTRL_10			0x0113
+#define RT5682_VREF_REC_OP_FB_CAP_CTRL		0x0117
+#define RT5682_CHARGE_PUMP_1			0x0125
+#define RT5682_DIG_IN_CTRL_1			0x0132
+#define RT5682_PAD_DRIVING_CTRL			0x0136
+#define RT5682_SOFT_RAMP_DEPOP			0x0138
+#define RT5682_CHOP_DAC				0x013a
+#define RT5682_CHOP_ADC				0x013b
+#define RT5682_CALIB_ADC_CTRL			0x013c
+#define RT5682_VOL_TEST				0x013f
+#define RT5682_SPKVDD_DET_STA			0x0142
+#define RT5682_TEST_MODE_CTRL_1			0x0145
+#define RT5682_TEST_MODE_CTRL_2			0x0146
+#define RT5682_TEST_MODE_CTRL_3			0x0147
+#define RT5682_TEST_MODE_CTRL_4			0x0148
+#define RT5682_TEST_MODE_CTRL_5			0x0149
+#define RT5682_PLL1_INTERNAL			0x0150
+#define RT5682_PLL2_INTERNAL			0x0151
+#define RT5682_STO_NG2_CTRL_1			0x0160
+#define RT5682_STO_NG2_CTRL_2			0x0161
+#define RT5682_STO_NG2_CTRL_3			0x0162
+#define RT5682_STO_NG2_CTRL_4			0x0163
+#define RT5682_STO_NG2_CTRL_5			0x0164
+#define RT5682_STO_NG2_CTRL_6			0x0165
+#define RT5682_STO_NG2_CTRL_7			0x0166
+#define RT5682_STO_NG2_CTRL_8			0x0167
+#define RT5682_STO_NG2_CTRL_9			0x0168
+#define RT5682_STO_NG2_CTRL_10			0x0169
+#define RT5682_STO1_DAC_SIL_DET			0x0190
+#define RT5682_SIL_PSV_CTRL1			0x0194
+#define RT5682_SIL_PSV_CTRL2			0x0195
+#define RT5682_SIL_PSV_CTRL3			0x0197
+#define RT5682_SIL_PSV_CTRL4			0x0198
+#define RT5682_SIL_PSV_CTRL5			0x0199
+#define RT5682_HP_IMP_SENS_CTRL_01		0x01af
+#define RT5682_HP_IMP_SENS_CTRL_02		0x01b0
+#define RT5682_HP_IMP_SENS_CTRL_03		0x01b1
+#define RT5682_HP_IMP_SENS_CTRL_04		0x01b2
+#define RT5682_HP_IMP_SENS_CTRL_05		0x01b3
+#define RT5682_HP_IMP_SENS_CTRL_06		0x01b4
+#define RT5682_HP_IMP_SENS_CTRL_07		0x01b5
+#define RT5682_HP_IMP_SENS_CTRL_08		0x01b6
+#define RT5682_HP_IMP_SENS_CTRL_09		0x01b7
+#define RT5682_HP_IMP_SENS_CTRL_10		0x01b8
+#define RT5682_HP_IMP_SENS_CTRL_11		0x01b9
+#define RT5682_HP_IMP_SENS_CTRL_12		0x01ba
+#define RT5682_HP_IMP_SENS_CTRL_13		0x01bb
+#define RT5682_HP_IMP_SENS_CTRL_14		0x01bc
+#define RT5682_HP_IMP_SENS_CTRL_15		0x01bd
+#define RT5682_HP_IMP_SENS_CTRL_16		0x01be
+#define RT5682_HP_IMP_SENS_CTRL_17		0x01bf
+#define RT5682_HP_IMP_SENS_CTRL_18		0x01c0
+#define RT5682_HP_IMP_SENS_CTRL_19		0x01c1
+#define RT5682_HP_IMP_SENS_CTRL_20		0x01c2
+#define RT5682_HP_IMP_SENS_CTRL_21		0x01c3
+#define RT5682_HP_IMP_SENS_CTRL_22		0x01c4
+#define RT5682_HP_IMP_SENS_CTRL_23		0x01c5
+#define RT5682_HP_IMP_SENS_CTRL_24		0x01c6
+#define RT5682_HP_IMP_SENS_CTRL_25		0x01c7
+#define RT5682_HP_IMP_SENS_CTRL_26		0x01c8
+#define RT5682_HP_IMP_SENS_CTRL_27		0x01c9
+#define RT5682_HP_IMP_SENS_CTRL_28		0x01ca
+#define RT5682_HP_IMP_SENS_CTRL_29		0x01cb
+#define RT5682_HP_IMP_SENS_CTRL_30		0x01cc
+#define RT5682_HP_IMP_SENS_CTRL_31		0x01cd
+#define RT5682_HP_IMP_SENS_CTRL_32		0x01ce
+#define RT5682_HP_IMP_SENS_CTRL_33		0x01cf
+#define RT5682_HP_IMP_SENS_CTRL_34		0x01d0
+#define RT5682_HP_IMP_SENS_CTRL_35		0x01d1
+#define RT5682_HP_IMP_SENS_CTRL_36		0x01d2
+#define RT5682_HP_IMP_SENS_CTRL_37		0x01d3
+#define RT5682_HP_IMP_SENS_CTRL_38		0x01d4
+#define RT5682_HP_IMP_SENS_CTRL_39		0x01d5
+#define RT5682_HP_IMP_SENS_CTRL_40		0x01d6
+#define RT5682_HP_IMP_SENS_CTRL_41		0x01d7
+#define RT5682_HP_IMP_SENS_CTRL_42		0x01d8
+#define RT5682_HP_IMP_SENS_CTRL_43		0x01d9
+#define RT5682_HP_LOGIC_CTRL_1			0x01da
+#define RT5682_HP_LOGIC_CTRL_2			0x01db
+#define RT5682_HP_LOGIC_CTRL_3			0x01dc
+#define RT5682_HP_CALIB_CTRL_1			0x01de
+#define RT5682_HP_CALIB_CTRL_2			0x01df
+#define RT5682_HP_CALIB_CTRL_3			0x01e0
+#define RT5682_HP_CALIB_CTRL_4			0x01e1
+#define RT5682_HP_CALIB_CTRL_5			0x01e2
+#define RT5682_HP_CALIB_CTRL_6			0x01e3
+#define RT5682_HP_CALIB_CTRL_7			0x01e4
+#define RT5682_HP_CALIB_CTRL_9			0x01e6
+#define RT5682_HP_CALIB_CTRL_10			0x01e7
+#define RT5682_HP_CALIB_CTRL_11			0x01e8
+#define RT5682_HP_CALIB_STA_1			0x01ea
+#define RT5682_HP_CALIB_STA_2			0x01eb
+#define RT5682_HP_CALIB_STA_3			0x01ec
+#define RT5682_HP_CALIB_STA_4			0x01ed
+#define RT5682_HP_CALIB_STA_5			0x01ee
+#define RT5682_HP_CALIB_STA_6			0x01ef
+#define RT5682_HP_CALIB_STA_7			0x01f0
+#define RT5682_HP_CALIB_STA_8			0x01f1
+#define RT5682_HP_CALIB_STA_9			0x01f2
+#define RT5682_HP_CALIB_STA_10			0x01f3
+#define RT5682_HP_CALIB_STA_11			0x01f4
+#define RT5682_SAR_IL_CMD_1			0x0210
+#define RT5682_SAR_IL_CMD_2			0x0211
+#define RT5682_SAR_IL_CMD_3			0x0212
+#define RT5682_SAR_IL_CMD_4			0x0213
+#define RT5682_SAR_IL_CMD_5			0x0214
+#define RT5682_SAR_IL_CMD_6			0x0215
+#define RT5682_SAR_IL_CMD_7			0x0216
+#define RT5682_SAR_IL_CMD_8			0x0217
+#define RT5682_SAR_IL_CMD_9			0x0218
+#define RT5682_SAR_IL_CMD_10			0x0219
+#define RT5682_SAR_IL_CMD_11			0x021a
+#define RT5682_SAR_IL_CMD_12			0x021b
+#define RT5682_SAR_IL_CMD_13			0x021c
+#define RT5682_EFUSE_CTRL_1			0x0250
+#define RT5682_EFUSE_CTRL_2			0x0251
+#define RT5682_EFUSE_CTRL_3			0x0252
+#define RT5682_EFUSE_CTRL_4			0x0253
+#define RT5682_EFUSE_CTRL_5			0x0254
+#define RT5682_EFUSE_CTRL_6			0x0255
+#define RT5682_EFUSE_CTRL_7			0x0256
+#define RT5682_EFUSE_CTRL_8			0x0257
+#define RT5682_EFUSE_CTRL_9			0x0258
+#define RT5682_EFUSE_CTRL_10			0x0259
+#define RT5682_EFUSE_CTRL_11			0x025a
+#define RT5682_JD_TOP_VC_VTRL			0x0270
+#define RT5682_DRC1_CTRL_0			0x02ff
+#define RT5682_DRC1_CTRL_1			0x0300
+#define RT5682_DRC1_CTRL_2			0x0301
+#define RT5682_DRC1_CTRL_3			0x0302
+#define RT5682_DRC1_CTRL_4			0x0303
+#define RT5682_DRC1_CTRL_5			0x0304
+#define RT5682_DRC1_CTRL_6			0x0305
+#define RT5682_DRC1_HARD_LMT_CTRL_1		0x0306
+#define RT5682_DRC1_HARD_LMT_CTRL_2		0x0307
+#define RT5682_DRC1_PRIV_1			0x0310
+#define RT5682_DRC1_PRIV_2			0x0311
+#define RT5682_DRC1_PRIV_3			0x0312
+#define RT5682_DRC1_PRIV_4			0x0313
+#define RT5682_DRC1_PRIV_5			0x0314
+#define RT5682_DRC1_PRIV_6			0x0315
+#define RT5682_DRC1_PRIV_7			0x0316
+#define RT5682_DRC1_PRIV_8			0x0317
+#define RT5682_EQ_AUTO_RCV_CTRL1		0x03c0
+#define RT5682_EQ_AUTO_RCV_CTRL2		0x03c1
+#define RT5682_EQ_AUTO_RCV_CTRL3		0x03c2
+#define RT5682_EQ_AUTO_RCV_CTRL4		0x03c3
+#define RT5682_EQ_AUTO_RCV_CTRL5		0x03c4
+#define RT5682_EQ_AUTO_RCV_CTRL6		0x03c5
+#define RT5682_EQ_AUTO_RCV_CTRL7		0x03c6
+#define RT5682_EQ_AUTO_RCV_CTRL8		0x03c7
+#define RT5682_EQ_AUTO_RCV_CTRL9		0x03c8
+#define RT5682_EQ_AUTO_RCV_CTRL10		0x03c9
+#define RT5682_EQ_AUTO_RCV_CTRL11		0x03ca
+#define RT5682_EQ_AUTO_RCV_CTRL12		0x03cb
+#define RT5682_EQ_AUTO_RCV_CTRL13		0x03cc
+#define RT5682_ADC_L_EQ_LPF1_A1			0x03d0
+#define RT5682_R_EQ_LPF1_A1			0x03d1
+#define RT5682_L_EQ_LPF1_H0			0x03d2
+#define RT5682_R_EQ_LPF1_H0			0x03d3
+#define RT5682_L_EQ_BPF1_A1			0x03d4
+#define RT5682_R_EQ_BPF1_A1			0x03d5
+#define RT5682_L_EQ_BPF1_A2			0x03d6
+#define RT5682_R_EQ_BPF1_A2			0x03d7
+#define RT5682_L_EQ_BPF1_H0			0x03d8
+#define RT5682_R_EQ_BPF1_H0			0x03d9
+#define RT5682_L_EQ_BPF2_A1			0x03da
+#define RT5682_R_EQ_BPF2_A1			0x03db
+#define RT5682_L_EQ_BPF2_A2			0x03dc
+#define RT5682_R_EQ_BPF2_A2			0x03dd
+#define RT5682_L_EQ_BPF2_H0			0x03de
+#define RT5682_R_EQ_BPF2_H0			0x03df
+#define RT5682_L_EQ_BPF3_A1			0x03e0
+#define RT5682_R_EQ_BPF3_A1			0x03e1
+#define RT5682_L_EQ_BPF3_A2			0x03e2
+#define RT5682_R_EQ_BPF3_A2			0x03e3
+#define RT5682_L_EQ_BPF3_H0			0x03e4
+#define RT5682_R_EQ_BPF3_H0			0x03e5
+#define RT5682_L_EQ_BPF4_A1			0x03e6
+#define RT5682_R_EQ_BPF4_A1			0x03e7
+#define RT5682_L_EQ_BPF4_A2			0x03e8
+#define RT5682_R_EQ_BPF4_A2			0x03e9
+#define RT5682_L_EQ_BPF4_H0			0x03ea
+#define RT5682_R_EQ_BPF4_H0			0x03eb
+#define RT5682_L_EQ_HPF1_A1			0x03ec
+#define RT5682_R_EQ_HPF1_A1			0x03ed
+#define RT5682_L_EQ_HPF1_H0			0x03ee
+#define RT5682_R_EQ_HPF1_H0			0x03ef
+#define RT5682_L_EQ_PRE_VOL			0x03f0
+#define RT5682_R_EQ_PRE_VOL			0x03f1
+#define RT5682_L_EQ_POST_VOL			0x03f2
+#define RT5682_R_EQ_POST_VOL			0x03f3
+#define RT5682_I2C_MODE				0xffff
+
+
+/* global definition */
+#define RT5682_L_MUTE				(0x1 << 15)
+#define RT5682_L_MUTE_SFT			15
+#define RT5682_VOL_L_MUTE			(0x1 << 14)
+#define RT5682_VOL_L_SFT			14
+#define RT5682_R_MUTE				(0x1 << 7)
+#define RT5682_R_MUTE_SFT			7
+#define RT5682_VOL_R_MUTE			(0x1 << 6)
+#define RT5682_VOL_R_SFT			6
+#define RT5682_L_VOL_MASK			(0x3f << 8)
+#define RT5682_L_VOL_SFT			8
+#define RT5682_R_VOL_MASK			(0x3f)
+#define RT5682_R_VOL_SFT			0
+
+/*Headphone Amp L/R Analog Gain and Digital NG2 Gain Control (0x0005 0x0006)*/
+#define RT5682_G_HP				(0xf << 8)
+#define RT5682_G_HP_SFT				8
+#define RT5682_G_STO_DA_DMIX			(0xf)
+#define RT5682_G_STO_DA_SFT			0
+
+/* CBJ Control (0x000b) */
+#define RT5682_BST_CBJ_MASK			(0xf << 8)
+#define RT5682_BST_CBJ_SFT			8
+
+/* Embeeded Jack and Type Detection Control 1 (0x0010) */
+#define RT5682_EMB_JD_EN			(0x1 << 15)
+#define RT5682_EMB_JD_EN_SFT			15
+#define RT5682_EMB_JD_RST			(0x1 << 14)
+#define RT5682_JD_MODE				(0x1 << 13)
+#define RT5682_JD_MODE_SFT			13
+#define RT5682_DET_TYPE				(0x1 << 12)
+#define RT5682_DET_TYPE_SFT			12
+#define RT5682_POLA_EXT_JD_MASK			(0x1 << 11)
+#define RT5682_POLA_EXT_JD_LOW			(0x1 << 11)
+#define RT5682_POLA_EXT_JD_HIGH			(0x0 << 11)
+#define RT5682_EXT_JD_DIG			(0x1 << 9)
+#define RT5682_POL_FAST_OFF_MASK		(0x1 << 8)
+#define RT5682_POL_FAST_OFF_HIGH		(0x1 << 8)
+#define RT5682_POL_FAST_OFF_LOW			(0x0 << 8)
+#define RT5682_FAST_OFF_MASK			(0x1 << 7)
+#define RT5682_FAST_OFF_EN			(0x1 << 7)
+#define RT5682_FAST_OFF_DIS			(0x0 << 7)
+#define RT5682_VREF_POW_MASK			(0x1 << 6)
+#define RT5682_VREF_POW_FSM			(0x0 << 6)
+#define RT5682_VREF_POW_REG			(0x1 << 6)
+#define RT5682_MB1_PATH_MASK			(0x1 << 5)
+#define RT5682_CTRL_MB1_REG			(0x1 << 5)
+#define RT5682_CTRL_MB1_FSM			(0x0 << 5)
+#define RT5682_MB2_PATH_MASK			(0x1 << 4)
+#define RT5682_CTRL_MB2_REG			(0x1 << 4)
+#define RT5682_CTRL_MB2_FSM			(0x0 << 4)
+#define RT5682_TRIG_JD_MASK			(0x1 << 3)
+#define RT5682_TRIG_JD_HIGH			(0x1 << 3)
+#define RT5682_TRIG_JD_LOW			(0x0 << 3)
+#define RT5682_MIC_CAP_MASK			(0x1 << 1)
+#define RT5682_MIC_CAP_HS			(0x1 << 1)
+#define RT5682_MIC_CAP_HP			(0x0 << 1)
+#define RT5682_MIC_CAP_SRC_MASK			(0x1)
+#define RT5682_MIC_CAP_SRC_REG			(0x1)
+#define RT5682_MIC_CAP_SRC_ANA			(0x0)
+
+/* Embeeded Jack and Type Detection Control 2 (0x0011) */
+#define RT5682_EXT_JD_SRC			(0x7 << 4)
+#define RT5682_EXT_JD_SRC_SFT			4
+#define RT5682_EXT_JD_SRC_GPIO_JD1		(0x0 << 4)
+#define RT5682_EXT_JD_SRC_GPIO_JD2		(0x1 << 4)
+#define RT5682_EXT_JD_SRC_JDH			(0x2 << 4)
+#define RT5682_EXT_JD_SRC_JDL			(0x3 << 4)
+#define RT5682_EXT_JD_SRC_MANUAL		(0x4 << 4)
+#define RT5682_JACK_TYPE_MASK			(0x3)
+
+/* Combo Jack and Type Detection Control 3 (0x0012) */
+#define RT5682_CBJ_IN_BUF_EN			(0x1 << 7)
+
+/* Combo Jack and Type Detection Control 4 (0x0013) */
+#define RT5682_SEL_SHT_MID_TON_MASK		(0x3 << 12)
+#define RT5682_SEL_SHT_MID_TON_2		(0x0 << 12)
+#define RT5682_SEL_SHT_MID_TON_3		(0x1 << 12)
+#define RT5682_CBJ_JD_TEST_MASK			(0x1 << 6)
+#define RT5682_CBJ_JD_TEST_NORM			(0x0 << 6)
+#define RT5682_CBJ_JD_TEST_MODE			(0x1 << 6)
+
+/* DAC1 Digital Volume (0x0019) */
+#define RT5682_DAC_L1_VOL_MASK			(0xff << 8)
+#define RT5682_DAC_L1_VOL_SFT			8
+#define RT5682_DAC_R1_VOL_MASK			(0xff)
+#define RT5682_DAC_R1_VOL_SFT			0
+
+/* ADC Digital Volume Control (0x001c) */
+#define RT5682_ADC_L_VOL_MASK			(0x7f << 8)
+#define RT5682_ADC_L_VOL_SFT			8
+#define RT5682_ADC_R_VOL_MASK			(0x7f)
+#define RT5682_ADC_R_VOL_SFT			0
+
+/* Stereo1 ADC Boost Gain Control (0x001f) */
+#define RT5682_STO1_ADC_L_BST_MASK		(0x3 << 14)
+#define RT5682_STO1_ADC_L_BST_SFT		14
+#define RT5682_STO1_ADC_R_BST_MASK		(0x3 << 12)
+#define RT5682_STO1_ADC_R_BST_SFT		12
+
+/* Sidetone Control (0x0024) */
+#define RT5682_ST_SRC_SEL			(0x1 << 8)
+#define RT5682_ST_SRC_SFT			8
+#define RT5682_ST_EN_MASK			(0x1 << 6)
+#define RT5682_ST_DIS				(0x0 << 6)
+#define RT5682_ST_EN				(0x1 << 6)
+#define RT5682_ST_EN_SFT			6
+
+/* Stereo1 ADC Mixer Control (0x0026) */
+#define RT5682_M_STO1_ADC_L1			(0x1 << 15)
+#define RT5682_M_STO1_ADC_L1_SFT		15
+#define RT5682_M_STO1_ADC_L2			(0x1 << 14)
+#define RT5682_M_STO1_ADC_L2_SFT		14
+#define RT5682_STO1_ADC1L_SRC_MASK		(0x1 << 13)
+#define RT5682_STO1_ADC1L_SRC_SFT		13
+#define RT5682_STO1_ADC1_SRC_ADC		(0x1 << 13)
+#define RT5682_STO1_ADC1_SRC_DACMIX		(0x0 << 13)
+#define RT5682_STO1_ADC2L_SRC_MASK		(0x1 << 12)
+#define RT5682_STO1_ADC2L_SRC_SFT		12
+#define RT5682_STO1_ADCL_SRC_MASK		(0x3 << 10)
+#define RT5682_STO1_ADCL_SRC_SFT		10
+#define RT5682_STO1_DD_L_SRC_MASK		(0x1 << 9)
+#define RT5682_STO1_DD_L_SRC_SFT		9
+#define RT5682_STO1_DMIC_SRC_MASK		(0x1 << 8)
+#define RT5682_STO1_DMIC_SRC_SFT		8
+#define RT5682_STO1_DMIC_SRC_DMIC2		(0x1 << 8)
+#define RT5682_STO1_DMIC_SRC_DMIC1		(0x0 << 8)
+#define RT5682_M_STO1_ADC_R1			(0x1 << 7)
+#define RT5682_M_STO1_ADC_R1_SFT		7
+#define RT5682_M_STO1_ADC_R2			(0x1 << 6)
+#define RT5682_M_STO1_ADC_R2_SFT		6
+#define RT5682_STO1_ADC1R_SRC_MASK		(0x1 << 5)
+#define RT5682_STO1_ADC1R_SRC_SFT		5
+#define RT5682_STO1_ADC2R_SRC_MASK		(0x1 << 4)
+#define RT5682_STO1_ADC2R_SRC_SFT		4
+#define RT5682_STO1_ADCR_SRC_MASK		(0x3 << 2)
+#define RT5682_STO1_ADCR_SRC_SFT		2
+
+/* ADC Mixer to DAC Mixer Control (0x0029) */
+#define RT5682_M_ADCMIX_L			(0x1 << 15)
+#define RT5682_M_ADCMIX_L_SFT			15
+#define RT5682_M_DAC1_L				(0x1 << 14)
+#define RT5682_M_DAC1_L_SFT			14
+#define RT5682_DAC1_R_SEL_MASK			(0x1 << 10)
+#define RT5682_DAC1_R_SEL_SFT			10
+#define RT5682_DAC1_L_SEL_MASK			(0x1 << 8)
+#define RT5682_DAC1_L_SEL_SFT			8
+#define RT5682_M_ADCMIX_R			(0x1 << 7)
+#define RT5682_M_ADCMIX_R_SFT			7
+#define RT5682_M_DAC1_R				(0x1 << 6)
+#define RT5682_M_DAC1_R_SFT			6
+
+/* Stereo1 DAC Mixer Control (0x002a) */
+#define RT5682_M_DAC_L1_STO_L			(0x1 << 15)
+#define RT5682_M_DAC_L1_STO_L_SFT		15
+#define RT5682_G_DAC_L1_STO_L_MASK		(0x1 << 14)
+#define RT5682_G_DAC_L1_STO_L_SFT		14
+#define RT5682_M_DAC_R1_STO_L			(0x1 << 13)
+#define RT5682_M_DAC_R1_STO_L_SFT		13
+#define RT5682_G_DAC_R1_STO_L_MASK		(0x1 << 12)
+#define RT5682_G_DAC_R1_STO_L_SFT		12
+#define RT5682_M_DAC_L1_STO_R			(0x1 << 7)
+#define RT5682_M_DAC_L1_STO_R_SFT		7
+#define RT5682_G_DAC_L1_STO_R_MASK		(0x1 << 6)
+#define RT5682_G_DAC_L1_STO_R_SFT		6
+#define RT5682_M_DAC_R1_STO_R			(0x1 << 5)
+#define RT5682_M_DAC_R1_STO_R_SFT		5
+#define RT5682_G_DAC_R1_STO_R_MASK		(0x1 << 4)
+#define RT5682_G_DAC_R1_STO_R_SFT		4
+
+/* Analog DAC1 Input Source Control (0x002b) */
+#define RT5682_M_ST_STO_L			(0x1 << 9)
+#define RT5682_M_ST_STO_L_SFT			9
+#define RT5682_M_ST_STO_R			(0x1 << 8)
+#define RT5682_M_ST_STO_R_SFT			8
+#define RT5682_DAC_L1_SRC_MASK			(0x3 << 4)
+#define RT5682_A_DACL1_SFT			4
+#define RT5682_DAC_R1_SRC_MASK			(0x3)
+#define RT5682_A_DACR1_SFT			0
+
+/* Digital Interface Data Control (0x0030) */
+#define RT5682_IF2_ADC_SEL_MASK			(0x3 << 0)
+#define RT5682_IF2_ADC_SEL_SFT			0
+
+/* REC Left Mixer Control 2 (0x003c) */
+#define RT5682_G_CBJ_RM1_L			(0x7 << 10)
+#define RT5682_G_CBJ_RM1_L_SFT			10
+#define RT5682_M_CBJ_RM1_L			(0x1 << 7)
+#define RT5682_M_CBJ_RM1_L_SFT			7
+
+/* Power Management for Digital 1 (0x0061) */
+#define RT5682_PWR_I2S1				(0x1 << 15)
+#define RT5682_PWR_I2S1_BIT			15
+#define RT5682_PWR_I2S2				(0x1 << 14)
+#define RT5682_PWR_I2S2_BIT			14
+#define RT5682_PWR_DAC_L1			(0x1 << 11)
+#define RT5682_PWR_DAC_L1_BIT			11
+#define RT5682_PWR_DAC_R1			(0x1 << 10)
+#define RT5682_PWR_DAC_R1_BIT			10
+#define RT5682_PWR_LDO				(0x1 << 8)
+#define RT5682_PWR_LDO_BIT			8
+#define RT5682_PWR_ADC_L1			(0x1 << 4)
+#define RT5682_PWR_ADC_L1_BIT			4
+#define RT5682_PWR_ADC_R1			(0x1 << 3)
+#define RT5682_PWR_ADC_R1_BIT			3
+#define RT5682_DIG_GATE_CTRL			(0x1 << 0)
+#define RT5682_DIG_GATE_CTRL_SFT		0
+
+
+/* Power Management for Digital 2 (0x0062) */
+#define RT5682_PWR_ADC_S1F			(0x1 << 15)
+#define RT5682_PWR_ADC_S1F_BIT			15
+#define RT5682_PWR_DAC_S1F			(0x1 << 10)
+#define RT5682_PWR_DAC_S1F_BIT			10
+
+/* Power Management for Analog 1 (0x0063) */
+#define RT5682_PWR_VREF1			(0x1 << 15)
+#define RT5682_PWR_VREF1_BIT			15
+#define RT5682_PWR_FV1				(0x1 << 14)
+#define RT5682_PWR_FV1_BIT			14
+#define RT5682_PWR_VREF2			(0x1 << 13)
+#define RT5682_PWR_VREF2_BIT			13
+#define RT5682_PWR_FV2				(0x1 << 12)
+#define RT5682_PWR_FV2_BIT			12
+#define RT5682_LDO1_DBG_MASK			(0x3 << 10)
+#define RT5682_PWR_MB				(0x1 << 9)
+#define RT5682_PWR_MB_BIT			9
+#define RT5682_PWR_BG				(0x1 << 7)
+#define RT5682_PWR_BG_BIT			7
+#define RT5682_LDO1_BYPASS_MASK			(0x1 << 6)
+#define RT5682_LDO1_BYPASS			(0x1 << 6)
+#define RT5682_LDO1_NOT_BYPASS			(0x0 << 6)
+#define RT5682_PWR_MA_BIT			6
+#define RT5682_LDO1_DVO_MASK			(0x3 << 4)
+#define RT5682_LDO1_DVO_09			(0x0 << 4)
+#define RT5682_LDO1_DVO_10			(0x1 << 4)
+#define RT5682_LDO1_DVO_12			(0x2 << 4)
+#define RT5682_LDO1_DVO_14			(0x3 << 4)
+#define RT5682_HP_DRIVER_MASK			(0x3 << 2)
+#define RT5682_HP_DRIVER_1X			(0x0 << 2)
+#define RT5682_HP_DRIVER_3X			(0x1 << 2)
+#define RT5682_HP_DRIVER_5X			(0x3 << 2)
+#define RT5682_PWR_HA_L				(0x1 << 1)
+#define RT5682_PWR_HA_L_BIT			1
+#define RT5682_PWR_HA_R				(0x1 << 0)
+#define RT5682_PWR_HA_R_BIT			0
+
+/* Power Management for Analog 2 (0x0064) */
+#define RT5682_PWR_MB1				(0x1 << 11)
+#define RT5682_PWR_MB1_PWR_DOWN			(0x0 << 11)
+#define RT5682_PWR_MB1_BIT			11
+#define RT5682_PWR_MB2				(0x1 << 10)
+#define RT5682_PWR_MB2_PWR_DOWN			(0x0 << 10)
+#define RT5682_PWR_MB2_BIT			10
+#define RT5682_PWR_JDH				(0x1 << 3)
+#define RT5682_PWR_JDH_BIT			3
+#define RT5682_PWR_JDL				(0x1 << 2)
+#define RT5682_PWR_JDL_BIT			2
+#define RT5682_PWR_RM1_L			(0x1 << 1)
+#define RT5682_PWR_RM1_L_BIT			1
+
+/* Power Management for Analog 3 (0x0065) */
+#define RT5682_PWR_CBJ				(0x1 << 9)
+#define RT5682_PWR_CBJ_BIT			9
+#define RT5682_PWR_PLL				(0x1 << 6)
+#define RT5682_PWR_PLL_BIT			6
+#define RT5682_PWR_PLL2B			(0x1 << 5)
+#define RT5682_PWR_PLL2B_BIT			5
+#define RT5682_PWR_PLL2F			(0x1 << 4)
+#define RT5682_PWR_PLL2F_BIT			4
+#define RT5682_PWR_LDO2				(0x1 << 2)
+#define RT5682_PWR_LDO2_BIT			2
+#define RT5682_PWR_DET_SPKVDD			(0x1 << 1)
+#define RT5682_PWR_DET_SPKVDD_BIT		1
+
+/* Power Management for Mixer (0x0066) */
+#define RT5682_PWR_STO1_DAC_L			(0x1 << 5)
+#define RT5682_PWR_STO1_DAC_L_BIT		5
+#define RT5682_PWR_STO1_DAC_R			(0x1 << 4)
+#define RT5682_PWR_STO1_DAC_R_BIT		4
+
+/* MCLK and System Clock Detection Control (0x006b) */
+#define RT5682_SYS_CLK_DET			(0x1 << 15)
+#define RT5682_SYS_CLK_DET_SFT			15
+#define RT5682_PLL1_CLK_DET			(0x1 << 14)
+#define RT5682_PLL1_CLK_DET_SFT			14
+#define RT5682_PLL2_CLK_DET			(0x1 << 13)
+#define RT5682_PLL2_CLK_DET_SFT			13
+#define RT5682_POW_CLK_DET2_SFT			8
+#define RT5682_POW_CLK_DET_SFT			0
+
+/* Digital Microphone Control 1 (0x006e) */
+#define RT5682_DMIC_1_EN_MASK			(0x1 << 15)
+#define RT5682_DMIC_1_EN_SFT			15
+#define RT5682_DMIC_1_DIS			(0x0 << 15)
+#define RT5682_DMIC_1_EN			(0x1 << 15)
+#define RT5682_DMIC_1_DP_MASK			(0x3 << 4)
+#define RT5682_DMIC_1_DP_SFT			4
+#define RT5682_DMIC_1_DP_GPIO2			(0x0 << 4)
+#define RT5682_DMIC_1_DP_GPIO5			(0x1 << 4)
+#define RT5682_DMIC_CLK_MASK			(0xf << 0)
+#define RT5682_DMIC_CLK_SFT			0
+
+/* I2S1 Audio Serial Data Port Control (0x0070) */
+#define RT5682_SEL_ADCDAT_MASK			(0x1 << 15)
+#define RT5682_SEL_ADCDAT_OUT			(0x0 << 15)
+#define RT5682_SEL_ADCDAT_IN			(0x1 << 15)
+#define RT5682_SEL_ADCDAT_SFT			15
+#define RT5682_I2S1_TX_CHL_MASK			(0x7 << 12)
+#define RT5682_I2S1_TX_CHL_SFT			12
+#define RT5682_I2S1_TX_CHL_16			(0x0 << 12)
+#define RT5682_I2S1_TX_CHL_20			(0x1 << 12)
+#define RT5682_I2S1_TX_CHL_24			(0x2 << 12)
+#define RT5682_I2S1_TX_CHL_32			(0x3 << 12)
+#define RT5682_I2S1_TX_CHL_8			(0x4 << 12)
+#define RT5682_I2S1_RX_CHL_MASK			(0x7 << 8)
+#define RT5682_I2S1_RX_CHL_SFT			8
+#define RT5682_I2S1_RX_CHL_16			(0x0 << 8)
+#define RT5682_I2S1_RX_CHL_20			(0x1 << 8)
+#define RT5682_I2S1_RX_CHL_24			(0x2 << 8)
+#define RT5682_I2S1_RX_CHL_32			(0x3 << 8)
+#define RT5682_I2S1_RX_CHL_8			(0x4 << 8)
+#define RT5682_I2S1_MONO_MASK			(0x1 << 7)
+#define RT5682_I2S1_MONO_EN			(0x1 << 7)
+#define RT5682_I2S1_MONO_DIS			(0x0 << 7)
+#define RT5682_I2S2_MONO_MASK			(0x1 << 6)
+#define RT5682_I2S2_MONO_EN			(0x1 << 6)
+#define RT5682_I2S2_MONO_DIS			(0x0 << 6)
+#define RT5682_I2S1_DL_MASK			(0x7 << 4)
+#define RT5682_I2S1_DL_SFT			4
+#define RT5682_I2S1_DL_16			(0x0 << 4)
+#define RT5682_I2S1_DL_20			(0x1 << 4)
+#define RT5682_I2S1_DL_24			(0x2 << 4)
+#define RT5682_I2S1_DL_32			(0x3 << 4)
+#define RT5682_I2S1_DL_8			(0x4 << 4)
+
+/* I2S1/2 Audio Serial Data Port Control (0x0070)(0x0071) */
+#define RT5682_I2S2_MS_MASK			(0x1 << 15)
+#define RT5682_I2S2_MS_SFT			15
+#define RT5682_I2S2_MS_M			(0x0 << 15)
+#define RT5682_I2S2_MS_S			(0x1 << 15)
+#define RT5682_I2S2_PIN_CFG_MASK		(0x1 << 14)
+#define RT5682_I2S2_PIN_CFG_SFT			14
+#define RT5682_I2S2_CLK_SEL_MASK		(0x1 << 11)
+#define RT5682_I2S2_CLK_SEL_SFT			11
+#define RT5682_I2S2_OUT_MASK			(0x1 << 9)
+#define RT5682_I2S2_OUT_SFT			9
+#define RT5682_I2S2_OUT_UM			(0x0 << 9)
+#define RT5682_I2S2_OUT_M			(0x1 << 9)
+#define RT5682_I2S_BP_MASK			(0x1 << 8)
+#define RT5682_I2S_BP_SFT			8
+#define RT5682_I2S_BP_NOR			(0x0 << 8)
+#define RT5682_I2S_BP_INV			(0x1 << 8)
+#define RT5682_I2S2_MONO_EN			(0x1 << 6)
+#define RT5682_I2S2_MONO_DIS			(0x0 << 6)
+#define RT5682_I2S2_DL_MASK			(0x3 << 4)
+#define RT5682_I2S2_DL_SFT			4
+#define RT5682_I2S2_DL_16			(0x0 << 4)
+#define RT5682_I2S2_DL_20			(0x1 << 4)
+#define RT5682_I2S2_DL_24			(0x2 << 4)
+#define RT5682_I2S2_DL_8			(0x3 << 4)
+#define RT5682_I2S_DF_MASK			(0x7)
+#define RT5682_I2S_DF_SFT			0
+#define RT5682_I2S_DF_I2S			(0x0)
+#define RT5682_I2S_DF_LEFT			(0x1)
+#define RT5682_I2S_DF_PCM_A			(0x2)
+#define RT5682_I2S_DF_PCM_B			(0x3)
+#define RT5682_I2S_DF_PCM_A_N			(0x6)
+#define RT5682_I2S_DF_PCM_B_N			(0x7)
+
+/* ADC/DAC Clock Control 1 (0x0073) */
+#define RT5682_ADC_OSR_MASK			(0xf << 12)
+#define RT5682_ADC_OSR_SFT			12
+#define RT5682_ADC_OSR_D_1			(0x0 << 12)
+#define RT5682_ADC_OSR_D_2			(0x1 << 12)
+#define RT5682_ADC_OSR_D_4			(0x2 << 12)
+#define RT5682_ADC_OSR_D_6			(0x3 << 12)
+#define RT5682_ADC_OSR_D_8			(0x4 << 12)
+#define RT5682_ADC_OSR_D_12			(0x5 << 12)
+#define RT5682_ADC_OSR_D_16			(0x6 << 12)
+#define RT5682_ADC_OSR_D_24			(0x7 << 12)
+#define RT5682_ADC_OSR_D_32			(0x8 << 12)
+#define RT5682_ADC_OSR_D_48			(0x9 << 12)
+#define RT5682_I2S_M_DIV_MASK			(0xf << 12)
+#define RT5682_I2S_M_DIV_SFT			8
+#define RT5682_I2S_M_D_1			(0x0 << 8)
+#define RT5682_I2S_M_D_2			(0x1 << 8)
+#define RT5682_I2S_M_D_3			(0x2 << 8)
+#define RT5682_I2S_M_D_4			(0x3 << 8)
+#define RT5682_I2S_M_D_6			(0x4 << 8)
+#define RT5682_I2S_M_D_8			(0x5 << 8)
+#define RT5682_I2S_M_D_12			(0x6 << 8)
+#define RT5682_I2S_M_D_16			(0x7 << 8)
+#define RT5682_I2S_M_D_24			(0x8 << 8)
+#define RT5682_I2S_M_D_32			(0x9 << 8)
+#define RT5682_I2S_M_D_48			(0x10 << 8)
+#define RT5682_I2S_CLK_SRC_MASK			(0x7 << 4)
+#define RT5682_I2S_CLK_SRC_SFT			4
+#define RT5682_I2S_CLK_SRC_MCLK			(0x0 << 4)
+#define RT5682_I2S_CLK_SRC_PLL1			(0x1 << 4)
+#define RT5682_I2S_CLK_SRC_PLL2			(0x2 << 4)
+#define RT5682_I2S_CLK_SRC_SDW			(0x3 << 4)
+#define RT5682_I2S_CLK_SRC_RCCLK		(0x4 << 4) /* 25M */
+#define RT5682_DAC_OSR_MASK			(0xf << 0)
+#define RT5682_DAC_OSR_SFT			0
+#define RT5682_DAC_OSR_D_1			(0x0 << 0)
+#define RT5682_DAC_OSR_D_2			(0x1 << 0)
+#define RT5682_DAC_OSR_D_4			(0x2 << 0)
+#define RT5682_DAC_OSR_D_6			(0x3 << 0)
+#define RT5682_DAC_OSR_D_8			(0x4 << 0)
+#define RT5682_DAC_OSR_D_12			(0x5 << 0)
+#define RT5682_DAC_OSR_D_16			(0x6 << 0)
+#define RT5682_DAC_OSR_D_24			(0x7 << 0)
+#define RT5682_DAC_OSR_D_32			(0x8 << 0)
+#define RT5682_DAC_OSR_D_48			(0x9 << 0)
+
+/* ADC/DAC Clock Control 2 (0x0074) */
+#define RT5682_I2S2_BCLK_MS2_MASK		(0x1 << 11)
+#define RT5682_I2S2_BCLK_MS2_SFT		11
+#define RT5682_I2S2_BCLK_MS2_32			(0x0 << 11)
+#define RT5682_I2S2_BCLK_MS2_64			(0x1 << 11)
+
+
+/* TDM control 1 (0x0079) */
+#define RT5682_TDM_TX_CH_MASK			(0x3 << 12)
+#define RT5682_TDM_TX_CH_2			(0x0 << 12)
+#define RT5682_TDM_TX_CH_4			(0x1 << 12)
+#define RT5682_TDM_TX_CH_6			(0x2 << 12)
+#define RT5682_TDM_TX_CH_8			(0x3 << 12)
+#define RT5682_TDM_RX_CH_MASK			(0x3 << 8)
+#define RT5682_TDM_RX_CH_2			(0x0 << 8)
+#define RT5682_TDM_RX_CH_4			(0x1 << 8)
+#define RT5682_TDM_RX_CH_6			(0x2 << 8)
+#define RT5682_TDM_RX_CH_8			(0x3 << 8)
+#define RT5682_TDM_ADC_LCA_MASK			(0xf << 4)
+#define RT5682_TDM_ADC_LCA_SFT			4
+#define RT5682_TDM_ADC_DL_SFT			0
+
+/* TDM control 2 (0x007a) */
+#define RT5682_IF1_ADC1_SEL_SFT			14
+#define RT5682_IF1_ADC2_SEL_SFT			12
+#define RT5682_IF1_ADC3_SEL_SFT			10
+#define RT5682_IF1_ADC4_SEL_SFT			8
+#define RT5682_TDM_ADC_SEL_SFT			4
+
+/* TDM control 3 (0x007b) */
+#define RT5682_TDM_EN				(0x1 << 7)
+
+/* TDM/I2S control (0x007e) */
+#define RT5682_TDM_S_BP_MASK			(0x1 << 15)
+#define RT5682_TDM_S_BP_SFT			15
+#define RT5682_TDM_S_BP_NOR			(0x0 << 15)
+#define RT5682_TDM_S_BP_INV			(0x1 << 15)
+#define RT5682_TDM_S_LP_MASK			(0x1 << 14)
+#define RT5682_TDM_S_LP_SFT			14
+#define RT5682_TDM_S_LP_NOR			(0x0 << 14)
+#define RT5682_TDM_S_LP_INV			(0x1 << 14)
+#define RT5682_TDM_DF_MASK			(0x7 << 11)
+#define RT5682_TDM_DF_SFT			11
+#define RT5682_TDM_DF_I2S			(0x0 << 11)
+#define RT5682_TDM_DF_LEFT			(0x1 << 11)
+#define RT5682_TDM_DF_PCM_A			(0x2 << 11)
+#define RT5682_TDM_DF_PCM_B			(0x3 << 11)
+#define RT5682_TDM_DF_PCM_A_N			(0x6 << 11)
+#define RT5682_TDM_DF_PCM_B_N			(0x7 << 11)
+#define RT5682_TDM_CL_MASK			(0x3 << 4)
+#define RT5682_TDM_CL_16			(0x0 << 4)
+#define RT5682_TDM_CL_20			(0x1 << 4)
+#define RT5682_TDM_CL_24			(0x2 << 4)
+#define RT5682_TDM_CL_32			(0x3 << 4)
+#define RT5682_TDM_M_BP_MASK			(0x1 << 2)
+#define RT5682_TDM_M_BP_SFT			2
+#define RT5682_TDM_M_BP_NOR			(0x0 << 2)
+#define RT5682_TDM_M_BP_INV			(0x1 << 2)
+#define RT5682_TDM_M_LP_MASK			(0x1 << 1)
+#define RT5682_TDM_M_LP_SFT			1
+#define RT5682_TDM_M_LP_NOR			(0x0 << 1)
+#define RT5682_TDM_M_LP_INV			(0x1 << 1)
+#define RT5682_TDM_MS_MASK			(0x1 << 0)
+#define RT5682_TDM_MS_SFT			0
+#define RT5682_TDM_MS_M				(0x0 << 0)
+#define RT5682_TDM_MS_S				(0x1 << 0)
+
+/* Global Clock Control (0x0080) */
+#define RT5682_SCLK_SRC_MASK			(0x7 << 13)
+#define RT5682_SCLK_SRC_SFT			13
+#define RT5682_SCLK_SRC_MCLK			(0x0 << 13)
+#define RT5682_SCLK_SRC_PLL1			(0x1 << 13)
+#define RT5682_SCLK_SRC_PLL2			(0x2 << 13)
+#define RT5682_SCLK_SRC_SDW			(0x3 << 13)
+#define RT5682_SCLK_SRC_RCCLK			(0x4 << 13)
+#define RT5682_PLL1_SRC_MASK			(0x3 << 10)
+#define RT5682_PLL1_SRC_SFT			10
+#define RT5682_PLL1_SRC_MCLK			(0x0 << 10)
+#define RT5682_PLL1_SRC_BCLK1			(0x1 << 10)
+#define RT5682_PLL1_SRC_SDW			(0x2 << 10)
+#define RT5682_PLL1_SRC_RC			(0x3 << 10)
+#define RT5682_PLL2_SRC_MASK			(0x3 << 8)
+#define RT5682_PLL2_SRC_SFT			8
+#define RT5682_PLL2_SRC_MCLK			(0x0 << 8)
+#define RT5682_PLL2_SRC_BCLK1			(0x1 << 8)
+#define RT5682_PLL2_SRC_SDW			(0x2 << 8)
+#define RT5682_PLL2_SRC_RC			(0x3 << 8)
+
+
+
+#define RT5682_PLL_INP_MAX			40000000
+#define RT5682_PLL_INP_MIN			256000
+/* PLL M/N/K Code Control 1 (0x0081) */
+#define RT5682_PLL_N_MAX			0x001ff
+#define RT5682_PLL_N_MASK			(RT5682_PLL_N_MAX << 7)
+#define RT5682_PLL_N_SFT			7
+#define RT5682_PLL_K_MAX			0x001f
+#define RT5682_PLL_K_MASK			(RT5682_PLL_K_MAX)
+#define RT5682_PLL_K_SFT			0
+
+/* PLL M/N/K Code Control 2 (0x0082) */
+#define RT5682_PLL_M_MAX			0x00f
+#define RT5682_PLL_M_MASK			(RT5682_PLL_M_MAX << 12)
+#define RT5682_PLL_M_SFT			12
+#define RT5682_PLL_M_BP				(0x1 << 11)
+#define RT5682_PLL_M_BP_SFT			11
+#define RT5682_PLL_K_BP				(0x1 << 10)
+#define RT5682_PLL_K_BP_SFT			10
+#define RT5682_PLL_RST				(0x1 << 1)
+
+/* PLL tracking mode 1 (0x0083) */
+#define RT5682_DA_ASRC_MASK			(0x1 << 13)
+#define RT5682_DA_ASRC_SFT			13
+#define RT5682_DAC_STO1_ASRC_MASK		(0x1 << 12)
+#define RT5682_DAC_STO1_ASRC_SFT		12
+#define RT5682_AD_ASRC_MASK			(0x1 << 8)
+#define RT5682_AD_ASRC_SFT			8
+#define RT5682_AD_ASRC_SEL_MASK			(0x1 << 4)
+#define RT5682_AD_ASRC_SEL_SFT			4
+#define RT5682_DMIC_ASRC_MASK			(0x1 << 3)
+#define RT5682_DMIC_ASRC_SFT			3
+#define RT5682_ADC_STO1_ASRC_MASK		(0x1 << 2)
+#define RT5682_ADC_STO1_ASRC_SFT		2
+#define RT5682_DA_ASRC_SEL_MASK			(0x1 << 0)
+#define RT5682_DA_ASRC_SEL_SFT			0
+
+/* PLL tracking mode 2 3 (0x0084)(0x0085)*/
+#define RT5682_FILTER_CLK_SEL_MASK		(0x7 << 12)
+#define RT5682_FILTER_CLK_SEL_SFT		12
+#define RT5682_FILTER_CLK_DIV_MASK		(0xf << 8)
+#define RT5682_FILTER_CLK_DIV_SFT		8
+
+/* ASRC Control 4 (0x0086) */
+#define RT5682_ASRCIN_FTK_N1_MASK		(0x3 << 14)
+#define RT5682_ASRCIN_FTK_N1_SFT		14
+#define RT5682_ASRCIN_FTK_N2_MASK		(0x3 << 12)
+#define RT5682_ASRCIN_FTK_N2_SFT		12
+#define RT5682_ASRCIN_FTK_M1_MASK		(0x7 << 8)
+#define RT5682_ASRCIN_FTK_M1_SFT		8
+#define RT5682_ASRCIN_FTK_M2_MASK		(0x7 << 4)
+#define RT5682_ASRCIN_FTK_M2_SFT		4
+
+/* SoundWire reference clk (0x008d) */
+#define RT5682_PLL2_OUT_MASK			(0x1 << 8)
+#define RT5682_PLL2_OUT_98M			(0x0 << 8)
+#define RT5682_PLL2_OUT_49M			(0x1 << 8)
+#define RT5682_SDW_REF_2_MASK			(0xf << 4)
+#define RT5682_SDW_REF_2_SFT			4
+#define RT5682_SDW_REF_2_48K			(0x0 << 4)
+#define RT5682_SDW_REF_2_96K			(0x1 << 4)
+#define RT5682_SDW_REF_2_192K			(0x2 << 4)
+#define RT5682_SDW_REF_2_32K			(0x3 << 4)
+#define RT5682_SDW_REF_2_24K			(0x4 << 4)
+#define RT5682_SDW_REF_2_16K			(0x5 << 4)
+#define RT5682_SDW_REF_2_12K			(0x6 << 4)
+#define RT5682_SDW_REF_2_8K			(0x7 << 4)
+#define RT5682_SDW_REF_2_44K			(0x8 << 4)
+#define RT5682_SDW_REF_2_88K			(0x9 << 4)
+#define RT5682_SDW_REF_2_176K			(0xa << 4)
+#define RT5682_SDW_REF_2_353K			(0xb << 4)
+#define RT5682_SDW_REF_2_22K			(0xc << 4)
+#define RT5682_SDW_REF_2_384K			(0xd << 4)
+#define RT5682_SDW_REF_2_11K			(0xe << 4)
+#define RT5682_SDW_REF_1_MASK			(0xf << 0)
+#define RT5682_SDW_REF_1_SFT			0
+#define RT5682_SDW_REF_1_48K			(0x0 << 0)
+#define RT5682_SDW_REF_1_96K			(0x1 << 0)
+#define RT5682_SDW_REF_1_192K			(0x2 << 0)
+#define RT5682_SDW_REF_1_32K			(0x3 << 0)
+#define RT5682_SDW_REF_1_24K			(0x4 << 0)
+#define RT5682_SDW_REF_1_16K			(0x5 << 0)
+#define RT5682_SDW_REF_1_12K			(0x6 << 0)
+#define RT5682_SDW_REF_1_8K			(0x7 << 0)
+#define RT5682_SDW_REF_1_44K			(0x8 << 0)
+#define RT5682_SDW_REF_1_88K			(0x9 << 0)
+#define RT5682_SDW_REF_1_176K			(0xa << 0)
+#define RT5682_SDW_REF_1_353K			(0xb << 0)
+#define RT5682_SDW_REF_1_22K			(0xc << 0)
+#define RT5682_SDW_REF_1_384K			(0xd << 0)
+#define RT5682_SDW_REF_1_11K			(0xe << 0)
+
+/* Depop Mode Control 1 (0x008e) */
+#define RT5682_PUMP_EN				(0x1 << 3)
+#define RT5682_PUMP_EN_SFT				3
+#define RT5682_CAPLESS_EN			(0x1 << 0)
+#define RT5682_CAPLESS_EN_SFT			0
+
+/* Depop Mode Control 2 (0x8f) */
+#define RT5682_RAMP_MASK			(0x1 << 12)
+#define RT5682_RAMP_SFT				12
+#define RT5682_RAMP_DIS				(0x0 << 12)
+#define RT5682_RAMP_EN				(0x1 << 12)
+#define RT5682_BPS_MASK				(0x1 << 11)
+#define RT5682_BPS_SFT				11
+#define RT5682_BPS_DIS				(0x0 << 11)
+#define RT5682_BPS_EN				(0x1 << 11)
+#define RT5682_FAST_UPDN_MASK			(0x1 << 10)
+#define RT5682_FAST_UPDN_SFT			10
+#define RT5682_FAST_UPDN_DIS			(0x0 << 10)
+#define RT5682_FAST_UPDN_EN			(0x1 << 10)
+#define RT5682_VLO_MASK				(0x1 << 7)
+#define RT5682_VLO_SFT				7
+#define RT5682_VLO_3V				(0x0 << 7)
+#define RT5682_VLO_33V				(0x1 << 7)
+
+/* HPOUT charge pump 1 (0x0091) */
+#define RT5682_OSW_L_MASK			(0x1 << 11)
+#define RT5682_OSW_L_SFT			11
+#define RT5682_OSW_L_DIS			(0x0 << 11)
+#define RT5682_OSW_L_EN				(0x1 << 11)
+#define RT5682_OSW_R_MASK			(0x1 << 10)
+#define RT5682_OSW_R_SFT			10
+#define RT5682_OSW_R_DIS			(0x0 << 10)
+#define RT5682_OSW_R_EN				(0x1 << 10)
+#define RT5682_PM_HP_MASK			(0x3 << 8)
+#define RT5682_PM_HP_SFT			8
+#define RT5682_PM_HP_LV				(0x0 << 8)
+#define RT5682_PM_HP_MV				(0x1 << 8)
+#define RT5682_PM_HP_HV				(0x2 << 8)
+#define RT5682_IB_HP_MASK			(0x3 << 6)
+#define RT5682_IB_HP_SFT			6
+#define RT5682_IB_HP_125IL			(0x0 << 6)
+#define RT5682_IB_HP_25IL			(0x1 << 6)
+#define RT5682_IB_HP_5IL			(0x2 << 6)
+#define RT5682_IB_HP_1IL			(0x3 << 6)
+
+/* Micbias Control1 (0x93) */
+#define RT5682_MIC1_OV_MASK			(0x3 << 14)
+#define RT5682_MIC1_OV_SFT			14
+#define RT5682_MIC1_OV_2V7			(0x0 << 14)
+#define RT5682_MIC1_OV_2V4			(0x1 << 14)
+#define RT5682_MIC1_OV_2V25			(0x3 << 14)
+#define RT5682_MIC1_OV_1V8			(0x4 << 14)
+#define RT5682_MIC1_CLK_MASK			(0x1 << 13)
+#define RT5682_MIC1_CLK_SFT			13
+#define RT5682_MIC1_CLK_DIS			(0x0 << 13)
+#define RT5682_MIC1_CLK_EN			(0x1 << 13)
+#define RT5682_MIC1_OVCD_MASK			(0x1 << 12)
+#define RT5682_MIC1_OVCD_SFT			12
+#define RT5682_MIC1_OVCD_DIS			(0x0 << 12)
+#define RT5682_MIC1_OVCD_EN			(0x1 << 12)
+#define RT5682_MIC1_OVTH_MASK			(0x3 << 10)
+#define RT5682_MIC1_OVTH_SFT			10
+#define RT5682_MIC1_OVTH_768UA			(0x0 << 10)
+#define RT5682_MIC1_OVTH_960UA			(0x1 << 10)
+#define RT5682_MIC1_OVTH_1152UA			(0x2 << 10)
+#define RT5682_MIC1_OVTH_1960UA			(0x3 << 10)
+#define RT5682_MIC2_OV_MASK			(0x3 << 8)
+#define RT5682_MIC2_OV_SFT			8
+#define RT5682_MIC2_OV_2V7			(0x0 << 8)
+#define RT5682_MIC2_OV_2V4			(0x1 << 8)
+#define RT5682_MIC2_OV_2V25			(0x3 << 8)
+#define RT5682_MIC2_OV_1V8			(0x4 << 8)
+#define RT5682_MIC2_CLK_MASK			(0x1 << 7)
+#define RT5682_MIC2_CLK_SFT			7
+#define RT5682_MIC2_CLK_DIS			(0x0 << 7)
+#define RT5682_MIC2_CLK_EN			(0x1 << 7)
+#define RT5682_MIC2_OVTH_MASK			(0x3 << 4)
+#define RT5682_MIC2_OVTH_SFT			4
+#define RT5682_MIC2_OVTH_768UA			(0x0 << 4)
+#define RT5682_MIC2_OVTH_960UA			(0x1 << 4)
+#define RT5682_MIC2_OVTH_1152UA			(0x2 << 4)
+#define RT5682_MIC2_OVTH_1960UA			(0x3 << 4)
+#define RT5682_PWR_MB_MASK			(0x1 << 3)
+#define RT5682_PWR_MB_SFT			3
+#define RT5682_PWR_MB_PD			(0x0 << 3)
+#define RT5682_PWR_MB_PU			(0x1 << 3)
+
+/* Micbias Control2 (0x0094) */
+#define RT5682_PWR_CLK25M_MASK			(0x1 << 9)
+#define RT5682_PWR_CLK25M_SFT			9
+#define RT5682_PWR_CLK25M_PD			(0x0 << 9)
+#define RT5682_PWR_CLK25M_PU			(0x1 << 9)
+#define RT5682_PWR_CLK1M_MASK			(0x1 << 8)
+#define RT5682_PWR_CLK1M_SFT			8
+#define RT5682_PWR_CLK1M_PD			(0x0 << 8)
+#define RT5682_PWR_CLK1M_PU			(0x1 << 8)
+
+/* RC Clock Control (0x009f) */
+#define RT5682_POW_IRQ				(0x1 << 15)
+#define RT5682_POW_JDH				(0x1 << 14)
+#define RT5682_POW_JDL				(0x1 << 13)
+#define RT5682_POW_ANA				(0x1 << 12)
+
+/* I2S Master Mode Clock Control 1 (0x00a0) */
+#define RT5682_CLK_SRC_MCLK			(0x0)
+#define RT5682_CLK_SRC_PLL1			(0x1)
+#define RT5682_CLK_SRC_PLL2			(0x2)
+#define RT5682_CLK_SRC_SDW			(0x3)
+#define RT5682_CLK_SRC_RCCLK			(0x4)
+#define RT5682_I2S_PD_1				(0x0)
+#define RT5682_I2S_PD_2				(0x1)
+#define RT5682_I2S_PD_3				(0x2)
+#define RT5682_I2S_PD_4				(0x3)
+#define RT5682_I2S_PD_6				(0x4)
+#define RT5682_I2S_PD_8				(0x5)
+#define RT5682_I2S_PD_12			(0x6)
+#define RT5682_I2S_PD_16			(0x7)
+#define RT5682_I2S_PD_24			(0x8)
+#define RT5682_I2S_PD_32			(0x9)
+#define RT5682_I2S_PD_48			(0xa)
+#define RT5682_I2S2_SRC_MASK			(0x3 << 4)
+#define RT5682_I2S2_SRC_SFT			4
+#define RT5682_I2S2_M_PD_MASK			(0xf << 0)
+#define RT5682_I2S2_M_PD_SFT			0
+
+/* IRQ Control 1 (0x00b6) */
+#define RT5682_JD1_PULSE_EN_MASK		(0x1 << 10)
+#define RT5682_JD1_PULSE_EN_SFT			10
+#define RT5682_JD1_PULSE_DIS			(0x0 << 10)
+#define RT5682_JD1_PULSE_EN			(0x1 << 10)
+
+/* IRQ Control 2 (0x00b7) */
+#define RT5682_JD1_EN_MASK			(0x1 << 15)
+#define RT5682_JD1_EN_SFT			15
+#define RT5682_JD1_DIS				(0x0 << 15)
+#define RT5682_JD1_EN				(0x1 << 15)
+#define RT5682_JD1_POL_MASK			(0x1 << 13)
+#define RT5682_JD1_POL_NOR			(0x0 << 13)
+#define RT5682_JD1_POL_INV			(0x1 << 13)
+
+/* IRQ Control 3 (0x00b8) */
+#define RT5682_IL_IRQ_MASK			(0x1 << 7)
+#define RT5682_IL_IRQ_DIS			(0x0 << 7)
+#define RT5682_IL_IRQ_EN			(0x1 << 7)
+
+/* GPIO Control 1 (0x00c0) */
+#define RT5682_GP1_PIN_MASK			(0x3 << 14)
+#define RT5682_GP1_PIN_SFT			14
+#define RT5682_GP1_PIN_GPIO1			(0x0 << 14)
+#define RT5682_GP1_PIN_IRQ			(0x1 << 14)
+#define RT5682_GP1_PIN_DMIC_CLK			(0x2 << 14)
+#define RT5682_GP2_PIN_MASK			(0x3 << 12)
+#define RT5682_GP2_PIN_SFT			12
+#define RT5682_GP2_PIN_GPIO2			(0x0 << 12)
+#define RT5682_GP2_PIN_LRCK2			(0x1 << 12)
+#define RT5682_GP2_PIN_DMIC_SDA			(0x2 << 12)
+#define RT5682_GP3_PIN_MASK			(0x3 << 10)
+#define RT5682_GP3_PIN_SFT			10
+#define RT5682_GP3_PIN_GPIO3			(0x0 << 10)
+#define RT5682_GP3_PIN_BCLK2			(0x1 << 10)
+#define RT5682_GP3_PIN_DMIC_CLK			(0x2 << 10)
+#define RT5682_GP4_PIN_MASK			(0x3 << 8)
+#define RT5682_GP4_PIN_SFT			8
+#define RT5682_GP4_PIN_GPIO4			(0x0 << 8)
+#define RT5682_GP4_PIN_ADCDAT1			(0x1 << 8)
+#define RT5682_GP4_PIN_DMIC_CLK			(0x2 << 8)
+#define RT5682_GP4_PIN_ADCDAT2			(0x3 << 8)
+#define RT5682_GP5_PIN_MASK			(0x3 << 6)
+#define RT5682_GP5_PIN_SFT			6
+#define RT5682_GP5_PIN_GPIO5			(0x0 << 6)
+#define RT5682_GP5_PIN_DACDAT1			(0x1 << 6)
+#define RT5682_GP5_PIN_DMIC_SDA			(0x2 << 6)
+#define RT5682_GP6_PIN_MASK			(0x1 << 5)
+#define RT5682_GP6_PIN_SFT			5
+#define RT5682_GP6_PIN_GPIO6			(0x0 << 5)
+#define RT5682_GP6_PIN_LRCK1			(0x1 << 5)
+
+/* GPIO Control 2 (0x00c1)*/
+#define RT5682_GP1_PF_MASK			(0x1 << 15)
+#define RT5682_GP1_PF_IN			(0x0 << 15)
+#define RT5682_GP1_PF_OUT			(0x1 << 15)
+#define RT5682_GP1_OUT_MASK			(0x1 << 14)
+#define RT5682_GP1_OUT_L			(0x0 << 14)
+#define RT5682_GP1_OUT_H			(0x1 << 14)
+#define RT5682_GP2_PF_MASK			(0x1 << 13)
+#define RT5682_GP2_PF_IN			(0x0 << 13)
+#define RT5682_GP2_PF_OUT			(0x1 << 13)
+#define RT5682_GP2_OUT_MASK			(0x1 << 12)
+#define RT5682_GP2_OUT_L			(0x0 << 12)
+#define RT5682_GP2_OUT_H			(0x1 << 12)
+#define RT5682_GP3_PF_MASK			(0x1 << 11)
+#define RT5682_GP3_PF_IN			(0x0 << 11)
+#define RT5682_GP3_PF_OUT			(0x1 << 11)
+#define RT5682_GP3_OUT_MASK			(0x1 << 10)
+#define RT5682_GP3_OUT_L			(0x0 << 10)
+#define RT5682_GP3_OUT_H			(0x1 << 10)
+#define RT5682_GP4_PF_MASK			(0x1 << 9)
+#define RT5682_GP4_PF_IN			(0x0 << 9)
+#define RT5682_GP4_PF_OUT			(0x1 << 9)
+#define RT5682_GP4_OUT_MASK			(0x1 << 8)
+#define RT5682_GP4_OUT_L			(0x0 << 8)
+#define RT5682_GP4_OUT_H			(0x1 << 8)
+#define RT5682_GP5_PF_MASK			(0x1 << 7)
+#define RT5682_GP5_PF_IN			(0x0 << 7)
+#define RT5682_GP5_PF_OUT			(0x1 << 7)
+#define RT5682_GP5_OUT_MASK			(0x1 << 6)
+#define RT5682_GP5_OUT_L			(0x0 << 6)
+#define RT5682_GP5_OUT_H			(0x1 << 6)
+#define RT5682_GP6_PF_MASK			(0x1 << 5)
+#define RT5682_GP6_PF_IN			(0x0 << 5)
+#define RT5682_GP6_PF_OUT			(0x1 << 5)
+#define RT5682_GP6_OUT_MASK			(0x1 << 4)
+#define RT5682_GP6_OUT_L			(0x0 << 4)
+#define RT5682_GP6_OUT_H			(0x1 << 4)
+
+
+/* GPIO Status (0x00c2) */
+#define RT5682_GP6_STA				(0x1 << 6)
+#define RT5682_GP5_STA				(0x1 << 5)
+#define RT5682_GP4_STA				(0x1 << 4)
+#define RT5682_GP3_STA				(0x1 << 3)
+#define RT5682_GP2_STA				(0x1 << 2)
+#define RT5682_GP1_STA				(0x1 << 1)
+
+/* Soft volume and zero cross control 1 (0x00d9) */
+#define RT5682_SV_MASK				(0x1 << 15)
+#define RT5682_SV_SFT				15
+#define RT5682_SV_DIS				(0x0 << 15)
+#define RT5682_SV_EN				(0x1 << 15)
+#define RT5682_ZCD_MASK				(0x1 << 10)
+#define RT5682_ZCD_SFT				10
+#define RT5682_ZCD_PD				(0x0 << 10)
+#define RT5682_ZCD_PU				(0x1 << 10)
+#define RT5682_SV_DLY_MASK			(0xf)
+#define RT5682_SV_DLY_SFT			0
+
+/* Soft volume and zero cross control 2 (0x00da) */
+#define RT5682_ZCD_BST1_CBJ_MASK		(0x1 << 7)
+#define RT5682_ZCD_BST1_CBJ_SFT			7
+#define RT5682_ZCD_BST1_CBJ_DIS			(0x0 << 7)
+#define RT5682_ZCD_BST1_CBJ_EN			(0x1 << 7)
+#define RT5682_ZCD_RECMIX_MASK			(0x1)
+#define RT5682_ZCD_RECMIX_SFT			0
+#define RT5682_ZCD_RECMIX_DIS			(0x0)
+#define RT5682_ZCD_RECMIX_EN			(0x1)
+
+/* 4 Button Inline Command Control 2 (0x00e3) */
+#define RT5682_4BTN_IL_MASK			(0x1 << 15)
+#define RT5682_4BTN_IL_EN			(0x1 << 15)
+#define RT5682_4BTN_IL_DIS			(0x0 << 15)
+#define RT5682_4BTN_IL_RST_MASK			(0x1 << 14)
+#define RT5682_4BTN_IL_NOR			(0x1 << 14)
+#define RT5682_4BTN_IL_RST			(0x0 << 14)
+
+/* Analog JD Control (0x00f0) */
+#define RT5682_JDH_RS_MASK			(0x1 << 4)
+#define RT5682_JDH_NO_PLUG			(0x1 << 4)
+#define RT5682_JDH_PLUG				(0x0 << 4)
+
+/* Chopper and Clock control for DAC (0x013a)*/
+#define RT5682_CKXEN_DAC1_MASK			(0x1 << 13)
+#define RT5682_CKXEN_DAC1_SFT			13
+#define RT5682_CKGEN_DAC1_MASK			(0x1 << 12)
+#define RT5682_CKGEN_DAC1_SFT			12
+
+/* Chopper and Clock control for ADC (0x013b)*/
+#define RT5682_CKXEN_ADC1_MASK			(0x1 << 13)
+#define RT5682_CKXEN_ADC1_SFT			13
+#define RT5682_CKGEN_ADC1_MASK			(0x1 << 12)
+#define RT5682_CKGEN_ADC1_SFT			12
+
+/* Volume test (0x013f)*/
+#define RT5682_SEL_CLK_VOL_MASK			(0x1 << 15)
+#define RT5682_SEL_CLK_VOL_EN			(0x1 << 15)
+#define RT5682_SEL_CLK_VOL_DIS			(0x0 << 15)
+
+/* Test Mode Control 1 (0x0145) */
+#define RT5682_AD2DA_LB_MASK			(0x1 << 10)
+#define RT5682_AD2DA_LB_SFT			10
+
+/* Stereo Noise Gate Control 1 (0x0160) */
+#define RT5682_NG2_EN_MASK			(0x1 << 15)
+#define RT5682_NG2_EN				(0x1 << 15)
+#define RT5682_NG2_DIS				(0x0 << 15)
+
+/* Stereo1 DAC Silence Detection Control (0x0190) */
+#define RT5682_DEB_STO_DAC_MASK			(0x7 << 4)
+#define RT5682_DEB_80_MS			(0x0 << 4)
+
+/* SAR ADC Inline Command Control 1 (0x0210) */
+#define RT5682_SAR_BUTT_DET_MASK		(0x1 << 15)
+#define RT5682_SAR_BUTT_DET_EN			(0x1 << 15)
+#define RT5682_SAR_BUTT_DET_DIS			(0x0 << 15)
+#define RT5682_SAR_BUTDET_MODE_MASK		(0x1 << 14)
+#define RT5682_SAR_BUTDET_POW_SAV		(0x1 << 14)
+#define RT5682_SAR_BUTDET_POW_NORM		(0x0 << 14)
+#define RT5682_SAR_BUTDET_RST_MASK		(0x1 << 13)
+#define RT5682_SAR_BUTDET_RST_NORMAL		(0x1 << 13)
+#define RT5682_SAR_BUTDET_RST			(0x0 << 13)
+#define RT5682_SAR_POW_MASK			(0x1 << 12)
+#define RT5682_SAR_POW_EN			(0x1 << 12)
+#define RT5682_SAR_POW_DIS			(0x0 << 12)
+#define RT5682_SAR_RST_MASK			(0x1 << 11)
+#define RT5682_SAR_RST_NORMAL			(0x1 << 11)
+#define RT5682_SAR_RST				(0x0 << 11)
+#define RT5682_SAR_BYPASS_MASK			(0x1 << 10)
+#define RT5682_SAR_BYPASS_EN			(0x1 << 10)
+#define RT5682_SAR_BYPASS_DIS			(0x0 << 10)
+#define RT5682_SAR_SEL_MB1_MASK			(0x1 << 9)
+#define RT5682_SAR_SEL_MB1_SEL			(0x1 << 9)
+#define RT5682_SAR_SEL_MB1_NOSEL		(0x0 << 9)
+#define RT5682_SAR_SEL_MB2_MASK			(0x1 << 8)
+#define RT5682_SAR_SEL_MB2_SEL			(0x1 << 8)
+#define RT5682_SAR_SEL_MB2_NOSEL		(0x0 << 8)
+#define RT5682_SAR_SEL_MODE_MASK		(0x1 << 7)
+#define RT5682_SAR_SEL_MODE_CMP			(0x1 << 7)
+#define RT5682_SAR_SEL_MODE_ADC			(0x0 << 7)
+#define RT5682_SAR_SEL_MB1_MB2_MASK		(0x1 << 5)
+#define RT5682_SAR_SEL_MB1_MB2_AUTO		(0x1 << 5)
+#define RT5682_SAR_SEL_MB1_MB2_MANU		(0x0 << 5)
+#define RT5682_SAR_SEL_SIGNAL_MASK		(0x1 << 4)
+#define RT5682_SAR_SEL_SIGNAL_AUTO		(0x1 << 4)
+#define RT5682_SAR_SEL_SIGNAL_MANU		(0x0 << 4)
+
+/* SAR ADC Inline Command Control 13 (0x021c) */
+#define RT5682_SAR_SOUR_MASK			(0x3f)
+#define RT5682_SAR_SOUR_BTN			(0x3f)
+#define RT5682_SAR_SOUR_TYPE			(0x0)
+
+
+/* System Clock Source */
+enum {
+	RT5682_SCLK_S_MCLK,
+	RT5682_SCLK_S_PLL1,
+	RT5682_SCLK_S_PLL2,
+	RT5682_SCLK_S_RCCLK,
+};
+
+/* PLL Source */
+enum {
+	RT5682_PLL1_S_MCLK,
+	RT5682_PLL1_S_BCLK1,
+	RT5682_PLL1_S_RCCLK,
+};
+
+enum {
+	RT5682_AIF1,
+	RT5682_AIF2,
+	RT5682_AIFS
+};
+
+/* filter mask */
+enum {
+	RT5682_DA_STEREO1_FILTER = 0x1,
+	RT5682_AD_STEREO1_FILTER = (0x1 << 1),
+};
+
+enum {
+	RT5682_CLK_SEL_SYS,
+	RT5682_CLK_SEL_I2S1_ASRC,
+	RT5682_CLK_SEL_I2S2_ASRC,
+};
+
+int rt5682_sel_asrc_clk_src(struct snd_soc_component *component,
+		unsigned int filter_mask, unsigned int clk_src);
+
+#endif /* __RT5682_H__ */
-- 
cgit v1.2.3


From 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 13 Jun 2018 10:11:56 -0700
Subject: netfilter: ipv6: nf_defrag: reduce struct net memory waste

It is a waste of memory to use a full "struct netns_sysctl_ipv6"
while only one pointer is really used, considering netns_sysctl_ipv6
keeps growing.

Also, since "struct netns_frags" has cache line alignment,
it is better to move the frags_hdr pointer outside, otherwise
we spend a full cache line for this pointer.

This saves 192 bytes of memory per netns.

Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/net_namespace.h             | 1 +
 include/net/netns/ipv6.h                | 1 -
 net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++---
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 47e35cce3b64..a71264d75d7f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -128,6 +128,7 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 	struct netns_nf_frag	nf_frag;
+	struct ctl_table_header *nf_frag_frags_hdr;
 #endif
 	struct sock		*nfnl;
 	struct sock		*nfnl_stash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c978a31b0f84..762ac9931b62 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -109,7 +109,6 @@ struct netns_ipv6 {
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct netns_nf_frag {
-	struct netns_sysctl_ipv6 sysctl;
 	struct netns_frags	frags;
 };
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e0332014c17..a452d99c9f52 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->nf_frag.sysctl.frags_hdr = hdr;
+	net->nf_frag_frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
-	table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+	table = net->nf_frag_frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf_frag_frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }
-- 
cgit v1.2.3


From cf85ec88701610f0de2e959ac447d9613462fbc7 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 9 Jun 2018 16:18:16 +0300
Subject: removed extra extern file_fdatawait_range

Jeff added this extern twice in commit a823e4589e68

Fixes: a823e4589e68 ("mm: add file_fdatawait_range and file_write_and_wait")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..f140c11d35dd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2623,8 +2623,6 @@ static inline int filemap_fdatawait(struct address_space *mapping)
 
 extern bool filemap_range_has_page(struct address_space *, loff_t lstart,
 				  loff_t lend);
-extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
-						loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
 				        loff_t lstart, loff_t lend);
-- 
cgit v1.2.3


From b150c3862d21a4a9ce0f26d8067b9dcd41e2050c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 5 Jun 2018 08:40:15 +0300
Subject: IB/core: Introduce GID entry reference counts

In order to be able to expose pointers to the ib_gid_attrs in the GID
table we need to make it so the value of the pointer cannot be
changed. Thus each GID table entry gets a unique piece of kref'd memory
that is written only during initialization and remains constant for its
lifetime.

This eventually will allow the struct ib_gid_attrs to be returned without
copy from many of query the APIs, but it also provides a way to track when
all users of a HW table index go away.

For roce we no longer allow an in-use HW table index to be re-used for a
new an different entry. When a GID table entry needs to be removed it is
hidden from the find API, but remains as a valid HW index and all
ib_gid_attr points remain valid. The HW index is not relased until all
users put the kref.

Later patches will broadly replace the use of the sgid_index integer with
the kref'd structure.

Ultimately this will prevent security problems where the OS changes the
properties of a HW GID table entry while an active user object is still
using the entry.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c | 323 +++++++++++++++++++++++++++-------------
 include/rdma/ib_verbs.h         |   1 +
 2 files changed, 221 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index d4751f94a93a..09d83c69ec65 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -66,15 +66,24 @@ enum gid_attr_find_mask {
 	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
 };
 
-enum gid_table_entry_props {
-	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
+enum gid_table_entry_state {
+	GID_TABLE_ENTRY_INVALID		= 1,
+	GID_TABLE_ENTRY_VALID		= 2,
+	/*
+	 * Indicates that entry is pending to be removed, there may
+	 * be active users of this GID entry.
+	 * When last user of the GID entry releases reference to it,
+	 * GID entry is detached from the table.
+	 */
+	GID_TABLE_ENTRY_PENDING_DEL	= 3,
 };
 
 struct ib_gid_table_entry {
-	unsigned long	    props;
-	union ib_gid        gid;
-	struct ib_gid_attr  attr;
-	void		   *context;
+	struct kref			kref;
+	struct work_struct		del_work;
+	struct ib_gid_attr		attr;
+	void				*context;
+	enum gid_table_entry_state	state;
 };
 
 struct ib_gid_table {
@@ -90,15 +99,16 @@ struct ib_gid_table {
 	 *
 	 **/
 	/* Any writer to data_vec must hold this lock and the write side of
-	 * rwlock. readers must hold only rwlock. All writers must be in a
+	 * rwlock. Readers must hold only rwlock. All writers must be in a
 	 * sleepable context.
 	 */
 	struct mutex			lock;
-	/* rwlock protects data_vec[ix]->props. */
+	/* rwlock protects data_vec[ix]->state and entry pointer.
+	 */
 	rwlock_t			rwlock;
+	struct ib_gid_table_entry	**data_vec;
 	/* bit field, each bit indicates the index of default GID */
 	u32				default_gid_indices;
-	struct ib_gid_table_entry	*data_vec;
 };
 
 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
@@ -178,26 +188,113 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 	return device->cache.ports[port - rdma_start_port(device)].gid;
 }
 
-static void del_roce_gid(struct ib_device *device, u8 port_num,
-			 struct ib_gid_table *table, int ix)
+static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
+{
+	return !entry;
+}
+
+static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
+{
+	return entry && entry->state == GID_TABLE_ENTRY_VALID;
+}
+
+static void schedule_free_gid(struct kref *kref)
+{
+	struct ib_gid_table_entry *entry =
+			container_of(kref, struct ib_gid_table_entry, kref);
+
+	queue_work(ib_wq, &entry->del_work);
+}
+
+static void free_gid_entry(struct ib_gid_table_entry *entry)
 {
+	struct ib_device *device = entry->attr.device;
+	u8 port_num = entry->attr.port_num;
+	struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
 	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 device->name, port_num, ix,
-		 table->data_vec[ix].gid.raw);
+		 device->name, port_num, entry->attr.index,
+		 entry->attr.gid.raw);
+
+	mutex_lock(&table->lock);
+	if (rdma_cap_roce_gid_table(device, port_num) &&
+	    entry->state != GID_TABLE_ENTRY_INVALID)
+		device->del_gid(&entry->attr, &entry->context);
+	write_lock_irq(&table->rwlock);
 
-	if (rdma_cap_roce_gid_table(device, port_num))
-		device->del_gid(&table->data_vec[ix].attr,
-				&table->data_vec[ix].context);
-	dev_put(table->data_vec[ix].attr.ndev);
+	/*
+	 * The only way to avoid overwriting NULL in table is
+	 * by comparing if it is same entry in table or not!
+	 * If new entry in table is added by the time we free here,
+	 * don't overwrite the table entry.
+	 */
+	if (entry == table->data_vec[entry->attr.index])
+		table->data_vec[entry->attr.index] = NULL;
+	/* Now this index is ready to be allocated */
+	write_unlock_irq(&table->rwlock);
+	mutex_unlock(&table->lock);
+
+	if (entry->attr.ndev)
+		dev_put(entry->attr.ndev);
+	kfree(entry);
+}
+
+/**
+ * free_gid_work - Release reference to the GID entry
+ * @work: Work structure to refer to GID entry which needs to be
+ * deleted.
+ *
+ * free_gid_work() frees the entry from the HCA's hardware table
+ * if provider supports it. It releases reference to netdevice.
+ */
+static void free_gid_work(struct work_struct *work)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(work, struct ib_gid_table_entry, del_work);
+	free_gid_entry(entry);
 }
 
-static int add_roce_gid(struct ib_gid_table *table,
-			const union ib_gid *gid,
-			const struct ib_gid_attr *attr)
+static struct ib_gid_table_entry *
+alloc_gid_entry(const struct ib_gid_attr *attr)
 {
 	struct ib_gid_table_entry *entry;
-	int ix = attr->index;
-	int ret = 0;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+	kref_init(&entry->kref);
+	memcpy(&entry->attr, attr, sizeof(*attr));
+	if (entry->attr.ndev)
+		dev_hold(entry->attr.ndev);
+	INIT_WORK(&entry->del_work, free_gid_work);
+	entry->state = GID_TABLE_ENTRY_INVALID;
+	return entry;
+}
+
+static void store_gid_entry(struct ib_gid_table *table,
+			    struct ib_gid_table_entry *entry)
+{
+	entry->state = GID_TABLE_ENTRY_VALID;
+
+	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+		 entry->attr.device->name, entry->attr.port_num,
+		 entry->attr.index, entry->attr.gid.raw);
+
+	lockdep_assert_held(&table->lock);
+	write_lock_irq(&table->rwlock);
+	table->data_vec[entry->attr.index] = entry;
+	write_unlock_irq(&table->rwlock);
+}
+
+static void put_gid_entry(struct ib_gid_table_entry *entry)
+{
+	kref_put(&entry->kref, schedule_free_gid);
+}
+
+static int add_roce_gid(struct ib_gid_table_entry *entry)
+{
+	const struct ib_gid_attr *attr = &entry->attr;
+	int ret;
 
 	if (!attr->ndev) {
 		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
@@ -205,38 +302,22 @@ static int add_roce_gid(struct ib_gid_table *table,
 		       attr->index);
 		return -EINVAL;
 	}
-
-	entry = &table->data_vec[ix];
-	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
-		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
-		     attr->device->name, attr->port_num,
-		     attr->index);
-		return -EINVAL;
-	}
-
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
-		ret = attr->device->add_gid(gid, attr, &entry->context);
+		ret = attr->device->add_gid(&attr->gid, attr, &entry->context);
 		if (ret) {
 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
 			       __func__, attr->device->name, attr->port_num,
 			       attr->index);
-			goto add_err;
+			return ret;
 		}
 	}
-	dev_hold(attr->ndev);
-
-add_err:
-	if (!ret)
-		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-			 attr->device->name, attr->port_num, ix, gid->raw);
-	return ret;
+	return 0;
 }
 
 /**
  * add_modify_gid - Add or modify GID table entry
  *
  * @table:	GID table in which GID to be added or modified
- * @gid:	GID content
  * @attr:	Attributes of the GID
  *
  * Returns 0 on success or appropriate error code. It accepts zero
@@ -244,34 +325,42 @@ add_err:
  * GID. However such zero GIDs are not added to the cache.
  */
 static int add_modify_gid(struct ib_gid_table *table,
-			  const union ib_gid *gid,
 			  const struct ib_gid_attr *attr)
 {
-	int ret;
+	struct ib_gid_table_entry *entry;
+	int ret = 0;
+
+	/*
+	 * Invalidate any old entry in the table to make it safe to write to
+	 * this index.
+	 */
+	if (is_gid_entry_valid(table->data_vec[attr->index]))
+		put_gid_entry(table->data_vec[attr->index]);
+
+	/*
+	 * Some HCA's report multiple GID entries with only one valid GID, and
+	 * leave other unused entries as the zero GID. Convert zero GIDs to
+	 * empty table entries instead of storing them.
+	 */
+	if (rdma_is_zero_gid(&attr->gid))
+		return 0;
+
+	entry = alloc_gid_entry(attr);
+	if (!entry)
+		return -ENOMEM;
 
 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
-		ret = add_roce_gid(table, gid, attr);
+		ret = add_roce_gid(entry);
 		if (ret)
-			return ret;
-	} else {
-		/*
-		 * Some HCA's report multiple GID entries with only one
-		 * valid GID, but remaining as zero GID.
-		 * So ignore such behavior for IB link layer and don't
-		 * fail the call, but don't add such entry to GID cache.
-		 */
-		if (rdma_is_zero_gid(gid))
-			return 0;
+			goto done;
 	}
 
-	lockdep_assert_held(&table->lock);
-	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
-	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-
-	write_lock_irq(&table->rwlock);
-	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
-	write_unlock_irq(&table->rwlock);
+	store_gid_entry(table, entry);
 	return 0;
+
+done:
+	put_gid_entry(entry);
+	return ret;
 }
 
 /**
@@ -286,16 +375,25 @@ static int add_modify_gid(struct ib_gid_table *table,
 static void del_gid(struct ib_device *ib_dev, u8 port,
 		    struct ib_gid_table *table, int ix)
 {
+	struct ib_gid_table_entry *entry;
+
 	lockdep_assert_held(&table->lock);
+
+	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+		 ib_dev->name, port, ix,
+		 table->data_vec[ix]->attr.gid.raw);
+
 	write_lock_irq(&table->rwlock);
-	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+	entry = table->data_vec[ix];
+	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+	/*
+	 * For non RoCE protocol, GID entry slot is ready to use.
+	 */
+	if (!rdma_protocol_roce(ib_dev, port))
+		table->data_vec[ix] = NULL;
 	write_unlock_irq(&table->rwlock);
 
-	if (rdma_protocol_roce(ib_dev, port))
-		del_roce_gid(ib_dev, port, table, ix);
-	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
-	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
-	table->data_vec[ix].context = NULL;
+	put_gid_entry(entry);
 }
 
 /* rwlock should be read locked, or lock should be held */
@@ -308,8 +406,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 	int empty = pempty ? -1 : 0;
 
 	while (i < table->sz && (found < 0 || empty < 0)) {
-		struct ib_gid_table_entry *data = &table->data_vec[i];
-		struct ib_gid_attr *attr = &data->attr;
+		struct ib_gid_table_entry *data = table->data_vec[i];
+		struct ib_gid_attr *attr;
 		int curr_index = i;
 
 		i++;
@@ -320,9 +418,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 		 * so lookup free slot only if requested.
 		 */
 		if (pempty && empty < 0) {
-			if (data->props & GID_TABLE_ENTRY_INVALID &&
-			    (default_gid ==
-				is_gid_index_default(table, curr_index))) {
+			if (is_gid_entry_free(data) &&
+			    default_gid ==
+				is_gid_index_default(table, curr_index)) {
 				/*
 				 * Found an invalid (free) entry; allocate it.
 				 * If default GID is requested, then our
@@ -337,22 +435,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 
 		/*
 		 * Additionally find_gid() is used to find valid entry during
-		 * lookup operation, where validity needs to be checked. So
-		 * find the empty entry first to continue to search for a free
-		 * slot and ignore its INVALID flag.
+		 * lookup operation; so ignore the entries which are marked as
+		 * pending for removal and the entries which are marked as
+		 * invalid.
 		 */
-		if (data->props & GID_TABLE_ENTRY_INVALID)
+		if (!is_gid_entry_valid(data))
 			continue;
 
 		if (found >= 0)
 			continue;
 
+		attr = &data->attr;
 		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
 		    attr->gid_type != val->gid_type)
 			continue;
 
 		if (mask & GID_ATTR_FIND_MASK_GID &&
-		    memcmp(gid, &data->gid, sizeof(*gid)))
+		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
 			continue;
 
 		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -409,7 +508,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 	attr->device = ib_dev;
 	attr->index = empty;
 	attr->port_num = port;
-	ret = add_modify_gid(table, gid, attr);
+	attr->gid = *gid;
+	ret = add_modify_gid(table, attr);
 	if (!ret)
 		dispatch_gid_change_event(ib_dev, port);
 
@@ -505,7 +605,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 	mutex_lock(&table->lock);
 
 	for (ix = 0; ix < table->sz; ix++) {
-		if (table->data_vec[ix].attr.ndev == ndev) {
+		if (is_gid_entry_valid(table->data_vec[ix]) &&
+		    table->data_vec[ix]->attr.ndev == ndev) {
 			del_gid(ib_dev, port, table, ix);
 			deleted = true;
 		}
@@ -529,12 +630,13 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 	if (index < 0 || index >= table->sz)
 		return -EINVAL;
 
-	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
+	if (!is_gid_entry_valid(table->data_vec[index]))
 		return -EINVAL;
 
-	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
+	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
 	if (attr) {
-		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
+		memcpy(attr, &table->data_vec[index]->attr,
+		       sizeof(*attr));
 		if (attr->ndev)
 			dev_hold(attr->ndev);
 	}
@@ -681,13 +783,14 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 	for (i = 0; i < table->sz; i++) {
 		struct ib_gid_attr attr;
 
-		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+		if (!is_gid_entry_valid(table->data_vec[i]))
 			continue;
 
-		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+		if (memcmp(gid, &table->data_vec[i]->attr.gid,
+			   sizeof(*gid)))
 			continue;
 
-		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+		memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr));
 
 		if (filter(gid, &attr, context)) {
 			found = true;
@@ -705,9 +808,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
-	struct ib_gid_table *table =
-		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
-	int i;
+	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
 
 	if (!table)
 		return NULL;
@@ -720,12 +821,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
 
 	table->sz = sz;
 	rwlock_init(&table->rwlock);
-
-	/* Mark all entries as invalid so that allocator can allocate
-	 * one of the invalid (free) entry.
-	 */
-	for (i = 0; i < sz; i++)
-		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
 	return table;
 
 err_free_table:
@@ -733,12 +828,30 @@ err_free_table:
 	return NULL;
 }
 
-static void release_gid_table(struct ib_gid_table *table)
+static void release_gid_table(struct ib_device *device, u8 port,
+			      struct ib_gid_table *table)
 {
-	if (table) {
-		kfree(table->data_vec);
-		kfree(table);
+	bool leak = false;
+	int i;
+
+	if (!table)
+		return;
+
+	for (i = 0; i < table->sz; i++) {
+		if (is_gid_entry_free(table->data_vec[i]))
+			continue;
+		if (kref_read(&table->data_vec[i]->kref) > 1) {
+			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
+			       device->name, i,
+			       kref_read(&table->data_vec[i]->kref));
+			leak = true;
+		}
 	}
+	if (leak)
+		return;
+
+	kfree(table->data_vec);
+	kfree(table);
 }
 
 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
@@ -752,7 +865,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 
 	mutex_lock(&table->lock);
 	for (i = 0; i < table->sz; ++i) {
-		if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
+		if (is_gid_entry_valid(table->data_vec[i])) {
 			del_gid(ib_dev, port, table, i);
 			deleted = true;
 		}
@@ -821,7 +934,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
 
 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 		table = ib_dev->cache.ports[port].gid;
-		release_gid_table(table);
+		release_gid_table(ib_dev, port, table);
 		ib_dev->cache.ports[port].gid = NULL;
 	}
 }
@@ -1100,7 +1213,6 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 {
 	struct ib_gid_attr gid_attr = {};
 	struct ib_gid_table *table;
-	union ib_gid gid;
 	int ret = 0;
 	int i;
 
@@ -1112,14 +1224,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 	for (i = 0; i < gid_tbl_len; ++i) {
 		if (!device->query_gid)
 			continue;
-		ret = device->query_gid(device, port, i, &gid);
+		ret = device->query_gid(device, port, i, &gid_attr.gid);
 		if (ret) {
 			pr_warn("query_gid failed (%d) for %s (index %d)\n",
 				ret, device->name, i);
 			goto err;
 		}
 		gid_attr.index = i;
-		add_modify_gid(table, &gid, &gid_attr);
+		add_modify_gid(table, &gid_attr);
 	}
 err:
 	mutex_unlock(&table->lock);
@@ -1302,4 +1414,9 @@ void ib_cache_cleanup_one(struct ib_device *device)
 	ib_unregister_event_handler(&device->cache.event_handler);
 	flush_workqueue(ib_wq);
 	gid_table_cleanup_one(device);
+
+	/*
+	 * Flush the wq second time for any pending GID delete work.
+	 */
+	flush_workqueue(ib_wq);
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4c6241bc2039..0a77afedabd0 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,6 +94,7 @@ enum ib_gid_type {
 struct ib_gid_attr {
 	struct net_device	*ndev;
 	struct ib_device	*device;
+	union ib_gid		gid;
 	enum ib_gid_type	gid_type;
 	u16			index;
 	u8			port_num;
-- 
cgit v1.2.3


From f4df9a7c34d8f9e84af73ce187bcdf6fea65c4cb Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 5 Jun 2018 08:40:16 +0300
Subject: RDMA: Use GID from the ib_gid_attr during the add_gid() callback

Now that ib_gid_attr contains the GID, make use of that in the add_gid()
callback functions for the provider drivers to simplify the add_gid()
implementations.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c                |  2 +-
 drivers/infiniband/hw/bnxt_re/ib_verbs.c       |  5 ++---
 drivers/infiniband/hw/bnxt_re/ib_verbs.h       |  3 +--
 drivers/infiniband/hw/hns/hns_roce_device.h    |  2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c     |  2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c     |  2 +-
 drivers/infiniband/hw/hns/hns_roce_main.c      |  6 ++----
 drivers/infiniband/hw/mlx4/main.c              | 12 ++++++------
 drivers/infiniband/hw/mlx5/main.c              |  5 ++---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 10 +++-------
 include/rdma/ib_verbs.h                        |  3 +--
 11 files changed, 21 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 09d83c69ec65..e569956c4e7a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -303,7 +303,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
 		return -EINVAL;
 	}
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
-		ret = attr->device->add_gid(&attr->gid, attr, &entry->context);
+		ret = attr->device->add_gid(attr, &entry->context);
 		if (ret) {
 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
 			       __func__, attr->device->name, attr->port_num,
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index a76e206704d4..62eb9e3346d5 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -364,8 +364,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
 	return rc;
 }
 
-int bnxt_re_add_gid(const union ib_gid *gid,
-		    const struct ib_gid_attr *attr, void **context)
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	int rc;
 	u32 tbl_idx = 0;
@@ -377,7 +376,7 @@ int bnxt_re_add_gid(const union ib_gid *gid,
 	if ((attr->ndev) && is_vlan_dev(attr->ndev))
 		vlan_id = vlan_dev_vlan_id(attr->ndev);
 
-	rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+	rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
 				 rdev->qplib_res.netdev->dev_addr,
 				 vlan_id, true, &tbl_idx);
 	if (rc == -EALREADY) {
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c6414cad4af..bd04d40d897a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
 int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
 		       u16 index, u16 *pkey);
 int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
-int bnxt_re_add_gid(const union ib_gid *gid,
-		    const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
 int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
 		      int index, union ib_gid *gid);
 enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 31221d506d9a..65f7b68d1777 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -736,7 +736,7 @@ struct hns_roce_hw {
 			 u16 token, int event);
 	int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
 	int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
-		       union ib_gid *gid, const struct ib_gid_attr *attr);
+		       const union ib_gid *gid, const struct ib_gid_attr *attr);
 	int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
 	void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
 			enum ib_mtu mtu);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8013d69c5ac4..235c67dfc6cb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1728,7 +1728,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
 }
 
 static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
-			       int gid_index, union ib_gid *gid,
+			       int gid_index, const union ib_gid *gid,
 			       const struct ib_gid_attr *attr)
 {
 	u32 *p = NULL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a6e11be0ea0f..454d391b4b40 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1308,7 +1308,7 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
 }
 
 static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
-			       int gid_index, union ib_gid *gid,
+			       int gid_index, const union ib_gid *gid,
 			       const struct ib_gid_attr *attr)
 {
 	enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 21b901cfa2d6..24a2ea0018d9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
 	return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
 }
 
-static int hns_roce_add_gid(const union ib_gid *gid,
-			    const struct ib_gid_attr *attr, void **context)
+static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
 	u8 port = attr->port_num - 1;
@@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid,
 
 	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
 
-	ret = hr_dev->hw->set_gid(hr_dev, port, attr->index,
-				  (union ib_gid *)gid, attr);
+	ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
 
 	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4ec519afc45b..859089df9f17 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
 	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 }
 
-static int mlx4_ib_add_gid(const union ib_gid *gid,
-			   const struct ib_gid_attr *attr,
-			   void **context)
+static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
@@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
 	port_gid_table = &iboe->gids[attr->port_num - 1];
 	spin_lock_bh(&iboe->lock);
 	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-		if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
-		    (port_gid_table->gids[i].gid_type == attr->gid_type))  {
+		if (!memcmp(&port_gid_table->gids[i].gid,
+			    &attr->gid, sizeof(attr->gid)) &&
+		    port_gid_table->gids[i].gid_type == attr->gid_type)  {
 			found = i;
 			break;
 		}
@@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
 				ret = -ENOMEM;
 			} else {
 				*context = port_gid_table->gids[free].ctx;
-				memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+				memcpy(&port_gid_table->gids[free].gid,
+				       &attr->gid, sizeof(attr->gid));
 				port_gid_table->gids[free].gid_type = attr->gid_type;
 				port_gid_table->gids[free].ctx->real_index = free;
 				port_gid_table->gids[free].ctx->refcount = 1;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e52dd21519b4..94669df81342 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
 				      vlan_id, port_num);
 }
 
-static int mlx5_ib_add_gid(const union ib_gid *gid,
-			   const struct ib_gid_attr *attr,
+static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
 			   __always_unused void **context)
 {
 	return set_roce_addr(to_mdev(attr->device), attr->port_num,
-			     attr->index, gid, attr);
+			     attr->index, &attr->gid, attr);
 }
 
 static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a81bbe6..faa1be2d7727 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock);
 static LIST_HEAD(pvrdma_device_list);
 static struct workqueue_struct *event_wq;
 
-static int pvrdma_add_gid(const union ib_gid *gid,
-			  const struct ib_gid_attr *attr,
-			  void **context);
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -650,13 +648,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
 	return 0;
 }
 
-static int pvrdma_add_gid(const union ib_gid *gid,
-			  const struct ib_gid_attr *attr,
-			  void **context)
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct pvrdma_dev *dev = to_vdev(attr->device);
 
-	return pvrdma_add_gid_at_index(dev, gid,
+	return pvrdma_add_gid_at_index(dev, &attr->gid,
 				       ib_gid_type_to_pvrdma(attr->gid_type),
 				       attr->index);
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0a77afedabd0..1c153cc046ee 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2342,8 +2342,7 @@ struct ib_device {
 	 * concurrently for different ports. This function is only called when
 	 * roce_gid_table is used.
 	 */
-	int		           (*add_gid)(const union ib_gid *gid,
-					      const struct ib_gid_attr *attr,
+	int		           (*add_gid)(const struct ib_gid_attr *attr,
 					      void **context);
 	/* When calling del_gid, the HW vendor's driver should delete the
 	 * gid of device @device at gid index gid_index of port port_num
-- 
cgit v1.2.3


From bf399c2cadfa66d399d01d5a92a7bb0a112f1568 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 5 Jun 2018 08:40:17 +0300
Subject: IB/core: Introduce GID attribute get, put and hold APIs

This patch introduces three APIs, rdma_get_gid_attr(),
rdma_put_gid_attr(), and rdma_hold_gid_attr() which expose the reference
counting for GID table entries to the entire stack. The kref counting is
based on the struct ib_gid_attr pointer

Later patches will convert more cache query function to return struct
ib_gid_attrs.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c | 86 +++++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_cache.h         |  4 ++
 2 files changed, 90 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e569956c4e7a..d92525fb47c7 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -286,6 +286,11 @@ static void store_gid_entry(struct ib_gid_table *table,
 	write_unlock_irq(&table->rwlock);
 }
 
+static void get_gid_entry(struct ib_gid_table_entry *entry)
+{
+	kref_get(&entry->kref);
+}
+
 static void put_gid_entry(struct ib_gid_table_entry *entry)
 {
 	kref_put(&entry->kref, schedule_free_gid);
@@ -1208,6 +1213,87 @@ int ib_get_cached_port_state(struct ib_device   *device,
 }
 EXPORT_SYMBOL(ib_get_cached_port_state);
 
+/**
+ * rdma_get_gid_attr - Returns GID attributes for a port of a device
+ * at a requested gid_index, if a valid GID entry exists.
+ * @device:		The device to query.
+ * @port_num:		The port number on the device where the GID value
+ *			is to be queried.
+ * @index:		Index of the GID table entry whose attributes are to
+ *                      be queried.
+ *
+ * rdma_get_gid_attr() acquires reference count of gid attributes from the
+ * cached GID table. Caller must invoke rdma_put_gid_attr() to release
+ * reference to gid attribute regardless of link layer.
+ *
+ * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
+ * code.
+ */
+const struct ib_gid_attr *
+rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+{
+	const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+	struct ib_gid_table *table;
+	unsigned long flags;
+
+	if (!rdma_is_port_valid(device, port_num))
+		return ERR_PTR(-EINVAL);
+
+	table = rdma_gid_table(device, port_num);
+	if (index < 0 || index >= table->sz)
+		return ERR_PTR(-EINVAL);
+
+	read_lock_irqsave(&table->rwlock, flags);
+	if (!is_gid_entry_valid(table->data_vec[index]))
+		goto done;
+
+	get_gid_entry(table->data_vec[index]);
+	attr = &table->data_vec[index]->attr;
+done:
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return attr;
+}
+EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_put_gid_attr - Release reference to the GID attribute
+ * @attr:		Pointer to the GID attribute whose reference
+ *			needs to be released.
+ *
+ * rdma_put_gid_attr() must be used to release reference whose
+ * reference is acquired using rdma_get_gid_attr() or any APIs
+ * which returns a pointer to the ib_gid_attr regardless of link layer
+ * of IB or RoCE.
+ *
+ */
+void rdma_put_gid_attr(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(attr, struct ib_gid_table_entry, attr);
+
+	put_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_put_gid_attr);
+
+/**
+ * rdma_hold_gid_attr - Get reference to existing GID attribute
+ *
+ * @attr:		Pointer to the GID attribute whose reference
+ *			needs to be taken.
+ *
+ * Increase the reference count to a GID attribute to keep it from being
+ * freed. Callers are required to already be holding a reference to attribute.
+ *
+ */
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(attr, struct ib_gid_table_entry, attr);
+
+	get_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_hold_gid_attr);
+
 static int config_non_roce_gid_cache(struct ib_device *device,
 				     u8 port, int gid_tbl_len)
 {
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index a5f249828115..00ccd00d0596 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -150,4 +150,8 @@ int ib_get_cached_port_state(struct ib_device *device,
 			      enum ib_port_state *port_active);
 
 bool rdma_is_zero_gid(const union ib_gid *gid);
+const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
+					    u8 port_num, int index);
+void rdma_put_gid_attr(const struct ib_gid_attr *attr);
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
 #endif /* _IB_CACHE_H */
-- 
cgit v1.2.3


From c3d71b69a75cbbc03c8f43571b003ddadd40d056 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 5 Jun 2018 08:40:20 +0300
Subject: IB/core: Provide rdma_ versions of the gid cache API

These versions are functionally similar but all return gid_attrs and
related information via reference instead of via copy.

The old API is preserved, implemented as wrappers around the new, until
all callers can be converted.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c | 264 ++++++++++++++++++++++++----------------
 include/rdma/ib_cache.h         |  17 +++
 2 files changed, 178 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index d92525fb47c7..8a06e743c2dd 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -649,80 +649,37 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
 	return 0;
 }
 
-static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
-				    const union ib_gid *gid,
-				    const struct ib_gid_attr *val,
-				    unsigned long mask,
-				    u8 *port, u16 *index)
-{
-	struct ib_gid_table *table;
-	u8 p;
-	int local_index;
-	unsigned long flags;
-
-	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
-		table = ib_dev->cache.ports[p].gid;
-		read_lock_irqsave(&table->rwlock, flags);
-		local_index = find_gid(table, gid, val, false, mask, NULL);
-		if (local_index >= 0) {
-			if (index)
-				*index = local_index;
-			if (port)
-				*port = p + rdma_start_port(ib_dev);
-			read_unlock_irqrestore(&table->rwlock, flags);
-			return 0;
-		}
-		read_unlock_irqrestore(&table->rwlock, flags);
-	}
-
-	return -ENOENT;
-}
-
-static int ib_cache_gid_find(struct ib_device *ib_dev,
-			     const union ib_gid *gid,
-			     enum ib_gid_type gid_type,
-			     struct net_device *ndev, u8 *port,
-			     u16 *index)
-{
-	unsigned long mask = GID_ATTR_FIND_MASK_GID |
-			     GID_ATTR_FIND_MASK_GID_TYPE;
-	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-
-	if (ndev)
-		mask |= GID_ATTR_FIND_MASK_NETDEV;
-
-	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
-					mask, port, index);
-}
-
 /**
- * ib_find_cached_gid_by_port - Returns the GID table index where a specified
- * GID value occurs. It searches for the specified GID value in the local
- * software cache.
+ * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
+ * a valid GID entry for given search parameters. It searches for the specified
+ * GID value in the local software cache.
  * @device: The device to query.
  * @gid: The GID value to search for.
  * @gid_type: The GID type to search for.
  * @port_num: The port number of the device where the GID value should be
  *   searched.
- * @ndev: In RoCE, the net device of the device. Null means ignore.
- * @index: The index into the cached GID table where the GID was found. This
- *   parameter may be NULL.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * Returns sgid attributes if the GID is found with valid reference or
+ * returns ERR_PTR for the error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
  */
-int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
-			       const union ib_gid *gid,
-			       enum ib_gid_type gid_type,
-			       u8 port, struct net_device *ndev,
-			       u16 *index)
+const struct ib_gid_attr *
+rdma_find_gid_by_port(struct ib_device *ib_dev,
+		      const union ib_gid *gid,
+		      enum ib_gid_type gid_type,
+		      u8 port, struct net_device *ndev)
 {
 	int local_index;
 	struct ib_gid_table *table;
 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
 			     GID_ATTR_FIND_MASK_GID_TYPE;
 	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+	const struct ib_gid_attr *attr;
 	unsigned long flags;
 
 	if (!rdma_is_port_valid(ib_dev, port))
-		return -ENOENT;
+		return ERR_PTR(-ENOENT);
 
 	table = rdma_gid_table(ib_dev, port);
 
@@ -732,55 +689,49 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
 	read_lock_irqsave(&table->rwlock, flags);
 	local_index = find_gid(table, gid, &val, false, mask, NULL);
 	if (local_index >= 0) {
-		if (index)
-			*index = local_index;
+		get_gid_entry(table->data_vec[local_index]);
+		attr = &table->data_vec[local_index]->attr;
 		read_unlock_irqrestore(&table->rwlock, flags);
-		return 0;
+		return attr;
 	}
 
 	read_unlock_irqrestore(&table->rwlock, flags);
-	return -ENOENT;
+	return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+EXPORT_SYMBOL(rdma_find_gid_by_port);
 
 /**
- * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
- * GID value occurs
+ * rdma_find_gid_by_filter - Returns the GID table attribute where a
+ * specified GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
- * @port_num: The port number of the device where the GID value could be
+ * @port: The port number of the device where the GID value could be
  *   searched.
  * @filter: The filter function is executed on any matching GID in the table.
  *   If the filter function returns true, the corresponding index is returned,
  *   otherwise, we continue searching the GID table. It's guaranteed that
  *   while filter is executed, ndev field is valid and the structure won't
  *   change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
- *   parameter may be NULL.
  *
- * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * rdma_find_gid_by_filter() searches for the specified GID value
  * of which the filter function returns true in the port's GID table.
  * This function is only supported on RoCE ports.
  *
  */
-static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
-				       const union ib_gid *gid,
-				       u8 port,
-				       bool (*filter)(const union ib_gid *,
-						      const struct ib_gid_attr *,
-						      void *),
-				       void *context,
-				       u16 *index)
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+	struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+		       void *),
+	void *context)
 {
+	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
 	struct ib_gid_table *table;
-	unsigned int i;
 	unsigned long flags;
-	bool found = false;
-
+	unsigned int i;
 
 	if (!rdma_is_port_valid(ib_dev, port) ||
 	    !rdma_protocol_roce(ib_dev, port))
-		return -EPROTONOSUPPORT;
+		return ERR_PTR(-EPROTONOSUPPORT);
 
 	table = rdma_gid_table(ib_dev, port);
 
@@ -798,18 +749,34 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
 		memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr));
 
 		if (filter(gid, &attr, context)) {
-			found = true;
-			if (index)
-				*index = i;
+			get_gid_entry(table->data_vec[i]);
+			res = &table->data_vec[i]->attr;
 			break;
 		}
 	}
 	read_unlock_irqrestore(&table->rwlock, flags);
+	return res;
+}
+
+int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
+			       const union ib_gid *gid,
+			       enum ib_gid_type gid_type,
+			       u8 port, struct net_device *ndev,
+			       u16 *index)
+{
+	const struct ib_gid_attr *res;
 
-	if (!found)
-		return -ENOENT;
+	res = rdma_find_gid_by_port(ib_dev, gid, gid_type, port, ndev);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	if (index)
+		*index = res->index;
+	rdma_put_gid_attr(res);
 	return 0;
+
 }
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
@@ -1016,27 +983,109 @@ int ib_get_cached_gid(struct ib_device *device,
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 /**
- * ib_find_cached_gid - Returns the port number and GID table index where
- *   a specified GID value occurs.
+ * rdma_query_gid - Read the GID content from the GID software cache
+ * @device:		Device to query the GID
+ * @port_num:		Port number of the device
+ * @index:		Index of the GID table entry to read
+ * @gid:		Pointer to GID where to store the entry's GID
+ *
+ * rdma_query_gid() only reads the GID entry content for requested device,
+ * port and index. It reads for IB, RoCE and iWarp link layers.  It doesn't
+ * hold any reference to the GID table entry in the HCA or software cache.
+ *
+ * Returns 0 on success or appropriate error code.
+ *
+ */
+int rdma_query_gid(struct ib_device *device, u8 port_num,
+		   int index, union ib_gid *gid)
+{
+	struct ib_gid_table *table;
+	unsigned long flags;
+	int res = -EINVAL;
+
+	if (!rdma_is_port_valid(device, port_num))
+		return -EINVAL;
+
+	table = rdma_gid_table(device, port_num);
+	read_lock_irqsave(&table->rwlock, flags);
+
+	if (index < 0 || index >= table->sz ||
+	    !is_gid_entry_valid(table->data_vec[index]))
+		goto done;
+
+	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
+	res = 0;
+
+done:
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return res;
+}
+EXPORT_SYMBOL(rdma_query_gid);
+
+/**
+ * rdma_find_gid - Returns SGID attributes if the matching GID is found.
  * @device: The device to query.
  * @gid: The GID value to search for.
  * @gid_type: The GID type to search for.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
- * @port_num: The port number of the device where the GID value was found.
- * @index: The index into the cached GID table where the GID was found.  This
- *   parameter may be NULL.
  *
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
+ * rdma_find_gid() searches for the specified GID value in the software cache.
+ *
+ * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
+ * error. The caller must invoke rdma_put_gid_attr() to release the reference.
+ *
  */
-int ib_find_cached_gid(struct ib_device *device,
-		       const union ib_gid *gid,
-		       enum ib_gid_type gid_type,
-		       struct net_device *ndev,
-		       u8               *port_num,
-		       u16              *index)
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+					const union ib_gid *gid,
+					enum ib_gid_type gid_type,
+					struct net_device *ndev)
 {
-	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
+	unsigned long mask = GID_ATTR_FIND_MASK_GID |
+			     GID_ATTR_FIND_MASK_GID_TYPE;
+	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
+	u8 p;
+
+	if (ndev)
+		mask |= GID_ATTR_FIND_MASK_NETDEV;
+
+	for (p = 0; p < device->phys_port_cnt; p++) {
+		struct ib_gid_table *table;
+		unsigned long flags;
+		int index;
+
+		table = device->cache.ports[p].gid;
+		read_lock_irqsave(&table->rwlock, flags);
+		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
+		if (index >= 0) {
+			const struct ib_gid_attr *attr;
+
+			get_gid_entry(table->data_vec[index]);
+			attr = &table->data_vec[index]->attr;
+			read_unlock_irqrestore(&table->rwlock, flags);
+			return attr;
+		}
+		read_unlock_irqrestore(&table->rwlock, flags);
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL(rdma_find_gid);
+
+int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid,
+		       enum ib_gid_type gid_type, struct net_device *ndev,
+		       u8 *port_num, u16 *index)
+{
+	const struct ib_gid_attr *res;
+
+	res = rdma_find_gid(device, gid, gid_type, ndev);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+	if (port_num)
+		*port_num = res->port_num;
+	if (index)
+		*index = res->index;
+	rdma_put_gid_attr(res);
+	return 0;
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
@@ -1048,13 +1097,22 @@ int ib_find_gid_by_filter(struct ib_device *device,
 					 void *),
 			  void *context, u16 *index)
 {
+	const struct ib_gid_attr *res;
+
 	/* Only RoCE GID table supports filter function */
 	if (!rdma_protocol_roce(device, port_num) && filter)
 		return -EPROTONOSUPPORT;
 
-	return ib_cache_gid_find_by_filter(device, gid,
-					   port_num, filter,
-					   context, index);
+	res = rdma_find_gid_by_filter(device, gid, port_num, filter,
+				      context);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	if (index)
+		*index = res->index;
+
+	rdma_put_gid_attr(res);
+	return 0;
 }
 
 int ib_get_cached_pkey(struct ib_device *device,
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 00ccd00d0596..059f7d894939 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -54,6 +54,8 @@ int ib_get_cached_gid(struct ib_device    *device,
 		      int                  index,
 		      union ib_gid        *gid,
 		      struct ib_gid_attr  *attr);
+int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
+		   union ib_gid *gid);
 
 int ib_find_cached_gid(struct ib_device *device,
 		       const union ib_gid *gid,
@@ -61,6 +63,10 @@ int ib_find_cached_gid(struct ib_device *device,
 		       struct net_device *ndev,
 		       u8               *port_num,
 		       u16              *index);
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+					const union ib_gid *gid,
+					enum ib_gid_type gid_type,
+					struct net_device *ndev);
 
 int ib_find_cached_gid_by_port(struct ib_device *device,
 			       const union ib_gid *gid,
@@ -68,6 +74,11 @@ int ib_find_cached_gid_by_port(struct ib_device *device,
 			       u8               port_num,
 			       struct net_device *ndev,
 			       u16              *index);
+const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
+						const union ib_gid *gid,
+						enum ib_gid_type gid_type,
+						u8 port,
+						struct net_device *ndev);
 
 int ib_find_gid_by_filter(struct ib_device *device,
 			  const union ib_gid *gid,
@@ -76,6 +87,12 @@ int ib_find_gid_by_filter(struct ib_device *device,
 					 const struct ib_gid_attr *,
 					 void *),
 			  void *context, u16 *index);
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+	struct ib_device *device, const union ib_gid *gid, u8 port_num,
+	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+		       void *),
+	void *context);
+
 /**
  * ib_get_cached_pkey - Returns a cached PKey table entry
  * @device: The device to query.
-- 
cgit v1.2.3


From 1dfce294577120ec60399a64094ea00e4247103d Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 5 Jun 2018 08:40:22 +0300
Subject: IB: Replace ib_query_gid/ib_get_cached_gid with rdma_query_gid

If the gid_attr argument is NULL then the functions behave identically to
rdma_query_gid. ib_query_gid just calls ib_get_cached_gid, so everything
can be consolidated to one function.

Now that all callers either use rdma_query_gid() or ib_get_cached_gid(),
ib_query_gid() API is removed.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cm.c              |  9 ++++-----
 drivers/infiniband/core/cma.c             |  6 +++---
 drivers/infiniband/core/device.c          | 21 +--------------------
 drivers/infiniband/core/mad.c             |  4 ++--
 drivers/infiniband/core/verbs.c           |  6 ++----
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   |  3 ++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  4 ++--
 drivers/infiniband/ulp/srp/ib_srp.c       |  2 +-
 drivers/infiniband/ulp/srpt/ib_srpt.c     |  3 +--
 include/rdma/ib_verbs.h                   |  4 ----
 10 files changed, 18 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 27a7b0a2e27a..800ff69e09b3 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1646,7 +1646,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
 	    (ib_is_opa_gid(&path->sgid))) {
 		union ib_gid sgid;
 
-		if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+		if (rdma_query_gid(dev, port_num, 0, &sgid)) {
 			dev_warn(&dev->dev,
 				 "Error updating sgid in CM request\n");
 			return;
@@ -1997,10 +1997,9 @@ static int cm_req_handler(struct cm_work *work)
 	if (ret) {
 		int err;
 
-		err = ib_get_cached_gid(work->port->cm_dev->ib_device,
-					work->port->port_num, 0,
-					&work->path[0].sgid,
-					NULL);
+		err = rdma_query_gid(work->port->cm_dev->ib_device,
+				     work->port->port_num, 0,
+				     &work->path[0].sgid);
 		if (err)
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
 				       NULL, 0, NULL, 0);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bff10ab141b0..c2f478761ae9 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -732,8 +732,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 
 			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
 				continue;
-			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
-						       &gid, NULL);
+			for (i = 0; !rdma_query_gid(cur_dev->device,
+						    p, i, &gid);
 			     i++) {
 				if (!memcmp(&gid, dgid, sizeof(gid))) {
 					cma_dev = cur_dev;
@@ -2791,7 +2791,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
 	p = 1;
 
 port_found:
-	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
+	ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6fa4c59dc7a7..b8144f194777 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -861,25 +861,6 @@ int ib_query_port(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_query_port);
 
-/**
- * ib_query_gid - Get GID table entry
- * @device:Device to query
- * @port_num:Port number to query
- * @index:GID table index to query
- * @gid:Returned GID
- * @attr: Returned GID attributes related to this GID index (only in RoCE).
- *   NULL means ignore.
- *
- * ib_query_gid() fetches the specified GID table entry from the cache.
- */
-int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid,
-		 struct ib_gid_attr *attr)
-{
-	return ib_get_cached_gid(device, port_num, index, gid, attr);
-}
-EXPORT_SYMBOL(ib_query_gid);
-
 /**
  * ib_enum_roce_netdev - enumerate all RoCE ports
  * @ib_dev : IB device we want to query
@@ -1057,7 +1038,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
 			continue;
 
 		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
-			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+			ret = rdma_query_gid(device, port, i, &tmp_gid);
 			if (ret)
 				return ret;
 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f742ae7a768b..db0f93a9d507 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1896,8 +1896,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
 			const struct ib_global_route *grh =
 					rdma_ah_read_grh(&attr);
 
-			if (ib_get_cached_gid(device, port_num,
-					      grh->sgid_index, &sgid, NULL))
+			if (rdma_query_gid(device, port_num,
+					   grh->sgid_index, &sgid))
 				return 0;
 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
 				       16);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 0b56828c1319..5e34e359f7b4 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -516,10 +516,8 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
 
 	grh = rdma_ah_retrieve_grh(ah_attr);
 
-	ret = ib_query_gid(device,
-			   rdma_ah_get_port_num(ah_attr),
-			   grh->sgid_index,
-			   &sgid, &sgid_attr);
+	ret = ib_get_cached_gid(device, rdma_ah_get_port_num(ah_attr),
+				grh->sgid_index, &sgid, &sgid_attr);
 	if (ret || !sgid_attr.ndev) {
 		if (!ret)
 			ret = -ENXIO;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f47f9ace1f48..fb3728bf7e40 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -40,6 +40,7 @@
 
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <rdma/ib_cache.h>
 
 #include "ipoib.h"
 
@@ -1069,7 +1070,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	bool ret = false;
 
 	netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
-	if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+	if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
 		return false;
 
 	netif_addr_lock_bh(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95bc0f3..f4fac48aeade 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2287,9 +2287,9 @@ static struct net_device *ipoib_add_port(const char *format,
 	priv->dev->broadcast[8] = priv->pkey >> 8;
 	priv->dev->broadcast[9] = priv->pkey & 0xff;
 
-	result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
+	result = rdma_query_gid(hca, port, 0, &priv->local_gid);
 	if (result) {
-		pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
+		pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
 			hca->name, port, result);
 		goto device_init_failed;
 	}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9786b24b956f..34b1aaffa521 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3843,7 +3843,7 @@ static ssize_t srp_create_target(struct device *dev,
 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
 	INIT_WORK(&target->remove_work, srp_remove_work);
 	spin_lock_init(&target->lock);
-	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
+	ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 3081c629a7f7..698f7779e231 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
 	sport->sm_lid = port_attr.sm_lid;
 	sport->lid = port_attr.lid;
 
-	ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
-			   NULL);
+	ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
 	if (ret)
 		goto err_query_port;
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1c153cc046ee..67c458215512 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3046,10 +3046,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
 	return rdma_protocol_iwarp(dev, port_num);
 }
 
-int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid,
-		 struct ib_gid_attr *attr);
-
 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
 			 int state);
 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
-- 
cgit v1.2.3


From 8d9ec9addd6c492a99d3699212653cba92989767 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 13 Jun 2018 10:22:03 +0300
Subject: IB/core: Add a sgid_attr pointer to struct rdma_ah_attr

The sgid_attr will ultimately replace the sgid_index in the ah_attr.
This will allow for all layers to have a consistent view of what
gid table entry was selected as processing runs through all stages of the
stack.

This commit introduces the pointer and ensures it is set before calling
any driver callback that includes a struct ah_attr callback, allowing
future patches to adjust both the drivers and the callers to use
sgid_attr instead of sgid_index.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/verbs.c | 252 +++++++++++++++++++++++++++++++++++-----
 include/rdma/ib_verbs.h         |   7 ++
 2 files changed, 232 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5e34e359f7b4..26ee1de1cc26 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,6 +326,85 @@ EXPORT_SYMBOL(ib_dealloc_pd);
 
 /* Address handles */
 
+/*
+ * Validate that the rdma_ah_attr is valid for the device before passing it
+ * off to the driver.
+ */
+static int rdma_check_ah_attr(struct ib_device *device,
+			      struct rdma_ah_attr *ah_attr)
+{
+	if (!rdma_is_port_valid(device, ah_attr->port_num))
+		return -EINVAL;
+
+	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+	    !(ah_attr->ah_flags & IB_AH_GRH))
+		return -EINVAL;
+
+	if (ah_attr->grh.sgid_attr) {
+		/*
+		 * Make sure the passed sgid_attr is consistent with the
+		 * parameters
+		 */
+		if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
+		    ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
+ * On success the caller is responsible to call rdma_unfill_sgid_attr().
+ */
+static int rdma_fill_sgid_attr(struct ib_device *device,
+			       struct rdma_ah_attr *ah_attr,
+			       const struct ib_gid_attr **old_sgid_attr)
+{
+	const struct ib_gid_attr *sgid_attr;
+	struct ib_global_route *grh;
+	int ret;
+
+	*old_sgid_attr = ah_attr->grh.sgid_attr;
+
+	ret = rdma_check_ah_attr(device, ah_attr);
+	if (ret)
+		return ret;
+
+	if (!(ah_attr->ah_flags & IB_AH_GRH))
+		return 0;
+
+	grh = rdma_ah_retrieve_grh(ah_attr);
+	if (grh->sgid_attr)
+		return 0;
+
+	sgid_attr =
+		rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
+	if (IS_ERR(sgid_attr))
+		return PTR_ERR(sgid_attr);
+
+	/* Move ownerhip of the kref into the ah_attr */
+	grh->sgid_attr = sgid_attr;
+	return 0;
+}
+
+static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
+				  const struct ib_gid_attr *old_sgid_attr)
+{
+	/*
+	 * Fill didn't change anything, the caller retains ownership of
+	 * whatever it passed
+	 */
+	if (ah_attr->grh.sgid_attr == old_sgid_attr)
+		return;
+
+	/*
+	 * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller
+	 * doesn't see any change in the rdma_ah_attr. If we get here
+	 * old_sgid_attr is NULL.
+	 */
+	rdma_destroy_ah_attr(ah_attr);
+}
+
 static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 				     struct rdma_ah_attr *ah_attr,
 				     struct ib_udata *udata)
@@ -345,9 +424,30 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 	return ah;
 }
 
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
 struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
 {
-	return _rdma_create_ah(pd, ah_attr, NULL);
+	const struct ib_gid_attr *old_sgid_attr;
+	struct ib_ah *ah;
+	int ret;
+
+	ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ah = _rdma_create_ah(pd, ah_attr, NULL);
+
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ah;
 }
 EXPORT_SYMBOL(rdma_create_ah);
 
@@ -368,15 +468,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
 				  struct rdma_ah_attr *ah_attr,
 				  struct ib_udata *udata)
 {
+	const struct ib_gid_attr *old_sgid_attr;
+	struct ib_ah *ah;
 	int err;
 
+	err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+	if (err)
+		return ERR_PTR(err);
+
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		err = ib_resolve_eth_dmac(pd->device, ah_attr);
-		if (err)
-			return ERR_PTR(err);
+		if (err) {
+			ah = ERR_PTR(err);
+			goto out;
+		}
 	}
 
-	return _rdma_create_ah(pd, ah_attr, udata);
+	ah = _rdma_create_ah(pd, ah_attr, udata);
+
+out:
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ah;
 }
 EXPORT_SYMBOL(rdma_create_user_ah);
 
@@ -632,6 +744,49 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
 
+/**
+ * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
+ * of the reference
+ *
+ * @attr:	Pointer to AH attribute structure
+ * @dgid:	Destination GID
+ * @flow_label:	Flow label
+ * @hop_limit:	Hop limit
+ * @traffic_class: traffic class
+ * @sgid_attr:	Pointer to SGID attribute
+ *
+ * This takes ownership of the sgid_attr reference. The caller must ensure
+ * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
+ * calling this function.
+ */
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+			     u32 flow_label, u8 hop_limit, u8 traffic_class,
+			     const struct ib_gid_attr *sgid_attr)
+{
+	rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
+			traffic_class);
+	attr->grh.sgid_attr = sgid_attr;
+}
+EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
+
+/**
+ * rdma_destroy_ah_attr - Release reference to SGID attribute of
+ * ah attribute.
+ * @ah_attr: Pointer to ah attribute
+ *
+ * Release reference to the SGID attribute of the ah attribute if it is
+ * non NULL. It is safe to call this multiple times, and safe to call it on
+ * a zero initialized ah_attr.
+ */
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
+{
+	if (ah_attr->grh.sgid_attr) {
+		rdma_put_gid_attr(ah_attr->grh.sgid_attr);
+		ah_attr->grh.sgid_attr = NULL;
+	}
+}
+EXPORT_SYMBOL(rdma_destroy_ah_attr);
+
 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
 				   const struct ib_grh *grh, u8 port_num)
 {
@@ -648,17 +803,29 @@ EXPORT_SYMBOL(ib_create_ah_from_wc);
 
 int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 {
+	const struct ib_gid_attr *old_sgid_attr;
+	int ret;
+
 	if (ah->type != ah_attr->type)
 		return -EINVAL;
 
-	return ah->device->modify_ah ?
+	ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
+	if (ret)
+		return ret;
+
+	ret = ah->device->modify_ah ?
 		ah->device->modify_ah(ah, ah_attr) :
 		-EOPNOTSUPP;
+
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ret;
 }
 EXPORT_SYMBOL(rdma_modify_ah);
 
 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 {
+	ah_attr->grh.sgid_attr = NULL;
+
 	return ah->device->query_ah ?
 		ah->device->query_ah(ah, ah_attr) :
 		-EOPNOTSUPP;
@@ -1294,9 +1461,6 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
 	int           ret = 0;
 	struct ib_global_route *grh;
 
-	if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
-		return -EINVAL;
-
 	grh = rdma_ah_retrieve_grh(ah_attr);
 
 	if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
@@ -1315,6 +1479,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
 	return ret;
 }
 
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+	return (qp->qp_type == IB_QPT_UC ||
+		qp->qp_type == IB_QPT_RC ||
+		qp->qp_type == IB_QPT_XRC_INI ||
+		qp->qp_type == IB_QPT_XRC_TGT);
+}
+
 /**
  * IB core internal function to perform QP attributes modification.
  */
@@ -1322,8 +1494,43 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 			 int attr_mask, struct ib_udata *udata)
 {
 	u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+	const struct ib_gid_attr *old_sgid_attr_av;
+	const struct ib_gid_attr *old_sgid_attr_alt_av;
 	int ret;
 
+	/*
+	 * Today the core code can only handle alternate paths and APM for IB
+	 * ban them in roce mode.
+	 */
+	if (attr_mask & IB_QP_ALT_PATH &&
+	    !rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num))
+		return -EINVAL;
+
+	if (attr_mask & IB_QP_AV) {
+		ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
+					  &old_sgid_attr_av);
+		if (ret)
+			return ret;
+	}
+	if (attr_mask & IB_QP_ALT_PATH) {
+		ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
+					  &old_sgid_attr_alt_av);
+		if (ret)
+			goto out_av;
+	}
+
+	/*
+	 * If the user provided the qp_attr then we have to resolve it. Kernel
+	 * users have to provide already resolved rdma_ah_attr's
+	 */
+	if (udata && (attr_mask & IB_QP_AV) &&
+	    attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+	    is_qp_type_connected(qp)) {
+		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+		if (ret)
+			goto out;
+	}
+
 	if (rdma_ib_or_roce(qp->device, port)) {
 		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
 			pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
@@ -1342,17 +1549,15 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 	if (!ret && (attr_mask & IB_QP_PORT))
 		qp->port = attr->port_num;
 
+out:
+	if (attr_mask & IB_QP_ALT_PATH)
+		rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
+out_av:
+	if (attr_mask & IB_QP_AV)
+		rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
 	return ret;
 }
 
-static bool is_qp_type_connected(const struct ib_qp *qp)
-{
-	return (qp->qp_type == IB_QPT_UC ||
-		qp->qp_type == IB_QPT_RC ||
-		qp->qp_type == IB_QPT_XRC_INI ||
-		qp->qp_type == IB_QPT_XRC_TGT);
-}
-
 /**
  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
  * @ib_qp: The QP to modify.
@@ -1367,17 +1572,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
 			    int attr_mask, struct ib_udata *udata)
 {
-	struct ib_qp *qp = ib_qp->real_qp;
-	int ret;
-
-	if (attr_mask & IB_QP_AV &&
-	    attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
-	    is_qp_type_connected(qp)) {
-		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
-		if (ret)
-			return ret;
-	}
-	return _ib_modify_qp(qp, attr, attr_mask, udata);
+	return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
 }
 EXPORT_SYMBOL(ib_modify_qp_with_udata);
 
@@ -1449,6 +1644,9 @@ int ib_query_qp(struct ib_qp *qp,
 		int qp_attr_mask,
 		struct ib_qp_init_attr *qp_init_attr)
 {
+	qp_attr->ah_attr.grh.sgid_attr = NULL;
+	qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
+
 	return qp->device->query_qp ?
 		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
 		-EOPNOTSUPP;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 67c458215512..a3a4b8335668 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -690,6 +690,7 @@ struct ib_event_handler {
 	} while (0)
 
 struct ib_global_route {
+	const struct ib_gid_attr *sgid_attr;
 	union ib_gid	dgid;
 	u32		flow_label;
 	u8		sgid_index;
@@ -4026,8 +4027,14 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
 	grh->sgid_index = sgid_index;
 	grh->hop_limit = hop_limit;
 	grh->traffic_class = traffic_class;
+	grh->sgid_attr = NULL;
 }
 
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+			     u32 flow_label, u8 hop_limit, u8 traffic_class,
+			     const struct ib_gid_attr *sgid_attr);
+
 /**
  * rdma_ah_find_type - Return address handle type.
  *
-- 
cgit v1.2.3


From d97099fe53ed9ab8b17d084bed0099feb08a48c1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 13 Jun 2018 10:22:05 +0300
Subject: IB{cm, core}: Introduce and use ah_attr copy, move, replace APIs

Introduce AH attribute copy, move and replace APIs to be used by core and
provider drivers.

In CM code flow when ah attribute might be re-initialized twice while
processing incoming request, or initialized once while from path record
while sending out CM requests. Therefore use rdma_move_ah_attr API to
handle such scenarios instead of memcpy().

Provider drivers keeps a copy ah_attr during the lifetime of the ah.
Therefore, use rdma_replace_ah_attr() which conditionally release
reference to old ah_attr and holds reference to new attribute whose
referrence is released when the AH is freed.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cm.c       |  4 +--
 drivers/infiniband/core/verbs.c    | 54 ++++++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/qedr/verbs.c |  3 ++-
 drivers/infiniband/sw/rdmavt/ah.c  |  4 ++-
 drivers/infiniband/sw/rdmavt/qp.c  |  6 +++--
 include/rdma/ib_verbs.h            |  5 ++++
 6 files changed, 70 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 800ff69e09b3..0f39a879c91d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	if (ret)
 		return ret;
 
-	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
@@ -569,7 +569,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
 	if (ret)
 		return ret;
-	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 45c095e478ec..2c7129cccc6e 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,6 +326,60 @@ EXPORT_SYMBOL(ib_dealloc_pd);
 
 /* Address handles */
 
+/**
+ * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest:       Pointer to destination ah_attr. Contents of the destination
+ *              pointer is assumed to be invalid and attribute are overwritten.
+ * @src:        Pointer to source ah_attr.
+ */
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src)
+{
+	*dest = *src;
+	if (dest->grh.sgid_attr)
+		rdma_hold_gid_attr(dest->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_copy_ah_attr);
+
+/**
+ * rdma_replace_ah_attr - Replace valid ah_attr with new new one.
+ * @old:        Pointer to existing ah_attr which needs to be replaced.
+ *              old is assumed to be valid or zero'd
+ * @new:        Pointer to the new ah_attr.
+ *
+ * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
+ * old the ah_attr is valid; after that it copies the new attribute and holds
+ * the reference to the replaced ah_attr.
+ */
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new)
+{
+	rdma_destroy_ah_attr(old);
+	*old = *new;
+	if (old->grh.sgid_attr)
+		rdma_hold_gid_attr(old->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_replace_ah_attr);
+
+/**
+ * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
+ * @dest:       Pointer to destination ah_attr to copy to.
+ *              dest is assumed to be valid or zero'd
+ * @src:        Pointer to the new ah_attr.
+ *
+ * rdma_move_ah_attr() first releases any reference in the destination ah_attr
+ * if it is valid. This also transfers ownership of internal references from
+ * src to dest, making src invalid in the process. No new reference of the src
+ * ah_attr is taken.
+ */
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
+{
+	rdma_destroy_ah_attr(dest);
+	*dest = *src;
+	src->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_move_ah_attr);
+
 /*
  * Validate that the rdma_ah_attr is valid for the device before passing it
  * off to the driver.
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f7ac8fc9b531..5b2a79b27036 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2302,7 +2302,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
 
-	ah->attr = *attr;
+	rdma_copy_ah_attr(&ah->attr, attr);
 
 	return &ah->ibah;
 }
@@ -2311,6 +2311,7 @@ int qedr_destroy_ah(struct ib_ah *ibah)
 {
 	struct qedr_ah *ah = get_qedr_ah(ibah);
 
+	rdma_destroy_ah_attr(&ah->attr);
 	kfree(ah);
 	return 0;
 }
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index ba3639a0d77c..89ec0f64abfc 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
 	dev->n_ahs_allocated++;
 	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
-	ah->attr = *ah_attr;
+	rdma_copy_ah_attr(&ah->attr, ah_attr);
+
 	atomic_set(&ah->refcount, 0);
 
 	if (dev->driver_f.notify_new_ah)
@@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
 	dev->n_ahs_allocated--;
 	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
+	rdma_destroy_ah_attr(&ah->attr);
 	kfree(ah);
 
 	return 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 41183bd665ca..815f94c17c48 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1336,13 +1336,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		qp->qp_access_flags = attr->qp_access_flags;
 
 	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
+		rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
 		qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
 		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
+		rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
 		qp->s_alt_pkey_index = attr->alt_pkey_index;
 	}
 
@@ -1459,6 +1459,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
 	vfree(qp->s_wq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
 	kfree(qp->s_ack_queue);
+	rdma_destroy_ah_attr(&qp->remote_ah_attr);
+	rdma_destroy_ah_attr(&qp->alt_ah_attr);
 	kfree(qp);
 	return 0;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a3a4b8335668..1e5c1e8ba282 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4034,6 +4034,11 @@ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
 void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
 			     u32 flow_label, u8 hop_limit, u8 traffic_class,
 			     const struct ib_gid_attr *sgid_attr);
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src);
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new);
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
 
 /**
  * rdma_ah_find_type - Return address handle type.
-- 
cgit v1.2.3


From 47ec38666210485de860ab24675acb3d2e7d4954 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Wed, 13 Jun 2018 10:22:06 +0300
Subject: RDMA: Convert drivers to use sgid_attr instead of sgid_index

The core code now ensures that all driver callbacks that receive an
rdma_ah_attrs will have a sgid_attr's pointer if there is a GRH present.

Drivers can use this pointer instead of calling a query function with
sgid_index. This simplifies the drivers and also avoids races where a
gid_index lookup may return different data if it is changed.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 63 +++++++++++---------------------
 drivers/infiniband/hw/hns/hns_roce_ah.c  | 19 ++--------
 drivers/infiniband/hw/mlx4/ah.c          | 16 +++-----
 drivers/infiniband/hw/mlx4/qp.c          | 31 ++++++----------
 drivers/infiniband/hw/mlx5/ah.c          | 11 +-----
 drivers/infiniband/hw/mlx5/main.c        | 32 ++--------------
 drivers/infiniband/hw/mlx5/mlx5_ib.h     |  6 +--
 drivers/infiniband/hw/mlx5/qp.c          | 10 ++---
 drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 24 +++++-------
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 21 +++++------
 drivers/infiniband/hw/qedr/verbs.c       | 22 ++++-------
 drivers/infiniband/sw/rxe/rxe_av.c       | 11 +++---
 drivers/infiniband/sw/rxe/rxe_loc.h      |  5 +--
 drivers/infiniband/sw/rxe/rxe_qp.c       | 23 +-----------
 drivers/infiniband/sw/rxe/rxe_verbs.c    | 31 +++-------------
 include/rdma/ib_verbs.h                  |  8 ++--
 16 files changed, 96 insertions(+), 237 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 62eb9e3346d5..134360236c2c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -672,8 +672,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
 	int rc;
 	u8 nw_type;
 
-	struct ib_gid_attr sgid_attr;
-
 	if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
 		dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
 		return ERR_PTR(-EINVAL);
@@ -704,20 +702,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
 				    grh->dgid.raw) &&
 	    !rdma_link_local_addr((struct in6_addr *)
 				  grh->dgid.raw)) {
-		union ib_gid sgid;
+		const struct ib_gid_attr *sgid_attr;
 
-		rc = ib_get_cached_gid(&rdev->ibdev, 1,
-				       grh->sgid_index, &sgid,
-				       &sgid_attr);
-		if (rc) {
-			dev_err(rdev_to_dev(rdev),
-				"Failed to query gid at index %d",
-				grh->sgid_index);
-			goto fail;
-		}
-		dev_put(sgid_attr.ndev);
+		sgid_attr = grh->sgid_attr;
 		/* Get network header type for this GID */
-		nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+		nw_type = rdma_gid_attr_network_type(sgid_attr);
 		switch (nw_type) {
 		case RDMA_NETWORK_IPV4:
 			ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
@@ -1598,9 +1587,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
 	enum ib_qp_state curr_qp_state, new_qp_state;
 	int rc, entries;
-	int status;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
 	unsigned int flags;
 	u8 nw_type;
 
@@ -1667,6 +1653,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	if (qp_attr_mask & IB_QP_AV) {
 		const struct ib_global_route *grh =
 			rdma_ah_read_grh(&qp_attr->ah_attr);
+		const struct ib_gid_attr *sgid_attr;
 
 		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
 				     CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1690,29 +1677,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		ether_addr_copy(qp->qplib_qp.ah.dmac,
 				qp_attr->ah_attr.roce.dmac);
 
-		status = ib_get_cached_gid(&rdev->ibdev, 1,
-					   grh->sgid_index,
-					   &sgid, &sgid_attr);
-		if (!status) {
-			memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
-			       ETH_ALEN);
-			dev_put(sgid_attr.ndev);
-			nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
-							 &sgid);
-			switch (nw_type) {
-			case RDMA_NETWORK_IPV4:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
-				break;
-			case RDMA_NETWORK_IPV6:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
-				break;
-			default:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
-				break;
-			}
+		sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
+		memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr,
+		       ETH_ALEN);
+		nw_type = rdma_gid_attr_network_type(sgid_attr);
+		switch (nw_type) {
+		case RDMA_NETWORK_IPV4:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+			break;
+		case RDMA_NETWORK_IPV6:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+			break;
+		default:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+			break;
 		}
 	}
 
@@ -1934,7 +1915,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 		dev_put(sgid_attr.ndev);
 	}
 	/* Get network header type for this GID */
-	nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	nw_type = rdma_gid_attr_network_type(&sgid_attr);
 	switch (nw_type) {
 	case RDMA_NETWORK_IPV4:
 		nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index d74928621559..14efa3b9adb2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 				 struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
+	const struct ib_gid_attr *gid_attr;
 	struct device *dev = hr_dev->dev;
-	struct ib_gid_attr gid_attr;
 	struct hns_roce_ah *ah;
 	u16 vlan_tag = 0xffff;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
-	union ib_gid sgid;
-	int ret;
 
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
@@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 	/* Get mac address */
 	memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
 
-	/* Get source gid */
-	ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
-				grh->sgid_index, &sgid, &gid_attr);
-	if (ret) {
-		dev_err(dev, "get sgid failed! ret = %d\n", ret);
-		kfree(ah);
-		return ERR_PTR(ret);
-	}
-
-	if (is_vlan_dev(gid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
-	dev_put(gid_attr.ndev);
+	gid_attr = ah_attr->grh.sgid_attr;
+	if (is_vlan_dev(gid_attr->ndev))
+		vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
 
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (rdma_ah_get_sl(ah_attr) &
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 9345d5b546d1..1ab3681acdcd 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
 				    struct mlx4_ib_ah *ah)
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+	const struct ib_gid_attr *gid_attr;
 	struct mlx4_dev *dev = ibdev->dev;
 	int is_mcast = 0;
 	struct in6_addr in6;
 	u16 vlan_tag = 0xffff;
-	union ib_gid sgid;
-	struct ib_gid_attr gid_attr;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
 	int ret;
 
@@ -96,15 +95,12 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
 		is_mcast = 1;
 
 	memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
-	ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
-				grh->sgid_index, &sgid, &gid_attr);
-	if (ret)
-		return ERR_PTR(ret);
 	eth_zero_addr(ah->av.eth.s_mac);
-	if (is_vlan_dev(gid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
-	memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
-	dev_put(gid_attr.ndev);
+	gid_attr = ah_attr->grh.sgid_attr;
+	if (is_vlan_dev(gid_attr->ndev))
+		vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+	memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
+
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 3b8045fd23ed..1538ce6e9dac 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2176,6 +2176,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 {
 	struct ib_uobject *ibuobject;
 	struct ib_srq  *ibsrq;
+	const struct ib_gid_attr *gid_attr = NULL;
 	struct ib_rwq_ind_table *rwq_ind_tbl;
 	enum ib_qp_type qp_type;
 	struct mlx4_ib_dev *dev;
@@ -2356,29 +2357,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	if (attr_mask & IB_QP_AV) {
 		u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
 			attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		union ib_gid gid;
-		struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
 		u16 vlan = 0xffff;
 		u8 smac[ETH_ALEN];
-		int status = 0;
 		int is_eth =
 			rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
 			rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
 
 		if (is_eth) {
-			int index =
-				rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
-
-			status = ib_get_cached_gid(&dev->ib_dev, port_num,
-						   index, &gid, &gid_attr);
-			if (!status) {
-				vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
-				memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
-				dev_put(gid_attr.ndev);
-			}
+			gid_attr = attr->ah_attr.grh.sgid_attr;
+			vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
+			memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN);
 		}
-		if (status)
-			goto out;
 
 		if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
 				  port_num, vlan, smac))
@@ -2389,7 +2378,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 
 		if (is_eth &&
 		    (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
-			u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+			u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type);
 
 			if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
 				err = -EINVAL;
@@ -3181,10 +3170,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 						       guid_cache[ah->av.ib.gid_index];
 			} else {
-				ib_get_cached_gid(ib_dev,
-						  be32_to_cpu(ah->av.ib.port_pd) >> 24,
-						  ah->av.ib.gid_index,
-						  &sqp->ud_header.grh.source_gid, NULL);
+				err = rdma_query_gid(ib_dev,
+					be32_to_cpu(ah->av.ib.port_pd) >> 24,
+					ah->av.ib.gid_index,
+					&sqp->ud_header.grh.source_gid);
+				if (err)
+					return err;
 			}
 		}
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index e6bde32a83f3..ffd03bf1a71e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
 				  struct rdma_ah_attr *ah_attr)
 {
 	enum ib_gid_type gid_type;
-	int err;
 
 	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
 		const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
 	ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
 
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
-		err = mlx5_get_roce_gid_type(dev, ah_attr->port_num,
-					     ah_attr->grh.sgid_index,
-					     &gid_type);
-		if (err)
-			return ERR_PTR(err);
+		gid_type = ah_attr->grh.sgid_attr->gid_type;
 
 		memcpy(ah->av.rmac, ah_attr->roce.dmac,
 		       sizeof(ah_attr->roce.dmac));
 		ah->av.udp_sport =
-		mlx5_get_roce_udp_sport(dev,
-					rdma_ah_get_port_num(ah_attr),
-					rdma_ah_read_grh(ah_attr)->sgid_index);
+			mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
 		ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
 #define MLX5_ECN_ENABLED BIT(1)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 94669df81342..e6d88f32391b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -524,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
 			     attr->index, NULL, NULL);
 }
 
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
-			       int index)
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+			       const struct ib_gid_attr *attr)
 {
-	struct ib_gid_attr attr;
-	union ib_gid gid;
-
-	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
-		return 0;
-
-	dev_put(attr.ndev);
-
-	if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+	if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
 		return 0;
 
 	return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
 }
 
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
-			   int index, enum ib_gid_type *gid_type)
-{
-	struct ib_gid_attr attr;
-	union ib_gid gid;
-	int ret;
-
-	ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
-	if (ret)
-		return ret;
-
-	dev_put(attr.ndev);
-
-	*gid_type = attr.gid_type;
-
-	return 0;
-}
-
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d89c8fe626f6..615bd6e9db6c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1183,10 +1183,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
 int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 			u64 guid, int type);
 
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
-			       int index);
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
-			   int index, enum ib_gid_type *gid_type);
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+			       const struct ib_gid_attr *attr);
 
 void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
 int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f1f638509f..e3c4ab9be41d 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2555,18 +2555,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		if (!(ah_flags & IB_AH_GRH))
 			return -EINVAL;
-		err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
-					     &gid_type);
-		if (err)
-			return err;
+
 		memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
 		if (qp->ibqp.qp_type == IB_QPT_RC ||
 		    qp->ibqp.qp_type == IB_QPT_UC ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_INI ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_TGT)
-			path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
-								  grh->sgid_index);
+			path->udp_sport =
+				mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
 		path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+		gid_type = ah->grh.sgid_attr->gid_type;
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
 			path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
 	} else {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3897b64532e1..a51b80bfadb3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
 }
 
 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
-			struct rdma_ah_attr *attr, union ib_gid *sgid,
+			struct rdma_ah_attr *attr, const union ib_gid *sgid,
 			int pdid, bool *isvlan, u16 vlan_tag)
 {
 	int status;
@@ -164,11 +164,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	struct ocrdma_ah *ah;
 	bool isvlan = false;
 	u16 vlan_tag = 0xffff;
-	struct ib_gid_attr sgid_attr;
+	const struct ib_gid_attr *sgid_attr;
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 	const struct ib_global_route *grh;
-	union ib_gid sgid;
 
 	if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
 	    !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
@@ -186,20 +185,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	if (status)
 		goto av_err;
 
-	status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid,
-				   &sgid_attr);
-	if (status) {
-		pr_err("%s(): Failed to query sgid, status = %d\n",
-		      __func__, status);
-		goto av_conf_err;
-	}
-	if (is_vlan_dev(sgid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
-	dev_put(sgid_attr.ndev);
+	sgid_attr = attr->grh.sgid_attr;
+	if (is_vlan_dev(sgid_attr->ndev))
+		vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
+
 	/* Get network header type for this GID */
-	ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	ah->hdr_type = rdma_gid_attr_network_type(sgid_attr);
 
-	status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
+	status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id,
+			     &isvlan, vlan_tag);
 	if (status)
 		goto av_conf_err;
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 6c136e5017fe..c6c87cba943b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2494,8 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 {
 	int status;
 	struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
+	const struct ib_gid_attr *sgid_attr;
 	u32 vlan_id = 0xFFFF;
 	u8 mac_addr[6], hdr_type;
 	union {
@@ -2525,25 +2524,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 	memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
 	       sizeof(cmd->params.dgid));
 
-	status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
-				   &sgid, &sgid_attr);
-	if (!status) {
-		vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
-		memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
-		dev_put(sgid_attr.ndev);
-	}
+	sgid_attr = ah_attr->grh.sgid_attr;
+	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
+	memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN);
 
 	qp->sgid_idx = grh->sgid_index;
-	memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+	memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0],
+	       sizeof(cmd->params.sgid));
 	status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
 	if (status)
 		return status;
+
 	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
 				(mac_addr[2] << 16) | (mac_addr[3] << 24);
 
-	hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	hdr_type = rdma_gid_attr_network_type(sgid_attr);
 	if (hdr_type == RDMA_NETWORK_IPV4) {
-		rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
+		rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
 		rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
 		memcpy(&cmd->params.dgid[0],
 		       &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 5b2a79b27036..10d8f4134ec0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1075,27 +1075,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 					  struct qed_rdma_modify_qp_in_params
 					  *qp_params)
 {
+	const struct ib_gid_attr *gid_attr;
 	enum rdma_network_type nw_type;
-	struct ib_gid_attr gid_attr;
 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
-	union ib_gid gid;
 	u32 ipv4_addr;
-	int rc = 0;
 	int i;
 
-	rc = ib_get_cached_gid(ibqp->device,
-			       rdma_ah_get_port_num(&attr->ah_attr),
-			       grh->sgid_index, &gid, &gid_attr);
-	if (rc)
-		return rc;
-
-	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+	gid_attr = grh->sgid_attr;
+	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
 
-	dev_put(gid_attr.ndev);
-	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
+	nw_type = rdma_gid_attr_network_type(gid_attr);
 	switch (nw_type) {
 	case RDMA_NETWORK_IPV6:
-		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
 		       sizeof(qp_params->sgid));
 		memcpy(&qp_params->dgid.bytes[0],
 		       &grh->dgid,
@@ -1105,7 +1097,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
 		break;
 	case RDMA_NETWORK_IB:
-		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
 		       sizeof(qp_params->sgid));
 		memcpy(&qp_params->dgid.bytes[0],
 		       &grh->dgid,
@@ -1115,7 +1107,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 	case RDMA_NETWORK_IPV4:
 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
-		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
+		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
 		qp_params->sgid.ipv4_addr = ipv4_addr;
 		ipv4_addr =
 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index c0f972c58d55..26fe8d7dbc55 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -82,15 +82,14 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr)
 	rdma_ah_set_port_num(attr, av->port_num);
 }
 
-void rxe_av_fill_ip_info(struct rxe_av *av,
-			struct rdma_ah_attr *attr,
-			struct ib_gid_attr *sgid_attr,
-			union ib_gid *sgid)
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
 {
-	rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid);
+	const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr;
+
+	rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
 	rdma_gid2ip((struct sockaddr *)&av->dgid_addr,
 		    &rdma_ah_read_grh(attr)->dgid);
-	av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
+	av->network_type = rdma_gid_attr_network_type(sgid_attr);
 }
 
 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a51ece596c43..87d14f7ef21b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
 
 void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
 
-void rxe_av_fill_ip_info(struct rxe_av *av,
-			struct rdma_ah_attr *attr,
-			struct ib_gid_attr *sgid_attr,
-			union ib_gid *sgid);
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
 
 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9f7aa1114b2..9f83fc982f31 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		     struct ib_udata *udata)
 {
 	int err;
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
 
 	if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
 		int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
@@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		qp->attr.qkey = attr->qkey;
 
 	if (mask & IB_QP_AV) {
-		ib_get_cached_gid(&rxe->ib_dev, 1,
-				  rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
-				  &sgid, &sgid_attr);
 		rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
-		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr,
-				    &sgid_attr, &sgid);
-		if (sgid_attr.ndev)
-			dev_put(sgid_attr.ndev);
+		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
 	}
 
 	if (mask & IB_QP_ALT_PATH) {
-		u8 sgid_index =
-			rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
-
-		ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
-				  &sgid, &sgid_attr);
-
 		rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
 				 &attr->alt_ah_attr);
-		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr,
-				    &sgid_attr, &sgid);
-		if (sgid_attr.ndev)
-			dev_put(sgid_attr.ndev);
-
+		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
 		qp->attr.alt_port_num = attr->alt_port_num;
 		qp->attr.alt_pkey_index = attr->alt_pkey_index;
 		qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9deafc3aa6af..9cfd440cebe1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
 	return 0;
 }
 
-static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
-		       struct rxe_av *av)
+static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
+			struct rxe_av *av)
 {
-	int err;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
-
-	err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
-				rdma_ah_read_grh(attr)->sgid_index, &sgid,
-				&sgid_attr);
-	if (err) {
-		pr_err("Failed to query sgid. err = %d\n", err);
-		return err;
-	}
-
 	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
-	rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
-	dev_put(sgid_attr.ndev);
-	return 0;
+	rxe_av_fill_ip_info(av, attr);
 }
 
 static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -266,13 +252,9 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 	rxe_add_ref(pd);
 	ah->pd = pd;
 
-	err = rxe_init_av(rxe, attr, &ah->av);
-	if (err)
-		goto err2;
-
+	rxe_init_av(rxe, attr, &ah->av);
 	return &ah->ibah;
 
-err2:
 	rxe_drop_ref(pd);
 	rxe_drop_ref(ah);
 err1:
@@ -289,10 +271,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	if (err)
 		return err;
 
-	err = rxe_init_av(rxe, attr, &ah->av);
-	if (err)
-		return err;
-
+	rxe_init_av(rxe, attr, &ah->av);
 	return 0;
 }
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1e5c1e8ba282..65f467d65bff 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -149,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
 	return IB_GID_TYPE_IB;
 }
 
-static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
-							    union ib_gid *gid)
+static inline enum rdma_network_type
+rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
 {
-	if (gid_type == IB_GID_TYPE_IB)
+	if (attr->gid_type == IB_GID_TYPE_IB)
 		return RDMA_NETWORK_IB;
 
-	if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+	if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
 		return RDMA_NETWORK_IPV4;
 	else
 		return RDMA_NETWORK_IPV6;
-- 
cgit v1.2.3


From 1a1f460ff151710289c2f8d4badd8b603b87d610 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 13 Jun 2018 10:22:08 +0300
Subject: RDMA: Hold the sgid_attr inside the struct ib_ah/qp

If the AH has a GRH then hold a reference to the sgid_attr inside the
common struct.

If the QP is modified with an AV that includes a GRH then also hold a
reference to the sgid_attr inside the common struct.

This informs the cache that the sgid_index is in-use so long as the AH or
QP using it exists.

This also means that all drivers can access the sgid_attr directly from
the ah_attr instead of querying the cache during their UD post-send paths.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/verbs.c | 46 +++++++++++++++++++++++++++++++++++++++--
 include/rdma/ib_verbs.h         |  4 ++++
 2 files changed, 48 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 2c7129cccc6e..b0ad739a7bd0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -459,6 +459,19 @@ static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
 	rdma_destroy_ah_attr(ah_attr);
 }
 
+static const struct ib_gid_attr *
+rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
+		      const struct ib_gid_attr *old_attr)
+{
+	if (old_attr)
+		rdma_put_gid_attr(old_attr);
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
+		return ah_attr->grh.sgid_attr;
+	}
+	return NULL;
+}
+
 static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 				     struct rdma_ah_attr *ah_attr,
 				     struct ib_udata *udata)
@@ -472,6 +485,8 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 		ah->pd      = pd;
 		ah->uobject = NULL;
 		ah->type    = ah_attr->type;
+		ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+
 		atomic_inc(&pd->usecnt);
 	}
 
@@ -871,6 +886,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 		ah->device->modify_ah(ah, ah_attr) :
 		-EOPNOTSUPP;
 
+	ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
 	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
 	return ret;
 }
@@ -888,13 +904,17 @@ EXPORT_SYMBOL(rdma_query_ah);
 
 int rdma_destroy_ah(struct ib_ah *ah)
 {
+	const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
 	struct ib_pd *pd;
 	int ret;
 
 	pd = ah->pd;
 	ret = ah->device->destroy_ah(ah);
-	if (!ret)
+	if (!ret) {
 		atomic_dec(&pd->usecnt);
+		if (sgid_attr)
+			rdma_put_gid_attr(sgid_attr);
+	}
 
 	return ret;
 }
@@ -1573,6 +1593,13 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 			return ret;
 	}
 	if (attr_mask & IB_QP_ALT_PATH) {
+		/*
+		 * FIXME: This does not track the migration state, so if the
+		 * user loads a new alternate path after the HW has migrated
+		 * from primary->alternate we will keep the wrong
+		 * references. This is OK for IB because the reference
+		 * counting does not serve any functional purpose.
+		 */
 		ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
 					  &old_sgid_attr_alt_av);
 		if (ret)
@@ -1606,8 +1633,17 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 	}
 
 	ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
-	if (!ret && (attr_mask & IB_QP_PORT))
+	if (ret)
+		goto out;
+
+	if (attr_mask & IB_QP_PORT)
 		qp->port = attr->port_num;
+	if (attr_mask & IB_QP_AV)
+		qp->av_sgid_attr =
+			rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
+	if (attr_mask & IB_QP_ALT_PATH)
+		qp->alt_path_sgid_attr = rdma_update_sgid_attr(
+			&attr->alt_ah_attr, qp->alt_path_sgid_attr);
 
 out:
 	if (attr_mask & IB_QP_ALT_PATH)
@@ -1765,6 +1801,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
 
 int ib_destroy_qp(struct ib_qp *qp)
 {
+	const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
+	const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
 	struct ib_pd *pd;
 	struct ib_cq *scq, *rcq;
 	struct ib_srq *srq;
@@ -1795,6 +1833,10 @@ int ib_destroy_qp(struct ib_qp *qp)
 	rdma_restrack_del(&qp->res);
 	ret = qp->device->destroy_qp(qp);
 	if (!ret) {
+		if (alt_path_sgid_attr)
+			rdma_put_gid_attr(alt_path_sgid_attr);
+		if (av_sgid_attr)
+			rdma_put_gid_attr(av_sgid_attr);
 		if (pd)
 			atomic_dec(&pd->usecnt);
 		if (scq)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 65f467d65bff..0232c0f9f717 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1580,6 +1580,7 @@ struct ib_ah {
 	struct ib_device	*device;
 	struct ib_pd		*pd;
 	struct ib_uobject	*uobject;
+	const struct ib_gid_attr *sgid_attr;
 	enum rdma_ah_attr_type	type;
 };
 
@@ -1778,6 +1779,9 @@ struct ib_qp {
 	struct ib_uobject      *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
+	/* sgid_attrs associated with the AV's */
+	const struct ib_gid_attr *av_sgid_attr;
+	const struct ib_gid_attr *alt_path_sgid_attr;
 	u32			qp_num;
 	u32			max_write_sge;
 	u32			max_read_sge;
-- 
cgit v1.2.3


From 3c60e868c31e4ff144776bf53ff0dfe9e9e4ec15 Mon Sep 17 00:00:00 2001
From: "willy@infradead.org" <willy@infradead.org>
Date: Wed, 13 Jun 2018 11:45:55 -0700
Subject: IDR: Expose the XArray lock

Allow users of the IDR to use the XArray lock for their own
synchronisation purposes.  The IDR continues to rely on the caller to
handle locking, but this lets the caller use the lock embedded in the
IDR data structure instead of allocating their own lock.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/idr.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..3e8215b2c371 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
+#define idr_lock(idr)		xa_lock(&(idr)->idr_rt)
+#define idr_unlock(idr)		xa_unlock(&(idr)->idr_rt)
+#define idr_lock_bh(idr)	xa_lock_bh(&(idr)->idr_rt)
+#define idr_unlock_bh(idr)	xa_unlock_bh(&(idr)->idr_rt)
+#define idr_lock_irq(idr)	xa_lock_irq(&(idr)->idr_rt)
+#define idr_unlock_irq(idr)	xa_unlock_irq(&(idr)->idr_rt)
+#define idr_lock_irqsave(idr, flags) \
+				xa_lock_irqsave(&(idr)->idr_rt, flags)
+#define idr_unlock_irqrestore(idr, flags) \
+				xa_unlock_irqrestore(&(idr)->idr_rt, flags)
+
 void idr_preload(gfp_t gfp_mask);
 
 int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t);
-- 
cgit v1.2.3


From aece27a2f01be4bb7683790f69cd1bed3a0929a2 Mon Sep 17 00:00:00 2001
From: Samuel Morris <samorris@lexmark.com>
Date: Tue, 29 May 2018 10:06:12 +0000
Subject: ata: ahci_platform: allow disabling of hotplug to save power

A number of resources remain powered to support hotplug. On platforms
I've worked with, allowing the ahci_platform to suspend saves about
150mW. This patch enables rpm and allows the device to be auto-suspended
through sysfs.

Signed-off-by: Samuel Morris <samorris@lexmark.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_platform.c    | 11 ++++--
 drivers/ata/libahci_platform.c | 82 +++++++++++++++++++++++++++++++++---------
 include/linux/ahci_platform.h  |  2 ++
 3 files changed, 76 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 99f9a895a459..757729376eda 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -68,8 +68,13 @@ disable_resources:
 	return rc;
 }
 
-static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend,
-			 ahci_platform_resume);
+#ifdef CONFIG_PM_SLEEP
+static const struct dev_pm_ops ahci_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(ahci_platform_suspend, ahci_platform_resume)
+	SET_RUNTIME_PM_OPS(ahci_platform_runtime_suspend,
+			ahci_platform_runtime_resume, NULL)
+};
+#endif
 
 static const struct of_device_id ahci_of_match[] = {
 	{ .compatible = "generic-ahci", },
@@ -98,7 +103,9 @@ static struct platform_driver ahci_driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
 		.acpi_match_table = ahci_acpi_match,
+#ifdef CONFIG_PM_SLEEP
 		.pm = &ahci_pm_ops,
+#endif
 	},
 };
 module_platform_driver(ahci_driver);
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 30cc8f1a31e1..feee2e11fb33 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -257,7 +257,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res)
 	int c;
 
 	if (hpriv->got_runtime_pm) {
-		pm_runtime_put_sync(dev);
+		pm_runtime_allow(dev);
 		pm_runtime_disable(dev);
 	}
 
@@ -475,8 +475,10 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
 		if (rc == -EPROBE_DEFER)
 			goto err_out;
 	}
+
+	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
-	pm_runtime_get_sync(dev);
+	pm_runtime_forbid(dev);
 	hpriv->got_runtime_pm = true;
 
 	devres_remove_group(dev, NULL);
@@ -705,6 +707,21 @@ int ahci_platform_resume_host(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
 
+static int _ahci_platform_suspend(struct device *dev)
+{
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
+	int rc;
+
+	rc = ahci_platform_suspend_host(dev);
+	if (rc)
+		return rc;
+
+	ahci_platform_disable_resources(hpriv);
+
+	return 0;
+}
+
 /**
  * ahci_platform_suspend - Suspend an ahci-platform device
  * @dev: the platform device to suspend
@@ -716,20 +733,45 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
  * 0 on success otherwise a negative error code
  */
 int ahci_platform_suspend(struct device *dev)
+{
+	return _ahci_platform_suspend(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_suspend);
+
+/**
+ * ahci_platform_runtime_suspend - Runtime suspend an ahci-platform device
+ * @dev: the platform device to suspend
+ *
+ * This function suspends the host associated with the device, followed by
+ * disabling all the resources of the device.
+ *
+ * RETURNS:
+ * 0 on success otherwise a negative error code
+ */
+int ahci_platform_runtime_suspend(struct device *dev)
+{
+	return _ahci_platform_suspend(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_runtime_suspend);
+
+static int _ahci_platform_resume(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_suspend_host(dev);
+	rc = ahci_platform_enable_resources(hpriv);
 	if (rc)
 		return rc;
 
-	ahci_platform_disable_resources(hpriv);
+	rc = ahci_platform_resume_host(dev);
+	if (rc) {
+		ahci_platform_disable_resources(hpriv);
+		return rc;
+	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(ahci_platform_suspend);
 
 /**
  * ahci_platform_resume - Resume an ahci-platform device
@@ -743,31 +785,37 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend);
  */
 int ahci_platform_resume(struct device *dev)
 {
-	struct ata_host *host = dev_get_drvdata(dev);
-	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_enable_resources(hpriv);
+	rc = _ahci_platform_resume(dev);
 	if (rc)
 		return rc;
 
-	rc = ahci_platform_resume_host(dev);
-	if (rc)
-		goto disable_resources;
-
 	/* We resumed so update PM runtime state */
 	pm_runtime_disable(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
 	return 0;
-
-disable_resources:
-	ahci_platform_disable_resources(hpriv);
-
-	return rc;
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume);
+
+/**
+ * ahci_platform_runtime_resume - Runtime resume an ahci-platform device
+ * @dev: the platform device to resume
+ *
+ * This function enables all the resources of the device followed by
+ * resuming the host associated with the device.
+ *
+ * RETURNS:
+ * 0 on success otherwise a negative error code
+ */
+int ahci_platform_runtime_resume(struct device *dev)
+{
+	return _ahci_platform_resume(dev);
+}
+EXPORT_SYMBOL_GPL(ahci_platform_runtime_resume);
+
 #endif
 
 MODULE_DESCRIPTION("AHCI SATA platform library");
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6396e6982103 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,5 +42,7 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
+int ahci_platform_runtime_suspend(struct device *dev);
+int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 33023fb85a42b53bf778bc025f9667b582282be4 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Mon, 18 Jun 2018 08:05:26 -0700
Subject: IB/core: add max_send_sge and max_recv_sge attributes

This patch replaces the ib_device_attr.max_sge with max_send_sge and
max_recv_sge. It allows ulps to take advantage of devices that have very
different send and recv sge depths.  For example cxgb4 has a max_recv_sge
of 4, yet a max_send_sge of 16.  Splitting out these attributes allows
much more efficient use of the SQ for cxgb4 with ulps that use the RDMA_RW
API. Consider a large RDMA WRITE that has 16 scattergather entries.
With max_sge of 4, the ulp would send 4 WRITE WRs, but with max_sge of
16, it can be done with 1 WRITE WR.

Acked-by: Sagi Grimberg <sagi@grimberg.me>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Selvin Xavier <selvin.xavier@broadcom.com>
Acked-by: Shiraz Saleem <shiraz.saleem@intel.com>
Acked-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c            |  2 +-
 drivers/infiniband/hw/bnxt_re/ib_verbs.c        |  3 ++-
 drivers/infiniband/hw/cxgb3/iwch_provider.c     |  3 ++-
 drivers/infiniband/hw/cxgb4/provider.c          |  3 ++-
 drivers/infiniband/hw/hfi1/verbs.c              |  3 ++-
 drivers/infiniband/hw/hns/hns_roce_main.c       |  3 ++-
 drivers/infiniband/hw/i40iw/i40iw_verbs.c       |  3 ++-
 drivers/infiniband/hw/mlx4/main.c               |  4 ++--
 drivers/infiniband/hw/mlx5/main.c               |  3 ++-
 drivers/infiniband/hw/mthca/mthca_provider.c    |  5 +++--
 drivers/infiniband/hw/nes/nes_verbs.c           |  3 ++-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     |  3 ++-
 drivers/infiniband/hw/qedr/verbs.c              |  3 ++-
 drivers/infiniband/hw/qib/qib_verbs.c           |  3 ++-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c |  3 ++-
 drivers/infiniband/sw/rdmavt/qp.c               |  5 +++--
 drivers/infiniband/sw/rxe/rxe.c                 |  3 ++-
 drivers/infiniband/sw/rxe/rxe_qp.c              |  8 ++++----
 drivers/infiniband/ulp/ipoib/ipoib_cm.c         |  4 ++--
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c      |  2 +-
 drivers/infiniband/ulp/isert/ib_isert.c         |  5 +++--
 drivers/infiniband/ulp/srpt/ib_srpt.c           |  6 ++++--
 drivers/nvme/target/rdma.c                      |  2 +-
 fs/cifs/smbdirect.c                             | 13 ++++++++++---
 include/rdma/ib_verbs.h                         |  3 ++-
 net/rds/ib.c                                    |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_transport.c        |  2 +-
 net/sunrpc/xprtrdma/verbs.c                     |  2 +-
 28 files changed, 65 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5733d0fb0673..908ee8ab3297 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -189,7 +189,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
 	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
-	resp->max_sge		= attr->max_sge;
+	resp->max_sge		= min(attr->max_send_sge, attr->max_recv_sge);
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
 	resp->max_cqe		= attr->max_cqe;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 136eaa78ad4a..6c0c6d3426e0 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 				    | IB_DEVICE_MEM_WINDOW
 				    | IB_DEVICE_MEM_WINDOW_TYPE_2B
 				    | IB_DEVICE_MEM_MGT_EXTENSIONS;
-	ib_attr->max_sge = dev_attr->max_qp_sges;
+	ib_attr->max_send_sge = dev_attr->max_qp_sges;
+	ib_attr->max_recv_sge = dev_attr->max_qp_sges;
 	ib_attr->max_sge_rd = dev_attr->max_qp_sges;
 	ib_attr->max_cq = dev_attr->max_cq;
 	ib_attr->max_cqe = dev_attr->max_cq_wqes;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index be097c6723c0..68bc2f9a532f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1103,7 +1103,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->max_mr_size = dev->attr.max_mr_size;
 	props->max_qp = dev->attr.max_qps;
 	props->max_qp_wr = dev->attr.max_wrs;
-	props->max_sge = dev->attr.max_sge_per_wr;
+	props->max_send_sge = dev->attr.max_sge_per_wr;
+	props->max_recv_sge = dev->attr.max_sge_per_wr;
 	props->max_sge_rd = 1;
 	props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
 	props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1feade8bb4b3..61b8bdb9423d 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -343,7 +343,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->max_mr_size = T4_MAX_MR_SIZE;
 	props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
 	props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
-	props->max_sge = T4_MAX_RECV_SGE;
+	props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
+	props->max_recv_sge = T4_MAX_RECV_SGE;
 	props->max_sge_rd = 1;
 	props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
 	props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 08991874c0e2..b7c75b63f887 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
 	rdi->dparms.props.max_qp = hfi1_max_qps;
 	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
-	rdi->dparms.props.max_sge = hfi1_max_sges;
+	rdi->dparms.props.max_send_sge = hfi1_max_sges;
+	rdi->dparms.props.max_recv_sge = hfi1_max_sges;
 	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
 	rdi->dparms.props.max_cq = hfi1_max_cqs;
 	rdi->dparms.props.max_ah = hfi1_max_ahs;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 24a2ea0018d9..850032de8676 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -206,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 	props->max_qp_wr = hr_dev->caps.max_wqes;
 	props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
 				  IB_DEVICE_RC_RNR_NAK_GEN;
-	props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg);
+	props->max_send_sge = hr_dev->caps.max_sq_sg;
+	props->max_recv_sge = hr_dev->caps.max_rq_sg;
 	props->max_sge_rd = 1;
 	props->max_cq = hr_dev->caps.num_cqs;
 	props->max_cqe = hr_dev->caps.max_cqes;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 68679ad4c6da..8884ff71a634 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
 	props->max_qp = iwdev->max_qp - iwdev->used_qps;
 	props->max_qp_wr = I40IW_MAX_QP_WRS;
-	props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
 	props->max_cq = iwdev->max_cq - iwdev->used_cqs;
 	props->max_cqe = iwdev->max_cqe;
 	props->max_mr = iwdev->max_mr - iwdev->used_mrs;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 908b8e5c5acb..87de1a467d60 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -517,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
 	props->max_qp		   = dev->dev->quotas.qp;
 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
-	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
-					 dev->dev->caps.max_rq_sg);
+	props->max_send_sge	   = dev->dev->caps.max_sq_sg;
+	props->max_recv_sge	   = dev->dev->caps.max_rq_sg;
 	props->max_sge_rd	   = MLX4_MAX_SGE_RD;
 	props->max_cq		   = dev->dev->quotas.cq;
 	props->max_cqe		   = dev->dev->caps.max_cqes;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e6d88f32391b..e46cda740479 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -888,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
 		     sizeof(struct mlx5_wqe_raddr_seg)) /
 		sizeof(struct mlx5_wqe_data_seg);
-	props->max_sge = min(max_rq_sg, max_sq_sg);
+	props->max_send_sge = max_sq_sg;
+	props->max_recv_sge = max_rq_sg;
 	props->max_sge_rd	   = MLX5_MAX_SGE_RD;
 	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
 	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 541f237965c7..20febafc1fdd 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
 	props->page_size_cap       = mdev->limits.page_size_cap;
 	props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
 	props->max_qp_wr           = mdev->limits.max_wqes;
-	props->max_sge             = mdev->limits.max_sg;
-	props->max_sge_rd          = props->max_sge;
+	props->max_send_sge        = mdev->limits.max_sg;
+	props->max_recv_sge        = mdev->limits.max_sg;
+	props->max_sge_rd          = mdev->limits.max_sg;
 	props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
 	props->max_cqe             = mdev->limits.max_cqes;
 	props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 32f26556c808..82b8f9630ee8 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
 	props->max_mr_size = 0x80000000;
 	props->max_qp = nesibdev->max_qp;
 	props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
-	props->max_sge = nesdev->nesadapter->max_sge;
+	props->max_send_sge = nesdev->nesadapter->max_sge;
+	props->max_recv_sge = nesdev->nesadapter->max_sge;
 	props->max_cq = nesibdev->max_cq;
 	props->max_cqe = nesdev->nesadapter->max_cqe;
 	props->max_mr = nesibdev->max_mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 82e20fc32890..1f057fdb3a8c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
 					IB_DEVICE_SYS_IMAGE_GUID |
 					IB_DEVICE_LOCAL_DMA_LKEY |
 					IB_DEVICE_MEM_MGT_EXTENSIONS;
-	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+	attr->max_send_sge = dev->attr.max_send_sge;
+	attr->max_recv_sge = dev->attr.max_recv_sge;
 	attr->max_sge_rd = dev->attr.max_rdma_sge;
 	attr->max_cq = dev->attr.max_cq;
 	attr->max_cqe = dev->attr.max_cqe;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 10d8f4134ec0..0c41d54f586b 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -112,7 +112,8 @@ int qedr_query_device(struct ib_device *ibdev,
 	    IB_DEVICE_RC_RNR_NAK_GEN |
 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
 
-	attr->max_sge = qattr->max_sge;
+	attr->max_send_sge = qattr->max_sge;
+	attr->max_recv_sge = qattr->max_sge;
 	attr->max_sge_rd = qattr->max_sge;
 	attr->max_cq = qattr->max_cq;
 	attr->max_cqe = qattr->max_cqe;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 14b4057a2b8f..41babbc0db58 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
 	rdi->dparms.props.max_mr_size = ~0ULL;
 	rdi->dparms.props.max_qp = ib_qib_max_qps;
 	rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
-	rdi->dparms.props.max_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_send_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
 	rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
 	rdi->dparms.props.max_cq = ib_qib_max_cqs;
 	rdi->dparms.props.max_cqe = ib_qib_max_cqes;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index a51463cd2f37..816cc285daf6 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev,
 	props->max_qp = dev->dsr->caps.max_qp;
 	props->max_qp_wr = dev->dsr->caps.max_qp_wr;
 	props->device_cap_flags = dev->dsr->caps.device_cap_flags;
-	props->max_sge = dev->dsr->caps.max_sge;
+	props->max_send_sge = dev->dsr->caps.max_sge;
+	props->max_recv_sge = dev->dsr->caps.max_sge;
 	props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
 					   dev->dsr->caps.max_sge_rd);
 	props->max_srq = dev->dsr->caps.max_srq;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 815f94c17c48..d29e3c943399 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	if (!rdi)
 		return ERR_PTR(-EINVAL);
 
-	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
 	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 	    init_attr->create_flags)
 		return ERR_PTR(-EINVAL);
 
 	/* Check receive queue parameters if no SRQ is specified. */
 	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+		if (init_attr->cap.max_recv_sge >
+		    rdi->dparms.props.max_recv_sge ||
 		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
 			return ERR_PTR(-EINVAL);
 
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7121e1b1eb89..10999fa69281 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	rxe->attr.max_qp			= RXE_MAX_QP;
 	rxe->attr.max_qp_wr			= RXE_MAX_QP_WR;
 	rxe->attr.device_cap_flags		= RXE_DEVICE_CAP_FLAGS;
-	rxe->attr.max_sge			= RXE_MAX_SGE;
+	rxe->attr.max_send_sge			= RXE_MAX_SGE;
+	rxe->attr.max_recv_sge			= RXE_MAX_SGE;
 	rxe->attr.max_sge_rd			= RXE_MAX_SGE_RD;
 	rxe->attr.max_cq			= RXE_MAX_CQ;
 	rxe->attr.max_cqe			= (1 << RXE_MAX_LOG_CQE) - 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 9f83fc982f31..c58452daffc7 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
 		goto err1;
 	}
 
-	if (cap->max_send_sge > rxe->attr.max_sge) {
+	if (cap->max_send_sge > rxe->attr.max_send_sge) {
 		pr_warn("invalid send sge = %d > %d\n",
-			cap->max_send_sge, rxe->attr.max_sge);
+			cap->max_send_sge, rxe->attr.max_send_sge);
 		goto err1;
 	}
 
@@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
 			goto err1;
 		}
 
-		if (cap->max_recv_sge > rxe->attr.max_sge) {
+		if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
 			pr_warn("invalid recv sge = %d > %d\n",
-				cap->max_recv_sge, rxe->attr.max_sge);
+				cap->max_recv_sge, rxe->attr.max_recv_sge);
 			goto err1;
 		}
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6535d9beb24d..23cb1adc636f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1068,8 +1068,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 	struct ib_qp *tx_qp;
 
 	if (dev->features & NETIF_F_SG)
-		attr.cap.max_send_sge =
-			min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
+		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
+					      MAX_SKB_FRAGS + 1);
 
 	tx_qp = ib_create_qp(priv->pd, &attr);
 	tx->max_send_sge = attr.cap.max_send_sge;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 984a88096f39..ba4669f24014 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		.cap = {
 			.max_send_wr  = ipoib_sendq_size,
 			.max_recv_wr  = ipoib_recvq_size,
-			.max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
+			.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
 					      MAX_SKB_FRAGS + 1),
 			.max_recv_sge = IPOIB_UD_RX_SG
 		},
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index cccbcf0eb035..7e056f3c82a0 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn,
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
 	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
-	attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+	attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
@@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device)
 	struct ib_device *ib_dev = device->ib_device;
 	int ret;
 
-	isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+	isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n",
+		  ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge);
 	isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
 
 	ret = isert_alloc_comps(device);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 698f7779e231..1b0b285a0ae0 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1753,13 +1753,15 @@ retry:
 	 */
 	qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
 	qp_init->cap.max_rdma_ctxs = sq_size / 2;
-	qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
+	qp_init->cap.max_send_sge = min(attrs->max_send_sge,
+					SRPT_MAX_SG_PER_WQE);
 	qp_init->port_num = ch->sport->port;
 	if (sdev->use_srq) {
 		qp_init->srq = sdev->srq;
 	} else {
 		qp_init->cap.max_recv_wr = ch->rq_size;
-		qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
+		qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
+						SRPT_MAX_SG_PER_WQE);
 	}
 
 	if (ch->using_rdma_cm) {
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d579a7..0d7f3d603f1d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -874,7 +874,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
 	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
 	qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
-					ndev->device->attrs.max_sge);
+					ndev->device->attrs.max_send_sge);
 
 	if (ndev->srq) {
 		qp_attr.srq = ndev->srq;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index e459c97151b3..c5a1cddd8856 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1661,9 +1661,16 @@ static struct smbd_connection *_smbd_get_connection(
 	info->max_receive_size = smbd_max_receive_size;
 	info->keep_alive_interval = smbd_keep_alive_interval;
 
-	if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
-		log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
-			info->id->device->attrs.max_sge);
+	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
+		log_rdma_event(ERR,
+			"warning: device max_send_sge = %d too small\n",
+			info->id->device->attrs.max_send_sge);
+		log_rdma_event(ERR, "Queue Pair creation may fail\n");
+	}
+	if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
+		log_rdma_event(ERR,
+			"warning: device max_recv_sge = %d too small\n",
+			info->id->device->attrs.max_recv_sge);
 		log_rdma_event(ERR, "Queue Pair creation may fail\n");
 	}
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0232c0f9f717..dc5d262739e5 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -345,7 +345,8 @@ struct ib_device_attr {
 	int			max_qp;
 	int			max_qp_wr;
 	u64			device_cap_flags;
-	int			max_sge;
+	int			max_send_sge;
+	int			max_recv_sge;
 	int			max_sge_rd;
 	int			max_cq;
 	int			max_cqe;
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b6ad38e48f62..683b55d4e2b0 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -143,7 +143,7 @@ static void rds_ib_add_one(struct ib_device *device)
 	INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
 	rds_ibdev->max_wrs = device->attrs.max_qp_wr;
-	rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
+	rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
 
 	has_fr = (device->attrs.device_cap_flags &
 		  IB_DEVICE_MEM_MGT_EXTENSIONS);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e9535a66bab0..547b2cdf1427 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
-	newxprt->sc_max_send_sges = dev->attrs.max_sge;
+	newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
 	/* transport hdr, head iovec, one page list entry, tail iovec */
 	if (newxprt->sc_max_send_sges < 4) {
 		pr_err("svcrdma: too few Send SGEs available (%d)\n",
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 16161a36dc73..112a15abc4a4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	unsigned int max_sge;
 	int rc;
 
-	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
 			RPCRDMA_MAX_SEND_SGES);
 	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
 		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
-- 
cgit v1.2.3


From 420c0117db25db38b72b6230223f7a976d3070ea Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:04 +0200
Subject: dmaengine: pxa: use a dma slave map

In order to remove the specific knowledge of the dma mapping from PXA
drivers, add a default slave map for pxa architectures.

This won't impact MMP architecture, but is aimed only at all PXA boards.

This is the first step, and once all drivers are converted,
pxad_filter_fn() will be made static, and the DMA resources removed from
device.c.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pxa_dma.c                 | 10 +++++++++-
 include/linux/platform_data/mmp_dma.h |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index b53fb618bbf6..9505334f9c6e 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -179,6 +179,8 @@ static unsigned int pxad_drcmr(unsigned int line)
 	return 0x1000 + line * 4;
 }
 
+bool pxad_filter_fn(struct dma_chan *chan, void *param);
+
 /*
  * Debug fs
  */
@@ -1396,9 +1398,10 @@ static int pxad_probe(struct platform_device *op)
 {
 	struct pxad_device *pdev;
 	const struct of_device_id *of_id;
+	const struct dma_slave_map *slave_map = NULL;
 	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
 	struct resource *iores;
-	int ret, dma_channels = 0, nb_requestors = 0;
+	int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0;
 	const enum dma_slave_buswidth widths =
 		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1429,6 +1432,8 @@ static int pxad_probe(struct platform_device *op)
 	} else if (pdata && pdata->dma_channels) {
 		dma_channels = pdata->dma_channels;
 		nb_requestors = pdata->nb_requestors;
+		slave_map = pdata->slave_map;
+		slave_map_cnt = pdata->slave_map_cnt;
 	} else {
 		dma_channels = 32;	/* default 32 channel */
 	}
@@ -1440,6 +1445,9 @@ static int pxad_probe(struct platform_device *op)
 	pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy;
 	pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg;
 	pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic;
+	pdev->slave.filter.map = slave_map;
+	pdev->slave.filter.mapcnt = slave_map_cnt;
+	pdev->slave.filter.fn = pxad_filter_fn;
 
 	pdev->slave.copy_align = PDMA_ALIGNMENT;
 	pdev->slave.src_addr_widths = widths;
diff --git a/include/linux/platform_data/mmp_dma.h b/include/linux/platform_data/mmp_dma.h
index d1397c8ed94e..6397b9c8149a 100644
--- a/include/linux/platform_data/mmp_dma.h
+++ b/include/linux/platform_data/mmp_dma.h
@@ -12,9 +12,13 @@
 #ifndef MMP_DMA_H
 #define MMP_DMA_H
 
+struct dma_slave_map;
+
 struct mmp_dma_platdata {
 	int dma_channels;
 	int nb_requestors;
+	int slave_map_cnt;
+	const struct dma_slave_map *slave_map;
 };
 
 #endif /* MMP_DMA_H */
-- 
cgit v1.2.3


From b6d1a17f4729e4fda5740a855da91d202db2c118 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:14 +0200
Subject: dmaengine: pxa: document pxad_param

Add some documentation for the pxad_param structure, and describe the
contract behind the minimal required priority of a DMA channel.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/dma/pxa-dma.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma/pxa-dma.h b/include/linux/dma/pxa-dma.h
index e56ec7af4fd7..9fc594f69eff 100644
--- a/include/linux/dma/pxa-dma.h
+++ b/include/linux/dma/pxa-dma.h
@@ -9,6 +9,15 @@ enum pxad_chan_prio {
 	PXAD_PRIO_LOWEST,
 };
 
+/**
+ * struct pxad_param - dma channel request parameters
+ * @drcmr: requestor line number
+ * @prio: minimal mandatory priority of the channel
+ *
+ * If a requested channel is granted, its priority will be at least @prio,
+ * ie. if PXAD_PRIO_LOW is required, the requested channel will be either
+ * PXAD_PRIO_LOW, PXAD_PRIO_NORMAL or PXAD_PRIO_HIGHEST.
+ */
 struct pxad_param {
 	unsigned int drcmr;
 	enum pxad_chan_prio prio;
-- 
cgit v1.2.3


From cd31b80736852d34bc1072f3e579a6fd73a244e7 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sun, 17 Jun 2018 19:02:17 +0200
Subject: ARM: pxa: change SSP DMA channels allocation

Now the dma_slave_map is available for PXA architecture, switch the SSP
device to it.

This specifically means that :
- for platform data based machines, the DMA requestor channels are
  extracted from the slave map, where pxa-ssp-dai.<N> is a 1-1 match to
  ssp.<N>, and the channels are either "rx" or "tx".

- for device tree platforms, the dma node should be hooked into the
  pxa2xx-ac97 or pxa-ssp-dai node.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Daniel Mack <daniel@zonque.org>
---
 arch/arm/plat-pxa/ssp.c    | 47 ----------------------------------------------
 include/linux/pxa2xx_ssp.h |  2 --
 sound/soc/pxa/pxa-ssp.c    |  5 ++---
 3 files changed, 2 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index ba13f793fbce..ed36dcab80f1 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -127,53 +127,6 @@ static int pxa_ssp_probe(struct platform_device *pdev)
 	if (IS_ERR(ssp->clk))
 		return PTR_ERR(ssp->clk);
 
-	if (dev->of_node) {
-		struct of_phandle_args dma_spec;
-		struct device_node *np = dev->of_node;
-		int ret;
-
-		/*
-		 * FIXME: we should allocate the DMA channel from this
-		 * context and pass the channel down to the ssp users.
-		 * For now, we lookup the rx and tx indices manually
-		 */
-
-		/* rx */
-		ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-						 0, &dma_spec);
-
-		if (ret) {
-			dev_err(dev, "Can't parse dmas property\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_rx = dma_spec.args[0];
-		of_node_put(dma_spec.np);
-
-		/* tx */
-		ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-						 1, &dma_spec);
-		if (ret) {
-			dev_err(dev, "Can't parse dmas property\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_tx = dma_spec.args[0];
-		of_node_put(dma_spec.np);
-	} else {
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-		if (res == NULL) {
-			dev_err(dev, "no SSP RX DRCMR defined\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_rx = res->start;
-
-		res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (res == NULL) {
-			dev_err(dev, "no SSP TX DRCMR defined\n");
-			return -ENODEV;
-		}
-		ssp->drcmr_tx = res->start;
-	}
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
 		dev_err(dev, "no memory resource defined\n");
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 8461b18e4608..03a7ca46735b 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -212,8 +212,6 @@ struct ssp_device {
 	int		type;
 	int		use_count;
 	int		irq;
-	int		drcmr_rx;
-	int		drcmr_tx;
 
 	struct device_node	*of_node;
 };
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 6fc986080130..0b441338bdd4 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -105,9 +105,8 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream,
 	dma = kzalloc(sizeof(struct snd_dmaengine_dai_dma_data), GFP_KERNEL);
 	if (!dma)
 		return -ENOMEM;
-
-	dma->filter_data = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
-				&ssp->drcmr_tx : &ssp->drcmr_rx;
+	dma->chan_name = substream->stream == SNDRV_PCM_STREAM_PLAYBACK ?
+		"tx" : "rx";
 
 	snd_soc_dai_set_dma_data(cpu_dai, substream, dma);
 
-- 
cgit v1.2.3


From b8042b3da925f390c1482bf9dc0898dc0b3ea7b5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Jun 2018 22:39:29 +0200
Subject: ieee80211: bump IEEE80211_MAX_AMPDU_BUF to support HE

Bump the IEEE80211_MAX_AMPDU_BUF size to 0x100 for HE support
and - for now - use IEEE80211_MAX_AMPDU_BUF_HT everywhere.

This is derived from my internal patch, parts of which Luca
had sent upstream.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/realtek/rtlwifi/base.c |  2 +-
 drivers/staging/rtl8188eu/include/wifi.h    |  1 -
 drivers/staging/rtl8712/wifi.h              |  1 -
 drivers/staging/rtl8723bs/include/wifi.h    |  1 -
 drivers/staging/rtlwifi/base.c              |  2 +-
 include/linux/ieee80211.h                   | 10 ++++++----
 net/mac80211/agg-rx.c                       |  4 ++--
 net/mac80211/agg-tx.c                       |  2 +-
 net/mac80211/ht.c                           |  2 +-
 net/mac80211/main.c                         |  4 ++--
 10 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 39c817eddd78..31bd6f714052 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1904,7 +1904,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
 		 reject_agg, ctrl_agg_size, agg_size);
 
 	rtlpriv->hw->max_rx_aggregation_subframes =
-		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 EXPORT_SYMBOL(rtl_rx_ampdu_apply);
 
diff --git a/drivers/staging/rtl8188eu/include/wifi.h b/drivers/staging/rtl8188eu/include/wifi.h
index 084a246eec19..6790b7c8cfb1 100644
--- a/drivers/staging/rtl8188eu/include/wifi.h
+++ b/drivers/staging/rtl8188eu/include/wifi.h
@@ -575,7 +575,6 @@ enum ht_cap_ampdu_factor {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 #define OP_MODE_PURE                    0
diff --git a/drivers/staging/rtl8712/wifi.h b/drivers/staging/rtl8712/wifi.h
index 0ed2f44ab4e9..00a4302e9983 100644
--- a/drivers/staging/rtl8712/wifi.h
+++ b/drivers/staging/rtl8712/wifi.h
@@ -574,7 +574,6 @@ struct ieee80211_ht_addt_info {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
diff --git a/drivers/staging/rtl8723bs/include/wifi.h b/drivers/staging/rtl8723bs/include/wifi.h
index 08bc79840b23..559bf2606fb7 100644
--- a/drivers/staging/rtl8723bs/include/wifi.h
+++ b/drivers/staging/rtl8723bs/include/wifi.h
@@ -799,7 +799,6 @@ enum HT_CAP_AMPDU_FACTOR {
  * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
  */
 #define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
 
 
 /* Spatial Multiplexing Power Save Modes */
diff --git a/drivers/staging/rtlwifi/base.c b/drivers/staging/rtlwifi/base.c
index e46e47d93d7d..094827c1879a 100644
--- a/drivers/staging/rtlwifi/base.c
+++ b/drivers/staging/rtlwifi/base.c
@@ -1838,7 +1838,7 @@ void rtl_rx_ampdu_apply(struct rtl_priv *rtlpriv)
 		 reject_agg, ctrl_agg_size, agg_size);
 
 	rtlpriv->hw->max_rx_aggregation_subframes =
-		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF);
+		(ctrl_agg_size ? agg_size : IEEE80211_MAX_AMPDU_BUF_HT);
 }
 
 /*********************************************************
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e6a6503bfa33..9c03a7d5e400 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1433,11 +1433,13 @@ struct ieee80211_ht_operation {
 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800
 
 /*
- * A-PMDU buffer sizes
- * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2)
+ * A-MPDU buffer sizes
+ * According to HT size varies from 8 to 64 frames
+ * HE adds the ability to have up to 256 frames.
  */
-#define IEEE80211_MIN_AMPDU_BUF 0x8
-#define IEEE80211_MAX_AMPDU_BUF 0x40
+#define IEEE80211_MIN_AMPDU_BUF		0x8
+#define IEEE80211_MAX_AMPDU_BUF_HT	0x40
+#define IEEE80211_MAX_AMPDU_BUF		0x100
 
 
 /* Spatial Multiplexing Power Save Modes (for capability) */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index e83c19d4c292..3ffd853b483f 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -274,7 +274,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	/* XXX: check own ht delayed BA capability?? */
 	if (((ba_policy != 1) &&
 	     (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
-	    (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+	    (buf_size > IEEE80211_MAX_AMPDU_BUF_HT)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
@@ -283,7 +283,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
-		buf_size = IEEE80211_MAX_AMPDU_BUF;
+		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
 
 	/* make sure the size doesn't exceed the maximum supported by the hw */
 	if (buf_size > sta->sta.max_rx_aggregation_subframes)
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ac4295296514..86c6bc0432ba 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -514,7 +514,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, params.ssn,
-				     IEEE80211_MAX_AMPDU_BUF,
+				     IEEE80211_MAX_AMPDU_BUF_HT,
 				     tid_tx->timeout);
 }
 
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 26a7ba3b698f..f849ea814993 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -352,7 +352,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		    test_and_clear_bit(tid,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-							 IEEE80211_MAX_AMPDU_BUF,
+							 IEEE80211_MAX_AMPDU_BUF_HT,
 							 false, true);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a6f8e3a646d4..070f77862014 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -597,8 +597,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
-	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
+	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
 	local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
-- 
cgit v1.2.3


From 41cbb0f5a29592874355e4159489eb08337cd50e Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 9 Jun 2018 09:14:44 +0300
Subject: mac80211: add support for HE

Add support for HE in mac80211 conforming with P802.11ax_D1.4.

Johannes: Fix another bug with the buf_size comparison in agg-rx.c.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Ido Yariv <idox.yariv@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  64 ++++++++--
 net/mac80211/Makefile      |   1 +
 net/mac80211/agg-rx.c      |  10 +-
 net/mac80211/agg-tx.c      |  19 ++-
 net/mac80211/cfg.c         |   5 +
 net/mac80211/he.c          |  55 +++++++++
 net/mac80211/ieee80211_i.h |  16 +++
 net/mac80211/main.c        |  19 ++-
 net/mac80211/mlme.c        | 288 ++++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/rx.c          | 127 +++++++++++++++++++-
 net/mac80211/sta_info.c    |  15 ++-
 net/mac80211/sta_info.h    |  20 +++-
 net/mac80211/trace.h       |   2 +-
 net/mac80211/util.c        | 120 ++++++++++++++++++-
 14 files changed, 716 insertions(+), 45 deletions(-)
 create mode 100644 net/mac80211/he.c

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 851a5e19ae32..5790f55c241d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -23,6 +23,7 @@
 #include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 #include <net/codel.h>
+#include <net/ieee80211_radiotap.h>
 #include <asm/unaligned.h>
 
 /**
@@ -162,6 +163,8 @@ enum ieee80211_ac_numbers {
  * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled
  * @acm: is mandatory admission control required for the access category
  * @uapsd: is U-APSD mode enabled for the queue
+ * @mu_edca: is the MU EDCA configured
+ * @mu_edca_param_rec: MU EDCA Parameter Record for HE
  */
 struct ieee80211_tx_queue_params {
 	u16 txop;
@@ -170,6 +173,8 @@ struct ieee80211_tx_queue_params {
 	u8 aifs;
 	bool acm;
 	bool uapsd;
+	bool mu_edca;
+	struct ieee80211_he_mu_edca_param_ac_rec mu_edca_param_rec;
 };
 
 struct ieee80211_low_level_stats {
@@ -463,6 +468,15 @@ struct ieee80211_mu_group_data {
  * This structure keeps information about a BSS (and an association
  * to that BSS) that can change during the lifetime of the BSS.
  *
+ * @bss_color: 6-bit value to mark inter-BSS frame, if BSS supports HE
+ * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
+ * @multi_sta_back_32bit: supports BA bitmap of 32-bits in Multi-STA BACK
+ * @uora_exists: is the UORA element advertised by AP
+ * @ack_enabled: indicates support to receive a multi-TID that solicits either
+ *	ACK, BACK or both
+ * @uora_ocw_range: UORA element's OCW Range field
+ * @frame_time_rts_th: HE duration RTS threshold, in units of 32us
+ * @he_support: does this BSS support HE
  * @assoc: association status
  * @ibss_joined: indicates whether this station is part of an IBSS
  *	or not
@@ -550,6 +564,14 @@ struct ieee80211_mu_group_data {
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
+	u8 bss_color;
+	u8 htc_trig_based_pkt_ext;
+	bool multi_sta_back_32bit;
+	bool uora_exists;
+	bool ack_enabled;
+	u8 uora_ocw_range;
+	u16 frame_time_rts_th;
+	bool he_support;
 	/* association related data */
 	bool assoc, ibss_joined;
 	bool ibss_creator;
@@ -1106,6 +1128,18 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_EOF_BIT: Value of the EOF bit in the A-MPDU delimiter for this
  *	frame
  * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
+ * @RX_FLAG_RADIOTAP_HE: HE radiotap data is present
+ *	(&struct ieee80211_radiotap_he, mac80211 will fill in
+ *	 - DATA3_DATA_MCS
+ *	 - DATA3_DATA_DCM
+ *	 - DATA3_CODING
+ *	 - DATA5_GI
+ *	 - DATA5_DATA_BW_RU_ALLOC
+ *	 - DATA6_NSTS
+ *	 - DATA3_STBC
+ *	from the RX info data, so leave those zeroed when building this data)
+ * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
+ *	(&struct ieee80211_radiotap_he_mu)
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1134,6 +1168,8 @@ enum mac80211_rx_flags {
 	RX_FLAG_ICV_STRIPPED		= BIT(23),
 	RX_FLAG_AMPDU_EOF_BIT		= BIT(24),
 	RX_FLAG_AMPDU_EOF_BIT_KNOWN	= BIT(25),
+	RX_FLAG_RADIOTAP_HE		= BIT(26),
+	RX_FLAG_RADIOTAP_HE_MU		= BIT(27),
 };
 
 /**
@@ -1164,6 +1200,7 @@ enum mac80211_rx_encoding {
 	RX_ENC_LEGACY = 0,
 	RX_ENC_HT,
 	RX_ENC_VHT,
+	RX_ENC_HE,
 };
 
 /**
@@ -1198,6 +1235,9 @@ enum mac80211_rx_encoding {
  * @encoding: &enum mac80211_rx_encoding
  * @bw: &enum rate_info_bw
  * @enc_flags: uses bits from &enum mac80211_rx_encoding_flags
+ * @he_ru: HE RU, from &enum nl80211_he_ru_alloc
+ * @he_gi: HE GI, from &enum nl80211_he_gi
+ * @he_dcm: HE DCM value
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *	each A-MPDU but the same for each subframe within one A-MPDU
@@ -1211,7 +1251,8 @@ struct ieee80211_rx_status {
 	u32 flag;
 	u16 freq;
 	u8 enc_flags;
-	u8 encoding:2, bw:3;
+	u8 encoding:2, bw:3, he_ru:3;
+	u8 he_gi:2, he_dcm:1;
 	u8 rate_idx;
 	u8 nss;
 	u8 rx_flags;
@@ -1770,6 +1811,7 @@ struct ieee80211_sta_rates {
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
+ * @he_cap: HE capabilities of this STA
  * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU
  *	that this station is allowed to transmit to us.
  *	Can be modified by driver.
@@ -1805,7 +1847,8 @@ struct ieee80211_sta {
 	u16 aid;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
-	u8 max_rx_aggregation_subframes;
+	struct ieee80211_sta_he_cap he_cap;
+	u16 max_rx_aggregation_subframes;
 	bool wme;
 	u8 uapsd_queues;
 	u8 max_sp;
@@ -2196,10 +2239,11 @@ enum ieee80211_hw_flags {
  *	it shouldn't be set.
  *
  * @max_tx_aggregation_subframes: maximum number of subframes in an
- *	aggregate an HT driver will transmit. Though ADDBA will advertise
- *	a constant value of 64 as some older APs can crash if the window
- *	size is smaller (an example is LinkSys WRT120N with FW v1.0.07
- *	build 002 Jun 18 2012).
+ *	aggregate an HT/HE device will transmit. In HT AddBA we'll
+ *	advertise a constant value of 64 as some older APs crash if
+ *	the window size is smaller (an example is LinkSys WRT120N
+ *	with FW v1.0.07 build 002 Jun 18 2012).
+ *	For AddBA to HE capable peers this value will be used.
  *
  * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum
  *	of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list.
@@ -2216,6 +2260,8 @@ enum ieee80211_hw_flags {
  *	the default is _GI | _BANDWIDTH.
  *	Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
  *
+ * @radiotap_he: HE radiotap validity flags
+ *
  * @radiotap_timestamp: Information for the radiotap timestamp field; if the
  *	'units_pos' member is set to a non-negative value it must be set to
  *	a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
@@ -2263,8 +2309,8 @@ struct ieee80211_hw {
 	u8 max_rates;
 	u8 max_report_rates;
 	u8 max_rate_tries;
-	u8 max_rx_aggregation_subframes;
-	u8 max_tx_aggregation_subframes;
+	u16 max_rx_aggregation_subframes;
+	u16 max_tx_aggregation_subframes;
 	u8 max_tx_fragments;
 	u8 offchannel_tx_hw_queue;
 	u8 radiotap_mcs_details;
@@ -2904,7 +2950,7 @@ struct ieee80211_ampdu_params {
 	struct ieee80211_sta *sta;
 	u16 tid;
 	u16 ssn;
-	u8 buf_size;
+	u16 buf_size;
 	bool amsdu;
 	u16 timeout;
 };
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index e3589ade62e0..bb707789ef2b 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -12,6 +12,7 @@ mac80211-y := \
 	scan.o offchannel.o \
 	ht.o agg-tx.o agg-rx.o \
 	vht.o \
+	he.o \
 	ibss.o \
 	iface.o \
 	rate.o \
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3ffd853b483f..6a4f154c99f6 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -245,6 +245,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	};
 	int i, ret = -EOPNOTSUPP;
 	u16 status = WLAN_STATUS_REQUEST_DECLINED;
+	u16 max_buf_size;
 
 	if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
 		ht_dbg(sta->sdata,
@@ -268,13 +269,18 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 		goto end;
 	}
 
+	if (sta->sta.he_cap.has_he)
+		max_buf_size = IEEE80211_MAX_AMPDU_BUF;
+	else
+		max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+
 	/* sanity check for incoming parameters:
 	 * check if configuration can support the BA policy
 	 * and if buffer size does not exceeds max value */
 	/* XXX: check own ht delayed BA capability?? */
 	if (((ba_policy != 1) &&
 	     (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
-	    (buf_size > IEEE80211_MAX_AMPDU_BUF_HT)) {
+	    (buf_size > max_buf_size)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
@@ -283,7 +289,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
-		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+		buf_size = max_buf_size;
 
 	/* make sure the size doesn't exceed the maximum supported by the hw */
 	if (buf_size > sta->sta.max_rx_aggregation_subframes)
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 86c6bc0432ba..69e831bc317b 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -463,6 +463,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 		.timeout = 0,
 	};
 	int ret;
+	u16 buf_size;
 
 	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
@@ -511,11 +512,22 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	sta->ampdu_mlme.addba_req_num[tid]++;
 	spin_unlock_bh(&sta->lock);
 
+	if (sta->sta.he_cap.has_he) {
+		buf_size = local->hw.max_tx_aggregation_subframes;
+	} else {
+		/*
+		 * We really should use what the driver told us it will
+		 * transmit as the maximum, but certain APs (e.g. the
+		 * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
+		 * will crash when we use a lower number.
+		 */
+		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+	}
+
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, params.ssn,
-				     IEEE80211_MAX_AMPDU_BUF_HT,
-				     tid_tx->timeout);
+				     buf_size, tid_tx->timeout);
 }
 
 /*
@@ -905,8 +917,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
 	struct tid_ampdu_tx *tid_tx;
 	struct ieee80211_txq *txq;
-	u16 capab, tid;
-	u8 buf_size;
+	u16 capab, tid, buf_size;
 	bool amsdu;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c4e2f7d2bcb8..02f3672e7b5e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1412,6 +1412,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 						    params->vht_capa, sta);
 
+	if (params->he_capa)
+		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+						  (void *)params->he_capa,
+						  params->he_capa_len, sta);
+
 	if (params->opmode_notif_used) {
 		/* returned value is only needed for rc update, but the
 		 * rc isn't initialized here yet, so ignore it
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
new file mode 100644
index 000000000000..769078ed5a12
--- /dev/null
+++ b/net/mac80211/he.c
@@ -0,0 +1,55 @@
+/*
+ * HE handling
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ieee80211_i.h"
+
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_supported_band *sband,
+				  const u8 *he_cap_ie, u8 he_cap_len,
+				  struct sta_info *sta)
+{
+	struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+	struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
+	u8 he_ppe_size;
+	u8 mcs_nss_size;
+	u8 he_total_size;
+
+	memset(he_cap, 0, sizeof(*he_cap));
+
+	if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+		return;
+
+	/* Make sure size is OK */
+	mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
+	he_ppe_size =
+		ieee80211_he_ppe_size(he_cap_ie[sizeof(he_cap->he_cap_elem) +
+						mcs_nss_size],
+				      he_cap_ie_elem->phy_cap_info);
+	he_total_size = sizeof(he_cap->he_cap_elem) + mcs_nss_size +
+			he_ppe_size;
+	if (he_cap_len < he_total_size)
+		return;
+
+	memcpy(&he_cap->he_cap_elem, he_cap_ie, sizeof(he_cap->he_cap_elem));
+
+	/* HE Tx/Rx HE MCS NSS Support Field */
+	memcpy(&he_cap->he_mcs_nss_supp,
+	       &he_cap_ie[sizeof(he_cap->he_cap_elem)], mcs_nss_size);
+
+	/* Check if there are (optional) PPE Thresholds */
+	if (he_cap->he_cap_elem.phy_cap_info[6] &
+	    IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT)
+		memcpy(he_cap->ppe_thres,
+		       &he_cap_ie[sizeof(he_cap->he_cap_elem) + mcs_nss_size],
+		       he_ppe_size);
+
+	he_cap->has_he = true;
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a6c12c104c38..172aeae21ae9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -365,6 +365,7 @@ enum ieee80211_sta_flags {
 	IEEE80211_STA_DISABLE_160MHZ	= BIT(13),
 	IEEE80211_STA_DISABLE_WMM	= BIT(14),
 	IEEE80211_STA_ENABLE_RRM	= BIT(15),
+	IEEE80211_STA_DISABLE_HE	= BIT(16),
 };
 
 struct ieee80211_mgd_auth_data {
@@ -1454,6 +1455,10 @@ struct ieee802_11_elems {
 	const struct ieee80211_vht_cap *vht_cap_elem;
 	const struct ieee80211_vht_operation *vht_operation;
 	const struct ieee80211_meshconf_ie *mesh_config;
+	const u8 *he_cap;
+	const struct ieee80211_he_operation *he_operation;
+	const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
+	const u8 *uora_element;
 	const u8 *mesh_id;
 	const u8 *peering;
 	const __le16 *awake_window;
@@ -1483,6 +1488,7 @@ struct ieee802_11_elems {
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
+	u8 he_cap_len;
 	u8 mesh_id_len;
 	u8 peering_len;
 	u8 preq_len;
@@ -1825,6 +1831,13 @@ void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
 enum nl80211_chan_width
 ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
 
+/* HE */
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_supported_band *sband,
+				  const u8 *he_cap_ie, u8 he_cap_len,
+				  struct sta_info *sta);
+
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_mgmt *mgmt,
@@ -2076,6 +2089,9 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 			       u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 				const struct cfg80211_chan_def *chandef);
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+			      const struct ieee80211_sta_he_cap *he_cap,
+			      u8 *end);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
 			     const u8 *srates, int srates_len, u32 *rates);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 070f77862014..b33faba8cbbe 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -3,6 +3,7 @@
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright (C) 2017     Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -825,7 +826,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	int result, i;
 	enum nl80211_band band;
 	int channels, max_bitrates;
-	bool supp_ht, supp_vht;
+	bool supp_ht, supp_vht, supp_he;
 	netdev_features_t feature_whitelist;
 	struct cfg80211_chan_def dflt_chandef = {};
 
@@ -905,6 +906,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	max_bitrates = 0;
 	supp_ht = false;
 	supp_vht = false;
+	supp_he = false;
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
 		struct ieee80211_supported_band *sband;
 
@@ -931,6 +933,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
 		supp_vht = supp_vht || sband->vht_cap.vht_supported;
 
+		if (!supp_he)
+			supp_he = !!ieee80211_get_he_sta_cap(sband);
+
 		if (!sband->ht_cap.ht_supported)
 			continue;
 
@@ -1020,6 +1025,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		local->scan_ies_len +=
 			2 + sizeof(struct ieee80211_vht_cap);
 
+	/* HE cap element is variable in size - set len to allow max size */
+	/*
+	 * TODO: 1 is added at the end of the calculation to accommodate for
+	 *	the temporary placing of the HE capabilities IE under EXT.
+	 *	Remove it once it is placed in the final place.
+	 */
+	if (supp_he)
+		local->scan_ies_len +=
+			2 + sizeof(struct ieee80211_he_cap_elem) +
+			sizeof(struct ieee80211_he_mcs_nss_supp) +
+			IEEE80211_HE_PPE_THRES_MAX_LEN + 1;
+
 	if (!local->ops->hw_scan) {
 		/* For hw_scan, driver needs to set these up. */
 		local->hw.wiphy->max_scan_ssids = 4;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a44e5b4aaeda..0322d78007ad 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -149,6 +149,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_channel *channel,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
+			     const struct ieee80211_he_operation *he_oper,
 			     struct cfg80211_chan_def *chandef, bool tracking)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -207,7 +208,27 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	}
 
 	vht_chandef = *chandef;
-	if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && he_oper &&
+	    (le32_to_cpu(he_oper->he_oper_params) &
+	     IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
+		struct ieee80211_vht_operation he_oper_vht_cap;
+
+		/*
+		 * Set only first 3 bytes (other 2 aren't used in
+		 * ieee80211_chandef_vht_oper() anyway)
+		 */
+		memcpy(&he_oper_vht_cap, he_oper->optional, 3);
+		he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
+
+		if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap,
+						&vht_chandef)) {
+			if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+				sdata_info(sdata,
+					   "HE AP VHT information is invalid, disable HE\n");
+			ret = IEEE80211_STA_DISABLE_HE;
+			goto out;
+		}
+	} else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
 		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information is invalid, disable VHT\n");
@@ -300,12 +321,14 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 			       const struct ieee80211_ht_cap *ht_cap,
 			       const struct ieee80211_ht_operation *ht_oper,
 			       const struct ieee80211_vht_operation *vht_oper,
+			       const struct ieee80211_he_operation *he_oper,
 			       const u8 *bssid, u32 *changed)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_channel *chan;
+	struct ieee80211_channel *chan = sdata->vif.bss_conf.chandef.chan;
+	struct ieee80211_supported_band *sband =
+		local->hw.wiphy->bands[chan->band];
 	struct cfg80211_chan_def chandef;
 	u16 ht_opmode;
 	u32 flags;
@@ -320,6 +343,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
 		vht_oper = NULL;
 
+	/* don't check HE if we associated as non-HE station */
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
+	    !ieee80211_get_he_sta_cap(sband))
+		he_oper = NULL;
+
 	if (WARN_ON_ONCE(!sta))
 		return -EINVAL;
 
@@ -333,12 +361,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
 	}
 
-	chan = sdata->vif.bss_conf.chandef.chan;
-	sband = local->hw.wiphy->bands[chan->band];
-
-	/* calculate new channel (type) based on HT/VHT operation IEs */
+	/* calculate new channel (type) based on HT/VHT/HE operation IEs */
 	flags = ieee80211_determine_chantype(sdata, sband, chan,
-					     ht_oper, vht_oper,
+					     ht_oper, vht_oper, he_oper,
 					     &chandef, true);
 
 	/*
@@ -582,6 +607,34 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 	ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
 }
 
+/* This function determines HE capability flags for the association
+ * and builds the IE.
+ */
+static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
+				struct sk_buff *skb,
+				struct ieee80211_supported_band *sband)
+{
+	u8 *pos;
+	const struct ieee80211_sta_he_cap *he_cap = NULL;
+	u8 he_cap_size;
+
+	he_cap = ieee80211_get_he_sta_cap(sband);
+	if (!he_cap)
+		return;
+
+	/*
+	 * TODO: the 1 added is because this temporarily is under the EXTENSION
+	 * IE. Get rid of it when it moves.
+	 */
+	he_cap_size =
+		2 + 1 + sizeof(he_cap->he_cap_elem) +
+		ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
+		ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+				      he_cap->he_cap_elem.phy_cap_info);
+	pos = skb_put(skb, he_cap_size);
+	ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+}
+
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -643,6 +696,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			2 + 2 * sband->n_channels + /* supported channels */
 			2 + sizeof(struct ieee80211_ht_cap) + /* HT */
 			2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
+			2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
+				sizeof(struct ieee80211_he_mcs_nss_supp) +
+				IEEE80211_HE_PPE_THRES_MAX_LEN +
 			assoc_data->ie_len + /* extra IEs */
 			(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
 			9, /* WMM */
@@ -827,11 +883,41 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		offset = noffset;
 	}
 
+	/* if present, add any custom IEs that go before HE */
+	if (assoc_data->ie_len) {
+		static const u8 before_he[] = {
+			/*
+			 * no need to list the ones split off before VHT
+			 * or generated here
+			 */
+			WLAN_EID_OPMODE_NOTIF,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE,
+			/* 11ai elements */
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN,
+			/* TODO: add 11ah/11aj/11ak elements */
+		};
+
+		/* RIC already taken above, so no need to handle here anymore */
+		noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
+					     before_he, ARRAY_SIZE(before_he),
+					     offset);
+		pos = skb_put(skb, noffset - offset);
+		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		offset = noffset;
+	}
+
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 		ieee80211_add_vht_ie(sdata, skb, sband,
 				     &assoc_data->ap_vht_cap);
 
-	/* if present, add any custom non-vendor IEs that go after HT */
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+		ieee80211_add_he_ie(sdata, skb, sband);
+
+	/* if present, add any custom non-vendor IEs that go after HE */
 	if (assoc_data->ie_len) {
 		noffset = ieee80211_ie_split_vendor(assoc_data->ie,
 						    assoc_data->ie_len,
@@ -898,6 +984,11 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	struct ieee80211_hdr_3addr *nullfunc;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+	/* Don't send NDPs when STA is connected HE */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
 		!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
 	if (!skb)
@@ -929,6 +1020,10 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
 	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
 		return;
 
+	/* Don't send NDPs when connected HE */
+	if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
 	if (!skb)
 		return;
@@ -1700,9 +1795,11 @@ static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
 }
 
 /* MLME */
-static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
-				     struct ieee80211_sub_if_data *sdata,
-				     const u8 *wmm_param, size_t wmm_param_len)
+static bool
+ieee80211_sta_wmm_params(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata,
+			 const u8 *wmm_param, size_t wmm_param_len,
+			 const struct ieee80211_mu_edca_param_set *mu_edca)
 {
 	struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1749,6 +1846,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_bk;
 			break;
 		case 2: /* AC_VI */
 			ac = IEEE80211_AC_VI;
@@ -1756,6 +1856,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_vi;
 			break;
 		case 3: /* AC_VO */
 			ac = IEEE80211_AC_VO;
@@ -1763,6 +1866,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_vo;
 			break;
 		case 0: /* AC_BE */
 		default:
@@ -1771,6 +1877,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_be;
 			break;
 		}
 
@@ -3021,6 +3130,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
+	/*
+	 * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark
+	 * HE as disabled. If on the 5GHz band, make sure it supports VHT.
+	 */
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
+	    (sband->band == NL80211_BAND_5GHZ &&
+	     ifmgd->flags & IEEE80211_STA_DISABLE_VHT) ||
+	    (!elems.he_cap && !elems.he_operation))
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    (!elems.he_cap || !elems.he_operation)) {
+		mutex_unlock(&sdata->local->sta_mtx);
+		sdata_info(sdata,
+			   "HE AP is missing HE capability/operation\n");
+		ret = false;
+		goto out;
+	}
+
 	/* Set up internal HT/VHT capabilities */
 	if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
 		ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -3030,6 +3158,48 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 						    elems.vht_cap_elem, sta);
 
+	if (elems.he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    elems.he_cap) {
+		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+						  elems.he_cap,
+						  elems.he_cap_len,
+						  sta);
+
+		bss_conf->he_support = sta->sta.he_cap.has_he;
+	} else {
+		bss_conf->he_support = false;
+	}
+
+	if (bss_conf->he_support) {
+		u32 he_oper_params =
+			le32_to_cpu(elems.he_operation->he_oper_params);
+
+		bss_conf->bss_color = he_oper_params &
+				      IEEE80211_HE_OPERATION_BSS_COLOR_MASK;
+		bss_conf->htc_trig_based_pkt_ext =
+			(he_oper_params &
+			 IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) <<
+			IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET;
+		bss_conf->frame_time_rts_th =
+			(he_oper_params &
+			 IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) <<
+			IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET;
+
+		bss_conf->multi_sta_back_32bit =
+			sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP;
+
+		bss_conf->ack_enabled =
+			sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_ACK_EN;
+
+		bss_conf->uora_exists = !!elems.uora_element;
+		if (elems.uora_element)
+			bss_conf->uora_ocw_range = elems.uora_element[0];
+
+		/* TODO: OPEN: what happens if BSS color disable is set? */
+	}
+
 	/*
 	 * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data
 	 * in their association response, so ignore that data for our own
@@ -3089,7 +3259,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
 		ieee80211_set_wmm_default(sdata, false, false);
 	} else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-					     elems.wmm_param_len)) {
+					     elems.wmm_param_len,
+					     elems.mu_edca_param_set)) {
 		/* still enable QoS since we might have HT/VHT */
 		ieee80211_set_wmm_default(sdata, false, true);
 		/* set the disable-WMM flag in this case to disable
@@ -3603,7 +3774,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
 	    ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-				     elems.wmm_param_len))
+				     elems.wmm_param_len,
+				     elems.mu_edca_param_set))
 		changed |= BSS_CHANGED_QOS;
 
 	/*
@@ -3642,7 +3814,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	if (ieee80211_config_bw(sdata, sta,
 				elems.ht_cap_elem, elems.ht_operation,
-				elems.vht_operation, bssid, &changed)) {
+				elems.vht_operation, elems.he_operation,
+				bssid, &changed)) {
 		mutex_unlock(&local->sta_mtx);
 		sdata_info(sdata,
 			   "failed to follow AP %pM bandwidth change, disconnect\n",
@@ -4279,6 +4452,66 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
 	return chains;
 }
 
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+				    const struct ieee80211_he_operation *he_op)
+{
+	const struct ieee80211_sta_he_cap *sta_he_cap =
+		ieee80211_get_he_sta_cap(sband);
+	u16 ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+	int i;
+
+	if (!sta_he_cap || !he_op)
+		return false;
+
+	/* Need to go over for 80MHz, 160MHz and for 80+80 */
+	for (i = 0; i < 3; i++) {
+		const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+			&sta_he_cap->he_mcs_nss_supp;
+		u16 sta_mcs_map_rx =
+			le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+		u16 sta_mcs_map_tx =
+			le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+		u8 nss;
+		bool verified = true;
+
+		/*
+		 * For each band there is a maximum of 8 spatial streams
+		 * possible. Each of the sta_mcs_map_* is a 16-bit struct built
+		 * of 2 bits per NSS (1-8), with the values defined in enum
+		 * ieee80211_he_mcs_support. Need to make sure STA TX and RX
+		 * capabilities aren't less than the AP's minimum requirements
+		 * for this HE BSS per SS.
+		 * It is enough to find one such band that meets the reqs.
+		 */
+		for (nss = 8; nss > 0; nss--) {
+			u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+			u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+			u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+			if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+				continue;
+
+			/*
+			 * Make sure the HE AP doesn't require MCSs that aren't
+			 * supported by the client
+			 */
+			if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+			    sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+			    (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+				verified = false;
+				break;
+			}
+		}
+
+		if (verified)
+			return true;
+	}
+
+	/* If here, STA doesn't meet AP's HE min requirements */
+	return false;
+}
+
 static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 				  struct cfg80211_bss *cbss)
 {
@@ -4287,6 +4520,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_ht_cap *ht_cap = NULL;
 	const struct ieee80211_ht_operation *ht_oper = NULL;
 	const struct ieee80211_vht_operation *vht_oper = NULL;
+	const struct ieee80211_he_operation *he_oper = NULL;
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
 	int ret;
@@ -4342,6 +4576,25 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    ieee80211_get_he_sta_cap(sband)) {
+		const struct cfg80211_bss_ies *ies;
+		const u8 *he_oper_ie;
+
+		ies = rcu_dereference(cbss->ies);
+		he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION,
+						  ies->data, ies->len);
+		if (he_oper_ie &&
+		    he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3]))
+			he_oper = (void *)(he_oper_ie + 3);
+		else
+			he_oper = NULL;
+
+		if (!he_oper ||
+		    !ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+	}
+
 	/* Allow VHT if at least one channel on the sband supports 80 MHz */
 	have_80mhz = false;
 	for (i = 0; i < sband->n_channels; i++) {
@@ -4358,7 +4611,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
-						     ht_oper, vht_oper,
+						     ht_oper, vht_oper, he_oper,
 						     &chandef, false);
 
 	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
@@ -4764,8 +5017,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		    req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
 			ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
 			ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 			netdev_info(sdata->dev,
-				    "disabling HT/VHT due to WEP/TKIP use\n");
+				    "disabling HE/HT/VHT due to WEP/TKIP use\n");
 		}
 	}
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 756ba176db1e..a16ba568e2a3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -175,6 +175,20 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 		len += 12;
 	}
 
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE) {
+		len = ALIGN(len, 2);
+		len += 12;
+		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) != 12);
+	}
+
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		len = ALIGN(len, 2);
+		len += 12;
+		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12);
+	}
+
 	if (status->chains) {
 		/* antenna and antenna signal fields */
 		len += 2 * hweight8(status->chains);
@@ -263,6 +277,19 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	int mpdulen, chain;
 	unsigned long chains = status->chains;
 	struct ieee80211_vendor_radiotap rtap = {};
+	struct ieee80211_radiotap_he he = {};
+	struct ieee80211_radiotap_he_mu he_mu = {};
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE) {
+		he = *(struct ieee80211_radiotap_he *)skb->data;
+		skb_pull(skb, sizeof(he));
+		WARN_ON_ONCE(status->encoding != RX_ENC_HE);
+	}
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		he_mu = *(struct ieee80211_radiotap_he_mu *)skb->data;
+		skb_pull(skb, sizeof(he_mu));
+	}
 
 	if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
 		rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
@@ -520,6 +547,89 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 		*pos++ = flags;
 	}
 
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE) {
+#define HE_PREP(f, val)	cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val))
+
+		if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) {
+			he.data6 |= HE_PREP(DATA6_NSTS,
+					    FIELD_GET(RX_ENC_FLAG_STBC_MASK,
+						      status->enc_flags));
+			he.data3 |= HE_PREP(DATA3_STBC, 1);
+		} else {
+			he.data6 |= HE_PREP(DATA6_NSTS, status->nss);
+		}
+
+#define CHECK_GI(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
+		     (int)NL80211_RATE_INFO_HE_GI_##s)
+
+		CHECK_GI(0_8);
+		CHECK_GI(1_6);
+		CHECK_GI(3_2);
+
+		he.data3 |= HE_PREP(DATA3_DATA_MCS, status->rate_idx);
+		he.data3 |= HE_PREP(DATA3_DATA_DCM, status->he_dcm);
+		he.data3 |= HE_PREP(DATA3_CODING,
+				    !!(status->enc_flags & RX_ENC_FLAG_LDPC));
+
+		he.data5 |= HE_PREP(DATA5_GI, status->he_gi);
+
+		switch (status->bw) {
+		case RATE_INFO_BW_20:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
+			break;
+		case RATE_INFO_BW_40:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ);
+			break;
+		case RATE_INFO_BW_80:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ);
+			break;
+		case RATE_INFO_BW_160:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ);
+			break;
+		case RATE_INFO_BW_HE_RU:
+#define CHECK_RU_ALLOC(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \
+		     NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4)
+
+			CHECK_RU_ALLOC(26);
+			CHECK_RU_ALLOC(52);
+			CHECK_RU_ALLOC(106);
+			CHECK_RU_ALLOC(242);
+			CHECK_RU_ALLOC(484);
+			CHECK_RU_ALLOC(996);
+			CHECK_RU_ALLOC(2x996);
+
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    status->he_ru + 4);
+			break;
+		default:
+			WARN_ONCE(1, "Invalid SU BW %d\n", status->bw);
+		}
+
+		/* ensure 2 byte alignment */
+		while ((pos - (u8 *)rthdr) & 1)
+			pos++;
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+		memcpy(pos, &he, sizeof(he));
+		pos += sizeof(he);
+	}
+
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		/* ensure 2 byte alignment */
+		while ((pos - (u8 *)rthdr) & 1)
+			pos++;
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+		memcpy(pos, &he_mu, sizeof(he_mu));
+		pos += sizeof(he_mu);
+	}
+
 	for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
 		*pos++ = status->chain_signal[chain];
 		*pos++ = chain;
@@ -613,6 +723,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		rcu_dereference(local->monitor_sdata);
 	bool only_monitor = false;
 
+	if (status->flag & RX_FLAG_RADIOTAP_HE)
+		rtap_space += sizeof(struct ieee80211_radiotap_he);
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
+		rtap_space += sizeof(struct ieee80211_radiotap_he_mu);
+
 	if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
 		struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
 
@@ -3386,8 +3502,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
 		status = IEEE80211_SKB_RXCB((rx->skb));
 
 		sband = rx->local->hw.wiphy->bands[status->band];
-		if (!(status->encoding == RX_ENC_HT) &&
-		    !(status->encoding == RX_ENC_VHT))
+		if (status->encoding == RX_ENC_LEGACY)
 			rate = &sband->bitrates[status->rate_idx];
 
 		ieee80211_rx_cooked_monitor(rx, rate);
@@ -4386,6 +4501,14 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 				      status->rate_idx, status->nss))
 				goto drop;
 			break;
+		case RX_ENC_HE:
+			if (WARN_ONCE(status->rate_idx > 11 ||
+				      !status->nss ||
+				      status->nss > 8,
+				      "Rate marked as an HE rate but data is invalid: MCS: %d, NSS: %d\n",
+				      status->rate_idx, status->nss))
+				goto drop;
+			break;
 		default:
 			WARN_ON_ONCE(1);
 			/* fall through */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index aa96fddfbfc2..aa8fe771a8db 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1323,6 +1323,11 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 
+	/* Don't send NDPs when STA is connected HE */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	if (qos) {
 		fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
 				 IEEE80211_STYPE_QOS_NULLFUNC |
@@ -1968,7 +1973,7 @@ sta_get_last_rx_stats(struct sta_info *sta)
 	return stats;
 }
 
-static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
+static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
 				  struct rate_info *rinfo)
 {
 	rinfo->bw = STA_STATS_GET(BW, rate);
@@ -2005,6 +2010,14 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
 		rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
 		break;
 		}
+	case STA_STATS_RATE_TYPE_HE:
+		rinfo->flags = RATE_INFO_FLAGS_HE_MCS;
+		rinfo->mcs = STA_STATS_GET(HE_MCS, rate);
+		rinfo->nss = STA_STATS_GET(HE_NSS, rate);
+		rinfo->he_gi = STA_STATS_GET(HE_GI, rate);
+		rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate);
+		rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate);
+		break;
 	}
 }
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 81b35f623792..9a04327d71d1 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -170,7 +170,7 @@ struct tid_ampdu_tx {
 	u8 dialog_token;
 	u8 stop_initiator;
 	bool tx_stop;
-	u8 buf_size;
+	u16 buf_size;
 
 	u16 failed_bar_ssn;
 	bool bar_pending;
@@ -405,7 +405,7 @@ struct ieee80211_sta_rx_stats {
 	int last_signal;
 	u8 chains;
 	s8 chain_signal_last[IEEE80211_MAX_CHAINS];
-	u16 last_rate;
+	u32 last_rate;
 	struct u64_stats_sync syncp;
 	u64 bytes;
 	u64 msdu[IEEE80211_NUM_TIDS + 1];
@@ -764,6 +764,7 @@ enum sta_stats_type {
 	STA_STATS_RATE_TYPE_LEGACY,
 	STA_STATS_RATE_TYPE_HT,
 	STA_STATS_RATE_TYPE_VHT,
+	STA_STATS_RATE_TYPE_HE,
 };
 
 #define STA_STATS_FIELD_HT_MCS		GENMASK( 7,  0)
@@ -771,9 +772,14 @@ enum sta_stats_type {
 #define STA_STATS_FIELD_LEGACY_BAND	GENMASK( 7,  4)
 #define STA_STATS_FIELD_VHT_MCS		GENMASK( 3,  0)
 #define STA_STATS_FIELD_VHT_NSS		GENMASK( 7,  4)
+#define STA_STATS_FIELD_HE_MCS		GENMASK( 3,  0)
+#define STA_STATS_FIELD_HE_NSS		GENMASK( 7,  4)
 #define STA_STATS_FIELD_BW		GENMASK(11,  8)
 #define STA_STATS_FIELD_SGI		GENMASK(12, 12)
 #define STA_STATS_FIELD_TYPE		GENMASK(15, 13)
+#define STA_STATS_FIELD_HE_RU		GENMASK(18, 16)
+#define STA_STATS_FIELD_HE_GI		GENMASK(20, 19)
+#define STA_STATS_FIELD_HE_DCM		GENMASK(21, 21)
 
 #define STA_STATS_FIELD(_n, _v)		FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
 #define STA_STATS_GET(_n, _v)		FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
@@ -782,7 +788,7 @@ enum sta_stats_type {
 
 static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
 {
-	u16 r;
+	u32 r;
 
 	r = STA_STATS_FIELD(BW, s->bw);
 
@@ -804,6 +810,14 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
 		r |= STA_STATS_FIELD(LEGACY_BAND, s->band);
 		r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx);
 		break;
+	case RX_ENC_HE:
+		r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HE);
+		r |= STA_STATS_FIELD(HE_NSS, s->nss);
+		r |= STA_STATS_FIELD(HE_MCS, s->rate_idx);
+		r |= STA_STATS_FIELD(HE_GI, s->he_gi);
+		r |= STA_STATS_FIELD(HE_RU, s->he_ru);
+		r |= STA_STATS_FIELD(HE_DCM, s->he_dcm);
+		break;
 	default:
 		WARN_ON(1);
 		return STA_STATS_RATE_INVALID;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 80a7edf8d314..0ab69a1964f8 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -92,7 +92,7 @@
 				STA_ENTRY						\
 				__field(u16, tid)					\
 				__field(u16, ssn)					\
-				__field(u8, buf_size)					\
+				__field(u16, buf_size)					\
 				__field(bool, amsdu)					\
 				__field(u16, timeout)					\
 				__field(u16, action)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index b744b10465c3..c77c84325348 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1095,6 +1095,21 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			if (elen >= sizeof(*elems->max_idle_period_ie))
 				elems->max_idle_period_ie = (void *)pos;
 			break;
+		case WLAN_EID_EXTENSION:
+			if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
+			    elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
+				elems->mu_edca_param_set = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) {
+				elems->he_cap = (void *)&pos[1];
+				elems->he_cap_len = elen - 1;
+			} else if (pos[0] == WLAN_EID_EXT_HE_OPERATION &&
+				   elen >= sizeof(*elems->he_operation) &&
+				   elen >= ieee80211_he_oper_size(&pos[1])) {
+				elems->he_operation = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
+				elems->uora_element = (void *)&pos[1];
+			}
+			break;
 		default:
 			break;
 		}
@@ -1356,6 +1371,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 					 size_t *offset, u32 flags)
 {
 	struct ieee80211_supported_band *sband;
+	const struct ieee80211_sta_he_cap *he_cap;
 	u8 *pos = buffer, *end = buffer + buffer_len;
 	size_t noffset;
 	int supp_rates_len, i;
@@ -1463,11 +1479,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 						sband->ht_cap.cap);
 	}
 
-	/*
-	 * If adding more here, adjust code in main.c
-	 * that calculates local->scan_ies_len.
-	 */
-
 	/* insert custom IEs that go before VHT */
 	if (ie && ie_len) {
 		static const u8 before_vht[] = {
@@ -1510,6 +1521,39 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 						 sband->vht_cap.cap);
 	}
 
+	/* insert custom IEs that go before HE */
+	if (ie && ie_len) {
+		static const u8 before_he[] = {
+			/*
+			 * no need to list the ones split off before VHT
+			 * or generated here
+			 */
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS,
+			WLAN_EID_AP_CSN,
+			/* TODO: add 11ah/11aj/11ak elements */
+		};
+		noffset = ieee80211_ie_split(ie, ie_len,
+					     before_he, ARRAY_SIZE(before_he),
+					     *offset);
+		if (end - pos < noffset - *offset)
+			goto out_err;
+		memcpy(pos, ie + *offset, noffset - *offset);
+		pos += noffset - *offset;
+		*offset = noffset;
+	}
+
+	he_cap = ieee80211_get_he_sta_cap(sband);
+	if (he_cap) {
+		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
+		if (!pos)
+			goto out_err;
+	}
+
+	/*
+	 * If adding more here, adjust code in main.c
+	 * that calculates local->scan_ies_len.
+	 */
+
 	return pos - buffer;
  out_err:
 	WARN_ONCE(1, "not enough space for preq IEs\n");
@@ -2396,6 +2440,72 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 	return pos;
 }
 
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+			      const struct ieee80211_sta_he_cap *he_cap,
+			      u8 *end)
+{
+	u8 n;
+	u8 ie_len;
+	u8 *orig_pos = pos;
+
+	/* Make sure we have place for the IE */
+	/*
+	 * TODO: the 1 added is because this temporarily is under the EXTENSION
+	 * IE. Get rid of it when it moves.
+	 */
+	if (!he_cap)
+		return orig_pos;
+
+	n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+	ie_len = 2 + 1 +
+		 sizeof(he_cap->he_cap_elem) + n +
+		 ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+				       he_cap->he_cap_elem.phy_cap_info);
+
+	if ((end - pos) < ie_len)
+		return orig_pos;
+
+	*pos++ = WLAN_EID_EXTENSION;
+	pos++; /* We'll set the size later below */
+	*pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+
+	/* Fixed data */
+	memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
+	pos += sizeof(he_cap->he_cap_elem);
+
+	memcpy(pos, &he_cap->he_mcs_nss_supp, n);
+	pos += n;
+
+	/* Check if PPE Threshold should be present */
+	if ((he_cap->he_cap_elem.phy_cap_info[6] &
+	     IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+		goto end;
+
+	/*
+	 * Calculate how many PPET16/PPET8 pairs are to come. Algorithm:
+	 * (NSS_M1 + 1) x (num of 1 bits in RU_INDEX_BITMASK)
+	 */
+	n = hweight8(he_cap->ppe_thres[0] &
+		     IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+	n *= (1 + ((he_cap->ppe_thres[0] & IEEE80211_PPE_THRES_NSS_MASK) >>
+		   IEEE80211_PPE_THRES_NSS_POS));
+
+	/*
+	 * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+	 * total size.
+	 */
+	n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+	n = DIV_ROUND_UP(n, 8);
+
+	/* Copy PPE Thresholds */
+	memcpy(pos, &he_cap->ppe_thres, n);
+	pos += n;
+
+end:
+	orig_pos[1] = (pos - orig_pos) - 2;
+	return pos;
+}
+
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			       const struct cfg80211_chan_def *chandef,
 			       u16 prot_mode, bool rifs_mode)
-- 
cgit v1.2.3


From 0e622e80191e75c99b6ecc265c140a37d81e7a63 Mon Sep 17 00:00:00 2001
From: Sricharan R <sricharan@codeaurora.org>
Date: Mon, 4 Jun 2018 13:30:35 -0700
Subject: remoteproc: qcom: mdt_loader: Make the firmware authentication
 optional

qcom_mdt_load function loads the mdt type firmware and
initialises the secure memory as well. Make the initialisation only
when requested by the caller, so that the function can be used
by self-authenticating remoteproc as well.

Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Sricharan R <sricharan@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/soc/qcom/mdt_loader.c       | 87 ++++++++++++++++++++++++++-----------
 include/linux/soc/qcom/mdt_loader.h |  4 ++
 2 files changed, 66 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c
index dc09d7ac905f..1c488024c698 100644
--- a/drivers/soc/qcom/mdt_loader.c
+++ b/drivers/soc/qcom/mdt_loader.c
@@ -74,23 +74,10 @@ ssize_t qcom_mdt_get_size(const struct firmware *fw)
 }
 EXPORT_SYMBOL_GPL(qcom_mdt_get_size);
 
-/**
- * qcom_mdt_load() - load the firmware which header is loaded as fw
- * @dev:	device handle to associate resources with
- * @fw:		firmware object for the mdt file
- * @firmware:	name of the firmware, for construction of segment file names
- * @pas_id:	PAS identifier
- * @mem_region:	allocated memory region to load firmware into
- * @mem_phys:	physical address of allocated memory region
- * @mem_size:	size of the allocated memory region
- * @reloc_base:	adjusted physical address after relocation
- *
- * Returns 0 on success, negative errno otherwise.
- */
-int qcom_mdt_load(struct device *dev, const struct firmware *fw,
-		  const char *firmware, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size,
-		  phys_addr_t *reloc_base)
+static int __qcom_mdt_load(struct device *dev, const struct firmware *fw,
+			   const char *firmware, int pas_id, void *mem_region,
+			   phys_addr_t mem_phys, size_t mem_size,
+			   phys_addr_t *reloc_base, bool pas_init)
 {
 	const struct elf32_phdr *phdrs;
 	const struct elf32_phdr *phdr;
@@ -121,10 +108,12 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 	if (!fw_name)
 		return -ENOMEM;
 
-	ret = qcom_scm_pas_init_image(pas_id, fw->data, fw->size);
-	if (ret) {
-		dev_err(dev, "invalid firmware metadata\n");
-		goto out;
+	if (pas_init) {
+		ret = qcom_scm_pas_init_image(pas_id, fw->data, fw->size);
+		if (ret) {
+			dev_err(dev, "invalid firmware metadata\n");
+			goto out;
+		}
 	}
 
 	for (i = 0; i < ehdr->e_phnum; i++) {
@@ -144,10 +133,13 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 	}
 
 	if (relocate) {
-		ret = qcom_scm_pas_mem_setup(pas_id, mem_phys, max_addr - min_addr);
-		if (ret) {
-			dev_err(dev, "unable to setup relocation\n");
-			goto out;
+		if (pas_init) {
+			ret = qcom_scm_pas_mem_setup(pas_id, mem_phys,
+						     max_addr - min_addr);
+			if (ret) {
+				dev_err(dev, "unable to setup relocation\n");
+				goto out;
+			}
 		}
 
 		/*
@@ -202,7 +194,52 @@ out:
 
 	return ret;
 }
+
+/**
+ * qcom_mdt_load() - load the firmware which header is loaded as fw
+ * @dev:	device handle to associate resources with
+ * @fw:		firmware object for the mdt file
+ * @firmware:	name of the firmware, for construction of segment file names
+ * @pas_id:	PAS identifier
+ * @mem_region:	allocated memory region to load firmware into
+ * @mem_phys:	physical address of allocated memory region
+ * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int qcom_mdt_load(struct device *dev, const struct firmware *fw,
+		  const char *firmware, int pas_id, void *mem_region,
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base)
+{
+	return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys,
+			       mem_size, reloc_base, true);
+}
 EXPORT_SYMBOL_GPL(qcom_mdt_load);
 
+/**
+ * qcom_mdt_load_no_init() - load the firmware which header is loaded as fw
+ * @dev:	device handle to associate resources with
+ * @fw:		firmware object for the mdt file
+ * @firmware:	name of the firmware, for construction of segment file names
+ * @pas_id:	PAS identifier
+ * @mem_region:	allocated memory region to load firmware into
+ * @mem_phys:	physical address of allocated memory region
+ * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw,
+			  const char *firmware, int pas_id,
+			  void *mem_region, phys_addr_t mem_phys,
+			  size_t mem_size, phys_addr_t *reloc_base)
+{
+	return __qcom_mdt_load(dev, fw, firmware, pas_id, mem_region, mem_phys,
+			       mem_size, reloc_base, false);
+}
+EXPORT_SYMBOL_GPL(qcom_mdt_load_no_init);
+
 MODULE_DESCRIPTION("Firmware parser for Qualcomm MDT format");
 MODULE_LICENSE("GPL v2");
diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h
index 5b98bbdabc25..944b06aefb0f 100644
--- a/include/linux/soc/qcom/mdt_loader.h
+++ b/include/linux/soc/qcom/mdt_loader.h
@@ -17,4 +17,8 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  phys_addr_t mem_phys, size_t mem_size,
 		  phys_addr_t *reloc_base);
 
+int qcom_mdt_load_no_init(struct device *dev, const struct firmware *fw,
+			  const char *fw_name, int pas_id, void *mem_region,
+			  phys_addr_t mem_phys, size_t mem_size,
+			  phys_addr_t *reloc_base);
 #endif
-- 
cgit v1.2.3


From d904ac0320d3c4ff4e9d80e4294ca5dde803696f Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 5 Jun 2018 11:45:07 -0400
Subject: audit: rename FILTER_TYPE to FILTER_EXCLUDE

The AUDIT_FILTER_TYPE name is vague and misleading due to not describing
where or when the filter is applied and obsolete due to its available
filter fields having been expanded.

Userspace has already renamed it from AUDIT_FILTER_TYPE to
AUDIT_FILTER_EXCLUDE without checking if it already exists.  The
userspace maintainer assures that as long as it is set to the same value
it will not be a problem since the userspace code does not treat
compiler warnings as errors.  If this policy changes then checks if it
already exists can be added at the same time.

See: https://github.com/linux-audit/audit-kernel/issues/89

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/uapi/linux/audit.h |  3 ++-
 kernel/audit.c             |  2 +-
 kernel/auditfilter.c       | 10 +++++-----
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index c35aee9ad4a6..4e3eaba84175 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -157,7 +157,8 @@
 #define AUDIT_FILTER_ENTRY	0x02	/* Apply rule at syscall entry */
 #define AUDIT_FILTER_WATCH	0x03	/* Apply rule to file system watches */
 #define AUDIT_FILTER_EXIT	0x04	/* Apply rule at syscall exit */
-#define AUDIT_FILTER_TYPE	0x05	/* Apply rule at audit_log_start */
+#define AUDIT_FILTER_EXCLUDE	0x05	/* Apply rule before record creation */
+#define AUDIT_FILTER_TYPE	AUDIT_FILTER_EXCLUDE /* obsolete misleading naming */
 #define AUDIT_FILTER_FS		0x06	/* Apply rule at __audit_inode_child */
 
 #define AUDIT_NR_FILTERS	7
diff --git a/kernel/audit.c b/kernel/audit.c
index e7478cb58079..5c0a1d7b0c7b 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1754,7 +1754,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 	if (audit_initialized != AUDIT_INITIALIZED)
 		return NULL;
 
-	if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
+	if (unlikely(!audit_filter(type, AUDIT_FILTER_EXCLUDE)))
 		return NULL;
 
 	/* NOTE: don't ever fail/sleep on these two conditions:
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 6db9847ca031..bf309f2592c4 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -264,7 +264,7 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule_data *
 	case AUDIT_FILTER_TASK:
 #endif
 	case AUDIT_FILTER_USER:
-	case AUDIT_FILTER_TYPE:
+	case AUDIT_FILTER_EXCLUDE:
 	case AUDIT_FILTER_FS:
 		;
 	}
@@ -337,7 +337,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 {
 	switch(f->type) {
 	case AUDIT_MSGTYPE:
-		if (entry->rule.listnr != AUDIT_FILTER_TYPE &&
+		if (entry->rule.listnr != AUDIT_FILTER_EXCLUDE &&
 		    entry->rule.listnr != AUDIT_FILTER_USER)
 			return -EINVAL;
 		break;
@@ -929,7 +929,7 @@ static inline int audit_add_rule(struct audit_entry *entry)
 	/* If any of these, don't count towards total */
 	switch(entry->rule.listnr) {
 	case AUDIT_FILTER_USER:
-	case AUDIT_FILTER_TYPE:
+	case AUDIT_FILTER_EXCLUDE:
 	case AUDIT_FILTER_FS:
 		dont_count = 1;
 	}
@@ -1011,7 +1011,7 @@ int audit_del_rule(struct audit_entry *entry)
 	/* If any of these, don't count towards total */
 	switch(entry->rule.listnr) {
 	case AUDIT_FILTER_USER:
-	case AUDIT_FILTER_TYPE:
+	case AUDIT_FILTER_EXCLUDE:
 	case AUDIT_FILTER_FS:
 		dont_count = 1;
 	}
@@ -1372,7 +1372,7 @@ int audit_filter(int msgtype, unsigned int listtype)
 				break;
 		}
 		if (result > 0) {
-			if (e->rule.action == AUDIT_NEVER || listtype == AUDIT_FILTER_TYPE)
+			if (e->rule.action == AUDIT_NEVER || listtype == AUDIT_FILTER_EXCLUDE)
 				ret = 0;
 			break;
 		}
-- 
cgit v1.2.3


From f7859590d97614815b35a755c8213dfb8f2766bd Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 5 Jun 2018 19:20:39 -0400
Subject: audit: eliminate audit_enabled magic number comparison

Remove comparison of audit_enabled to magic numbers outside of audit.

Related: https://github.com/linux-audit/audit-kernel/issues/86

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 drivers/tty/tty_audit.c      | 2 +-
 include/linux/audit.h        | 5 ++++-
 include/net/xfrm.h           | 2 +-
 kernel/audit.c               | 3 ---
 net/netfilter/xt_AUDIT.c     | 2 +-
 net/netlabel/netlabel_user.c | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index e30aa6bf9ff9..50f567b6a66e 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -92,7 +92,7 @@ static void tty_audit_buf_push(struct tty_audit_buf *buf)
 {
 	if (buf->valid == 0)
 		return;
-	if (audit_enabled == 0) {
+	if (audit_enabled == AUDIT_OFF) {
 		buf->valid = 0;
 		return;
 	}
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 69c78477590b..9334fbef7bae 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -117,6 +117,9 @@ struct filename;
 
 extern void audit_log_session_info(struct audit_buffer *ab);
 
+#define AUDIT_OFF	0
+#define AUDIT_ON	1
+#define AUDIT_LOCKED	2
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
@@ -202,7 +205,7 @@ static inline int audit_log_task_context(struct audit_buffer *ab)
 static inline void audit_log_task_info(struct audit_buffer *ab,
 				       struct task_struct *tsk)
 { }
-#define audit_enabled 0
+#define audit_enabled AUDIT_OFF
 #endif /* CONFIG_AUDIT */
 
 #ifdef CONFIG_AUDIT_COMPAT_GENERIC
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..f7f297727ed8 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -735,7 +735,7 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
 {
 	struct audit_buffer *audit_buf = NULL;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		return NULL;
 	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC,
 				    AUDIT_MAC_IPSEC_EVENT);
diff --git a/kernel/audit.c b/kernel/audit.c
index 5c0a1d7b0c7b..0f3222e4edde 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -83,9 +83,6 @@
 #define AUDIT_INITIALIZED	1
 static int	audit_initialized;
 
-#define AUDIT_OFF	0
-#define AUDIT_ON	1
-#define AUDIT_LOCKED	2
 u32		audit_enabled = AUDIT_OFF;
 bool		audit_ever_enabled = !!AUDIT_OFF;
 
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index f368ee6741db..af883f1b64f9 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -72,7 +72,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct audit_buffer *ab;
 	int fam = -1;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		goto errout;
 	ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
 	if (ab == NULL)
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 2f328af91a52..4676f5bb16ae 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -101,7 +101,7 @@ struct audit_buffer *netlbl_audit_start_common(int type,
 	char *secctx;
 	u32 secctx_len;
 
-	if (audit_enabled == 0)
+	if (audit_enabled == AUDIT_OFF)
 		return NULL;
 
 	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, type);
-- 
cgit v1.2.3


From cbaa7f0bdbee1969bb311c641abbd0d2af6ba861 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 18 Jun 2018 13:29:37 -0500
Subject: ASoC: Intel: move SKL+ codec ACPI tables to common directory

No functionality change, just move to common tables to make it easier
to deal with SOF and share the same machine drivers - as done
previously for BYT/CHT/HSW/BDW.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi-intel-match.h              |   5 +
 sound/soc/intel/common/Makefile                   |   6 +-
 sound/soc/intel/common/soc-acpi-intel-bxt-match.c |  35 +++++
 sound/soc/intel/common/soc-acpi-intel-cnl-match.c |  29 ++++
 sound/soc/intel/common/soc-acpi-intel-glk-match.c |  23 +++
 sound/soc/intel/common/soc-acpi-intel-kbl-match.c |  91 ++++++++++++
 sound/soc/intel/common/soc-acpi-intel-skl-match.c |  47 +++++++
 sound/soc/intel/skylake/skl.c                     | 162 +---------------------
 8 files changed, 241 insertions(+), 157 deletions(-)
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-bxt-match.c
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-cnl-match.c
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-glk-match.c
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-kbl-match.c
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-skl-match.c

(limited to 'include')

diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h
index 9da6388c20a1..917ddd0f2762 100644
--- a/include/sound/soc-acpi-intel-match.h
+++ b/include/sound/soc-acpi-intel-match.h
@@ -29,5 +29,10 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_broadwell_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_legacy_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_baytrail_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cherrytrail_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_skl_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_kbl_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_machines[];
 
 #endif
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index 7379d8830c39..915a34cdc8ac 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -3,7 +3,11 @@ snd-soc-sst-dsp-objs := sst-dsp.o
 snd-soc-sst-acpi-objs := sst-acpi.o
 snd-soc-sst-ipc-objs := sst-ipc.o
 snd-soc-sst-firmware-objs := sst-firmware.o
-snd-soc-acpi-intel-match-objs := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-match.o soc-acpi-intel-hsw-bdw-match.o
+snd-soc-acpi-intel-match-objs := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-match.o \
+	soc-acpi-intel-hsw-bdw-match.o \
+	soc-acpi-intel-skl-match.o soc-acpi-intel-kbl-match.o \
+	soc-acpi-intel-bxt-match.o soc-acpi-intel-glk-match.o \
+	soc-acpi-intel-cnl-match.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
 obj-$(CONFIG_SND_SOC_INTEL_SST_ACPI) += snd-soc-sst-acpi.o
diff --git a/sound/soc/intel/common/soc-acpi-intel-bxt-match.c b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c
new file mode 100644
index 000000000000..569f1de97e82
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-bxt-match.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-bxt-match.c - tables and support for BXT ACPI enumeration.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+
+static struct snd_soc_acpi_codecs bxt_codecs = {
+	.num_codecs = 1,
+	.codecs = {"MX98357A"}
+};
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[] = {
+	{
+		.id = "INT343A",
+		.drv_name = "bxt_alc298s_i2s",
+		.fw_filename = "intel/dsp_fw_bxtn.bin",
+	},
+	{
+		.id = "DLGS7219",
+		.drv_name = "bxt_da7219_max98357a",
+		.fw_filename = "intel/dsp_fw_bxtn.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &bxt_codecs,
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_bxt_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
new file mode 100644
index 000000000000..b83ee053d417
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-cnl-match.c - tables and support for CNL ACPI enumeration.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include "../skylake/skl.h"
+
+static struct skl_machine_pdata cnl_pdata = {
+	.use_tplg_pcm = true,
+};
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_machines[] = {
+	{
+		.id = "INT34C2",
+		.drv_name = "cnl_rt274",
+		.fw_filename = "intel/dsp_fw_cnl.bin",
+		.pdata = &cnl_pdata,
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cnl_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/common/soc-acpi-intel-glk-match.c b/sound/soc/intel/common/soc-acpi-intel-glk-match.c
new file mode 100644
index 000000000000..dee09439e7cb
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-glk-match.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-glk-match.c - tables and support for GLK ACPI enumeration.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[] = {
+	{
+		.id = "INT343A",
+		.drv_name = "glk_alc298s_i2s",
+		.fw_filename = "intel/dsp_fw_glk.bin",
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_glk_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/common/soc-acpi-intel-kbl-match.c b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c
new file mode 100644
index 000000000000..0ee173ca437d
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-kbl-match.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-kbl-match.c - tables and support for KBL ACPI enumeration.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include "../skylake/skl.h"
+
+static struct skl_machine_pdata skl_dmic_data;
+
+static struct snd_soc_acpi_codecs kbl_codecs = {
+	.num_codecs = 1,
+	.codecs = {"10508825"}
+};
+
+static struct snd_soc_acpi_codecs kbl_poppy_codecs = {
+	.num_codecs = 1,
+	.codecs = {"10EC5663"}
+};
+
+static struct snd_soc_acpi_codecs kbl_5663_5514_codecs = {
+	.num_codecs = 2,
+	.codecs = {"10EC5663", "10EC5514"}
+};
+
+static struct snd_soc_acpi_codecs kbl_7219_98357_codecs = {
+	.num_codecs = 1,
+	.codecs = {"MX98357A"}
+};
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_kbl_machines[] = {
+	{
+		.id = "INT343A",
+		.drv_name = "kbl_alc286s_i2s",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+	},
+	{
+		.id = "INT343B",
+		.drv_name = "kbl_n88l25_s4567",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &kbl_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{
+		.id = "MX98357A",
+		.drv_name = "kbl_n88l25_m98357a",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &kbl_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{
+		.id = "MX98927",
+		.drv_name = "kbl_r5514_5663_max",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &kbl_5663_5514_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{
+		.id = "MX98927",
+		.drv_name = "kbl_rt5663_m98927",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &kbl_poppy_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{
+		.id = "10EC5663",
+		.drv_name = "kbl_rt5663",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+	},
+	{
+		.id = "DLGS7219",
+		.drv_name = "kbl_da7219_max98357a",
+		.fw_filename = "intel/dsp_fw_kbl.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &kbl_7219_98357_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_kbl_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/common/soc-acpi-intel-skl-match.c b/sound/soc/intel/common/soc-acpi-intel-skl-match.c
new file mode 100644
index 000000000000..0c9c0edd35b3
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-skl-match.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-skl-match.c - tables and support for SKL ACPI enumeration.
+ *
+ * Copyright (c) 2018, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include "../skylake/skl.h"
+
+static struct skl_machine_pdata skl_dmic_data;
+
+static struct snd_soc_acpi_codecs skl_codecs = {
+	.num_codecs = 1,
+	.codecs = {"10508825"}
+};
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_skl_machines[] = {
+	{
+		.id = "INT343A",
+		.drv_name = "skl_alc286s_i2s",
+		.fw_filename = "intel/dsp_fw_release.bin",
+	},
+	{
+		.id = "INT343B",
+		.drv_name = "skl_n88l25_s4567",
+		.fw_filename = "intel/dsp_fw_release.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &skl_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{
+		.id = "MX98357A",
+		.drv_name = "skl_n88l25_m98357a",
+		.fw_filename = "intel/dsp_fw_release.bin",
+		.machine_quirk = snd_soc_acpi_codec_list,
+		.quirk_data = &skl_codecs,
+		.pdata = &skl_dmic_data,
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_skl_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 6dec748e8949..670ff9aaca55 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -29,6 +29,7 @@
 #include <linux/delay.h>
 #include <sound/pcm.h>
 #include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
 #include <sound/hda_register.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_i915.h>
@@ -36,8 +37,6 @@
 #include "skl-sst-dsp.h"
 #include "skl-sst-ipc.h"
 
-static struct skl_machine_pdata skl_dmic_data;
-
 /*
  * initialize the PCI registers
  */
@@ -1026,172 +1025,23 @@ static void skl_remove(struct pci_dev *pci)
 	dev_set_drvdata(&pci->dev, NULL);
 }
 
-static struct snd_soc_acpi_codecs skl_codecs = {
-	.num_codecs = 1,
-	.codecs = {"10508825"}
-};
-
-static struct snd_soc_acpi_codecs kbl_codecs = {
-	.num_codecs = 1,
-	.codecs = {"10508825"}
-};
-
-static struct snd_soc_acpi_codecs bxt_codecs = {
-	.num_codecs = 1,
-	.codecs = {"MX98357A"}
-};
-
-static struct snd_soc_acpi_codecs kbl_poppy_codecs = {
-	.num_codecs = 1,
-	.codecs = {"10EC5663"}
-};
-
-static struct snd_soc_acpi_codecs kbl_5663_5514_codecs = {
-	.num_codecs = 2,
-	.codecs = {"10EC5663", "10EC5514"}
-};
-
-static struct snd_soc_acpi_codecs kbl_7219_98357_codecs = {
-	.num_codecs = 1,
-	.codecs = {"MX98357A"}
-};
-
-static struct skl_machine_pdata cnl_pdata = {
-	.use_tplg_pcm = true,
-};
-
-static struct snd_soc_acpi_mach sst_skl_devdata[] = {
-	{
-		.id = "INT343A",
-		.drv_name = "skl_alc286s_i2s",
-		.fw_filename = "intel/dsp_fw_release.bin",
-	},
-	{
-		.id = "INT343B",
-		.drv_name = "skl_n88l25_s4567",
-		.fw_filename = "intel/dsp_fw_release.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &skl_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{
-		.id = "MX98357A",
-		.drv_name = "skl_n88l25_m98357a",
-		.fw_filename = "intel/dsp_fw_release.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &skl_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{}
-};
-
-static struct snd_soc_acpi_mach sst_bxtp_devdata[] = {
-	{
-		.id = "INT343A",
-		.drv_name = "bxt_alc298s_i2s",
-		.fw_filename = "intel/dsp_fw_bxtn.bin",
-	},
-	{
-		.id = "DLGS7219",
-		.drv_name = "bxt_da7219_max98357a",
-		.fw_filename = "intel/dsp_fw_bxtn.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &bxt_codecs,
-	},
-	{}
-};
-
-static struct snd_soc_acpi_mach sst_kbl_devdata[] = {
-	{
-		.id = "INT343A",
-		.drv_name = "kbl_alc286s_i2s",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-	},
-	{
-		.id = "INT343B",
-		.drv_name = "kbl_n88l25_s4567",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &kbl_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{
-		.id = "MX98357A",
-		.drv_name = "kbl_n88l25_m98357a",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &kbl_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{
-		.id = "MX98927",
-		.drv_name = "kbl_r5514_5663_max",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &kbl_5663_5514_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{
-		.id = "MX98927",
-		.drv_name = "kbl_rt5663_m98927",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &kbl_poppy_codecs,
-		.pdata = &skl_dmic_data
-	},
-	{
-		.id = "10EC5663",
-		.drv_name = "kbl_rt5663",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-	},
-	{
-		.id = "DLGS7219",
-		.drv_name = "kbl_da7219_max98357a",
-		.fw_filename = "intel/dsp_fw_kbl.bin",
-		.machine_quirk = snd_soc_acpi_codec_list,
-		.quirk_data = &kbl_7219_98357_codecs,
-		.pdata = &skl_dmic_data
-	},
-
-	{}
-};
-
-static struct snd_soc_acpi_mach sst_glk_devdata[] = {
-	{
-		.id = "INT343A",
-		.drv_name = "glk_alc298s_i2s",
-		.fw_filename = "intel/dsp_fw_glk.bin",
-	},
-	{}
-};
-
-static const struct snd_soc_acpi_mach sst_cnl_devdata[] = {
-	{
-		.id = "INT34C2",
-		.drv_name = "cnl_rt274",
-		.fw_filename = "intel/dsp_fw_cnl.bin",
-		.pdata = &cnl_pdata,
-	},
-	{}
-};
-
 /* PCI IDs */
 static const struct pci_device_id skl_ids[] = {
 	/* Sunrise Point-LP */
 	{ PCI_DEVICE(0x8086, 0x9d70),
-		.driver_data = (unsigned long)&sst_skl_devdata},
+		.driver_data = (unsigned long)&snd_soc_acpi_intel_skl_machines},
 	/* BXT-P */
 	{ PCI_DEVICE(0x8086, 0x5a98),
-		.driver_data = (unsigned long)&sst_bxtp_devdata},
+		.driver_data = (unsigned long)&snd_soc_acpi_intel_bxt_machines},
 	/* KBL */
 	{ PCI_DEVICE(0x8086, 0x9D71),
-		.driver_data = (unsigned long)&sst_kbl_devdata},
+		.driver_data = (unsigned long)&snd_soc_acpi_intel_kbl_machines},
 	/* GLK */
 	{ PCI_DEVICE(0x8086, 0x3198),
-		.driver_data = (unsigned long)&sst_glk_devdata},
+		.driver_data = (unsigned long)&snd_soc_acpi_intel_glk_machines},
 	/* CNL */
 	{ PCI_DEVICE(0x8086, 0x9dc8),
-		.driver_data = (unsigned long)&sst_cnl_devdata},
+		.driver_data = (unsigned long)&snd_soc_acpi_intel_cnl_machines},
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, skl_ids);
-- 
cgit v1.2.3


From 6362f0a290023bafd7f991089e81dd9278f154b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:48 -0600
Subject: libata: add command iterator helpers

Now that we have the internal tag as a special (higher) value tag,
it gets a bit tricky to iterate the internal commands as some loops
will exceed ATA_MAX_QUEUE. Add explicit helpers for iterating pending
commands, both inflight and internal.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..a2257e380789 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1495,6 +1495,29 @@ static inline bool ata_tag_valid(unsigned int tag)
 	return tag < ATA_MAX_QUEUE || ata_tag_internal(tag);
 }
 
+#define __ata_qc_for_each(ap, qc, tag, max_tag, fn)		\
+	for ((tag) = 0; (tag) < (max_tag) &&			\
+	     ({ qc = fn((ap), (tag)); 1; }); (tag)++)		\
+
+/*
+ * Internal use only, iterate commands ignoring error handling and
+ * status of 'qc'.
+ */
+#define ata_qc_for_each_raw(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag)
+
+/*
+ * Iterate all potential commands that can be queued
+ */
+#define ata_qc_for_each(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag)
+
+/*
+ * Like ata_qc_for_each, but with the internal tag included
+ */
+#define ata_qc_for_each_with_internal(ap, qc, tag)			\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag)
+
 /*
  * device helpers
  */
-- 
cgit v1.2.3


From 38b7ca927d6a12bf4466be22a90b7d998f5ae69a Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Mar 2018 14:36:27 +0200
Subject: net/mlx5: Expose DEVX specification

This patch updates the mlx5_ifc structures and
command interface to support DEVX.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |  4 ++
 include/linux/mlx5/device.h                   |  3 ++
 include/linux/mlx5/mlx5_ifc.h                 | 67 ++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index b99d6df3905b..d07f24de8fa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -310,6 +310,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_DESTROY_QP:
+	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -427,6 +428,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_FPGA_MODIFY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -599,6 +601,8 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP);
 	MLX5_COMMAND_STR_CASE(FPGA_QUERY_QP_COUNTERS);
 	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
+	MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 02f72ebf31a7..f8671c0a43aa 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1071,6 +1071,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_GEN(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
 
+#define MLX5_CAP_GEN_64(mdev, cap) \
+	MLX5_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
 #define MLX5_CAP_GEN_MAX(mdev, cap) \
 	MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 27134c4fcb76..f810772e80c0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -75,6 +75,15 @@ enum {
 	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 };
 
+enum {
+	MLX5_GENERAL_OBJ_TYPES_CAP_UCTX = (1ULL << 4),
+	MLX5_GENERAL_OBJ_TYPES_CAP_UMEM = (1ULL << 5),
+};
+
+enum {
+	MLX5_OBJ_TYPE_UCTX = 0x0004,
+};
+
 enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP                 = 0x100,
 	MLX5_CMD_OP_QUERY_ADAPTER                 = 0x101,
@@ -242,6 +251,8 @@ enum {
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
 	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
 	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
+	MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+	MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
 	MLX5_CMD_OP_MAX
 };
 
@@ -1113,7 +1124,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_3f8[0x3];
 	u8         log_max_current_uc_list[0x5];
 
-	u8         reserved_at_400[0x80];
+	u8         general_obj_types[0x40];
+
+	u8         reserved_at_440[0x40];
 
 	u8         reserved_at_480[0x3];
 	u8         log_max_l2_table[0x5];
@@ -9115,4 +9128,56 @@ struct mlx5_ifc_dealloc_memic_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         obj_type[0x10];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_umem_bits {
+	u8         modify_field_select[0x40];
+
+	u8         reserved_at_40[0x5b];
+	u8         log_page_size[0x5];
+
+	u8         page_offset[0x20];
+
+	u8         num_of_mtt[0x40];
+
+	struct mlx5_ifc_mtt_bits  mtt[0];
+};
+
+struct mlx5_ifc_uctx_bits {
+	u8         modify_field_select[0x40];
+
+	u8         reserved_at_40[0x1c0];
+};
+
+struct mlx5_ifc_create_umem_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+	struct mlx5_ifc_umem_bits                     umem;
+};
+
+struct mlx5_ifc_create_uctx_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits   hdr;
+	struct mlx5_ifc_uctx_bits                     uctx;
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From 8762d149e88dea5bc09e0d7faa84b635807167ab Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Sun, 17 Jun 2018 12:59:52 +0300
Subject: IB/uverbs: Add PTR_IN attributes that are allocated/copied
 automatically

Adding UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY flag to PTR_IN attributes.
By using this flag, the parse automatically allocates and copies the
user-space data. This data is accessible by using uverbs_attr_get_len
and uverbs_attr_get_alloced_ptr inline accessor functions from the
handler.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c | 25 ++++++++++++++++++++++-
 include/rdma/uverbs_ioctl.h            | 36 +++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 6759d59a4421..5ac2950978d2 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -114,9 +114,27 @@ static int uverbs_process_attr(struct ib_device *ibdev,
 		    uattr->attr_data.reserved)
 			return -EINVAL;
 
-		e->ptr_attr.data = uattr->data;
 		e->ptr_attr.len = uattr->len;
 		e->ptr_attr.flags = uattr->flags;
+
+		if (val_spec->flags & UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY &&
+		    !uverbs_attr_ptr_is_inline(e)) {
+			void *p;
+
+			p = kvmalloc(uattr->len, GFP_KERNEL);
+			if (!p)
+				return -ENOMEM;
+
+			e->ptr_attr.ptr = p;
+
+			if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+					   uattr->len)) {
+				kvfree(p);
+				return -EFAULT;
+			}
+		} else {
+			e->ptr_attr.data = uattr->data;
+		}
 		break;
 
 	case UVERBS_ATTR_TYPE_IDR:
@@ -200,6 +218,11 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
 					spec->obj.access, commit);
 				if (!ret)
 					ret = current_ret;
+			} else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN &&
+				   spec->flags &
+					   UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY &&
+				   !uverbs_attr_ptr_is_inline(attr)) {
+				kvfree(attr->ptr_attr.ptr);
 			}
 		}
 	}
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index bd6bba3a6e04..11cc40ef1cb6 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -65,6 +65,10 @@ enum {
 	UVERBS_ATTR_SPEC_F_MANDATORY	= 1U << 0,
 	/* Support extending attributes by length, validate all unknown size == zero  */
 	UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
+	/*
+	 * Valid only for PTR_IN. Allocate and copy the data inside the parser
+	 */
+	UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY = 1U << 2,
 };
 
 /* Specification of a single attribute inside the ioctl message */
@@ -323,7 +327,14 @@ struct uverbs_object_tree_def {
  */
 
 struct uverbs_ptr_attr {
-	u64		data;
+	/*
+	 * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is
+	 * used.
+	 */
+	union {
+		void *ptr;
+		u64 data;
+	};
 	u16		len;
 	/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
 	u16		flags;
@@ -431,6 +442,17 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att
 	return attr->obj_attr.uobject;
 }
 
+static inline int
+uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	return attr->ptr_attr.len;
+}
+
 static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
 				 size_t idx, const void *from, size_t size)
 {
@@ -457,6 +479,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
 	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
 }
 
+static inline void *uverbs_attr_get_alloced_ptr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return (void *)attr;
+
+	return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data :
+						 attr->ptr_attr.ptr;
+}
+
 static inline int _uverbs_copy_from(void *to,
 				    const struct uverbs_attr_bundle *attrs_bundle,
 				    size_t idx,
-- 
cgit v1.2.3


From 2d9c1bd7e177bd8b460403db9513b0a223e46ab8 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Sun, 17 Jun 2018 12:59:53 +0300
Subject: IB/uverbs: Add a macro to define a type with no kernel known size

Sometimes the uverbs uAPI  doesn't really care about the structure it gets
from user-space. All it wants to do is to allocate enough space and send
it to the hardware/provider driver. Adding a UVERBS_ATTR_MIN_SIZE that
could be used for this scenarios. We use USHRT_MAX as the kernel known
size to bypass any zero validations.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_std_types.c | 4 ++--
 include/rdma/uverbs_ioctl.h                | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index b570acbd94af..0df0ac9c1de3 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -207,10 +207,10 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev,
  * spec.
  */
 const struct uverbs_attr_def uverbs_uhw_compat_in =
-	UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+	UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0),
 			      UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
 const struct uverbs_attr_def uverbs_uhw_compat_out =
-	UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+	UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0),
 			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 11cc40ef1cb6..970357d0ccc4 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -215,6 +215,8 @@ struct uverbs_object_tree_def {
 	.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
 	.min_len = _min_len, .len = _len
+#define UVERBS_ATTR_MIN_SIZE(_min_len)				\
+	UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
 
 /*
  * In new compiler, UVERBS_ATTR could be simplified by declaring it as
-- 
cgit v1.2.3


From e502a864c3526aa93b983d4b14e9615b3da430e6 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 12:59:58 +0300
Subject: IB/core: Introduce DECLARE_UVERBS_GLOBAL_METHODS

Introduce a new macro to be used for global methods on a singleton
object.

This macros sets internally the type_attrs to be NULL as such an object
can't be created.

Downstream patches from this series will use this macro.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/uverbs_named_ioctl.h | 4 ++++
 include/rdma/uverbs_std_types.h   | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index c5bb4ebdb0b0..228421f2a427 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,6 +43,7 @@
 #define _UVERBS_NAME(x, y)	_UVERBS_PASTE(x, y)
 #define UVERBS_METHOD(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
 #define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+#define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
 
 #define DECLARE_UVERBS_NAMED_METHOD(id, ...)	\
 	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__)
@@ -56,6 +57,9 @@
 #define DECLARE_UVERBS_NAMED_OBJECT(id, ...)	\
 	DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
 
+#define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...)	\
+	DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__)
+
 #define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
 
 #define UVERBS_NO_OVERRIDE	NULL
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 9d56cdb84655..4c151b67fb6d 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,8 +37,6 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/ib_user_ioctl_verbs.h>
 
-#define UVERBS_OBJECT(id)	uverbs_object_##id
-
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
 #else
-- 
cgit v1.2.3


From 7dc08dcfc8c86cb4457e383734ff6844ddaff876 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 12:59:59 +0300
Subject: IB/core: Expose ib_ucontext from a given ib_uverbs_file

Drivers that use the IOCTL API may have the ib_uverbs_file and need a
way to get the related ib_ucontext from it, this is enabled by this
patch.

Downstream patches from this series will use it.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_main.c | 6 ++++++
 include/rdma/ib_verbs.h               | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 3ae2339dd27a..f5f4bfb59705 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -138,6 +138,12 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+{
+	return ufile->ucontext;
+}
+EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+
 int uverbs_dealloc_mw(struct ib_mw *mw)
 {
 	struct ib_pd *pd = mw->pd;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index dc5d262739e5..995d517c0a76 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4120,4 +4120,6 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
  */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
 
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+
 #endif /* IB_VERBS_H */
-- 
cgit v1.2.3


From a8b92ca1b0e5ce620e425e9d2f89ce44f1a82a82 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 12:59:57 +0300
Subject: IB/mlx5: Introduce DEVX

Introduce DEVX to enable direct device commands in downstream patches
from this series.

In that mode of work the firmware manages the isolation between
processes' resources and as such a DEVX user id is created and assigned
to the given user context upon allocation request.

A capability check is done to make sure that this feature is really
supported by the firmware prior to creating the DEVX user id.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/Makefile  |  1 +
 drivers/infiniband/hw/mlx5/devx.c    | 58 ++++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/main.c    | 24 +++++++++++++--
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 13 ++++++++
 include/uapi/rdma/mlx5-abi.h         |  3 ++
 5 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx5/devx.c

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index d42b922bede8..577e4c418bae 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
new file mode 100644
index 000000000000..775448910ad1
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+	u64 general_obj_types;
+	void *uctx;
+	void *hdr;
+	int err;
+
+	uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+	hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
+
+	general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
+	if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
+	    !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
+		return -EINVAL;
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+
+	err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return 0;
+}
+
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+			  struct mlx5_ib_ucontext *context)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+
+	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e46cda740479..058a82a55ffe 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1650,8 +1650,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (err)
 		return ERR_PTR(err);
 
-	if (req.flags)
-		return ERR_PTR(-EINVAL);
+	if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
+		return ERR_PTR(-EOPNOTSUPP);
 
 	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
 		return ERR_PTR(-EOPNOTSUPP);
@@ -1735,6 +1735,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 			goto out_uars;
 	}
 
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+		/* Block DEVX on Infiniband as of SELinux */
+		if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
+			err = -EPERM;
+			goto out_td;
+		}
+
+		err = mlx5_ib_devx_create(dev, context);
+		if (err)
+			goto out_td;
+	}
+
 	INIT_LIST_HEAD(&context->vma_private_list);
 	mutex_init(&context->vma_private_list_mutex);
 	INIT_LIST_HEAD(&context->db_page_list);
@@ -1795,7 +1807,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
-		goto out_td;
+		goto out_mdev;
 
 	bfregi->ver = ver;
 	bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
@@ -1805,6 +1817,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	return &context->ibucontext;
 
+out_mdev:
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+		mlx5_ib_devx_destroy(dev, context);
 out_td:
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
@@ -1830,6 +1845,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
+	if (context->devx_uid)
+		mlx5_ib_devx_destroy(dev, context);
+
 	bfregi = &context->bfregi;
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 615bd6e9db6c..1c857dd3c77f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -143,6 +143,7 @@ struct mlx5_ib_ucontext {
 
 	u64			lib_caps;
 	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
+	u16			devx_uid;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -1215,6 +1216,18 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
 				  u8 port_num);
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+			struct mlx5_ib_ucontext *context);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+			  struct mlx5_ib_ucontext *context);
+#else
+static inline int
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+		    struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+					struct mlx5_ib_ucontext *context) {}
+#endif
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8daec1fa49cf..5d591ff28139 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -76,6 +76,9 @@ enum mlx5_lib_caps {
 	MLX5_LIB_CAP_4K_UAR	= (__u64)1 << 0,
 };
 
+enum mlx5_ib_alloc_uctx_v2_flags {
+	MLX5_IB_ALLOC_UCTX_DEVX	= 1 << 0,
+};
 struct mlx5_ib_alloc_ucontext_req_v2 {
 	__u32	total_num_bfregs;
 	__u32	num_low_latency_bfregs;
-- 
cgit v1.2.3


From 8aa8c95ce4ccc10a72f6755ee889d9fb1ceb60a6 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:00 +0300
Subject: IB/mlx5: Add support for DEVX general command

Add support to run general firmware command via the DEVX interface.

A command that works on some object (e.g. CQ, WQ, etc.) will be added
in next patches while maintaining the required object lock.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 87 ++++++++++++++++++++++++++++++++
 include/uapi/rdma/mlx5_user_ioctl_cmds.h | 13 +++++
 2 files changed, 100 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 775448910ad1..9fca6541a175 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -13,6 +13,14 @@
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
 
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+{
+	return to_mucontext(ib_uverbs_get_ucontext(file));
+}
+
 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
 {
 	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
@@ -56,3 +64,82 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
+
+static bool devx_is_general_cmd(void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_VPORT_STATE:
+	case MLX5_CMD_OP_QUERY_ADAPTER:
+	case MLX5_CMD_OP_QUERY_ISSI:
+	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+	case MLX5_CMD_OP_QUERY_VNIC_ENV:
+	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+	case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+	case MLX5_CMD_OP_NOP:
+	case MLX5_CMD_OP_QUERY_CONG_STATUS:
+	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
+				  struct ib_uverbs_file *file,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	/* Only white list of some general HCA commands are allowed for this method. */
+	if (!devx_is_general_cmd(cmd_in))
+		return -EINVAL;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto other_cmd_free;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len);
+
+other_cmd_free:
+	kvfree(cmd_out);
+	return err;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
+);
+
+static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER));
+
+static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX));
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index f7d685ef2d1f..0b456fa91bb4 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -45,4 +45,17 @@ enum mlx5_ib_alloc_dm_attrs {
 	MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
 };
 
+enum mlx5_ib_devx_methods {
+	MLX5_IB_METHOD_DEVX_OTHER  = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum  mlx5_ib_devx_other_attrs {
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum mlx5_ib_devx_objects {
+	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+};
+
 #endif
-- 
cgit v1.2.3


From 7efce3691d33e1f4263a7c64e8ff39b12922509b Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:01 +0300
Subject: IB/mlx5: Add obj create and destroy functionality

Add support to create and destroy firmware objects via the DEVX
interface.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 337 ++++++++++++++++++++++++++++++-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  16 ++
 2 files changed, 350 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9fca6541a175..87116a3b7916 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -16,6 +16,14 @@
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
 
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+	struct mlx5_core_dev	*mdev;
+	u32			obj_id;
+	u32			dinlen; /* destroy inbox length */
+	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+};
+
 static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
 {
 	return to_mucontext(ib_uverbs_get_ucontext(file));
@@ -65,7 +73,52 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
-static bool devx_is_general_cmd(void *in)
+static bool devx_is_obj_create_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+	case MLX5_CMD_OP_CREATE_MKEY:
+	case MLX5_CMD_OP_CREATE_CQ:
+	case MLX5_CMD_OP_ALLOC_PD:
+	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+	case MLX5_CMD_OP_CREATE_RMP:
+	case MLX5_CMD_OP_CREATE_SQ:
+	case MLX5_CMD_OP_CREATE_RQ:
+	case MLX5_CMD_OP_CREATE_RQT:
+	case MLX5_CMD_OP_CREATE_TIR:
+	case MLX5_CMD_OP_CREATE_TIS:
+	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_CREATE_QP:
+	case MLX5_CMD_OP_CREATE_SRQ:
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+	case MLX5_CMD_OP_CREATE_DCT:
+	case MLX5_CMD_OP_CREATE_XRQ:
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+	case MLX5_CMD_OP_ALLOC_XRCD:
+		return true;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	{
+		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+		if (op_mod == 0)
+			return true;
+		return false;
+	}
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_general_cmd(const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
@@ -95,7 +148,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 {
 	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
 	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
-	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(
+		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
 	int cmd_out_len = uverbs_attr_get_len(attrs,
 					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
 	void *cmd_out;
@@ -126,6 +180,256 @@ other_cmd_free:
 	return err;
 }
 
+static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
+				       u32 *dinlen,
+				       u32 *obj_id)
+{
+	u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+	u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
+
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
+
+	MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
+
+	switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+		MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+		break;
+
+	case MLX5_CMD_OP_CREATE_MKEY:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+		break;
+	case MLX5_CMD_OP_CREATE_CQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+		break;
+	case MLX5_CMD_OP_ALLOC_PD:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+		break;
+	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+		break;
+	case MLX5_CMD_OP_CREATE_RMP:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+		break;
+	case MLX5_CMD_OP_CREATE_SQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+		break;
+	case MLX5_CMD_OP_CREATE_RQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+		break;
+	case MLX5_CMD_OP_CREATE_RQT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+		break;
+	case MLX5_CMD_OP_CREATE_TIR:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+		break;
+	case MLX5_CMD_OP_CREATE_TIS:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+		break;
+	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+		break;
+	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
+		*obj_id = MLX5_GET(create_flow_table_out, out, table_id);
+		MLX5_SET(destroy_flow_table_in, din, other_vport,
+			 MLX5_GET(create_flow_table_in,  in, other_vport));
+		MLX5_SET(destroy_flow_table_in, din, vport_number,
+			 MLX5_GET(create_flow_table_in,  in, vport_number));
+		MLX5_SET(destroy_flow_table_in, din, table_type,
+			 MLX5_GET(create_flow_table_in,  in, table_type));
+		MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+		break;
+	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
+		*obj_id = MLX5_GET(create_flow_group_out, out, group_id);
+		MLX5_SET(destroy_flow_group_in, din, other_vport,
+			 MLX5_GET(create_flow_group_in, in, other_vport));
+		MLX5_SET(destroy_flow_group_in, din, vport_number,
+			 MLX5_GET(create_flow_group_in, in, vport_number));
+		MLX5_SET(destroy_flow_group_in, din, table_type,
+			 MLX5_GET(create_flow_group_in, in, table_type));
+		MLX5_SET(destroy_flow_group_in, din, table_id,
+			 MLX5_GET(create_flow_group_in, in, table_id));
+		MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+		break;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
+		*obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		MLX5_SET(delete_fte_in, din, other_vport,
+			 MLX5_GET(set_fte_in,  in, other_vport));
+		MLX5_SET(delete_fte_in, din, vport_number,
+			 MLX5_GET(set_fte_in, in, vport_number));
+		MLX5_SET(delete_fte_in, din, table_type,
+			 MLX5_GET(set_fte_in, in, table_type));
+		MLX5_SET(delete_fte_in, din, table_id,
+			 MLX5_GET(set_fte_in, in, table_id));
+		MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+		break;
+	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+		break;
+	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+		break;
+	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+		break;
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
+		*obj_id = MLX5_GET(create_scheduling_element_out, out,
+				   scheduling_element_id);
+		MLX5_SET(destroy_scheduling_element_in, din,
+			 scheduling_hierarchy,
+			 MLX5_GET(create_scheduling_element_in, in,
+				  scheduling_hierarchy));
+		MLX5_SET(destroy_scheduling_element_in, din,
+			 scheduling_element_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+		break;
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
+		*obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+		break;
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
+		*obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+		break;
+	case MLX5_CMD_OP_CREATE_QP:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+		break;
+	case MLX5_CMD_OP_CREATE_SRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+		break;
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_XRC_SRQ);
+		break;
+	case MLX5_CMD_OP_CREATE_DCT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+		break;
+	case MLX5_CMD_OP_CREATE_XRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+		break;
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+		*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
+		MLX5_SET(detach_from_mcg_in, din, qpn,
+			 MLX5_GET(attach_to_mcg_in, in, qpn));
+		memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
+		       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
+		       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+		break;
+	case MLX5_CMD_OP_ALLOC_XRCD:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+		break;
+	default:
+		/* The entry must match to one of the devx_is_obj_create_cmd */
+		WARN_ON(true);
+		break;
+	}
+}
+
+static int devx_obj_cleanup(struct ib_uobject *uobject,
+			    enum rdma_remove_reason why)
+{
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct devx_obj *obj = uobject->object;
+	int ret;
+
+	ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (ret && why == RDMA_REMOVE_DESTROY)
+		return ret;
+
+	kfree(obj);
+	return ret;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)(struct ib_device *ib_dev,
+				    struct ib_uverbs_file *file,
+				    struct uverbs_attr_bundle *attrs)
+{
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+	int cmd_out_len =  uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+	void *cmd_out;
+	struct ib_uobject *uobj;
+	struct devx_obj *obj;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_create_cmd(cmd_in))
+		return -EINVAL;
+
+	obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out) {
+		err = -ENOMEM;
+		goto obj_free;
+	}
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto cmd_free;
+
+	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+	uobj->object = obj;
+	obj->mdev = dev->mdev;
+	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
+	if (err)
+		goto cmd_free;
+
+	kvfree(cmd_out);
+	return 0;
+
+cmd_free:
+	kvfree(cmd_out);
+obj_free:
+	kfree(obj);
+	return err;
+}
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
@@ -138,8 +442,35 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
 );
 
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_NEW,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_DESTROY,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER));
 
+static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY));
+
 static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX));
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ));
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 0b456fa91bb4..8d285f4555cd 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -54,8 +54,24 @@ enum  mlx5_ib_devx_other_attrs {
 	MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
 };
 
+enum mlx5_ib_devx_obj_create_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_destroy_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_obj_methods {
+	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+};
+
 enum mlx5_ib_devx_objects {
 	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_OBJECT_DEVX_OBJ,
 };
 
 #endif
-- 
cgit v1.2.3


From e662e14d801b01a976e58bc3f8d9fe49b9fcec3a Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:02 +0300
Subject: IB/mlx5: Add DEVX support for modify and query commands

Add support in DEVX for modify and query commands, the required lock is
taken (i.e. READ/WRITE) by the KABI infrastructure accordingly.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c             | 350 +++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c |   5 +
 include/linux/mlx5/mlx5_ifc.h                 |   3 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h      |  14 ++
 4 files changed, 370 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 87116a3b7916..bb5e40a9edd8 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -73,6 +73,161 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
+static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+	u32 obj_id;
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MKEY:
+		obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+		break;
+	case MLX5_CMD_OP_QUERY_CQ:
+		obj_id = MLX5_GET(query_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_CQ:
+		obj_id = MLX5_GET(modify_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SQ:
+		obj_id = MLX5_GET(query_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_SQ:
+		obj_id = MLX5_GET(modify_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQ:
+		obj_id = MLX5_GET(query_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQ:
+		obj_id = MLX5_GET(modify_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RMP:
+		obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RMP:
+		obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQT:
+		obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQT:
+		obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIR:
+		obj_id = MLX5_GET(query_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIR:
+		obj_id = MLX5_GET(modify_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIS:
+		obj_id = MLX5_GET(query_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIS:
+		obj_id = MLX5_GET(modify_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+		obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+		obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+		obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+		obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+		obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(query_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(modify_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+		obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		break;
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_QUERY_QP:
+		obj_id = MLX5_GET(query_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RST2INIT_QP:
+		obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2ERR_QP:
+		obj_id = MLX5_GET(qp_2err_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2RST_QP:
+		obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_QUERY_DCT:
+		obj_id = MLX5_GET(query_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRQ:
+		obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+		obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+		obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SRQ:
+		obj_id = MLX5_GET(query_srq_in, in, srqn);
+		break;
+	case MLX5_CMD_OP_ARM_RQ:
+		obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+		break;
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+		obj_id = MLX5_GET(drain_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_ARM_XRQ:
+		obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+		break;
+	default:
+		return false;
+	}
+
+	if (obj_id == obj->obj_id)
+		return true;
+
+	return false;
+}
+
 static bool devx_is_obj_create_cmd(const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
@@ -118,7 +273,83 @@ static bool devx_is_obj_create_cmd(const void *in)
 	}
 }
 
-static bool devx_is_general_cmd(const void *in)
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_CQ:
+	case MLX5_CMD_OP_MODIFY_RMP:
+	case MLX5_CMD_OP_MODIFY_SQ:
+	case MLX5_CMD_OP_MODIFY_RQ:
+	case MLX5_CMD_OP_MODIFY_RQT:
+	case MLX5_CMD_OP_MODIFY_TIR:
+	case MLX5_CMD_OP_MODIFY_TIS:
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_RST2INIT_QP:
+	case MLX5_CMD_OP_INIT2RTR_QP:
+	case MLX5_CMD_OP_RTR2RTS_QP:
+	case MLX5_CMD_OP_RTS2RTS_QP:
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+	case MLX5_CMD_OP_ARM_RQ:
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+	case MLX5_CMD_OP_ARM_XRQ:
+		return true;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	{
+		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+		if (op_mod == 1)
+			return true;
+		return false;
+	}
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_MKEY:
+	case MLX5_CMD_OP_QUERY_CQ:
+	case MLX5_CMD_OP_QUERY_RMP:
+	case MLX5_CMD_OP_QUERY_SQ:
+	case MLX5_CMD_OP_QUERY_RQ:
+	case MLX5_CMD_OP_QUERY_RQT:
+	case MLX5_CMD_OP_QUERY_TIR:
+	case MLX5_CMD_OP_QUERY_TIS:
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_QP:
+	case MLX5_CMD_OP_QUERY_SRQ:
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+	case MLX5_CMD_OP_QUERY_DCT:
+	case MLX5_CMD_OP_QUERY_XRQ:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_general_cmd(void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
 
@@ -430,6 +661,89 @@ obj_free:
 	return err;
 }
 
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_modify_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+		return -EINVAL;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto other_cmd_free;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+			     cmd_out, cmd_out_len);
+
+other_cmd_free:
+	kvfree(cmd_out);
+	return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_query_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+		return -EINVAL;
+
+	cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+	if (!cmd_out)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto other_cmd_free;
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len);
+
+other_cmd_free:
+	kvfree(cmd_out);
+	return err;
+}
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
@@ -463,13 +777,45 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_WRITE,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_OBJ,
+			 UVERBS_ACCESS_READ,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY));
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
 static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d07f24de8fa3..9d03a202abb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -429,6 +429,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_FPGA_QUERY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -603,6 +605,9 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(FPGA_DESTROY_QP);
 	MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
 	MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f810772e80c0..ac24ed87c67e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -246,12 +246,15 @@ enum {
 	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
 	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
 	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
 	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
 	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 	MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+	MLX5_CMD_OP_MODIFY_GENERAL_OBJECT         = 0xa01,
+	MLX5_CMD_OP_QUERY_GENERAL_OBJECT          = 0xa02,
 	MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
 	MLX5_CMD_OP_MAX
 };
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 8d285f4555cd..97d216b8d053 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -64,9 +64,23 @@ enum mlx5_ib_devx_obj_destroy_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 };
 
+enum mlx5_ib_devx_obj_modify_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
 enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 };
 
 enum mlx5_ib_devx_objects {
-- 
cgit v1.2.3


From 7c043e908a74ae0a935037cdd984d0cb89b2b970 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:03 +0300
Subject: IB/mlx5: Add support for DEVX query UAR

Return a device UAR index for a given user index via the DEVX interface.

Security note:
The hardware protection mechanism works like this: Each device object that
is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
the device specification manual) upon its creation. Then upon doorbell,
hardware fetches the object context for which the doorbell was rang, and
validates that the UAR through which the DB was rang matches the UAR ID
of the object.

If no match the doorbell is silently ignored by the hardware.  Of
course, the user cannot ring a doorbell on a UAR that was not mapped to
it.

Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
mailboxes (except tagging them with UID), we expose to the user its UAR
ID, so it can embed it in these objects in the expected specification
format. So the only thing the user can do is hurt itself by creating a
QP/SQ/CQ with a UAR ID other than his, and then in this case other users
may ring a doorbell on its objects.

The consequence of that will be that another user can schedule a QP/SQ
of the buggy user for execution (just insert it to the hardware schedule
queue or arm its CQ for event generation), no further harm is expected.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 53 +++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/mlx5/mlx5_ib.h     |  3 ++
 drivers/infiniband/hw/mlx5/qp.c          |  9 ++++--
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  6 ++++
 4 files changed, 67 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index bb5e40a9edd8..9b1804eb9924 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -373,6 +373,50 @@ static bool devx_is_general_cmd(void *in)
 	}
 }
 
+/*
+ *Security note:
+ * The hardware protection mechanism works like this: Each device object that
+ * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
+ * the device specification manual) upon its creation. Then upon doorbell,
+ * hardware fetches the object context for which the doorbell was rang, and
+ * validates that the UAR through which the DB was rang matches the UAR ID
+ * of the object.
+ * If no match the doorbell is silently ignored by the hardware. Of course,
+ * the user cannot ring a doorbell on a UAR that was not mapped to it.
+ * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
+ * mailboxes (except tagging them with UID), we expose to the user its UAR
+ * ID, so it can embed it in these objects in the expected specification
+ * format. So the only thing the user can do is hurt itself by creating a
+ * QP/SQ/CQ with a UAR ID other than his, and then in this case other users
+ * may ring a doorbell on its objects.
+ * The consequence of that will be that another user can schedule a QP/SQ
+ * of the buggy user for execution (just insert it to the hardware schedule
+ * queue or arm its CQ for event generation), no further harm is expected.
+ */
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev,
+				  struct ib_uverbs_file *file,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	u32 user_idx;
+	s32 dev_idx;
+
+	if (uverbs_copy_from(&user_idx, attrs,
+			     MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
+		return -EFAULT;
+
+	dev_idx = bfregn_to_uar_index(to_mdev(ib_dev),
+				      &c->bfregi, user_idx, true);
+	if (dev_idx < 0)
+		return dev_idx;
+
+	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+			   &dev_idx, sizeof(dev_idx)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 				  struct ib_uverbs_file *file,
 				  struct uverbs_attr_bundle *attrs)
@@ -744,6 +788,12 @@ other_cmd_free:
 	return err;
 }
 
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
@@ -808,7 +858,8 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER));
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1c857dd3c77f..e45f364622eb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1329,4 +1329,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
 unsigned long mlx5_ib_get_xlt_emergency_page(void);
 void mlx5_ib_put_xlt_emergency_page(void);
 
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, int bfregn,
+			bool dyn_bfreg);
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e3c4ab9be41d..d5f072c50ee5 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -641,9 +641,9 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
 			       struct mlx5_ib_cq *recv_cq);
 
-static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
-			       struct mlx5_bfreg_info *bfregi, int bfregn,
-			       bool dyn_bfreg)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, int bfregn,
+			bool dyn_bfreg)
 {
 	int bfregs_per_sys_page;
 	int index_of_sys_page;
@@ -653,6 +653,9 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 				MLX5_NON_FP_BFREGS_PER_UAR;
 	index_of_sys_page = bfregn / bfregs_per_sys_page;
 
+	if (index_of_sys_page >= bfregi->num_sys_pages)
+		return -EINVAL;
+
 	if (dyn_bfreg) {
 		index_of_sys_page += bfregi->num_static_sys_pages;
 		if (bfregn > bfregi->num_dyn_bfregs ||
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 97d216b8d053..1252695cd94b 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -47,6 +47,7 @@ enum mlx5_ib_alloc_dm_attrs {
 
 enum mlx5_ib_devx_methods {
 	MLX5_IB_METHOD_DEVX_OTHER  = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_QUERY_UAR,
 };
 
 enum  mlx5_ib_devx_other_attrs {
@@ -60,6 +61,11 @@ enum mlx5_ib_devx_obj_create_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
 };
 
+enum  mlx5_ib_devx_query_uar_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
 enum mlx5_ib_devx_obj_destroy_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
 };
-- 
cgit v1.2.3


From aeae94579caf77406a8a235ea33fdb67abe9a57e Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:04 +0300
Subject: IB/mlx5: Add DEVX support for memory registration

Add support to register a memory with the firmware via the DEVX
interface.

The driver translates a given user address to ib_umem then it will
register the physical addresses with the firmware and get a unique id
for this registration to be used for this virtual address.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 199 ++++++++++++++++++++++++++++++-
 include/linux/mlx5/mlx5_ifc.h            |   1 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  18 +++
 3 files changed, 217 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9b1804eb9924..162321f486eb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -24,6 +24,22 @@ struct devx_obj {
 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
 };
 
+struct devx_umem {
+	struct mlx5_core_dev		*mdev;
+	struct ib_umem			*umem;
+	u32				page_offset;
+	int				page_shift;
+	int				ncont;
+	u32				dinlen;
+	u32				dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+};
+
+struct devx_umem_reg_cmd {
+	void				*in;
+	u32				inlen;
+	u32				out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+};
+
 static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
 {
 	return to_mucontext(ib_uverbs_get_ucontext(file));
@@ -788,6 +804,181 @@ other_cmd_free:
 	return err;
 }
 
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+			 struct uverbs_attr_bundle *attrs,
+			 struct devx_umem *obj)
+{
+	u64 addr;
+	size_t size;
+	int access;
+	int npages;
+	int err;
+	u32 page_mask;
+
+	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) ||
+	    uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS))
+		return -EFAULT;
+
+	err = ib_check_mr_access(access);
+	if (err)
+		return err;
+
+	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+	if (IS_ERR(obj->umem))
+		return PTR_ERR(obj->umem);
+
+	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
+			   MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
+			   &obj->page_shift, &obj->ncont, NULL);
+
+	if (!npages) {
+		ib_umem_release(obj->umem);
+		return -EINVAL;
+	}
+
+	page_mask = (1 << obj->page_shift) - 1;
+	obj->page_offset = obj->umem->address & page_mask;
+
+	return 0;
+}
+
+static int devx_umem_reg_cmd_alloc(struct devx_umem *obj,
+				   struct devx_umem_reg_cmd *cmd)
+{
+	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+		    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+	cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL);
+	return cmd->in ? 0 : -ENOMEM;
+}
+
+static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd)
+{
+	kvfree(cmd->in);
+}
+
+static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
+				    struct devx_umem *obj,
+				    struct devx_umem_reg_cmd *cmd)
+{
+	void *umem;
+	__be64 *mtt;
+
+	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+	MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
+	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
+					    MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(umem, umem, page_offset, obj->page_offset);
+	mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+			     MLX5_IB_MTT_READ);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev,
+				 struct ib_uverbs_file *file,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	struct devx_umem_reg_cmd cmd;
+	struct devx_umem *obj;
+	struct ib_uobject *uobj;
+	u32 obj_id;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+	if (err)
+		goto err_obj_free;
+
+	err = devx_umem_reg_cmd_alloc(obj, &cmd);
+	if (err)
+		goto err_umem_release;
+
+	devx_umem_reg_cmd_build(dev, obj, &cmd);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+			    sizeof(cmd.out));
+	if (err)
+		goto err_umem_reg_cmd_free;
+
+	obj->mdev = dev->mdev;
+	uobj->object = obj;
+	devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
+	if (err)
+		goto err_umem_destroy;
+
+	devx_umem_reg_cmd_free(&cmd);
+
+	return 0;
+
+err_umem_destroy:
+	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
+err_umem_reg_cmd_free:
+	devx_umem_reg_cmd_free(&cmd);
+err_umem_release:
+	ib_umem_release(obj->umem);
+err_obj_free:
+	kfree(obj);
+	return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_DEREG)(struct ib_device *ib_dev,
+				   struct ib_uverbs_file *file,
+				   struct uverbs_attr_bundle *attrs)
+{
+	return 0;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+			     enum rdma_remove_reason why)
+{
+	struct devx_umem *obj = uobject->object;
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	int err;
+
+	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (err && why == RDMA_REMOVE_DESTROY)
+		return err;
+
+	ib_umem_release(obj->umem);
+	kfree(obj);
+	return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_UMEM,
+			 UVERBS_ACCESS_NEW,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+			 MLX5_IB_OBJECT_DEVX_UMEM,
+			 UVERBS_ACCESS_DESTROY,
+			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
@@ -868,6 +1059,12 @@ static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
+static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+	&UVERBS_TYPE_ALLOC_IDR(0, devx_umem_cleanup),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
 static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ));
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
+	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ac24ed87c67e..00b539303f5d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -82,6 +82,7 @@ enum {
 
 enum {
 	MLX5_OBJ_TYPE_UCTX = 0x0004,
+	MLX5_OBJ_TYPE_UMEM = 0x0005,
 };
 
 enum {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 1252695cd94b..dbc549a7bf50 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -89,9 +89,27 @@ enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 };
 
+enum mlx5_ib_devx_umem_reg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+	MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
 enum mlx5_ib_devx_objects {
 	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_OBJECT_DEVX_OBJ,
+	MLX5_IB_OBJECT_DEVX_UMEM,
 };
 
 #endif
-- 
cgit v1.2.3


From f6fe01b7181be1751a5d8f19f230eb0d17b9a7b1 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 17 Jun 2018 13:00:05 +0300
Subject: IB/mlx5: Add DEVX query EQN support

Return the matching device EQN for a given user vector number via the
DEVX interface.

Note:
EQs are owned by the kernel and shared by all user processes.
Basically, a user CQ can point to any EQ.
The kernel doesn't enforce any such limitation today either.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        | 34 +++++++++++++++++++++++++++++++-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  6 ++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 162321f486eb..dbf2d61591d3 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -389,6 +389,31 @@ static bool devx_is_general_cmd(void *in)
 	}
 }
 
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev,
+				  struct ib_uverbs_file *file,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	int user_vector;
+	int dev_eqn;
+	unsigned int irqn;
+	int err;
+
+	if (uverbs_copy_from(&user_vector, attrs,
+			     MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
+		return -EFAULT;
+
+	err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+	if (err < 0)
+		return err;
+
+	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+			   &dev_eqn, sizeof(dev_eqn)))
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
  *Security note:
  * The hardware protection mechanism works like this: Each device object that
@@ -979,6 +1004,12 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
+static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN,
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
@@ -1050,7 +1081,8 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR));
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index dbc549a7bf50..1a05bb4b0b34 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -48,6 +48,7 @@ enum mlx5_ib_alloc_dm_attrs {
 enum mlx5_ib_devx_methods {
 	MLX5_IB_METHOD_DEVX_OTHER  = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	MLX5_IB_METHOD_DEVX_QUERY_EQN,
 };
 
 enum  mlx5_ib_devx_other_attrs {
@@ -82,6 +83,11 @@ enum mlx5_ib_devx_obj_query_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
 };
 
+enum  mlx5_ib_devx_query_eqn_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
 enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
-- 
cgit v1.2.3


From 9fba738a53dda20e748d6ee240b6c017c8146b4b Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 19 Jun 2018 16:41:41 +0200
Subject: clk: add duty cycle support

Add the possibility to apply and query the clock signal duty cycle ratio.

This is useful when the duty cycle of the clock signal depends on some
other parameters controlled by the clock framework.

For example, the duty cycle of a divider may depends on the raw divider
setting (ratio = N / div) , which is controlled by the CCF. In such case,
going through the pwm framework to control the duty cycle ratio of this
clock would be a burden.

A clock provider is not required to implement the operation to set and get
the duty cycle. If it does not implement .get_duty_cycle(), the ratio is
assumed to be 50%.

This change also adds a new flag, CLK_DUTY_CYCLE_PARENT. This flag should
be used to indicate that a clock, such as gates and muxes, may inherit
the duty cycle ratio of its parent clock. If a clock does not provide a
get_duty_cycle() callback and has CLK_DUTY_CYCLE_PARENT, then the call
will be directly forwarded to its parent clock, if any. For
set_duty_cycle(), the clock should also have CLK_SET_RATE_PARENT for the
call to be forwarded

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Link: lkml.kernel.org/r/20180619144141.8506-1-jbrunet@baylibre.com
---
 drivers/clk/clk.c            | 199 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/clk-provider.h |  26 ++++++
 include/linux/clk.h          |  33 +++++++
 include/trace/events/clk.h   |  36 ++++++++
 4 files changed, 289 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 9760b526ca31..b0a2719d86f3 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -68,6 +68,7 @@ struct clk_core {
 	unsigned long		max_rate;
 	unsigned long		accuracy;
 	int			phase;
+	struct clk_duty		duty;
 	struct hlist_head	children;
 	struct hlist_node	child_node;
 	struct hlist_head	clks;
@@ -2402,6 +2403,172 @@ int clk_get_phase(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_get_phase);
 
+static void clk_core_reset_duty_cycle_nolock(struct clk_core *core)
+{
+	/* Assume a default value of 50% */
+	core->duty.num = 1;
+	core->duty.den = 2;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core);
+
+static int clk_core_update_duty_cycle_nolock(struct clk_core *core)
+{
+	struct clk_duty *duty = &core->duty;
+	int ret = 0;
+
+	if (!core->ops->get_duty_cycle)
+		return clk_core_update_duty_cycle_parent_nolock(core);
+
+	ret = core->ops->get_duty_cycle(core->hw, duty);
+	if (ret)
+		goto reset;
+
+	/* Don't trust the clock provider too much */
+	if (duty->den == 0 || duty->num > duty->den) {
+		ret = -EINVAL;
+		goto reset;
+	}
+
+	return 0;
+
+reset:
+	clk_core_reset_duty_cycle_nolock(core);
+	return ret;
+}
+
+static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core)
+{
+	int ret = 0;
+
+	if (core->parent &&
+	    core->flags & CLK_DUTY_CYCLE_PARENT) {
+		ret = clk_core_update_duty_cycle_nolock(core->parent);
+		memcpy(&core->duty, &core->parent->duty, sizeof(core->duty));
+	} else {
+		clk_core_reset_duty_cycle_nolock(core);
+	}
+
+	return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+						 struct clk_duty *duty);
+
+static int clk_core_set_duty_cycle_nolock(struct clk_core *core,
+					  struct clk_duty *duty)
+{
+	int ret;
+
+	lockdep_assert_held(&prepare_lock);
+
+	if (clk_core_rate_is_protected(core))
+		return -EBUSY;
+
+	trace_clk_set_duty_cycle(core, duty);
+
+	if (!core->ops->set_duty_cycle)
+		return clk_core_set_duty_cycle_parent_nolock(core, duty);
+
+	ret = core->ops->set_duty_cycle(core->hw, duty);
+	if (!ret)
+		memcpy(&core->duty, duty, sizeof(*duty));
+
+	trace_clk_set_duty_cycle_complete(core, duty);
+
+	return ret;
+}
+
+static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core,
+						 struct clk_duty *duty)
+{
+	int ret = 0;
+
+	if (core->parent &&
+	    core->flags & (CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT)) {
+		ret = clk_core_set_duty_cycle_nolock(core->parent, duty);
+		memcpy(&core->duty, &core->parent->duty, sizeof(core->duty));
+	}
+
+	return ret;
+}
+
+/**
+ * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @num: numerator of the duty cycle ratio to be applied
+ * @den: denominator of the duty cycle ratio to be applied
+ *
+ * Apply the duty cycle ratio if the ratio is valid and the clock can
+ * perform this operation
+ *
+ * Returns (0) on success, a negative errno otherwise.
+ */
+int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den)
+{
+	int ret;
+	struct clk_duty duty;
+
+	if (!clk)
+		return 0;
+
+	/* sanity check the ratio */
+	if (den == 0 || num > den)
+		return -EINVAL;
+
+	duty.num = num;
+	duty.den = den;
+
+	clk_prepare_lock();
+
+	if (clk->exclusive_count)
+		clk_core_rate_unprotect(clk->core);
+
+	ret = clk_core_set_duty_cycle_nolock(clk->core, &duty);
+
+	if (clk->exclusive_count)
+		clk_core_rate_protect(clk->core);
+
+	clk_prepare_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_set_duty_cycle);
+
+static int clk_core_get_scaled_duty_cycle(struct clk_core *core,
+					  unsigned int scale)
+{
+	struct clk_duty *duty = &core->duty;
+	int ret;
+
+	clk_prepare_lock();
+
+	ret = clk_core_update_duty_cycle_nolock(core);
+	if (!ret)
+		ret = mult_frac(scale, duty->num, duty->den);
+
+	clk_prepare_unlock();
+
+	return ret;
+}
+
+/**
+ * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @scale: scaling factor to be applied to represent the ratio as an integer
+ *
+ * Returns the duty cycle ratio of a clock node multiplied by the provided
+ * scaling factor, or negative errno on error.
+ */
+int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale)
+{
+	if (!clk)
+		return 0;
+
+	return clk_core_get_scaled_duty_cycle(clk->core, scale);
+}
+EXPORT_SYMBOL_GPL(clk_get_scaled_duty_cycle);
+
 /**
  * clk_is_match - check if two clk's point to the same hardware clock
  * @p: clk compared against q
@@ -2455,12 +2622,13 @@ static void clk_summary_show_one(struct seq_file *s, struct clk_core *c,
 	if (!c)
 		return;
 
-	seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu %-3d\n",
+	seq_printf(s, "%*s%-*s %7d %8d %8d %11lu %10lu %5d %6d\n",
 		   level * 3 + 1, "",
 		   30 - level * 3, c->name,
 		   c->enable_count, c->prepare_count, c->protect_count,
 		   clk_core_get_rate(c), clk_core_get_accuracy(c),
-		   clk_core_get_phase(c));
+		   clk_core_get_phase(c),
+		   clk_core_get_scaled_duty_cycle(c, 100000));
 }
 
 static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c,
@@ -2482,9 +2650,9 @@ static int clk_summary_show(struct seq_file *s, void *data)
 	struct clk_core *c;
 	struct hlist_head **lists = (struct hlist_head **)s->private;
 
-	seq_puts(s, "                                 enable  prepare  protect                               \n");
-	seq_puts(s, "   clock                          count    count    count        rate   accuracy   phase\n");
-	seq_puts(s, "----------------------------------------------------------------------------------------\n");
+	seq_puts(s, "                                 enable  prepare  protect                                duty\n");
+	seq_puts(s, "   clock                          count    count    count        rate   accuracy phase  cycle\n");
+	seq_puts(s, "---------------------------------------------------------------------------------------------\n");
 
 	clk_prepare_lock();
 
@@ -2511,6 +2679,8 @@ static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level)
 	seq_printf(s, "\"rate\": %lu,", clk_core_get_rate(c));
 	seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy(c));
 	seq_printf(s, "\"phase\": %d", clk_core_get_phase(c));
+	seq_printf(s, "\"duty_cycle\": %u",
+		   clk_core_get_scaled_duty_cycle(c, 100000));
 }
 
 static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
@@ -2572,6 +2742,7 @@ static const struct {
 	ENTRY(CLK_SET_RATE_UNGATE),
 	ENTRY(CLK_IS_CRITICAL),
 	ENTRY(CLK_OPS_PARENT_ENABLE),
+	ENTRY(CLK_DUTY_CYCLE_PARENT),
 #undef ENTRY
 };
 
@@ -2610,6 +2781,17 @@ static int possible_parents_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(possible_parents);
 
+static int clk_duty_cycle_show(struct seq_file *s, void *data)
+{
+	struct clk_core *core = s->private;
+	struct clk_duty *duty = &core->duty;
+
+	seq_printf(s, "%u/%u\n", duty->num, duty->den);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(clk_duty_cycle);
+
 static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
 	struct dentry *root;
@@ -2628,6 +2810,8 @@ static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 	debugfs_create_u32("clk_enable_count", 0444, root, &core->enable_count);
 	debugfs_create_u32("clk_protect_count", 0444, root, &core->protect_count);
 	debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count);
+	debugfs_create_file("clk_duty_cycle", 0444, root, core,
+			    &clk_duty_cycle_fops);
 
 	if (core->num_parents > 1)
 		debugfs_create_file("clk_possible_parents", 0444, root, core,
@@ -2845,6 +3029,11 @@ static int __clk_core_init(struct clk_core *core)
 	else
 		core->phase = 0;
 
+	/*
+	 * Set clk's duty cycle.
+	 */
+	clk_core_update_duty_cycle_nolock(core);
+
 	/*
 	 * Set clk's rate.  The preferred method is to use .recalc_rate.  For
 	 * simple clocks and lazy developers the default fallback is to use the
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index b7cfa037e593..08b1aa70a38d 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -38,6 +38,8 @@
 #define CLK_IS_CRITICAL		BIT(11) /* do not gate, ever */
 /* parents need enable during gate/ungate, set rate and re-parent */
 #define CLK_OPS_PARENT_ENABLE	BIT(12)
+/* duty cycle call may be forwarded to the parent clock */
+#define CLK_DUTY_CYCLE_PARENT	BIT(13)
 
 struct clk;
 struct clk_hw;
@@ -66,6 +68,17 @@ struct clk_rate_request {
 	struct clk_hw *best_parent_hw;
 };
 
+/**
+ * struct clk_duty - Struture encoding the duty cycle ratio of a clock
+ *
+ * @num:	Numerator of the duty cycle ratio
+ * @den:	Denominator of the duty cycle ratio
+ */
+struct clk_duty {
+	unsigned int num;
+	unsigned int den;
+};
+
 /**
  * struct clk_ops -  Callback operations for hardware clocks; these are to
  * be provided by the clock implementation, and will be called by drivers
@@ -169,6 +182,15 @@ struct clk_rate_request {
  *		by the second argument. Valid values for degrees are
  *		0-359. Return 0 on success, otherwise -EERROR.
  *
+ * @get_duty_cycle: Queries the hardware to get the current duty cycle ratio
+ *              of a clock. Returned values denominator cannot be 0 and must be
+ *              superior or equal to the numerator.
+ *
+ * @set_duty_cycle: Apply the duty cycle ratio to this clock signal specified by
+ *              the numerator (2nd argurment) and denominator (3rd  argument).
+ *              Argument must be a valid ratio (denominator > 0
+ *              and >= numerator) Return 0 on success, otherwise -EERROR.
+ *
  * @init:	Perform platform-specific initialization magic.
  *		This is not not used by any of the basic clock types.
  *		Please consider other ways of solving initialization problems
@@ -218,6 +240,10 @@ struct clk_ops {
 					   unsigned long parent_accuracy);
 	int		(*get_phase)(struct clk_hw *hw);
 	int		(*set_phase)(struct clk_hw *hw, int degrees);
+	int		(*get_duty_cycle)(struct clk_hw *hw,
+					  struct clk_duty *duty);
+	int		(*set_duty_cycle)(struct clk_hw *hw,
+					  struct clk_duty *duty);
 	void		(*init)(struct clk_hw *hw);
 	void		(*debug_init)(struct clk_hw *hw, struct dentry *dentry);
 };
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 0dbd0885b2c2..4f750c481b82 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -141,6 +141,27 @@ int clk_set_phase(struct clk *clk, int degrees);
  */
 int clk_get_phase(struct clk *clk);
 
+/**
+ * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @num: numerator of the duty cycle ratio to be applied
+ * @den: denominator of the duty cycle ratio to be applied
+ *
+ * Adjust the duty cycle of a clock signal by the specified ratio. Returns 0 on
+ * success, -EERROR otherwise.
+ */
+int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den);
+
+/**
+ * clk_get_duty_cycle - return the duty cycle ratio of a clock signal
+ * @clk: clock signal source
+ * @scale: scaling factor to be applied to represent the ratio as an integer
+ *
+ * Returns the duty cycle ratio multiplied by the scale provided, otherwise
+ * returns -EERROR.
+ */
+int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
+
 /**
  * clk_is_match - check if two clk's point to the same hardware clock
  * @p: clk compared against q
@@ -183,6 +204,18 @@ static inline long clk_get_phase(struct clk *clk)
 	return -ENOTSUPP;
 }
 
+static inline int clk_set_duty_cycle(struct clk *clk, unsigned int num,
+				     unsigned int den)
+{
+	return -ENOTSUPP;
+}
+
+static inline unsigned int clk_get_scaled_duty_cycle(struct clk *clk,
+						     unsigned int scale)
+{
+	return 0;
+}
+
 static inline bool clk_is_match(const struct clk *p, const struct clk *q)
 {
 	return p == q;
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index 2cd449328aee..9004ffff7f32 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -192,6 +192,42 @@ DEFINE_EVENT(clk_phase, clk_set_phase_complete,
 	TP_ARGS(core, phase)
 );
 
+DECLARE_EVENT_CLASS(clk_duty_cycle,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty),
+
+	TP_STRUCT__entry(
+		__string(        name,           core->name              )
+		__field( unsigned int,           num                     )
+		__field( unsigned int,           den                     )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, core->name);
+		__entry->num = duty->num;
+		__entry->den = duty->den;
+	),
+
+	TP_printk("%s %u/%u", __get_str(name), (unsigned int)__entry->num,
+		  (unsigned int)__entry->den)
+);
+
+DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty)
+);
+
+DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle_complete,
+
+	TP_PROTO(struct clk_core *core, struct clk_duty *duty),
+
+	TP_ARGS(core, duty)
+);
+
 #endif /* _TRACE_CLK_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 2e2ba09e48b72497a9b69fc49ab693f7f0c2e5cf Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Mon, 4 Jun 2018 11:44:02 -0700
Subject: IB/rdmavt, IB/hfi1: Create device dependent s_flags

Move some s_flags defines out of rdmavt and into hfi1 because they are
hfi1 specific and therefore should remain in the driver instead of
bubbling up to rdmavt.

Document device specific ranges in rdmavt and remap
those in hfi1.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/pio.c   |  6 +++---
 drivers/infiniband/hw/hfi1/qp.c    |  6 +++---
 drivers/infiniband/hw/hfi1/qp.h    | 24 ++++++++++++++++++++++--
 drivers/infiniband/hw/hfi1/rc.c    |  6 +++---
 drivers/infiniband/hw/hfi1/ruc.c   | 14 +++++++-------
 drivers/infiniband/hw/hfi1/verbs.c |  4 ++--
 include/rdma/rdmavt_qp.h           | 30 +++++++++++++++++-------------
 7 files changed, 57 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9cac15d10c4f..363ab0f35369 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc)
 	/* Wake up the most starved one first */
 	if (n)
 		hfi1_qp_wakeup(qps[max_idx],
-			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+			       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 	for (i = 0; i < n; i++)
 		if (i != max_idx)
 			hfi1_qp_wakeup(qps[i],
-				       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+				       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 }
 
 /* translate a send credit update to a bit code of reasons */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1697d96151bd..9b1e84a6b1cc 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_PATH_MIG_STATE &&
 	    attr->path_mig_state == IB_MIG_MIGRATED &&
 	    qp->s_mig_state == IB_MIG_ARMED) {
-		qp->s_flags |= RVT_S_AHG_CLEAR;
+		qp->s_flags |= HFI1_S_AHG_CLEAR;
 		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
 		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
@@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
 	qp->remote_ah_attr = qp->alt_ah_attr;
 	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
 	qp->s_pkey_index = qp->s_alt_pkey_index;
-	qp->s_flags |= RVT_S_AHG_CLEAR;
+	qp->s_flags |= HFI1_S_AHG_CLEAR;
 	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
 	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 	qp_set_16b(qp);
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b2d4cba8d15b..078cff7560b6 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,7 +1,7 @@
 #ifndef _QP_H
 #define _QP_H
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -69,6 +69,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
 		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
+/*
+ * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
+ *
+ * HFI1_S_AHG_VALID - ahg header valid on chip
+ * HFI1_S_AHG_CLEAR - have send engine clear ahg state
+ * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
+ */
+#define HFI1_S_AHG_VALID         0x80000000
+#define HFI1_S_AHG_CLEAR         0x40000000
+#define HFI1_S_WAIT_PIO_DRAIN    0x20000000
+#define HFI1_S_MIN_BIT_MASK      0x01000000
+
+/*
+ * overload wait defines
+ */
+
+#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
+#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
 /*
  * free_ahg - clear ahg from QP
  */
@@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp)
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	priv->s_ahg->ahgcount = 0;
-	qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+	qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
 	if (priv->s_sde && qp->s_ahgidx >= 0)
 		sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
 	qp->s_ahgidx = -1;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a1a47ac53c6..1d31bd2fa91f 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -241,7 +241,7 @@ bail:
 	smp_wmb();
 	qp->s_flags &= ~(RVT_S_RESP_PENDING
 				| RVT_S_ACK_PENDING
-				| RVT_S_AHG_VALID);
+				| HFI1_S_AHG_VALID);
 	return 0;
 }
 
@@ -1024,7 +1024,7 @@ done:
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
 		qp->s_flags |= RVT_S_WAIT_PSN;
-	qp->s_flags &= ~RVT_S_AHG_VALID;
+	qp->s_flags &= ~HFI1_S_AHG_VALID;
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index ef4c566e206f..5f56f3c1b4c4 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
 	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
@@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
+	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
 		clear_ahg(qp);
-	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
+	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
 		/* first middle that needs copy  */
 		if (qp->s_ahgidx < 0)
 			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
@@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
 			/* save to protect a change in another thread */
 			priv->s_ahg->ahgidx = qp->s_ahgidx;
-			qp->s_flags |= RVT_S_AHG_VALID;
+			qp->s_flags |= HFI1_S_AHG_VALID;
 		}
 	} else {
 		/* subsequent middle after valid */
@@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~RVT_S_AHG_VALID;
+		qp->s_flags &= ~HFI1_S_AHG_VALID;
 
 	bth0 |= pkey;
 	bth0 |= extra_bytes << 20;
@@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~RVT_S_AHG_VALID;
+		qp->s_flags &= ~HFI1_S_AHG_VALID;
 
 	bth0 |= pkey;
 	bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index b7c75b63f887..5cef1224fa9c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp,
 			int was_empty;
 
 			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
-			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
+			dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
 			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
 			iowait_queue(ps->pkts_sent, &priv->s_iowait,
@@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		return pio_wait(qp,
 				ps->s_txreq->psc,
 				ps,
-				RVT_S_WAIT_PIO_DRAIN);
+				HFI1_S_WAIT_PIO_DRAIN);
 	return sr(qp, ps, 0);
 }
 
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1145a4c154b2..927f6d5b6d0f 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
 #define DEF_RDMAVT_INCQP_H
 
 /*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -91,6 +91,7 @@
  * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
  * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
  * RVT_S_ECN - a BECN was queued to the send engine
+ * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
  */
 #define RVT_S_SIGNAL_REQ_WR	0x0001
 #define RVT_S_BUSY		0x0002
@@ -103,23 +104,26 @@
 #define RVT_S_WAIT_SSN_CREDIT	0x0100
 #define RVT_S_WAIT_DMA		0x0200
 #define RVT_S_WAIT_PIO		0x0400
-#define RVT_S_WAIT_PIO_DRAIN    0x0800
-#define RVT_S_WAIT_TX		0x1000
-#define RVT_S_WAIT_DMA_DESC	0x2000
-#define RVT_S_WAIT_KMEM		0x4000
-#define RVT_S_WAIT_PSN		0x8000
-#define RVT_S_WAIT_ACK		0x10000
-#define RVT_S_SEND_ONE		0x20000
-#define RVT_S_UNLIMITED_CREDIT	0x40000
-#define RVT_S_AHG_VALID		0x80000
-#define RVT_S_AHG_CLEAR		0x100000
-#define RVT_S_ECN		0x200000
+#define RVT_S_WAIT_TX		0x0800
+#define RVT_S_WAIT_DMA_DESC	0x1000
+#define RVT_S_WAIT_KMEM		0x2000
+#define RVT_S_WAIT_PSN		0x4000
+#define RVT_S_WAIT_ACK		0x8000
+#define RVT_S_SEND_ONE		0x10000
+#define RVT_S_UNLIMITED_CREDIT	0x20000
+#define RVT_S_ECN		0x40000
+#define RVT_S_MAX_BIT_MASK	0x800000
+
+/*
+ * Drivers should use s_flags starting with bit 31 down to the bit next to
+ * RVT_S_MAX_BIT_MASK
+ */
 
 /*
  * Wait flags that would prevent any packet type from being sent.
  */
 #define RVT_S_ANY_WAIT_IO \
-	(RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
+	(RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
 	 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
 
 /*
-- 
cgit v1.2.3


From 19e0c58f6552638c86395f0717210326fdf14fd2 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: generic inline data handling

Add generic inline data handling by adding a pointer to the inline data
region to struct iomap.  When handling a buffered IOMAP_INLINE write,
iomap_write_begin will copy the current inline data from the inline data
region into the page cache, and iomap_write_end will copy the changes in
the page cache back to the inline data region.

This doesn't cover inline data reads and direct I/O yet because so far,
we have no users.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
[hch: small cleanups to better fit in with other iomap work]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++------
 include/linux/iomap.h |  1 +
 2 files changed, 56 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 9c454459a1e9..4aecd7c5dbd8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -103,6 +103,26 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static void
+iomap_read_inline_data(struct inode *inode, struct page *page,
+		struct iomap *iomap)
+{
+	size_t size = i_size_read(inode);
+	void *addr;
+
+	if (PageUptodate(page))
+		return;
+
+	BUG_ON(page->index);
+	BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(addr, iomap->inline_data, size);
+	memset(addr + size, 0, PAGE_SIZE - size);
+	kunmap_atomic(addr);
+	SetPageUptodate(page);
+}
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -133,7 +153,11 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	if (!page)
 		return -ENOMEM;
 
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (iomap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, iomap);
+	else
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -146,14 +170,37 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+static int
+iomap_write_end_inline(struct inode *inode, struct page *page,
+		struct iomap *iomap, loff_t pos, unsigned copied)
+{
+	void *addr;
+
+	WARN_ON_ONCE(!PageUptodate(page));
+	BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
+
+	addr = kmap_atomic(page);
+	memcpy(iomap->inline_data + pos, addr + pos, copied);
+	kunmap_atomic(addr);
+
+	mark_inode_dirty(inode);
+	__generic_write_end(inode, pos, copied, page);
+	return copied;
+}
+
 static int
 iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
 {
 	int ret;
 
-	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-			copied, page, NULL);
+	if (iomap->type == IOMAP_INLINE) {
+		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+	} else {
+		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+				copied, page, NULL);
+	}
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
@@ -208,7 +255,8 @@ again:
 
 		flush_dcache_page(page);
 
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
 		if (unlikely(status < 0))
 			break;
 		copied = status;
@@ -302,7 +350,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 		WARN_ON_ONCE(!PageUptodate(page));
 
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 		if (unlikely(status <= 0)) {
 			if (WARN_ON_ONCE(status == 0))
 				return -EIO;
@@ -354,7 +402,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
 	zero_user(page, offset, bytes);
 	mark_page_accessed(page);
 
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
 }
 
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a044a824da85..10d6cff7f69a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -55,6 +55,7 @@ struct iomap {
 	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
+	void			*inline_data;
 };
 
 /*
-- 
cgit v1.2.3


From 63899c6f8851c32214b19390254fa1ae90b582df Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:56 -0700
Subject: iomap: add a page_done callback

This will be used by gfs2 to attach data to transactions for the journaled
data mode.  But the concept is generic enough that we might be able to
use it for other purposes like encryption/integrity post-processing in the
future.

Based on a patch from Andreas Gruenbacher.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 3 +++
 include/linux/iomap.h | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4aecd7c5dbd8..a1f71e64ea49 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -201,6 +201,9 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 				copied, page, NULL);
 	}
 
+	if (iomap->page_done)
+		iomap->page_done(inode, pos, copied, page, iomap);
+
 	if (ret < len)
 		iomap_write_failed(inode, pos, len);
 	return ret;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 10d6cff7f69a..45f43865b0f0 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -9,6 +9,7 @@ struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
 struct kiocb;
+struct page;
 struct vm_area_struct;
 struct vm_fault;
 
@@ -56,6 +57,14 @@ struct iomap {
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;
+
+	/*
+	 * Called when finished processing a page in the mapping returned in
+	 * this iomap.  At least for now this is only supported in the buffered
+	 * write path.
+	 */
+	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+			struct page *page, struct iomap *iomap);
 };
 
 /*
-- 
cgit v1.2.3


From e184fde6f3f5353040dd4b5637f823fc87436e55 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add private pointer to struct iomap

Add a private pointer to struct iomap to allow filesystems to pass data
from iomap_begin to iomap_end.  Will be used by gfs2 for passing on the
on-disk inode buffer head.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/iomap.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 45f43865b0f0..1f36523d448a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -57,6 +57,7 @@ struct iomap {
 	struct block_device	*bdev;	/* block device for I/O */
 	struct dax_device	*dax_dev; /* dax_dev for dax operations */
 	void			*inline_data;
+	void			*private; /* filesystem private */
 
 	/*
 	 * Called when finished processing a page in the mapping returned in
-- 
cgit v1.2.3


From 72b4daa241295440f98e80ae21294a67b27ca091 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:57 -0700
Subject: iomap: add an iomap-based readpage and readpages implementation

Simply use iomap_apply to iterate over the file and a submit a bio for
each non-uptodate but mapped region and zero everything else.  Note that
as-is this can not be used for file systems with a blocksize smaller than
the page size, but that support will be added later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 214 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/iomap.h |   3 +
 2 files changed, 216 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index a1f71e64ea49..4f10c6b1cf6d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -123,6 +124,217 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
 	SetPageUptodate(page);
 }
 
+static void
+iomap_read_end_io(struct bio *bio)
+{
+	int error = blk_status_to_errno(bio->bi_status);
+	struct bio_vec *bvec;
+	int i;
+
+	bio_for_each_segment_all(bvec, bio, i)
+		page_endio(bvec->bv_page, false, error);
+	bio_put(bio);
+}
+
+struct iomap_readpage_ctx {
+	struct page		*cur_page;
+	bool			cur_page_in_bio;
+	bool			is_readahead;
+	struct bio		*bio;
+	struct list_head	*pages;
+};
+
+static loff_t
+iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	struct page *page = ctx->cur_page;
+	unsigned poff = pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	bool is_contig = false;
+	sector_t sector;
+
+	/* we don't support blocksize < PAGE_SIZE quite yet. */
+	WARN_ON_ONCE(pos != page_offset(page));
+	WARN_ON_ONCE(plen != PAGE_SIZE);
+
+	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
+		zero_user(page, poff, plen);
+		SetPageUptodate(page);
+		goto done;
+	}
+
+	ctx->cur_page_in_bio = true;
+
+	/*
+	 * Try to merge into a previous segment if we can.
+	 */
+	sector = iomap_sector(iomap, pos);
+	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+			goto done;
+		is_contig = true;
+	}
+
+	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
+		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+		if (ctx->bio)
+			submit_bio(ctx->bio);
+
+		if (ctx->is_readahead) /* same as readahead_gfp_mask */
+			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+		ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+		ctx->bio->bi_opf = REQ_OP_READ;
+		if (ctx->is_readahead)
+			ctx->bio->bi_opf |= REQ_RAHEAD;
+		ctx->bio->bi_iter.bi_sector = sector;
+		bio_set_dev(ctx->bio, iomap->bdev);
+		ctx->bio->bi_end_io = iomap_read_end_io;
+	}
+
+	__bio_add_page(ctx->bio, page, plen, poff);
+done:
+	return plen;
+}
+
+int
+iomap_readpage(struct page *page, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = { .cur_page = page };
+	struct inode *inode = page->mapping->host;
+	unsigned poff;
+	loff_t ret;
+
+	WARN_ON_ONCE(page_has_buffers(page));
+
+	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
+		ret = iomap_apply(inode, page_offset(page) + poff,
+				PAGE_SIZE - poff, 0, ops, &ctx,
+				iomap_readpage_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			SetPageError(page);
+			break;
+		}
+	}
+
+	if (ctx.bio) {
+		submit_bio(ctx.bio);
+		WARN_ON_ONCE(!ctx.cur_page_in_bio);
+	} else {
+		WARN_ON_ONCE(ctx.cur_page_in_bio);
+		unlock_page(page);
+	}
+
+	/*
+	 * Just like mpage_readpages and block_read_full_page we always
+	 * return 0 and just mark the page as PageError on errors.  This
+	 * should be cleaned up all through the stack eventually.
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_readpage);
+
+static struct page *
+iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
+		loff_t length, loff_t *done)
+{
+	while (!list_empty(pages)) {
+		struct page *page = lru_to_page(pages);
+
+		if (page_offset(page) >= (u64)pos + length)
+			break;
+
+		list_del(&page->lru);
+		if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
+				GFP_NOFS))
+			return page;
+
+		/*
+		 * If we already have a page in the page cache at index we are
+		 * done.  Upper layers don't care if it is uptodate after the
+		 * readpages call itself as every page gets checked again once
+		 * actually needed.
+		 */
+		*done += PAGE_SIZE;
+		put_page(page);
+	}
+
+	return NULL;
+}
+
+static loff_t
+iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	struct iomap_readpage_ctx *ctx = data;
+	loff_t done, ret;
+
+	for (done = 0; done < length; done += ret) {
+		if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
+			if (!ctx->cur_page_in_bio)
+				unlock_page(ctx->cur_page);
+			put_page(ctx->cur_page);
+			ctx->cur_page = NULL;
+		}
+		if (!ctx->cur_page) {
+			ctx->cur_page = iomap_next_page(inode, ctx->pages,
+					pos, length, &done);
+			if (!ctx->cur_page)
+				break;
+			ctx->cur_page_in_bio = false;
+		}
+		ret = iomap_readpage_actor(inode, pos + done, length - done,
+				ctx, iomap);
+	}
+
+	return done;
+}
+
+int
+iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops)
+{
+	struct iomap_readpage_ctx ctx = {
+		.pages		= pages,
+		.is_readahead	= true,
+	};
+	loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
+	loff_t last = page_offset(list_entry(pages->next, struct page, lru));
+	loff_t length = last - pos + PAGE_SIZE, ret = 0;
+
+	while (length > 0) {
+		ret = iomap_apply(mapping->host, pos, length, 0, ops,
+				&ctx, iomap_readpages_actor);
+		if (ret <= 0) {
+			WARN_ON_ONCE(ret == 0);
+			goto done;
+		}
+		pos += ret;
+		length -= ret;
+	}
+	ret = 0;
+done:
+	if (ctx.bio)
+		submit_bio(ctx.bio);
+	if (ctx.cur_page) {
+		if (!ctx.cur_page_in_bio)
+			unlock_page(ctx.cur_page);
+		put_page(ctx.cur_page);
+	}
+
+	/*
+	 * Check that we didn't lose a page due to the arcance calling
+	 * conventions..
+	 */
+	WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iomap_readpages);
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1f36523d448a..30d314407f66 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -99,6 +99,9 @@ struct iomap_ops {
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 83c2b54b9295a5fc0d9c8f1751aaf8099d1760f6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 12 Jun 2018 12:05:43 -0700
Subject: scsi: target: Abstract tag freeing

Introduce target_free_tag() and convert all drivers to use it.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_target.c        | 4 ++--
 drivers/target/iscsi/iscsi_target_util.c | 2 +-
 drivers/target/sbp/sbp_target.c          | 2 +-
 drivers/target/tcm_fc/tfc_cmd.c          | 4 ++--
 drivers/usb/gadget/function/f_tcm.c      | 2 +-
 drivers/vhost/scsi.c                     | 2 +-
 drivers/xen/xen-scsiback.c               | 4 +---
 include/target/target_core_base.h        | 5 +++++
 8 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0fea2e2326be..11274317118a 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3783,7 +3783,7 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd)
 		return;
 	}
 	cmd->jiffies_at_free = get_jiffies_64();
-	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	target_free_tag(sess->se_sess, &cmd->se_cmd);
 }
 EXPORT_SYMBOL(qlt_free_cmd);
 
@@ -4146,7 +4146,7 @@ out_term:
 	qlt_send_term_exchange(qpair, NULL, &cmd->atio, 1, 0);
 
 	qlt_decr_num_pend_cmds(vha);
-	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	target_free_tag(sess->se_sess, &cmd->se_cmd);
 	spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
 	spin_lock_irqsave(&ha->tgt.sess_lock, flags);
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 4435bf374d2d..7e98697cfb8e 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -711,7 +711,7 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd)
 	kfree(cmd->iov_data);
 	kfree(cmd->text_in_ptr);
 
-	percpu_ida_free(&sess->se_sess->sess_tag_pool, se_cmd->map_tag);
+	target_free_tag(sess->se_sess, se_cmd);
 }
 EXPORT_SYMBOL(iscsit_release_cmd);
 
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index fb1003921d85..679ae29d25ab 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1460,7 +1460,7 @@ static void sbp_free_request(struct sbp_target_request *req)
 	kfree(req->pg_tbl);
 	kfree(req->cmd_buf);
 
-	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
+	target_free_tag(se_sess, se_cmd);
 }
 
 static void sbp_mgt_agent_process(struct work_struct *work)
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index ec372860106f..13e4efbe1ce7 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -92,7 +92,7 @@ static void ft_free_cmd(struct ft_cmd *cmd)
 	if (fr_seq(fp))
 		fc_seq_release(fr_seq(fp));
 	fc_frame_free(fp);
-	percpu_ida_free(&sess->se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
+	target_free_tag(sess->se_sess, &cmd->se_cmd);
 	ft_sess_put(sess);	/* undo get from lookup at recv */
 }
 
@@ -461,7 +461,7 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
 	cmd->sess = sess;
 	cmd->seq = fc_seq_assign(lport, fp);
 	if (!cmd->seq) {
-		percpu_ida_free(&se_sess->sess_tag_pool, tag);
+		target_free_tag(se_sess, &cmd->se_cmd);
 		goto busy;
 	}
 	cmd->req_frame = fp;		/* hold frame during cmd */
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index d78dbb73bde8..9f670d9224b9 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1288,7 +1288,7 @@ static void usbg_release_cmd(struct se_cmd *se_cmd)
 	struct se_session *se_sess = se_cmd->se_sess;
 
 	kfree(cmd->data_buf);
-	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
+	target_free_tag(se_sess, se_cmd);
 }
 
 static u32 usbg_sess_get_index(struct se_session *se_sess)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 17fcd3b2e686..7aaf0e5512ed 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -324,7 +324,7 @@ static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
 	}
 
 	vhost_scsi_put_inflight(tv_cmd->inflight);
-	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
+	target_free_tag(se_sess, se_cmd);
 }
 
 static u32 vhost_scsi_sess_get_index(struct se_session *se_sess)
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 7bc88fd43cfc..ec6635258ed8 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1377,9 +1377,7 @@ static int scsiback_check_stop_free(struct se_cmd *se_cmd)
 
 static void scsiback_release_cmd(struct se_cmd *se_cmd)
 {
-	struct se_session *se_sess = se_cmd->se_sess;
-
-	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
+	target_free_tag(se_cmd->se_sess, se_cmd);
 }
 
 static u32 scsiback_sess_get_index(struct se_session *se_sess)
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 922a39f45abc..260c2f3e9460 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -934,4 +934,9 @@ static inline void atomic_dec_mb(atomic_t *v)
 	smp_mb__after_atomic();
 }
 
+static inline void target_free_tag(struct se_session *sess, struct se_cmd *cmd)
+{
+	percpu_ida_free(&sess->sess_tag_pool, cmd->map_tag);
+}
+
 #endif /* TARGET_CORE_BASE_H */
-- 
cgit v1.2.3


From 10e9cbb6b531117be0c4a79f2c7fa9a45a0dd532 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 12 Jun 2018 12:05:44 -0700
Subject: scsi: target: Convert target drivers to use sbitmap

The sbitmap and the percpu_ida perform essentially the same task,
allocating tags for commands.  The sbitmap outperforms the percpu_ida as
documented here: https://lkml.org/lkml/2014/4/22/553

The sbitmap interface is a little harder to use, but being able to remove
the percpu_ida code and getting better performance justifies the additional
complexity.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>	# f_tcm
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_target.c        | 10 ++++++----
 drivers/target/iscsi/iscsi_target_util.c | 33 +++++++++++++++++++++++++++++---
 drivers/target/sbp/sbp_target.c          |  5 +++--
 drivers/target/target_core_transport.c   |  5 +++--
 drivers/target/tcm_fc/tfc_cmd.c          |  6 +++---
 drivers/usb/gadget/function/f_tcm.c      |  5 +++--
 drivers/vhost/scsi.c                     |  6 +++---
 drivers/xen/xen-scsiback.c               |  5 +++--
 include/target/iscsi/iscsi_target_core.h |  1 +
 include/target/target_core_base.h        |  7 ++++---
 10 files changed, 59 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 11274317118a..3de11153d1d3 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4277,9 +4277,9 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
 {
 	struct se_session *se_sess = sess->se_sess;
 	struct qla_tgt_cmd *cmd;
-	int tag;
+	int tag, cpu;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0)
 		return NULL;
 
@@ -4292,6 +4292,7 @@ static struct qla_tgt_cmd *qlt_get_tag(scsi_qla_host_t *vha,
 	qlt_incr_num_pend_cmds(vha);
 	cmd->vha = vha;
 	cmd->se_cmd.map_tag = tag;
+	cmd->se_cmd.map_cpu = cpu;
 	cmd->sess = sess;
 	cmd->loop_id = sess->loop_id;
 	cmd->conf_compl_supported = sess->conf_compl_supported;
@@ -5294,7 +5295,7 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 	struct fc_port *sess;
 	struct se_session *se_sess;
 	struct qla_tgt_cmd *cmd;
-	int tag;
+	int tag, cpu;
 	unsigned long flags;
 
 	if (unlikely(tgt->tgt_stop)) {
@@ -5326,7 +5327,7 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 
 	se_sess = sess->se_sess;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0)
 		return;
 
@@ -5357,6 +5358,7 @@ qlt_alloc_qfull_cmd(struct scsi_qla_host *vha,
 	cmd->reset_count = ha->base_qpair->chip_reset;
 	cmd->q_full = 1;
 	cmd->qpair = ha->base_qpair;
+	cmd->se_cmd.map_cpu = cpu;
 
 	if (qfull) {
 		cmd->q_full = 1;
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7e98697cfb8e..8cfcf9033507 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -17,7 +17,7 @@
  ******************************************************************************/
 
 #include <linux/list.h>
-#include <linux/percpu_ida.h>
+#include <linux/sched/signal.h>
 #include <net/ipv6.h>         /* ipv6_addr_equal() */
 #include <scsi/scsi_tcq.h>
 #include <scsi/iscsi_proto.h>
@@ -147,6 +147,30 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
 	spin_unlock_bh(&cmd->r2t_lock);
 }
 
+static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
+{
+	int tag = -1;
+	DEFINE_WAIT(wait);
+	struct sbq_wait_state *ws;
+
+	if (state == TASK_RUNNING)
+		return tag;
+
+	ws = &se_sess->sess_tag_pool.ws[0];
+	for (;;) {
+		prepare_to_wait_exclusive(&ws->wait, &wait, state);
+		if (signal_pending_state(state, current))
+			break;
+		tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
+		if (tag >= 0)
+			break;
+		schedule();
+	}
+
+	finish_wait(&ws->wait, &wait);
+	return tag;
+}
+
 /*
  * May be called from software interrupt (timer) context for allocating
  * iSCSI NopINs.
@@ -155,9 +179,11 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, int state)
 {
 	struct iscsi_cmd *cmd;
 	struct se_session *se_sess = conn->sess->se_sess;
-	int size, tag;
+	int size, tag, cpu;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, state);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
+	if (tag < 0)
+		tag = iscsit_wait_for_tag(se_sess, state, &cpu);
 	if (tag < 0)
 		return NULL;
 
@@ -166,6 +192,7 @@ struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, int state)
 	memset(cmd, 0, size);
 
 	cmd->se_cmd.map_tag = tag;
+	cmd->se_cmd.map_cpu = cpu;
 	cmd->conn = conn;
 	cmd->data_direction = DMA_NONE;
 	INIT_LIST_HEAD(&cmd->i_conn_node);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 679ae29d25ab..42b21f2ac8b0 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -926,15 +926,16 @@ static struct sbp_target_request *sbp_mgt_get_req(struct sbp_session *sess,
 {
 	struct se_session *se_sess = sess->se_sess;
 	struct sbp_target_request *req;
-	int tag;
+	int tag, cpu;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0)
 		return ERR_PTR(-ENOMEM);
 
 	req = &((struct sbp_target_request *)se_sess->sess_cmd_map)[tag];
 	memset(req, 0, sizeof(*req));
 	req->se_cmd.map_tag = tag;
+	req->se_cmd.map_cpu = cpu;
 	req->se_cmd.tag = next_orb;
 
 	return req;
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ee5081ba5313..89dd475d0a8f 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -260,7 +260,8 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 		}
 	}
 
-	rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num);
+	rc = sbitmap_queue_init_node(&se_sess->sess_tag_pool, tag_num, -1,
+			false, GFP_KERNEL, NUMA_NO_NODE);
 	if (rc < 0) {
 		pr_err("Unable to init se_sess->sess_tag_pool,"
 			" tag_num: %u\n", tag_num);
@@ -547,7 +548,7 @@ void transport_free_session(struct se_session *se_sess)
 		target_put_nacl(se_nacl);
 	}
 	if (se_sess->sess_cmd_map) {
-		percpu_ida_destroy(&se_sess->sess_tag_pool);
+		sbitmap_queue_free(&se_sess->sess_tag_pool);
 		kvfree(se_sess->sess_cmd_map);
 	}
 	kmem_cache_free(se_sess_cache, se_sess);
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 13e4efbe1ce7..a183d4da7db2 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -28,7 +28,6 @@
 #include <linux/configfs.h>
 #include <linux/ctype.h>
 #include <linux/hash.h>
-#include <linux/percpu_ida.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/libfc.h>
@@ -448,9 +447,9 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
 	struct ft_cmd *cmd;
 	struct fc_lport *lport = sess->tport->lport;
 	struct se_session *se_sess = sess->se_sess;
-	int tag;
+	int tag, cpu;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0)
 		goto busy;
 
@@ -458,6 +457,7 @@ static void ft_recv_cmd(struct ft_sess *sess, struct fc_frame *fp)
 	memset(cmd, 0, sizeof(struct ft_cmd));
 
 	cmd->se_cmd.map_tag = tag;
+	cmd->se_cmd.map_cpu = cpu;
 	cmd->sess = sess;
 	cmd->seq = fc_seq_assign(lport, fp);
 	if (!cmd->seq) {
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 9f670d9224b9..5003e857dce7 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1071,15 +1071,16 @@ static struct usbg_cmd *usbg_get_cmd(struct f_uas *fu,
 {
 	struct se_session *se_sess = tv_nexus->tvn_se_sess;
 	struct usbg_cmd *cmd;
-	int tag;
+	int tag, cpu;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0)
 		return ERR_PTR(-ENOMEM);
 
 	cmd = &((struct usbg_cmd *)se_sess->sess_cmd_map)[tag];
 	memset(cmd, 0, sizeof(*cmd));
 	cmd->se_cmd.map_tag = tag;
+	cmd->se_cmd.map_cpu = cpu;
 	cmd->se_cmd.tag = cmd->tag = scsi_tag;
 	cmd->fu = fu;
 
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 7aaf0e5512ed..ebaf831285ea 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -46,7 +46,6 @@
 #include <linux/virtio_scsi.h>
 #include <linux/llist.h>
 #include <linux/bitmap.h>
-#include <linux/percpu_ida.h>
 
 #include "vhost.h"
 
@@ -567,7 +566,7 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
 	struct se_session *se_sess;
 	struct scatterlist *sg, *prot_sg;
 	struct page **pages;
-	int tag;
+	int tag, cpu;
 
 	tv_nexus = tpg->tpg_nexus;
 	if (!tv_nexus) {
@@ -576,7 +575,7 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
 	}
 	se_sess = tv_nexus->tvn_se_sess;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0) {
 		pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
 		return ERR_PTR(-ENOMEM);
@@ -591,6 +590,7 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
 	cmd->tvc_prot_sgl = prot_sg;
 	cmd->tvc_upages = pages;
 	cmd->tvc_se_cmd.map_tag = tag;
+	cmd->tvc_se_cmd.map_cpu = cpu;
 	cmd->tvc_tag = scsi_tag;
 	cmd->tvc_lun = lun;
 	cmd->tvc_task_attr = task_attr;
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index ec6635258ed8..764dd9aa0131 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -654,9 +654,9 @@ static struct vscsibk_pend *scsiback_get_pend_req(struct vscsiif_back_ring *ring
 	struct scsiback_nexus *nexus = tpg->tpg_nexus;
 	struct se_session *se_sess = nexus->tvn_se_sess;
 	struct vscsibk_pend *req;
-	int tag, i;
+	int tag, cpu, i;
 
-	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
+	tag = sbitmap_queue_get(&se_sess->sess_tag_pool, &cpu);
 	if (tag < 0) {
 		pr_err("Unable to obtain tag for vscsiif_request\n");
 		return ERR_PTR(-ENOMEM);
@@ -665,6 +665,7 @@ static struct vscsibk_pend *scsiback_get_pend_req(struct vscsiif_back_ring *ring
 	req = &((struct vscsibk_pend *)se_sess->sess_cmd_map)[tag];
 	memset(req, 0, sizeof(*req));
 	req->se_cmd.map_tag = tag;
+	req->se_cmd.map_cpu = cpu;
 
 	for (i = 0; i < VSCSI_MAX_GRANTS; i++)
 		req->grant_handles[i] = SCSIBACK_INVALID_HANDLE;
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index cf5f3fff1f1a..f2e6abea8490 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-direction.h>     /* enum dma_data_direction */
 #include <linux/list.h>              /* struct list_head */
+#include <linux/sched.h>
 #include <linux/socket.h>            /* struct sockaddr_storage */
 #include <linux/types.h>             /* u8 */
 #include <scsi/iscsi_proto.h>        /* itt_t */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 260c2f3e9460..448f291125c2 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -4,7 +4,7 @@
 
 #include <linux/configfs.h>      /* struct config_group */
 #include <linux/dma-direction.h> /* enum dma_data_direction */
-#include <linux/percpu_ida.h>    /* struct percpu_ida */
+#include <linux/sbitmap.h>
 #include <linux/percpu-refcount.h>
 #include <linux/semaphore.h>     /* struct semaphore */
 #include <linux/completion.h>
@@ -455,6 +455,7 @@ struct se_cmd {
 	int			sam_task_attr;
 	/* Used for se_sess->sess_tag_pool */
 	unsigned int		map_tag;
+	int			map_cpu;
 	/* Transport protocol dependent state, see transport_state_table */
 	enum transport_state_table t_state;
 	/* See se_cmd_flags_table */
@@ -608,7 +609,7 @@ struct se_session {
 	struct list_head	sess_wait_list;
 	spinlock_t		sess_cmd_lock;
 	void			*sess_cmd_map;
-	struct percpu_ida	sess_tag_pool;
+	struct sbitmap_queue	sess_tag_pool;
 };
 
 struct se_device;
@@ -936,7 +937,7 @@ static inline void atomic_dec_mb(atomic_t *v)
 
 static inline void target_free_tag(struct se_session *sess, struct se_cmd *cmd)
 {
-	percpu_ida_free(&sess->sess_tag_pool, cmd->map_tag);
+	sbitmap_queue_clear(&sess->sess_tag_pool, cmd->map_tag, cmd->map_cpu);
 }
 
 #endif /* TARGET_CORE_BASE_H */
-- 
cgit v1.2.3


From 693ba15c9202fe0283404abe4066e1b986e284eb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 12 Jun 2018 12:05:45 -0700
Subject: scsi: Remove percpu_ida

With its one user gone, remove the library code.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/percpu_ida.h |  83 ----------
 lib/Makefile               |   2 +-
 lib/percpu_ida.c           | 370 ---------------------------------------------
 3 files changed, 1 insertion(+), 454 deletions(-)
 delete mode 100644 include/linux/percpu_ida.h
 delete mode 100644 lib/percpu_ida.c

(limited to 'include')

diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
deleted file mode 100644
index 07d78e4653bc..000000000000
--- a/include/linux/percpu_ida.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERCPU_IDA_H__
-#define __PERCPU_IDA_H__
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/spinlock_types.h>
-#include <linux/wait.h>
-#include <linux/cpumask.h>
-
-struct percpu_ida_cpu;
-
-struct percpu_ida {
-	/*
-	 * number of tags available to be allocated, as passed to
-	 * percpu_ida_init()
-	 */
-	unsigned			nr_tags;
-	unsigned			percpu_max_size;
-	unsigned			percpu_batch_size;
-
-	struct percpu_ida_cpu __percpu	*tag_cpu;
-
-	/*
-	 * Bitmap of cpus that (may) have tags on their percpu freelists:
-	 * steal_tags() uses this to decide when to steal tags, and which cpus
-	 * to try stealing from.
-	 *
-	 * It's ok for a freelist to be empty when its bit is set - steal_tags()
-	 * will just keep looking - but the bitmap _must_ be set whenever a
-	 * percpu freelist does have tags.
-	 */
-	cpumask_t			cpus_have_tags;
-
-	struct {
-		spinlock_t		lock;
-		/*
-		 * When we go to steal tags from another cpu (see steal_tags()),
-		 * we want to pick a cpu at random. Cycling through them every
-		 * time we steal is a bit easier and more or less equivalent:
-		 */
-		unsigned		cpu_last_stolen;
-
-		/* For sleeping on allocation failure */
-		wait_queue_head_t	wait;
-
-		/*
-		 * Global freelist - it's a stack where nr_free points to the
-		 * top
-		 */
-		unsigned		nr_free;
-		unsigned		*freelist;
-	} ____cacheline_aligned_in_smp;
-};
-
-/*
- * Number of tags we move between the percpu freelist and the global freelist at
- * a time
- */
-#define IDA_DEFAULT_PCPU_BATCH_MOVE	32U
-/* Max size of percpu freelist, */
-#define IDA_DEFAULT_PCPU_SIZE	((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2)
-
-int percpu_ida_alloc(struct percpu_ida *pool, int state);
-void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
-
-void percpu_ida_destroy(struct percpu_ida *pool);
-int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
-	unsigned long max_size, unsigned long batch_size);
-static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
-{
-	return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE,
-		IDA_DEFAULT_PCPU_BATCH_MOVE);
-}
-
-typedef int (*percpu_ida_cb)(unsigned, void *);
-int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
-	void *data);
-
-unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu);
-#endif /* __PERCPU_IDA_H__ */
diff --git a/lib/Makefile b/lib/Makefile
index 956b320292fe..055420101965 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -40,7 +40,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
+	 percpu-refcount.o rhashtable.o reciprocal_div.o \
 	 once.o refcount.o usercopy.o errseq.o bucket_locks.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
deleted file mode 100644
index 9bbd9c5d375a..000000000000
--- a/lib/percpu_ida.c
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
- * Percpu IDA library
- *
- * Copyright (C) 2013 Datera, Inc. Kent Overstreet
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/percpu.h>
-#include <linux/sched/signal.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/percpu_ida.h>
-
-struct percpu_ida_cpu {
-	/*
-	 * Even though this is percpu, we need a lock for tag stealing by remote
-	 * CPUs:
-	 */
-	spinlock_t			lock;
-
-	/* nr_free/freelist form a stack of free IDs */
-	unsigned			nr_free;
-	unsigned			freelist[];
-};
-
-static inline void move_tags(unsigned *dst, unsigned *dst_nr,
-			     unsigned *src, unsigned *src_nr,
-			     unsigned nr)
-{
-	*src_nr -= nr;
-	memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
-	*dst_nr += nr;
-}
-
-/*
- * Try to steal tags from a remote cpu's percpu freelist.
- *
- * We first check how many percpu freelists have tags
- *
- * Then we iterate through the cpus until we find some tags - we don't attempt
- * to find the "best" cpu to steal from, to keep cacheline bouncing to a
- * minimum.
- */
-static inline void steal_tags(struct percpu_ida *pool,
-			      struct percpu_ida_cpu *tags)
-{
-	unsigned cpus_have_tags, cpu = pool->cpu_last_stolen;
-	struct percpu_ida_cpu *remote;
-
-	for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
-	     cpus_have_tags; cpus_have_tags--) {
-		cpu = cpumask_next(cpu, &pool->cpus_have_tags);
-
-		if (cpu >= nr_cpu_ids) {
-			cpu = cpumask_first(&pool->cpus_have_tags);
-			if (cpu >= nr_cpu_ids)
-				BUG();
-		}
-
-		pool->cpu_last_stolen = cpu;
-		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-
-		cpumask_clear_cpu(cpu, &pool->cpus_have_tags);
-
-		if (remote == tags)
-			continue;
-
-		spin_lock(&remote->lock);
-
-		if (remote->nr_free) {
-			memcpy(tags->freelist,
-			       remote->freelist,
-			       sizeof(unsigned) * remote->nr_free);
-
-			tags->nr_free = remote->nr_free;
-			remote->nr_free = 0;
-		}
-
-		spin_unlock(&remote->lock);
-
-		if (tags->nr_free)
-			break;
-	}
-}
-
-/*
- * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
- * our percpu freelist:
- */
-static inline void alloc_global_tags(struct percpu_ida *pool,
-				     struct percpu_ida_cpu *tags)
-{
-	move_tags(tags->freelist, &tags->nr_free,
-		  pool->freelist, &pool->nr_free,
-		  min(pool->nr_free, pool->percpu_batch_size));
-}
-
-/**
- * percpu_ida_alloc - allocate a tag
- * @pool: pool to allocate from
- * @state: task state for prepare_to_wait
- *
- * Returns a tag - an integer in the range [0..nr_tags) (passed to
- * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
- *
- * Safe to be called from interrupt context (assuming it isn't passed
- * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
- *
- * @gfp indicates whether or not to wait until a free id is available (it's not
- * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
- * however long it takes until another thread frees an id (same semantics as a
- * mempool).
- *
- * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE.
- */
-int percpu_ida_alloc(struct percpu_ida *pool, int state)
-{
-	DEFINE_WAIT(wait);
-	struct percpu_ida_cpu *tags;
-	unsigned long flags;
-	int tag = -ENOSPC;
-
-	tags = raw_cpu_ptr(pool->tag_cpu);
-	spin_lock_irqsave(&tags->lock, flags);
-
-	/* Fastpath */
-	if (likely(tags->nr_free >= 0)) {
-		tag = tags->freelist[--tags->nr_free];
-		spin_unlock_irqrestore(&tags->lock, flags);
-		return tag;
-	}
-	spin_unlock_irqrestore(&tags->lock, flags);
-
-	while (1) {
-		spin_lock_irqsave(&pool->lock, flags);
-		tags = this_cpu_ptr(pool->tag_cpu);
-
-		/*
-		 * prepare_to_wait() must come before steal_tags(), in case
-		 * percpu_ida_free() on another cpu flips a bit in
-		 * cpus_have_tags
-		 *
-		 * global lock held and irqs disabled, don't need percpu lock
-		 */
-		if (state != TASK_RUNNING)
-			prepare_to_wait(&pool->wait, &wait, state);
-
-		if (!tags->nr_free)
-			alloc_global_tags(pool, tags);
-		if (!tags->nr_free)
-			steal_tags(pool, tags);
-
-		if (tags->nr_free) {
-			tag = tags->freelist[--tags->nr_free];
-			if (tags->nr_free)
-				cpumask_set_cpu(smp_processor_id(),
-						&pool->cpus_have_tags);
-		}
-
-		spin_unlock_irqrestore(&pool->lock, flags);
-
-		if (tag >= 0 || state == TASK_RUNNING)
-			break;
-
-		if (signal_pending_state(state, current)) {
-			tag = -ERESTARTSYS;
-			break;
-		}
-
-		schedule();
-	}
-	if (state != TASK_RUNNING)
-		finish_wait(&pool->wait, &wait);
-
-	return tag;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_alloc);
-
-/**
- * percpu_ida_free - free a tag
- * @pool: pool @tag was allocated from
- * @tag: a tag previously allocated with percpu_ida_alloc()
- *
- * Safe to be called from interrupt context.
- */
-void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
-{
-	struct percpu_ida_cpu *tags;
-	unsigned long flags;
-	unsigned nr_free;
-
-	BUG_ON(tag >= pool->nr_tags);
-
-	tags = raw_cpu_ptr(pool->tag_cpu);
-
-	spin_lock_irqsave(&tags->lock, flags);
-	tags->freelist[tags->nr_free++] = tag;
-
-	nr_free = tags->nr_free;
-
-	if (nr_free == 1) {
-		cpumask_set_cpu(smp_processor_id(),
-				&pool->cpus_have_tags);
-		wake_up(&pool->wait);
-	}
-	spin_unlock_irqrestore(&tags->lock, flags);
-
-	if (nr_free == pool->percpu_max_size) {
-		spin_lock_irqsave(&pool->lock, flags);
-		spin_lock(&tags->lock);
-
-		if (tags->nr_free == pool->percpu_max_size) {
-			move_tags(pool->freelist, &pool->nr_free,
-				  tags->freelist, &tags->nr_free,
-				  pool->percpu_batch_size);
-
-			wake_up(&pool->wait);
-		}
-		spin_unlock(&tags->lock);
-		spin_unlock_irqrestore(&pool->lock, flags);
-	}
-}
-EXPORT_SYMBOL_GPL(percpu_ida_free);
-
-/**
- * percpu_ida_destroy - release a tag pool's resources
- * @pool: pool to free
- *
- * Frees the resources allocated by percpu_ida_init().
- */
-void percpu_ida_destroy(struct percpu_ida *pool)
-{
-	free_percpu(pool->tag_cpu);
-	free_pages((unsigned long) pool->freelist,
-		   get_order(pool->nr_tags * sizeof(unsigned)));
-}
-EXPORT_SYMBOL_GPL(percpu_ida_destroy);
-
-/**
- * percpu_ida_init - initialize a percpu tag pool
- * @pool: pool to initialize
- * @nr_tags: number of tags that will be available for allocation
- *
- * Initializes @pool so that it can be used to allocate tags - integers in the
- * range [0, nr_tags). Typically, they'll be used by driver code to refer to a
- * preallocated array of tag structures.
- *
- * Allocation is percpu, but sharding is limited by nr_tags - for best
- * performance, the workload should not span more cpus than nr_tags / 128.
- */
-int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
-	unsigned long max_size, unsigned long batch_size)
-{
-	unsigned i, cpu, order;
-
-	memset(pool, 0, sizeof(*pool));
-
-	init_waitqueue_head(&pool->wait);
-	spin_lock_init(&pool->lock);
-	pool->nr_tags = nr_tags;
-	pool->percpu_max_size = max_size;
-	pool->percpu_batch_size = batch_size;
-
-	/* Guard against overflow */
-	if (nr_tags > (unsigned) INT_MAX + 1) {
-		pr_err("percpu_ida_init(): nr_tags too large\n");
-		return -EINVAL;
-	}
-
-	order = get_order(nr_tags * sizeof(unsigned));
-	pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order);
-	if (!pool->freelist)
-		return -ENOMEM;
-
-	for (i = 0; i < nr_tags; i++)
-		pool->freelist[i] = i;
-
-	pool->nr_free = nr_tags;
-
-	pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
-				       pool->percpu_max_size * sizeof(unsigned),
-				       sizeof(unsigned));
-	if (!pool->tag_cpu)
-		goto err;
-
-	for_each_possible_cpu(cpu)
-		spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
-
-	return 0;
-err:
-	percpu_ida_destroy(pool);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(__percpu_ida_init);
-
-/**
- * percpu_ida_for_each_free - iterate free ids of a pool
- * @pool: pool to iterate
- * @fn: interate callback function
- * @data: parameter for @fn
- *
- * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
- * ids might be missed, some might be iterated duplicated, and some might
- * be iterated and not free soon.
- */
-int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
-	void *data)
-{
-	unsigned long flags;
-	struct percpu_ida_cpu *remote;
-	unsigned cpu, i, err = 0;
-
-	for_each_possible_cpu(cpu) {
-		remote = per_cpu_ptr(pool->tag_cpu, cpu);
-		spin_lock_irqsave(&remote->lock, flags);
-		for (i = 0; i < remote->nr_free; i++) {
-			err = fn(remote->freelist[i], data);
-			if (err)
-				break;
-		}
-		spin_unlock_irqrestore(&remote->lock, flags);
-		if (err)
-			goto out;
-	}
-
-	spin_lock_irqsave(&pool->lock, flags);
-	for (i = 0; i < pool->nr_free; i++) {
-		err = fn(pool->freelist[i], data);
-		if (err)
-			break;
-	}
-	spin_unlock_irqrestore(&pool->lock, flags);
-out:
-	return err;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
-
-/**
- * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
- * @pool: pool related
- * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
- *
- * Note: this just returns a snapshot of free tags number.
- */
-unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
-{
-	struct percpu_ida_cpu *remote;
-	if (cpu == nr_cpu_ids)
-		return pool->nr_free;
-	remote = per_cpu_ptr(pool->tag_cpu, cpu);
-	return remote->nr_free;
-}
-EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
-- 
cgit v1.2.3


From 2fa4a32613c9182b00e46872755b0662374424a7 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 10 May 2018 11:05:16 +0800
Subject: scsi: libsas: dynamically allocate and free ata host

Commit 2623c7a5f2 ("libata: add refcounting to ata_host") v4.17+ introduced
refcounting to ata_host and will increase or decrease the refcount when
adding or deleting transport ATA port.

Now the ata host for libsas is embedded in domain_device, and the ->kref
member is not initialized. Afer we add ata transport class, ata_host_get()
will be called when adding transport ATA port and a warning will be
triggered as below:

refcount_t: increment on 0; use-after-free.
WARNING: CPU: 2 PID: 103 at
lib/refcount.c:153 refcount_inc+0x40/0x48 ......  Call trace:
 refcount_inc+0x40/0x48
 ata_host_get+0x10/0x18
 ata_tport_add+0x40/0x120
 ata_sas_tport_add+0xc/0x14
 sas_ata_init+0x7c/0xc8
 sas_discover_domain+0x380/0x53c
 process_one_work+0x12c/0x288
 worker_thread+0x58/0x3f0
 kthread+0xfc/0x128
 ret_from_fork+0x10/0x18

And also when removing transport ATA port ata_host_put() will be called and
another similar warning will be triggered. If the refcount decreased to
zero, the ata host will be freed. But this ata host is only part of
domain_device, it cannot be freed directly.

So we have to change this embedded static ata host to a dynamically
allocated ata host and initialize the ->kref member. To use ata_host_get()
and ata_host_put() in libsas, we need to move the declaration of these
functions to the public libata.h and export them.

Fixes: b6240a4df018 ("scsi: libsas: add transport class for ATA devices")
Signed-off-by: Jason Yan <yanaijie@huawei.com>
CC: John Garry <john.garry@huawei.com>
CC: Taras Kondratiuk <takondra@cisco.com>
CC: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-core.c          |  3 +++
 drivers/ata/libata.h               |  2 --
 drivers/scsi/libsas/sas_ata.c      | 40 +++++++++++++++++++++++++-------------
 drivers/scsi/libsas/sas_discover.c |  2 ++
 include/linux/libata.h             |  2 ++
 include/scsi/libsas.h              |  2 +-
 6 files changed, 34 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 27d15ed7fa3d..89cb4872a09c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -6421,6 +6421,7 @@ void ata_host_init(struct ata_host *host, struct device *dev,
 	host->n_tags = ATA_MAX_QUEUE;
 	host->dev = dev;
 	host->ops = ops;
+	kref_init(&host->kref);
 }
 
 void __ata_port_probe(struct ata_port *ap)
@@ -7388,3 +7389,5 @@ EXPORT_SYMBOL_GPL(ata_cable_80wire);
 EXPORT_SYMBOL_GPL(ata_cable_unknown);
 EXPORT_SYMBOL_GPL(ata_cable_ignore);
 EXPORT_SYMBOL_GPL(ata_cable_sata);
+EXPORT_SYMBOL_GPL(ata_host_get);
+EXPORT_SYMBOL_GPL(ata_host_put);
\ No newline at end of file
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 9e21c49cf6be..f953cb4bb1ba 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -100,8 +100,6 @@ extern int ata_port_probe(struct ata_port *ap);
 extern void __ata_port_probe(struct ata_port *ap);
 extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 				      u8 page, void *buf, unsigned int sectors);
-extern void ata_host_get(struct ata_host *host);
-extern void ata_host_put(struct ata_host *host);
 
 #define to_ata_port(d) container_of(d, struct ata_port, tdev)
 
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 2ac7395112b4..64a958a99f6a 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -552,34 +552,46 @@ int sas_ata_init(struct domain_device *found_dev)
 {
 	struct sas_ha_struct *ha = found_dev->port->ha;
 	struct Scsi_Host *shost = ha->core.shost;
+	struct ata_host *ata_host;
 	struct ata_port *ap;
 	int rc;
 
-	ata_host_init(&found_dev->sata_dev.ata_host, ha->dev, &sas_sata_ops);
-	ap = ata_sas_port_alloc(&found_dev->sata_dev.ata_host,
-				&sata_port_info,
-				shost);
+	ata_host = kzalloc(sizeof(*ata_host), GFP_KERNEL);
+	if (!ata_host)	{
+		SAS_DPRINTK("ata host alloc failed.\n");
+		return -ENOMEM;
+	}
+
+	ata_host_init(ata_host, ha->dev, &sas_sata_ops);
+
+	ap = ata_sas_port_alloc(ata_host, &sata_port_info, shost);
 	if (!ap) {
 		SAS_DPRINTK("ata_sas_port_alloc failed.\n");
-		return -ENODEV;
+		rc = -ENODEV;
+		goto free_host;
 	}
 
 	ap->private_data = found_dev;
 	ap->cbl = ATA_CBL_SATA;
 	ap->scsi_host = shost;
 	rc = ata_sas_port_init(ap);
-	if (rc) {
-		ata_sas_port_destroy(ap);
-		return rc;
-	}
-	rc = ata_sas_tport_add(found_dev->sata_dev.ata_host.dev, ap);
-	if (rc) {
-		ata_sas_port_destroy(ap);
-		return rc;
-	}
+	if (rc)
+		goto destroy_port;
+
+	rc = ata_sas_tport_add(ata_host->dev, ap);
+	if (rc)
+		goto destroy_port;
+
+	found_dev->sata_dev.ata_host = ata_host;
 	found_dev->sata_dev.ap = ap;
 
 	return 0;
+
+destroy_port:
+	ata_sas_port_destroy(ap);
+free_host:
+	ata_host_put(ata_host);
+	return rc;
 }
 
 void sas_ata_task_abort(struct sas_task *task)
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 1ffca28fe6a8..0148ae62a52a 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -316,6 +316,8 @@ void sas_free_device(struct kref *kref)
 	if (dev_is_sata(dev) && dev->sata_dev.ap) {
 		ata_sas_tport_delete(dev->sata_dev.ap);
 		ata_sas_port_destroy(dev->sata_dev.ap);
+		ata_host_put(dev->sata_dev.ata_host);
+		dev->sata_dev.ata_host = NULL;
 		dev->sata_dev.ap = NULL;
 	}
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..33e9718397e2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1110,6 +1110,8 @@ extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
 			const struct ata_port_info * const * ppi, int n_ports);
 extern int ata_slave_link_init(struct ata_port *ap);
+extern void ata_host_get(struct ata_host *host);
+extern void ata_host_put(struct ata_host *host);
 extern int ata_host_start(struct ata_host *host);
 extern int ata_host_register(struct ata_host *host,
 			     struct scsi_host_template *sht);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 225ab7783dfd..3de3b10da19a 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -161,7 +161,7 @@ struct sata_device {
 	u8     port_no;        /* port number, if this is a PM (Port) */
 
 	struct ata_port *ap;
-	struct ata_host ata_host;
+	struct ata_host *ata_host;
 	struct smp_resp rps_resp ____cacheline_aligned; /* report_phy_sata_resp */
 	u8     fis[ATA_RESP_FIS_SIZE];
 };
-- 
cgit v1.2.3


From 6519750210d9d3330e85d12e0128652edde448d2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:50 +0200
Subject: sched/swait: Remove __prepare_to_swait

There is no public user of this API, remove it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.157076812@infradead.org
---
 include/linux/swait.h | 1 -
 kernel/sched/swait.c  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/swait.h b/include/linux/swait.h
index bf8cb0dee23c..d6a5e949e4ca 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -161,7 +161,6 @@ extern void swake_up(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
 extern void swake_up_locked(struct swait_queue_head *q);
 
-extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
 
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index b6fb2c3b3ff7..e68bb1398b05 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -69,7 +69,7 @@ void swake_up_all(struct swait_queue_head *q)
 }
 EXPORT_SYMBOL(swake_up_all);
 
-void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
+static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
 {
 	wait->task = current;
 	if (list_empty(&wait->task_list))
-- 
cgit v1.2.3


From 0abf17bc7790dd0467ed0e38522242f23c5da1c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:51 +0200
Subject: sched/swait: Switch to full exclusive mode

Linus noted that swait basically implements exclusive mode -- because
swake_up() only wakes a single waiter. And because of that it should
take care to properly deal with the interruptible case.

In short, the problem is that swake_up() can race with a signal. In
this this case it is possible the swake_up() 'wakes' the waiter that
is already on the way out because it just got a signal and the wakeup
gets lost.

The normal wait code is very careful and avoids this situation, make
sure we do too.

Copy the exact exclusive semantics from wait.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.209762413@infradead.org
---
 include/linux/swait.h | 11 ++++++-----
 kernel/sched/swait.c  | 22 +++++++++++++++++-----
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/swait.h b/include/linux/swait.h
index d6a5e949e4ca..dd032061112d 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -38,8 +38,8 @@
  *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
  *    sleeper state.
  *
- *  - the exclusive mode; because this requires preserving the list order
- *    and this is hard.
+ *  - the !exclusive mode; because that leads to O(n) wakeups, everything is
+ *    exclusive.
  *
  *  - custom wake callback functions; because you cannot give any guarantees
  *    about random code. This also allows swait to be used in RT, such that
@@ -167,9 +167,10 @@ extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queu
 extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
-/* as per ___wait_event() but for swait, therefore "exclusive == 0" */
+/* as per ___wait_event() but for swait, therefore "exclusive == 1" */
 #define ___swait_event(wq, condition, state, ret, cmd)			\
 ({									\
+	__label__ __out;						\
 	struct swait_queue __wait;					\
 	long __ret = ret;						\
 									\
@@ -182,13 +183,13 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 									\
 		if (___wait_is_interruptible(state) && __int) {		\
 			__ret = __int;					\
-			break;						\
+			goto __out;					\
 		}							\
 									\
 		cmd;							\
 	}								\
 	finish_swait(&wq, &__wait);					\
-	__ret;								\
+__out:	__ret;								\
 })
 
 #define __swait_event(wq, condition)					\
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index e68bb1398b05..66890de93ee5 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -73,7 +73,7 @@ static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *w
 {
 	wait->task = current;
 	if (list_empty(&wait->task_list))
-		list_add(&wait->task_list, &q->task_list);
+		list_add_tail(&wait->task_list, &q->task_list);
 }
 
 void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
@@ -89,12 +89,24 @@ EXPORT_SYMBOL(prepare_to_swait);
 
 long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
-	if (signal_pending_state(state, current))
-		return -ERESTARTSYS;
+	unsigned long flags;
+	long ret = 0;
 
-	prepare_to_swait(q, wait, state);
+	raw_spin_lock_irqsave(&q->lock, flags);
+	if (unlikely(signal_pending_state(state, current))) {
+		/*
+		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+		 * must not see us.
+		 */
+		list_del_init(&wait->task_list);
+		ret = -ERESTARTSYS;
+	} else {
+		__prepare_to_swait(q, wait);
+		set_current_state(state);
+	}
+	raw_spin_unlock_irqrestore(&q->lock, flags);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(prepare_to_swait_event);
 
-- 
cgit v1.2.3


From b3dae109fa89d67334bf3349babab3ad9b6f233f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Jun 2018 10:34:52 +0200
Subject: sched/swait: Rename to exclusive

Since swait basically implemented exclusive waits only, make sure
the API reflects that.

  $ git grep -l -e "\<swake_up\>"
		-e "\<swait_event[^ (]*"
		-e "\<prepare_to_swait\>" | while read file;
    do
	sed -i -e 's/\<swake_up\>/&_one/g'
	       -e 's/\<swait_event[^ (]*/&_exclusive/g'
	       -e 's/\<prepare_to_swait\>/&_exclusive/g' $file;
    done

With a few manual touch-ups.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: bigeasy@linutronix.de
Cc: oleg@redhat.com
Cc: paulmck@linux.vnet.ibm.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180612083909.261946548@infradead.org
---
 arch/mips/kvm/mips.c         |  4 ++--
 arch/powerpc/kvm/book3s_hv.c |  6 +++---
 arch/s390/kvm/interrupt.c    |  2 +-
 arch/x86/kernel/kvm.c        |  4 ++--
 arch/x86/kvm/lapic.c         |  2 +-
 include/linux/swait.h        | 24 ++++++++++++------------
 kernel/power/suspend.c       |  4 ++--
 kernel/rcu/srcutiny.c        |  4 ++--
 kernel/rcu/tree.c            |  8 ++++----
 kernel/rcu/tree_exp.h        |  4 ++--
 kernel/rcu/tree_plugin.h     | 12 ++++++------
 kernel/sched/swait.c         | 10 +++++-----
 virt/kvm/arm/arm.c           |  4 ++--
 virt/kvm/arm/psci.c          |  2 +-
 virt/kvm/async_pf.c          |  2 +-
 virt/kvm/kvm_main.c          |  4 ++--
 16 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 7cd76f93a438..f7ea8e21656b 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -515,7 +515,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 	dvcpu->arch.wait = 0;
 
 	if (swq_has_sleeper(&dvcpu->wq))
-		swake_up(&dvcpu->wq);
+		swake_up_one(&dvcpu->wq);
 
 	return 0;
 }
@@ -1204,7 +1204,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
 
 	vcpu->arch.wait = 0;
 	if (swq_has_sleeper(&vcpu->wq))
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 }
 
 /* low level hrtimer wake routine */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de686b340f4a..ee4a8854985e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -216,7 +216,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
-		swake_up(wqp);
+		swake_up_one(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -3188,7 +3188,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		}
 	}
 
-	prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 
 	if (kvmppc_vcore_check_block(vc)) {
 		finish_swait(&vc->wq, &wait);
@@ -3311,7 +3311,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			swake_up(&vc->wq);
+			swake_up_one(&vc->wq);
 		}
 
 	}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index daa09f89ca2d..fcb55b02990e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1145,7 +1145,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 		 * yield-candidate.
 		 */
 		vcpu->preempted = true;
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
 	/*
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b818af..a37bda38d205 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -154,7 +154,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
 
 	for (;;) {
 		if (!n.halted)
-			prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+			prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
 		if (hlist_unhashed(&n.link))
 			break;
 
@@ -188,7 +188,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
 	else if (swq_has_sleeper(&n->wq))
-		swake_up(&n->wq);
+		swake_up_one(&n->wq);
 }
 
 static void apf_task_wake_all(void)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..d536d457517b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1379,7 +1379,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
 	 * using swait_active() is safe.
 	 */
 	if (swait_active(q))
-		swake_up(q);
+		swake_up_one(q);
 
 	if (apic_lvtt_tscdeadline(apic))
 		ktimer->expired_tscdeadline = ktimer->tscdeadline;
diff --git a/include/linux/swait.h b/include/linux/swait.h
index dd032061112d..73e06e9986d4 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -16,7 +16,7 @@
  * wait-queues, but the semantics are actually completely different, and
  * every single user we have ever had has been buggy (or pointless).
  *
- * A "swake_up()" only wakes up _one_ waiter, which is not at all what
+ * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
  * "wake_up()" does, and has led to problems. In other cases, it has
  * been fine, because there's only ever one waiter (kvm), but in that
  * case gthe whole "simple" wait-queue is just pointless to begin with,
@@ -115,7 +115,7 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
  *      CPU0 - waker                    CPU1 - waiter
  *
  *                                      for (;;) {
- *      @cond = true;                     prepare_to_swait(&wq_head, &wait, state);
+ *      @cond = true;                     prepare_to_swait_exclusive(&wq_head, &wait, state);
  *      smp_mb();                         // smp_mb() from set_current_state()
  *      if (swait_active(wq_head))        if (@cond)
  *        wake_up(wq_head);                      break;
@@ -157,11 +157,11 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 	return swait_active(wq);
 }
 
-extern void swake_up(struct swait_queue_head *q);
+extern void swake_up_one(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
 extern void swake_up_locked(struct swait_queue_head *q);
 
-extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state);
+extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
 
 extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
@@ -196,7 +196,7 @@ __out:	__ret;								\
 	(void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0,	\
 			    schedule())
 
-#define swait_event(wq, condition)					\
+#define swait_event_exclusive(wq, condition)				\
 do {									\
 	if (condition)							\
 		break;							\
@@ -208,7 +208,7 @@ do {									\
 		      TASK_UNINTERRUPTIBLE, timeout,			\
 		      __ret = schedule_timeout(__ret))
 
-#define swait_event_timeout(wq, condition, timeout)			\
+#define swait_event_timeout_exclusive(wq, condition, timeout)		\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
@@ -220,7 +220,7 @@ do {									\
 	___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0,		\
 		      schedule())
 
-#define swait_event_interruptible(wq, condition)			\
+#define swait_event_interruptible_exclusive(wq, condition)		\
 ({									\
 	int __ret = 0;							\
 	if (!(condition))						\
@@ -233,7 +233,7 @@ do {									\
 		      TASK_INTERRUPTIBLE, timeout,			\
 		      __ret = schedule_timeout(__ret))
 
-#define swait_event_interruptible_timeout(wq, condition, timeout)	\
+#define swait_event_interruptible_timeout_exclusive(wq, condition, timeout)\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
@@ -246,7 +246,7 @@ do {									\
 	(void)___swait_event(wq, condition, TASK_IDLE, 0, schedule())
 
 /**
- * swait_event_idle - wait without system load contribution
+ * swait_event_idle_exclusive - wait without system load contribution
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
@@ -257,7 +257,7 @@ do {									\
  * condition and doesn't want to contribute to system load. Signals are
  * ignored.
  */
-#define swait_event_idle(wq, condition)					\
+#define swait_event_idle_exclusive(wq, condition)			\
 do {									\
 	if (condition)							\
 		break;							\
@@ -270,7 +270,7 @@ do {									\
 		       __ret = schedule_timeout(__ret))
 
 /**
- * swait_event_idle_timeout - wait up to timeout without load contribution
+ * swait_event_idle_timeout_exclusive - wait up to timeout without load contribution
  * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout at which we'll give up in jiffies
@@ -288,7 +288,7 @@ do {									\
  * or the remaining jiffies (at least 1) if the @condition evaluated
  * to %true before the @timeout elapsed.
  */
-#define swait_event_idle_timeout(wq, condition, timeout)		\
+#define swait_event_idle_timeout_exclusive(wq, condition, timeout)	\
 ({									\
 	long __ret = timeout;						\
 	if (!___wait_cond_timeout(condition))				\
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..70178f6ffdc4 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -92,7 +92,7 @@ static void s2idle_enter(void)
 	/* Push all the CPUs into the idle loop. */
 	wake_up_all_idle_cpus();
 	/* Make the current CPU wait so it can enter the idle loop too. */
-	swait_event(s2idle_wait_head,
+	swait_event_exclusive(s2idle_wait_head,
 		    s2idle_state == S2IDLE_STATE_WAKE);
 
 	cpuidle_pause();
@@ -160,7 +160,7 @@ void s2idle_wake(void)
 	raw_spin_lock_irqsave(&s2idle_lock, flags);
 	if (s2idle_state > S2IDLE_STATE_NONE) {
 		s2idle_state = S2IDLE_STATE_WAKE;
-		swake_up(&s2idle_wait_head);
+		swake_up_one(&s2idle_wait_head);
 	}
 	raw_spin_unlock_irqrestore(&s2idle_lock, flags);
 }
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 622792abe41a..04fc2ed71af8 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -110,7 +110,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
 
 	WRITE_ONCE(sp->srcu_lock_nesting[idx], newval);
 	if (!newval && READ_ONCE(sp->srcu_gp_waiting))
-		swake_up(&sp->srcu_wq);
+		swake_up_one(&sp->srcu_wq);
 }
 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
 
@@ -140,7 +140,7 @@ void srcu_drive_gp(struct work_struct *wp)
 	idx = sp->srcu_idx;
 	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
 	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */
-	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
+	swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
 	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
 
 	/* Invoke the callbacks we removed above. */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aa7cade1b9f3..91f888d3b23a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1727,7 +1727,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 	    !READ_ONCE(rsp->gp_flags) ||
 	    !rsp->gp_kthread)
 		return;
-	swake_up(&rsp->gp_wq);
+	swake_up_one(&rsp->gp_wq);
 }
 
 /*
@@ -2002,7 +2002,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
  * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
@@ -2144,7 +2144,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+			swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
 						     RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
 			/* Locking provides needed memory barrier. */
@@ -2176,7 +2176,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = swait_event_idle_timeout(rsp->gp_wq,
+			ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index d40708e8c5d6..d428cc1064c8 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 			if (wake) {
 				smp_mb(); /* EGP done before wake_up(). */
-				swake_up(&rsp->expedited_wq);
+				swake_up_one(&rsp->expedited_wq);
 			}
 			break;
 		}
@@ -518,7 +518,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 	jiffies_start = jiffies;
 
 	for (;;) {
-		ret = swait_event_timeout(
+		ret = swait_event_timeout_exclusive(
 				rsp->expedited_wq,
 				sync_rcu_preempt_exp_done_unlocked(rnp_root),
 				jiffies_stall);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7fd12039e512..ad53d133f709 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1854,8 +1854,8 @@ static void __wake_nocb_leader(struct rcu_data *rdp, bool force,
 		WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
 		del_timer(&rdp->nocb_timer);
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-		smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
-		swake_up(&rdp_leader->nocb_wq);
+		smp_mb(); /* ->nocb_leader_sleep before swake_up_one(). */
+		swake_up_one(&rdp_leader->nocb_wq);
 	} else {
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 	}
@@ -2082,7 +2082,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 	 */
 	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
-		swait_event_interruptible(
+		swait_event_interruptible_exclusive(
 			rnp->nocb_gp_wq[c & 0x1],
 			(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
 		if (likely(d))
@@ -2111,7 +2111,7 @@ wait_again:
 	/* Wait for callbacks to appear. */
 	if (!rcu_nocb_poll) {
 		trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep"));
-		swait_event_interruptible(my_rdp->nocb_wq,
+		swait_event_interruptible_exclusive(my_rdp->nocb_wq,
 				!READ_ONCE(my_rdp->nocb_leader_sleep));
 		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
 		my_rdp->nocb_leader_sleep = true;
@@ -2176,7 +2176,7 @@ wait_again:
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
 			/* List was empty, so wake up the follower.  */
-			swake_up(&rdp->nocb_wq);
+			swake_up_one(&rdp->nocb_wq);
 		}
 	}
 
@@ -2193,7 +2193,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
 {
 	for (;;) {
 		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep"));
-		swait_event_interruptible(rdp->nocb_wq,
+		swait_event_interruptible_exclusive(rdp->nocb_wq,
 					 READ_ONCE(rdp->nocb_follower_head));
 		if (smp_load_acquire(&rdp->nocb_follower_head)) {
 			/* ^^^ Ensure CB invocation follows _head test. */
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 66890de93ee5..66b59ac77c22 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -32,7 +32,7 @@ void swake_up_locked(struct swait_queue_head *q)
 }
 EXPORT_SYMBOL(swake_up_locked);
 
-void swake_up(struct swait_queue_head *q)
+void swake_up_one(struct swait_queue_head *q)
 {
 	unsigned long flags;
 
@@ -40,7 +40,7 @@ void swake_up(struct swait_queue_head *q)
 	swake_up_locked(q);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
-EXPORT_SYMBOL(swake_up);
+EXPORT_SYMBOL(swake_up_one);
 
 /*
  * Does not allow usage from IRQ disabled, since we must be able to
@@ -76,7 +76,7 @@ static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *w
 		list_add_tail(&wait->task_list, &q->task_list);
 }
 
-void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
+void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
 	unsigned long flags;
 
@@ -85,7 +85,7 @@ void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int
 	set_current_state(state);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
-EXPORT_SYMBOL(prepare_to_swait);
+EXPORT_SYMBOL(prepare_to_swait_exclusive);
 
 long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
 {
@@ -95,7 +95,7 @@ long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait
 	raw_spin_lock_irqsave(&q->lock, flags);
 	if (unlikely(signal_pending_state(state, current))) {
 		/*
-		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up()
+		 * See prepare_to_wait_event(). TL;DR, subsequent swake_up_one()
 		 * must not see us.
 		 */
 		list_del_init(&wait->task_list);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 04e554cae3a2..108250e4d376 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -604,7 +604,7 @@ void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up(kvm_arch_vcpu_wq(vcpu));
+		swake_up_one(kvm_arch_vcpu_wq(vcpu));
 	}
 }
 
@@ -612,7 +612,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
 	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
 
-	swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
+	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
 				       (!vcpu->arch.pause)));
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index c95ab4c5a475..9b73d3ad918a 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -155,7 +155,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	smp_mb();		/* Make sure the above is visible */
 
 	wq = kvm_arch_vcpu_wq(vcpu);
-	swake_up(wq);
+	swake_up_one(wq);
 
 	return PSCI_RET_SUCCESS;
 }
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 57bcb27dcf30..23c2519c5b32 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -107,7 +107,7 @@ static void async_pf_execute(struct work_struct *work)
 	trace_kvm_async_pf_completed(addr, gva);
 
 	if (swq_has_sleeper(&vcpu->wq))
-		swake_up(&vcpu->wq);
+		swake_up_one(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ada21f47f22b..940a4aed5b2d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2167,7 +2167,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2209,7 +2209,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
-		swake_up(wqp);
+		swake_up_one(wqp);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
-- 
cgit v1.2.3


From c03cea42149de56fbae2301d7123daaa2cfe80e2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 15:10:58 -0700
Subject: iomap: add initial support for writes without buffer heads

For now just limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin, and just set the whole page dirty after
copying data into it.  This code is enabled by default and XFS will now
be feed pages without buffer heads in ->writepage and ->writepages.

If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap the old
path will still be used, this both helps the transition in XFS and
prepares for the gfs2 migration to the iomap infrastructure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 115 +++++++++++++++++++++++++++++++++++++++++++++++---
 fs/xfs/xfs_iomap.c    |   6 ++-
 include/linux/iomap.h |   2 +
 3 files changed, 114 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 4f10c6b1cf6d..2ebff76039b5 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -348,6 +348,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 		truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+
+	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+		zero_user_segments(page, poff, from, to, poff + plen);
+		return 0;
+	}
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+	if (PageUptodate(page))
+		return 0;
+	if (from <= poff && to >= poff + plen)
+		return 0;
+	return iomap_read_page_sync(inode, block_start, page,
+			poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
 	if (iomap->type == IOMAP_INLINE)
 		iomap_read_inline_data(inode, page, iomap);
-	else
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, iomap);
-
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	flush_dcache_page(page);
+
+	/*
+	 * The blocks that were entirely written will now be uptodate, so we
+	 * don't have to worry about a readpage reading them and overwriting a
+	 * partial write.  However if we have encountered a short write and only
+	 * partially written into a block, it will not be marked uptodate, so a
+	 * readpage might come in and destroy our partial write.
+	 *
+	 * Do the simplest thing, and just treat any short write to a non
+	 * uptodate page as a zero-length write, and force the caller to redo
+	 * the whole thing.
+	 */
+	if (unlikely(copied < len && !PageUptodate(page))) {
+		copied = 0;
+	} else {
+		SetPageUptodate(page);
+		iomap_set_page_dirty(page);
+	}
+	return __generic_write_end(inode, pos, copied, page);
+}
+
 static int
 iomap_write_end_inline(struct inode *inode, struct page *page,
 		struct iomap *iomap, loff_t pos, unsigned copied)
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
 	if (iomap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else {
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
 				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
 	}
 
 	if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 49f5492eed3b..8a3613d576af 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -626,7 +626,7 @@ retry:
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
 
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 30d314407f66..5eb9ca8d7ce5 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -30,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
@@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 17dbca119312b4e8173d4e25ff64262119fcef38 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:26 -0700
Subject: x86/speculation/l1tf: Add sysfs reporting for l1tf

L1TF core kernel workarounds are cheap and normally always enabled, However
they still should be reported in sysfs if the system is vulnerable or
mitigated. Add the necessary CPU feature/bug bits.

- Extend the existing checks for Meltdowns to determine if the system is
  vulnerable. All CPUs which are not vulnerable to Meltdown are also not
  vulnerable to L1TF

- Check for 32bit non PAE and emit a warning as there is no practical way
  for mitigation due to the limited physical address bits

- If the system has more than MAX_PA/2 physical memory the invert page
  workarounds don't protect the system against the L1TF attack anymore,
  because an inverted physical address will also point to valid
  memory. Print a warning in this case and report that the system is
  vulnerable.

Add a function which returns the PFN limit for the L1TF mitigation, which
will be used in follow up patches for sanity and range checks.

[ tglx: Renamed the CPU feature bit to L1TF_PTEINV ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/cpufeatures.h |  2 ++
 arch/x86/include/asm/processor.h   |  5 +++++
 arch/x86/kernel/cpu/bugs.c         | 40 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c       | 20 +++++++++++++++++++
 drivers/base/cpu.c                 |  8 ++++++++
 include/linux/cpu.h                |  2 ++
 6 files changed, 77 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5701f5cecd31..f41cf9df4a83 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -219,6 +219,7 @@
 #define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
 #define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -373,5 +374,6 @@
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 #define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF			X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index cfd29ee8c3da..7e3ac5eedcd6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -181,6 +181,11 @@ extern const struct seq_operations cpuinfo_op;
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
+static inline unsigned long l1tf_pfn_limit(void)
+{
+	return BIT(boot_cpu_data.x86_phys_bits - 1 - PAGE_SHIFT) - 1;
+}
+
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index cd0fda1fff6d..3ce9674e4887 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,9 +27,11 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/intel-family.h>
+#include <asm/e820/api.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
+static void __init l1tf_select_mitigation(void);
 
 /*
  * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
@@ -81,6 +83,8 @@ void __init check_bugs(void)
 	 */
 	ssb_select_mitigation();
 
+	l1tf_select_mitigation();
+
 #ifdef CONFIG_X86_32
 	/*
 	 * Check whether we are able to run this kernel safely on SMP.
@@ -205,6 +209,32 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
+static void __init l1tf_select_mitigation(void)
+{
+	u64 half_pa;
+
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return;
+
+#if CONFIG_PGTABLE_LEVELS == 2
+	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
+	return;
+#endif
+
+	/*
+	 * This is extremely unlikely to happen because almost all
+	 * systems have far more MAX_PA/2 than RAM can be fit into
+	 * DIMM slots.
+	 */
+	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
+	if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
+		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
+		return;
+	}
+
+	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
+}
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -678,6 +708,11 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
 
+	case X86_BUG_L1TF:
+		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
+			return sprintf(buf, "Mitigation: Page Table Inversion\n");
+		break;
+
 	default:
 		break;
 	}
@@ -704,4 +739,9 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0df7151cfef4..9baf684e2b55 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -984,6 +984,21 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
 	{}
 };
 
+static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
+	/* in addition to cpu_no_speculation */
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT1	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_SILVERMONT2	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_AIRMONT		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MERRIFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_MOOREFIELD	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GOLDMONT	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_DENVERTON	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_ATOM_GEMINI_LAKE	},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNL		},
+	{ X86_VENDOR_INTEL,	6,	INTEL_FAM6_XEON_PHI_KNM		},
+	{}
+};
+
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = 0;
@@ -1010,6 +1025,11 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 		return;
 
 	setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+	if (x86_match_cpu(cpu_no_l1tf))
+		return;
+
+	setup_force_cpu_bug(X86_BUG_L1TF);
 }
 
 /*
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 30cc9c877ebb..eb9443d5bae1 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -540,16 +540,24 @@ ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
+static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
 	&dev_attr_spectre_v1.attr,
 	&dev_attr_spectre_v2.attr,
 	&dev_attr_spec_store_bypass.attr,
+	&dev_attr_l1tf.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..d3da5184aa1c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -55,6 +55,8 @@ extern ssize_t cpu_show_spectre_v2(struct device *dev,
 				   struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_spec_store_bypass(struct device *dev,
 					  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_l1tf(struct device *dev,
+			     struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
-- 
cgit v1.2.3


From 42e4089c7890725fcd329999252dc489b72f2921 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:27 -0700
Subject: x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE
 mappings

For L1TF PROT_NONE mappings are protected by inverting the PFN in the page
table entry. This sets the high bits in the CPU's address space, thus
making sure to point to not point an unmapped entry to valid cached memory.

Some server system BIOSes put the MMIO mappings high up in the physical
address space. If such an high mapping was mapped to unprivileged users
they could attack low memory by setting such a mapping to PROT_NONE. This
could happen through a special device driver which is not access
protected. Normal /dev/mem is of course access protected.

To avoid this forbid PROT_NONE mappings or mprotect for high MMIO mappings.

Valid page mappings are allowed because the system is then unsafe anyways.

It's not expected that users commonly use PROT_NONE on MMIO. But to
minimize any impact this is only enforced if the mapping actually refers to
a high MMIO address (defined as the MAX_PA-1 bit being set), and also skip
the check for root.

For mmaps this is straight forward and can be handled in vm_insert_pfn and
in remap_pfn_range().

For mprotect it's a bit trickier. At the point where the actual PTEs are
accessed a lot of state has been changed and it would be difficult to undo
on an error. Since this is a uncommon case use a separate early page talk
walk pass for MMIO PROT_NONE mappings that checks for this condition
early. For non MMIO and non PROT_NONE there are no changes.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/include/asm/pgtable.h |  8 +++++++
 arch/x86/mm/mmap.c             | 21 ++++++++++++++++++
 include/asm-generic/pgtable.h  | 12 +++++++++++
 mm/memory.c                    | 37 ++++++++++++++++++++++---------
 mm/mprotect.c                  | 49 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a9c89cb1a9c5..6a090a76fdca 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1338,6 +1338,14 @@ static inline bool pud_access_permitted(pud_t pud, bool write)
 	return __pte_access_permitted(pud_val(pud), write);
 }
 
+#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
+extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
 #include <asm-generic/pgtable.h>
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 48c591251600..f40ab8185d94 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -240,3 +240,24 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
 
 	return phys_addr_valid(addr + count - 1);
 }
+
+/*
+ * Only allow root to set high MMIO mappings to PROT_NONE.
+ * This prevents an unpriv. user to set them to PROT_NONE and invert
+ * them, then pointing to valid memory for L1TF speculation.
+ *
+ * Note: for locked down kernels may want to disable the root override.
+ */
+bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	if (!boot_cpu_has_bug(X86_BUG_L1TF))
+		return true;
+	if (!__pte_needs_invert(pgprot_val(prot)))
+		return true;
+	/* If it's real memory always allow */
+	if (pfn_valid(pfn))
+		return true;
+	if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+		return false;
+	return true;
+}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..0ecc1197084b 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1097,4 +1097,16 @@ static inline void init_espfix_bsp(void) { }
 #endif
 #endif
 
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return false;
+}
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..3ba81b44a542 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1886,6 +1886,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
 
+	if (!pfn_modify_allowed(pfn, pgprot))
+		return -EACCES;
+
 	track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
 
 	ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
@@ -1921,6 +1924,9 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 
 	track_pfn_insert(vma, &pgprot, pfn);
 
+	if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+		return -EACCES;
+
 	/*
 	 * If we don't have pte special, then we have to use the pfn_valid()
 	 * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1982,6 +1988,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 {
 	pte_t *pte;
 	spinlock_t *ptl;
+	int err = 0;
 
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
@@ -1989,12 +1996,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
+		if (!pfn_modify_allowed(pfn, prot)) {
+			err = -EACCES;
+			break;
+		}
 		set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
-	return 0;
+	return err;
 }
 
 static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -2003,6 +2014,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pmd = pmd_alloc(mm, pud, addr);
@@ -2011,9 +2023,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 	VM_BUG_ON(pmd_trans_huge(*pmd));
 	do {
 		next = pmd_addr_end(addr, end);
-		if (remap_pte_range(mm, pmd, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pte_range(mm, pmd, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
@@ -2024,6 +2037,7 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 {
 	pud_t *pud;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	pud = pud_alloc(mm, p4d, addr);
@@ -2031,9 +2045,10 @@ static inline int remap_pud_range(struct mm_struct *mm, p4d_t *p4d,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (remap_pmd_range(mm, pud, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pmd_range(mm, pud, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
@@ -2044,6 +2059,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 {
 	p4d_t *p4d;
 	unsigned long next;
+	int err;
 
 	pfn -= addr >> PAGE_SHIFT;
 	p4d = p4d_alloc(mm, pgd, addr);
@@ -2051,9 +2067,10 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (remap_pud_range(mm, p4d, addr, next,
-				pfn + (addr >> PAGE_SHIFT), prot))
-			return -ENOMEM;
+		err = remap_pud_range(mm, p4d, addr, next,
+				pfn + (addr >> PAGE_SHIFT), prot);
+		if (err)
+			return err;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 625608bc8962..6d331620b9e5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -306,6 +306,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	return pages;
 }
 
+static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
+			       unsigned long next, struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+				   unsigned long addr, unsigned long next,
+				   struct mm_walk *walk)
+{
+	return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+		0 : -EACCES;
+}
+
+static int prot_none_test(unsigned long addr, unsigned long next,
+			  struct mm_walk *walk)
+{
+	return 0;
+}
+
+static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
+			   unsigned long end, unsigned long newflags)
+{
+	pgprot_t new_pgprot = vm_get_page_prot(newflags);
+	struct mm_walk prot_none_walk = {
+		.pte_entry = prot_none_pte_entry,
+		.hugetlb_entry = prot_none_hugetlb_entry,
+		.test_walk = prot_none_test,
+		.mm = current->mm,
+		.private = &new_pgprot,
+	};
+
+	return walk_page_range(start, end, &prot_none_walk);
+}
+
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long start, unsigned long end, unsigned long newflags)
@@ -323,6 +359,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 		return 0;
 	}
 
+	/*
+	 * Do PROT_NONE PFN permission checks here when we can still
+	 * bail out without undoing a lot of state. This is a rather
+	 * uncommon case, so doesn't need to be very optimized.
+	 */
+	if (arch_has_pfn_modify_check() &&
+	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+		error = prot_none_walk(vma, start, end, newflags);
+		if (error)
+			return error;
+	}
+
 	/*
 	 * If we make a private mapping writable we increase our commit;
 	 * but (without finer accounting) cannot reduce our commit if we
-- 
cgit v1.2.3


From 377eeaa8e11fe815b1d07c81c4a0e2843a8c15eb Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 13 Jun 2018 15:48:28 -0700
Subject: x86/speculation/l1tf: Limit swap file size to MAX_PA/2

For the L1TF workaround its necessary to limit the swap file size to below
MAX_PA/2, so that the higher bits of the swap offset inverted never point
to valid memory.

Add a mechanism for the architecture to override the swap file size check
in swapfile.c and add a x86 specific max swapfile check function that
enforces that limit.

The check is only enabled if the CPU is vulnerable to L1TF.

In VMs with 42bit MAX_PA the typical limit is 2TB now, on a native system
with 46bit PA it is 32TB. The limit is only per individual swap file, so
it's always possible to exceed these limits with multiple swap files or
partitions.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/mm/init.c       | 15 +++++++++++++++
 include/linux/swapfile.h |  2 ++
 mm/swapfile.c            | 46 ++++++++++++++++++++++++++++++----------------
 3 files changed, 47 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index cee58a972cb2..1b530197d114 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -4,6 +4,8 @@
 #include <linux/swap.h>
 #include <linux/memblock.h>
 #include <linux/bootmem.h>	/* for max_low_pfn */
+#include <linux/swapfile.h>
+#include <linux/swapops.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -880,3 +882,16 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 	__cachemode2pte_tbl[cache] = __cm_idx2pte(entry);
 	__pte2cachemode_tbl[entry] = cache;
 }
+
+unsigned long max_swapfile_size(void)
+{
+	unsigned long pages;
+
+	pages = generic_max_swapfile_size();
+
+	if (boot_cpu_has_bug(X86_BUG_L1TF)) {
+		/* Limit the swap file size to MAX_PA/2 for L1TF workaround */
+		pages = min_t(unsigned long, l1tf_pfn_limit() + 1, pages);
+	}
+	return pages;
+}
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 06bd7b096167..e06febf62978 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -10,5 +10,7 @@ extern spinlock_t swap_lock;
 extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
+extern unsigned long generic_max_swapfile_size(void);
+extern unsigned long max_swapfile_size(void);
 
 #endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2cc2972eedaf..18185ae4f223 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2909,6 +2909,35 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
 	return 0;
 }
 
+
+/*
+ * Find out how many pages are allowed for a single swap device. There
+ * are two limiting factors:
+ * 1) the number of bits for the swap offset in the swp_entry_t type, and
+ * 2) the number of bits in the swap pte, as defined by the different
+ * architectures.
+ *
+ * In order to find the largest possible bit mask, a swap entry with
+ * swap type 0 and swap offset ~0UL is created, encoded to a swap pte,
+ * decoded to a swp_entry_t again, and finally the swap offset is
+ * extracted.
+ *
+ * This will mask all the bits from the initial ~0UL mask that can't
+ * be encoded in either the swp_entry_t or the architecture definition
+ * of a swap pte.
+ */
+unsigned long generic_max_swapfile_size(void)
+{
+	return swp_offset(pte_to_swp_entry(
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+}
+
+/* Can be overridden by an architecture for additional checks. */
+__weak unsigned long max_swapfile_size(void)
+{
+	return generic_max_swapfile_size();
+}
+
 static unsigned long read_swap_header(struct swap_info_struct *p,
 					union swap_header *swap_header,
 					struct inode *inode)
@@ -2944,22 +2973,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	p->cluster_next = 1;
 	p->cluster_nr = 0;
 
-	/*
-	 * Find out how many pages are allowed for a single swap
-	 * device. There are two limiting factors: 1) the number
-	 * of bits for the swap offset in the swp_entry_t type, and
-	 * 2) the number of bits in the swap pte as defined by the
-	 * different architectures. In order to find the
-	 * largest possible bit mask, a swap entry with swap type 0
-	 * and swap offset ~0UL is created, encoded to a swap pte,
-	 * decoded to a swp_entry_t again, and finally the swap
-	 * offset is extracted. This will mask all the bits from
-	 * the initial ~0UL mask that can't be encoded in either
-	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.
-	 */
-	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
+	maxpages = max_swapfile_size();
 	last_page = swap_header->info.last_page;
 	if (!last_page) {
 		pr_warn("Empty swap-file\n");
-- 
cgit v1.2.3


From 5a6cf77f5e35e7af35d36a1e7dc21a42f6412e4f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 20 Jun 2018 01:05:07 +0900
Subject: kprobes: Remove jprobe API implementation

Remove functionally empty jprobe API implementations and test cases.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-arch@vger.kernel.org
Link: https://lore.kernel.org/lkml/152942430705.15209.2307050500995264322.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h |  3 --
 kernel/kprobes.c        | 78 +---------------------------------------
 kernel/test_kprobes.c   | 94 -------------------------------------------------
 lib/Kconfig.debug       |  2 +-
 4 files changed, 2 insertions(+), 175 deletions(-)

(limited to 'include')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9440a2fc8893..b520baa65682 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -389,9 +389,6 @@ int register_kprobe(struct kprobe *p);
 void unregister_kprobe(struct kprobe *p);
 int register_kprobes(struct kprobe **kps, int num);
 void unregister_kprobes(struct kprobe **kps, int num);
-int setjmp_pre_handler(struct kprobe *, struct pt_regs *);
-int longjmp_break_handler(struct kprobe *, struct pt_regs *);
-void jprobe_return(void);
 unsigned long arch_deref_entry_point(void *);
 
 int register_kretprobe(struct kretprobe *rp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ea619021d901..69de130595f7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1272,7 +1272,7 @@ NOKPROBE_SYMBOL(cleanup_rp_inst);
 
 /*
 * Add the new probe to ap->list. Fail if this is the
-* second jprobe at the address - two jprobes can't coexist
+* second break_handler at the address
 */
 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
@@ -1812,77 +1812,6 @@ unsigned long __weak arch_deref_entry_point(void *entry)
 	return (unsigned long)entry;
 }
 
-#if 0
-int register_jprobes(struct jprobe **jps, int num)
-{
-	int ret = 0, i;
-
-	if (num <= 0)
-		return -EINVAL;
-
-	for (i = 0; i < num; i++) {
-		ret = register_jprobe(jps[i]);
-
-		if (ret < 0) {
-			if (i > 0)
-				unregister_jprobes(jps, i);
-			break;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(register_jprobes);
-
-int register_jprobe(struct jprobe *jp)
-{
-	unsigned long addr, offset;
-	struct kprobe *kp = &jp->kp;
-
-	/*
-	 * Verify probepoint as well as the jprobe handler are
-	 * valid function entry points.
-	 */
-	addr = arch_deref_entry_point(jp->entry);
-
-	if (kallsyms_lookup_size_offset(addr, NULL, &offset) && offset == 0 &&
-	    kprobe_on_func_entry(kp->addr, kp->symbol_name, kp->offset)) {
-		kp->pre_handler = setjmp_pre_handler;
-		kp->break_handler = longjmp_break_handler;
-		return register_kprobe(kp);
-	}
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(register_jprobe);
-
-void unregister_jprobe(struct jprobe *jp)
-{
-	unregister_jprobes(&jp, 1);
-}
-EXPORT_SYMBOL_GPL(unregister_jprobe);
-
-void unregister_jprobes(struct jprobe **jps, int num)
-{
-	int i;
-
-	if (num <= 0)
-		return;
-	mutex_lock(&kprobe_mutex);
-	for (i = 0; i < num; i++)
-		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
-			jps[i]->kp.addr = NULL;
-	mutex_unlock(&kprobe_mutex);
-
-	synchronize_sched();
-	for (i = 0; i < num; i++) {
-		if (jps[i]->kp.addr)
-			__unregister_kprobe_bottom(&jps[i]->kp);
-	}
-}
-EXPORT_SYMBOL_GPL(unregister_jprobes);
-#endif
-
 #ifdef CONFIG_KRETPROBES
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
@@ -2329,8 +2258,6 @@ static void report_probe(struct seq_file *pi, struct kprobe *p,
 
 	if (p->pre_handler == pre_handler_kretprobe)
 		kprobe_type = "r";
-	else if (p->pre_handler == setjmp_pre_handler)
-		kprobe_type = "j";
 	else
 		kprobe_type = "k";
 
@@ -2637,6 +2564,3 @@ late_initcall(debugfs_kprobe_init);
 #endif /* CONFIG_DEBUG_FS */
 
 module_init(init_kprobes);
-
-/* defined in arch/.../kernel/kprobes.c */
-EXPORT_SYMBOL_GPL(jprobe_return);
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index dd53e354f630..7bca480151b0 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -162,90 +162,6 @@ static int test_kprobes(void)
 
 }
 
-#if 0
-static u32 jph_val;
-
-static u32 j_kprobe_target(u32 value)
-{
-	if (preemptible()) {
-		handler_errors++;
-		pr_err("jprobe-handler is preemptible\n");
-	}
-	if (value != rand1) {
-		handler_errors++;
-		pr_err("incorrect value in jprobe handler\n");
-	}
-
-	jph_val = rand1;
-	jprobe_return();
-	return 0;
-}
-
-static struct jprobe jp = {
-	.entry		= j_kprobe_target,
-	.kp.symbol_name = "kprobe_target"
-};
-
-static int test_jprobe(void)
-{
-	int ret;
-
-	ret = register_jprobe(&jp);
-	if (ret < 0) {
-		pr_err("register_jprobe returned %d\n", ret);
-		return ret;
-	}
-
-	ret = target(rand1);
-	unregister_jprobe(&jp);
-	if (jph_val == 0) {
-		pr_err("jprobe handler not called\n");
-		handler_errors++;
-	}
-
-	return 0;
-}
-
-static struct jprobe jp2 = {
-	.entry          = j_kprobe_target,
-	.kp.symbol_name = "kprobe_target2"
-};
-
-static int test_jprobes(void)
-{
-	int ret;
-	struct jprobe *jps[2] = {&jp, &jp2};
-
-	/* addr and flags should be cleard for reusing kprobe. */
-	jp.kp.addr = NULL;
-	jp.kp.flags = 0;
-	ret = register_jprobes(jps, 2);
-	if (ret < 0) {
-		pr_err("register_jprobes returned %d\n", ret);
-		return ret;
-	}
-
-	jph_val = 0;
-	ret = target(rand1);
-	if (jph_val == 0) {
-		pr_err("jprobe handler not called\n");
-		handler_errors++;
-	}
-
-	jph_val = 0;
-	ret = target2(rand1);
-	if (jph_val == 0) {
-		pr_err("jprobe handler2 not called\n");
-		handler_errors++;
-	}
-	unregister_jprobes(jps, 2);
-
-	return 0;
-}
-#else
-#define test_jprobe() (0)
-#define test_jprobes() (0)
-#endif
 #ifdef CONFIG_KRETPROBES
 static u32 krph_val;
 
@@ -383,16 +299,6 @@ int init_test_probes(void)
 	if (ret < 0)
 		errors++;
 
-	num_tests++;
-	ret = test_jprobe();
-	if (ret < 0)
-		errors++;
-
-	num_tests++;
-	ret = test_jprobes();
-	if (ret < 0)
-		errors++;
-
 #ifdef CONFIG_KRETPROBES
 	num_tests++;
 	ret = test_kretprobe();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8838d1158d19..0b066b3c9284 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1718,7 +1718,7 @@ config KPROBES_SANITY_TEST
 	default n
 	help
 	  This option provides for testing basic kprobes functionality on
-	  boot. A sample kprobe, jprobe and kretprobe are inserted and
+	  boot. Samples of kprobe and kretprobe are inserted and
 	  verified for functionality.
 
 	  Say N if you are unsure.
-- 
cgit v1.2.3


From 4de58696de076d9bd2745d1cbe0930635c3f5ac9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 20 Jun 2018 01:17:15 +0900
Subject: kprobes: Remove jprobe stub API

Remove jprobe stub APIs from linux/kprobes.h since
the jprobe implementation was completely gone.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-arch@vger.kernel.org
Link: https://lore.kernel.org/lkml/152942503572.15209.1652552217914694917.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kprobes.h | 50 -------------------------------------------------
 1 file changed, 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b520baa65682..e909413e4e38 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -63,7 +63,6 @@ struct pt_regs;
 struct kretprobe;
 struct kretprobe_instance;
 typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *);
-typedef int (*kprobe_break_handler_t) (struct kprobe *, struct pt_regs *);
 typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *,
 				       unsigned long flags);
 typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *,
@@ -101,12 +100,6 @@ struct kprobe {
 	 */
 	kprobe_fault_handler_t fault_handler;
 
-	/*
-	 * ... called if breakpoint trap occurs in probe handler.
-	 * Return 1 if it handled break, otherwise kernel will see it.
-	 */
-	kprobe_break_handler_t break_handler;
-
 	/* Saved opcode (which has been replaced with breakpoint) */
 	kprobe_opcode_t opcode;
 
@@ -154,24 +147,6 @@ static inline int kprobe_ftrace(struct kprobe *p)
 	return p->flags & KPROBE_FLAG_FTRACE;
 }
 
-/*
- * Special probe type that uses setjmp-longjmp type tricks to resume
- * execution at a specified entry with a matching prototype corresponding
- * to the probed function - a trick to enable arguments to become
- * accessible seamlessly by probe handling logic.
- * Note:
- * Because of the way compilers allocate stack space for local variables
- * etc upfront, regardless of sub-scopes within a function, this mirroring
- * principle currently works only for probes placed on function entry points.
- */
-struct jprobe {
-	struct kprobe kp;
-	void *entry;	/* probe handling code to jump to */
-};
-
-/* For backward compatibility with old code using JPROBE_ENTRY() */
-#define JPROBE_ENTRY(handler)	(handler)
-
 /*
  * Function-return probe -
  * Note:
@@ -436,9 +411,6 @@ static inline void unregister_kprobe(struct kprobe *p)
 static inline void unregister_kprobes(struct kprobe **kps, int num)
 {
 }
-static inline void jprobe_return(void)
-{
-}
 static inline int register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
@@ -465,20 +437,6 @@ static inline int enable_kprobe(struct kprobe *kp)
 	return -ENOSYS;
 }
 #endif /* CONFIG_KPROBES */
-static inline int register_jprobe(struct jprobe *p)
-{
-	return -ENOSYS;
-}
-static inline int register_jprobes(struct jprobe **jps, int num)
-{
-	return -ENOSYS;
-}
-static inline void unregister_jprobe(struct jprobe *p)
-{
-}
-static inline void unregister_jprobes(struct jprobe **jps, int num)
-{
-}
 static inline int disable_kretprobe(struct kretprobe *rp)
 {
 	return disable_kprobe(&rp->kp);
@@ -487,14 +445,6 @@ static inline int enable_kretprobe(struct kretprobe *rp)
 {
 	return enable_kprobe(&rp->kp);
 }
-static inline int disable_jprobe(struct jprobe *jp)
-{
-	return -ENOSYS;
-}
-static inline int enable_jprobe(struct jprobe *jp)
-{
-	return -ENOSYS;
-}
 
 #ifndef CONFIG_KPROBES
 static inline bool is_kprobe_insn_slot(unsigned long addr)
-- 
cgit v1.2.3


From 8bd9cb51daac89337295b6f037b0486911e1b408 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 19 Jun 2018 13:53:08 +0100
Subject: locking/atomics, asm-generic: Move some macros from <linux/bitops.h>
 to a new <linux/bits.h> file

In preparation for implementing the asm-generic atomic bitops in terms
of atomic_long_*(), we need to prevent <asm/atomic.h> implementations from
pulling in <linux/bitops.h>. A common reason for this include is for the
BITS_PER_BYTE definition, so move this and some other BIT() and masking
macros into a new header file, <linux/bits.h>.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: yamada.masahiro@socionext.com
Link: https://lore.kernel.org/lkml/1529412794-17720-4-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/bitops.h | 22 +---------------------
 include/linux/bits.h   | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/bits.h

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 4cac4e1a72ff..af419012d77d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,29 +2,9 @@
 #ifndef _LINUX_BITOPS_H
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
+#include <linux/bits.h>
 
-#ifdef	__KERNEL__
-#define BIT(nr)			(1UL << (nr))
-#define BIT_ULL(nr)		(1ULL << (nr))
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
-#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
-#define BITS_PER_BYTE		8
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-#endif
-
-/*
- * Create a contiguous bitmask starting at bit position @l and ending at
- * position @h. For example
- * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
- */
-#define GENMASK(h, l) \
-	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
-
-#define GENMASK_ULL(h, l) \
-	(((~0ULL) - (1ULL << (l)) + 1) & \
-	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/bits.h b/include/linux/bits.h
new file mode 100644
index 000000000000..2b7b532c1d51
--- /dev/null
+++ b/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr)			(1UL << (nr))
+#define BIT_ULL(nr)		(1ULL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr)	(1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr)	((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE		8
+
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+	(((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+	(((~0ULL) - (1ULL << (l)) + 1) & \
+	 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif	/* __LINUX_BITS_H */
-- 
cgit v1.2.3


From e986a0d6cb36b54838b2f5f8ac2bb5f9baadd977 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 19 Jun 2018 13:53:11 +0100
Subject: locking/atomics, asm-generic/bitops/atomic.h: Rewrite using
 atomic_*() APIs

The atomic bitops can actually be implemented pretty efficiently using
the atomic_*() ops, rather than explicit use of spinlocks.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: yamada.masahiro@socionext.com
Link: https://lore.kernel.org/lkml/1529412794-17720-7-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/bitops/atomic.h | 188 +++++++-----------------------------
 1 file changed, 33 insertions(+), 155 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 04deffaf5f7d..dd90c9792909 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -2,189 +2,67 @@
 #ifndef _ASM_GENERIC_BITOPS_ATOMIC_H_
 #define _ASM_GENERIC_BITOPS_ATOMIC_H_
 
-#include <asm/types.h>
-#include <linux/irqflags.h>
-
-#ifdef CONFIG_SMP
-#include <asm/spinlock.h>
-#include <asm/cache.h>		/* we use L1_CACHE_BYTES */
-
-/* Use an array of spinlocks for our atomic_ts.
- * Hash function to index into a different SPINLOCK.
- * Since "a" is usually an address, use one spinlock per cacheline.
- */
-#  define ATOMIC_HASH_SIZE 4
-#  define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ]))
-
-extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
-
-/* Can't use raw_spin_lock_irq because of #include problems, so
- * this is the substitute */
-#define _atomic_spin_lock_irqsave(l,f) do {	\
-	arch_spinlock_t *s = ATOMIC_HASH(l);	\
-	local_irq_save(f);			\
-	arch_spin_lock(s);			\
-} while(0)
-
-#define _atomic_spin_unlock_irqrestore(l,f) do {	\
-	arch_spinlock_t *s = ATOMIC_HASH(l);		\
-	arch_spin_unlock(s);				\
-	local_irq_restore(f);				\
-} while(0)
-
-
-#else
-#  define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0)
-#  define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0)
-#endif
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <asm/barrier.h>
 
 /*
- * NMI events can occur at any time, including when interrupts have been
- * disabled by *_irqsave().  So you can get NMI events occurring while a
- * *_bit function is holding a spin lock.  If the NMI handler also wants
- * to do bit manipulation (and they do) then you can get a deadlock
- * between the original caller of *_bit() and the NMI handler.
- *
- * by Keith Owens
+ * Implementation of atomic bitops using atomic-fetch ops.
+ * See Documentation/atomic_bitops.txt for details.
  */
 
-/**
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- *
- * Note: there are no guarantees that this function will not be reordered
- * on non x86 architectures, so if you are writing portable code,
- * make sure not to rely on its reordering guarantees.
- *
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void set_bit(int nr, volatile unsigned long *addr)
+static inline void set_bit(unsigned int nr, volatile unsigned long *p)
 {
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long flags;
-
-	_atomic_spin_lock_irqsave(p, flags);
-	*p  |= mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
+	p += BIT_WORD(nr);
+	atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
 }
 
-/**
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
-static inline void clear_bit(int nr, volatile unsigned long *addr)
+static inline void clear_bit(unsigned int nr, volatile unsigned long *p)
 {
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long flags;
-
-	_atomic_spin_lock_irqsave(p, flags);
-	*p &= ~mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
+	p += BIT_WORD(nr);
+	atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
 }
 
-/**
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered. It may be
- * reordered on other architectures than x86.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(unsigned int nr, volatile unsigned long *p)
 {
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long flags;
-
-	_atomic_spin_lock_irqsave(p, flags);
-	*p ^= mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
+	p += BIT_WORD(nr);
+	atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);
 }
 
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It may be reordered on other architectures than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
 {
+	long old;
 	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old;
-	unsigned long flags;
 
-	_atomic_spin_lock_irqsave(p, flags);
-	old = *p;
-	*p = old | mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
+	p += BIT_WORD(nr);
+	if (READ_ONCE(*p) & mask)
+		return 1;
 
-	return (old & mask) != 0;
+	old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
+	return !!(old & mask);
 }
 
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It can be reorderdered on other architectures other than x86.
- * It also implies a memory barrier.
- */
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
 {
+	long old;
 	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old;
-	unsigned long flags;
 
-	_atomic_spin_lock_irqsave(p, flags);
-	old = *p;
-	*p = old & ~mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
+	p += BIT_WORD(nr);
+	if (!(READ_ONCE(*p) & mask))
+		return 0;
 
-	return (old & mask) != 0;
+	old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
+	return !!(old & mask);
 }
 
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.
- * It also implies a memory barrier.
- */
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(unsigned int nr, volatile unsigned long *p)
 {
+	long old;
 	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old;
-	unsigned long flags;
-
-	_atomic_spin_lock_irqsave(p, flags);
-	old = *p;
-	*p = old ^ mask;
-	_atomic_spin_unlock_irqrestore(p, flags);
 
-	return (old & mask) != 0;
+	p += BIT_WORD(nr);
+	old = atomic_long_fetch_xor(mask, (atomic_long_t *)p);
+	return !!(old & mask);
 }
 
 #endif /* _ASM_GENERIC_BITOPS_ATOMIC_H */
-- 
cgit v1.2.3


From 84c6591103dbeaf393a092a3fc7b09510825f6b9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 19 Jun 2018 13:53:12 +0100
Subject: locking/atomics, asm-generic/bitops/lock.h: Rewrite using
 atomic_fetch_*()

The lock bitops can be implemented more efficiently using the atomic_fetch_*()
ops, which provide finer-grained control over the memory ordering semantics
than the bitops.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-arm-kernel@lists.infradead.org
Cc: yamada.masahiro@socionext.com
Link: https://lore.kernel.org/lkml/1529412794-17720-8-git-send-email-will.deacon@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/bitops/lock.h | 68 ++++++++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 67ab280ad134..3ae021368f48 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -2,6 +2,10 @@
 #ifndef _ASM_GENERIC_BITOPS_LOCK_H_
 #define _ASM_GENERIC_BITOPS_LOCK_H_
 
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+
 /**
  * test_and_set_bit_lock - Set a bit and return its old value, for lock
  * @nr: Bit to set
@@ -11,7 +15,20 @@
  * the returned value is 0.
  * It can be used to implement bit locks.
  */
-#define test_and_set_bit_lock(nr, addr)	test_and_set_bit(nr, addr)
+static inline int test_and_set_bit_lock(unsigned int nr,
+					volatile unsigned long *p)
+{
+	long old;
+	unsigned long mask = BIT_MASK(nr);
+
+	p += BIT_WORD(nr);
+	if (READ_ONCE(*p) & mask)
+		return 1;
+
+	old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p);
+	return !!(old & mask);
+}
+
 
 /**
  * clear_bit_unlock - Clear a bit in memory, for unlock
@@ -20,11 +37,11 @@
  *
  * This operation is atomic and provides release barrier semantics.
  */
-#define clear_bit_unlock(nr, addr)	\
-do {					\
-	smp_mb__before_atomic();	\
-	clear_bit(nr, addr);		\
-} while (0)
+static inline void clear_bit_unlock(unsigned int nr, volatile unsigned long *p)
+{
+	p += BIT_WORD(nr);
+	atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p);
+}
 
 /**
  * __clear_bit_unlock - Clear a bit in memory, for unlock
@@ -37,11 +54,38 @@ do {					\
  *
  * See for example x86's implementation.
  */
-#define __clear_bit_unlock(nr, addr)	\
-do {					\
-	smp_mb__before_atomic();	\
-	clear_bit(nr, addr);		\
-} while (0)
+static inline void __clear_bit_unlock(unsigned int nr,
+				      volatile unsigned long *p)
+{
+	unsigned long old;
 
-#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */
+	p += BIT_WORD(nr);
+	old = READ_ONCE(*p);
+	old &= ~BIT_MASK(nr);
+	atomic_long_set_release((atomic_long_t *)p, old);
+}
+
+/**
+ * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom
+ *                                     byte is negative, for unlock.
+ * @nr: the bit to clear
+ * @addr: the address to start counting from
+ *
+ * This is a bit of a one-trick-pony for the filemap code, which clears
+ * PG_locked and tests PG_waiters,
+ */
+#ifndef clear_bit_unlock_is_negative_byte
+static inline bool clear_bit_unlock_is_negative_byte(unsigned int nr,
+						     volatile unsigned long *p)
+{
+	long old;
+	unsigned long mask = BIT_MASK(nr);
+
+	p += BIT_WORD(nr);
+	old = atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p);
+	return !!(old & BIT(7));
+}
+#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
+#endif
 
+#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */
-- 
cgit v1.2.3


From 05736e4ac13c08a4a9b1ef2de26dd31a32cbee57 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 29 May 2018 17:48:27 +0200
Subject: cpu/hotplug: Provide knobs to control SMT

Provide a command line and a sysfs knob to control SMT.

The command line options are:

 'nosmt':	Enumerate secondary threads, but do not online them

 'nosmt=force': Ignore secondary threads completely during enumeration
 		via MP table and ACPI/MADT.

The sysfs control file has the following states (read/write):

 'on':		 SMT is enabled. Secondary threads can be freely onlined
 'off':		 SMT is disabled. Secondary threads, even if enumerated
 		 cannot be onlined
 'forceoff':	 SMT is permanentely disabled. Writes to the control
 		 file are rejected.
 'notsupported': SMT is not supported by the CPU

The command line option 'nosmt' sets the sysfs control to 'off'. This
can be changed to 'on' to reenable SMT during runtime.

The command line option 'nosmt=force' sets the sysfs control to
'forceoff'. This cannot be changed during runtime.

When SMT is 'on' and the control file is changed to 'off' then all online
secondary threads are offlined and attempts to online a secondary thread
later on are rejected.

When SMT is 'off' and the control file is changed to 'on' then secondary
threads can be onlined again. The 'off' -> 'on' transition does not
automatically online the secondary threads.

When the control file is set to 'forceoff', the behaviour is the same as
setting it to 'off', but the operation is irreversible and later writes to
the control file are rejected.

When the control status is 'notsupported' then writes to the control file
are rejected.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/ABI/testing/sysfs-devices-system-cpu |  20 +++
 Documentation/admin-guide/kernel-parameters.txt    |   8 +
 arch/Kconfig                                       |   3 +
 arch/x86/Kconfig                                   |   1 +
 include/linux/cpu.h                                |  13 ++
 kernel/cpu.c                                       | 170 +++++++++++++++++++++
 6 files changed, 215 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 9c5e7732d249..65d9b844ecfd 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -487,3 +487,23 @@ Description:	Information about CPU vulnerabilities
 		"Not affected"	  CPU is not affected by the vulnerability
 		"Vulnerable"	  CPU is affected and no mitigation in effect
 		"Mitigation: $M"  CPU is affected and mitigation $M is in effect
+
+What:		/sys/devices/system/cpu/smt
+		/sys/devices/system/cpu/smt/active
+		/sys/devices/system/cpu/smt/control
+Date:		June 2018
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:	Control Symetric Multi Threading (SMT)
+
+		active:  Tells whether SMT is active (enabled and siblings online)
+
+		control: Read/write interface to control SMT. Possible
+			 values:
+
+			 "on"		SMT is enabled
+			 "off"		SMT is disabled
+			 "forceoff"	SMT is force disabled. Cannot be changed.
+			 "notsupported" SMT is not supported by the CPU
+
+			 If control status is "forceoff" or "notsupported" writes
+			 are rejected.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..8e29c4b6756f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2687,6 +2687,14 @@
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
 
+			[KNL,x86] Disable symmetric multithreading (SMT).
+			nosmt=force: Force disable SMT, similar to disabling
+				     it in the BIOS except that some of the
+				     resource partitioning effects which are
+				     caused by having SMT enabled in the BIOS
+				     cannot be undone. Depending on the CPU
+				     type this might have a performance impact.
+
 	nospectre_v2	[X86] Disable all mitigations for the Spectre variant 2
 			(indirect branch prediction) vulnerability. System may
 			allow data leaks with this option, which is equivalent
diff --git a/arch/Kconfig b/arch/Kconfig
index 1aa59063f1fd..d1f2ed462ac8 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -13,6 +13,9 @@ config KEXEC_CORE
 config HAVE_IMA_KEXEC
 	bool
 
+config HOTPLUG_SMT
+	bool
+
 config OPROFILE
 	tristate "OProfile system profiling"
 	depends on PROFILING
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1dbb4ee19d7..7a34fdf8daf0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -187,6 +187,7 @@ config X86
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
+	select HOTPLUG_SMT			if SMP
 	select IRQ_FORCED_THREADING
 	select NEED_SG_DMA_LENGTH
 	select PCI_LOCKLESS_CONFIG
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d3da5184aa1c..7532cbf27b1d 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -168,4 +168,17 @@ void cpuhp_report_idle_dead(void);
 static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
+enum cpuhp_smt_control {
+	CPU_SMT_ENABLED,
+	CPU_SMT_DISABLED,
+	CPU_SMT_FORCE_DISABLED,
+	CPU_SMT_NOT_SUPPORTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+#else
+# define cpu_smt_control		(CPU_SMT_ENABLED)
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e266cb529c1a..d29fdd7e57bb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -933,6 +933,29 @@ EXPORT_SYMBOL(cpu_down);
 #define takedown_cpu		NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
+#ifdef CONFIG_HOTPLUG_SMT
+enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_control = CPU_SMT_DISABLED;
+	if (str && !strcmp(str, "force")) {
+		pr_info("SMT: Force disabled\n");
+		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	}
+	return 0;
+}
+early_param("nosmt", smt_cmdline_disable);
+
+static inline bool cpu_smt_allowed(unsigned int cpu)
+{
+	return cpu_smt_control == CPU_SMT_ENABLED ||
+		topology_is_primary_thread(cpu);
+}
+#else
+static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+#endif
+
 /**
  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
  * @cpu: cpu that just started
@@ -1056,6 +1079,10 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
 		err = -EBUSY;
 		goto out;
 	}
+	if (!cpu_smt_allowed(cpu)) {
+		err = -EPERM;
+		goto out;
+	}
 
 	err = _cpu_up(cpu, 0, target);
 out:
@@ -1904,10 +1931,153 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = {
 	NULL
 };
 
+#ifdef CONFIG_HOTPLUG_SMT
+
+static const char *smt_states[] = {
+	[CPU_SMT_ENABLED]		= "on",
+	[CPU_SMT_DISABLED]		= "off",
+	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
+	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
+};
+
+static ssize_t
+show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
+}
+
+static void cpuhp_offline_cpu_device(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	dev->offline = true;
+	/* Tell user space about the state change */
+	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+}
+
+static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
+{
+	int cpu, ret = 0;
+
+	cpu_maps_update_begin();
+	for_each_online_cpu(cpu) {
+		if (topology_is_primary_thread(cpu))
+			continue;
+		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
+		if (ret)
+			break;
+		/*
+		 * As this needs to hold the cpu maps lock it's impossible
+		 * to call device_offline() because that ends up calling
+		 * cpu_down() which takes cpu maps lock. cpu maps lock
+		 * needs to be held as this might race against in kernel
+		 * abusers of the hotplug machinery (thermal management).
+		 *
+		 * So nothing would update device:offline state. That would
+		 * leave the sysfs entry stale and prevent onlining after
+		 * smt control has been changed to 'off' again. This is
+		 * called under the sysfs hotplug lock, so it is properly
+		 * serialized against the regular offline usage.
+		 */
+		cpuhp_offline_cpu_device(cpu);
+	}
+	if (!ret)
+		cpu_smt_control = ctrlval;
+	cpu_maps_update_done();
+	return ret;
+}
+
+static void cpuhp_smt_enable(void)
+{
+	cpu_maps_update_begin();
+	cpu_smt_control = CPU_SMT_ENABLED;
+	cpu_maps_update_done();
+}
+
+static ssize_t
+store_smt_control(struct device *dev, struct device_attribute *attr,
+		  const char *buf, size_t count)
+{
+	int ctrlval, ret;
+
+	if (sysfs_streq(buf, "on"))
+		ctrlval = CPU_SMT_ENABLED;
+	else if (sysfs_streq(buf, "off"))
+		ctrlval = CPU_SMT_DISABLED;
+	else if (sysfs_streq(buf, "forceoff"))
+		ctrlval = CPU_SMT_FORCE_DISABLED;
+	else
+		return -EINVAL;
+
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
+		return -EPERM;
+
+	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return -ENODEV;
+
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		return ret;
+
+	if (ctrlval != cpu_smt_control) {
+		switch (ctrlval) {
+		case CPU_SMT_ENABLED:
+			cpuhp_smt_enable();
+			break;
+		case CPU_SMT_DISABLED:
+		case CPU_SMT_FORCE_DISABLED:
+			ret = cpuhp_smt_disable(ctrlval);
+			break;
+		}
+	}
+
+	unlock_device_hotplug();
+	return ret ? ret : count;
+}
+static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
+
+static ssize_t
+show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool active = topology_max_smt_threads() > 1;
+
+	return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
+}
+static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
+
+static struct attribute *cpuhp_smt_attrs[] = {
+	&dev_attr_control.attr,
+	&dev_attr_active.attr,
+	NULL
+};
+
+static const struct attribute_group cpuhp_smt_attr_group = {
+	.attrs = cpuhp_smt_attrs,
+	.name = "smt",
+	NULL
+};
+
+static int __init cpu_smt_state_init(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
+				  &cpuhp_smt_attr_group);
+}
+
+#else
+static inline int cpu_smt_state_init(void) { return 0; }
+#endif
+
 static int __init cpuhp_sysfs_init(void)
 {
 	int cpu, ret;
 
+	ret = cpu_smt_state_init();
+	if (ret)
+		return ret;
+
 	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				 &cpuhp_cpu_root_attr_group);
 	if (ret)
-- 
cgit v1.2.3


From bfc18e389c7a09fbbbed6bf4032396685b14246e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:04 +0100
Subject: atomics/treewide: Rename __atomic_add_unless() =>
 atomic_fetch_add_unless()

While __atomic_add_unless() was originally intended as a building-block
for atomic_add_unless(), it's now used in a number of places around the
kernel. It's the only common atomic operation named __atomic*(), rather
than atomic_*(), and for consistency it would be better named
atomic_fetch_add_unless().

This lack of consistency is slightly confusing, and gets in the way of
scripting atomics. Given that, let's clean things up and promote it to
an official part of the atomics API, in the form of
atomic_fetch_add_unless().

This patch converts definitions and invocations over to the new name,
including the instrumented version, using the following script:

  ----
  git grep -w __atomic_add_unless | while read line; do
  sed -i '{s/\<__atomic_add_unless\>/atomic_fetch_add_unless/}' "${line%%:*}";
  done
  git grep -w __arch_atomic_add_unless | while read line; do
  sed -i '{s/\<__arch_atomic_add_unless\>/arch_atomic_fetch_add_unless/}' "${line%%:*}";
  done
  ----

Note that we do not have atomic{64,_long}_fetch_add_unless(), which will
be introduced by later patches.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-2-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           | 4 ++--
 arch/arc/include/asm/atomic.h             | 4 ++--
 arch/arm/include/asm/atomic.h             | 4 ++--
 arch/arm64/include/asm/atomic.h           | 2 +-
 arch/h8300/include/asm/atomic.h           | 2 +-
 arch/hexagon/include/asm/atomic.h         | 4 ++--
 arch/ia64/include/asm/atomic.h            | 2 +-
 arch/m68k/include/asm/atomic.h            | 2 +-
 arch/mips/include/asm/atomic.h            | 4 ++--
 arch/openrisc/include/asm/atomic.h        | 4 ++--
 arch/parisc/include/asm/atomic.h          | 4 ++--
 arch/powerpc/include/asm/atomic.h         | 8 ++++----
 arch/riscv/include/asm/atomic.h           | 4 ++--
 arch/s390/include/asm/atomic.h            | 2 +-
 arch/sh/include/asm/atomic.h              | 4 ++--
 arch/sparc/include/asm/atomic_32.h        | 2 +-
 arch/sparc/include/asm/atomic_64.h        | 2 +-
 arch/sparc/lib/atomic32.c                 | 4 ++--
 arch/x86/include/asm/atomic.h             | 4 ++--
 arch/xtensa/include/asm/atomic.h          | 4 ++--
 drivers/block/rbd.c                       | 2 +-
 drivers/infiniband/core/rdma_core.c       | 2 +-
 fs/afs/rxrpc.c                            | 2 +-
 include/asm-generic/atomic-instrumented.h | 4 ++--
 include/asm-generic/atomic.h              | 4 ++--
 include/linux/atomic.h                    | 2 +-
 kernel/bpf/syscall.c                      | 4 ++--
 net/rxrpc/call_object.c                   | 2 +-
 net/rxrpc/conn_object.c                   | 4 ++--
 net/rxrpc/local_object.c                  | 2 +-
 net/rxrpc/peer_object.c                   | 2 +-
 31 files changed, 50 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 767bfdd42992..392b15a4dd4f 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -206,7 +206,7 @@ ATOMIC_OPS(xor, xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -214,7 +214,7 @@ ATOMIC_OPS(xor, xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, new, old;
 	smp_mb();
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 11859287c52a..67121b5ff3a3 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -309,7 +309,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -317,7 +317,7 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v
  */
-#define __atomic_add_unless(v, a, u)					\
+#define atomic_fetch_add_unless(v, a, u)					\
 ({									\
 	int c, old;							\
 									\
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 66d0e215a773..9d56d0727c9b 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -130,7 +130,7 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 }
 #define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int oldval, newval;
 	unsigned long tmp;
@@ -215,7 +215,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index c0235e0ff849..264d20339f74 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -125,7 +125,7 @@
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
-#define __atomic_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
+#define atomic_fetch_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
 /*
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index b174dec099bf..5c856887fdf2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -94,7 +94,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	h8300flags flags;
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index fb3dfb2a667e..287aa9f394f3 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -164,7 +164,7 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer to value
  * @a: amount to add
  * @u: unless value is equal to u
@@ -173,7 +173,7 @@ ATOMIC_OPS(xor)
  *
  */
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int __oldval;
 	register int tmp;
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 2524fb60fbc2..9d2ddde5f9d5 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,7 +215,7 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e993e2860ee1..8022d9ea1213 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -211,7 +211,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 0ab176bdb8e8..02fc1553cf9b 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -275,7 +275,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index 146e1660f00e..b589fac39b92 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -100,7 +100,7 @@ ATOMIC_OP(xor)
  *
  * This is often used through atomic_inc_not_zero()
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int old, tmp;
 
@@ -119,7 +119,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 
 	return old;
 }
-#define __atomic_add_unless	__atomic_add_unless
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
 
 #include <asm-generic/atomic.h>
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 88bae6676c9b..7748abced766 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -78,7 +78,7 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -86,7 +86,7 @@ static __inline__ int atomic_read(const atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 682b3e6a1e21..1483261080a1 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -218,7 +218,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -226,13 +226,13 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int t;
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	lwarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
@@ -538,7 +538,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 
 	__asm__ __volatile__ (
 	PPC_ATOMIC_ENTRY_BARRIER
-"1:	ldarx	%0,0,%1		# __atomic_add_unless\n\
+"1:	ldarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpd	0,%0,%3 \n\
 	beq	2f \n\
 	add	%0,%2,%0 \n"
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 855115ace98c..739e810c857e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -332,7 +332,7 @@ ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
 #undef ATOMIC_OP
 
 /* This is required to provide a full barrier on success. */
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
        int prev, rc;
 
@@ -381,7 +381,7 @@ static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
  */
 static __always_inline int atomic_inc_not_zero(atomic_t *v)
 {
-        return __atomic_add_unless(v, 1, 0);
+        return atomic_fetch_add_unless(v, 1, 0);
 }
 
 #ifndef CONFIG_GENERIC_ATOMIC64
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 4b55532f15c4..c2858cdd8c29 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -90,7 +90,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return __atomic_cmpxchg(&v->counter, old, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 0fd0099f43cc..ef45931ebac5 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -46,7 +46,7 @@
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -54,7 +54,7 @@
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index d13ce517f4b9..a58f4b43bcc7 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -27,7 +27,7 @@ int atomic_fetch_or(int, atomic_t *);
 int atomic_fetch_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
-int __atomic_add_unless(atomic_t *, int, int);
+int atomic_fetch_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
 #define atomic_set_release(v, i)	atomic_set((v), (i))
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 28db058d471b..f416fd3d2708 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -89,7 +89,7 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 465a901a0ada..281fa634bb1a 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -95,7 +95,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 }
 EXPORT_SYMBOL(atomic_cmpxchg);
 
-int __atomic_add_unless(atomic_t *v, int a, int u)
+int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	unsigned long flags;
@@ -107,7 +107,7 @@ int __atomic_add_unless(atomic_t *v, int a, int u)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(__atomic_add_unless);
+EXPORT_SYMBOL(atomic_fetch_add_unless);
 
 /* Atomic operations are already serializing */
 void atomic_set(atomic_t *v, int i)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 0db6bec95489..84ed0bd76aef 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -254,7 +254,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 }
 
 /**
- * __arch_atomic_add_unless - add unless the number is already a given value
+ * arch_atomic_fetch_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -262,7 +262,7 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns the old value of @v.
  */
-static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c = arch_atomic_read(v);
 
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index e7a23f2a519a..4188e56c06c9 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -275,7 +275,7 @@ ATOMIC_OPS(xor)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -283,7 +283,7 @@ ATOMIC_OPS(xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index fa0729c1e776..d81c653b9bf6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -61,7 +61,7 @@ static int atomic_inc_return_safe(atomic_t *v)
 {
 	unsigned int counter;
 
-	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	counter = (unsigned int)atomic_fetch_add_unless(v, 1, 0);
 	if (counter <= (unsigned int)INT_MAX)
 		return (int)counter;
 
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..475910ffbcb6 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -121,7 +121,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	 * this lock.
 	 */
 	if (!exclusive)
-		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
 
 	/* lock is either WRITE or DESTROY - should be exclusive */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..183cc5418722 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -648,7 +648,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
 	trace_afs_notify_call(rxcall, call);
 	call->need_attention = true;
 
-	u = __atomic_add_unless(&call->usage, 1, 0);
+	u = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (u != 0) {
 		trace_afs_call(call, afs_call_trace_wake, u,
 			       atomic_read(&call->net->nr_outstanding_calls),
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index ec07f23678ea..b8b14cc2df6c 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,10 +84,10 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
 }
 #endif
 
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	kasan_check_write(v, sizeof(*v));
-	return __arch_atomic_add_unless(v, a, u);
+	return arch_atomic_fetch_add_unless(v, a, u);
 }
 
 
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index abe6dd9ca2a8..10051ed6d088 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -221,8 +221,8 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-#ifndef __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+#ifndef atomic_fetch_add_unless
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
 	c = atomic_read(v);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 01ce3997cb42..9cc982936675 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -530,7 +530,7 @@
  */
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
-	return __atomic_add_unless(v, a, u) != u;
+	return atomic_fetch_add_unless(v, a, u) != u;
 }
 
 /**
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..f12db70d3bf3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -575,7 +575,7 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
 {
 	int refold;
 
-	refold = __atomic_add_unless(&map->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_map_put(map, false);
@@ -1142,7 +1142,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 {
 	int refold;
 
-	refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
+	refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
 
 	if (refold >= BPF_MAX_REFCNT) {
 		__bpf_prog_put(prog, false);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f6734d8cb01a..9486293fef5c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -415,7 +415,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 bool rxrpc_queue_call(struct rxrpc_call *call)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&call->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&call->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&call->processor))
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 4c77a78a252a..77440a356b14 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -266,7 +266,7 @@ void rxrpc_kill_connection(struct rxrpc_connection *conn)
 bool rxrpc_queue_conn(struct rxrpc_connection *conn)
 {
 	const void *here = __builtin_return_address(0);
-	int n = __atomic_add_unless(&conn->usage, 1, 0);
+	int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 	if (n == 0)
 		return false;
 	if (rxrpc_queue_work(&conn->processor))
@@ -309,7 +309,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
 	const void *here = __builtin_return_address(0);
 
 	if (conn) {
-		int n = __atomic_add_unless(&conn->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
 		else
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b493e6b62740..777c3ed4cfc0 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -305,7 +305,7 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
 	const void *here = __builtin_return_address(0);
 
 	if (local) {
-		int n = __atomic_add_unless(&local->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
 		else
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 1b7e8107b3ae..1cf3b408017a 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -406,7 +406,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
 	const void *here = __builtin_return_address(0);
 
 	if (peer) {
-		int n = __atomic_add_unless(&peer->usage, 1, 0);
+		int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
 		if (n > 0)
 			trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
 		else
-- 
cgit v1.2.3


From f74445b6dd6b8f33ed34e005d19ecbb49171dabf Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:06 +0100
Subject: atomics/treewide: Remove atomic_inc_not_zero_hint()

While documentation suggests atomic_inc_not_zero_hint() will perform better
than atomic_inc_not_zero(), this is unlikely to be the case. No architectures
implement atomic_inc_not_zero_hint() directly, and thus it either falls back to
atomic_inc_not_zero(), or a loop using atomic_cmpxchg().

Whenever the hint does not match the value in memory, the repeated use of
atomic_cmpxchg() will be more expensive than the read that
atomic_inc_not_zero_hint() attempts to avoid. For architectures with LL/SC
atomics, a read cannot be avoided, and it would always be better to use
atomic_inc_not_zero() directly. For other architectures, their own
atomic_inc_not_zero() is likely to be more optimal than an atomic_cmpxchg()
loop regardless.

Generally, atomic_inc_not_zero_hint() is liable to perform worse than
atomic_inc_not_zero(). Further, atomic_inc_not_zero_hint() only exists
for atomic_t, and not atomic64_t or atomic_long_t, and there is only one
user in the kernel tree.

Given all this, let's remove atomic_inc_not_zero_hint(), and migrate the
existing user over to atomic_inc_not_zero().

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-4-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic.h | 32 --------------------------------
 net/atm/pppoatm.c      |  2 +-
 2 files changed, 1 insertion(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 9cc982936675..5c5620ae5a35 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -571,38 +571,6 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 }
 #endif
 
-/**
- * atomic_inc_not_zero_hint - increment if not null
- * @v: pointer of type atomic_t
- * @hint: probable value of the atomic before the increment
- *
- * This version of atomic_inc_not_zero() gives a hint of probable
- * value of the atomic. This helps processor to not read the memory
- * before doing the atomic read/modify/write cycle, lowering
- * number of bus transactions on some arches.
- *
- * Returns: 0 if increment was not done, 1 otherwise.
- */
-#ifndef atomic_inc_not_zero_hint
-static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
-{
-	int val, c = hint;
-
-	/* sanity test, should be removed by compiler if hint is a constant */
-	if (!hint)
-		return atomic_inc_not_zero(v);
-
-	do {
-		val = atomic_cmpxchg(v, c, c + 1);
-		if (val == c)
-			return 1;
-		c = val;
-	} while (c);
-
-	return 0;
-}
-#endif
-
 #ifndef atomic_inc_unless_negative
 static inline int atomic_inc_unless_negative(atomic_t *p)
 {
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index af8c4b38b746..d84227d75717 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -244,7 +244,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
 	 * the packet count limit, so...
 	 */
 	if (atm_may_send(pvcc->atmvcc, size) &&
-	    atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
+	    atomic_inc_not_zero(&pvcc->inflight))
 		return 1;
 
 	/*
-- 
cgit v1.2.3


From ade5ef9280c33993099199c51e2e27c2c4013afd Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:07 +0100
Subject: atomics: Make conditional ops return 'bool'

Some of the atomics return a status value, which is a boolean value
describing whether the operation was performed. To make it clear that
this is a boolean value, let's update the common fallbacks to return
bool, fixing up the return values and comments likewise.

At the same time, let's simplify the description of the operations in
their respective comments.

The instrumented atomics and generic atomic64 implementation are updated
accordingly.

Note that atomic64_dec_if_positive() doesn't follow the usual test op
pattern, and returns the would-be decremented value. This is not
changed.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-5-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h |  2 +-
 include/asm-generic/atomic64.h            |  3 ++-
 include/linux/atomic.h                    | 24 +++++++++++++-----------
 lib/atomic64.c                            |  6 +++---
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index b8b14cc2df6c..497faa4a05e3 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -205,7 +205,7 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 	return arch_atomic64_dec_return(v);
 }
 
-static __always_inline s64 atomic64_inc_not_zero(atomic64_t *v)
+static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 8d28eb010d0d..a951a721e1bb 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -11,6 +11,7 @@
  */
 #ifndef _ASM_GENERIC_ATOMIC64_H
 #define _ASM_GENERIC_ATOMIC64_H
+#include <linux/types.h>
 
 typedef struct {
 	long long counter;
@@ -52,7 +53,7 @@ ATOMIC64_OPS(xor)
 extern long long atomic64_dec_if_positive(atomic64_t *v);
 extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
 extern long long atomic64_xchg(atomic64_t *v, long long new);
-extern int	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
+extern bool	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5c5620ae5a35..307a7f6d619a 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,8 @@
 /* Atomic operations usable in machine independent code */
 #ifndef _LINUX_ATOMIC_H
 #define _LINUX_ATOMIC_H
+#include <linux/types.h>
+
 #include <asm/atomic.h>
 #include <asm/barrier.h>
 
@@ -525,10 +527,10 @@
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
  *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
  */
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
+static inline bool atomic_add_unless(atomic_t *v, int a, int u)
 {
 	return atomic_fetch_add_unless(v, a, u) != u;
 }
@@ -537,8 +539,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
  * atomic_inc_not_zero - increment unless the number is zero
  * @v: pointer of type atomic_t
  *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
  */
 #ifndef atomic_inc_not_zero
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
@@ -572,28 +574,28 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline int atomic_inc_unless_negative(atomic_t *p)
+static inline bool atomic_inc_unless_negative(atomic_t *p)
 {
 	int v, v1;
 	for (v = 0; v >= 0; v = v1) {
 		v1 = atomic_cmpxchg(p, v, v + 1);
 		if (likely(v1 == v))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline int atomic_dec_unless_positive(atomic_t *p)
+static inline bool atomic_dec_unless_positive(atomic_t *p)
 {
 	int v, v1;
 	for (v = 0; v <= 0; v = v1) {
 		v1 = atomic_cmpxchg(p, v, v - 1);
 		if (likely(v1 == v))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 #endif
 
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 53c2d5edc826..4230f4b8906c 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -178,16 +178,16 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 }
 EXPORT_SYMBOL(atomic64_xchg);
 
-int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
-	int ret = 0;
+	bool ret = false;
 
 	raw_spin_lock_irqsave(lock, flags);
 	if (v->counter != u) {
 		v->counter += a;
-		ret = 1;
+		ret = true;
 	}
 	raw_spin_unlock_irqrestore(lock, flags);
 	return ret;
-- 
cgit v1.2.3


From bef828204a1bc7a0fd3a24551c4265e9c2ab95ed Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:08 +0100
Subject: atomics/treewide: Make atomic64_inc_not_zero() optional

We define a trivial fallback for atomic_inc_not_zero(), but don't do
the same for atomic64_inc_not_zero(), leading most architectures to
define the same boilerplate.

Let's add a fallback in <linux/atomic.h>, and remove the redundant
implementations. Note that atomic64_add_unless() is always defined in
<linux/atomic.h>, and promotes its arguments to the requisite types, so
we need not do this explicitly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-6-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  2 --
 arch/arc/include/asm/atomic.h             |  1 -
 arch/arm/include/asm/atomic.h             |  1 -
 arch/arm64/include/asm/atomic.h           |  2 --
 arch/ia64/include/asm/atomic.h            |  2 --
 arch/mips/include/asm/atomic.h            |  2 --
 arch/parisc/include/asm/atomic.h          |  2 --
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/riscv/include/asm/atomic.h           |  7 -------
 arch/s390/include/asm/atomic.h            |  1 -
 arch/sparc/include/asm/atomic_64.h        |  2 --
 arch/x86/include/asm/atomic64_32.h        |  2 +-
 arch/x86/include/asm/atomic64_64.h        |  2 --
 include/asm-generic/atomic-instrumented.h |  3 +++
 include/asm-generic/atomic64.h            |  1 -
 include/linux/atomic.h                    | 11 +++++++++++
 16 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 392b15a4dd4f..eb0f25e4c5dd 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -296,8 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
 #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index cecdf3403caf..1406825b5e7d 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -603,7 +603,6 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 9d56d0727c9b..02f3894faa48 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -534,7 +534,6 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 264d20339f74..ad50412889c5 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -204,7 +204,5 @@
 #define atomic64_add_unless(v, a, u)	(___atomic_add_unless(v, a, u, 64) != u)
 #define atomic64_andnot			atomic64_andnot
 
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
-
 #endif
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 9d2ddde5f9d5..93d48b823220 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -246,8 +246,6 @@ static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 02fc1553cf9b..502e691c6393 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -644,8 +644,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 7748abced766..3fd0243bf405 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -305,8 +305,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 1483261080a1..e59620ee4f6b 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -582,6 +582,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
 
 	return t1 != 0;
 }
+#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
 
 #endif /* __powerpc64__ */
 
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 0e27e050ba14..18259e90f57e 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -375,13 +375,6 @@ static __always_inline int atomic64_add_unless(atomic64_t *v, long a, long u)
 }
 #endif
 
-#ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline long atomic64_inc_not_zero(atomic64_t *v)
-{
-        return atomic64_add_unless(v, 1, 0);
-}
-#endif
-
 /*
  * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
  * {cmp,}xchg and the operations that return, so they need a full barrier.
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index c2858cdd8c29..66dac30a4fe1 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -212,6 +212,5 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_dec(_v)		atomic64_sub(1, _v)
 #define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
 #define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index f416fd3d2708..07830a316464 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -123,8 +123,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return c != (u);
 }
 
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
 long atomic64_dec_if_positive(atomic64_t *v);
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 92212bf0484f..2a33cc17801b 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -295,7 +295,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
 	return (int)a;
 }
 
-
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
 static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 {
 	int r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6106b59d3260..6f95023894b7 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -207,8 +207,6 @@ static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
 	return true;
 }
 
-#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
-
 /*
  * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 497faa4a05e3..83bb88d791c4 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -205,11 +205,14 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 	return arch_atomic64_dec_return(v);
 }
 
+#ifdef arch_atomic64_inc_not_zero
+#define atomic64_inc_not_zero atomic64_inc_not_zero
 static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
 }
+#endif
 
 static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index a951a721e1bb..5105275ac825 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -63,6 +63,5 @@ extern bool	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-#define atomic64_inc_not_zero(v) 	atomic64_add_unless((v), 1LL, 0LL)
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 307a7f6d619a..ae3f30923d05 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1019,6 +1019,17 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+#ifndef atomic64_inc_not_zero
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
-- 
cgit v1.2.3


From eccc2da8c03f316bba202e15af2be4615f461900 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:09 +0100
Subject: atomics/treewide: Make atomic_fetch_add_unless() optional

Several architectures these have a near-identical implementation based
on atomic_read() and atomic_cmpxchg() which we can instead define in
<linux/atomic.h>, so let's do so, using something close to the existing
x86 implementation with try_cmpxchg().

Where an architecture provides its own atomic_fetch_add_unless(), it
must define a preprocessor symbol for it. The instrumented atomics are
updated accordingly.

Note that arch/arc's existing atomic_fetch_add_unless() had redundant
barriers, as these are already present in its atomic_cmpxchg()
implementation.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Link: https://lore.kernel.org/lkml/20180621121321.4761-7-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  2 +-
 arch/arc/include/asm/atomic.h             | 28 ----------------------------
 arch/arm/include/asm/atomic.h             | 11 +----------
 arch/arm64/include/asm/atomic.h           |  1 -
 arch/h8300/include/asm/atomic.h           |  1 +
 arch/hexagon/include/asm/atomic.h         |  1 +
 arch/ia64/include/asm/atomic.h            | 16 ----------------
 arch/m68k/include/asm/atomic.h            | 15 ---------------
 arch/mips/include/asm/atomic.h            | 24 ------------------------
 arch/parisc/include/asm/atomic.h          | 24 ------------------------
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/riscv/include/asm/atomic.h           |  1 +
 arch/s390/include/asm/atomic.h            | 15 ---------------
 arch/sh/include/asm/atomic.h              | 25 -------------------------
 arch/sparc/include/asm/atomic_32.h        |  2 ++
 arch/sparc/include/asm/atomic_64.h        | 15 ---------------
 arch/x86/include/asm/atomic.h             | 21 ---------------------
 arch/xtensa/include/asm/atomic.h          | 24 ------------------------
 include/asm-generic/atomic-instrumented.h |  4 +++-
 include/asm-generic/atomic.h              | 11 -----------
 include/linux/atomic.h                    | 23 +++++++++++++++++++++++
 21 files changed, 34 insertions(+), 231 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index eb0f25e4c5dd..4a800a3424a3 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -235,7 +235,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	smp_mb();
 	return old;
 }
-
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 /**
  * atomic64_add_unless - add unless the number is a given value
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 1406825b5e7d..60da80481c5d 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -308,34 +308,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v
- */
-#define atomic_fetch_add_unless(v, a, u)					\
-({									\
-	int c, old;							\
-									\
-	/*								\
-	 * Explicit full memory barrier needed before/after as		\
-	 * LLOCK/SCOND thmeselves don't provide any such semantics	\
-	 */								\
-	smp_mb();							\
-									\
-	c = atomic_read(v);						\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c)\
-		c = old;						\
-									\
-	smp_mb();							\
-									\
-	c;								\
-})
-
 #define atomic_inc(v)			atomic_add(1, v)
 #define atomic_dec(v)			atomic_sub(1, v)
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 02f3894faa48..74460aa00fa0 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -156,6 +156,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return oldval;
 }
+#define atomic_fetch_add_unless		atomic_fetch_add_unless
 
 #else /* ARM_ARCH_6 */
 
@@ -215,16 +216,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-
-	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c)
-		c = old;
-	return c;
-}
-
 #endif /* __LINUX_ARM_ARCH__ */
 
 #define ATOMIC_OPS(op, c_op, asm_op)					\
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index ad50412889c5..22c8c43d6689 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -125,7 +125,6 @@
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
-#define atomic_fetch_add_unless(v, a, u)	___atomic_add_unless(v, a, u,)
 #define atomic_andnot			atomic_andnot
 
 /*
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 5c856887fdf2..710364946308 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -106,5 +106,6 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	arch_local_irq_restore(flags);
 	return ret;
 }
+#define atomic_fetch_add_unless		atomic_fetch_add_unless
 
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index d2feeba93c44..86c67e9adbfa 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -196,6 +196,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	);
 	return __oldval;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 #define atomic_inc(v) atomic_add(1, (v))
 #define atomic_dec(v) atomic_sub(1, (v))
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 93d48b823220..cfe44086338e 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,22 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
-
 static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
 {
 	long c, old;
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 8022d9ea1213..596882cda224 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -211,19 +211,4 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 502e691c6393..794734e730d9 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -274,30 +274,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
 #define atomic_inc_return(v) atomic_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 3fd0243bf405..b2b6261d05e7 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -77,30 +77,6 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define ATOMIC_OP(op, c_op)						\
 static __inline__ void atomic_##op(int i, atomic_t *v)			\
 {									\
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index e59620ee4f6b..b5646c079c16 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -248,6 +248,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return t;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 /**
  * atomic_inc_not_zero - increment unless the number is zero
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 18259e90f57e..5f161daefcd2 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -349,6 +349,7 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 		: "memory");
 	return prev;
 }
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 static __always_inline long __atomic64_add_unless(atomic64_t *v, long a, long u)
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 66dac30a4fe1..26c6b713a7a3 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -90,21 +90,6 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return __atomic_cmpxchg(&v->counter, old, new);
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == u))
-			break;
-		old = atomic_cmpxchg(v, c, c + a);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define ATOMIC64_INIT(i)  { (i) }
 
 static inline long atomic64_read(const atomic64_t *v)
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index ef45931ebac5..422fac764ca1 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -45,31 +45,6 @@
 #define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-
-	return c;
-}
-
 #endif /* CONFIG_CPU_J2 */
 
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index a58f4b43bcc7..9d7a15acc0c5 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -30,6 +30,8 @@ int atomic_xchg(atomic_t *, int);
 int atomic_fetch_add_unless(atomic_t *, int, int);
 void atomic_set(atomic_t *, int);
 
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
+
 #define atomic_set_release(v, i)	atomic_set((v), (i))
 
 #define atomic_read(v)          READ_ONCE((v)->counter)
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 07830a316464..e4f1c93db31f 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -89,21 +89,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 84ed0bd76aef..616327ac9d39 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -253,27 +253,6 @@ static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 	return val;
 }
 
-/**
- * arch_atomic_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
- */
-static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c = arch_atomic_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!arch_atomic_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 4188e56c06c9..f4c9f82c40c6 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -274,30 +274,6 @@ ATOMIC_OPS(xor)
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
-}
-
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 83bb88d791c4..1f9b2a767d3c 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -84,12 +84,14 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne
 }
 #endif
 
+#ifdef arch_atomic_fetch_add_unless
+#define atomic_fetch_add_unless atomic_fetch_add_unless
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_unless(v, a, u);
 }
-
+#endif
 
 static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 10051ed6d088..757e45821220 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -221,15 +221,4 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-#ifndef atomic_fetch_add_unless
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c, old;
-	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
-		c = old;
-	return c;
-}
-#endif
-
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index ae3f30923d05..b89ba36cab94 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -521,6 +521,29 @@
 #endif
 #endif /* xchg_relaxed */
 
+/**
+ * atomic_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns the original value of @v.
+ */
+#ifndef atomic_fetch_add_unless
+static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#endif
+
 /**
  * atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
-- 
cgit v1.2.3


From 0ae1d994020d75ac065fd42ac4cbf5ac6ce9b255 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:10 +0100
Subject: atomics: Prepare for atomic64_fetch_add_unless()

Currently all architectures must implement atomic_fetch_add_unless(),
with common code providing atomic_add_unless(). Architectures must also
implement atomic64_add_unless() directly, with no corresponding
atomic64_fetch_add_unless().

This divergence is unfortunate, and means that the APIs for atomic_t,
atomic64_t, and atomic_long_t differ.

In preparation for unifying things, with architectures providing
atomic64_fetch_add_unless, this patch adds a generic
atomic64_add_unless() which will use atomic64_fetch_add_unless(). The
instrumented atomics are updated to take this case into account.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Albert Ou <albert@sifive.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Link: https://lore.kernel.org/lkml/20180621121321.4761-8-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h |  9 +++++++++
 include/linux/atomic.h                    | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 1f9b2a767d3c..444bf2f9d54d 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -93,11 +93,20 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #endif
 
+#ifdef arch_atomic64_fetch_add_unless
+#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_unless(v, a, u);
+}
+#else
 static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_add_unless(v, a, u);
 }
+#endif
 
 static __always_inline void atomic_inc(atomic_t *v)
 {
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index b89ba36cab94..3c03de648007 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1042,6 +1042,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+#ifdef atomic64_fetch_add_unless
+static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	return atomic64_fetch_add_unless(v, a, u) != u;
+}
+#endif
+
 /**
  * atomic64_inc_not_zero - increment unless the number is zero
  * @v: pointer of type atomic64_t
-- 
cgit v1.2.3


From 00b808ab79ead372daf1a0682d1ef271599c0b55 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:11 +0100
Subject: atomics/generic: Define atomic64_fetch_add_unless()

As a step towards unifying the atomic/atomic64/atomic_long APIs, this
patch converts the generic implementation of atomic64_add_unless() into
a generic implementation of atomic64_fetch_add_unless().

A wrapper in <linux/atomic.h> will build atomic_add_unless() atop of
this, provided it is given a preprocessor definition.

No functional change is intended as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-9-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic64.h |  3 ++-
 lib/atomic64.c                 | 14 +++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 5105275ac825..49460107b29a 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -53,7 +53,8 @@ ATOMIC64_OPS(xor)
 extern long long atomic64_dec_if_positive(atomic64_t *v);
 extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
 extern long long atomic64_xchg(atomic64_t *v, long long new);
-extern bool	 atomic64_add_unless(atomic64_t *v, long long a, long long u);
+extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
+#define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 4230f4b8906c..1d91e31eceec 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new)
 }
 EXPORT_SYMBOL(atomic64_xchg);
 
-bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
+long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
-	bool ret = false;
+	long long val;
 
 	raw_spin_lock_irqsave(lock, flags);
-	if (v->counter != u) {
+	val = v->counter;
+	if (val != u)
 		v->counter += a;
-		ret = true;
-	}
 	raw_spin_unlock_irqrestore(lock, flags);
-	return ret;
+
+	return val;
 }
-EXPORT_SYMBOL(atomic64_add_unless);
+EXPORT_SYMBOL(atomic64_fetch_add_unless);
-- 
cgit v1.2.3


From 356701329fb391184618eda7b7fb68cb35271506 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:17 +0100
Subject: atomics/treewide: Make atomic64_fetch_add_unless() optional

Architectures with atomic64_fetch_add_unless() provide a preprocessor
symbol if they do so, and all other architectures have trivial C
implementations of atomic64_add_unless() which are near-identical.

Let's unify the trivial definitions of atomic64_fetch_add_unless() in
<linux/atomic.h>, so that we always have both
atomic64_fetch_add_unless() and atomic64_add_unless() with less
boilerplate code.

This means that atomic64_add_unless() is always implemented in core
code, and the instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-15-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm64/include/asm/atomic.h           | 12 ------------
 arch/ia64/include/asm/atomic.h            | 15 ---------------
 arch/mips/include/asm/atomic.h            | 24 ------------------------
 arch/parisc/include/asm/atomic.h          | 24 ------------------------
 arch/s390/include/asm/atomic.h            | 16 ----------------
 arch/sparc/include/asm/atomic_64.h        | 15 ---------------
 arch/x86/include/asm/atomic64_64.h        | 19 -------------------
 include/asm-generic/atomic-instrumented.h |  6 ------
 include/linux/atomic.h                    | 26 ++++++++++++++++++++++++--
 9 files changed, 24 insertions(+), 133 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 22c8c43d6689..82db0e4febd4 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -40,17 +40,6 @@
 
 #include <asm/cmpxchg.h>
 
-#define ___atomic_add_unless(v, a, u, sfx)				\
-({									\
-	typeof((v)->counter) c, old;					\
-									\
-	c = atomic##sfx##_read(v);					\
-	while (c != (u) &&						\
-	      (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c)	\
-		c = old;						\
-	c;								\
- })
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)			READ_ONCE((v)->counter)
@@ -200,7 +189,6 @@
 #define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
 #define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
 #define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)
-#define atomic64_add_unless(v, a, u)	(___atomic_add_unless(v, a, u, 64) != u)
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index cfe44086338e..0f80a3eafaba 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,21 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 794734e730d9..d42b27df1548 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -596,30 +596,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns true iff @v was not @u.
- */
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index b2b6261d05e7..f53ba2d6ff67 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -257,30 +257,6 @@ atomic64_read(const atomic64_t *v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 26c6b713a7a3..eb9329741bad 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -153,22 +153,6 @@ ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
-static inline int atomic64_add_unless(atomic64_t *v, long i, long u)
-{
-	long c, old;
-
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == u))
-			break;
-		old = atomic64_cmpxchg(v, c, c + i);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != u;
-}
-
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
 	long c, old, dec;
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index e4f1c93db31f..458783e99997 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -93,21 +93,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
-}
-
 long atomic64_dec_if_positive(atomic64_t *v);
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6f95023894b7..7e04b294e6eb 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -188,25 +188,6 @@ static inline long arch_atomic64_xchg(atomic64_t *v, long new)
 	return xchg(&v->counter, new);
 }
 
-/**
- * arch_atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
- */
-static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
-{
-	s64 c = arch_atomic64_read(v);
-	do {
-		if (unlikely(c == u))
-			return false;
-	} while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
-	return true;
-}
-
 /*
  * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 444bf2f9d54d..2b487f28ef35 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -100,12 +100,6 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_unless(v, a, u);
 }
-#else
-static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_add_unless(v, a, u);
-}
 #endif
 
 static __always_inline void atomic_inc(atomic_t *v)
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 3c03de648007..530562ac7909 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -1042,6 +1042,30 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
 #endif /* atomic64_try_cmpxchg */
 
+/**
+ * atomic64_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns the original value of @v.
+ */
+#ifndef atomic64_fetch_add_unless
+static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
+						  long long u)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#endif
+
 /**
  * atomic64_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
@@ -1051,12 +1075,10 @@ static inline int atomic_dec_if_positive(atomic_t *v)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-#ifdef atomic64_fetch_add_unless
 static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 {
 	return atomic64_fetch_add_unless(v, a, u) != u;
 }
-#endif
 
 /**
  * atomic64_inc_not_zero - increment unless the number is zero
-- 
cgit v1.2.3


From 18cc1814d4e7560412c9c8c6d28f9d6782c8b402 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:18 +0100
Subject: atomics/treewide: Make test ops optional

Some of the atomics return the result of a test applied after the atomic
operation, and almost all architectures implement these as trivial
wrappers around the underlying atomic. Specifically:

 * <atomic>_inc_and_test(v)    is (<atomic>_inc_return(v)    == 0)
 * <atomic>_dec_and_test(v)    is (<atomic>_dec_return(v)    == 0)
 * <atomic>_sub_and_test(i, v) is (<atomic>_sub_return(i, v) == 0)
 * <atomic>_add_negative(i, v) is (<atomic>_add_return(i, v)  < 0)

Rather than have these definitions duplicated in all architectures, with
minor inconsistencies in formatting and documentation, let's make these
operations optional, with default fallbacks as above. Implementations
must now provide a preprocessor symbol.

The instrumented atomics are updated accordingly.

Both x86 and m68k have custom implementations, which are left as-is,
given preprocessor symbols to avoid being overridden.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-16-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  12 ---
 arch/arc/include/asm/atomic.h             |  10 ---
 arch/arm/include/asm/atomic.h             |   9 ---
 arch/arm64/include/asm/atomic.h           |   8 --
 arch/h8300/include/asm/atomic.h           |   5 --
 arch/hexagon/include/asm/atomic.h         |   5 --
 arch/ia64/include/asm/atomic.h            |  23 ------
 arch/m68k/include/asm/atomic.h            |   4 +
 arch/mips/include/asm/atomic.h            |  84 --------------------
 arch/parisc/include/asm/atomic.h          |  22 ------
 arch/powerpc/include/asm/atomic.h         |  30 --------
 arch/riscv/include/asm/atomic.h           |  46 -----------
 arch/s390/include/asm/atomic.h            |   8 --
 arch/sh/include/asm/atomic.h              |   4 -
 arch/sparc/include/asm/atomic_32.h        |  15 ----
 arch/sparc/include/asm/atomic_64.h        |  20 -----
 arch/x86/include/asm/atomic.h             |   4 +
 arch/x86/include/asm/atomic64_32.h        |  54 -------------
 arch/x86/include/asm/atomic64_64.h        |   4 +
 arch/xtensa/include/asm/atomic.h          |  42 ----------
 include/asm-generic/atomic-instrumented.h |  24 ++++++
 include/asm-generic/atomic.h              |   9 ---
 include/asm-generic/atomic64.h            |   4 -
 include/linux/atomic.h                    | 124 ++++++++++++++++++++++++++++++
 24 files changed, 160 insertions(+), 410 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index cc486dbb3837..25f8693c5a42 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -297,24 +297,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
-#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
-
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic64_dec_return(v) atomic64_sub_return(1,(v))
 
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 #define atomic64_inc_return(v) atomic64_add_return(1,(v))
 
-#define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0)
-#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i), (v)) == 0)
-
-#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
-#define atomic64_inc_and_test(v) (atomic64_add_return(1, (v)) == 0)
-
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
-
 #define atomic_inc(v) atomic_add(1,(v))
 #define atomic64_inc(v) atomic64_add(1,(v))
 
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4917ffa61579..4222e726f84c 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -311,14 +311,8 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #define atomic_inc(v)			atomic_add(1, v)
 #define atomic_dec(v)			atomic_sub(1, v)
 
-#define atomic_inc_and_test(v)		(atomic_add_return(1, v) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, v) == 0)
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
-
-#define atomic_add_negative(i, v)	(atomic_add_return(i, v) < 0)
-
 
 #ifdef CONFIG_GENERIC_ATOMIC64
 
@@ -566,14 +560,10 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 852e1fee72b0..35fb7f504daa 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -248,13 +248,8 @@ ATOMIC_OPS(xor, ^=, eor)
 #define atomic_inc(v)		atomic_add(1, v)
 #define atomic_dec(v)		atomic_sub(1, v)
 
-#define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
-#define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
 #define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
 #define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-
-#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
@@ -517,14 +512,10 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 82db0e4febd4..edbe53fa3106 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -110,10 +110,6 @@
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
-#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
 #define atomic_andnot			atomic_andnot
 
 /*
@@ -185,10 +181,6 @@
 
 #define atomic64_inc(v)			atomic64_add(1, (v))
 #define atomic64_dec(v)			atomic64_sub(1, (v))
-#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
-#define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
-#define atomic64_add_negative(i, v)	(atomic64_add_return((i), (v)) < 0)
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 710364946308..8977b5157c8f 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -69,17 +69,12 @@ ATOMIC_OPS(sub, -=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
-
 #define atomic_inc_return(v)		atomic_add_return(1, v)
 #define atomic_dec_return(v)		atomic_sub_return(1, v)
 
 #define atomic_inc(v)			(void)atomic_inc_return(v)
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 
 #define atomic_dec(v)			(void)atomic_dec_return(v)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 86c67e9adbfa..31638f511674 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -201,11 +201,6 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc(v) atomic_add(1, (v))
 #define atomic_dec(v) atomic_sub(1, (v))
 
-#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, (v)) == 0)
-#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
-
 #define atomic_inc_return(v) (atomic_add_return(1, v))
 #define atomic_dec_return(v) (atomic_sub_return(1, v))
 
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0f80a3eafaba..e4143c462e65 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -231,34 +231,11 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-/*
- * Atomically add I to V and return TRUE if the resulting value is
- * negative.
- */
-static __inline__ int
-atomic_add_negative (int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-
-static __inline__ long
-atomic64_add_negative (__s64 i, atomic64_t *v)
-{
-	return atomic64_add_return(i, v) < 0;
-}
-
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1, (v))
 
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
-#define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
-#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i), (v)) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_sub_return(1, (v)) == 0)
-#define atomic64_inc_and_test(v)	(atomic64_add_return(1, (v)) == 0)
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
 #define atomic_inc(v)			atomic_add(1, (v))
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 596882cda224..9df09c876fa2 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -138,6 +138,7 @@ static inline int atomic_dec_and_test(atomic_t *v)
 	__asm__ __volatile__("subql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
+#define atomic_dec_and_test atomic_dec_and_test
 
 static inline int atomic_dec_and_test_lt(atomic_t *v)
 {
@@ -155,6 +156,7 @@ static inline int atomic_inc_and_test(atomic_t *v)
 	__asm__ __volatile__("addql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
+#define atomic_inc_and_test atomic_inc_and_test
 
 #ifdef CONFIG_RMW_INSNS
 
@@ -201,6 +203,7 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
+#define atomic_sub_and_test atomic_sub_and_test
 
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
@@ -210,5 +213,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
+#define atomic_add_negative atomic_add_negative
 
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index d42b27df1548..fd3008ae164c 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -277,37 +277,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_dec_return(v) atomic_sub_return(1, (v))
 #define atomic_inc_return(v) atomic_add_return(1, (v))
 
-/*
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-/*
- * atomic_dec_and_test - decrement by 1 and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
-
 /*
  * atomic_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
@@ -330,17 +299,6 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
  */
 #define atomic_dec(v) atomic_sub(1, (v))
 
-/*
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic_add_negative(i, v) (atomic_add_return(i, (v)) < 0)
-
 #ifdef CONFIG_64BIT
 
 #define ATOMIC64_INIT(i)    { (i) }
@@ -599,37 +557,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 #define atomic64_dec_return(v) atomic64_sub_return(1, (v))
 #define atomic64_inc_return(v) atomic64_add_return(1, (v))
 
-/*
- * atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
-
-/*
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
-/*
- * atomic64_dec_and_test - decrement by 1 and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, (v)) == 0)
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic64_t
@@ -652,17 +579,6 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
  */
 #define atomic64_dec(v) atomic64_sub(1, (v))
 
-/*
- * atomic64_add_negative - add and test if negative
- * @v: pointer of type atomic64_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0)
-
 #endif /* CONFIG_64BIT */
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index f53ba2d6ff67..f85844ff6336 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -142,22 +142,6 @@ ATOMIC_OPS(xor, ^=)
 #define atomic_inc_return(v)	(atomic_add_return(   1,(v)))
 #define atomic_dec_return(v)	(atomic_add_return(  -1,(v)))
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_dec_and_test(v)	(atomic_dec_return(v) == 0)
-
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i),(v)) == 0)
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_64BIT
@@ -246,12 +230,6 @@ atomic64_read(const atomic64_t *v)
 #define atomic64_inc_return(v)		(atomic64_add_return(   1,(v)))
 #define atomic64_dec_return(v)		(atomic64_add_return(  -1,(v)))
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
-
-#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
-#define atomic64_sub_and_test(i,v)	(atomic64_sub_return((i),(v)) == 0)
-
 /* exported interface */
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 233dbf31911c..5d76f05d2be3 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -129,8 +129,6 @@ ATOMIC_OPS(xor, xor)
 #undef ATOMIC_OP_RETURN_RELAXED
 #undef ATOMIC_OP
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
 static __inline__ void atomic_inc(atomic_t *v)
 {
 	int t;
@@ -163,16 +161,6 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
 	return t;
 }
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
 static __inline__ void atomic_dec(atomic_t *v)
 {
 	int t;
@@ -281,9 +269,6 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
 }
 #define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
 
-#define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return((v)) == 0)
-
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1, even if
@@ -413,8 +398,6 @@ ATOMIC64_OPS(xor, xor)
 #undef ATOMIC64_OP_RETURN_RELAXED
 #undef ATOMIC64_OP
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
-
 static __inline__ void atomic64_inc(atomic64_t *v)
 {
 	long t;
@@ -445,16 +428,6 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
 	return t;
 }
 
-/*
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
 static __inline__ void atomic64_dec(atomic64_t *v)
 {
 	long t;
@@ -488,9 +461,6 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
 #define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
 #define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
 
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
-
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1.
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index d959bbaaad41..68eef0a805ca 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -209,36 +209,6 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-/*
- * The extra atomic operations that are constructed from one of the core
- * AMO-based operations above (aside from sub, which is easier to fit above).
- * These are required to perform a full barrier, but they're OK this way
- * because atomic_*_return is also required to perform a full barrier.
- *
- */
-#define ATOMIC_OP(op, func_op, comp_op, I, c_type, prefix)		\
-static __always_inline							\
-bool atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
-{									\
-	return atomic##prefix##_##func_op##_return(i, v) comp_op I;	\
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, func_op, comp_op, I)				\
-        ATOMIC_OP(op, func_op, comp_op, I,  int,   )
-#else
-#define ATOMIC_OPS(op, func_op, comp_op, I)				\
-        ATOMIC_OP(op, func_op, comp_op, I,  int,   )			\
-        ATOMIC_OP(op, func_op, comp_op, I, long, 64)
-#endif
-
-ATOMIC_OPS(add_and_test, add, ==, 0)
-ATOMIC_OPS(sub_and_test, sub, ==, 0)
-ATOMIC_OPS(add_negative, add,  <, 0)
-
-#undef ATOMIC_OP
-#undef ATOMIC_OPS
-
 #define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
 static __always_inline							\
 void atomic##prefix##_##op(atomic##prefix##_t *v)			\
@@ -315,22 +285,6 @@ ATOMIC_OPS(dec, add, +, -1)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-#define ATOMIC_OP(op, func_op, comp_op, I, prefix)			\
-static __always_inline							\
-bool atomic##prefix##_##op(atomic##prefix##_t *v)			\
-{									\
-	return atomic##prefix##_##func_op##_return(v) comp_op I;	\
-}
-
-ATOMIC_OP(inc_and_test, inc, ==, 0,   )
-ATOMIC_OP(dec_and_test, dec, ==, 0,   )
-#ifndef CONFIG_GENERIC_ATOMIC64
-ATOMIC_OP(inc_and_test, inc, ==, 0, 64)
-ATOMIC_OP(dec_and_test, dec, ==, 0, 64)
-#endif
-
-#undef ATOMIC_OP
-
 /* This is required to provide a full barrier on success. */
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index eb9329741bad..7f5fbd595f01 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,17 +55,13 @@ static inline void atomic_add(int i, atomic_t *v)
 	__atomic_add(i, &v->counter);
 }
 
-#define atomic_add_negative(_i, _v)	(atomic_add_return(_i, _v) < 0)
 #define atomic_inc(_v)			atomic_add(1, _v)
 #define atomic_inc_return(_v)		atomic_add_return(1, _v)
-#define atomic_inc_and_test(_v)		(atomic_add_return(1, _v) == 0)
 #define atomic_sub(_i, _v)		atomic_add(-(int)(_i), _v)
 #define atomic_sub_return(_i, _v)	atomic_add_return(-(int)(_i), _v)
 #define atomic_fetch_sub(_i, _v)	atomic_fetch_add(-(int)(_i), _v)
-#define atomic_sub_and_test(_i, _v)	(atomic_sub_return(_i, _v) == 0)
 #define atomic_dec(_v)			atomic_sub(1, _v)
 #define atomic_dec_return(_v)		atomic_sub_return(1, _v)
-#define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
 #define ATOMIC_OPS(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
@@ -170,16 +166,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
 #define atomic64_inc(_v)		atomic64_add(1, _v)
 #define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
-#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
-#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
 #define atomic64_dec(_v)		atomic64_sub(1, _v)
 #define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
-#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 422fac764ca1..d438494fa112 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -32,12 +32,8 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-#define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 9d7a15acc0c5..3a26573790c6 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -51,19 +51,4 @@ void atomic_set(atomic_t *, int);
 #define atomic_inc_return(v)	(atomic_add_return(        1, (v)))
 #define atomic_dec_return(v)	(atomic_add_return(       -1, (v)))
 
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 458783e99997..634508282aea 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -56,32 +56,12 @@ ATOMIC_OPS(xor)
 #define atomic_inc_return(v)   atomic_add_return(1, v)
 #define atomic64_inc_return(v) atomic64_add_return(1, v)
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
-#define atomic64_sub_and_test(i, v) (atomic64_sub_return(i, v) == 0)
-
-#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0)
-#define atomic64_dec_and_test(v) (atomic64_sub_return(1, v) == 0)
-
 #define atomic_inc(v) atomic_add(1, v)
 #define atomic64_inc(v) atomic64_add(1, v)
 
 #define atomic_dec(v) atomic_sub(1, v)
 #define atomic64_dec(v) atomic64_sub(1, v)
 
-#define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0)
-#define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
-
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 
 static inline int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 616327ac9d39..73bda4abe180 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,6 +80,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
 static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
@@ -117,6 +118,7 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
 static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
@@ -130,6 +132,7 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
 static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
@@ -144,6 +147,7 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
+#define arch_atomic_add_negative arch_atomic_add_negative
 static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 2a33cc17801b..a26810d005e0 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -197,20 +197,6 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
 	return i;
 }
 
-/**
- * arch_atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
-{
-	return arch_atomic64_sub_return(i, v) == 0;
-}
-
 /**
  * arch_atomic64_inc - increment atomic64 variable
  * @v: pointer to type atomic64_t
@@ -235,46 +221,6 @@ static inline void arch_atomic64_dec(atomic64_t *v)
 			       "S" (v) : "memory", "eax", "ecx", "edx");
 }
 
-/**
- * arch_atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int arch_atomic64_dec_and_test(atomic64_t *v)
-{
-	return arch_atomic64_dec_return(v) == 0;
-}
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int arch_atomic64_inc_and_test(atomic64_t *v)
-{
-	return arch_atomic64_inc_return(v) == 0;
-}
-
-/**
- * arch_atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
-{
-	return arch_atomic64_add_return(i, v) < 0;
-}
-
 /**
  * arch_atomic64_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 7e04b294e6eb..6a65228a3db6 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,6 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
@@ -110,6 +111,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
@@ -123,6 +125,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
@@ -137,6 +140,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
+#define arch_atomic64_add_negative arch_atomic64_add_negative
 static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index f4c9f82c40c6..332ae4eca737 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -197,17 +197,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0)
-
 /**
  * atomic_inc - increment atomic variable
  * @v: pointer of type atomic_t
@@ -240,37 +229,6 @@ ATOMIC_OPS(xor)
  */
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#define atomic_dec_and_test(v) (atomic_sub_return(1,(v)) == 0)
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_add_return(1,(v)) == 0)
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
-
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 2b487f28ef35..6b64c200de73 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -225,29 +225,41 @@ static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 	return arch_atomic64_dec_if_positive(v);
 }
 
+#ifdef arch_atomic_dec_and_test
+#define atomic_dec_and_test atomic_dec_and_test
 static __always_inline bool atomic_dec_and_test(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_dec_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic64_dec_and_test
+#define atomic64_dec_and_test atomic64_dec_and_test
 static __always_inline bool atomic64_dec_and_test(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic_inc_and_test
+#define atomic_inc_and_test atomic_inc_and_test
 static __always_inline bool atomic_inc_and_test(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_inc_and_test(v);
 }
+#endif
 
+#ifdef arch_atomic64_inc_and_test
+#define atomic64_inc_and_test atomic64_inc_and_test
 static __always_inline bool atomic64_inc_and_test(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_and_test(v);
 }
+#endif
 
 static __always_inline int atomic_add_return(int i, atomic_t *v)
 {
@@ -333,29 +345,41 @@ static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v)
 	return arch_atomic64_fetch_xor(i, v);
 }
 
+#ifdef arch_atomic_sub_and_test
+#define atomic_sub_and_test atomic_sub_and_test
 static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_sub_and_test(i, v);
 }
+#endif
 
+#ifdef arch_atomic64_sub_and_test
+#define atomic64_sub_and_test atomic64_sub_and_test
 static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_sub_and_test(i, v);
 }
+#endif
 
+#ifdef arch_atomic_add_negative
+#define atomic_add_negative atomic_add_negative
 static __always_inline bool atomic_add_negative(int i, atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_add_negative(i, v);
 }
+#endif
 
+#ifdef arch_atomic64_add_negative
+#define atomic64_add_negative atomic64_add_negative
 static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_add_negative(i, v);
 }
+#endif
 
 static __always_inline unsigned long
 cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size)
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 757e45821220..40cab858aaaa 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -186,11 +186,6 @@ ATOMIC_OP(xor, ^)
 
 #include <linux/irqflags.h>
 
-static inline int atomic_add_negative(int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-
 static inline void atomic_add(int i, atomic_t *v)
 {
 	atomic_add_return(i, v);
@@ -214,10 +209,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_return(v)		atomic_add_return(1, (v))
 
-#define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
-#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
-
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 49460107b29a..d3827ab97aa4 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -56,13 +56,9 @@ extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_inc_and_test(v) 	(atomic64_inc_return(v) == 0)
-#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
 #define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 530562ac7909..3ee8da9023cd 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -569,6 +569,68 @@ static inline bool atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
 #endif
 
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic_inc_and_test
+static inline bool atomic_inc_and_test(atomic_t *v)
+{
+	return atomic_inc_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#ifndef atomic_dec_and_test
+static inline bool atomic_dec_and_test(atomic_t *v)
+{
+	return atomic_dec_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic_sub_and_test
+static inline bool atomic_sub_and_test(int i, atomic_t *v)
+{
+	return atomic_sub_return(i, v) == 0;
+}
+#endif
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#ifndef atomic_add_negative
+static inline bool atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+#endif
+
 #ifndef atomic_andnot
 static inline void atomic_andnot(int i, atomic_t *v)
 {
@@ -1091,6 +1153,68 @@ static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 #endif
 
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic64_inc_and_test
+static inline bool atomic64_inc_and_test(atomic64_t *v)
+{
+	return atomic64_inc_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+#ifndef atomic64_dec_and_test
+static inline bool atomic64_dec_and_test(atomic64_t *v)
+{
+	return atomic64_dec_return(v) == 0;
+}
+#endif
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+#ifndef atomic64_sub_and_test
+static inline bool atomic64_sub_and_test(long long i, atomic64_t *v)
+{
+	return atomic64_sub_return(i, v) == 0;
+}
+#endif
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+#ifndef atomic64_add_negative
+static inline bool atomic64_add_negative(long long i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
-- 
cgit v1.2.3


From 9837559d8eb01ce834e56fc9a567c1d94ebd3698 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:19 +0100
Subject: atomics/treewide: Make unconditional inc/dec ops optional

Many of the inc/dec ops are mandatory, but for most architectures inc/dec are
simply trivial wrappers around their corresponding add/sub ops.

Let's make all the inc/dec ops optional, so that we can get rid of these
boilerplate wrappers.

The instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Palmer Dabbelt <palmer@sifive.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-17-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           | 12 -----
 arch/arc/include/asm/atomic.h             | 11 -----
 arch/arm/include/asm/atomic.h             | 11 -----
 arch/arm64/include/asm/atomic.h           | 24 ----------
 arch/h8300/include/asm/atomic.h           |  7 ---
 arch/hexagon/include/asm/atomic.h         |  6 ---
 arch/ia64/include/asm/atomic.h            |  9 ----
 arch/m68k/include/asm/atomic.h            |  5 +-
 arch/mips/include/asm/atomic.h            | 38 ----------------
 arch/parisc/include/asm/atomic.h          | 12 -----
 arch/powerpc/include/asm/atomic.h         |  4 ++
 arch/riscv/include/asm/atomic.h           | 76 -------------------------------
 arch/s390/include/asm/atomic.h            |  8 ----
 arch/sh/include/asm/atomic.h              |  6 ---
 arch/sparc/include/asm/atomic_32.h        |  5 --
 arch/sparc/include/asm/atomic_64.h        | 12 -----
 arch/x86/include/asm/atomic.h             |  5 +-
 arch/x86/include/asm/atomic64_32.h        |  4 ++
 arch/x86/include/asm/atomic64_64.h        |  5 +-
 arch/xtensa/include/asm/atomic.h          | 32 -------------
 include/asm-generic/atomic-instrumented.h | 24 ++++++++++
 include/asm-generic/atomic.h              | 13 ------
 include/asm-generic/atomic64.h            |  5 --
 include/linux/atomic.h                    | 48 +++++++++++++++++++
 24 files changed, 86 insertions(+), 296 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 25f8693c5a42..f6410cb68058 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -297,16 +297,4 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return old - 1;
 }
 
-#define atomic_dec_return(v) atomic_sub_return(1,(v))
-#define atomic64_dec_return(v) atomic64_sub_return(1,(v))
-
-#define atomic_inc_return(v) atomic_add_return(1,(v))
-#define atomic64_inc_return(v) atomic64_add_return(1,(v))
-
-#define atomic_inc(v) atomic_add(1,(v))
-#define atomic64_inc(v) atomic64_add(1,(v))
-
-#define atomic_dec(v) atomic_sub(1,(v))
-#define atomic64_dec(v) atomic64_sub(1,(v))
-
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4222e726f84c..27b95a928c1e 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -308,12 +308,6 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc(v)			atomic_add(1, v)
-#define atomic_dec(v)			atomic_sub(1, v)
-
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-
 #ifdef CONFIG_GENERIC_ATOMIC64
 
 #include <asm-generic/atomic64.h>
@@ -560,11 +554,6 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
 #endif	/* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 35fb7f504daa..5a58d061d3d2 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -245,12 +245,6 @@ ATOMIC_OPS(xor, ^=, eor)
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
-#define atomic_inc(v)		atomic_add(1, v)
-#define atomic_dec(v)		atomic_sub(1, v)
-
-#define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
-#define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
-
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
 	long long counter;
@@ -512,11 +506,6 @@ static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 }
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
-
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
 #endif
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index edbe53fa3106..078f785cd97f 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -50,21 +50,11 @@
 #define atomic_add_return_release	atomic_add_return_release
 #define atomic_add_return		atomic_add_return
 
-#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
-#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
-#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_sub_return_relaxed	atomic_sub_return_relaxed
 #define atomic_sub_return_acquire	atomic_sub_return_acquire
 #define atomic_sub_return_release	atomic_sub_return_release
 #define atomic_sub_return		atomic_sub_return
 
-#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
-#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
-#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-
 #define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
 #define atomic_fetch_add_acquire	atomic_fetch_add_acquire
 #define atomic_fetch_add_release	atomic_fetch_add_release
@@ -108,8 +98,6 @@
 	cmpxchg_release(&((v)->counter), (old), (new))
 #define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))
 
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
 #define atomic_andnot			atomic_andnot
 
 /*
@@ -124,21 +112,11 @@
 #define atomic64_add_return_release	atomic64_add_return_release
 #define atomic64_add_return		atomic64_add_return
 
-#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
-#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
-#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-
 #define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
 #define atomic64_sub_return_acquire	atomic64_sub_return_acquire
 #define atomic64_sub_return_release	atomic64_sub_return_release
 #define atomic64_sub_return		atomic64_sub_return
 
-#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
-#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
-#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
-
 #define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
 #define atomic64_fetch_add_acquire	atomic64_fetch_add_acquire
 #define atomic64_fetch_add_release	atomic64_fetch_add_release
@@ -179,8 +157,6 @@
 #define atomic64_cmpxchg_release	atomic_cmpxchg_release
 #define atomic64_cmpxchg		atomic_cmpxchg
 
-#define atomic64_inc(v)			atomic64_add(1, (v))
-#define atomic64_dec(v)			atomic64_sub(1, (v))
 #define atomic64_andnot			atomic64_andnot
 
 #endif
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 8977b5157c8f..c6b6a06231b2 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -69,13 +69,6 @@ ATOMIC_OPS(sub, -=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc_return(v)		atomic_add_return(1, v)
-#define atomic_dec_return(v)		atomic_sub_return(1, v)
-
-#define atomic_inc(v)			(void)atomic_inc_return(v)
-
-#define atomic_dec(v)			(void)atomic_dec_return(v)
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 31638f511674..311b9894ccc8 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -198,10 +198,4 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #define atomic_fetch_add_unless atomic_fetch_add_unless
 
-#define atomic_inc(v) atomic_add(1, (v))
-#define atomic_dec(v) atomic_sub(1, (v))
-
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
-
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index e4143c462e65..46a15a974bed 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -231,19 +231,10 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
 
 #define atomic64_add(i,v)		(void)atomic64_add_return((i), (v))
 #define atomic64_sub(i,v)		(void)atomic64_sub_return((i), (v))
-#define atomic64_inc(v)			atomic64_add(1, (v))
-#define atomic64_dec(v)			atomic64_sub(1, (v))
 
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 9df09c876fa2..47228b0d4163 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -126,11 +126,13 @@ static inline void atomic_inc(atomic_t *v)
 {
 	__asm__ __volatile__("addql #1,%0" : "+m" (*v));
 }
+#define atomic_inc atomic_inc
 
 static inline void atomic_dec(atomic_t *v)
 {
 	__asm__ __volatile__("subql #1,%0" : "+m" (*v));
 }
+#define atomic_dec atomic_dec
 
 static inline int atomic_dec_and_test(atomic_t *v)
 {
@@ -192,9 +194,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 
 #endif /* !CONFIG_RMW_INSNS */
 
-#define atomic_dec_return(v)	atomic_sub_return(1, (v))
-#define atomic_inc_return(v)	atomic_add_return(1, (v))
-
 static inline int atomic_sub_and_test(int i, atomic_t *v)
 {
 	char c;
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index fd3008ae164c..79be687de4ab 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -274,31 +274,12 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-#define atomic_dec_return(v) atomic_sub_return(1, (v))
-#define atomic_inc_return(v) atomic_add_return(1, (v))
-
 /*
  * atomic_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
  */
 #define atomic_dec_if_positive(v)	atomic_sub_if_positive(1, v)
 
-/*
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc(v) atomic_add(1, (v))
-
-/*
- * atomic_dec - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec(v) atomic_sub(1, (v))
-
 #ifdef CONFIG_64BIT
 
 #define ATOMIC64_INIT(i)    { (i) }
@@ -554,31 +535,12 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), (new)))
 
-#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
-#define atomic64_inc_return(v) atomic64_add_return(1, (v))
-
 /*
  * atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic64_t
  */
 #define atomic64_dec_if_positive(v)	atomic64_sub_if_positive(1, v)
 
-/*
- * atomic64_inc - increment atomic variable
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic64_inc(v) atomic64_add(1, (v))
-
-/*
- * atomic64_dec - decrement and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic64_dec(v) atomic64_sub(1, (v))
-
 #endif /* CONFIG_64BIT */
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index f85844ff6336..10bc490327c1 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -136,12 +136,6 @@ ATOMIC_OPS(xor, ^=)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_inc(v)	(atomic_add(   1,(v)))
-#define atomic_dec(v)	(atomic_add(  -1,(v)))
-
-#define atomic_inc_return(v)	(atomic_add_return(   1,(v)))
-#define atomic_dec_return(v)	(atomic_add_return(  -1,(v)))
-
 #define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_64BIT
@@ -224,12 +218,6 @@ atomic64_read(const atomic64_t *v)
 	return READ_ONCE((v)->counter);
 }
 
-#define atomic64_inc(v)		(atomic64_add(   1,(v)))
-#define atomic64_dec(v)		(atomic64_add(  -1,(v)))
-
-#define atomic64_inc_return(v)		(atomic64_add_return(   1,(v)))
-#define atomic64_dec_return(v)		(atomic64_add_return(  -1,(v)))
-
 /* exported interface */
 #define atomic64_cmpxchg(v, o, n) \
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 5d76f05d2be3..ebaefdee4a57 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -143,6 +143,7 @@ static __inline__ void atomic_inc(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic_inc atomic_inc
 
 static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
 {
@@ -175,6 +176,7 @@ static __inline__ void atomic_dec(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic_dec atomic_dec
 
 static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 {
@@ -411,6 +413,7 @@ static __inline__ void atomic64_inc(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic64_inc atomic64_inc
 
 static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v)
 {
@@ -441,6 +444,7 @@ static __inline__ void atomic64_dec(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
+#define atomic64_dec atomic64_dec
 
 static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v)
 {
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 68eef0a805ca..512b89485790 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -209,82 +209,6 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-#define ATOMIC_OP(op, func_op, I, c_type, prefix)			\
-static __always_inline							\
-void atomic##prefix##_##op(atomic##prefix##_t *v)			\
-{									\
-	atomic##prefix##_##func_op(I, v);				\
-}
-
-#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix)			\
-static __always_inline							\
-c_type atomic##prefix##_fetch_##op##_relaxed(atomic##prefix##_t *v)	\
-{									\
-	return atomic##prefix##_fetch_##func_op##_relaxed(I, v);	\
-}									\
-static __always_inline							\
-c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v)		\
-{									\
-	return atomic##prefix##_fetch_##func_op(I, v);			\
-}
-
-#define ATOMIC_OP_RETURN(op, asm_op, c_op, I, c_type, prefix)		\
-static __always_inline							\
-c_type atomic##prefix##_##op##_return_relaxed(atomic##prefix##_t *v)	\
-{									\
-        return atomic##prefix##_fetch_##op##_relaxed(v) c_op I;		\
-}									\
-static __always_inline							\
-c_type atomic##prefix##_##op##_return(atomic##prefix##_t *v)		\
-{									\
-        return atomic##prefix##_fetch_##op(v) c_op I;			\
-}
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
-        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )
-#else
-#define ATOMIC_OPS(op, asm_op, c_op, I)					\
-        ATOMIC_OP(       op, asm_op,       I,  int,   )			\
-        ATOMIC_FETCH_OP( op, asm_op,       I,  int,   )			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I,  int,   )			\
-        ATOMIC_OP(       op, asm_op,       I, long, 64)			\
-        ATOMIC_FETCH_OP( op, asm_op,       I, long, 64)			\
-        ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64)
-#endif
-
-ATOMIC_OPS(inc, add, +,  1)
-ATOMIC_OPS(dec, add, +, -1)
-
-#define atomic_inc_return_relaxed	atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed	atomic_dec_return_relaxed
-#define atomic_inc_return		atomic_inc_return
-#define atomic_dec_return		atomic_dec_return
-
-#define atomic_fetch_inc_relaxed	atomic_fetch_inc_relaxed
-#define atomic_fetch_dec_relaxed	atomic_fetch_dec_relaxed
-#define atomic_fetch_inc		atomic_fetch_inc
-#define atomic_fetch_dec		atomic_fetch_dec
-
-#ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_inc_return_relaxed	atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed	atomic64_dec_return_relaxed
-#define atomic64_inc_return		atomic64_inc_return
-#define atomic64_dec_return		atomic64_dec_return
-
-#define atomic64_fetch_inc_relaxed	atomic64_fetch_inc_relaxed
-#define atomic64_fetch_dec_relaxed	atomic64_fetch_dec_relaxed
-#define atomic64_fetch_inc		atomic64_fetch_inc
-#define atomic64_fetch_dec		atomic64_fetch_dec
-#endif
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-
 /* This is required to provide a full barrier on success. */
 static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7f5fbd595f01..376e64af951f 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -55,13 +55,9 @@ static inline void atomic_add(int i, atomic_t *v)
 	__atomic_add(i, &v->counter);
 }
 
-#define atomic_inc(_v)			atomic_add(1, _v)
-#define atomic_inc_return(_v)		atomic_add_return(1, _v)
 #define atomic_sub(_i, _v)		atomic_add(-(int)(_i), _v)
 #define atomic_sub_return(_i, _v)	atomic_add_return(-(int)(_i), _v)
 #define atomic_fetch_sub(_i, _v)	atomic_fetch_add(-(int)(_i), _v)
-#define atomic_dec(_v)			atomic_sub(1, _v)
-#define atomic_dec_return(_v)		atomic_sub_return(1, _v)
 
 #define ATOMIC_OPS(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\
@@ -166,12 +162,8 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
-#define atomic64_inc(_v)		atomic64_add(1, _v)
-#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
-#define atomic64_dec(_v)		atomic64_sub(1, _v)
-#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
 
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index d438494fa112..f37b95a80232 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -32,12 +32,6 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
-#define atomic_inc(v)			atomic_add(1, (v))
-#define atomic_dec(v)			atomic_sub(1, (v))
-
 #define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
 #define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
 
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 3a26573790c6..94c930f0bc62 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -38,8 +38,6 @@ void atomic_set(atomic_t *, int);
 
 #define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))
 #define atomic_sub(i, v)	((void)atomic_add_return(-(int)(i), (v)))
-#define atomic_inc(v)		((void)atomic_add_return(        1, (v)))
-#define atomic_dec(v)		((void)atomic_add_return(       -1, (v)))
 
 #define atomic_and(i, v)	((void)atomic_fetch_and((i), (v)))
 #define atomic_or(i, v)		((void)atomic_fetch_or((i), (v)))
@@ -48,7 +46,4 @@ void atomic_set(atomic_t *, int);
 #define atomic_sub_return(i, v)	(atomic_add_return(-(int)(i), (v)))
 #define atomic_fetch_sub(i, v)  (atomic_fetch_add (-(int)(i), (v)))
 
-#define atomic_inc_return(v)	(atomic_add_return(        1, (v)))
-#define atomic_dec_return(v)	(atomic_add_return(       -1, (v)))
-
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 634508282aea..304865c7cdbb 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -50,18 +50,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_dec_return(v)   atomic_sub_return(1, v)
-#define atomic64_dec_return(v) atomic64_sub_return(1, v)
-
-#define atomic_inc_return(v)   atomic_add_return(1, v)
-#define atomic64_inc_return(v) atomic64_add_return(1, v)
-
-#define atomic_inc(v) atomic_add(1, v)
-#define atomic64_inc(v) atomic64_add(1, v)
-
-#define atomic_dec(v) atomic_sub(1, v)
-#define atomic64_dec(v) atomic64_sub(1, v)
-
 #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 
 static inline int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 73bda4abe180..823fd2f320cf 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -92,6 +92,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic_inc arch_atomic_inc
 static __always_inline void arch_atomic_inc(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "incl %0"
@@ -104,6 +105,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic_dec arch_atomic_dec
 static __always_inline void arch_atomic_dec(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "decl %0"
@@ -177,9 +179,6 @@ static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
 	return arch_atomic_add_return(-i, v);
 }
 
-#define arch_atomic_inc_return(v)  (arch_atomic_add_return(1, v))
-#define arch_atomic_dec_return(v)  (arch_atomic_sub_return(1, v))
-
 static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
 {
 	return xadd(&v->counter, i);
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index a26810d005e0..472c7af0ed48 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -158,6 +158,7 @@ static inline long long arch_atomic64_inc_return(atomic64_t *v)
 			     "S" (v) : "memory", "ecx");
 	return a;
 }
+#define arch_atomic64_inc_return arch_atomic64_inc_return
 
 static inline long long arch_atomic64_dec_return(atomic64_t *v)
 {
@@ -166,6 +167,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v)
 			     "S" (v) : "memory", "ecx");
 	return a;
 }
+#define arch_atomic64_dec_return arch_atomic64_dec_return
 
 /**
  * arch_atomic64_add - add integer to atomic64 variable
@@ -203,6 +205,7 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic64_inc arch_atomic64_inc
 static inline void arch_atomic64_inc(atomic64_t *v)
 {
 	__alternative_atomic64(inc, inc_return, /* no output */,
@@ -215,6 +218,7 @@ static inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic64_dec arch_atomic64_dec
 static inline void arch_atomic64_dec(atomic64_t *v)
 {
 	__alternative_atomic64(dec, dec_return, /* no output */,
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 6a65228a3db6..1b282272a801 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -83,6 +83,7 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
+#define arch_atomic64_inc arch_atomic64_inc
 static __always_inline void arch_atomic64_inc(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "incq %0"
@@ -96,6 +97,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
+#define arch_atomic64_dec arch_atomic64_dec
 static __always_inline void arch_atomic64_dec(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "decq %0"
@@ -173,9 +175,6 @@ static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
 	return xadd(&v->counter, -i);
 }
 
-#define arch_atomic64_inc_return(v)  (arch_atomic64_add_return(1, (v)))
-#define arch_atomic64_dec_return(v)  (arch_atomic64_sub_return(1, (v)))
-
 static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
 {
 	return arch_cmpxchg(&v->counter, old, new);
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 332ae4eca737..7de0149e1cf7 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -197,38 +197,6 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc(v) atomic_add(1,(v))
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-#define atomic_inc_return(v) atomic_add_return(1,(v))
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec(v) atomic_sub(1,(v))
-
-/**
- * atomic_dec_return - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-#define atomic_dec_return(v) atomic_sub_return(1,(v))
-
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 6b64c200de73..12f9634750d7 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -102,29 +102,41 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
 }
 #endif
 
+#ifdef arch_atomic_inc
+#define atomic_inc atomic_inc
 static __always_inline void atomic_inc(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic_inc(v);
 }
+#endif
 
+#ifdef arch_atomic64_inc
+#define atomic64_inc atomic64_inc
 static __always_inline void atomic64_inc(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_inc(v);
 }
+#endif
 
+#ifdef arch_atomic_dec
+#define atomic_dec atomic_dec
 static __always_inline void atomic_dec(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic_dec(v);
 }
+#endif
 
+#ifdef atch_atomic64_dec
+#define atomic64_dec
 static __always_inline void atomic64_dec(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_dec(v);
 }
+#endif
 
 static __always_inline void atomic_add(int i, atomic_t *v)
 {
@@ -186,29 +198,41 @@ static __always_inline void atomic64_xor(s64 i, atomic64_t *v)
 	arch_atomic64_xor(i, v);
 }
 
+#ifdef arch_atomic_inc_return
+#define atomic_inc_return atomic_inc_return
 static __always_inline int atomic_inc_return(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_inc_return(v);
 }
+#endif
 
+#ifdef arch_atomic64_in_return
+#define atomic64_inc_return atomic64_inc_return
 static __always_inline s64 atomic64_inc_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_inc_return(v);
 }
+#endif
 
+#ifdef arch_atomic_dec_return
+#define atomic_dec_return atomic_dec_return
 static __always_inline int atomic_dec_return(atomic_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic_dec_return(v);
 }
+#endif
 
+#ifdef arch_atomic64_dec_return
+#define atomic64_dec_return atomic64_dec_return
 static __always_inline s64 atomic64_dec_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_return(v);
 }
+#endif
 
 #ifdef arch_atomic64_inc_not_zero
 #define atomic64_inc_not_zero atomic64_inc_not_zero
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 40cab858aaaa..13324aa828eb 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -196,19 +196,6 @@ static inline void atomic_sub(int i, atomic_t *v)
 	atomic_sub_return(i, v);
 }
 
-static inline void atomic_inc(atomic_t *v)
-{
-	atomic_add_return(1, v);
-}
-
-static inline void atomic_dec(atomic_t *v)
-{
-	atomic_sub_return(1, v);
-}
-
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index d3827ab97aa4..242b79ae0b57 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -56,9 +56,4 @@ extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
 #define atomic64_fetch_add_unless atomic64_fetch_add_unless
 
-#define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
-#define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
-
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 3ee8da9023cd..24f345df7ba6 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -97,11 +97,23 @@
 #endif
 #endif /* atomic_add_return_relaxed */
 
+#ifndef atomic_inc
+#define atomic_inc(v)			atomic_add(1, (v))
+#endif
+
 /* atomic_inc_return_relaxed */
 #ifndef atomic_inc_return_relaxed
+
+#ifndef atomic_inc_return
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
+#else /* atomic_inc_return */
 #define  atomic_inc_return_relaxed	atomic_inc_return
 #define  atomic_inc_return_acquire	atomic_inc_return
 #define  atomic_inc_return_release	atomic_inc_return
+#endif /* atomic_inc_return */
 
 #else /* atomic_inc_return_relaxed */
 
@@ -145,11 +157,23 @@
 #endif
 #endif /* atomic_sub_return_relaxed */
 
+#ifndef atomic_dec
+#define atomic_dec(v)			atomic_sub(1, (v))
+#endif
+
 /* atomic_dec_return_relaxed */
 #ifndef atomic_dec_return_relaxed
+
+#ifndef atomic_dec_return
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
+#else /* atomic_dec_return */
 #define  atomic_dec_return_relaxed	atomic_dec_return
 #define  atomic_dec_return_acquire	atomic_dec_return
 #define  atomic_dec_return_release	atomic_dec_return
+#endif /* atomic_dec_return */
 
 #else /* atomic_dec_return_relaxed */
 
@@ -748,11 +772,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_add_return_relaxed */
 
+#ifndef atomic64_inc
+#define atomic64_inc(v)			atomic64_add(1, (v))
+#endif
+
 /* atomic64_inc_return_relaxed */
 #ifndef atomic64_inc_return_relaxed
+
+#ifndef atomic64_inc_return
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
+#else /* atomic64_inc_return */
 #define  atomic64_inc_return_relaxed	atomic64_inc_return
 #define  atomic64_inc_return_acquire	atomic64_inc_return
 #define  atomic64_inc_return_release	atomic64_inc_return
+#endif /* atomic64_inc_return */
 
 #else /* atomic64_inc_return_relaxed */
 
@@ -797,11 +833,23 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_sub_return_relaxed */
 
+#ifndef atomic64_dec
+#define atomic64_dec(v)			atomic64_sub(1, (v))
+#endif
+
 /* atomic64_dec_return_relaxed */
 #ifndef atomic64_dec_return_relaxed
+
+#ifndef atomic64_dec_return
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
+#else /* atomic64_dec_return */
 #define  atomic64_dec_return_relaxed	atomic64_dec_return
 #define  atomic64_dec_return_acquire	atomic64_dec_return
 #define  atomic64_dec_return_release	atomic64_dec_return
+#endif /* atomic64_dec_return */
 
 #else /* atomic64_dec_return_relaxed */
 
-- 
cgit v1.2.3


From b3a2a05f9111de0b79312e577608a27b0318c0a1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:20 +0100
Subject: atomics/treewide: Make conditional inc/dec ops optional

The conditional inc/dec ops differ for atomic_t and atomic64_t:

- atomic_inc_unless_positive() is optional for atomic_t, and doesn't exist for atomic64_t.
- atomic_dec_unless_negative() is optional for atomic_t, and doesn't exist for atomic64_t.
- atomic_dec_if_positive is optional for atomic_t, and is mandatory for atomic64_t.

Let's make these consistently optional for both. At the same time, let's
clean up the existing fallbacks to use atomic_try_cmpxchg().

The instrumented atomics are updated accordingly.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-18-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h           |  1 +
 arch/arc/include/asm/atomic.h             |  1 +
 arch/arm/include/asm/atomic.h             |  1 +
 arch/arm64/include/asm/atomic.h           |  2 +
 arch/ia64/include/asm/atomic.h            | 16 -----
 arch/parisc/include/asm/atomic.h          | 23 --------
 arch/powerpc/include/asm/atomic.h         |  1 +
 arch/s390/include/asm/atomic.h            | 17 ------
 arch/sparc/include/asm/atomic_64.h        |  1 +
 arch/x86/include/asm/atomic64_32.h        |  1 +
 arch/x86/include/asm/atomic64_64.h        | 18 ------
 include/asm-generic/atomic-instrumented.h |  3 +
 include/asm-generic/atomic64.h            |  1 +
 include/linux/atomic.h                    | 97 +++++++++++++++++++++++--------
 14 files changed, 85 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index f6410cb68058..4a6a8f58c9c9 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -296,5 +296,6 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	smp_mb();
 	return old - 1;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 27b95a928c1e..8f64f3b79b8a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -517,6 +517,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 
 	return val;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 /**
  * atomic64_fetch_add_unless - add unless the number is a given value
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 5a58d061d3d2..884c241424fe 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -474,6 +474,7 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 
 	return result;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
 						  long long u)
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 078f785cd97f..9bca54dda75c 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -159,5 +159,7 @@
 
 #define atomic64_andnot			atomic64_andnot
 
+#define atomic64_dec_if_positive	atomic64_dec_if_positive
+
 #endif
 #endif
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 46a15a974bed..206530d0751b 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -215,22 +215,6 @@ ATOMIC64_FETCH_OP(xor, ^)
 	(cmpxchg(&((v)->counter), old, new))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #define atomic_add(i,v)			(void)atomic_add_return((i), (v))
 #define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
 
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 10bc490327c1..118953d41763 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -223,29 +223,6 @@ atomic64_read(const atomic64_t *v)
 	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
-/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #endif /* !CONFIG_64BIT */
 
 
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index ebaefdee4a57..a0156cb43d1f 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -488,6 +488,7 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 
 	return t;
 }
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
 #define atomic64_cmpxchg_relaxed(v, o, n) \
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 376e64af951f..fd20ab5d4cf7 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -145,23 +145,6 @@ ATOMIC64_OPS(xor)
 
 #undef ATOMIC64_OPS
 
-static inline long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long c, old, dec;
-
-	c = atomic64_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic64_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 #define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long)(_i), _v)
 #define atomic64_fetch_sub(_i, _v)	atomic64_fetch_add(-(long)(_i), _v)
 #define atomic64_sub(_i, _v)		atomic64_add(-(long)(_i), _v)
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 304865c7cdbb..6963482c81d8 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -62,5 +62,6 @@ static inline int atomic_xchg(atomic_t *v, int new)
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
 long atomic64_dec_if_positive(atomic64_t *v);
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 472c7af0ed48..ef959f02d070 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -254,6 +254,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 	return r;
 }
 
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	long long r;
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 1b282272a801..849f1c566a11 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -191,24 +191,6 @@ static inline long arch_atomic64_xchg(atomic64_t *v, long new)
 	return xchg(&v->counter, new);
 }
 
-/*
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
-{
-	s64 dec, c = arch_atomic64_read(v);
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!arch_atomic64_try_cmpxchg(v, &c, dec));
-	return dec;
-}
-
 static inline void arch_atomic64_and(long i, atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "andq %1,%0"
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 12f9634750d7..3c64e95d5ed0 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -243,11 +243,14 @@ static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
 }
 #endif
 
+#ifdef arch_atomic64_dec_if_positive
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_dec_if_positive(v);
 }
+#endif
 
 #ifdef arch_atomic_dec_and_test
 #define atomic_dec_and_test atomic_dec_and_test
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 242b79ae0b57..97b28b7f1f29 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -51,6 +51,7 @@ ATOMIC64_OPS(xor)
 #undef ATOMIC64_OP
 
 extern long long atomic64_dec_if_positive(atomic64_t *v);
+#define atomic64_dec_if_positive atomic64_dec_if_positive
 extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n);
 extern long long atomic64_xchg(atomic64_t *v, long long new);
 extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 24f345df7ba6..93fe5b4041e1 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -683,28 +683,30 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline bool atomic_inc_unless_negative(atomic_t *p)
+static inline bool atomic_inc_unless_negative(atomic_t *v)
 {
-	int v, v1;
-	for (v = 0; v >= 0; v = v1) {
-		v1 = atomic_cmpxchg(p, v, v + 1);
-		if (likely(v1 == v))
-			return true;
-	}
-	return false;
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c + 1));
+
+	return true;
 }
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline bool atomic_dec_unless_positive(atomic_t *p)
+static inline bool atomic_dec_unless_positive(atomic_t *v)
 {
-	int v, v1;
-	for (v = 0; v <= 0; v = v1) {
-		v1 = atomic_cmpxchg(p, v, v - 1);
-		if (likely(v1 == v))
-			return true;
-	}
-	return false;
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c - 1));
+
+	return true;
 }
 #endif
 
@@ -718,17 +720,14 @@ static inline bool atomic_dec_unless_positive(atomic_t *p)
 #ifndef atomic_dec_if_positive
 static inline int atomic_dec_if_positive(atomic_t *v)
 {
-	int c, old, dec;
-	c = atomic_read(v);
-	for (;;) {
+	int dec, c = atomic_read(v);
+
+	do {
 		dec = c - 1;
 		if (unlikely(dec < 0))
 			break;
-		old = atomic_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
+	} while (!atomic_try_cmpxchg(v, &c, dec));
+
 	return dec;
 }
 #endif
@@ -1290,6 +1289,56 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v
 }
 #endif
 
+#ifndef atomic64_inc_unless_negative
+static inline bool atomic64_inc_unless_negative(atomic64_t *v)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c + 1));
+
+	return true;
+}
+#endif
+
+#ifndef atomic64_dec_unless_positive
+static inline bool atomic64_dec_unless_positive(atomic64_t *v)
+{
+	long long c = atomic64_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c - 1));
+
+	return true;
+}
+#endif
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic64 variable, v, was not decremented.
+ */
+#ifndef atomic64_dec_if_positive
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long dec, c = atomic64_read(v);
+
+	do {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, dec));
+
+	return dec;
+}
+#endif
+
 #define atomic64_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic64_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
-- 
cgit v1.2.3


From 7cc7eaad49c30ac165ecf84d95b26f7e0d53bd97 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 21 Jun 2018 13:13:21 +0100
Subject: atomics/treewide: Clean up '*_andnot()' ifdeffery

The ifdeffery for atomic*_{fetch_,}andnot() is unlike that for all the
other atomics. If atomic*_andnot() is not defined, the corresponding
atomic*_fetch_andnot() is assumed to not be defined.

Additionally, the fallbacks for the various ordering cases are written
much later in atomic.h as static inlines.

This isn't problematic today, but gets in the way of scripting the
generation of atomics. To prepare for scripting, this patch:

* Switches to separate ifdefs for atomic*_andnot() and
  atomic*_fetch_andnot(), updating implementations as appropriate.

* Moves the fallbacks into the standards ifdefs, as macro expansions
  rather than static inlines.

* Removes trivial andnot implementations from architectures, where these
  are superseded by core code.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/20180621121321.4761-19-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arc/include/asm/atomic.h |  8 ++--
 arch/arm/include/asm/atomic.h |  2 +
 include/linux/atomic.h        | 96 ++++++++++++++-----------------------------
 3 files changed, 36 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 8f64f3b79b8a..4e0072730241 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -187,7 +187,8 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define atomic_andnot atomic_andnot
+#define atomic_andnot		atomic_andnot
+#define atomic_fetch_andnot	atomic_fetch_andnot
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
@@ -296,8 +297,6 @@ ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
 	ATOMIC_FETCH_OP(op, c_op, asm_op)
 
 ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
-#define atomic_andnot(mask, v) atomic_and(~(mask), (v))
-#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v))
 ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
 ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 
@@ -430,7 +429,8 @@ static inline long long atomic64_fetch_##op(long long a, atomic64_t *v)	\
 	ATOMIC64_OP_RETURN(op, op1, op2)				\
 	ATOMIC64_FETCH_OP(op, op1, op2)
 
-#define atomic64_andnot atomic64_andnot
+#define atomic64_andnot		atomic64_andnot
+#define atomic64_fetch_andnot	atomic64_fetch_andnot
 
 ATOMIC64_OPS(add, add.f, adc)
 ATOMIC64_OPS(sub, sub.f, sbc)
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 884c241424fe..f74756641410 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -216,6 +216,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_fetch_andnot		atomic_fetch_andnot
+
 #endif /* __LINUX_ARM_ARCH__ */
 
 #define ATOMIC_OPS(op, c_op, asm_op)					\
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 93fe5b4041e1..8e04f1f69bd9 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -354,12 +354,22 @@
 #endif
 #endif /* atomic_fetch_and_relaxed */
 
-#ifdef atomic_andnot
-/* atomic_fetch_andnot_relaxed */
+#ifndef atomic_andnot
+#define atomic_andnot(i, v)		atomic_and(~(int)(i), (v))
+#endif
+
 #ifndef atomic_fetch_andnot_relaxed
-#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot
-#define atomic_fetch_andnot_acquire	atomic_fetch_andnot
-#define atomic_fetch_andnot_release	atomic_fetch_andnot
+
+#ifndef atomic_fetch_andnot
+#define atomic_fetch_andnot(i, v)		atomic_fetch_and(~(int)(i), (v))
+#define atomic_fetch_andnot_relaxed(i, v)	atomic_fetch_and_relaxed(~(int)(i), (v))
+#define atomic_fetch_andnot_acquire(i, v)	atomic_fetch_and_acquire(~(int)(i), (v))
+#define atomic_fetch_andnot_release(i, v)	atomic_fetch_and_release(~(int)(i), (v))
+#else /* atomic_fetch_andnot */
+#define atomic_fetch_andnot_relaxed		atomic_fetch_andnot
+#define atomic_fetch_andnot_acquire		atomic_fetch_andnot
+#define atomic_fetch_andnot_release		atomic_fetch_andnot
+#endif /* atomic_fetch_andnot */
 
 #else /* atomic_fetch_andnot_relaxed */
 
@@ -378,7 +388,6 @@
 	__atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
 #endif
 #endif /* atomic_fetch_andnot_relaxed */
-#endif /* atomic_andnot */
 
 /* atomic_fetch_xor_relaxed */
 #ifndef atomic_fetch_xor_relaxed
@@ -655,33 +664,6 @@ static inline bool atomic_add_negative(int i, atomic_t *v)
 }
 #endif
 
-#ifndef atomic_andnot
-static inline void atomic_andnot(int i, atomic_t *v)
-{
-	atomic_and(~i, v);
-}
-
-static inline int atomic_fetch_andnot(int i, atomic_t *v)
-{
-	return atomic_fetch_and(~i, v);
-}
-
-static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v)
-{
-	return atomic_fetch_and_relaxed(~i, v);
-}
-
-static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v)
-{
-	return atomic_fetch_and_acquire(~i, v);
-}
-
-static inline int atomic_fetch_andnot_release(int i, atomic_t *v)
-{
-	return atomic_fetch_and_release(~i, v);
-}
-#endif
-
 #ifndef atomic_inc_unless_negative
 static inline bool atomic_inc_unless_negative(atomic_t *v)
 {
@@ -1029,12 +1011,22 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 #endif
 #endif /* atomic64_fetch_and_relaxed */
 
-#ifdef atomic64_andnot
-/* atomic64_fetch_andnot_relaxed */
+#ifndef atomic64_andnot
+#define atomic64_andnot(i, v)		atomic64_and(~(long long)(i), (v))
+#endif
+
 #ifndef atomic64_fetch_andnot_relaxed
-#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot
-#define atomic64_fetch_andnot_acquire	atomic64_fetch_andnot
-#define atomic64_fetch_andnot_release	atomic64_fetch_andnot
+
+#ifndef atomic64_fetch_andnot
+#define atomic64_fetch_andnot(i, v)		atomic64_fetch_and(~(long long)(i), (v))
+#define atomic64_fetch_andnot_relaxed(i, v)	atomic64_fetch_and_relaxed(~(long long)(i), (v))
+#define atomic64_fetch_andnot_acquire(i, v)	atomic64_fetch_and_acquire(~(long long)(i), (v))
+#define atomic64_fetch_andnot_release(i, v)	atomic64_fetch_and_release(~(long long)(i), (v))
+#else /* atomic64_fetch_andnot */
+#define atomic64_fetch_andnot_relaxed		atomic64_fetch_andnot
+#define atomic64_fetch_andnot_acquire		atomic64_fetch_andnot
+#define atomic64_fetch_andnot_release		atomic64_fetch_andnot
+#endif /* atomic64_fetch_andnot */
 
 #else /* atomic64_fetch_andnot_relaxed */
 
@@ -1053,7 +1045,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 	__atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
 #endif
 #endif /* atomic64_fetch_andnot_relaxed */
-#endif /* atomic64_andnot */
 
 /* atomic64_fetch_xor_relaxed */
 #ifndef atomic64_fetch_xor_relaxed
@@ -1262,33 +1253,6 @@ static inline bool atomic64_add_negative(long long i, atomic64_t *v)
 }
 #endif
 
-#ifndef atomic64_andnot
-static inline void atomic64_andnot(long long i, atomic64_t *v)
-{
-	atomic64_and(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_relaxed(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_acquire(~i, v);
-}
-
-static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v)
-{
-	return atomic64_fetch_and_release(~i, v);
-}
-#endif
-
 #ifndef atomic64_inc_unless_negative
 static inline bool atomic64_inc_unless_negative(atomic64_t *v)
 {
-- 
cgit v1.2.3


From 75a040ff14d9a99fc041f5e1d8f09541cab13ba4 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 1 Apr 2018 01:00:36 +0300
Subject: locking/refcounts: Include fewer headers in <linux/refcount.h>

Debloat <linux/refcount.h>'s dependencies:

- <linux/kernel.h> is not needed, but <linux/compiler.h> is.
- <linux/mutex.h> is not needed, only a forward declaration of "struct mutex".
- <linux/spinlock.h> is not needed, <linux/spinlock_types.h> is enough.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/lkml/20180331220036.GA7676@avx2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/refcount.h | 1 +
 include/linux/refcount.h        | 7 ++++---
 lib/refcount.c                  | 2 ++
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4cf11d88d3b3..19b90521954c 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -5,6 +5,7 @@
  * PaX/grsecurity.
  */
 #include <linux/refcount.h>
+#include <asm/bug.h>
 
 /*
  * This is the first portion of the refcount error handling, which lives in
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 4193c41e383a..c36addd27dd5 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -3,9 +3,10 @@
 #define _LINUX_REFCOUNT_H
 
 #include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock_types.h>
+
+struct mutex;
 
 /**
  * struct refcount_t - variant of atomic_t specialized for reference counts
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..4bd842f20749 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -35,7 +35,9 @@
  *
  */
 
+#include <linux/mutex.h>
 #include <linux/refcount.h>
+#include <linux/spinlock.h>
 #include <linux/bug.h>
 
 #ifdef CONFIG_REFCOUNT_FULL
-- 
cgit v1.2.3


From 0af5107cd0640ee3424e337b492e4b11b450ce28 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Thu, 17 May 2018 11:14:18 +0300
Subject: net/mlx5: Add RoCE RX ICRC encapsulated counter

Add capability bit in PCAM register and RoCE ICRC error counter
to PPCNT register.

Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f810772e80c0..deb3a459a22e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1681,7 +1681,11 @@ struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits {
 
 	u8         rx_buffer_full_low[0x20];
 
-	u8         reserved_at_1c0[0x600];
+	u8         rx_icrc_encapsulated_high[0x20];
+
+	u8         rx_icrc_encapsulated_low[0x20];
+
+	u8         reserved_at_200[0x5c0];
 };
 
 struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits {
@@ -8044,8 +8048,9 @@ struct mlx5_ifc_peir_reg_bits {
 };
 
 struct mlx5_ifc_pcam_enhanced_features_bits {
-	u8         reserved_at_0[0x76];
-
+	u8         reserved_at_0[0x6d];
+	u8         rx_icrc_encapsulated_counter[0x1];
+	u8	   reserved_at_6e[0x8];
 	u8         pfcc_mask[0x1];
 	u8         reserved_at_77[0x4];
 	u8         rx_buffer_fullness_counters[0x1];
-- 
cgit v1.2.3


From be3035e3627d2570de4c2c612ecd095968986437 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 25 May 2018 21:24:34 +0200
Subject: ARM: bcm2835: Add GET_THROTTLED firmware property

Recent Raspberry Pi firmware provides a mailbox property to detect
under-voltage conditions. Here is the current definition.

The u32 value returned by the firmware is divided into 2 parts:
  - lower 16-bits are the live value
  - upper 16-bits are the history or sticky value

  Bits:
  0: undervoltage
  1: arm frequency capped
  2: currently throttled
  16: undervoltage has occurred
  17: arm frequency capped has occurred
  18: throttling has occurred

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 include/soc/bcm2835/raspberrypi-firmware.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h
index 8ee8991aa099..c4a5c9e9fb47 100644
--- a/include/soc/bcm2835/raspberrypi-firmware.h
+++ b/include/soc/bcm2835/raspberrypi-firmware.h
@@ -75,6 +75,7 @@ enum rpi_firmware_property_tag {
 	RPI_FIRMWARE_GET_EDID_BLOCK =                         0x00030020,
 	RPI_FIRMWARE_GET_CUSTOMER_OTP =                       0x00030021,
 	RPI_FIRMWARE_GET_DOMAIN_STATE =                       0x00030030,
+	RPI_FIRMWARE_GET_THROTTLED =                          0x00030046,
 	RPI_FIRMWARE_SET_CLOCK_STATE =                        0x00038001,
 	RPI_FIRMWARE_SET_CLOCK_RATE =                         0x00038002,
 	RPI_FIRMWARE_SET_VOLTAGE =                            0x00038003,
-- 
cgit v1.2.3


From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: split rhashtable.h

Due to the use of rhashtables in net namespaces,
rhashtable.h is included in lots of the kernel,
so a small changes can required a large recompilation.
This makes development painful.

This patch splits out rhashtable-types.h which just includes
the major type declarations, and does not include (non-trivial)
inline code.  rhashtable.h is no longer included by anything
in the include/ directory.
Common include files only include rhashtable-types.h so a large
recompilation is only triggered when that changes.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                                |   2 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h |   1 +
 include/linux/ipc.h                        |   2 +-
 include/linux/ipc_namespace.h              |   2 +-
 include/linux/mroute_base.h                |   2 +-
 include/linux/rhashtable-types.h           | 139 +++++++++++++++++++++++++++++
 include/linux/rhashtable.h                 | 127 +-------------------------
 include/net/inet_frag.h                    |   2 +-
 include/net/netfilter/nf_flow_table.h      |   2 +-
 include/net/sctp/structs.h                 |   2 +-
 include/net/seg6.h                         |   2 +-
 include/net/seg6_hmac.h                    |   2 +-
 ipc/msg.c                                  |   1 +
 ipc/sem.c                                  |   1 +
 ipc/shm.c                                  |   1 +
 ipc/util.c                                 |   1 +
 lib/rhashtable.c                           |   1 +
 net/ipv4/inet_fragment.c                   |   1 +
 net/ipv4/ipmr.c                            |   1 +
 net/ipv4/ipmr_base.c                       |   1 +
 net/ipv6/ip6mr.c                           |   1 +
 net/ipv6/seg6.c                            |   1 +
 net/ipv6/seg6_hmac.c                       |   1 +
 net/netfilter/nf_tables_api.c              |   1 +
 net/sctp/input.c                           |   1 +
 net/sctp/socket.c                          |   1 +
 26 files changed, 166 insertions(+), 133 deletions(-)
 create mode 100644 include/linux/rhashtable-types.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index edf3cf5ea691..99e5cef8172e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12162,7 +12162,9 @@ M:	Herbert Xu <herbert@gondor.apana.org.au>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	lib/rhashtable.c
+F:	lib/test_rhashtable.c
 F:	include/linux/rhashtable.h
+F:	include/linux/rhashtable-types.h
 
 RICOH R5C592 MEMORYSTICK DRIVER
 M:	Maxim Levitsky <maximlevitsky@gmail.com>
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 0dbe2d9e22d6..1adb968b8354 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -46,6 +46,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/ptp_clock_kernel.h>
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 6cc2df7f7ac9..e1c9eea6015b 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -4,7 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/uidgid.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <uapi/linux/ipc.h>
 #include <linux/refcount.h>
 
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index b5630c8eb2f3..6cea726612b7 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -9,7 +9,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ns_common.h>
 #include <linux/refcount.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct user_namespace;
 
diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index d633f737b3c6..fd436cdd4725 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -2,7 +2,7 @@
 #define __LINUX_MROUTE_BASE_H
 
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/spinlock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
new file mode 100644
index 000000000000..9740063ff13b
--- /dev/null
+++ b/include/linux/rhashtable-types.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Resizable, Scalable, Concurrent Hash Table
+ *
+ * Simple structures that might be needed in include
+ * files.
+ */
+
+#ifndef _LINUX_RHASHTABLE_TYPES_H
+#define _LINUX_RHASHTABLE_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+
+struct rhash_head {
+	struct rhash_head __rcu		*next;
+};
+
+struct rhlist_head {
+	struct rhash_head		rhead;
+	struct rhlist_head __rcu	*next;
+};
+
+struct bucket_table;
+
+/**
+ * struct rhashtable_compare_arg - Key for the function rhashtable_compare
+ * @ht: Hash table
+ * @key: Key to compare against
+ */
+struct rhashtable_compare_arg {
+	struct rhashtable *ht;
+	const void *key;
+};
+
+typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
+typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
+			       const void *obj);
+
+/**
+ * struct rhashtable_params - Hash table construction parameters
+ * @nelem_hint: Hint on number of elements, should be 75% of desired size
+ * @key_len: Length of key
+ * @key_offset: Offset of key in struct to be hashed
+ * @head_offset: Offset of rhash_head in struct to be hashed
+ * @max_size: Maximum size while expanding
+ * @min_size: Minimum size while shrinking
+ * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
+ * @automatic_shrinking: Enable automatic shrinking of tables
+ * @nulls_base: Base value to generate nulls marker
+ * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
+ * @obj_hashfn: Function to hash object
+ * @obj_cmpfn: Function to compare key with object
+ */
+struct rhashtable_params {
+	u16			nelem_hint;
+	u16			key_len;
+	u16			key_offset;
+	u16			head_offset;
+	unsigned int		max_size;
+	u16			min_size;
+	bool			automatic_shrinking;
+	u8			locks_mul;
+	u32			nulls_base;
+	rht_hashfn_t		hashfn;
+	rht_obj_hashfn_t	obj_hashfn;
+	rht_obj_cmpfn_t		obj_cmpfn;
+};
+
+/**
+ * struct rhashtable - Hash table handle
+ * @tbl: Bucket table
+ * @key_len: Key length for hashfn
+ * @max_elems: Maximum number of elements in table
+ * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
+ * @run_work: Deferred worker to expand/shrink asynchronously
+ * @mutex: Mutex to protect current/future table swapping
+ * @lock: Spin lock to protect walker list
+ * @nelems: Number of elements in table
+ */
+struct rhashtable {
+	struct bucket_table __rcu	*tbl;
+	unsigned int			key_len;
+	unsigned int			max_elems;
+	struct rhashtable_params	p;
+	bool				rhlist;
+	struct work_struct		run_work;
+	struct mutex                    mutex;
+	spinlock_t			lock;
+	atomic_t			nelems;
+};
+
+/**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhtable
+ */
+struct rhltable {
+	struct rhashtable ht;
+};
+
+/**
+ * struct rhashtable_walker - Hash table walker
+ * @list: List entry on list of walkers
+ * @tbl: The table that we were walking over
+ */
+struct rhashtable_walker {
+	struct list_head list;
+	struct bucket_table *tbl;
+};
+
+/**
+ * struct rhashtable_iter - Hash table iterator
+ * @ht: Table to iterate through
+ * @p: Current pointer
+ * @list: Current hash list pointer
+ * @walker: Associated rhashtable walker
+ * @slot: Current slot
+ * @skip: Number of entries to skip in slot
+ */
+struct rhashtable_iter {
+	struct rhashtable *ht;
+	struct rhash_head *p;
+	struct rhlist_head *list;
+	struct rhashtable_walker walker;
+	unsigned int slot;
+	unsigned int skip;
+	bool end_of_table;
+};
+
+int rhashtable_init(struct rhashtable *ht,
+		    const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+		  const struct rhashtable_params *params);
+
+#endif /* _LINUX_RHASHTABLE_TYPES_H */
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 4e1f535c2034..48754ab07cdf 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
@@ -17,16 +18,14 @@
 #ifndef _LINUX_RHASHTABLE_H
 #define _LINUX_RHASHTABLE_H
 
-#include <linux/atomic.h>
-#include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
-#include <linux/mutex.h>
 #include <linux/rculist.h>
 
+#include <linux/rhashtable-types.h>
 /*
  * The end of the chain is marked with a special nulls marks which has
  * the following format:
@@ -64,15 +63,6 @@
  */
 #define RHT_ELASTICITY	16u
 
-struct rhash_head {
-	struct rhash_head __rcu		*next;
-};
-
-struct rhlist_head {
-	struct rhash_head		rhead;
-	struct rhlist_head __rcu	*next;
-};
-
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
@@ -102,114 +92,6 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-/**
- * struct rhashtable_compare_arg - Key for the function rhashtable_compare
- * @ht: Hash table
- * @key: Key to compare against
- */
-struct rhashtable_compare_arg {
-	struct rhashtable *ht;
-	const void *key;
-};
-
-typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
-typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
-			       const void *obj);
-
-struct rhashtable;
-
-/**
- * struct rhashtable_params - Hash table construction parameters
- * @nelem_hint: Hint on number of elements, should be 75% of desired size
- * @key_len: Length of key
- * @key_offset: Offset of key in struct to be hashed
- * @head_offset: Offset of rhash_head in struct to be hashed
- * @max_size: Maximum size while expanding
- * @min_size: Minimum size while shrinking
- * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
- * @automatic_shrinking: Enable automatic shrinking of tables
- * @nulls_base: Base value to generate nulls marker
- * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
- * @obj_hashfn: Function to hash object
- * @obj_cmpfn: Function to compare key with object
- */
-struct rhashtable_params {
-	u16			nelem_hint;
-	u16			key_len;
-	u16			key_offset;
-	u16			head_offset;
-	unsigned int		max_size;
-	u16			min_size;
-	bool			automatic_shrinking;
-	u8			locks_mul;
-	u32			nulls_base;
-	rht_hashfn_t		hashfn;
-	rht_obj_hashfn_t	obj_hashfn;
-	rht_obj_cmpfn_t		obj_cmpfn;
-};
-
-/**
- * struct rhashtable - Hash table handle
- * @tbl: Bucket table
- * @key_len: Key length for hashfn
- * @max_elems: Maximum number of elements in table
- * @p: Configuration parameters
- * @rhlist: True if this is an rhltable
- * @run_work: Deferred worker to expand/shrink asynchronously
- * @mutex: Mutex to protect current/future table swapping
- * @lock: Spin lock to protect walker list
- * @nelems: Number of elements in table
- */
-struct rhashtable {
-	struct bucket_table __rcu	*tbl;
-	unsigned int			key_len;
-	unsigned int			max_elems;
-	struct rhashtable_params	p;
-	bool				rhlist;
-	struct work_struct		run_work;
-	struct mutex                    mutex;
-	spinlock_t			lock;
-	atomic_t			nelems;
-};
-
-/**
- * struct rhltable - Hash table with duplicate objects in a list
- * @ht: Underlying rhtable
- */
-struct rhltable {
-	struct rhashtable ht;
-};
-
-/**
- * struct rhashtable_walker - Hash table walker
- * @list: List entry on list of walkers
- * @tbl: The table that we were walking over
- */
-struct rhashtable_walker {
-	struct list_head list;
-	struct bucket_table *tbl;
-};
-
-/**
- * struct rhashtable_iter - Hash table iterator
- * @ht: Table to iterate through
- * @p: Current pointer
- * @list: Current hash list pointer
- * @walker: Associated rhashtable walker
- * @slot: Current slot
- * @skip: Number of entries to skip in slot
- */
-struct rhashtable_iter {
-	struct rhashtable *ht;
-	struct rhash_head *p;
-	struct rhlist_head *list;
-	struct rhashtable_walker walker;
-	unsigned int slot;
-	unsigned int skip;
-	bool end_of_table;
-};
-
 static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
 {
 	return NULLS_MARKER(ht->p.nulls_base + hash);
@@ -376,11 +258,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 }
 #endif /* CONFIG_PROVE_LOCKING */
 
-int rhashtable_init(struct rhashtable *ht,
-		    const struct rhashtable_params *params);
-int rhltable_init(struct rhltable *hlt,
-		  const struct rhashtable_params *params);
-
 void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
 			     struct rhash_head *obj);
 
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index ed07e3786d98..f4272a29dc44 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -2,7 +2,7 @@
 #ifndef __NET_FRAG_H__
 #define __NET_FRAG_H__
 
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 struct netns_frags {
 	/* sysctls */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ba9fa4592f2b..0e355f4a3d76 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -4,7 +4,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/rcupdate.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index dbe1b911a24d..e0f962d27386 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -48,7 +48,7 @@
 #define __sctp_structs_h__
 
 #include <linux/ktime.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 #include <linux/socket.h>	/* linux/in.h needs this!!    */
 #include <linux/in.h>		/* We get struct sockaddr_in. */
 #include <linux/in6.h>		/* We get struct in6_addr     */
diff --git a/include/net/seg6.h b/include/net/seg6.h
index e029e301faa5..2567941a2f32 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -18,7 +18,7 @@
 #include <linux/ipv6.h>
 #include <net/lwtunnel.h>
 #include <linux/seg6.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
 				     __be32 to)
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
index 69c3a106056b..7fda469e2758 100644
--- a/include/net/seg6_hmac.h
+++ b/include/net/seg6_hmac.h
@@ -22,7 +22,7 @@
 #include <linux/route.h>
 #include <net/seg6.h>
 #include <linux/seg6_hmac.h>
-#include <linux/rhashtable.h>
+#include <linux/rhashtable-types.h>
 
 #define SEG6_HMAC_MAX_DIGESTSIZE	160
 #define SEG6_HMAC_RING_SIZE		256
diff --git a/ipc/msg.c b/ipc/msg.c
index 3b6545302598..203281198079 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -38,6 +38,7 @@
 #include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
diff --git a/ipc/sem.c b/ipc/sem.c
index 5af1943ad782..29c0347ef11d 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -86,6 +86,7 @@
 #include <linux/ipc_namespace.h>
 #include <linux/sched/wake_q.h>
 #include <linux/nospec.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 #include "util.h"
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..d4daf78df6da 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -43,6 +43,7 @@
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <linux/uaccess.h>
 
diff --git a/ipc/util.c b/ipc/util.c
index 4e81182fa0ac..fdffff41f65b 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -63,6 +63,7 @@
 #include <linux/rwsem.h>
 #include <linux/memory.h>
 #include <linux/ipc_namespace.h>
+#include <linux/rhashtable.h>
 
 #include <asm/unistd.h>
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 9427b5766134..c9fafea7dc6e 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -28,6 +28,7 @@
 #include <linux/rhashtable.h>
 #include <linux/err.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c9e35b81d093..316518f87294 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -20,6 +20,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/sock.h>
 #include <net/inet_frag.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9f79b9803a16..82f914122f1b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/compat.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 #include <net/ip_tunnels.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index cafb0506c8c9..1ad9aa62a97b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -2,6 +2,7 @@
  * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mroute_base.h>
 
 /* Sets everything common except 'dev', since that is done under locking */
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0d0f0053bb11..d0b7e0249c13 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/raw.h>
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 0fdf2a55e746..8d0ba757a46c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -17,6 +17,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..b1791129a875 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -22,6 +22,7 @@
 #include <linux/icmpv6.h>
 #include <linux/mroute6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..3f211e1025c1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba8a6e6c36fa..9bbc5f92c941 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -56,6 +56,7 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
 #include <net/net_namespace.h>
+#include <linux/rhashtable.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d20f7addee19..0e91e83eea5a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -66,6 +66,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
-- 
cgit v1.2.3


From 9f9a707738aa7a8b9f78a641b83927ada256a626 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: remove nulls_base and related code.

This "feature" is unused, undocumented, and untested and so doesn't
really belong.  A patch is under development to properly implement
support for detecting when a search gets diverted down a different
chain, which the common purpose of nulls markers.

This patch actually fixes a bug too.  The table resizing allows a
table to grow to 2^31 buckets, but the hash is truncated to 27 bits -
any growth beyond 2^27 is wasteful an ineffective.

This patch results in NULLS_MARKER(0) being used for all chains,
and leaves the use of rht_is_a_null() to test for it.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable-types.h |  2 --
 include/linux/rhashtable.h       | 33 +++------------------------------
 lib/rhashtable.c                 |  8 --------
 lib/test_rhashtable.c            |  5 +----
 net/core/xdp.c                   |  4 ++--
 5 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 9740063ff13b..763d613ce2c2 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -50,7 +50,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
  * @min_size: Minimum size while shrinking
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 32)
  * @automatic_shrinking: Enable automatic shrinking of tables
- * @nulls_base: Base value to generate nulls marker
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
  * @obj_cmpfn: Function to compare key with object
@@ -64,7 +63,6 @@ struct rhashtable_params {
 	u16			min_size;
 	bool			automatic_shrinking;
 	u8			locks_mul;
-	u32			nulls_base;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
 	rht_obj_cmpfn_t		obj_cmpfn;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 48754ab07cdf..d9f719af7936 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -28,25 +28,8 @@
 #include <linux/rhashtable-types.h>
 /*
  * The end of the chain is marked with a special nulls marks which has
- * the following format:
- *
- * +-------+-----------------------------------------------------+-+
- * | Base  |                      Hash                           |1|
- * +-------+-----------------------------------------------------+-+
- *
- * Base (4 bits) : Reserved to distinguish between multiple tables.
- *                 Specified via &struct rhashtable_params.nulls_base.
- * Hash (27 bits): Full hash (unmasked) of first element added to bucket
- * 1 (1 bit)     : Nulls marker (always set)
- *
- * The remaining bits of the next pointer remain unused for now.
+ * the least significant bit set.
  */
-#define RHT_BASE_BITS		4
-#define RHT_HASH_BITS		27
-#define RHT_BASE_SHIFT		RHT_HASH_BITS
-
-/* Base bits plus 1 bit for nulls marker */
-#define RHT_HASH_RESERVED_SPACE	(RHT_BASE_BITS + 1)
 
 /* Maximum chain length before rehash
  *
@@ -92,24 +75,14 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
-{
-	return NULLS_MARKER(ht->p.nulls_base + hash);
-}
-
 #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
-	((ptr) = (typeof(ptr)) rht_marker(ht, hash))
+	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 {
 	return ((unsigned long) ptr & 1);
 }
 
-static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
-{
-	return ((unsigned long) ptr) >> 1;
-}
-
 static inline void *rht_obj(const struct rhashtable *ht,
 			    const struct rhash_head *he)
 {
@@ -119,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht,
 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
 					    unsigned int hash)
 {
-	return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
+	return hash & (tbl->size - 1);
 }
 
 static inline unsigned int rht_key_get_hash(struct rhashtable *ht,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index c9fafea7dc6e..688693c919be 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -995,7 +995,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
  *	.key_offset = offsetof(struct test_obj, key),
  *	.key_len = sizeof(int),
  *	.hashfn = jhash,
- *	.nulls_base = (1U << RHT_BASE_SHIFT),
  * };
  *
  * Configuration Example 2: Variable length keys
@@ -1029,9 +1028,6 @@ int rhashtable_init(struct rhashtable *ht,
 	    (params->obj_hashfn && !params->obj_cmpfn))
 		return -EINVAL;
 
-	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
-		return -EINVAL;
-
 	memset(ht, 0, sizeof(*ht));
 	mutex_init(&ht->mutex);
 	spin_lock_init(&ht->lock);
@@ -1096,10 +1092,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
 {
 	int err;
 
-	/* No rhlist NULLs marking for now. */
-	if (params->nulls_base)
-		return -EINVAL;
-
 	err = rhashtable_init(&hlt->ht, params);
 	hlt->ht.rhlist = true;
 	return err;
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 6ca59ffcacbe..82ac39ce5310 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed)
 {
 	const struct test_obj_rhl *obj = data;
 
-	return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+	return (obj->value.id % 10);
 }
 
 static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
@@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = {
 	.key_offset = offsetof(struct test_obj, value),
 	.key_len = sizeof(struct test_obj_val),
 	.hashfn = jhash,
-	.nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
 static struct rhashtable_params test_rht_params_dup = {
@@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries)
 	if (!obj_in_table)
 		goto out_free;
 
-	/* nulls_base not supported in rhlist interface */
-	test_rht_params.nulls_base = 0;
 	err = rhltable_init(&rhlt, &test_rht_params);
 	if (WARN_ON(err))
 		goto out_free;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9d1f22072d5d..31c58719b5a9 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -45,8 +45,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
 	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
 		     != sizeof(u32));
 
-	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
-	return key << RHT_HASH_RESERVED_SPACE;
+	/* Use cyclic increasing ID as direct hash key */
+	return key;
 }
 
 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
-- 
cgit v1.2.3


From 9b4f64a227b6f462482a8cc68c7134dc6e26f1c1 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: simplify INIT_RHT_NULLS_HEAD()

The 'ht' and 'hash' arguments to INIT_RHT_NULLS_HEAD() are
no longer used - so drop them.  This allows us to also
remove the nhash argument from nested_table_alloc().

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h |  2 +-
 lib/rhashtable.c           | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d9f719af7936..3f3a182bd0b4 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -75,7 +75,7 @@ struct bucket_table {
 	struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
 };
 
-#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
+#define INIT_RHT_NULLS_HEAD(ptr)	\
 	((ptr) = (typeof(ptr)) NULLS_MARKER(0))
 
 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 688693c919be..a81cd27d518c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -116,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head)
 
 static union nested_table *nested_table_alloc(struct rhashtable *ht,
 					      union nested_table __rcu **prev,
-					      unsigned int shifted,
-					      unsigned int nhash)
+					      unsigned int shifted)
 {
 	union nested_table *ntbl;
 	int i;
@@ -130,8 +129,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht,
 
 	if (ntbl && shifted) {
 		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
-			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
-					    (i << shifted) | nhash);
+			INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
 	}
 
 	rcu_assign_pointer(*prev, ntbl);
@@ -157,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
 		return NULL;
 
 	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
-				0, 0)) {
+				0)) {
 		kfree(tbl);
 		return NULL;
 	}
@@ -207,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
 	tbl->hash_rnd = get_random_u32();
 
 	for (i = 0; i < nbuckets; i++)
-		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
+		INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
 
 	return tbl;
 }
@@ -1217,7 +1215,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 	nhash = index;
 	shifted = tbl->nest;
 	ntbl = nested_table_alloc(ht, &ntbl[index].table,
-				  size <= (1 << shift) ? shifted : 0, nhash);
+				  size <= (1 << shift) ? shifted : 0);
 
 	while (ntbl && size > (1 << shift)) {
 		index = hash & ((1 << shift) - 1);
@@ -1226,8 +1224,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
 		nhash |= index << shifted;
 		shifted += shift;
 		ntbl = nested_table_alloc(ht, &ntbl[index].table,
-					  size <= (1 << shift) ? shifted : 0,
-					  nhash);
+					  size <= (1 << shift) ? shifted : 0);
 	}
 
 	if (!ntbl)
-- 
cgit v1.2.3


From c0690016a73fe6bd456887bbbe6e10c7f0096554 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 12:52:50 +1000
Subject: rhashtable: clean up dereference of ->future_tbl.

Using rht_dereference_bucket() to dereference
->future_tbl looks like a type error, and could be confusing.
Using rht_dereference_rcu() to test a pointer for NULL
adds an unnecessary barrier - rcu_access_pointer() is preferred
for NULL tests when no lock is held.

This uses 3 different ways to access ->future_tbl.
- if we know the mutex is held, use rht_dereference()
- if we don't hold the mutex, and are only testing for NULL,
  use rcu_access_pointer()
- otherwise (using RCU protection for true dereference),
  use rht_dereference_rcu().

Note that this includes a simplification of the call to
rhashtable_last_table() - we don't do an extra dereference
before the call any more.

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 2 +-
 lib/rhashtable.c           | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 3f3a182bd0b4..eb7111039247 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -595,7 +595,7 @@ static inline void *__rhashtable_insert_fast(
 	lock = rht_bucket_lock(tbl, hash);
 	spin_lock_bh(lock);
 
-	if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) {
+	if (unlikely(rcu_access_pointer(tbl->future_tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
 		rcu_read_unlock();
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 52ec83212856..0e04947b7e0c 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -226,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
 static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
-	struct bucket_table *new_tbl = rhashtable_last_table(ht,
-		rht_dereference_rcu(old_tbl->future_tbl, ht));
+	struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
 	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
 	int err = -EAGAIN;
 	struct rhash_head *head, *next, *entry;
@@ -467,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
 
 fail:
 	/* Do not fail the insert if someone else did a rehash. */
-	if (likely(rcu_dereference_raw(tbl->future_tbl)))
+	if (likely(rcu_access_pointer(tbl->future_tbl)))
 		return 0;
 
 	/* Schedule async rehash to retry allocation in process context. */
@@ -540,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
 	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
 		return ERR_CAST(data);
 
-	new_tbl = rcu_dereference(tbl->future_tbl);
+	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (new_tbl)
 		return new_tbl;
 
@@ -599,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
 			break;
 
 		spin_unlock_bh(lock);
-		tbl = rcu_dereference(tbl->future_tbl);
+		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	}
 
 	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
-- 
cgit v1.2.3


From 3f6c65d6255a872846c44182c82c78d3dc6239f5 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 19 Jun 2018 21:42:50 -0700
Subject: tcp: ignore rcv_rtt sample with old ts ecr value

When receiving multiple packets with the same ts ecr value, only try
to compute rcv_rtt sample with the earliest received packet.
This is because the rcv_rtt calculated by later received packets
could possibly include long idle time or other types of delay.
For example:
(1) server sends last packet of reply with TS val V1
(2) client ACKs last packet of reply with TS ecr V1
(3) long idle time passes
(4) client sends next request data packet with TS ecr V1 (again!)
At this time, the rcv_rtt computed on server with TS ecr V1 will be
inflated with the idle time and should get ignored.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h  |  1 +
 net/ipv4/tcp.c       |  1 +
 net/ipv4/tcp_input.c | 14 +++++++++++---
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..3dbea6610304 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -350,6 +350,7 @@ struct tcp_sock {
 #endif
 
 /* Receiver side RTT estimation */
+	u32 rcv_rtt_last_tsecr;
 	struct {
 		u32	rtt_us;
 		u32	seq;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 141acd92e58a..47c45d5be9f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2563,6 +2563,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
+	tp->rcv_rtt_last_tsecr = 0;
 	tp->write_seq += tp->max_window + 2;
 	if (tp->write_seq == 0)
 		tp->write_seq = 1;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..76ca88f63b70 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -582,9 +582,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->rx_opt.rcv_tsecr &&
-	    (TCP_SKB_CB(skb)->end_seq -
-	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+		return;
+	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+	if (TCP_SKB_CB(skb)->end_seq -
+	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 		u32 delta_us;
 
@@ -5475,6 +5478,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk);
+				/* When receiving pure ack in fast path, update
+				 * last ts ecr directly instead of calling
+				 * tcp_rcv_rtt_measure_ts()
+				 */
+				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
-- 
cgit v1.2.3


From cadefe5f584abaac40dce72009e4de738cbff467 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 20 Jun 2018 16:07:35 -0400
Subject: tcp_bbr: fix bbr pacing rate for internal pacing

This commit makes BBR use only the MSS (without any headers) to
calculate pacing rates when internal TCP-layer pacing is used.

This is necessary to achieve the correct pacing behavior in this case,
since tcp_internal_pacing() uses only the payload length to calculate
pacing delays.

Signed-off-by: Kevin Yang <yyd@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 11 +++++++++++
 net/ipv4/tcp_bbr.c    |  6 +++++-
 net/ipv4/tcp_output.c | 14 --------------
 3 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0448e7c5d2b4..822ee49ed0f9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1184,6 +1184,17 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 	return tp->is_cwnd_limited;
 }
 
+/* BBR congestion control needs pacing.
+ * Same remark for SO_MAX_PACING_RATE.
+ * sch_fq packet scheduler is efficiently handling pacing,
+ * but is not always installed/used.
+ * Return true if TCP stack should pace packets itself.
+ */
+static inline bool tcp_needs_internal_pacing(const struct sock *sk)
+{
+	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
+}
+
 /* Something is really bad, we could not queue an additional packet,
  * because qdisc is full or receiver sent a 0 window.
  * We do not want to add fuel to the fire, or abort too early,
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 58e2f479ffb4..3b5f45b9e81e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -205,7 +205,11 @@ static u32 bbr_bw(const struct sock *sk)
  */
 static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 {
-	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+	unsigned int mss = tcp_sk(sk)->mss_cache;
+
+	if (!tcp_needs_internal_pacing(sk))
+		mss = tcp_mss_to_mtu(sk, mss);
+	rate *= mss;
 	rate *= gain;
 	rate >>= BBR_SCALE;
 	rate *= USEC_PER_SEC;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e08b409c71e..f8f6129160dd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -973,17 +973,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-/* BBR congestion control needs pacing.
- * Same remark for SO_MAX_PACING_RATE.
- * sch_fq packet scheduler is efficiently handling pacing,
- * but is not always installed/used.
- * Return true if TCP stack should pace packets itself.
- */
-static bool tcp_needs_internal_pacing(const struct sock *sk)
-{
-	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
-}
-
 static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 {
 	u64 len_ns;
@@ -995,9 +984,6 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 	if (!rate || rate == ~0U)
 		return;
 
-	/* Should account for header sizes as sch_fq does,
-	 * but lets make things simple.
-	 */
 	len_ns = (u64)skb->len * NSEC_PER_SEC;
 	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
-- 
cgit v1.2.3


From 372401efd9eeee24701848874ecb9845524013ee Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 22 May 2018 18:34:56 +0200
Subject: dt-bindings: clock: add meson axg audio clock controller bindings

Export the clock ids dt-bindings usable by the consumers of the clock
controller and add the documentation for the device tree bindings of
the audio clock controller of the A113 based SoCs.

Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 .../bindings/clock/amlogic,axg-audio-clkc.txt      | 56 +++++++++++++
 include/dt-bindings/clock/axg-audio-clkc.h         | 94 ++++++++++++++++++++++
 2 files changed, 150 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
 create mode 100644 include/dt-bindings/clock/axg-audio-clkc.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
new file mode 100644
index 000000000000..61777ad24f61
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
@@ -0,0 +1,56 @@
+* Amlogic AXG Audio Clock Controllers
+
+The Amlogic AXG audio clock controller generates and supplies clock to the
+other elements of the audio subsystem, such as fifos, i2s, spdif and pdm
+devices.
+
+Required Properties:
+
+- compatible	: should be "amlogic,axg-audio-clkc" for the A113X and A113D
+- reg		: physical base address of the clock controller and length of
+		  memory mapped region.
+- clocks	: a list of phandle + clock-specifier pairs for the clocks listed
+		  in clock-names.
+- clock-names	: must contain the following:
+		  * "pclk" - Main peripheral bus clock
+		  may contain the following:
+		  * "mst_in[0-7]" - 8 input plls to generate clock signals
+		  * "slv_sclk[0-9]" - 10 slave bit clocks provided by external
+				      components.
+		  * "slv_lrclk[0-9]" - 10 slave sample clocks provided by external
+				       components.
+- resets	: phandle of the internal reset line
+- #clock-cells	: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/axg-audio-clkc.h header and can be
+used in device tree sources.
+
+Example:
+
+clkc_audio: clock-controller@0 {
+	compatible = "amlogic,axg-audio-clkc";
+	reg = <0x0 0x0 0x0 0xb4>;
+	#clock-cells = <1>;
+
+	clocks = <&clkc CLKID_AUDIO>,
+		 <&clkc CLKID_MPLL0>,
+		 <&clkc CLKID_MPLL1>,
+		 <&clkc CLKID_MPLL2>,
+		 <&clkc CLKID_MPLL3>,
+		 <&clkc CLKID_HIFI_PLL>,
+		 <&clkc CLKID_FCLK_DIV3>,
+		 <&clkc CLKID_FCLK_DIV4>,
+		 <&clkc CLKID_GP0_PLL>;
+	clock-names = "pclk",
+		      "mst_in0",
+		      "mst_in1",
+		      "mst_in2",
+		      "mst_in3",
+		      "mst_in4",
+		      "mst_in5",
+		      "mst_in6",
+		      "mst_in7";
+	resets = <&reset RESET_AUDIO>;
+};
diff --git a/include/dt-bindings/clock/axg-audio-clkc.h b/include/dt-bindings/clock/axg-audio-clkc.h
new file mode 100644
index 000000000000..fd9c362099d9
--- /dev/null
+++ b/include/dt-bindings/clock/axg-audio-clkc.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Copyright (c) 2018 Baylibre SAS.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ */
+
+#ifndef __AXG_AUDIO_CLKC_BINDINGS_H
+#define __AXG_AUDIO_CLKC_BINDINGS_H
+
+#define AUD_CLKID_SLV_SCLK0		9
+#define AUD_CLKID_SLV_SCLK1		10
+#define AUD_CLKID_SLV_SCLK2		11
+#define AUD_CLKID_SLV_SCLK3		12
+#define AUD_CLKID_SLV_SCLK4		13
+#define AUD_CLKID_SLV_SCLK5		14
+#define AUD_CLKID_SLV_SCLK6		15
+#define AUD_CLKID_SLV_SCLK7		16
+#define AUD_CLKID_SLV_SCLK8		17
+#define AUD_CLKID_SLV_SCLK9		18
+#define AUD_CLKID_SLV_LRCLK0		19
+#define AUD_CLKID_SLV_LRCLK1		20
+#define AUD_CLKID_SLV_LRCLK2		21
+#define AUD_CLKID_SLV_LRCLK3		22
+#define AUD_CLKID_SLV_LRCLK4		23
+#define AUD_CLKID_SLV_LRCLK5		24
+#define AUD_CLKID_SLV_LRCLK6		25
+#define AUD_CLKID_SLV_LRCLK7		26
+#define AUD_CLKID_SLV_LRCLK8		27
+#define AUD_CLKID_SLV_LRCLK9		28
+#define AUD_CLKID_DDR_ARB		29
+#define AUD_CLKID_PDM			30
+#define AUD_CLKID_TDMIN_A		31
+#define AUD_CLKID_TDMIN_B		32
+#define AUD_CLKID_TDMIN_C		33
+#define AUD_CLKID_TDMIN_LB		34
+#define AUD_CLKID_TDMOUT_A		35
+#define AUD_CLKID_TDMOUT_B		36
+#define AUD_CLKID_TDMOUT_C		37
+#define AUD_CLKID_FRDDR_A		38
+#define AUD_CLKID_FRDDR_B		39
+#define AUD_CLKID_FRDDR_C		40
+#define AUD_CLKID_TODDR_A		41
+#define AUD_CLKID_TODDR_B		42
+#define AUD_CLKID_TODDR_C		43
+#define AUD_CLKID_LOOPBACK		44
+#define AUD_CLKID_SPDIFIN		45
+#define AUD_CLKID_SPDIFOUT		46
+#define AUD_CLKID_RESAMPLE		47
+#define AUD_CLKID_POWER_DETECT		48
+#define AUD_CLKID_MST_A_MCLK		49
+#define AUD_CLKID_MST_B_MCLK		50
+#define AUD_CLKID_MST_C_MCLK		51
+#define AUD_CLKID_MST_D_MCLK		52
+#define AUD_CLKID_MST_E_MCLK		53
+#define AUD_CLKID_MST_F_MCLK		54
+#define AUD_CLKID_SPDIFOUT_CLK		55
+#define AUD_CLKID_SPDIFIN_CLK		56
+#define AUD_CLKID_PDM_DCLK		57
+#define AUD_CLKID_PDM_SYSCLK		58
+#define AUD_CLKID_MST_A_SCLK		79
+#define AUD_CLKID_MST_B_SCLK		80
+#define AUD_CLKID_MST_C_SCLK		81
+#define AUD_CLKID_MST_D_SCLK		82
+#define AUD_CLKID_MST_E_SCLK		83
+#define AUD_CLKID_MST_F_SCLK		84
+#define AUD_CLKID_MST_A_LRCLK		86
+#define AUD_CLKID_MST_B_LRCLK		87
+#define AUD_CLKID_MST_C_LRCLK		88
+#define AUD_CLKID_MST_D_LRCLK		89
+#define AUD_CLKID_MST_E_LRCLK		90
+#define AUD_CLKID_MST_F_LRCLK		91
+#define AUD_CLKID_TDMIN_A_SCLK_SEL	116
+#define AUD_CLKID_TDMIN_B_SCLK_SEL	117
+#define AUD_CLKID_TDMIN_C_SCLK_SEL	118
+#define AUD_CLKID_TDMIN_LB_SCLK_SEL	119
+#define AUD_CLKID_TDMOUT_A_SCLK_SEL	120
+#define AUD_CLKID_TDMOUT_B_SCLK_SEL	121
+#define AUD_CLKID_TDMOUT_C_SCLK_SEL	122
+#define AUD_CLKID_TDMIN_A_SCLK		123
+#define AUD_CLKID_TDMIN_B_SCLK		124
+#define AUD_CLKID_TDMIN_C_SCLK		125
+#define AUD_CLKID_TDMIN_LB_SCLK		126
+#define AUD_CLKID_TDMOUT_A_SCLK		127
+#define AUD_CLKID_TDMOUT_B_SCLK		128
+#define AUD_CLKID_TDMOUT_C_SCLK		129
+#define AUD_CLKID_TDMIN_A_LRCLK		130
+#define AUD_CLKID_TDMIN_B_LRCLK		131
+#define AUD_CLKID_TDMIN_C_LRCLK		132
+#define AUD_CLKID_TDMIN_LB_LRCLK	133
+#define AUD_CLKID_TDMOUT_A_LRCLK	134
+#define AUD_CLKID_TDMOUT_B_LRCLK	135
+#define AUD_CLKID_TDMOUT_C_LRCLK	136
+
+#endif /* __AXG_AUDIO_CLKC_BINDINGS_H */
-- 
cgit v1.2.3


From d467239f37c4e49158e0569a26c6a9c365ec5b61 Mon Sep 17 00:00:00 2001
From: Michel Pollet <michel.pollet@bp.renesas.com>
Date: Thu, 14 Jun 2018 11:56:30 +0100
Subject: dt-bindings: clock: Add the r9a06g032-sysctrl.h file

This adds the constants necessary to use the renesas,r9a06g032-sysctrl node.

Signed-off-by: Michel Pollet <michel.pollet@bp.renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 include/dt-bindings/clock/r9a06g032-sysctrl.h | 148 ++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 include/dt-bindings/clock/r9a06g032-sysctrl.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/r9a06g032-sysctrl.h b/include/dt-bindings/clock/r9a06g032-sysctrl.h
new file mode 100644
index 000000000000..90c0f3dc1ba1
--- /dev/null
+++ b/include/dt-bindings/clock/r9a06g032-sysctrl.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * R9A06G032 sysctrl IDs
+ *
+ * Copyright (C) 2018 Renesas Electronics Europe Limited
+ *
+ * Michel Pollet <michel.pollet@bp.renesas.com>, <buserror@gmail.com>
+ */
+
+#ifndef __DT_BINDINGS_R9A06G032_SYSCTRL_H__
+#define __DT_BINDINGS_R9A06G032_SYSCTRL_H__
+
+#define R9A06G032_CLK_PLL_USB		1
+#define R9A06G032_CLK_48		1	/* AKA CLK_PLL_USB */
+#define R9A06G032_MSEBIS_CLK		3	/* AKA CLKOUT_D16 */
+#define R9A06G032_MSEBIM_CLK		3	/* AKA CLKOUT_D16 */
+#define R9A06G032_CLK_DDRPHY_PLLCLK	5	/* AKA CLKOUT_D1OR2 */
+#define R9A06G032_CLK50			6	/* AKA CLKOUT_D20 */
+#define R9A06G032_CLK25			7	/* AKA CLKOUT_D40 */
+#define R9A06G032_CLK125		9	/* AKA CLKOUT_D8 */
+#define R9A06G032_CLK_P5_PG1		17	/* AKA DIV_P5_PG */
+#define R9A06G032_CLK_REF_SYNC		21	/* AKA DIV_REF_SYNC */
+#define R9A06G032_CLK_25_PG4		26
+#define R9A06G032_CLK_25_PG5		27
+#define R9A06G032_CLK_25_PG6		28
+#define R9A06G032_CLK_25_PG7		29
+#define R9A06G032_CLK_25_PG8		30
+#define R9A06G032_CLK_ADC		31
+#define R9A06G032_CLK_ECAT100		32
+#define R9A06G032_CLK_HSR100		33
+#define R9A06G032_CLK_I2C0		34
+#define R9A06G032_CLK_I2C1		35
+#define R9A06G032_CLK_MII_REF		36
+#define R9A06G032_CLK_NAND		37
+#define R9A06G032_CLK_NOUSBP2_PG6	38
+#define R9A06G032_CLK_P1_PG2		39
+#define R9A06G032_CLK_P1_PG3		40
+#define R9A06G032_CLK_P1_PG4		41
+#define R9A06G032_CLK_P4_PG3		42
+#define R9A06G032_CLK_P4_PG4		43
+#define R9A06G032_CLK_P6_PG1		44
+#define R9A06G032_CLK_P6_PG2		45
+#define R9A06G032_CLK_P6_PG3		46
+#define R9A06G032_CLK_P6_PG4		47
+#define R9A06G032_CLK_PCI_USB		48
+#define R9A06G032_CLK_QSPI0		49
+#define R9A06G032_CLK_QSPI1		50
+#define R9A06G032_CLK_RGMII_REF		51
+#define R9A06G032_CLK_RMII_REF		52
+#define R9A06G032_CLK_SDIO0		53
+#define R9A06G032_CLK_SDIO1		54
+#define R9A06G032_CLK_SERCOS100		55
+#define R9A06G032_CLK_SLCD		56
+#define R9A06G032_CLK_SPI0		57
+#define R9A06G032_CLK_SPI1		58
+#define R9A06G032_CLK_SPI2		59
+#define R9A06G032_CLK_SPI3		60
+#define R9A06G032_CLK_SPI4		61
+#define R9A06G032_CLK_SPI5		62
+#define R9A06G032_CLK_SWITCH		63
+#define R9A06G032_HCLK_ECAT125		65
+#define R9A06G032_HCLK_PINCONFIG	66
+#define R9A06G032_HCLK_SERCOS		67
+#define R9A06G032_HCLK_SGPIO2		68
+#define R9A06G032_HCLK_SGPIO3		69
+#define R9A06G032_HCLK_SGPIO4		70
+#define R9A06G032_HCLK_TIMER0		71
+#define R9A06G032_HCLK_TIMER1		72
+#define R9A06G032_HCLK_USBF		73
+#define R9A06G032_HCLK_USBH		74
+#define R9A06G032_HCLK_USBPM		75
+#define R9A06G032_CLK_48_PG_F		76
+#define R9A06G032_CLK_48_PG4		77
+#define R9A06G032_CLK_DDRPHY_PCLK	81	/* AKA CLK_REF_SYNC_D4 */
+#define R9A06G032_CLK_FW		81	/* AKA CLK_REF_SYNC_D4 */
+#define R9A06G032_CLK_CRYPTO		81	/* AKA CLK_REF_SYNC_D4 */
+#define R9A06G032_CLK_A7MP		84	/* AKA DIV_CA7 */
+#define R9A06G032_HCLK_CAN0		85
+#define R9A06G032_HCLK_CAN1		86
+#define R9A06G032_HCLK_DELTASIGMA	87
+#define R9A06G032_HCLK_PWMPTO		88
+#define R9A06G032_HCLK_RSV		89
+#define R9A06G032_HCLK_SGPIO0		90
+#define R9A06G032_HCLK_SGPIO1		91
+#define R9A06G032_RTOS_MDC		92
+#define R9A06G032_CLK_CM3		93
+#define R9A06G032_CLK_DDRC		94
+#define R9A06G032_CLK_ECAT25		95
+#define R9A06G032_CLK_HSR50		96
+#define R9A06G032_CLK_HW_RTOS		97
+#define R9A06G032_CLK_SERCOS50		98
+#define R9A06G032_HCLK_ADC		99
+#define R9A06G032_HCLK_CM3		100
+#define R9A06G032_HCLK_CRYPTO_EIP150	101
+#define R9A06G032_HCLK_CRYPTO_EIP93	102
+#define R9A06G032_HCLK_DDRC		103
+#define R9A06G032_HCLK_DMA0		104
+#define R9A06G032_HCLK_DMA1		105
+#define R9A06G032_HCLK_GMAC0		106
+#define R9A06G032_HCLK_GMAC1		107
+#define R9A06G032_HCLK_GPIO0		108
+#define R9A06G032_HCLK_GPIO1		109
+#define R9A06G032_HCLK_GPIO2		110
+#define R9A06G032_HCLK_HSR		111
+#define R9A06G032_HCLK_I2C0		112
+#define R9A06G032_HCLK_I2C1		113
+#define R9A06G032_HCLK_LCD		114
+#define R9A06G032_HCLK_MSEBI_M		115
+#define R9A06G032_HCLK_MSEBI_S		116
+#define R9A06G032_HCLK_NAND		117
+#define R9A06G032_HCLK_PG_I		118
+#define R9A06G032_HCLK_PG19		119
+#define R9A06G032_HCLK_PG20		120
+#define R9A06G032_HCLK_PG3		121
+#define R9A06G032_HCLK_PG4		122
+#define R9A06G032_HCLK_QSPI0		123
+#define R9A06G032_HCLK_QSPI1		124
+#define R9A06G032_HCLK_ROM		125
+#define R9A06G032_HCLK_RTC		126
+#define R9A06G032_HCLK_SDIO0		127
+#define R9A06G032_HCLK_SDIO1		128
+#define R9A06G032_HCLK_SEMAP		129
+#define R9A06G032_HCLK_SPI0		130
+#define R9A06G032_HCLK_SPI1		131
+#define R9A06G032_HCLK_SPI2		132
+#define R9A06G032_HCLK_SPI3		133
+#define R9A06G032_HCLK_SPI4		134
+#define R9A06G032_HCLK_SPI5		135
+#define R9A06G032_HCLK_SWITCH		136
+#define R9A06G032_HCLK_SWITCH_RG	137
+#define R9A06G032_HCLK_UART0		138
+#define R9A06G032_HCLK_UART1		139
+#define R9A06G032_HCLK_UART2		140
+#define R9A06G032_HCLK_UART3		141
+#define R9A06G032_HCLK_UART4		142
+#define R9A06G032_HCLK_UART5		143
+#define R9A06G032_HCLK_UART6		144
+#define R9A06G032_HCLK_UART7		145
+#define R9A06G032_CLK_UART0		146
+#define R9A06G032_CLK_UART1		147
+#define R9A06G032_CLK_UART2		148
+#define R9A06G032_CLK_UART3		149
+#define R9A06G032_CLK_UART4		150
+#define R9A06G032_CLK_UART5		151
+#define R9A06G032_CLK_UART6		152
+#define R9A06G032_CLK_UART7		153
+
+#endif /* __DT_BINDINGS_R9A06G032_SYSCTRL_H__ */
-- 
cgit v1.2.3


From f4c277b817cc9489fffabffb4e15d2f3b686056c Mon Sep 17 00:00:00 2001
From: Jiada Wang <jiada_wang@mentor.com>
Date: Wed, 20 Jun 2018 18:25:20 +0900
Subject: ASoC: soc-pcm: DPCM cares BE channel constraint

Current DPCM is caring only FE channel configuration. Sometimes
it will be trouble if user selects channel which isn't supported
by BE.

This patch adds new .dpcm_merged_chan on struct snd_soc_dai_link.
DPCM will use FE / BE merged channel if struct snd_soc_dai_link
has it.

Signed-off-by: Jiada Wang <jiada_wang@mentor.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h |  2 ++
 sound/soc/soc-pcm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 1378dcd2128a..f7579f82cc00 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -957,6 +957,8 @@ struct snd_soc_dai_link {
 
 	/* DPCM used FE & BE merged format */
 	unsigned int dpcm_merged_format:1;
+	/* DPCM used FE & BE merged channel */
+	unsigned int dpcm_merged_chan:1;
 
 	/* pmdown_time is ignored at stop */
 	unsigned int ignore_pmdown_time:1;
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 45b52f7b9690..19ebfc958b9d 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1715,6 +1715,46 @@ static u64 dpcm_runtime_base_format(struct snd_pcm_substream *substream)
 	return formats;
 }
 
+static void dpcm_runtime_base_chan(struct snd_pcm_substream *substream,
+				   unsigned int *channels_min,
+				   unsigned int *channels_max)
+{
+	struct snd_soc_pcm_runtime *fe = substream->private_data;
+	struct snd_soc_dpcm *dpcm;
+	int stream = substream->stream;
+
+	if (!fe->dai_link->dpcm_merged_chan)
+		return;
+
+	*channels_min = 0;
+	*channels_max = UINT_MAX;
+
+	/*
+	 * It returns merged BE codec channel;
+	 * if FE want to use it (= dpcm_merged_chan)
+	 */
+
+	list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) {
+		struct snd_soc_pcm_runtime *be = dpcm->be;
+		struct snd_soc_dai_driver *codec_dai_drv;
+		struct snd_soc_pcm_stream *codec_stream;
+		int i;
+
+		for (i = 0; i < be->num_codecs; i++) {
+			codec_dai_drv = be->codec_dais[i]->driver;
+			if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+				codec_stream = &codec_dai_drv->playback;
+			else
+				codec_stream = &codec_dai_drv->capture;
+
+			*channels_min = max(*channels_min,
+					    codec_stream->channels_min);
+			*channels_max = min(*channels_max,
+					    codec_stream->channels_max);
+		}
+	}
+}
+
 static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -1722,11 +1762,17 @@ static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream)
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	struct snd_soc_dai_driver *cpu_dai_drv = cpu_dai->driver;
 	u64 format = dpcm_runtime_base_format(substream);
+	unsigned int channels_min = 0, channels_max = UINT_MAX;
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
 		dpcm_init_runtime_hw(runtime, &cpu_dai_drv->playback, format);
 	else
 		dpcm_init_runtime_hw(runtime, &cpu_dai_drv->capture, format);
+
+	dpcm_runtime_base_chan(substream, &channels_min, &channels_max);
+
+	runtime->hw.channels_min = max(channels_min, runtime->hw.channels_min);
+	runtime->hw.channels_max = min(channels_max, runtime->hw.channels_max);
 }
 
 static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd);
-- 
cgit v1.2.3


From 321d7863acf7b1cf921ac18cd5ad5483b3cbb7ec Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 20 Jun 2018 15:47:11 -0600
Subject: IB/uverbs: Delete type and id from uverbs_obj_attr

In this context the uobject is not allowed to be NULL, so type is the same
as uobject->type, and at least for IDR, id is the same as uobject->id.

FD objects should never handle the FD number outside the uAPI boundary
code.

Suggested-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c | 6 ++----
 include/rdma/uverbs_ioctl.h            | 4 ----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 20be6835291e..03065bad8dae 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -152,14 +152,12 @@ static int uverbs_process_attr(struct ib_device *ibdev,
 		object = uverbs_get_object(ibdev, spec->obj.obj_type);
 		if (!object)
 			return -EINVAL;
-		o_attr->type = object->type_attrs;
 
-		o_attr->id = (int)uattr->data;
 		o_attr->uobject = uverbs_get_uobject_from_context(
-					o_attr->type,
+					object->type_attrs,
 					ucontext,
 					spec->obj.access,
-					o_attr->id);
+					(int)uattr->data);
 
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 970357d0ccc4..90a4947ff548 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -344,11 +344,7 @@ struct uverbs_ptr_attr {
 };
 
 struct uverbs_obj_attr {
-	/* pointer to the kernel descriptor -> type, access, etc */
-	const struct uverbs_obj_type	*type;
 	struct ib_uobject		*uobject;
-	/* fd or id in idr of this object */
-	int				id;
 };
 
 struct uverbs_attr {
-- 
cgit v1.2.3


From 30c217ef6411d24b8c8b0478b89335dc2dabe6e8 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 29 May 2018 14:13:45 +0200
Subject: crypto: sha512_generic - add a sha512 0-length pre-computed hash

This patch adds the sha512 pre-computed 0-length hash so that device
drivers can use it when an hardware engine does not support computing a
hash from a 0 length input.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha512_generic.c | 12 ++++++++++++
 include/crypto/sha.h    |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index eba965d18bfc..439723d9273e 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -23,6 +23,18 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
+const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE] = {
+	0xcf, 0x83, 0xe1, 0x35, 0x7e, 0xef, 0xb8, 0xbd,
+	0xf1, 0x54, 0x28, 0x50, 0xd6, 0x6d, 0x80, 0x07,
+	0xd6, 0x20, 0xe4, 0x05, 0x0b, 0x57, 0x15, 0xdc,
+	0x83, 0xf4, 0xa9, 0x21, 0xd3, 0x6c, 0xe9, 0xce,
+	0x47, 0xd0, 0xd1, 0x3c, 0x5d, 0x85, 0xf2, 0xb0,
+	0xff, 0x83, 0x18, 0xd2, 0x87, 0x7e, 0xec, 0x2f,
+	0x63, 0xb9, 0x31, 0xbd, 0x47, 0x41, 0x7a, 0x81,
+	0xa5, 0x38, 0x32, 0x7a, 0xf9, 0x27, 0xda, 0x3e
+};
+EXPORT_SYMBOL_GPL(sha512_zero_message_hash);
+
 static inline u64 Ch(u64 x, u64 y, u64 z)
 {
         return z ^ (x & (y ^ z));
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 0555b571dd34..799f071b93df 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -71,6 +71,8 @@ extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
 
 extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
 
+extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
+
 struct sha1_state {
 	u32 state[SHA1_DIGEST_SIZE / 4];
 	u64 count;
-- 
cgit v1.2.3


From 26f7120b86a964a886ececb9eb8e6dd50276dbb2 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 29 May 2018 14:13:49 +0200
Subject: crypto: sha512_generic - add a sha384 0-length pre-computed hash

This patch adds the sha384 pre-computed 0-length hash so that device
drivers can use it when an hardware engine does not support computing a
hash from a 0 length input.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha512_generic.c | 10 ++++++++++
 include/crypto/sha.h    |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 439723d9273e..0b805d03b5e5 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -23,6 +23,16 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
+const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE] = {
+	0x38, 0xb0, 0x60, 0xa7, 0x51, 0xac, 0x96, 0x38,
+	0x4c, 0xd9, 0x32, 0x7e, 0xb1, 0xb1, 0xe3, 0x6a,
+	0x21, 0xfd, 0xb7, 0x11, 0x14, 0xbe, 0x07, 0x43,
+	0x4c, 0x0c, 0xc7, 0xbf, 0x63, 0xf6, 0xe1, 0xda,
+	0x27, 0x4e, 0xde, 0xbf, 0xe7, 0x6f, 0x65, 0xfb,
+	0xd5, 0x1a, 0xd2, 0xf1, 0x48, 0x98, 0xb9, 0x5b
+};
+EXPORT_SYMBOL_GPL(sha384_zero_message_hash);
+
 const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE] = {
 	0xcf, 0x83, 0xe1, 0x35, 0x7e, 0xef, 0xb8, 0xbd,
 	0xf1, 0x54, 0x28, 0x50, 0xd6, 0x6d, 0x80, 0x07,
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 799f071b93df..8a46202b1857 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -71,6 +71,8 @@ extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
 
 extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
 
+extern const u8 sha384_zero_message_hash[SHA384_DIGEST_SIZE];
+
 extern const u8 sha512_zero_message_hash[SHA512_DIGEST_SIZE];
 
 struct sha1_state {
-- 
cgit v1.2.3


From 5424ea27390f1f8903e5de0eaa0c5b561e8e877a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 16:27:47 -0700
Subject: netns: get more entropy from net_hash_mix()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct net are effectively allocated from order-1 pages on x86,
with one object per slab, meaning that the 13 low order bits
of their addresses are zero.

Once shifted by L1_CACHE_SHIFT, this leaves 7 zero-bits,
meaning that net_hash_mix() does not help spreading
objects on various hash tables.

For example, TCP listen table has 32 buckets, meaning that
all netns use the same bucket for port 80 or port 443.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/hash.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index 24c78183a4c2..16a842456189 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -9,12 +9,7 @@ struct net;
 static inline u32 net_hash_mix(const struct net *net)
 {
 #ifdef CONFIG_NET_NS
-	/*
-	 * shift this right to eliminate bits, that are
-	 * always zeroed
-	 */
-
-	return (u32)(((unsigned long)net) >> L1_CACHE_SHIFT);
+	return (u32)(((unsigned long)net) >> ilog2(sizeof(*net)));
 #else
 	return 0;
 #endif
-- 
cgit v1.2.3


From 9b42c1f179a614e11893ae4619f0304a38f481ae Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 12:44:26 +0200
Subject: xfrm: Extend the output_mark to support input direction and masking.

We already support setting an output mark at the xfrm_state,
unfortunately this does not support the input direction and
masking the marks that will be applied to the skb. This change
adds support applying a masked value in both directions.

The existing XFRMA_OUTPUT_MARK number is reused for this purpose
and as it is now bi-directional, it is renamed to XFRMA_SET_MARK.

An additional XFRMA_SET_MARK_MASK attribute is added for setting the
mask. If the attribute mask not provided, it is set to 0xffffffff,
keeping the XFRMA_OUTPUT_MARK existing 'full mask' semantics.

Co-developed-by: Tobias Brunner <tobias@strongswan.org>
Co-developed-by: Eyal Birger <eyal.birger@gmail.com>
Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Tobias Brunner <tobias@strongswan.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/xfrm.h        |  9 ++++++++-
 include/uapi/linux/xfrm.h |  4 +++-
 net/xfrm/xfrm_device.c    |  3 ++-
 net/xfrm/xfrm_input.c     |  2 ++
 net/xfrm/xfrm_output.c    |  3 +--
 net/xfrm/xfrm_policy.c    |  5 +++--
 net/xfrm/xfrm_user.c      | 48 +++++++++++++++++++++++++++++++++++++----------
 7 files changed, 57 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 557122846e0e..3dc83ba26f62 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -166,7 +166,7 @@ struct xfrm_state {
 		int		header_len;
 		int		trailer_len;
 		u32		extra_flags;
-		u32		output_mark;
+		struct xfrm_mark	smark;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
@@ -2012,6 +2012,13 @@ static inline int xfrm_mark_put(struct sk_buff *skb, const struct xfrm_mark *m)
 	return ret;
 }
 
+static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
+{
+	struct xfrm_mark *m = &x->props.smark;
+
+	return (m->v & m->m) | (mark & ~m->m);
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index e3af2859188b..5a6ed7ce5a29 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -305,9 +305,11 @@ enum xfrm_attr_type_t {
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
 	XFRMA_PAD,
 	XFRMA_OFFLOAD_DEV,	/* struct xfrm_state_offload */
-	XFRMA_OUTPUT_MARK,	/* __u32 */
+	XFRMA_SET_MARK,		/* __u32 */
+	XFRMA_SET_MARK_MASK,	/* __u32 */
 	__XFRMA_MAX
 
+#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 175941e15a6e..16c1230d20fa 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -162,7 +162,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		}
 
 		dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr,
-					x->props.family, x->props.output_mark);
+					x->props.family,
+					xfrm_smark_get(0, x));
 		if (IS_ERR(dst))
 			return 0;
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 352abca2605f..074810436242 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -339,6 +339,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
+		skb->mark = xfrm_smark_get(skb->mark, x);
+
 		skb->sp->xvec[skb->sp->len++] = x;
 
 lock:
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 89b178a78dc7..45ba07ab3e4f 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -66,8 +66,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
 			goto error_nolock;
 		}
 
-		if (x->props.output_mark)
-			skb->mark = x->props.output_mark;
+		skb->mark = xfrm_smark_get(skb->mark, x);
 
 		err = x->outer_mode->output(x, skb);
 		if (err) {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f48251c1319..7637637717ec 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1607,10 +1607,11 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 		dst_copy_metrics(dst1, dst);
 
 		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
+			__u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
+
 			family = xfrm[i]->props.family;
 			dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
-					      &saddr, &daddr, family,
-					      xfrm[i]->props.output_mark);
+					      &saddr, &daddr, family, mark);
 			err = PTR_ERR(dst);
 			if (IS_ERR(dst))
 				goto put_states;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 080035f056d9..9602cc9e05ab 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -527,6 +527,19 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
 		x->replay_maxdiff = nla_get_u32(rt);
 }
 
+static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m)
+{
+	if (attrs[XFRMA_SET_MARK]) {
+		m->v = nla_get_u32(attrs[XFRMA_SET_MARK]);
+		if (attrs[XFRMA_SET_MARK_MASK])
+			m->m = nla_get_u32(attrs[XFRMA_SET_MARK_MASK]);
+		else
+			m->m = 0xffffffff;
+	} else {
+		m->v = m->m = 0;
+	}
+}
+
 static struct xfrm_state *xfrm_state_construct(struct net *net,
 					       struct xfrm_usersa_info *p,
 					       struct nlattr **attrs,
@@ -579,8 +592,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_mark_get(attrs, &x->mark);
 
-	if (attrs[XFRMA_OUTPUT_MARK])
-		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
+	xfrm_smark_init(attrs, &x->props.smark);
 
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
@@ -824,6 +836,18 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
 	return 0;
 }
 
+static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
+{
+	int ret = 0;
+
+	if (m->v | m->m) {
+		ret = nla_put_u32(skb, XFRMA_SET_MARK, m->v);
+		if (!ret)
+			ret = nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m);
+	}
+	return ret;
+}
+
 /* Don't change this without updating xfrm_sa_len! */
 static int copy_to_user_state_extra(struct xfrm_state *x,
 				    struct xfrm_usersa_info *p,
@@ -887,6 +911,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 	ret = xfrm_mark_put(skb, &x->mark);
 	if (ret)
 		goto out;
+
+	ret = xfrm_smark_put(skb, &x->props.smark);
+	if (ret)
+		goto out;
+
 	if (x->replay_esn)
 		ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL,
 			      xfrm_replay_state_esn_len(x->replay_esn),
@@ -900,11 +929,7 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		ret = copy_user_offload(&x->xso, skb);
 	if (ret)
 		goto out;
-	if (x->props.output_mark) {
-		ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark);
-		if (ret)
-			goto out;
-	}
+
 	if (x->security)
 		ret = copy_sec_ctx(x->security, skb);
 out:
@@ -2493,7 +2518,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_PROTO]		= { .type = NLA_U8 },
 	[XFRMA_ADDRESS_FILTER]	= { .len = sizeof(struct xfrm_address_filter) },
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
-	[XFRMA_OUTPUT_MARK]	= { .type = NLA_U32 },
+	[XFRMA_SET_MARK]	= { .type = NLA_U32 },
+	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2719,8 +2745,10 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(x->props.extra_flags));
 	if (x->xso.dev)
 		 l += nla_total_size(sizeof(x->xso));
-	if (x->props.output_mark)
-		l += nla_total_size(sizeof(x->props.output_mark));
+	if (x->props.smark.v | x->props.smark.m) {
+		l += nla_total_size(sizeof(x->props.smark.v));
+		l += nla_total_size(sizeof(x->props.smark.m));
+	}
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
-- 
cgit v1.2.3


From d159ce7957eec306eacda672e5909e26675ca8ef Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:06:57 +0200
Subject: flow: Extend flow informations with xfrm interface id.

Add a new flowi_xfrm structure with informations needed to do
a xfrm lookup. At the moment it keeps the informations about
the new xfrm interface id needed to lookup xfrm interfaces
that are introduced with a followup patch. We need this new
lookup key as other possible keys, like the ifindex is
already part of the xfrm selector and used as a key to
enforce the output device after the transformation in the
policy/state lookup.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/flow.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/flow.h b/include/net/flow.h
index 8ce21793094e..187c9bef672f 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,6 +26,10 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
+struct flowi_xfrm {
+	__u32			if_id;
+};
+
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -39,6 +43,7 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
+	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -78,6 +83,7 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
+#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -109,6 +115,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
+	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -138,6 +145,7 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
+#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -185,6 +193,7 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
+#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
-- 
cgit v1.2.3


From 7e6526404adedf079279aa7aa11722deaca8fe2e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:07 +0200
Subject: xfrm: Add a new lookup key to match xfrm interfaces.

This patch adds the xfrm interface id as a lookup key
for xfrm states and policies. With this we can assign
states and policies to virtual xfrm interfaces.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Acked-by: Benedict Wong <benedictwong@google.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h        | 21 +++++++++++++-----
 include/uapi/linux/xfrm.h |  1 +
 net/core/pktgen.c         |  2 +-
 net/key/af_key.c          |  6 +++---
 net/xfrm/xfrm_policy.c    | 18 +++++++++++-----
 net/xfrm/xfrm_state.c     | 19 ++++++++++++-----
 net/xfrm/xfrm_user.c      | 54 +++++++++++++++++++++++++++++++++++++++++------
 7 files changed, 96 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3dc83ba26f62..e8bada4d2a45 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -147,6 +147,7 @@ struct xfrm_state {
 	struct xfrm_id		id;
 	struct xfrm_selector	sel;
 	struct xfrm_mark	mark;
+	u32			if_id;
 	u32			tfcpad;
 
 	u32			genid;
@@ -574,6 +575,7 @@ struct xfrm_policy {
 	atomic_t		genid;
 	u32			priority;
 	u32			index;
+	u32			if_id;
 	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
@@ -1533,7 +1535,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
 				   unsigned short family);
-struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark,
+struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
 				       unsigned short family,
@@ -1690,20 +1692,20 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 		     void *);
 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
 					  u8 type, int dir,
 					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8, int dir,
-				     u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
+				     int dir, u32 id, int delete, int *err);
 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
 void xfrm_policy_hash_rebuild(struct net *net);
 u32 xfrm_get_acqseq(void);
 int verify_spi_info(u8 proto, u32 min, u32 max);
 int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
 struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
-				 u8 mode, u32 reqid, u8 proto,
+				 u8 mode, u32 reqid, u32 if_id, u8 proto,
 				 const xfrm_address_t *daddr,
 				 const xfrm_address_t *saddr, int create,
 				 unsigned short family);
@@ -2019,6 +2021,15 @@ static inline __u32 xfrm_smark_get(__u32 mark, struct xfrm_state *x)
 	return (m->v & m->m) | (mark & ~m->m);
 }
 
+static inline int xfrm_if_id_put(struct sk_buff *skb, __u32 if_id)
+{
+	int ret = 0;
+
+	if (if_id)
+		ret = nla_put_u32(skb, XFRMA_IF_ID, if_id);
+	return ret;
+}
+
 static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 				    unsigned int family)
 {
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 5a6ed7ce5a29..5f3b9fec7b5f 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -307,6 +307,7 @@ enum xfrm_attr_type_t {
 	XFRMA_OFFLOAD_DEV,	/* struct xfrm_state_offload */
 	XFRMA_SET_MARK,		/* __u32 */
 	XFRMA_SET_MARK_MASK,	/* __u32 */
+	XFRMA_IF_ID,		/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK	/* Compatibility */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 49368e21d228..6d37dbf0aa64 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2255,7 +2255,7 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
 			x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
 		} else {
 			/* slow path: we dont already have xfrm_state */
-			x = xfrm_stateonly_find(pn->net, DUMMY_MARK,
+			x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
 						(xfrm_address_t *)&pkt_dev->cur_daddr,
 						(xfrm_address_t *)&pkt_dev->cur_saddr,
 						AF_INET,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 8bdc1cbe490a..398ebcd614a0 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1383,7 +1383,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 	}
 
 	if (!x)
-		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family);
+		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, proto, xdaddr, xsaddr, 1, family);
 
 	if (x == NULL)
 		return -ENOENT;
@@ -2414,7 +2414,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 			return err;
 	}
 
-	xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+	xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
 				   pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
 				   1, &err);
 	security_xfrm_policy_free(pol_ctx);
@@ -2663,7 +2663,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		return -EINVAL;
 
 	delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
-	xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN,
+	xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
 			      dir, pol->sadb_x_policy_id, delete, &err);
 	if (xp == NULL)
 		return -ENOENT;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7637637717ec..fc0c69312b2c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -747,6 +747,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	newpos = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == policy->type &&
+		    pol->if_id == policy->if_id &&
 		    !selector_cmp(&pol->selector, &policy->selector) &&
 		    xfrm_policy_mark_match(policy, pol) &&
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
@@ -798,8 +799,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
-					  int dir, struct xfrm_selector *sel,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
+					  u8 type, int dir,
+					  struct xfrm_selector *sel,
 					  struct xfrm_sec_ctx *ctx, int delete,
 					  int *err)
 {
@@ -812,6 +814,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (pol->type == type &&
+		    pol->if_id == if_id &&
 		    (mark & pol->mark.m) == pol->mark.v &&
 		    !selector_cmp(sel, &pol->selector) &&
 		    xfrm_sec_ctx_match(ctx, pol->security)) {
@@ -837,8 +840,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
-				     int dir, u32 id, int delete, int *err)
+struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
+				     u8 type, int dir, u32 id, int delete,
+				     int *err)
 {
 	struct xfrm_policy *pol, *ret;
 	struct hlist_head *chain;
@@ -853,6 +857,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, byidx) {
 		if (pol->type == type && pol->index == id &&
+		    pol->if_id == if_id &&
 		    (mark & pol->mark.m) == pol->mark.v) {
 			xfrm_pol_hold(pol);
 			if (delete) {
@@ -1063,6 +1068,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 	bool match;
 
 	if (pol->family != family ||
+	    pol->if_id != fl->flowi_xfrm.if_id ||
 	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
 	    pol->type != type)
 		return ret;
@@ -1177,7 +1183,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 
 		match = xfrm_selector_match(&pol->selector, fl, family);
 		if (match) {
-			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
+			if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+			    pol->if_id != fl->flowi_xfrm.if_id) {
 				pol = NULL;
 				goto out;
 			}
@@ -1305,6 +1312,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
 		newp->lft = old->lft;
 		newp->curlft = old->curlft;
 		newp->mark = old->mark;
+		newp->if_id = old->if_id;
 		newp->action = old->action;
 		newp->flags = old->flags;
 		newp->xfrm_nr = old->xfrm_nr;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8308281f3253..3803b6813fc5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -941,6 +941,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
+	u32 if_id = fl->flowi_xfrm.if_id;
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
@@ -955,6 +956,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
@@ -971,6 +973,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
 		    tmpl->mode == x->props.mode &&
@@ -1010,6 +1013,7 @@ found:
 		 * to current session. */
 		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
 		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
+		x->if_id = if_id;
 
 		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
 		if (error) {
@@ -1067,7 +1071,7 @@ out:
 }
 
 struct xfrm_state *
-xfrm_stateonly_find(struct net *net, u32 mark,
+xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 		    xfrm_address_t *daddr, xfrm_address_t *saddr,
 		    unsigned short family, u8 mode, u8 proto, u32 reqid)
 {
@@ -1080,6 +1084,7 @@ xfrm_stateonly_find(struct net *net, u32 mark,
 		if (x->props.family == family &&
 		    x->props.reqid == reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 		    mode == x->props.mode &&
@@ -1160,11 +1165,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 	struct xfrm_state *x;
 	unsigned int h;
 	u32 mark = xnew->mark.v & xnew->mark.m;
+	u32 if_id = xnew->if_id;
 
 	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
 	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 		if (x->props.family	== family &&
 		    x->props.reqid	== reqid &&
+		    x->if_id		== if_id &&
 		    (mark & x->mark.m) == x->mark.v &&
 		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
 		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
@@ -1187,7 +1194,7 @@ EXPORT_SYMBOL(xfrm_state_insert);
 static struct xfrm_state *__find_acq_core(struct net *net,
 					  const struct xfrm_mark *m,
 					  unsigned short family, u8 mode,
-					  u32 reqid, u8 proto,
+					  u32 reqid, u32 if_id, u8 proto,
 					  const xfrm_address_t *daddr,
 					  const xfrm_address_t *saddr,
 					  int create)
@@ -1242,6 +1249,7 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 		x->props.family = family;
 		x->props.mode = mode;
 		x->props.reqid = reqid;
+		x->if_id = if_id;
 		x->mark.v = m->v;
 		x->mark.m = m->m;
 		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
@@ -1296,7 +1304,7 @@ int xfrm_state_add(struct xfrm_state *x)
 
 	if (use_spi && !x1)
 		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
-				     x->props.reqid, x->id.proto,
+				     x->props.reqid, x->if_id, x->id.proto,
 				     &x->id.daddr, &x->props.saddr, 0);
 
 	__xfrm_state_bump_genids(x);
@@ -1395,6 +1403,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	x->props.flags = orig->props.flags;
 	x->props.extra_flags = orig->props.extra_flags;
 
+	x->if_id = orig->if_id;
 	x->tfcpad = orig->tfcpad;
 	x->replay_maxdiff = orig->replay_maxdiff;
 	x->replay_maxage = orig->replay_maxage;
@@ -1619,13 +1628,13 @@ EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
 
 struct xfrm_state *
 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
-	      u8 proto, const xfrm_address_t *daddr,
+	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
 	      const xfrm_address_t *saddr, int create, unsigned short family)
 {
 	struct xfrm_state *x;
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
-	x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
+	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	return x;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 9602cc9e05ab..79245e1c3487 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -594,6 +594,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	xfrm_smark_init(attrs, &x->props.smark);
 
+	if (attrs[XFRMA_IF_ID])
+		x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
 	if (err)
 		goto error;
@@ -929,7 +932,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 		ret = copy_user_offload(&x->xso, skb);
 	if (ret)
 		goto out;
-
+	if (x->if_id) {
+		ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id);
+		if (ret)
+			goto out;
+	}
 	if (x->security)
 		ret = copy_sec_ctx(x->security, skb);
 out:
@@ -1278,6 +1285,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err;
 	u32 mark;
 	struct xfrm_mark m;
+	u32 if_id = 0;
 
 	p = nlmsg_data(nlh);
 	err = verify_spi_info(p->info.id.proto, p->min, p->max);
@@ -1290,6 +1298,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 	x = NULL;
 
 	mark = xfrm_mark_get(attrs, &m);
+
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->info.seq) {
 		x = xfrm_find_acq_byseq(net, mark, p->info.seq);
 		if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) {
@@ -1300,7 +1312,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!x)
 		x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid,
-				  p->info.id.proto, daddr,
+				  if_id, p->info.id.proto, daddr,
 				  &p->info.saddr, 1,
 				  family);
 	err = -ENOENT;
@@ -1588,6 +1600,9 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us
 
 	xfrm_mark_get(attrs, &xp->mark);
 
+	if (attrs[XFRMA_IF_ID])
+		xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	return xp;
  error:
 	*errp = err;
@@ -1733,6 +1748,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -1814,6 +1831,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int delete;
 	struct xfrm_mark m;
 	u32 mark = xfrm_mark_get(attrs, &m);
+	u32 if_id = 0;
 
 	p = nlmsg_data(nlh);
 	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
@@ -1826,8 +1844,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->index)
-		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err);
+		xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -1844,7 +1865,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel,
+		xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
 					   ctx, delete, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -1967,6 +1988,10 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 	if (err)
 		goto out_cancel;
 
+	err = xfrm_if_id_put(skb, x->if_id);
+	if (err)
+		goto out_cancel;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -2109,6 +2134,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -ENOENT;
 	struct xfrm_mark m;
 	u32 mark = xfrm_mark_get(attrs, &m);
+	u32 if_id = 0;
 
 	err = copy_from_user_policy_type(&type, attrs);
 	if (err)
@@ -2118,8 +2144,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	if (attrs[XFRMA_IF_ID])
+		if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
 	if (p->index)
-		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
+		xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
 	else {
 		struct nlattr *rt = attrs[XFRMA_SEC_CTX];
 		struct xfrm_sec_ctx *ctx;
@@ -2136,7 +2165,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 			if (err)
 				return err;
 		}
-		xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir,
+		xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
 					   &p->sel, ctx, 0, &err);
 		security_xfrm_policy_free(ctx);
 	}
@@ -2520,6 +2549,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_OFFLOAD_DEV]	= { .len = sizeof(struct xfrm_user_offload) },
 	[XFRMA_SET_MARK]	= { .type = NLA_U32 },
 	[XFRMA_SET_MARK_MASK]	= { .type = NLA_U32 },
+	[XFRMA_IF_ID]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
@@ -2651,6 +2681,10 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	if (err)
 		return err;
 
+	err = xfrm_if_id_put(skb, x->if_id);
+	if (err)
+		return err;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 }
@@ -2749,6 +2783,8 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
 		l += nla_total_size(sizeof(x->props.smark.v));
 		l += nla_total_size(sizeof(x->props.smark.m));
 	}
+	if (x->if_id)
+		l += nla_total_size(sizeof(x->if_id));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size_64bit(sizeof(u64));
@@ -2878,6 +2914,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -2994,6 +3032,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err) {
 		nlmsg_cancel(skb, nlh);
 		return err;
@@ -3075,6 +3115,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
 		err = copy_to_user_policy_type(xp->type, skb);
 	if (!err)
 		err = xfrm_mark_put(skb, &xp->mark);
+	if (!err)
+		err = xfrm_if_id_put(skb, xp->if_id);
 	if (err)
 		goto out_free_skb;
 
-- 
cgit v1.2.3


From f203b76d78092faf248db3f851840fbecf80b40e Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 12 Jun 2018 14:07:12 +0200
Subject: xfrm: Add virtual xfrm interfaces

This patch adds support for virtual xfrm interfaces.
Packets that are routed through such an interface
are guaranteed to be IPsec transformed or dropped.
It is a generic virtual interface that ensures IPsec
transformation, no need to know what happens behind
the interface. This means that we can tunnel IPv4 and
IPv6 through the same interface and support all xfrm
modes (tunnel, transport and beet) on it.

Co-developed-by: Lorenzo Colitti <lorenzo@google.com>
Co-developed-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Benedict Wong <benedictwong@google.com>
Tested-by: Antony Antony <antony@phenome.org>
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
---
 include/net/xfrm.h           |  24 ++
 include/uapi/linux/if_link.h |  10 +
 net/xfrm/Kconfig             |   8 +
 net/xfrm/Makefile            |   1 +
 net/xfrm/xfrm_input.c        |   3 +
 net/xfrm/xfrm_interface.c    | 972 +++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_policy.c       |  43 ++
 7 files changed, 1061 insertions(+)
 create mode 100644 net/xfrm/xfrm_interface.c

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e8bada4d2a45..3fa578a6a819 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -23,6 +23,7 @@
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/flow.h>
+#include <net/gro_cells.h>
 
 #include <linux/interrupt.h>
 
@@ -293,6 +294,13 @@ struct xfrm_replay {
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
 
+struct xfrm_if_cb {
+	struct xfrm_if	*(*decode_session)(struct sk_buff *skb);
+};
+
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
+void xfrm_if_unregister_cb(void);
+
 struct net_device;
 struct xfrm_type;
 struct xfrm_dst;
@@ -1039,6 +1047,22 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
+struct xfrm_if_parms {
+	char name[IFNAMSIZ];	/* name of XFRM device */
+	int link;		/* ifindex of underlying L2 interface */
+	u32 if_id;		/* interface identifyer */
+};
+
+struct xfrm_if {
+	struct xfrm_if __rcu *next;	/* next interface in list */
+	struct net_device *dev;		/* virtual device associated with interface */
+	struct net_device *phydev;	/* physical device */
+	struct net *net;		/* netns for packet i/o */
+	struct xfrm_if_parms p;		/* interface parms */
+
+	struct gro_cells gro_cells;
+};
+
 struct xfrm_offload {
 	/* Output sequence number for replay protection on offloading. */
 	struct {
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cf01b6824244..bff0af507b32 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -459,6 +459,16 @@ enum {
 
 #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
 
+/* XFRM section */
+enum {
+	IFLA_XFRM_UNSPEC,
+	IFLA_XFRM_LINK,
+	IFLA_XFRM_IF_ID,
+	__IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
 enum macsec_validation_type {
 	MACSEC_VALIDATE_DISABLED = 0,
 	MACSEC_VALIDATE_CHECK = 1,
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 286ed25c1a69..53381888a7b3 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -25,6 +25,14 @@ config XFRM_USER
 
 	  If unsure, say Y.
 
+config XFRM_INTERFACE
+	tristate "Transformation virtual interface"
+	depends on XFRM && IPV6
+	---help---
+	  This provides a virtual interface to route IPsec traffic.
+
+	  If unsure, say N.
+
 config XFRM_SUB_POLICY
 	bool "Transformation sub policy support"
 	depends on XFRM
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 0bd2465a8c5a..fbc4552d17b8 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o
 obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
+obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 074810436242..b89c9c7f8c5c 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -320,6 +320,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
+		secpath_reset(skb);
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
 		goto drop;
 	}
@@ -328,12 +329,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 				   XFRM_SPI_SKB_CB(skb)->daddroff);
 	do {
 		if (skb->sp->len == XFRM_MAX_DEPTH) {
+			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
 			goto drop;
 		}
 
 		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
 		if (x == NULL) {
+			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
 			xfrm_audit_state_notfound(skb, family, spi, seq);
 			goto drop;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
new file mode 100644
index 000000000000..31cb1c7e3881
--- /dev/null
+++ b/net/xfrm/xfrm_interface.c
@@ -0,0 +1,972 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *	XFRM virtual interface
+ *
+ *	Copyright (C) 2018 secunet Security Networks AG
+ *
+ *	Author:
+ *	Steffen Klassert <steffen.klassert@secunet.com>
+ */
+
+#include <linux/module.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/icmp.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_link.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <linux/etherdevice.h>
+
+static int xfrmi_dev_init(struct net_device *dev);
+static void xfrmi_dev_setup(struct net_device *dev);
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
+static unsigned int xfrmi_net_id __read_mostly;
+
+struct xfrmi_net {
+	/* lists for storing interfaces in use */
+	struct xfrm_if __rcu *xfrmi[1];
+};
+
+#define for_each_xfrmi_rcu(start, xi) \
+	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
+
+static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
+{
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	struct xfrm_if *xi;
+
+	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+		if (x->if_id == xi->p.if_id &&
+		    (xi->dev->flags & IFF_UP))
+			return xi;
+	}
+
+	return NULL;
+}
+
+static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+{
+	struct xfrmi_net *xfrmn;
+	int ifindex;
+	struct xfrm_if *xi;
+
+	if (!skb->dev)
+		return NULL;
+
+	xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+	ifindex = skb->dev->ifindex;
+
+	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
+		if (ifindex == xi->dev->ifindex &&
+			(xi->dev->flags & IFF_UP))
+				return xi;
+	}
+
+	return NULL;
+}
+
+static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0];
+
+	rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
+	rcu_assign_pointer(*xip, xi);
+}
+
+static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
+{
+	struct xfrm_if __rcu **xip;
+	struct xfrm_if *iter;
+
+	for (xip = &xfrmn->xfrmi[0];
+	     (iter = rtnl_dereference(*xip)) != NULL;
+	     xip = &iter->next) {
+		if (xi == iter) {
+			rcu_assign_pointer(*xip, xi->next);
+			break;
+		}
+	}
+}
+
+static void xfrmi_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+}
+
+static int xfrmi_create2(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	int err;
+
+	dev->rtnl_link_ops = &xfrmi_link_ops;
+	err = register_netdevice(dev);
+	if (err < 0)
+		goto out;
+
+	strcpy(xi->p.name, dev->name);
+
+	dev_hold(dev);
+	xfrmi_link(xfrmn, xi);
+
+	return 0;
+
+out:
+	return err;
+}
+
+static struct xfrm_if *xfrmi_create(struct net *net, struct xfrm_if_parms *p)
+{
+	struct net_device *dev;
+	struct xfrm_if *xi;
+	char name[IFNAMSIZ];
+	int err;
+
+	if (p->name[0])
+		strlcpy(name, p->name, IFNAMSIZ);
+	else
+		goto failed;
+
+	dev = alloc_netdev(sizeof(*xi), name, NET_NAME_UNKNOWN, xfrmi_dev_setup);
+	if (!dev)
+		goto failed;
+
+	dev_net_set(dev, net);
+
+	xi = netdev_priv(dev);
+	xi->p = *p;
+	xi->net = net;
+	xi->dev = dev;
+	xi->phydev = dev_get_by_index(net, p->link);
+	if (!xi->phydev)
+		goto failed_free;
+
+	err = xfrmi_create2(dev);
+	if (err < 0)
+		goto failed_dev_put;
+
+	return xi;
+
+failed_dev_put:
+	dev_put(xi->phydev);
+failed_free:
+	free_netdev(dev);
+failed:
+	return NULL;
+}
+
+static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p,
+				   int create)
+{
+	struct xfrm_if __rcu **xip;
+	struct xfrm_if *xi;
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+	for (xip = &xfrmn->xfrmi[0];
+	     (xi = rtnl_dereference(*xip)) != NULL;
+	     xip = &xi->next) {
+		if (xi->p.if_id == p->if_id) {
+			if (create)
+				return NULL;
+
+			return xi;
+		}
+	}
+	if (!create)
+		return NULL;
+	return xfrmi_create(net, p);
+}
+
+static void xfrmi_dev_uninit(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
+
+	xfrmi_unlink(xfrmn, xi);
+	dev_put(xi->phydev);
+	dev_put(dev);
+}
+
+static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
+{
+	skb->tstamp = 0;
+	skb->pkt_type = PACKET_HOST;
+	skb->skb_iif = 0;
+	skb->ignore_df = 0;
+	skb_dst_drop(skb);
+	nf_reset(skb);
+	nf_reset_trace(skb);
+
+	if (!xnet)
+		return;
+
+	ipvs_reset(skb);
+	secpath_reset(skb);
+	skb_orphan(skb);
+	skb->mark = 0;
+}
+
+static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
+{
+	struct pcpu_sw_netstats *tstats;
+	struct xfrm_mode *inner_mode;
+	struct net_device *dev;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	bool xnet;
+
+	if (err && !skb->sp)
+		return 0;
+
+	x = xfrm_input_state(skb);
+
+	xi = xfrmi_lookup(xs_net(x), x);
+	if (!xi)
+		return 1;
+
+	dev = xi->dev;
+	skb->dev = dev;
+
+	if (err) {
+		dev->stats.rx_errors++;
+		dev->stats.rx_dropped++;
+
+		return 0;
+	}
+
+	xnet = !net_eq(xi->net, dev_net(skb->dev));
+
+	if (xnet) {
+		inner_mode = x->inner_mode;
+
+		if (x->sel.family == AF_UNSPEC) {
+			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+			if (inner_mode == NULL) {
+				XFRM_INC_STATS(dev_net(skb->dev),
+					       LINUX_MIB_XFRMINSTATEMODEERROR);
+				return -EINVAL;
+			}
+		}
+
+		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
+				       inner_mode->afinfo->family))
+			return -EPERM;
+	}
+
+	xfrmi_scrub_packet(skb, xnet);
+
+	tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_packets++;
+	tstats->rx_bytes += skb->len;
+	u64_stats_update_end(&tstats->syncp);
+
+	return 0;
+}
+
+static int
+xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device_stats *stats = &xi->dev->stats;
+	struct dst_entry *dst = skb_dst(skb);
+	unsigned int length = skb->len;
+	struct net_device *tdev;
+	struct xfrm_state *x;
+	int err = -1;
+	int mtu;
+
+	if (!dst)
+		goto tx_err_link_failure;
+
+	fl->flowi_xfrm.if_id = xi->p.if_id;
+
+	dst_hold(dst);
+	dst = xfrm_lookup(xi->net, dst, fl, NULL, 0);
+	if (IS_ERR(dst)) {
+		err = PTR_ERR(dst);
+		dst = NULL;
+		goto tx_err_link_failure;
+	}
+
+	x = dst->xfrm;
+	if (!x)
+		goto tx_err_link_failure;
+
+	if (x->if_id != xi->p.if_id)
+		goto tx_err_link_failure;
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     xi->p.name);
+		goto tx_err_dst_release;
+	}
+
+	mtu = dst_mtu(dst);
+	if (!skb->ignore_df && skb->len > mtu) {
+		skb_dst_update_pmtu(skb, mtu);
+
+		if (skb->protocol == htons(ETH_P_IPV6)) {
+			if (mtu < IPV6_MIN_MTU)
+				mtu = IPV6_MIN_MTU;
+
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		} else {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		}
+
+		dst_release(dst);
+		return -EMSGSIZE;
+	}
+
+	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
+	skb->dev = tdev;
+
+	err = dst_output(xi->net, skb->sk, skb);
+	if (net_xmit_eval(err) == 0) {
+		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->tx_bytes += length;
+		tstats->tx_packets++;
+		u64_stats_update_end(&tstats->syncp);
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(dst);
+	return err;
+}
+
+static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device_stats *stats = &xi->dev->stats;
+	struct flowi fl;
+	int ret;
+
+	memset(&fl, 0, sizeof(fl));
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IPV6):
+		xfrm_decode_session(skb, &fl, AF_INET6);
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+		break;
+	case htons(ETH_P_IP):
+		xfrm_decode_session(skb, &fl, AF_INET);
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+		break;
+	default:
+		goto tx_err;
+	}
+
+	fl.flowi_oif = xi->phydev->ifindex;
+
+	ret = xfrmi_xmit2(skb, dev, &fl);
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int xfrmi4_err(struct sk_buff *skb, u32 info)
+{
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	struct net *net = dev_net(skb->dev);
+	int protocol = iph->protocol;
+	struct ip_comp_hdr *ipch;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah ;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	__be32 spi;
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	switch (icmp_hdr(skb)->type) {
+	case ICMP_DEST_UNREACH:
+		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+			return 0;
+	case ICMP_REDIRECT:
+		break;
+	default:
+		return 0;
+	}
+
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET);
+	if (!x)
+		return 0;
+
+	xi = xfrmi_lookup(net, x);
+	if (!xi) {
+		xfrm_state_put(x);
+		return -1;
+	}
+
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+	else
+		ipv4_redirect(skb, net, 0, 0, protocol, 0);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		    u8 type, u8 code, int offset, __be32 info)
+{
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+	struct net *net = dev_net(skb->dev);
+	int protocol = iph->nexthdr;
+	struct ip_comp_hdr *ipch;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah;
+	struct xfrm_state *x;
+	struct xfrm_if *xi;
+	__be32 spi;
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data + offset);
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data + offset);
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data + offset);
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	if (type != ICMPV6_PKT_TOOBIG &&
+	    type != NDISC_REDIRECT)
+		return 0;
+
+	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET6);
+	if (!x)
+		return 0;
+
+	xi = xfrmi_lookup(net, x);
+	if (!xi) {
+		xfrm_state_put(x);
+		return -1;
+	}
+
+	if (type == NDISC_REDIRECT)
+		ip6_redirect(skb, net, skb->dev->ifindex, 0,
+			     sock_net_uid(net, NULL));
+	else
+		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
+	xfrm_state_put(x);
+
+	return 0;
+}
+
+static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
+{
+	if (xi->p.link != p->link)
+		return -EINVAL;
+
+	xi->p.if_id = p->if_id;
+
+	return 0;
+}
+
+static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
+{
+	struct net *net = dev_net(xi->dev);
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+	int err;
+
+	xfrmi_unlink(xfrmn, xi);
+	synchronize_net();
+	err = xfrmi_change(xi, p);
+	xfrmi_link(xfrmn, xi);
+	netdev_state_change(xi->dev);
+	return err;
+}
+
+static void xfrmi_get_stats64(struct net_device *dev,
+			       struct rtnl_link_stats64 *s)
+{
+	int cpu;
+
+	if (!dev->tstats)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct pcpu_sw_netstats *stats;
+		struct pcpu_sw_netstats tmp;
+		int start;
+
+		stats = per_cpu_ptr(dev->tstats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&stats->syncp);
+			tmp.rx_packets = stats->rx_packets;
+			tmp.rx_bytes   = stats->rx_bytes;
+			tmp.tx_packets = stats->tx_packets;
+			tmp.tx_bytes   = stats->tx_bytes;
+		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+		s->rx_packets += tmp.rx_packets;
+		s->rx_bytes   += tmp.rx_bytes;
+		s->tx_packets += tmp.tx_packets;
+		s->tx_bytes   += tmp.tx_bytes;
+	}
+
+	s->rx_dropped = dev->stats.rx_dropped;
+	s->tx_dropped = dev->stats.tx_dropped;
+}
+
+static int xfrmi_get_iflink(const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+
+	return xi->phydev->ifindex;
+}
+
+
+static const struct net_device_ops xfrmi_netdev_ops = {
+	.ndo_init	= xfrmi_dev_init,
+	.ndo_uninit	= xfrmi_dev_uninit,
+	.ndo_start_xmit = xfrmi_xmit,
+	.ndo_get_stats64 = xfrmi_get_stats64,
+	.ndo_get_iflink = xfrmi_get_iflink,
+};
+
+static void xfrmi_dev_setup(struct net_device *dev)
+{
+	dev->netdev_ops 	= &xfrmi_netdev_ops;
+	dev->type		= ARPHRD_NONE;
+	dev->hard_header_len 	= ETH_HLEN;
+	dev->min_header_len	= ETH_HLEN;
+	dev->mtu		= ETH_DATA_LEN;
+	dev->min_mtu		= ETH_MIN_MTU;
+	dev->max_mtu		= ETH_DATA_LEN;
+	dev->addr_len		= ETH_ALEN;
+	dev->flags 		= IFF_NOARP;
+	dev->needs_free_netdev	= true;
+	dev->priv_destructor	= xfrmi_dev_free;
+	netif_keep_dst(dev);
+}
+
+static int xfrmi_dev_init(struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net_device *phydev = xi->phydev;
+	int err;
+
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	err = gro_cells_init(&xi->gro_cells, dev);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
+	dev->features |= NETIF_F_LLTX;
+
+	dev->needed_headroom = phydev->needed_headroom;
+	dev->needed_tailroom = phydev->needed_tailroom;
+
+	if (is_zero_ether_addr(dev->dev_addr))
+		eth_hw_addr_inherit(dev, phydev);
+	if (is_zero_ether_addr(dev->broadcast))
+		memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+
+	return 0;
+}
+
+static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
+			 struct netlink_ext_ack *extack)
+{
+	return 0;
+}
+
+static void xfrmi_netlink_parms(struct nlattr *data[],
+			       struct xfrm_if_parms *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_XFRM_LINK])
+		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
+
+	if (data[IFLA_XFRM_IF_ID])
+		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
+}
+
+static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[],
+			struct netlink_ext_ack *extack)
+{
+	struct net *net = dev_net(dev);
+	struct xfrm_if_parms *p;
+	struct xfrm_if *xi;
+
+	xi = netdev_priv(dev);
+	p = &xi->p;
+
+	xfrmi_netlink_parms(data, p);
+
+	if (!tb[IFLA_IFNAME])
+		return -EINVAL;
+
+	nla_strlcpy(p->name, tb[IFLA_IFNAME], IFNAMSIZ);
+
+	if (!xfrmi_locate(net, p, 1))
+		return -EEXIST;
+
+	return 0;
+}
+
+static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
+{
+	unregister_netdevice_queue(dev, head);
+}
+
+static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
+			   struct nlattr *data[],
+			   struct netlink_ext_ack *extack)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+
+	xfrmi_netlink_parms(data, &xi->p);
+
+	xi = xfrmi_locate(net, &xi->p, 0);
+
+	if (xi) {
+		if (xi->dev != dev)
+			return -EEXIST;
+	} else
+		xi = netdev_priv(dev);
+
+	return xfrmi_update(xi, &xi->p);
+}
+
+static size_t xfrmi_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_XFRM_LINK */
+		nla_total_size(4) +
+		/* IFLA_XFRM_IF_ID */
+		nla_total_size(4) +
+		0;
+}
+
+static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+	struct xfrm_if_parms *parm = &xi->p;
+
+	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
+	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+struct net *xfrmi_get_link_net(const struct net_device *dev)
+{
+	struct xfrm_if *xi = netdev_priv(dev);
+
+	return dev_net(xi->phydev);
+}
+
+static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
+	[IFLA_XFRM_LINK]	= { .type = NLA_U32 },
+	[IFLA_XFRM_IF_ID]	= { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
+	.kind		= "xfrm",
+	.maxtype	= IFLA_XFRM_MAX,
+	.policy		= xfrmi_policy,
+	.priv_size	= sizeof(struct xfrm_if),
+	.setup		= xfrmi_dev_setup,
+	.validate	= xfrmi_validate,
+	.newlink	= xfrmi_newlink,
+	.dellink	= xfrmi_dellink,
+	.changelink	= xfrmi_changelink,
+	.get_size	= xfrmi_get_size,
+	.fill_info	= xfrmi_fill_info,
+	.get_link_net	= xfrmi_get_link_net,
+};
+
+static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
+{
+	struct xfrm_if *xi;
+	LIST_HEAD(list);
+
+	xi = rtnl_dereference(xfrmn->xfrmi[0]);
+	if (!xi)
+		return;
+
+	unregister_netdevice_queue(xi->dev, &list);
+	unregister_netdevice_many(&list);
+}
+
+static int __net_init xfrmi_init_net(struct net *net)
+{
+	return 0;
+}
+
+static void __net_exit xfrmi_exit_net(struct net *net)
+{
+	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+
+	rtnl_lock();
+	xfrmi_destroy_interfaces(xfrmn);
+	rtnl_unlock();
+}
+
+static struct pernet_operations xfrmi_net_ops = {
+	.init = xfrmi_init_net,
+	.exit = xfrmi_exit_net,
+	.id   = &xfrmi_net_id,
+	.size = sizeof(struct xfrmi_net),
+};
+
+static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
+	.handler	=	xfrm6_rcv,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi6_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
+	.handler	=	xfrm4_rcv,
+	.input_handler	=	xfrm_input,
+	.cb_handler	=	xfrmi_rcv_cb,
+	.err_handler	=	xfrmi4_err,
+	.priority	=	10,
+};
+
+static int __init xfrmi4_init(void)
+{
+	int err;
+
+	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
+	if (err < 0)
+		goto xfrm_proto_esp_failed;
+	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
+	if (err < 0)
+		goto xfrm_proto_ah_failed;
+	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+	if (err < 0)
+		goto xfrm_proto_comp_failed;
+
+	return 0;
+
+xfrm_proto_comp_failed:
+	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+	return err;
+}
+
+static void xfrmi4_fini(void)
+{
+	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
+	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
+	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
+}
+
+static int __init xfrmi6_init(void)
+{
+	int err;
+
+	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
+	if (err < 0)
+		goto xfrm_proto_esp_failed;
+	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
+	if (err < 0)
+		goto xfrm_proto_ah_failed;
+	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+	if (err < 0)
+		goto xfrm_proto_comp_failed;
+
+	return 0;
+
+xfrm_proto_comp_failed:
+	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+xfrm_proto_ah_failed:
+	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+xfrm_proto_esp_failed:
+	return err;
+}
+
+static void xfrmi6_fini(void)
+{
+	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
+	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
+	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
+}
+
+static const struct xfrm_if_cb xfrm_if_cb = {
+	.decode_session =	xfrmi_decode_session,
+};
+
+static int __init xfrmi_init(void)
+{
+	const char *msg;
+	int err;
+
+	pr_info("IPsec XFRM device driver\n");
+
+	msg = "tunnel device";
+	err = register_pernet_device(&xfrmi_net_ops);
+	if (err < 0)
+		goto pernet_dev_failed;
+
+	msg = "xfrm4 protocols";
+	err = xfrmi4_init();
+	if (err < 0)
+		goto xfrmi4_failed;
+
+	msg = "xfrm6 protocols";
+	err = xfrmi6_init();
+	if (err < 0)
+		goto xfrmi6_failed;
+
+
+	msg = "netlink interface";
+	err = rtnl_link_register(&xfrmi_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	xfrm_if_register_cb(&xfrm_if_cb);
+
+	return err;
+
+rtnl_link_failed:
+	xfrmi6_fini();
+xfrmi6_failed:
+	xfrmi4_fini();
+xfrmi4_failed:
+	unregister_pernet_device(&xfrmi_net_ops);
+pernet_dev_failed:
+	pr_err("xfrmi init: failed to register %s\n", msg);
+	return err;
+}
+
+static void __exit xfrmi_fini(void)
+{
+	xfrm_if_unregister_cb();
+	rtnl_link_unregister(&xfrmi_link_ops);
+	xfrmi4_fini();
+	xfrmi6_fini();
+	unregister_pernet_device(&xfrmi_net_ops);
+}
+
+module_init(xfrmi_init);
+module_exit(xfrmi_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("xfrm");
+MODULE_ALIAS_NETDEV("xfrm0");
+MODULE_AUTHOR("Steffen Klassert");
+MODULE_DESCRIPTION("XFRM virtual interface");
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index fc0c69312b2c..d960ea6657b5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -47,6 +47,9 @@ struct xfrm_flo {
 
 static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
 static struct work_struct *xfrm_pcpu_work __read_mostly;
+static DEFINE_SPINLOCK(xfrm_if_cb_lock);
+static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
+
 static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
 static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
 						__read_mostly;
@@ -119,6 +122,12 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa
 	return afinfo;
 }
 
+/* Called with rcu_read_lock(). */
+static const struct xfrm_if_cb *xfrm_if_get_cb(void)
+{
+	return rcu_dereference(xfrm_if_cb);
+}
+
 struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif,
 				    const xfrm_address_t *saddr,
 				    const xfrm_address_t *daddr,
@@ -2083,6 +2092,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 
 	if (IS_ERR(xdst)) {
 		err = PTR_ERR(xdst);
+		if (err == -EREMOTE) {
+			xfrm_pols_put(pols, num_pols);
+			return NULL;
+		}
+
 		if (err != -EAGAIN)
 			goto error;
 		goto make_dummy_bundle;
@@ -2176,6 +2190,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
 				err = PTR_ERR(xdst);
+				if (err == -EREMOTE)
+					goto nopol;
+
 				goto dropdst;
 			} else if (xdst == NULL) {
 				num_xfrms = 0;
@@ -2368,12 +2385,20 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+	const struct xfrm_if_cb *ifcb = xfrm_if_get_cb();
+	struct xfrm_if *xi;
 	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl, reverse);
+	if (ifcb) {
+		xi = ifcb->decode_session(skb);
+		if (xi)
+			fl->flowi_xfrm.if_id = xi->p.if_id;
+	}
+
 	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
 	rcu_read_unlock();
 	return err;
@@ -2828,6 +2853,21 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo)
 }
 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
+void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb)
+{
+	spin_lock(&xfrm_if_cb_lock);
+	rcu_assign_pointer(xfrm_if_cb, ifcb);
+	spin_unlock(&xfrm_if_cb_lock);
+}
+EXPORT_SYMBOL(xfrm_if_register_cb);
+
+void xfrm_if_unregister_cb(void)
+{
+	RCU_INIT_POINTER(xfrm_if_cb, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(xfrm_if_unregister_cb);
+
 #ifdef CONFIG_XFRM_STATISTICS
 static int __net_init xfrm_statistics_init(struct net *net)
 {
@@ -3008,6 +3048,9 @@ void __init xfrm_init(void)
 	xfrm_dev_init();
 	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
+
+	RCU_INIT_POINTER(xfrm_if_cb, NULL);
+	synchronize_rcu();
 }
 
 #ifdef CONFIG_AUDITSYSCALL
-- 
cgit v1.2.3


From 63a67a926e214dac94e29147c0f3d11499f655a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 23 Jun 2018 17:16:44 -0400
Subject: kill dentry_update_name_case()

the last user is gone

Spotted-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 include/linux/dcache.h |  2 --
 2 files changed, 29 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..d9323af88ed0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2676,33 +2676,6 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_exact_alias);
 
-/**
- * dentry_update_name_case - update case insensitive dentry with a new name
- * @dentry: dentry to be updated
- * @name: new name
- *
- * Update a case insensitive dentry with new case of name.
- *
- * dentry must have been returned by d_lookup with name @name. Old and new
- * name lengths must match (ie. no d_compare which allows mismatched name
- * lengths).
- *
- * Parent inode i_mutex must be held over d_lookup and into this call (to
- * keep renames and concurrent inserts, and readdir(2) away).
- */
-void dentry_update_name_case(struct dentry *dentry, const struct qstr *name)
-{
-	BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
-	BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
-
-	spin_lock(&dentry->d_lock);
-	write_seqcount_begin(&dentry->d_seq);
-	memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
-	write_seqcount_end(&dentry->d_seq);
-	spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_update_name_case);
-
 static void swap_names(struct dentry *dentry, struct dentry *target)
 {
 	if (unlikely(dname_external(target))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..cee70bf207fc 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -271,8 +271,6 @@ extern void d_rehash(struct dentry *);
  
 extern void d_add(struct dentry *, struct inode *);
 
-extern void dentry_update_name_case(struct dentry *, const struct qstr *);
-
 /* used for rename() and baskets */
 extern void d_move(struct dentry *, struct dentry *);
 extern void d_exchange(struct dentry *, struct dentry *);
-- 
cgit v1.2.3


From d0dd63a8aee1ef89f2e48e554b796b9f9e4fcadb Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:42 -0700
Subject: time: Introduce struct __kernel_itimerspec

struct itimerspec is not y2038-safe.

Introduce a new struct __kernel_itimerspec based on the kernel internal
y2038-safe struct itimerspec64.

The definition of struct __kernel_itimerspec includes two struct
__kernel_timespec.

Since struct __kernel_timespec has the same representation in native and
compat modes, so does struct __kernel_itimerspec. This helps have a common
entry point for syscalls using struct __kernel_itimerspec.

New y2038-safe syscalls will use this new type. Since most of the new
syscalls are just an update to the native syscalls with the type update,
place the new definition under CONFIG_64BIT_TIME. This helps architectures
that do not support the above config to keep using the old definition of
struct itimerspec.

Also change the get/put_itimerspec64 to use struct__kernel_itimerspec.
This will help 32 bit architectures to use the new syscalls when
architectures select CONFIG_64BIT_TIME.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-2-deepa.kernel@gmail.com
---
 include/linux/time.h      | 4 ++--
 include/linux/time64.h    | 1 +
 include/uapi/linux/time.h | 7 +++++++
 kernel/time/time.c        | 4 ++--
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index aed74463592d..27d83fd2ae61 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -14,9 +14,9 @@ int get_timespec64(struct timespec64 *ts,
 int put_timespec64(const struct timespec64 *ts,
 		struct __kernel_timespec __user *uts);
 int get_itimerspec64(struct itimerspec64 *it,
-			const struct itimerspec __user *uit);
+			const struct __kernel_itimerspec __user *uit);
 int put_itimerspec64(const struct itimerspec64 *it,
-			struct itimerspec __user *uit);
+			struct __kernel_itimerspec __user *uit);
 
 extern time64_t mktime64(const unsigned int year, const unsigned int mon,
 			const unsigned int day, const unsigned int hour,
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 0a7b2f79cec7..05634afba0db 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -12,6 +12,7 @@ typedef __u64 timeu64_t;
  */
 #ifndef CONFIG_64BIT_TIME
 #define __kernel_timespec timespec
+#define __kernel_itimerspec itimerspec
 #endif
 
 #include <uapi/linux/time.h>
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index fcf936656493..6b56a2208be7 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -49,6 +49,13 @@ struct __kernel_timespec {
 };
 #endif
 
+#ifndef __kernel_itimerspec
+struct __kernel_itimerspec {
+	struct __kernel_timespec it_interval;    /* timer period */
+	struct __kernel_timespec it_value;       /* timer expiration */
+};
+#endif
+
 /*
  * legacy timeval structure, only embedded in structures that
  * traditionally used 'timeval' to pass time intervals (not absolute
diff --git a/kernel/time/time.c b/kernel/time/time.c
index b1225db61eb2..c0195225fdce 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -927,7 +927,7 @@ int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
 EXPORT_SYMBOL_GPL(compat_put_timespec64);
 
 int get_itimerspec64(struct itimerspec64 *it,
-			const struct itimerspec __user *uit)
+			const struct __kernel_itimerspec __user *uit)
 {
 	int ret;
 
@@ -942,7 +942,7 @@ int get_itimerspec64(struct itimerspec64 *it,
 EXPORT_SYMBOL_GPL(get_itimerspec64);
 
 int put_itimerspec64(const struct itimerspec64 *it,
-			struct itimerspec __user *uit)
+			struct __kernel_itimerspec __user *uit)
 {
 	int ret;
 
-- 
cgit v1.2.3


From afef05cf238cfcecdc5ea9b06f31027b13ce6214 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:43 -0700
Subject: time: Enable get/put_compat_itimerspec64 always

This will aid in enabling the compat syscalls on 32-bit architectures later
on.

Also move compat_itimerspec and related defines to compat_time.h.  The
compat_time.h file will eventually be deleted.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-3-deepa.kernel@gmail.com
---
 include/linux/compat.h      |  9 ---------
 include/linux/compat_time.h |  9 +++++++++
 kernel/compat.c             | 29 -----------------------------
 kernel/time/time.c          | 21 +++++++++++++++++++++
 4 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index b1a5562b3215..18a13f2df14b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -109,11 +109,6 @@ typedef	compat_ulong_t		compat_aio_context_t;
 struct compat_sel_arg_struct;
 struct rusage;
 
-struct compat_itimerspec {
-	struct compat_timespec it_interval;
-	struct compat_timespec it_value;
-};
-
 struct compat_utimbuf {
 	compat_time_t		actime;
 	compat_time_t		modtime;
@@ -294,10 +289,6 @@ extern int compat_get_timespec(struct timespec *, const void __user *);
 extern int compat_put_timespec(const struct timespec *, void __user *);
 extern int compat_get_timeval(struct timeval *, const void __user *);
 extern int compat_put_timeval(const struct timeval *, void __user *);
-extern int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits);
-extern int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits);
 
 struct compat_iovec {
 	compat_uptr_t	iov_base;
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
index 31f2774f1994..e70bfd1d2c3f 100644
--- a/include/linux/compat_time.h
+++ b/include/linux/compat_time.h
@@ -17,7 +17,16 @@ struct compat_timeval {
 	s32		tv_usec;
 };
 
+struct compat_itimerspec {
+	struct compat_timespec it_interval;
+	struct compat_timespec it_value;
+};
+
 extern int compat_get_timespec64(struct timespec64 *, const void __user *);
 extern int compat_put_timespec64(const struct timespec64 *, void __user *);
+extern int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits);
+extern int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits);
 
 #endif /* _LINUX_COMPAT_TIME_H */
diff --git a/kernel/compat.c b/kernel/compat.c
index 702aa846ddac..8e40efc2928a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -324,35 +324,6 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t,  pid, unsigned int, len,
 	return ret;
 }
 
-/* Todo: Delete these extern declarations when get/put_compat_itimerspec64()
- * are moved to kernel/time/time.c .
- */
-extern int __compat_get_timespec64(struct timespec64 *ts64,
-				   const struct compat_timespec __user *cts);
-extern int __compat_put_timespec64(const struct timespec64 *ts64,
-				   struct compat_timespec __user *cts);
-
-int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits)
-{
-
-	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_get_timespec64(&its->it_value, &uits->it_value))
-		return -EFAULT;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
-
-int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits)
-{
-	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_put_timespec64(&its->it_value, &uits->it_value))
-		return -EFAULT;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
-
 /*
  * We currently only need the following fields from the sigevent
  * structure: sigev_value, sigev_signo, sig_notify and (sometimes
diff --git a/kernel/time/time.c b/kernel/time/time.c
index c0195225fdce..72caf051901c 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -955,3 +955,24 @@ int put_itimerspec64(const struct itimerspec64 *it,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(put_itimerspec64);
+
+int get_compat_itimerspec64(struct itimerspec64 *its,
+			const struct compat_itimerspec __user *uits)
+{
+
+	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_get_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+
+int put_compat_itimerspec64(const struct itimerspec64 *its,
+			struct compat_itimerspec __user *uits)
+{
+	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
+	    __compat_put_timespec64(&its->it_value, &uits->it_value))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
-- 
cgit v1.2.3


From 6ff84735070276d72af716e21c3214ee20d60e70 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 16 Jun 2018 22:11:44 -0700
Subject: time: Change types to new y2038 safe __kernel_itimerspec

timer_set/gettime and timerfd_set/get apis use struct itimerspec at the
user interface layer.  struct itimerspec is not y2038-safe.  Change these
interfaces to use y2038-safe struct __kernel_itimerspec instead.  This will
help define new syscalls when 32bit architectures select CONFIG_64BIT_TIME.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: arnd@arndb.de
Cc: viro@zeniv.linux.org.uk
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: y2038@lists.linaro.org
Link: https://lkml.kernel.org/r/20180617051144.29756-4-deepa.kernel@gmail.com
---
 fs/timerfd.c               |  8 ++++----
 include/linux/syscalls.h   | 10 +++++-----
 kernel/time/posix-timers.c | 12 +++++++-----
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/timerfd.c b/fs/timerfd.c
index d84a2bee4f82..8bb926253f88 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -533,8 +533,8 @@ static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
 }
 
 SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
-		const struct itimerspec __user *, utmr,
-		struct itimerspec __user *, otmr)
+		const struct __kernel_itimerspec __user *, utmr,
+		struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 new, old;
 	int ret;
@@ -550,7 +550,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	return ret;
 }
 
-SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
+SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
 {
 	struct itimerspec64 kotmr;
 	int ret = do_timerfd_gettime(ufd, &kotmr);
@@ -559,7 +559,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
 	return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 		const struct compat_itimerspec __user *, utmr,
 		struct compat_itimerspec __user *, otmr)
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 73810808cdf2..38b9ec152024 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -501,9 +501,9 @@ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
 /* fs/timerfd.c */
 asmlinkage long sys_timerfd_create(int clockid, int flags);
 asmlinkage long sys_timerfd_settime(int ufd, int flags,
-				    const struct itimerspec __user *utmr,
-				    struct itimerspec __user *otmr);
-asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
+				    const struct __kernel_itimerspec __user *utmr,
+				    struct __kernel_itimerspec __user *otmr);
+asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
@@ -568,10 +568,10 @@ asmlinkage long sys_timer_create(clockid_t which_clock,
 				 struct sigevent __user *timer_event_spec,
 				 timer_t __user * created_timer_id);
 asmlinkage long sys_timer_gettime(timer_t timer_id,
-				struct itimerspec __user *setting);
+				struct __kernel_itimerspec __user *setting);
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
-				const struct itimerspec __user *new_setting,
+				const struct __kernel_itimerspec __user *new_setting,
 				struct itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index fcf90a10c43a..80d59333c76e 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -743,7 +743,7 @@ static int do_timer_gettime(timer_t timer_id,  struct itimerspec64 *setting)
 
 /* Get the time remaining on a POSIX.1b interval timer. */
 SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		struct itimerspec __user *, setting)
+		struct __kernel_itimerspec __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
@@ -755,7 +755,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	return ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
+
 COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 		       struct compat_itimerspec __user *, setting)
 {
@@ -768,6 +769,7 @@ COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 	}
 	return ret;
 }
+
 #endif
 
 /*
@@ -906,8 +908,8 @@ retry:
 
 /* Set a POSIX.1b interval timer */
 SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		const struct itimerspec __user *, new_setting,
-		struct itimerspec __user *, old_setting)
+		const struct __kernel_itimerspec __user *, new_setting,
+		struct __kernel_itimerspec __user *, old_setting)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old_setting ? &old_spec : NULL;
@@ -927,7 +929,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 	return error;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 		       struct compat_itimerspec __user *, new,
 		       struct compat_itimerspec __user *, old)
-- 
cgit v1.2.3


From faef87723ace12e5f685437c1e68cbecb69a7a89 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 15 Jun 2018 13:08:51 +0200
Subject: dma-noncoherent: add a arch_sync_dma_for_cpu_all hook

The MIPS bmips platform needs a global flush when transferring ownership
back to the CPU.  Add a hook for that to the dma-noncoherent
implementation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Patchwork: https://patchwork.linux-mips.org/patch/19549/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Tom Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: iommu@lists.linux-foundation.org
Cc: linux-mips@linux-mips.org
---
 include/linux/dma-noncoherent.h | 8 ++++++++
 kernel/dma/noncoherent.c        | 8 ++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 10b2654d549b..a0aa00cc909d 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -44,4 +44,12 @@ static inline void arch_sync_dma_for_cpu(struct device *dev,
 }
 #endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
 
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
+void arch_sync_dma_for_cpu_all(struct device *dev);
+#else
+static inline void arch_sync_dma_for_cpu_all(struct device *dev)
+{
+}
+#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */
+
 #endif /* _LINUX_DMA_NONCOHERENT_H */
diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c
index 79e9a757387f..031fe235d958 100644
--- a/kernel/dma/noncoherent.c
+++ b/kernel/dma/noncoherent.c
@@ -49,11 +49,13 @@ static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
 	return nents;
 }
 
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
 static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
 	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+	arch_sync_dma_for_cpu_all(dev);
 }
 
 static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
@@ -64,6 +66,7 @@ static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
 
 	for_each_sg(sgl, sg, nents, i)
 		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+	arch_sync_dma_for_cpu_all(dev);
 }
 
 static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
@@ -89,7 +92,8 @@ const struct dma_map_ops dma_noncoherent_ops = {
 	.sync_sg_for_device	= dma_noncoherent_sync_sg_for_device,
 	.map_page		= dma_noncoherent_map_page,
 	.map_sg			= dma_noncoherent_map_sg,
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
 	.sync_single_for_cpu	= dma_noncoherent_sync_single_for_cpu,
 	.sync_sg_for_cpu	= dma_noncoherent_sync_sg_for_cpu,
 	.unmap_page		= dma_noncoherent_unmap_page,
-- 
cgit v1.2.3


From 996302c5e85650722f1e5aeaeaaac12f9f362bf8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 24 Jun 2018 00:37:44 +0900
Subject: module: replace VMLINUX_SYMBOL_STR() with __stringify() or string
 literal

With the special case handling for Blackfin and Metag was removed by
commit 94e58e0ac312 ("export.h: remove code for prefixing symbols with
underscore"), VMLINUX_SYMBOL_STR() is now equivalent to __stringify().

Replace the remaining usages to prepare for the entire removal of
VMLINUX_SYMBOL_STR().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/linux/module.h | 4 ++--
 kernel/module.c        | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index d44df9b2c131..f807f15bebbe 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -266,7 +266,7 @@ extern int modules_disabled; /* for sysctl */
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
 void *__symbol_get_gpl(const char *symbol);
-#define symbol_get(x) ((typeof(&x))(__symbol_get(VMLINUX_SYMBOL_STR(x))))
+#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x))))
 
 /* modules using other modules: kdb wants to see this. */
 struct module_use {
@@ -575,7 +575,7 @@ extern void __noreturn __module_put_and_exit(struct module *mod,
 #ifdef CONFIG_MODULE_UNLOAD
 int module_refcount(struct module *mod);
 void __symbol_put(const char *symbol);
-#define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x))
+#define symbol_put(x) __symbol_put(__stringify(x))
 void symbol_put_addr(void *addr);
 
 /* Sometimes we know we already have a refcount, and it's easier not
diff --git a/kernel/module.c b/kernel/module.c
index b6b3a3c58af1..ba45a84e4287 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1341,14 +1341,12 @@ static inline int check_modstruct_version(const struct load_info *info,
 	 * locking is necessary -- use preempt_disable() to placate lockdep.
 	 */
 	preempt_disable();
-	if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
-			 &crc, true, false)) {
+	if (!find_symbol("module_layout", NULL, &crc, true, false)) {
 		preempt_enable();
 		BUG();
 	}
 	preempt_enable();
-	return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
-			     mod, crc);
+	return check_version(info, "module_layout", mod, crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
-- 
cgit v1.2.3


From 6136f97cd2dd2b5a88d62bd73cc663a803124c09 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Thu, 21 Jun 2018 19:27:17 -0700
Subject: HID: i2c-hid: Add vddl regulator control

Some wacom w9013 devices have a vddl supply for "low valtage"
requirements. Add support in this driver to turn on this low voltage
supply. We can also drop a handful of error messages because the
regulator core is already printing an error when bulk regulators fail to
enable or disable.

Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Dmitry Torokhov <dtor@chromium.org>
Cc: Doug Anderson <dianders@chromium.org>
Acked-by: Rob Herring <robh@kernel.org>
Cc: <devicetree@vger.kernel.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 .../devicetree/bindings/input/hid-over-i2c.txt     |  3 +-
 drivers/hid/i2c-hid/i2c-hid.c                      | 48 +++++++++++-----------
 include/linux/platform_data/i2c-hid.h              |  7 ++--
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/input/hid-over-i2c.txt b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
index 4d3da9d91de4..89e6ab89ba38 100644
--- a/Documentation/devicetree/bindings/input/hid-over-i2c.txt
+++ b/Documentation/devicetree/bindings/input/hid-over-i2c.txt
@@ -26,7 +26,8 @@ device-specific compatible properties, which should be used in addition to the
 
 - compatible:
   * "wacom,w9013" (Wacom W9013 digitizer). Supports:
-    - vdd-supply
+    - vdd-supply (3.3V)
+    - vddl-supply (1.8V)
     - post-power-on-delay-ms
 
 - vdd-supply: phandle of the regulator that provides the supply voltage.
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index c7d6738dc524..4f532d9238fb 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1021,21 +1021,20 @@ static int i2c_hid_probe(struct i2c_client *client,
 	/* Parse platform agnostic common properties from ACPI / device tree */
 	i2c_hid_fwnode_probe(client, &ihid->pdata);
 
-	ihid->pdata.supply = devm_regulator_get(&client->dev, "vdd");
-	if (IS_ERR(ihid->pdata.supply)) {
-		ret = PTR_ERR(ihid->pdata.supply);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&client->dev, "Failed to get regulator: %d\n",
-				ret);
-		goto err;
-	}
+	ihid->pdata.supplies[0].supply = "vdd";
+	ihid->pdata.supplies[1].supply = "vddl";
+
+	ret = devm_regulator_bulk_get(&client->dev,
+				      ARRAY_SIZE(ihid->pdata.supplies),
+				      ihid->pdata.supplies);
+	if (ret)
+		return ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
+				    ihid->pdata.supplies);
+	if (ret < 0)
+		return ret;
 
-	ret = regulator_enable(ihid->pdata.supply);
-	if (ret < 0) {
-		dev_err(&client->dev, "Failed to enable regulator: %d\n",
-			ret);
-		goto err;
-	}
 	if (ihid->pdata.post_power_delay_ms)
 		msleep(ihid->pdata.post_power_delay_ms);
 
@@ -1122,9 +1121,8 @@ err_pm:
 	pm_runtime_disable(&client->dev);
 
 err_regulator:
-	regulator_disable(ihid->pdata.supply);
-
-err:
+	regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+			       ihid->pdata.supplies);
 	i2c_hid_free_buffers(ihid);
 	return ret;
 }
@@ -1147,7 +1145,8 @@ static int i2c_hid_remove(struct i2c_client *client)
 	if (ihid->bufsize)
 		i2c_hid_free_buffers(ihid);
 
-	regulator_disable(ihid->pdata.supply);
+	regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+			       ihid->pdata.supplies);
 
 	return 0;
 }
@@ -1198,9 +1197,8 @@ static int i2c_hid_suspend(struct device *dev)
 			hid_warn(hid, "Failed to enable irq wake: %d\n",
 				wake_status);
 	} else {
-		ret = regulator_disable(ihid->pdata.supply);
-		if (ret < 0)
-			hid_warn(hid, "Failed to disable supply: %d\n", ret);
+		regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
+				       ihid->pdata.supplies);
 	}
 
 	return 0;
@@ -1215,9 +1213,11 @@ static int i2c_hid_resume(struct device *dev)
 	int wake_status;
 
 	if (!device_may_wakeup(&client->dev)) {
-		ret = regulator_enable(ihid->pdata.supply);
-		if (ret < 0)
-			hid_warn(hid, "Failed to enable supply: %d\n", ret);
+		ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
+					    ihid->pdata.supplies);
+		if (ret)
+			hid_warn(hid, "Failed to enable supplies: %d\n", ret);
+
 		if (ihid->pdata.post_power_delay_ms)
 			msleep(ihid->pdata.post_power_delay_ms);
 	} else if (ihid->irq_wake_enabled) {
diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h
index 1fb088239d12..c628bb5e1061 100644
--- a/include/linux/platform_data/i2c-hid.h
+++ b/include/linux/platform_data/i2c-hid.h
@@ -12,14 +12,13 @@
 #ifndef __LINUX_I2C_HID_H
 #define __LINUX_I2C_HID_H
 
+#include <linux/regulator/consumer.h>
 #include <linux/types.h>
 
-struct regulator;
-
 /**
  * struct i2chid_platform_data - used by hid over i2c implementation.
  * @hid_descriptor_address: i2c register where the HID descriptor is stored.
- * @supply: regulator for powering on the device.
+ * @supplies: regulators for powering on the device.
  * @post_power_delay_ms: delay after powering on before device is usable.
  *
  * Note that it is the responsibility of the platform driver (or the acpi 5.0
@@ -35,7 +34,7 @@ struct regulator;
  */
 struct i2c_hid_platform_data {
 	u16 hid_descriptor_address;
-	struct regulator *supply;
+	struct regulator_bulk_data supplies[2];
 	int post_power_delay_ms;
 };
 
-- 
cgit v1.2.3


From 8f732850df1b2b4d8d719f7e606dfb3050e7ea11 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 31 May 2018 13:49:29 +0200
Subject: HID: core: allow concurrent registration of drivers

Detected on the Dell XPS 9365.

The laptop has 2 devices that benefit from the hid-generic auto-unbinding.
When those 2 devices are presented to the userspace, udev loads both wacom and
hid-multitouch. When this happens, the code in __hid_bus_reprobe_drivers() is
called concurrently and the second device gets reprobed twice.

An other bug in the power_supply subsystem prevent to remove the wacom driver
if it just finished its initialization, which basically kills the wacom node.

[jkosina@suse.cz: reformat changelog a bit]
Fixes c17a7476e4c4 ("HID: core: rewrite the hid-generic automatic unbind")
Cc: stable@vger.kernel.org # v4.17
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 5 ++++-
 include/linux/hid.h    | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 355dc7e49562..a460ec147aee 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1949,6 +1949,8 @@ static int hid_device_probe(struct device *dev)
 	}
 	hdev->io_started = false;
 
+	clear_bit(ffs(HID_STAT_REPROBED), &hdev->status);
+
 	if (!hdev->driver) {
 		id = hid_match_device(hdev, hdrv);
 		if (id == NULL) {
@@ -2212,7 +2214,8 @@ static int __hid_bus_reprobe_drivers(struct device *dev, void *data)
 	struct hid_device *hdev = to_hid_device(dev);
 
 	if (hdev->driver == hdrv &&
-	    !hdrv->match(hdev, hid_ignore_special_drivers))
+	    !hdrv->match(hdev, hid_ignore_special_drivers) &&
+	    !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status))
 		return device_reprobe(dev);
 
 	return 0;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 41a3d5775394..773bcb1d4044 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -511,6 +511,7 @@ struct hid_output_fifo {
 #define HID_STAT_ADDED		BIT(0)
 #define HID_STAT_PARSED		BIT(1)
 #define HID_STAT_DUP_DETECTED	BIT(2)
+#define HID_STAT_REPROBED	BIT(3)
 
 struct hid_input {
 	struct list_head list;
@@ -579,7 +580,7 @@ struct hid_device {							/* device report descriptor */
 	bool battery_avoid_query;
 #endif
 
-	unsigned int status;						/* see STAT flags above */
+	unsigned long status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
 	unsigned quirks;						/* Various quirks the device can pull on us */
 	bool io_started;						/* If IO has started */
-- 
cgit v1.2.3


From 379cacc5e566f7197bdeb1ea3e99219d3e880c0a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 8 Jun 2018 16:59:37 -0400
Subject: USB: Report wakeup events on root-hub ports

When a USB device attached to a root-hub port sends a wakeup request
to a sleeping system, we do not report the wakeup event to the PM
core.  This is because a system resume involves waking up all
suspended USB ports as quickly as possible; without the normal
USB_RESUME_TIMEOUT delay, the host controller driver doesn't set the
USB_PORT_STAT_C_SUSPEND flag and so usb_port_resume() doesn't realize
that a wakeup request was received.

However, some environments (such as Chrome OS) want to have all wakeup
events reported so they can be ascribed to the appropriate device.  To
accommodate these environments, this patch adds a new routine to the
hub driver and a corresponding new HCD method to be used when a root
hub resumes.  The HCD method returns a bitmap of ports that have
initiated a wakeup signal but not yet completed resuming.  The hub
driver can then report to the PM core that the child devices attached
to these ports initiated a wakeup event.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Anshuman Gupta <anshuman.gupta@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c  | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/hcd.h |  1 +
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index fcae521df29b..fef5af7aab92 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3656,12 +3656,54 @@ static int hub_suspend(struct usb_interface *intf, pm_message_t msg)
 	return 0;
 }
 
+/* Report wakeup requests from the ports of a resuming root hub */
+static void report_wakeup_requests(struct usb_hub *hub)
+{
+	struct usb_device	*hdev = hub->hdev;
+	struct usb_device	*udev;
+	struct usb_hcd		*hcd;
+	unsigned long		resuming_ports;
+	int			i;
+
+	if (hdev->parent)
+		return;		/* Not a root hub */
+
+	hcd = bus_to_hcd(hdev->bus);
+	if (hcd->driver->get_resuming_ports) {
+
+		/*
+		 * The get_resuming_ports() method returns a bitmap (origin 0)
+		 * of ports which have started wakeup signaling but have not
+		 * yet finished resuming.  During system resume we will
+		 * resume all the enabled ports, regardless of any wakeup
+		 * signals, which means the wakeup requests would be lost.
+		 * To prevent this, report them to the PM core here.
+		 */
+		resuming_ports = hcd->driver->get_resuming_ports(hcd);
+		for (i = 0; i < hdev->maxchild; ++i) {
+			if (test_bit(i, &resuming_ports)) {
+				udev = hub->ports[i]->child;
+				if (udev)
+					pm_wakeup_event(&udev->dev, 0);
+			}
+		}
+	}
+}
+
 static int hub_resume(struct usb_interface *intf)
 {
 	struct usb_hub *hub = usb_get_intfdata(intf);
 
 	dev_dbg(&intf->dev, "%s\n", __func__);
 	hub_activate(hub, HUB_RESUME);
+
+	/*
+	 * This should be called only for system resume, not runtime resume.
+	 * We can't tell the difference here, so some wakeup requests will be
+	 * reported at the wrong time or more than once.  This shouldn't
+	 * matter much, so long as they do get reported.
+	 */
+	report_wakeup_requests(hub);
 	return 0;
 }
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 34a6ded6f319..97e2ddec18b1 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -322,6 +322,7 @@ struct hc_driver {
 	int	(*bus_suspend)(struct usb_hcd *);
 	int	(*bus_resume)(struct usb_hcd *);
 	int	(*start_port_reset)(struct usb_hcd *, unsigned port_num);
+	unsigned long	(*get_resuming_ports)(struct usb_hcd *);
 
 		/* force handover of high-speed port to full-speed companion */
 	void	(*relinquish_port)(struct usb_hcd *, int);
-- 
cgit v1.2.3


From e4db5b61c572475bbbcf63e3c8a2606bfccf2c9d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:26:02 +0200
Subject: xfrm: policy: remove pcpu policy cache

Kristian Evensen says:
  In a project I am involved in, we are running ipsec (Strongswan) on
  different mt7621-based routers. Each router is configured as an
  initiator and has around ~30 tunnels to different responders (running
  on misc. devices). Before the flow cache was removed (kernel 4.9), we
  got a combined throughput of around 70Mbit/s for all tunnels on one
  router. However, we recently switched to kernel 4.14 (4.14.48), and
  the total throughput is somewhere around 57Mbit/s (best-case). I.e., a
  drop of around 20%. Reverting the flow cache removal restores, as
  expected, performance levels to that of kernel 4.9.

When pcpu xdst exists, it has to be validated first before it can be
used.

A negative hit thus increases cost vs. no-cache.

As number of tunnels increases, hit rate decreases so this pcpu caching
isn't a viable strategy.

Furthermore, the xdst cache also needs to run with BH off, so when
removing this the bh disable/enable pairs can be removed too.

Kristian tested a 4.14.y backport of this change and reported
increased performance:

  In our tests, the throughput reduction has been reduced from around -20%
  to -5%. We also see that the overall throughput is independent of the
  number of tunnels, while before the throughput was reduced as the number
  of tunnels increased.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |   1 -
 net/xfrm/xfrm_device.c |  10 ----
 net/xfrm/xfrm_policy.c | 139 +------------------------------------------------
 net/xfrm/xfrm_state.c  |   5 +-
 4 files changed, 3 insertions(+), 152 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3fa578a6a819..a5378613a49c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -332,7 +332,6 @@ int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int fam
 void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo);
 void km_policy_notify(struct xfrm_policy *xp, int dir,
 		      const struct km_event *c);
-void xfrm_policy_cache_flush(void);
 void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 16c1230d20fa..11d56a44e9e8 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -307,12 +307,6 @@ static int xfrm_dev_register(struct net_device *dev)
 	return xfrm_api_check(dev);
 }
 
-static int xfrm_dev_unregister(struct net_device *dev)
-{
-	xfrm_policy_cache_flush();
-	return NOTIFY_DONE;
-}
-
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
 	return xfrm_api_check(dev);
@@ -323,7 +317,6 @@ static int xfrm_dev_down(struct net_device *dev)
 	if (dev->features & NETIF_F_HW_ESP)
 		xfrm_dev_state_flush(dev_net(dev), dev, true);
 
-	xfrm_policy_cache_flush();
 	return NOTIFY_DONE;
 }
 
@@ -335,9 +328,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 	case NETDEV_REGISTER:
 		return xfrm_dev_register(dev);
 
-	case NETDEV_UNREGISTER:
-		return xfrm_dev_unregister(dev);
-
 	case NETDEV_FEAT_CHANGE:
 		return xfrm_dev_feat_change(dev);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d960ea6657b5..ef75891450e7 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,8 +45,6 @@ struct xfrm_flo {
 	u8 flags;
 };
 
-static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst);
-static struct work_struct *xfrm_pcpu_work __read_mostly;
 static DEFINE_SPINLOCK(xfrm_if_cb_lock);
 static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
 
@@ -1732,108 +1730,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 
 }
 
-static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old)
-{
-	this_cpu_write(xfrm_last_dst, xdst);
-	if (old)
-		dst_release(&old->u.dst);
-}
-
-static void __xfrm_pcpu_work_fn(void)
-{
-	struct xfrm_dst *old;
-
-	old = this_cpu_read(xfrm_last_dst);
-	if (old && !xfrm_bundle_ok(old))
-		xfrm_last_dst_update(NULL, old);
-}
-
-static void xfrm_pcpu_work_fn(struct work_struct *work)
-{
-	local_bh_disable();
-	rcu_read_lock();
-	__xfrm_pcpu_work_fn();
-	rcu_read_unlock();
-	local_bh_enable();
-}
-
-void xfrm_policy_cache_flush(void)
-{
-	struct xfrm_dst *old;
-	bool found = false;
-	int cpu;
-
-	might_sleep();
-
-	local_bh_disable();
-	rcu_read_lock();
-	for_each_possible_cpu(cpu) {
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			if (smp_processor_id() == cpu) {
-				__xfrm_pcpu_work_fn();
-				continue;
-			}
-			found = true;
-			break;
-		}
-	}
-
-	rcu_read_unlock();
-	local_bh_enable();
-
-	if (!found)
-		return;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		bool bundle_release;
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		bundle_release = old && !xfrm_bundle_ok(old);
-		rcu_read_unlock();
-
-		if (!bundle_release)
-			continue;
-
-		if (cpu_online(cpu)) {
-			schedule_work_on(cpu, &xfrm_pcpu_work[cpu]);
-			continue;
-		}
-
-		rcu_read_lock();
-		old = per_cpu(xfrm_last_dst, cpu);
-		if (old && !xfrm_bundle_ok(old)) {
-			per_cpu(xfrm_last_dst, cpu) = NULL;
-			dst_release(&old->u.dst);
-		}
-		rcu_read_unlock();
-	}
-
-	put_online_cpus();
-}
-
-static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst,
-				struct xfrm_state * const xfrm[],
-				int num)
-{
-	const struct dst_entry *dst = &xdst->u.dst;
-	int i;
-
-	if (xdst->num_xfrms != num)
-		return false;
-
-	for (i = 0; i < num; i++) {
-		if (!dst || dst->xfrm != xfrm[i])
-			return false;
-		dst = xfrm_dst_child(dst);
-	}
-
-	return xfrm_bundle_ok(xdst);
-}
-
 static struct xfrm_dst *
 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 			       const struct flowi *fl, u16 family,
@@ -1842,7 +1738,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	struct net *net = xp_net(pols[0]);
 	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
-	struct xfrm_dst *xdst, *old;
+	struct xfrm_dst *xdst;
 	struct dst_entry *dst;
 	int err;
 
@@ -1854,22 +1750,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 		return ERR_PTR(err);
 	}
 
-	xdst = this_cpu_read(xfrm_last_dst);
-	if (xdst &&
-	    xdst->u.dst.dev == dst_orig->dev &&
-	    xdst->num_pols == num_pols &&
-	    memcmp(xdst->pols, pols,
-		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
-	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
-		dst_hold(&xdst->u.dst);
-		xfrm_pols_put(pols, num_pols);
-		while (err > 0)
-			xfrm_state_put(xfrm[--err]);
-		return xdst;
-	}
-
-	old = xdst;
-
 	dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
 	if (IS_ERR(dst)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
@@ -1882,9 +1762,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
 	xdst->policy_genid = atomic_read(&pols[0]->genid);
 
-	atomic_set(&xdst->u.dst.__refcnt, 2);
-	xfrm_last_dst_update(xdst, old);
-
 	return xdst;
 }
 
@@ -2085,11 +1962,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	if (num_xfrms <= 0)
 		goto make_dummy_bundle;
 
-	local_bh_disable();
 	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
 					      xflo->dst_orig);
-	local_bh_enable();
-
 	if (IS_ERR(xdst)) {
 		err = PTR_ERR(xdst);
 		if (err == -EREMOTE) {
@@ -2181,11 +2055,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 				goto no_transform;
 			}
 
-			local_bh_disable();
 			xdst = xfrm_resolve_and_create_bundle(
 					pols, num_pols, fl,
 					family, dst_orig);
-			local_bh_enable();
 
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
@@ -3035,15 +2907,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
 
 void __init xfrm_init(void)
 {
-	int i;
-
-	xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work),
-				       GFP_KERNEL);
-	BUG_ON(!xfrm_pcpu_work);
-
-	for (i = 0; i < NR_CPUS; i++)
-		INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn);
-
 	register_pernet_subsys(&xfrm_net_ops);
 	xfrm_dev_init();
 	seqcount_init(&xfrm_policy_hash_generation);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3803b6813fc5..e04a510ec992 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -735,10 +735,9 @@ restart:
 	}
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
-	if (cnt) {
+	if (cnt)
 		err = 0;
-		xfrm_policy_cache_flush();
-	}
+
 	return err;
 }
 EXPORT_SYMBOL(xfrm_state_flush);
-- 
cgit v1.2.3


From fd55a281adf6c9b9723d369203fdd92e5ea62a09 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 27 Apr 2018 10:06:03 -0700
Subject: srcu: Introduce srcu_read_{un,}lock_notrace()

Joel Fernandes is using SRCU to protect from-idle tracepoints, which
requires notrace variants of srcu_read_lock() and srcu_read_unlock()
in order to avoid problems with tracepoints in lockdep.  This commit
therefore adds srcu_read_lock_notrace() and srcu_read_unlock_notrace().

[1] http://lkml.kernel.org/r/20180427042656.190746-1-joelaf@google.com

Reported-by: Joel Fernandes <joelaf@google.com>
Intermittently-reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	return retval;
 }
 
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval;
+
+	retval = __srcu_read_lock(sp);
+	return retval;
+}
+
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
  * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+	__srcu_read_unlock(sp, idx);
+}
+
 /**
  * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
  *
-- 
cgit v1.2.3


From 90127d605f403d814f4986436871210bf8ceb335 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 9 May 2018 10:29:18 -0700
Subject: torture: Make online/offline messages appear only for verbose=2

Some bugs reproduce quickly only at high CPU-hotplug rates, so the
rcutorture TREE03 scenario now has only 200 milliseconds spacing between
CPU-hotplug operations.  At this rate, the torture-test pair of console
messages per operation becomes a bit voluminous.  This commit therefore
converts the torture-test set of "verbose" kernel-boot arguments from
bool to int, and prints the extra console messages only when verbose=2.
The default is still verbose=1.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h      |  2 +-
 kernel/locking/locktorture.c |  2 +-
 kernel/rcu/rcuperf.c         |  2 +-
 kernel/rcu/rcutorture.c      |  2 +-
 kernel/torture.c             | 12 ++++++------
 5 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 66272862070b..a55e80817dae 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -79,7 +79,7 @@ void stutter_wait(const char *title);
 int torture_stutter_init(int s);
 
 /* Initialization and cleanup. */
-bool torture_init_begin(char *ttype, bool v);
+bool torture_init_begin(char *ttype, int v);
 void torture_init_end(void);
 bool torture_cleanup_begin(void);
 void torture_cleanup_end(void);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8402b3349dca..4a2e13870a9b 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -57,7 +57,7 @@ torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable.");
 torture_param(int, stat_interval, 60,
 	     "Number of seconds between stats printk()s");
 torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
-torture_param(bool, verbose, true,
+torture_param(int, verbose, 1,
 	     "Enable verbose debugging printk()s");
 
 static char *torture_type = "spin_lock";
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index e232846516b3..fb8094848906 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,7 +88,7 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
 torture_param(bool, shutdown, !IS_ENABLED(MODULE),
 	      "Shutdown at end of performance tests.");
-torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
+torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 
 static char *perf_type = "rcu";
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 42fcb7f05fac..a5540bd831c4 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -101,7 +101,7 @@ torture_param(int, test_boost_interval, 7,
 	     "Interval between boost tests, seconds.");
 torture_param(bool, test_no_idle_hz, true,
 	     "Test support for tickless idle CPUs");
-torture_param(bool, verbose, true,
+torture_param(int, verbose, 1,
 	     "Enable verbose debugging printk()s");
 
 static char *torture_type = "rcu";
diff --git a/kernel/torture.c b/kernel/torture.c
index 3de1efbecd6a..840fd33c1cda 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -53,7 +53,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
 
 static char *torture_type;
-static bool verbose;
+static int verbose;
 
 /* Mediate rmmod and system shutdown.  Concurrent rmmod & shutdown illegal! */
 #define FULLSTOP_DONTSTOP 0	/* Normal operation. */
@@ -98,7 +98,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 	if (!cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
 		return false;
 
-	if (verbose)
+	if (verbose > 1)
 		pr_alert("%s" TORTURE_FLAG
 			 "torture_onoff task: offlining %d\n",
 			 torture_type, cpu);
@@ -111,7 +111,7 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 				 "torture_onoff task: offline %d failed: errno %d\n",
 				 torture_type, cpu, ret);
 	} else {
-		if (verbose)
+		if (verbose > 1)
 			pr_alert("%s" TORTURE_FLAG
 				 "torture_onoff task: offlined %d\n",
 				 torture_type, cpu);
@@ -147,7 +147,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
 	if (cpu_online(cpu) || !cpu_is_hotpluggable(cpu))
 		return false;
 
-	if (verbose)
+	if (verbose > 1)
 		pr_alert("%s" TORTURE_FLAG
 			 "torture_onoff task: onlining %d\n",
 			 torture_type, cpu);
@@ -160,7 +160,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
 				 "torture_onoff task: online %d failed: errno %d\n",
 				 torture_type, cpu, ret);
 	} else {
-		if (verbose)
+		if (verbose > 1)
 			pr_alert("%s" TORTURE_FLAG
 				 "torture_onoff task: onlined %d\n",
 				 torture_type, cpu);
@@ -647,7 +647,7 @@ static void torture_stutter_cleanup(void)
  * The runnable parameter points to a flag that controls whether or not
  * the test is currently runnable.  If there is no such flag, pass in NULL.
  */
-bool torture_init_begin(char *ttype, bool v)
+bool torture_init_begin(char *ttype, int v)
 {
 	mutex_lock(&fullstop_mutex);
 	if (torture_type != NULL) {
-- 
cgit v1.2.3


From b7403217656dcf6c51f09d0bca7a12db0de8934a Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 19 Jun 2018 10:59:14 +0300
Subject: IB: Make ib_init_ah_attr_from_wc set sgid_attr

The work completion is inspected to determine what dgid table entry was
used to receieve the packet, produces a sgid_attr that matches and sticks
it in the ah_attr.

All callers of this function are now required to release the ah_attr on
success.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cm.c       |   3 +
 drivers/infiniband/core/user_mad.c |   1 +
 drivers/infiniband/core/verbs.c    | 110 +++++++++++++++++++------------------
 include/rdma/ib_verbs.h            |   7 +++
 4 files changed, 67 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0f39a879c91d..11b85933fb39 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1091,6 +1091,9 @@ retest:
 	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
+
+	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
+	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
 	kfree(cm_id_priv->private_data);
 	kfree(cm_id_priv);
 }
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index bb98c9e4a7fd..c34a6852d691 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent,
 		packet->mad.hdr.traffic_class = grh->traffic_class;
 		memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
 		packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
+		rdma_destroy_ah_attr(&ah_attr);
 	}
 
 	if (queue_packet(file, agent, packet))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index b0ad739a7bd0..9a72b88fea80 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -636,16 +636,16 @@ static bool find_gid_index(const union ib_gid *gid,
 	return true;
 }
 
-static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
-				   u16 vlan_id, const union ib_gid *sgid,
-				   enum ib_gid_type gid_type,
-				   u16 *gid_index)
+static const struct ib_gid_attr *
+get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+		       u16 vlan_id, const union ib_gid *sgid,
+		       enum ib_gid_type gid_type)
 {
 	struct find_gid_index_context context = {.vlan_id = vlan_id,
 						 .gid_type = gid_type};
 
-	return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
-				     &context, gid_index);
+	return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+				       &context);
 }
 
 int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -689,37 +689,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
 static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
 				       struct rdma_ah_attr *ah_attr)
 {
-	struct ib_gid_attr sgid_attr;
-	struct ib_global_route *grh;
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
+	const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
 	int hop_limit = 0xff;
-	union ib_gid sgid;
-	int ret;
-
-	grh = rdma_ah_retrieve_grh(ah_attr);
-
-	ret = ib_get_cached_gid(device, rdma_ah_get_port_num(ah_attr),
-				grh->sgid_index, &sgid, &sgid_attr);
-	if (ret || !sgid_attr.ndev) {
-		if (!ret)
-			ret = -ENXIO;
-		return ret;
-	}
+	int ret = 0;
 
 	/* If destination is link local and source GID is RoCEv1,
 	 * IP stack is not used.
 	 */
 	if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
-	    sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+	    sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
 		rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
 				ah_attr->roce.dmac);
-		goto done;
+		return ret;
 	}
 
-	ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+	ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
 					   ah_attr->roce.dmac,
-					   sgid_attr.ndev, &hop_limit);
-done:
-	dev_put(sgid_attr.ndev);
+					   sgid_attr->ndev, &hop_limit);
 
 	grh->hop_limit = hop_limit;
 	return ret;
@@ -734,16 +721,18 @@ done:
  * as sgid and, sgid is used as dgid because sgid contains destinations
  * GID whom to respond to.
  *
+ * On success the caller is responsible to call rdma_destroy_ah_attr on the
+ * attr.
  */
 int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 			    const struct ib_wc *wc, const struct ib_grh *grh,
 			    struct rdma_ah_attr *ah_attr)
 {
 	u32 flow_class;
-	u16 gid_index;
 	int ret;
 	enum rdma_network_type net_type = RDMA_NETWORK_IB;
 	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+	const struct ib_gid_attr *sgid_attr;
 	int hoplimit = 0xff;
 	union ib_gid dgid;
 	union ib_gid sgid;
@@ -774,40 +763,49 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 		if (!(wc->wc_flags & IB_WC_GRH))
 			return -EPROTOTYPE;
 
-		ret = get_sgid_index_from_eth(device, port_num,
-					      vlan_id, &dgid,
-					      gid_type, &gid_index);
-		if (ret)
-			return ret;
+		sgid_attr = get_sgid_attr_from_eth(device, port_num,
+						   vlan_id, &dgid,
+						   gid_type);
+		if (IS_ERR(sgid_attr))
+			return PTR_ERR(sgid_attr);
 
 		flow_class = be32_to_cpu(grh->version_tclass_flow);
-		rdma_ah_set_grh(ah_attr, &sgid,
-				flow_class & 0xFFFFF,
-				(u8)gid_index, hoplimit,
-				(flow_class >> 20) & 0xFF);
-		return ib_resolve_unicast_gid_dmac(device, ah_attr);
+		rdma_move_grh_sgid_attr(ah_attr,
+					&sgid,
+					flow_class & 0xFFFFF,
+					hoplimit,
+					(flow_class >> 20) & 0xFF,
+					sgid_attr);
+
+		ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+		if (ret)
+			rdma_destroy_ah_attr(ah_attr);
+
+		return ret;
 	} else {
 		rdma_ah_set_dlid(ah_attr, wc->slid);
 		rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
 
-		if (wc->wc_flags & IB_WC_GRH) {
-			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
-				ret = ib_find_cached_gid_by_port(device, &dgid,
-								 IB_GID_TYPE_IB,
-								 port_num, NULL,
-								 &gid_index);
-				if (ret)
-					return ret;
-			} else {
-				gid_index = 0;
-			}
+		if ((wc->wc_flags & IB_WC_GRH) == 0)
+			return 0;
 
-			flow_class = be32_to_cpu(grh->version_tclass_flow);
-			rdma_ah_set_grh(ah_attr, &sgid,
+		if (dgid.global.interface_id !=
+					cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+			sgid_attr = rdma_find_gid_by_port(
+				device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
+		} else
+			sgid_attr = rdma_get_gid_attr(device, port_num, 0);
+
+		if (IS_ERR(sgid_attr))
+			return PTR_ERR(sgid_attr);
+		flow_class = be32_to_cpu(grh->version_tclass_flow);
+		rdma_move_grh_sgid_attr(ah_attr,
+					&sgid,
 					flow_class & 0xFFFFF,
-					(u8)gid_index, hoplimit,
-					(flow_class >> 20) & 0xFF);
-		}
+					hoplimit,
+					(flow_class >> 20) & 0xFF,
+					sgid_attr);
+
 		return 0;
 	}
 }
@@ -860,13 +858,17 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
 				   const struct ib_grh *grh, u8 port_num)
 {
 	struct rdma_ah_attr ah_attr;
+	struct ib_ah *ah;
 	int ret;
 
 	ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
 	if (ret)
 		return ERR_PTR(ret);
 
-	return rdma_create_ah(pd, &ah_attr);
+	ah = rdma_create_ah(pd, &ah_attr);
+
+	rdma_destroy_ah_attr(&ah_attr);
+	return ah;
 }
 EXPORT_SYMBOL(ib_create_ah_from_wc);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 995d517c0a76..c01e9c6ed666 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3150,6 +3150,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
  *   ignored unless the work completion indicates that the GRH is valid.
  * @ah_attr: Returned attributes that can be used when creating an address
  *   handle for replying to the message.
+ * When ib_init_ah_attr_from_wc() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_wc().
+ *
  */
 int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 			    const struct ib_wc *wc, const struct ib_grh *grh,
-- 
cgit v1.2.3


From 4ed13a5f2d606d2e6bcc5b8adbf08ed52e76cbb5 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 19 Jun 2018 10:59:17 +0300
Subject: IB/cm: Keep track of the sgid_attr that created the cm id

Hold reference to the the sgid_attr which is used in a cm_id until the
cm_id is destroyed.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cma.c | 67 ++++++++++++++++++++++++++-----------------
 include/rdma/ib_addr.h        |  2 ++
 2 files changed, 43 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 367aa75ac338..de7d2501a740 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -603,46 +603,54 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 	return ret;
 }
 
-static inline int cma_validate_port(struct ib_device *device, u8 port,
-				    enum ib_gid_type gid_type,
-				    union ib_gid *gid,
-				    struct rdma_id_private *id_priv)
+static const struct ib_gid_attr *
+cma_validate_port(struct ib_device *device, u8 port,
+		  enum ib_gid_type gid_type,
+		  union ib_gid *gid,
+		  struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	int bound_if_index = dev_addr->bound_dev_if;
+	const struct ib_gid_attr *sgid_attr;
 	int dev_type = dev_addr->dev_type;
 	struct net_device *ndev = NULL;
-	int ret = -ENODEV;
 
 	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
-		return ret;
+		return ERR_PTR(-ENODEV);
 
 	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
-		return ret;
+		return ERR_PTR(-ENODEV);
 
 	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
 		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
 		if (!ndev)
-			return ret;
+			return ERR_PTR(-ENODEV);
 	} else {
 		gid_type = IB_GID_TYPE_IB;
 	}
 
-	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
-					 ndev, NULL);
-
+	sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
 	if (ndev)
 		dev_put(ndev);
+	return sgid_attr;
+}
 
-	return ret;
+static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
+			       const struct ib_gid_attr *sgid_attr)
+{
+	WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
+	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 }
 
 static int cma_acquire_dev(struct rdma_id_private *id_priv,
 			   struct rdma_id_private *listen_id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
 	struct cma_device *cma_dev;
 	union ib_gid gid, iboe_gid, *gidp;
+	enum ib_gid_type gid_type;
+	enum ib_gid_type default_type;
 	int ret = -ENODEV;
 	u8 port;
 
@@ -662,14 +670,15 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 		port = listen_id_priv->id.port_num;
 		gidp = rdma_protocol_roce(cma_dev->device, port) ?
 		       &iboe_gid : &gid;
-
-		ret = cma_validate_port(cma_dev->device, port,
-					rdma_protocol_ib(cma_dev->device, port) ?
-					IB_GID_TYPE_IB :
-					listen_id_priv->gid_type, gidp,
-					id_priv);
-		if (!ret) {
+		gid_type = rdma_protocol_ib(cma_dev->device, port) ?
+					    IB_GID_TYPE_IB :
+					    listen_id_priv->gid_type;
+		sgid_attr = cma_validate_port(cma_dev->device, port,
+					      gid_type, gidp, id_priv);
+		if (!IS_ERR(sgid_attr)) {
 			id_priv->id.port_num = port;
+			cma_bind_sgid_attr(id_priv, sgid_attr);
+			ret = 0;
 			goto out;
 		}
 	}
@@ -683,14 +692,16 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 
 			gidp = rdma_protocol_roce(cma_dev->device, port) ?
 			       &iboe_gid : &gid;
-
-			ret = cma_validate_port(cma_dev->device, port,
-						rdma_protocol_ib(cma_dev->device, port) ?
-						IB_GID_TYPE_IB :
-						cma_dev->default_gid_type[port - 1],
-						gidp, id_priv);
-			if (!ret) {
+			default_type = cma_dev->default_gid_type[port - 1];
+			gid_type =
+				rdma_protocol_ib(cma_dev->device, port) ?
+						 IB_GID_TYPE_IB : default_type;
+			sgid_attr = cma_validate_port(cma_dev->device, port,
+						      gid_type, gidp, id_priv);
+			if (!IS_ERR(sgid_attr)) {
 				id_priv->id.port_num = port;
+				cma_bind_sgid_attr(id_priv, sgid_attr);
+				ret = 0;
 				goto out;
 			}
 		}
@@ -1706,6 +1717,10 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 		cma_deref_id(id_priv->id.context);
 
 	kfree(id_priv->id.route.path_rec);
+
+	if (id_priv->id.route.addr.dev_addr.sgid_attr)
+		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+
 	put_net(id_priv->id.route.addr.dev_addr.net);
 	kfree(id_priv);
 }
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c2c8b1fdeead..715394f6d18a 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -58,6 +58,7 @@
  * @bound_dev_if:	An optional device interface index.
  * @transport:		The transport type used.
  * @net:		Network namespace containing the bound_dev_if net_dev.
+ * @sgid_attr:		GID attribute to use for identified SGID
  */
 struct rdma_dev_addr {
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
@@ -67,6 +68,7 @@ struct rdma_dev_addr {
 	int bound_dev_if;
 	enum rdma_transport_type transport;
 	struct net *net;
+	const struct ib_gid_attr *sgid_attr;
 	enum rdma_network_type network;
 	int hoplimit;
 };
-- 
cgit v1.2.3


From 815d456ef21a132b60ce67908d289235e9bb896c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 19 Jun 2018 10:59:18 +0300
Subject: IB/cm: Pass the sgid_attr through various events

Make the sgid_attr available along with path information to the event
consumer, this allows the consumer to keep using the same GID table entry
as the event is related to.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cm.c  | 6 ++++--
 drivers/infiniband/core/cma.c | 3 +++
 include/rdma/ib_cm.h          | 3 +++
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index fe1171226c13..00c90d4f27bb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3671,7 +3671,8 @@ error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 }
 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
 
-static void cm_format_sidr_rep_event(struct cm_work *work)
+static void cm_format_sidr_rep_event(struct cm_work *work,
+				     const struct cm_id_private *cm_id_priv)
 {
 	struct cm_sidr_rep_msg *sidr_rep_msg;
 	struct ib_cm_sidr_rep_event_param *param;
@@ -3684,6 +3685,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
 	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
 	param->info = &sidr_rep_msg->info;
 	param->info_len = sidr_rep_msg->info_length;
+	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &sidr_rep_msg->private_data;
 }
 
@@ -3707,7 +3709,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	spin_unlock_irq(&cm_id_priv->lock);
 
-	cm_format_sidr_rep_event(work);
+	cm_format_sidr_rep_event(work, cm_id_priv);
 	cm_process_work(cm_id_priv, work);
 	return 0;
 out:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index de7d2501a740..f0eeb43b388f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3581,6 +3581,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 	id_priv->cm_id.ib = id;
 
 	req.path = id_priv->id.route.path_rec;
+	req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
 	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -3642,6 +3643,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	if (route->num_paths == 2)
 		req.alternate_path = &route->path_rec[1];
 
+	req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
+	/* Alternate path SGID attribute currently unsupported */
 	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.qp_num = id_priv->qp_num;
 	req.qp_type = id_priv->id.qp_type;
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 7979cb04f529..c98d603c0b63 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -246,6 +246,7 @@ struct ib_cm_sidr_rep_event_param {
 	u32			qkey;
 	u32			qpn;
 	void			*info;
+	const struct ib_gid_attr *sgid_attr;
 	u8			info_len;
 };
 
@@ -365,6 +366,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
 struct ib_cm_req_param {
 	struct sa_path_rec	*primary_path;
 	struct sa_path_rec	*alternate_path;
+	const struct ib_gid_attr *ppath_sgid_attr;
 	__be64			service_id;
 	u32			qp_num;
 	enum ib_qp_type		qp_type;
@@ -566,6 +568,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
 
 struct ib_cm_sidr_req_param {
 	struct sa_path_rec	*path;
+	const struct ib_gid_attr *sgid_attr;
 	__be64			service_id;
 	int			timeout_ms;
 	const void		*private_data;
-- 
cgit v1.2.3


From 398391071f2576bbc6351bcb92c78fc432190ac3 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 19 Jun 2018 10:59:19 +0300
Subject: IB/cm: Replace members of sa_path_rec with 'struct sgid_attr *'

While processing a path record entry in CM messages the associated GID
attribute is now also supplied.

Currently for RoCE a netdevice's net namespace pointer and ifindex are
stored in path record entry. Both of these fields of the netdev can change
anytime while processing CM messages. Additionally storing net namespace
without holding reference will lead to use-after-free crash. Therefore it
is removed. Netdevice information for RoCE is instead provided via
referenced gid attribute in ib_cm requests.

Such a design leads to a situation where the kernel can crash when the net
pointer becomes invalid. However today it is always initialized to
init_net, which cannot become invalid. In order to support processing
packets in any arbitrary namespace of the received packet, it is necessary
to avoid such conditions.

This patch removes the dependency on the net pointer and ifindex; instead
it will rely on SGID attribute which contains a pointer to netdev.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cm.c              | 78 +++++++++++++++++++------------
 drivers/infiniband/core/cma.c             |  5 +-
 drivers/infiniband/core/sa_query.c        | 71 +++++++++++++++++-----------
 drivers/infiniband/core/uverbs_marshall.c |  2 -
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  2 +-
 include/rdma/ib_sa.h                      | 49 ++-----------------
 6 files changed, 97 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 00c90d4f27bb..c2b7edf5857f 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
 	return ret;
 }
 
-static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 {
 	struct cm_device *cm_dev;
 	struct cm_port *port = NULL;
 	unsigned long flags;
-	u8 p;
-	struct net_device *ndev = ib_get_ndev_from_path(path);
-
-	read_lock_irqsave(&cm.device_lock, flags);
-	list_for_each_entry(cm_dev, &cm.device_list, list) {
-		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-					sa_conv_pathrec_to_gid_type(path),
-					ndev, &p, NULL)) {
-			port = cm_dev->port[p - 1];
-			break;
+
+	if (attr) {
+		read_lock_irqsave(&cm.device_lock, flags);
+		list_for_each_entry(cm_dev, &cm.device_list, list) {
+			if (cm_dev->ib_device == attr->device) {
+				port = cm_dev->port[attr->port_num - 1];
+				break;
+			}
+		}
+		read_unlock_irqrestore(&cm.device_lock, flags);
+	} else {
+		/* SGID attribute can be NULL in following
+		 * conditions.
+		 * (a) Alternative path
+		 * (b) IB link layer without GRH
+		 * (c) LAP send messages
+		 */
+		read_lock_irqsave(&cm.device_lock, flags);
+		list_for_each_entry(cm_dev, &cm.device_list, list) {
+			attr = rdma_find_gid(cm_dev->ib_device,
+					     &path->sgid,
+					     sa_conv_pathrec_to_gid_type(path),
+					     NULL);
+			if (!IS_ERR(attr)) {
+				port = cm_dev->port[attr->port_num - 1];
+				break;
+			}
 		}
+		read_unlock_irqrestore(&cm.device_lock, flags);
+		if (port)
+			rdma_put_gid_attr(attr);
 	}
-	read_unlock_irqrestore(&cm.device_lock, flags);
-
-	if (ndev)
-		dev_put(ndev);
 	return port;
 }
 
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path,
+			      const struct ib_gid_attr *sgid_attr,
+			      struct cm_av *av,
 			      struct cm_id_private *cm_id_priv)
 {
 	struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 	struct cm_port *port;
 	int ret;
 
-	port = get_cm_port_from_path(path);
+	port = get_cm_port_from_path(path, sgid_attr);
 	if (!port)
 		return -EINVAL;
 	cm_dev = port->cm_dev;
@@ -562,7 +581,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 	 * can be used to return an error response.
 	 */
 	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
-					&new_ah_attr);
+					&new_ah_attr, sgid_attr);
 	if (ret)
 		return ret;
 
@@ -1420,12 +1439,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+	ret = cm_init_av_by_path(param->primary_path,
+				 param->ppath_sgid_attr, &cm_id_priv->av,
 				 cm_id_priv);
 	if (ret)
 		goto error1;
 	if (param->alternate_path) {
-		ret = cm_init_av_by_path(param->alternate_path,
+		ret = cm_init_av_by_path(param->alternate_path, NULL,
 					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret)
 			goto error1;
@@ -1980,10 +2000,6 @@ static int cm_req_handler(struct cm_work *work)
 	if (gid_attr.ndev) {
 		work->path[0].rec_type =
 			sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
-		sa_path_set_ifindex(&work->path[0],
-				    gid_attr.ndev->ifindex);
-		sa_path_set_ndev(&work->path[0],
-				 dev_net(gid_attr.ndev));
 		dev_put(gid_attr.ndev);
 	} else {
 		cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -1999,7 +2015,7 @@ static int cm_req_handler(struct cm_work *work)
 		sa_path_set_dmac(&work->path[0],
 				 cm_id_priv->av.ah_attr.roce.dmac);
 	work->path[0].hop_limit = grh->hop_limit;
-	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+	ret = cm_init_av_by_path(&work->path[0], &gid_attr, &cm_id_priv->av,
 				 cm_id_priv);
 	if (ret) {
 		int err;
@@ -2018,8 +2034,8 @@ static int cm_req_handler(struct cm_work *work)
 		goto rejected;
 	}
 	if (cm_req_has_alt_path(req_msg)) {
-		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
-					 cm_id_priv);
+		ret = cm_init_av_by_path(&work->path[1], NULL,
+					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret) {
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
 				       &work->path[0].sgid,
@@ -3142,7 +3158,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+	ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
 				 cm_id_priv);
 	if (ret)
 		goto out;
@@ -3285,7 +3301,7 @@ static int cm_lap_handler(struct cm_work *work)
 	if (ret)
 		goto unlock;
 
-	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+	cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
 			   cm_id_priv);
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3487,7 +3503,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+	ret = cm_init_av_by_path(param->path, param->sgid_attr,
+				 &cm_id_priv->av,
+				 cm_id_priv);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0eeb43b388f..a735ab4cddda 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2583,8 +2583,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
 	route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
 
 	route->path_rec->roce.route_resolved = true;
-	sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
-	sa_path_set_ifindex(route->path_rec, ndev->ifindex);
 	sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
 	return ndev;
 }
@@ -3510,7 +3508,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 		ib_init_ah_attr_from_path(id_priv->id.device,
 					  id_priv->id.port_num,
 					  id_priv->id.route.path_rec,
-					  &event.param.ud.ah_attr);
+					  &event.param.ud.ah_attr,
+					  rep->sgid_attr);
 		event.param.ud.qp_num = rep->qpn;
 		event.param.ud.qkey = rep->qkey;
 		event.event = RDMA_CM_EVENT_ESTABLISHED;
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index b6da4a6095f1..7005afb8a712 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1229,18 +1229,12 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
 
 static int
 roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
-			     struct sa_path_rec *rec)
+			     struct sa_path_rec *rec,
+			     const struct ib_gid_attr *attr)
 {
 	struct net_device *resolved_dev;
-	struct net_device *ndev;
 	struct net_device *idev;
-	struct rdma_dev_addr dev_addr = {
-		.bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
-				 sa_path_get_ifindex(rec) : 0),
-		.net = sa_path_get_ndev(rec) ?
-			sa_path_get_ndev(rec) :
-			&init_net
-	};
+	struct rdma_dev_addr dev_addr = {};
 	union {
 		struct sockaddr     _sockaddr;
 		struct sockaddr_in  _sockaddr_in;
@@ -1250,6 +1244,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
 
 	if (rec->roce.route_resolved)
 		return 0;
+	if (!attr || !attr->ndev)
+		return -EINVAL;
+
+	dev_addr.bound_dev_if = attr->ndev->ifindex;
+	/* TODO: Use net from the ib_gid_attr once it is added to it,
+	 * until than, limit itself to init_net.
+	 */
+	dev_addr.net = &init_net;
 
 	if (!device->get_netdev)
 		return -EOPNOTSUPP;
@@ -1278,16 +1280,13 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
 		ret = -ENODEV;
 		goto done;
 	}
-	ndev = ib_get_ndev_from_path(rec);
 	rcu_read_lock();
-	if ((ndev && ndev != resolved_dev) ||
+	if (attr->ndev != resolved_dev ||
 	    (resolved_dev != idev &&
 	     !rdma_is_upper_dev_rcu(idev, resolved_dev)))
 		ret = -EHOSTUNREACH;
 	rcu_read_unlock();
 	dev_put(resolved_dev);
-	if (ndev)
-		dev_put(ndev);
 done:
 	dev_put(idev);
 	if (!ret)
@@ -1297,19 +1296,18 @@ done:
 
 static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
 				   struct sa_path_rec *rec,
-				   struct rdma_ah_attr *ah_attr)
+				   struct rdma_ah_attr *ah_attr,
+				   const struct ib_gid_attr *gid_attr)
 {
 	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
-	struct net_device *ndev;
-	const struct ib_gid_attr *gid_attr;
 
-	ndev = ib_get_ndev_from_path(rec);
-	gid_attr =
-		rdma_find_gid_by_port(device, &rec->sgid, type, port_num, ndev);
-	if (ndev)
-		dev_put(ndev);
-	if (IS_ERR(gid_attr))
-		return PTR_ERR(gid_attr);
+	if (!gid_attr) {
+		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+						 port_num, NULL);
+		if (IS_ERR(gid_attr))
+			return PTR_ERR(gid_attr);
+	} else
+		rdma_hold_gid_attr(gid_attr);
 
 	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
 				be32_to_cpu(rec->flow_label),
@@ -1318,9 +1316,26 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
 	return 0;
 }
 
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ *   an SA path record.
+ * @device: Device associated ah attributes initialization.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialization from path record.
+ * @sgid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_destroy_ah_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_path().
+ */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 			      struct sa_path_rec *rec,
-			      struct rdma_ah_attr *ah_attr)
+			      struct rdma_ah_attr *ah_attr,
+			      const struct ib_gid_attr *gid_attr)
 {
 	int ret = 0;
 
@@ -1331,7 +1346,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 	rdma_ah_set_static_rate(ah_attr, rec->rate);
 
 	if (sa_path_is_roce(rec)) {
-		ret = roce_resolve_route_from_path(device, port_num, rec);
+		ret = roce_resolve_route_from_path(device, port_num, rec,
+						   gid_attr);
 		if (ret)
 			return ret;
 
@@ -1348,7 +1364,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 	}
 
 	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
-		ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+		ret = init_ah_attr_grh_fields(device, port_num,
+					      rec, ah_attr, gid_attr);
 	return ret;
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
@@ -1556,8 +1573,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 				  ARRAY_SIZE(path_rec_table),
 				  mad->data, &rec);
 			rec.rec_type = SA_PATH_REC_TYPE_IB;
-			sa_path_set_ndev(&rec, NULL);
-			sa_path_set_ifindex(&rec, 0);
 			sa_path_set_dmac_zero(&rec);
 
 			if (query->conv_pr) {
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index bb372b4713a4..b8d715c68ca4 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
 
 	/* TODO: No need to set this */
 	sa_path_set_dmac_zero(dst);
-	sa_path_set_ndev(dst, NULL);
-	sa_path_set_ifindex(dst, 0);
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 45663f3117e5..983e52b871f3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -770,7 +770,7 @@ static void path_rec_completion(int status,
 		struct rdma_ah_attr av;
 
 		if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
-					       pathrec, &av)) {
+					       pathrec, &av, NULL)) {
 			ah = ipoib_create_ah(dev, priv->pd, &av);
 			rdma_destroy_ah_attr(&av);
 		}
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index bacb144f7780..b6ddf2a1b9d8 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -172,12 +172,7 @@ struct sa_path_rec_ib {
  */
 struct sa_path_rec_roce {
 	bool	route_resolved;
-	u8           dmac[ETH_ALEN];
-	/* ignored in IB */
-	int	     ifindex;
-	/* ignored in IB */
-	struct net  *net;
-
+	u8	dmac[ETH_ALEN];
 };
 
 struct sa_path_rec_opa {
@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 			     enum ib_gid_type gid_type,
 			     struct rdma_ah_attr *ah_attr);
 
-/**
- * ib_init_ah_attr_from_path - Initialize address handle attributes based on
- *   an SA path record.
- */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 			      struct sa_path_rec *rec,
-			      struct rdma_ah_attr *ah_attr);
+			      struct rdma_ah_attr *ah_attr,
+			      const struct ib_gid_attr *sgid_attr);
 
 /**
  * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
 		eth_zero_addr(rec->roce.dmac);
 }
 
-static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
-{
-	if (sa_path_is_roce(rec))
-		rec->roce.ifindex = ifindex;
-}
-
-static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
-{
-	if (sa_path_is_roce(rec))
-		rec->roce.net = net;
-}
-
 static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
 {
 	if (sa_path_is_roce(rec))
 		return rec->roce.dmac;
 	return NULL;
 }
-
-static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
-{
-	if (sa_path_is_roce(rec))
-		return rec->roce.ifindex;
-	return 0;
-}
-
-static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
-{
-	if (sa_path_is_roce(rec))
-		return rec->roce.net;
-	return NULL;
-}
-
-static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
-{
-	return sa_path_get_ndev(rec) ?
-		dev_get_by_index(sa_path_get_ndev(rec),
-				 sa_path_get_ifindex(rec))
-		: NULL;
-}
-
 #endif /* IB_SA_H */
-- 
cgit v1.2.3


From ea8c2d8f6014b74921dd5a9654a623a725d79608 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 19 Jun 2018 10:59:21 +0300
Subject: RDMA/core: Remove unused ib cache functions

Now that all users have been converted to use the version of these APIs
that returns a gid_attr pointer we can delete the old entry points.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/cache.c | 68 -----------------------------------------
 include/rdma/ib_cache.h         | 39 -----------------------
 2 files changed, 107 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index dada33c53188..357a5cb328c7 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -643,30 +643,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 	return 0;
 }
 
-static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
-			      union ib_gid *gid, struct ib_gid_attr *attr)
-{
-	struct ib_gid_table *table;
-
-	table = rdma_gid_table(ib_dev, port);
-
-	if (index < 0 || index >= table->sz)
-		return -EINVAL;
-
-	if (!is_gid_entry_valid(table->data_vec[index]))
-		return -EINVAL;
-
-	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
-	if (attr) {
-		memcpy(attr, &table->data_vec[index]->attr,
-		       sizeof(*attr));
-		if (attr->ndev)
-			dev_hold(attr->ndev);
-	}
-
-	return 0;
-}
-
 /**
  * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
  * a valid GID entry for given search parameters. It searches for the specified
@@ -973,28 +949,6 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
 	return err;
 }
 
-int ib_get_cached_gid(struct ib_device *device,
-		      u8                port_num,
-		      int               index,
-		      union ib_gid     *gid,
-		      struct ib_gid_attr *gid_attr)
-{
-	int res;
-	unsigned long flags;
-	struct ib_gid_table *table;
-
-	if (!rdma_is_port_valid(device, port_num))
-		return -EINVAL;
-
-	table = rdma_gid_table(device, port_num);
-	read_lock_irqsave(&table->rwlock, flags);
-	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
-	read_unlock_irqrestore(&table->rwlock, flags);
-
-	return res;
-}
-EXPORT_SYMBOL(ib_get_cached_gid);
-
 /**
  * rdma_query_gid - Read the GID content from the GID software cache
  * @device:		Device to query the GID
@@ -1102,28 +1056,6 @@ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid,
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
-int ib_find_gid_by_filter(struct ib_device *device,
-			  const union ib_gid *gid,
-			  u8 port_num,
-			  bool (*filter)(const union ib_gid *gid,
-					 const struct ib_gid_attr *,
-					 void *),
-			  void *context, u16 *index)
-{
-	const struct ib_gid_attr *res;
-
-	res = rdma_find_gid_by_filter(device, gid, port_num, filter,
-				      context);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
-	if (index)
-		*index = res->index;
-
-	rdma_put_gid_attr(res);
-	return 0;
-}
-
 int ib_get_cached_pkey(struct ib_device *device,
 		       u8                port_num,
 		       int               index,
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 059f7d894939..1108d4220276 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -37,56 +37,17 @@
 
 #include <rdma/ib_verbs.h>
 
-/**
- * ib_get_cached_gid - Returns a cached GID table entry
- * @device: The device to query.
- * @port_num: The port number of the device to query.
- * @index: The index into the cached GID table to query.
- * @gid: The GID value found at the specified index.
- * @attr: The GID attribute found at the specified index (only in RoCE).
- *   NULL means ignore (output parameter).
- *
- * ib_get_cached_gid() fetches the specified GID table entry stored in
- * the local software cache.
- */
-int ib_get_cached_gid(struct ib_device    *device,
-		      u8                   port_num,
-		      int                  index,
-		      union ib_gid        *gid,
-		      struct ib_gid_attr  *attr);
 int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
 		   union ib_gid *gid);
-
-int ib_find_cached_gid(struct ib_device *device,
-		       const union ib_gid *gid,
-		       enum ib_gid_type gid_type,
-		       struct net_device *ndev,
-		       u8               *port_num,
-		       u16              *index);
 const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
 					const union ib_gid *gid,
 					enum ib_gid_type gid_type,
 					struct net_device *ndev);
-
-int ib_find_cached_gid_by_port(struct ib_device *device,
-			       const union ib_gid *gid,
-			       enum ib_gid_type gid_type,
-			       u8               port_num,
-			       struct net_device *ndev,
-			       u16              *index);
 const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
 						const union ib_gid *gid,
 						enum ib_gid_type gid_type,
 						u8 port,
 						struct net_device *ndev);
-
-int ib_find_gid_by_filter(struct ib_device *device,
-			  const union ib_gid *gid,
-			  u8 port_num,
-			  bool (*filter)(const union ib_gid *gid,
-					 const struct ib_gid_attr *,
-					 void *),
-			  void *context, u16 *index);
 const struct ib_gid_attr *rdma_find_gid_by_filter(
 	struct ib_device *device, const union ib_gid *gid, u8 port_num,
 	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
-- 
cgit v1.2.3


From ca576fbbdc80d26ca46dd881944413e7dc05c21d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Jun 2018 11:23:44 +0300
Subject: RDMA/verbs: Drop kernel variant of create_flow

There are no kernel users of this interface so lets drop it.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/verbs.c | 17 -----------------
 include/rdma/ib_verbs.h         |  2 --
 2 files changed, 19 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9a72b88fea80..5ada09f708f5 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2275,23 +2275,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
 }
 EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
 
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
-			       struct ib_flow_attr *flow_attr,
-			       int domain)
-{
-	struct ib_flow *flow_id;
-	if (!qp->device->create_flow)
-		return ERR_PTR(-EOPNOTSUPP);
-
-	flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
-	if (!IS_ERR(flow_id)) {
-		atomic_inc(&qp->usecnt);
-		flow_id->qp = qp;
-	}
-	return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
 int ib_destroy_flow(struct ib_flow *flow_id)
 {
 	int err;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c01e9c6ed666..1c72ca81e5fa 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3807,8 +3807,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
  */
 int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
 
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
-			       struct ib_flow_attr *flow_attr, int domain);
 int ib_destroy_flow(struct ib_flow *flow_id);
 
 static inline int ib_check_mr_access(int flags)
-- 
cgit v1.2.3


From 1ccddc42da03876f60fe2d0a1b124c27ed5ff201 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Jun 2018 11:23:45 +0300
Subject: RDMA/verbs: Drop kernel variant of destroy_flow

Following the removal of ib_create_flow(), adjust the code to get rid of
ib_destroy_flow() too.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c       |  3 ++-
 drivers/infiniband/core/uverbs_std_types.c |  9 ++++++---
 drivers/infiniband/core/verbs.c            | 12 ------------
 include/rdma/ib_verbs.h                    |  2 --
 4 files changed, 8 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 985dc86d5610..74c5bc934822 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3631,7 +3631,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		kfree(kern_flow_attr);
 	return 0;
 err_copy:
-	ib_destroy_flow(flow_id);
+	if (!qp->device->destroy_flow(flow_id))
+		atomic_dec(&qp->usecnt);
 err_free:
 	ib_uverbs_flow_resources_free(uflow_res);
 err_free_flow_attr:
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 0df0ac9c1de3..c50d73845a2a 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,14 +48,17 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
 static int uverbs_free_flow(struct ib_uobject *uobject,
 			    enum rdma_remove_reason why)
 {
-	int ret;
 	struct ib_flow *flow = (struct ib_flow *)uobject->object;
 	struct ib_uflow_object *uflow =
 		container_of(uobject, struct ib_uflow_object, uobject);
+	struct ib_qp *qp = flow->qp;
+	int ret;
 
-	ret = ib_destroy_flow(flow);
-	if (!ret)
+	ret = qp->device->destroy_flow(flow);
+	if (!ret) {
+		atomic_dec(&qp->usecnt);
 		ib_uverbs_flow_resources_free(uflow->resources);
+	}
 
 	return ret;
 }
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5ada09f708f5..128d94988dd8 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2275,18 +2275,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
 }
 EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
 
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
-	int err;
-	struct ib_qp *qp = flow_id->qp;
-
-	err = qp->device->destroy_flow(flow_id);
-	if (!err)
-		atomic_dec(&qp->usecnt);
-	return err;
-}
-EXPORT_SYMBOL(ib_destroy_flow);
-
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 		       struct ib_mr_status *mr_status)
 {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1c72ca81e5fa..8e726fff30fe 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3807,8 +3807,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
  */
 int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
 
-int ib_destroy_flow(struct ib_flow *flow_id);
-
 static inline int ib_check_mr_access(int flags)
 {
 	/*
-- 
cgit v1.2.3


From d4546c2509b1e9cd082e3682dcec98472e37ee5a Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 24 Jun 2018 14:13:49 +0900
Subject: net: Convert GRO SKB handling to list_head.

Manage pending per-NAPI GRO packets via list_head.

Return an SKB pointer from the GRO receive handlers.  When GRO receive
handlers return non-NULL, it means that this SKB needs to be completed
at this time and removed from the NAPI queue.

Several operations are greatly simplified by this transformation,
especially timing out the oldest SKB in the list when gro_count
exceeds MAX_GRO_SKBS, and napi_gro_flush() which walks the queue
in reverse order.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c        | 11 ++++----
 drivers/net/vxlan.c         | 11 ++++----
 include/linux/etherdevice.h |  3 +-
 include/linux/netdevice.h   | 32 ++++++++++-----------
 include/linux/skbuff.h      |  3 +-
 include/linux/udp.h         |  4 +--
 include/net/inet_common.h   |  2 +-
 include/net/tcp.h           |  2 +-
 include/net/udp.h           |  4 +--
 include/net/udp_tunnel.h    |  6 ++--
 net/8021q/vlan.c            | 13 +++++----
 net/core/dev.c              | 68 +++++++++++++++++++--------------------------
 net/core/skbuff.c           |  4 +--
 net/ethernet/eth.c          | 12 ++++----
 net/ipv4/af_inet.c          | 12 ++++----
 net/ipv4/esp4_offload.c     |  4 +--
 net/ipv4/fou.c              | 20 ++++++-------
 net/ipv4/gre_offload.c      |  8 +++---
 net/ipv4/tcp_offload.c      | 14 +++++-----
 net/ipv4/udp_offload.c      | 13 +++++----
 net/ipv6/esp6_offload.c     |  4 +--
 net/ipv6/ip6_offload.c      | 16 +++++------
 net/ipv6/tcpv6_offload.c    |  4 +--
 net/ipv6/udp_offload.c      |  4 +--
 24 files changed, 133 insertions(+), 141 deletions(-)

(limited to 'include')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 750eaa53bf0c..3e94375b9b01 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -418,11 +418,12 @@ static int geneve_hlen(struct genevehdr *gh)
 	return sizeof(*gh) + gh->opt_len * 4;
 }
 
-static struct sk_buff **geneve_gro_receive(struct sock *sk,
-					   struct sk_buff **head,
-					   struct sk_buff *skb)
+static struct sk_buff *geneve_gro_receive(struct sock *sk,
+					  struct list_head *head,
+					  struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct genevehdr *gh, *gh2;
 	unsigned int hlen, gh_len, off_gnv;
 	const struct packet_offload *ptype;
@@ -449,7 +450,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 			goto out;
 	}
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aee0e60471f1..cc14e0cd5647 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -568,11 +568,12 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
 	return vh;
 }
 
-static struct sk_buff **vxlan_gro_receive(struct sock *sk,
-					  struct sk_buff **head,
-					  struct sk_buff *skb)
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+					 struct list_head *head,
+					 struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct vxlanhdr *vh, *vh2;
 	unsigned int hlen, off_vx;
 	int flush = 1;
@@ -607,7 +608,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 
 	skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 79563840c295..572e11bb8696 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -59,8 +59,7 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
 					   unsigned int rxqs);
 #define devm_alloc_etherdev(dev, sizeof_priv) devm_alloc_etherdev_mqs(dev, sizeof_priv, 1, 1)
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-				 struct sk_buff *skb);
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb);
 int eth_gro_complete(struct sk_buff *skb, int nhoff);
 
 /* Reserved Ethernet Addresses per IEEE 802.1Q */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec9850c7936..f176d9873910 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -322,7 +322,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct sk_buff		*gro_list;
+	struct list_head	gro_list;
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
@@ -2255,10 +2255,10 @@ static inline int gro_recursion_inc_test(struct sk_buff *skb)
 	return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
 }
 
-typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *);
-static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
-						struct sk_buff **head,
-						struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *);
+static inline struct sk_buff *call_gro_receive(gro_receive_t cb,
+					       struct list_head *head,
+					       struct sk_buff *skb)
 {
 	if (unlikely(gro_recursion_inc_test(skb))) {
 		NAPI_GRO_CB(skb)->flush |= 1;
@@ -2268,12 +2268,12 @@ static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
 	return cb(head, skb);
 }
 
-typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **,
-					     struct sk_buff *);
-static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb,
-						   struct sock *sk,
-						   struct sk_buff **head,
-						   struct sk_buff *skb)
+typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *,
+					    struct sk_buff *);
+static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
+						  struct sock *sk,
+						  struct list_head *head,
+						  struct sk_buff *skb)
 {
 	if (unlikely(gro_recursion_inc_test(skb))) {
 		NAPI_GRO_CB(skb)->flush |= 1;
@@ -2299,8 +2299,8 @@ struct packet_type {
 struct offload_callbacks {
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
 						netdev_features_t features);
-	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
-						 struct sk_buff *skb);
+	struct sk_buff		*(*gro_receive)(struct list_head *head,
+						struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
 };
 
@@ -2568,7 +2568,7 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_restart(struct net_device *dev);
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
 
 static inline unsigned int skb_gro_offset(const struct sk_buff *skb)
 {
@@ -2784,13 +2784,13 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_XFRM_OFFLOAD
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
 	if (PTR_ERR(pp) != -EINPROGRESS)
 		NAPI_GRO_CB(skb)->flush |= flush;
 }
 #else
-static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
+static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c86885954994..7ccc601b55d9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -677,7 +677,8 @@ struct sk_buff {
 				int			ip_defrag_offset;
 			};
 		};
-		struct rb_node	rbnode; /* used in netem & tcp stack */
+		struct rb_node		rbnode; /* used in netem & tcp stack */
+		struct list_head	list;
 	};
 	struct sock		*sk;
 
diff --git a/include/linux/udp.h b/include/linux/udp.h
index ca840345571b..320d49d85484 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -74,8 +74,8 @@ struct udp_sock {
 	void (*encap_destroy)(struct sock *sk);
 
 	/* GRO functions for UDP socket */
-	struct sk_buff **	(*gro_receive)(struct sock *sk,
-					       struct sk_buff **head,
+	struct sk_buff *	(*gro_receive)(struct sock *sk,
+					       struct list_head *head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sock *sk,
 						struct sk_buff *skb,
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 384b90c62c0b..3ca969cbd161 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -43,7 +43,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
 		    int *addr_len);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb);
 int inet_gro_complete(struct sk_buff *skb, int nhoff);
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 822ee49ed0f9..402a88b0e8a8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1788,7 +1788,7 @@ void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
 int tcp_gro_complete(struct sk_buff *skb);
 
 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
diff --git a/include/net/udp.h b/include/net/udp.h
index b1ea8b0f5e6a..5723c6128ae4 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -170,8 +170,8 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
 typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
 				     __be16 dport);
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup);
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup);
 int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
 
 struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index b95a6927c718..fe680ab6b15a 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -65,9 +65,9 @@ static inline int udp_sock_create(struct net *net,
 
 typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
-typedef struct sk_buff **(*udp_tunnel_gro_receive_t)(struct sock *sk,
-						     struct sk_buff **head,
-						     struct sk_buff *skb);
+typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
+						    struct list_head *head,
+						    struct sk_buff *skb);
 typedef int (*udp_tunnel_gro_complete_t)(struct sock *sk, struct sk_buff *skb,
 					 int nhoff);
 
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..99141986efa0 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -647,13 +647,14 @@ out:
 	return err;
 }
 
-static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct vlan_hdr *vhdr;
-	unsigned int hlen, off_vlan;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_vlan;
+	struct sk_buff *pp = NULL;
+	struct vlan_hdr *vhdr;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -675,7 +676,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct vlan_hdr *vhdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/core/dev.c b/net/core/dev.c
index a5aa1c7444e6..aa61b9344b46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4881,36 +4881,25 @@ out:
  */
 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-	struct sk_buff *skb, *prev = NULL;
-
-	/* scan list and build reverse chain */
-	for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
-		skb->prev = prev;
-		prev = skb;
-	}
-
-	for (skb = prev; skb; skb = prev) {
-		skb->next = NULL;
+	struct sk_buff *skb, *p;
 
+	list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-
-		prev = skb->prev;
+		list_del_init(&skb->list);
 		napi_gro_complete(skb);
 		napi->gro_count--;
 	}
-
-	napi->gro_list = NULL;
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
 static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 {
-	struct sk_buff *p;
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
+	struct sk_buff *p;
 
-	for (p = napi->gro_list; p; p = p->next) {
+	list_for_each_entry(p, &napi->gro_list, list) {
 		unsigned long diffs;
 
 		NAPI_GRO_CB(p)->flush = 0;
@@ -4977,12 +4966,12 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &offload_base;
-	int same_flow;
+	struct sk_buff *pp = NULL;
 	enum gro_result ret;
+	int same_flow;
 	int grow;
 
 	if (netif_elide_gro(skb->dev))
@@ -5039,11 +5028,8 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		struct sk_buff *nskb = *pp;
-
-		*pp = nskb->next;
-		nskb->next = NULL;
-		napi_gro_complete(nskb);
+		list_del_init(&pp->list);
+		napi_gro_complete(pp);
 		napi->gro_count--;
 	}
 
@@ -5054,15 +5040,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		goto normal;
 
 	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		struct sk_buff *nskb = napi->gro_list;
+		struct sk_buff *nskb;
 
-		/* locate the end of the list to select the 'oldest' flow */
-		while (nskb->next) {
-			pp = &nskb->next;
-			nskb = *pp;
-		}
-		*pp = NULL;
-		nskb->next = NULL;
+		nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
+		list_del(&nskb->list);
 		napi_gro_complete(nskb);
 	} else {
 		napi->gro_count++;
@@ -5071,8 +5052,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	skb->next = napi->gro_list;
-	napi->gro_list = skb;
+	list_add(&skb->list, &napi->gro_list);
 	ret = GRO_HELD;
 
 pull:
@@ -5478,7 +5458,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_list) {
+	if (!list_empty(&n->gro_list)) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5687,7 +5667,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_list && !napi_disable_pending(napi) &&
+	if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -5701,7 +5681,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	napi->gro_list = NULL;
+	INIT_LIST_HEAD(&napi->gro_list);
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -5734,6 +5714,14 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+static void gro_list_free(struct list_head *head)
+{
+	struct sk_buff *skb, *p;
+
+	list_for_each_entry_safe(skb, p, head, list)
+		kfree_skb(skb);
+}
+
 /* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
@@ -5743,8 +5731,8 @@ void netif_napi_del(struct napi_struct *napi)
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	kfree_skb_list(napi->gro_list);
-	napi->gro_list = NULL;
+	gro_list_free(&napi->gro_list);
+	INIT_LIST_HEAD(&napi->gro_list);
 	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
@@ -5787,7 +5775,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_list) {
+	if (!list_empty(&n->gro_list)) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..b1f274f22d85 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3815,14 +3815,14 @@ err:
 }
 EXPORT_SYMBOL_GPL(skb_segment);
 
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 {
 	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
 	unsigned int offset = skb_gro_offset(skb);
 	unsigned int headlen = skb_headlen(skb);
 	unsigned int len = skb_gro_len(skb);
-	struct sk_buff *lp, *p = *head;
 	unsigned int delta_truesize;
+	struct sk_buff *lp;
 
 	if (unlikely(p->len + len >= 65536))
 		return -E2BIG;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ee28440f57c5..fd8faa0dfa61 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -427,13 +427,13 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 }
 EXPORT_SYMBOL(sysfs_format_mac);
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-				 struct sk_buff *skb)
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_eth;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_eth;
+	struct sk_buff *pp = NULL;
+	struct ethhdr *eh, *eh2;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -448,7 +448,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 15e125558c76..06b218a2870f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1384,12 +1384,12 @@ out:
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
-	struct sk_buff *p;
+	struct sk_buff *pp = NULL;
 	const struct iphdr *iph;
+	struct sk_buff *p;
 	unsigned int hlen;
 	unsigned int off;
 	unsigned int id;
@@ -1425,7 +1425,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
 	id >>= 16;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct iphdr *iph2;
 		u16 flush_id;
 
@@ -1505,8 +1505,8 @@ out:
 }
 EXPORT_SYMBOL(inet_gro_receive);
 
-static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipip_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	if (NAPI_GRO_CB(skb)->encap_mark) {
 		NAPI_GRO_CB(skb)->flush = 1;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 7cf755ef9efb..bbeecd13e534 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -28,8 +28,8 @@
 #include <linux/spinlock.h>
 #include <net/udp.h>
 
-static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..efdc9e1f741e 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -224,14 +224,14 @@ drop:
 	return 0;
 }
 
-static struct sk_buff **fou_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *fou_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
-	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
 	u8 proto = fou_from_sock(sk)->protocol;
 	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
 
 	/* We can clear the encap_mark for FOU as we are essentially doing
 	 * one of two possible things.  We are either adding an L4 tunnel
@@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	return guehdr;
 }
 
-static struct sk_buff **gue_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gue_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
 	const struct net_offload **offloads;
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct guehdr *guehdr;
 	size_t len, optlen, hdrlen, off;
@@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
 
 	skb_gro_pull(skb, hdrlen);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct guehdr *guehdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..b9673c21be45 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -108,10 +108,10 @@ out:
 	return segs;
 }
 
-static struct sk_buff **gre_gro_receive(struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gre_gro_receive(struct list_head *head,
+				       struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	const struct gre_base_hdr *greh;
 	unsigned int hlen, grehlen;
@@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 					     null_compute_pseudo);
 	}
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct gre_base_hdr *greh2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8cc7c3487330..f5aee641f825 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -180,9 +180,9 @@ out:
 	return segs;
 }
 
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct tcphdr *th;
 	struct tcphdr *th2;
@@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
-	for (; (p = *head); head = &p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 		goto found;
 	}
-
+	p = NULL;
 	goto out_check_final;
 
 found:
@@ -263,7 +263,7 @@ found:
 	flush |= (len - 1) >= mss;
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 
-	if (flush || skb_gro_receive(head, skb)) {
+	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
 	}
@@ -277,7 +277,7 @@ out_check_final:
 					TCP_FLAG_FIN));
 
 	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
-		pp = head;
+		pp = p;
 
 out:
 	NAPI_GRO_CB(skb)->flush |= (flush != 0);
@@ -302,7 +302,7 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..ac46c1c55c99 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,10 +343,11 @@ out:
 	return segs;
 }
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup)
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct udphdr *uh2;
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
@@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 unflush:
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -399,8 +400,8 @@ out:
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 27f59b61f70f..ddfa533a84e5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -49,8 +49,8 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	return 0;
 }
 
-static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5b3f2f89ef41..37ff4805b20c 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -163,11 +163,11 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
 	return len;
 }
 
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipv6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct ipv6hdr *iph;
 	unsigned int nlen;
@@ -214,7 +214,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	flush--;
 	nlen = skb_network_header_len(skb);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct ipv6hdr *iph2;
 		__be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
 
@@ -263,8 +263,8 @@ out:
 	return pp;
 }
 
-static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
-					       struct sk_buff *skb)
+static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
+					      struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
@@ -278,8 +278,8 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
 	return ipv6_gro_receive(head, skb);
 }
 
-static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head,
-					   struct sk_buff *skb)
+static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
+					  struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 278e49cd67d4..e72947c99454 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,8 +15,8 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *tcp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 03a2ff3fe1e6..95dee9ca8d22 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -114,8 +114,8 @@ out:
 	return segs;
 }
 
-static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
-- 
cgit v1.2.3


From 07d78363dcffd9cb1bf6f06a6cac0e0847f3c1de Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sun, 24 Jun 2018 14:14:02 +0900
Subject: net: Convert NAPI gro list into a small hash table.

Improve the performance of GRO receive by splitting flows into
multiple hash chains.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   3 +-
 net/core/dev.c            | 105 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 81 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f176d9873910..c6b377a15869 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -305,6 +305,7 @@ int __init netdev_boot_setup(char *str);
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
+#define GRO_HASH_BUCKETS	8
 struct napi_struct {
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit.  This means
@@ -322,7 +323,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct list_head	gro_list;
+	struct list_head	gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index aa61b9344b46..dffed642e686 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4875,15 +4875,12 @@ out:
 	return netif_receive_skb_internal(skb);
 }
 
-/* napi->gro_list contains packets ordered by age.
- * youngest packets at the head of it.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
+				   bool flush_old)
 {
 	struct sk_buff *skb, *p;
 
-	list_for_each_entry_safe_reverse(skb, p, &napi->gro_list, list) {
+	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
 		list_del_init(&skb->list);
@@ -4891,15 +4888,33 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 		napi->gro_count--;
 	}
 }
+
+/* napi->gro_hash contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+	int i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct list_head *head = &napi->gro_hash[i];
+
+		__napi_gro_flush_chain(napi, head, flush_old);
+	}
+}
 EXPORT_SYMBOL(napi_gro_flush);
 
-static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+static struct list_head *gro_list_prepare(struct napi_struct *napi,
+					  struct sk_buff *skb)
 {
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
+	struct list_head *head;
 	struct sk_buff *p;
 
-	list_for_each_entry(p, &napi->gro_list, list) {
+	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
+	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
 		NAPI_GRO_CB(p)->flush = 0;
@@ -4922,6 +4937,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
+
+	return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -4964,11 +4981,45 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	}
 }
 
+static void gro_flush_oldest(struct napi_struct *napi)
+{
+	struct sk_buff *oldest = NULL;
+	unsigned long age = jiffies;
+	int i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct list_head *head = &napi->gro_hash[i];
+		struct sk_buff *skb;
+
+		if (list_empty(head))
+			continue;
+
+		skb = list_last_entry(head, struct sk_buff, list);
+		if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
+			oldest = skb;
+			age = NAPI_GRO_CB(skb)->age;
+		}
+	}
+
+	/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
+	 * impossible.
+	 */
+	if (WARN_ON_ONCE(!oldest))
+		return;
+
+	/* Do not adjust napi->gro_count, caller is adding a new SKB to
+	 * the chain.
+	 */
+	list_del(&oldest->list);
+	napi_gro_complete(oldest);
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
+	struct list_head *gro_head;
 	struct sk_buff *pp = NULL;
 	enum gro_result ret;
 	int same_flow;
@@ -4977,7 +5028,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_list_prepare(napi, skb);
+	gro_head = gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -5011,7 +5062,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(gro_head, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -5040,11 +5091,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		goto normal;
 
 	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		struct sk_buff *nskb;
-
-		nskb = list_last_entry(&napi->gro_list, struct sk_buff, list);
-		list_del(&nskb->list);
-		napi_gro_complete(nskb);
+		gro_flush_oldest(napi);
 	} else {
 		napi->gro_count++;
 	}
@@ -5052,7 +5099,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	list_add(&skb->list, &napi->gro_list);
+	list_add(&skb->list, gro_head);
 	ret = GRO_HELD;
 
 pull:
@@ -5458,7 +5505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (!list_empty(&n->gro_list)) {
+	if (n->gro_count) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5667,7 +5714,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (!list_empty(&napi->gro_list) && !napi_disable_pending(napi) &&
+	if (napi->gro_count && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -5677,11 +5724,14 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
+	int i;
+
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	INIT_LIST_HEAD(&napi->gro_list);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++)
+		INIT_LIST_HEAD(&napi->gro_hash[i]);
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -5714,12 +5764,16 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
-static void gro_list_free(struct list_head *head)
+static void flush_gro_hash(struct napi_struct *napi)
 {
-	struct sk_buff *skb, *p;
+	int i;
 
-	list_for_each_entry_safe(skb, p, head, list)
-		kfree_skb(skb);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct sk_buff *skb, *n;
+
+		list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
+			kfree_skb(skb);
+	}
 }
 
 /* Must be called in process context */
@@ -5731,8 +5785,7 @@ void netif_napi_del(struct napi_struct *napi)
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	gro_list_free(&napi->gro_list);
-	INIT_LIST_HEAD(&napi->gro_list);
+	flush_gro_hash(napi);
 	napi->gro_count = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
@@ -5775,7 +5828,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (!list_empty(&n->gro_list)) {
+	if (n->gro_count) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
-- 
cgit v1.2.3


From fb223502ec0889444965f602f57b1f45f9e9845e Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sun, 24 Jun 2018 10:02:54 -0400
Subject: tcp: add SNMP counter for zero-window drops

It will be helpful if we could display the drops due to zero window or no
enough window space.
So a new SNMP MIB entry is added to track this behavior.
This entry is named LINUX_MIB_TCPZEROWINDOWDROP and published in
/proc/net/netstat in TcpExt line as TCPZeroWindowDrop.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/tcp_input.c      | 8 ++++++--
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 750d89120335..97517f36a5f9 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -279,6 +279,7 @@ enum
 	LINUX_MIB_TCPDELIVERED,			/* TCPDelivered */
 	LINUX_MIB_TCPDELIVEREDCE,		/* TCPDeliveredCE */
 	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
+	LINUX_MIB_TCPZEROWINDOWDROP,		/* TCPZeroWindowDrop */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 77350c1256ce..225ef3433fe5 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -287,6 +287,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
 	SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
+	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 76ca88f63b70..9c5b3415413f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4668,8 +4668,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	 *  Out of sequence packets to the out_of_order_queue.
 	 */
 	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
-		if (tcp_receive_window(tp) == 0)
+		if (tcp_receive_window(tp) == 0) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
 			goto out_of_window;
+		}
 
 		/* Ok. In sequence. In window. */
 queue_and_out:
@@ -4735,8 +4737,10 @@ drop:
 		/* If window is closed, drop tail of packet. But after
 		 * remembering D-SACK for its head made in previous line.
 		 */
-		if (!tcp_receive_window(tp))
+		if (!tcp_receive_window(tp)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
 			goto out_of_window;
+		}
 		goto queue_and_out;
 	}
 
-- 
cgit v1.2.3


From fdb5c4531c1e0e50e609df83f736b6f3a02896e2 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 19 Jun 2018 00:04:24 +0100
Subject: bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF

If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not
possible to attach, detach or query IR BPF programs to /dev/lircN devices,
making them impossible to use. For embedded devices, it should be possible
to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled.

This change requires some refactoring, since bpf_prog_{attach,detach,query}
functions are now always compiled, but their code paths for cgroups need
moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c,
moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not
require #ifdefs since that is already conditionally compiled.

Fixes: f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2")
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/media/rc/bpf-lirc.c | 14 +------
 include/linux/bpf-cgroup.h  | 26 ++++++++++++
 include/linux/bpf.h         |  8 ++++
 include/linux/bpf_lirc.h    |  5 ++-
 kernel/bpf/cgroup.c         | 54 +++++++++++++++++++++++++
 kernel/bpf/sockmap.c        | 18 +++++++++
 kernel/bpf/syscall.c        | 99 ++++++++++-----------------------------------
 7 files changed, 132 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 40826bba06b6..fcfab6635f9c 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -207,29 +207,19 @@ void lirc_bpf_free(struct rc_dev *rcdev)
 	bpf_prog_array_free(rcdev->raw->progs);
 }
 
-int lirc_prog_attach(const union bpf_attr *attr)
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-	struct bpf_prog *prog;
 	struct rc_dev *rcdev;
 	int ret;
 
 	if (attr->attach_flags)
 		return -EINVAL;
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd,
-				 BPF_PROG_TYPE_LIRC_MODE2);
-	if (IS_ERR(prog))
-		return PTR_ERR(prog);
-
 	rcdev = rc_dev_get_from_fd(attr->target_fd);
-	if (IS_ERR(rcdev)) {
-		bpf_prog_put(prog);
+	if (IS_ERR(rcdev))
 		return PTR_ERR(rcdev);
-	}
 
 	ret = lirc_bpf_attach(rcdev, prog);
-	if (ret)
-		bpf_prog_put(prog);
 
 	put_device(&rcdev->dev);
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 975fb4cf1bb7..79795c5fa7c3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 									      \
 	__ret;								      \
 })
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr);
 #else
 
+struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 
+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype,
+					 struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7df32a3200f7..8827e797ff97 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
 struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+				      struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
 #endif
 
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h
index 5f8a4283092d..9d9ff755ec29 100644
--- a/include/linux/bpf_lirc.h
+++ b/include/linux/bpf_lirc.h
@@ -5,11 +5,12 @@
 #include <uapi/linux/bpf.h>
 
 #ifdef CONFIG_BPF_LIRC_MODE2
-int lirc_prog_attach(const union bpf_attr *attr);
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int lirc_prog_detach(const union bpf_attr *attr);
 int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
 #else
-static inline int lirc_prog_attach(const union bpf_attr *attr)
+static inline int lirc_prog_attach(const union bpf_attr *attr,
+				   struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..3d83ee7df381 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+				attr->attach_flags);
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		prog = NULL;
+
+	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+	if (prog)
+		bpf_prog_put(prog);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->query.target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_query(cgrp, attr, uattr);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
 /**
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d816c0e..81d0c55a77aa 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1915,6 +1915,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 	return 0;
 }
 
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog)
+{
+	int ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int err;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = sock_map_prog(map, prog, attr->attach_type);
+	fdput(f);
+	return err;
+}
+
 static void *sock_map_lookup(struct bpf_map *map, void *key)
 {
 	return NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..d10ecd78105f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,8 +1483,6 @@ out_free_tp:
 	return err;
 }
 
-#ifdef CONFIG_CGROUP_BPF
-
 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr,
-			       int type, bool attach)
-{
-	struct bpf_prog *prog = NULL;
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	if (attach) {
-		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
-		if (IS_ERR(prog)) {
-			fdput(f);
-			return PTR_ERR(prog);
-		}
-	}
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	if (err) {
-		fdput(f);
-		if (prog)
-			bpf_prog_put(prog);
-		return err;
-	}
-
-	fdput(f);
-	return 0;
-}
-
 #define BPF_F_ATTACH_MASK \
 	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
 
@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
-	struct cgroup *cgrp;
 	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
+		ptype = BPF_PROG_TYPE_SK_MSG;
+		break;
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
+		ptype = BPF_PROG_TYPE_SK_SKB;
+		break;
 	case BPF_LIRC_MODE2:
-		return lirc_prog_attach(attr);
+		ptype = BPF_PROG_TYPE_LIRC_MODE2;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp)) {
-		bpf_prog_put(prog);
-		return PTR_ERR(cgrp);
+	switch (ptype) {
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_SK_MSG:
+		ret = sockmap_get_from_fd(attr, ptype, prog);
+		break;
+	case BPF_PROG_TYPE_LIRC_MODE2:
+		ret = lirc_prog_attach(attr, prog);
+		break;
+	default:
+		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
 	if (ret)
 		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-
 	return ret;
 }
 
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
-	struct cgroup *cgrp;
-	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-	return ret;
+	return cgroup_bpf_prog_detach(attr, ptype);
 }
 
 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	struct cgroup *cgrp;
-	int ret;
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	default:
 		return -EINVAL;
 	}
-	cgrp = cgroup_get_from_fd(attr->query.target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-	ret = cgroup_bpf_query(cgrp, attr, uattr);
-	cgroup_put(cgrp);
-	return ret;
+
+	return cgroup_bpf_prog_query(attr, uattr);
 }
-#endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 
@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET:
 		err = bpf_obj_get(&attr);
 		break;
-#ifdef CONFIG_CGROUP_BPF
 	case BPF_PROG_ATTACH:
 		err = bpf_prog_attach(&attr);
 		break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_PROG_QUERY:
 		err = bpf_prog_query(&attr, uattr);
 		break;
-#endif
 	case BPF_PROG_TEST_RUN:
 		err = bpf_prog_test_run(&attr, uattr);
 		break;
-- 
cgit v1.2.3


From a8f62d0c6fe533e07cd1acce7588278f9d6e7720 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 25 Jun 2018 20:37:08 +0800
Subject: ptp: support DPAA FMan 1588 timer in ptp_qoriq

This patch is to support DPAA (Data Path Acceleration Architecture)
1588 timer by adding "fsl,fman-ptp-timer" compatible, sharing
interrupt with FMan, adding FSL_DPAA_ETH dependency, and fixing
up register offset.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/Kconfig           |   2 +-
 drivers/ptp/ptp_qoriq.c       | 104 +++++++++++++++++++++++++++---------------
 include/linux/fsl/ptp_qoriq.h |  38 ++++++++++++---
 3 files changed, 98 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 474c988d2e95..d137c480db46 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -43,7 +43,7 @@ config PTP_1588_CLOCK_DTE
 
 config PTP_1588_CLOCK_QORIQ
 	tristate "Freescale QorIQ 1588 timer as PTP clock"
-	depends on GIANFAR
+	depends on GIANFAR || FSL_DPAA_ETH
 	depends on PTP_1588_CLOCK
 	default y
 	help
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index e8652c148c52..a14c317b5a38 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -39,11 +39,12 @@
 /* Caller must hold qoriq_ptp->lock. */
 static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u64 ns;
 	u32 lo, hi;
 
-	lo = qoriq_read(&qoriq_ptp->regs->tmr_cnt_l);
-	hi = qoriq_read(&qoriq_ptp->regs->tmr_cnt_h);
+	lo = qoriq_read(&regs->ctrl_regs->tmr_cnt_l);
+	hi = qoriq_read(&regs->ctrl_regs->tmr_cnt_h);
 	ns = ((u64) hi) << 32;
 	ns |= lo;
 	return ns;
@@ -52,16 +53,18 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 /* Caller must hold qoriq_ptp->lock. */
 static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u32 hi = ns >> 32;
 	u32 lo = ns & 0xffffffff;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_cnt_l, lo);
-	qoriq_write(&qoriq_ptp->regs->tmr_cnt_h, hi);
+	qoriq_write(&regs->ctrl_regs->tmr_cnt_l, lo);
+	qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	u64 ns;
 	u32 lo, hi;
 
@@ -70,16 +73,18 @@ static void set_alarm(struct qoriq_ptp *qoriq_ptp)
 	ns -= qoriq_ptp->tclk_period;
 	hi = ns >> 32;
 	lo = ns & 0xffffffff;
-	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_l, lo);
-	qoriq_write(&qoriq_ptp->regs->tmr_alarm1_h, hi);
+	qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
+	qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
 /* Caller must hold qoriq_ptp->lock. */
 static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 {
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+
 	set_alarm(qoriq_ptp);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 }
 
 /*
@@ -89,16 +94,17 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 static irqreturn_t isr(int irq, void *priv)
 {
 	struct qoriq_ptp *qoriq_ptp = priv;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	struct ptp_clock_event event;
 	u64 ns;
 	u32 ack = 0, lo, hi, mask, val;
 
-	val = qoriq_read(&qoriq_ptp->regs->tmr_tevent);
+	val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
 
 	if (val & ETS1) {
 		ack |= ETS1;
-		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts1_h);
-		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts1_l);
+		hi = qoriq_read(&regs->etts_regs->tmr_etts1_h);
+		lo = qoriq_read(&regs->etts_regs->tmr_etts1_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 0;
 		event.timestamp = ((u64) hi) << 32;
@@ -108,8 +114,8 @@ static irqreturn_t isr(int irq, void *priv)
 
 	if (val & ETS2) {
 		ack |= ETS2;
-		hi = qoriq_read(&qoriq_ptp->regs->tmr_etts2_h);
-		lo = qoriq_read(&qoriq_ptp->regs->tmr_etts2_l);
+		hi = qoriq_read(&regs->etts_regs->tmr_etts2_h);
+		lo = qoriq_read(&regs->etts_regs->tmr_etts2_l);
 		event.type = PTP_CLOCK_EXTTS;
 		event.index = 1;
 		event.timestamp = ((u64) hi) << 32;
@@ -130,16 +136,16 @@ static irqreturn_t isr(int irq, void *priv)
 			hi = ns >> 32;
 			lo = ns & 0xffffffff;
 			spin_lock(&qoriq_ptp->lock);
-			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_l, lo);
-			qoriq_write(&qoriq_ptp->regs->tmr_alarm2_h, hi);
+			qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
+			qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
 			spin_unlock(&qoriq_ptp->lock);
 			qoriq_ptp->alarm_value = ns;
 		} else {
-			qoriq_write(&qoriq_ptp->regs->tmr_tevent, ALM2);
+			qoriq_write(&regs->ctrl_regs->tmr_tevent, ALM2);
 			spin_lock(&qoriq_ptp->lock);
-			mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+			mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 			mask &= ~ALM2EN;
-			qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+			qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 			spin_unlock(&qoriq_ptp->lock);
 			qoriq_ptp->alarm_value = 0;
 			qoriq_ptp->alarm_interval = 0;
@@ -153,7 +159,7 @@ static irqreturn_t isr(int irq, void *priv)
 	}
 
 	if (ack) {
-		qoriq_write(&qoriq_ptp->regs->tmr_tevent, ack);
+		qoriq_write(&regs->ctrl_regs->tmr_tevent, ack);
 		return IRQ_HANDLED;
 	} else
 		return IRQ_NONE;
@@ -169,6 +175,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 	u32 tmr_add;
 	int neg_adj = 0;
 	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
 	if (scaled_ppm < 0) {
 		neg_adj = 1;
@@ -186,7 +193,7 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
 	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_add, tmr_add);
+	qoriq_write(&regs->ctrl_regs->tmr_add, tmr_add);
 
 	return 0;
 }
@@ -250,6 +257,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			      struct ptp_clock_request *rq, int on)
 {
 	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 	unsigned long flags;
 	u32 bit, mask;
 
@@ -266,23 +274,23 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			return -EINVAL;
 		}
 		spin_lock_irqsave(&qoriq_ptp->lock, flags);
-		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+		mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 		if (on)
 			mask |= bit;
 		else
 			mask &= ~bit;
-		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
 	case PTP_CLK_REQ_PPS:
 		spin_lock_irqsave(&qoriq_ptp->lock, flags);
-		mask = qoriq_read(&qoriq_ptp->regs->tmr_temask);
+		mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 		if (on)
 			mask |= PP1EN;
 		else
 			mask &= ~PP1EN;
-		qoriq_write(&qoriq_ptp->regs->tmr_temask, mask);
+		qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 		spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 		return 0;
 
@@ -313,10 +321,12 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
 	struct qoriq_ptp *qoriq_ptp;
+	struct qoriq_ptp_registers *regs;
 	struct timespec64 now;
 	int err = -ENOMEM;
 	u32 tmr_ctrl;
 	unsigned long flags;
+	void __iomem *base;
 
 	qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
 	if (!qoriq_ptp)
@@ -351,7 +361,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
-	if (request_irq(qoriq_ptp->irq, isr, 0, DRIVER, qoriq_ptp)) {
+	if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) {
 		pr_err("request_irq failed\n");
 		goto no_node;
 	}
@@ -368,12 +378,27 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
 	spin_lock_init(&qoriq_ptp->lock);
 
-	qoriq_ptp->regs = ioremap(qoriq_ptp->rsrc->start,
-				resource_size(qoriq_ptp->rsrc));
-	if (!qoriq_ptp->regs) {
+	base = ioremap(qoriq_ptp->rsrc->start,
+		       resource_size(qoriq_ptp->rsrc));
+	if (!base) {
 		pr_err("ioremap ptp registers failed\n");
 		goto no_ioremap;
 	}
+
+	qoriq_ptp->base = base;
+
+	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
+		qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
+		qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
+		qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
+		qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
+	} else {
+		qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
+		qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET;
+		qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET;
+		qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET;
+	}
+
 	ktime_get_real_ts64(&now);
 	ptp_qoriq_settime(&qoriq_ptp->caps, &now);
 
@@ -383,13 +408,14 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
 	spin_lock_irqsave(&qoriq_ptp->lock, flags);
 
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl);
-	qoriq_write(&qoriq_ptp->regs->tmr_add,    qoriq_ptp->tmr_add);
-	qoriq_write(&qoriq_ptp->regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&qoriq_ptp->regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	regs = &qoriq_ptp->regs;
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
+	qoriq_write(&regs->ctrl_regs->tmr_add,    qoriq_ptp->tmr_add);
+	qoriq_write(&regs->ctrl_regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 	set_alarm(qoriq_ptp);
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
 	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
 
@@ -405,7 +431,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	return 0;
 
 no_clock:
-	iounmap(qoriq_ptp->regs);
+	iounmap(qoriq_ptp->base);
 no_ioremap:
 	release_resource(qoriq_ptp->rsrc);
 no_resource:
@@ -419,12 +445,13 @@ no_memory:
 static int qoriq_ptp_remove(struct platform_device *dev)
 {
 	struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
 
-	qoriq_write(&qoriq_ptp->regs->tmr_temask, 0);
-	qoriq_write(&qoriq_ptp->regs->tmr_ctrl,   0);
+	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
 
 	ptp_clock_unregister(qoriq_ptp->clock);
-	iounmap(qoriq_ptp->regs);
+	iounmap(qoriq_ptp->base);
 	release_resource(qoriq_ptp->rsrc);
 	free_irq(qoriq_ptp->irq, qoriq_ptp);
 	kfree(qoriq_ptp);
@@ -434,6 +461,7 @@ static int qoriq_ptp_remove(struct platform_device *dev)
 
 static const struct of_device_id match_table[] = {
 	{ .compatible = "fsl,etsec-ptp" },
+	{ .compatible = "fsl,fman-ptp-timer" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, match_table);
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index b462d9ea8007..dc3dac40f069 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -11,9 +11,8 @@
 
 /*
  * qoriq ptp registers
- * Generated by regen.tcl on Thu May 13 01:38:57 PM CEST 2010
  */
-struct qoriq_ptp_registers {
+struct ctrl_regs {
 	u32 tmr_ctrl;     /* Timer control register */
 	u32 tmr_tevent;   /* Timestamp event register */
 	u32 tmr_temask;   /* Timer event mask register */
@@ -28,22 +27,47 @@ struct qoriq_ptp_registers {
 	u8  res1[4];
 	u32 tmroff_h;     /* Timer offset high */
 	u32 tmroff_l;     /* Timer offset low */
-	u8  res2[8];
+};
+
+struct alarm_regs {
 	u32 tmr_alarm1_h; /* Timer alarm 1 high register */
 	u32 tmr_alarm1_l; /* Timer alarm 1 high register */
 	u32 tmr_alarm2_h; /* Timer alarm 2 high register */
 	u32 tmr_alarm2_l; /* Timer alarm 2 high register */
-	u8  res3[48];
+};
+
+struct fiper_regs {
 	u32 tmr_fiper1;   /* Timer fixed period interval */
 	u32 tmr_fiper2;   /* Timer fixed period interval */
 	u32 tmr_fiper3;   /* Timer fixed period interval */
-	u8  res4[20];
+};
+
+struct etts_regs {
 	u32 tmr_etts1_h;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts1_l;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts2_h;  /* Timestamp of general purpose external trigger */
 	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
 };
 
+struct qoriq_ptp_registers {
+	struct ctrl_regs __iomem *ctrl_regs;
+	struct alarm_regs __iomem *alarm_regs;
+	struct fiper_regs __iomem *fiper_regs;
+	struct etts_regs __iomem *etts_regs;
+};
+
+/* Offset definitions for the four register groups */
+#define CTRL_REGS_OFFSET	0x0
+#define ALARM_REGS_OFFSET	0x40
+#define FIPER_REGS_OFFSET	0x80
+#define ETTS_REGS_OFFSET	0xa0
+
+#define FMAN_CTRL_REGS_OFFSET	0x80
+#define FMAN_ALARM_REGS_OFFSET	0xb8
+#define FMAN_FIPER_REGS_OFFSET	0xd0
+#define FMAN_ETTS_REGS_OFFSET	0xe0
+
+
 /* Bit definitions for the TMR_CTRL register */
 #define ALM1P                 (1<<31) /* Alarm1 output polarity */
 #define ALM2P                 (1<<30) /* Alarm2 output polarity */
@@ -105,10 +129,10 @@ struct qoriq_ptp_registers {
 #define DRIVER		"ptp_qoriq"
 #define DEFAULT_CKSEL	1
 #define N_EXT_TS	2
-#define REG_SIZE	sizeof(struct qoriq_ptp_registers)
 
 struct qoriq_ptp {
-	struct qoriq_ptp_registers __iomem *regs;
+	void __iomem *base;
+	struct qoriq_ptp_registers regs;
 	spinlock_t lock; /* protects regs */
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
-- 
cgit v1.2.3


From 60513bd82c825b659c05957e4f8106ba06f0797f Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:04 -0700
Subject: net: sched: pass extack pointer to block binds and cb registration

Pass the extact struct from a tc qdisc add to the block bind function and,
in turn, to the setup_tc ndo of binding device via the tc_block_offload
struct. Pass this back to any block callback registrations to allow
netlink logging of fails in the bind process.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c          |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c      |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  2 +-
 drivers/net/ethernet/intel/i40evf/i40evf_main.c    |  2 +-
 drivers/net/ethernet/intel/igb/igb_main.c          |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 10 +++++----
 drivers/net/ethernet/netronome/nfp/bpf/main.c      |  2 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  2 +-
 drivers/net/netdevsim/netdev.c                     |  2 +-
 include/net/pkt_cls.h                              | 11 ++++++----
 net/dsa/slave.c                                    |  2 +-
 net/sched/cls_api.c                                | 25 ++++++++++++++--------
 17 files changed, 43 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 176fc9f4d7de..b5fc6414a951 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7984,7 +7984,7 @@ static int bnxt_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
-					     bp, bp);
+					     bp, bp, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
 		return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 05d405905906..0745f2dfc80c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -173,7 +173,7 @@ static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     bnxt_vf_rep_setup_tc_block_cb,
-					     vf_rep, vf_rep);
+					     vf_rep, vf_rep, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					bnxt_vf_rep_setup_tc_block_cb, vf_rep);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index bc03c175a3cd..96bc177d54de 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3016,7 +3016,7 @@ static int cxgb_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
-					     pi, dev);
+					     pi, dev, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
 		return 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 7ad2b1b0b125..426b0ccb1fc6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7554,7 +7554,7 @@ static int i40e_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb,
-					     np, np);
+					     np, np, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np);
 		return 0;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index dc56a8667495..5906c1c1d19d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -2926,7 +2926,7 @@ static int i40evf_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 6a78d8272eb2..f1e3397bd405 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2728,7 +2728,7 @@ static int igb_setup_tc_block(struct igb_adapter *adapter,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, igb_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 3e87dbbc9024..d29bd8fc3ff3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9325,7 +9325,7 @@ static int ixgbe_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
-					     adapter, adapter);
+					     adapter, adapter, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
 					adapter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 56c1b6f5593e..134f20a182b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3371,7 +3371,7 @@ static int mlx5e_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
-					     priv, priv);
+					     priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
 					priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 57987f6546e8..3f2fe95e01d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -797,7 +797,7 @@ static int mlx5e_rep_setup_tc_block(struct net_device *dev,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
-					     priv, priv);
+					     priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 968b88af2ef5..d2bc335dda11 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1503,7 +1503,8 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
 
 static int
 mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-				    struct tcf_block *block, bool ingress)
+				    struct tcf_block *block, bool ingress,
+				    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_acl_block *acl_block;
@@ -1518,7 +1519,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 			return -ENOMEM;
 		block_cb = __tcf_block_cb_register(block,
 						   mlxsw_sp_setup_tc_block_cb_flower,
-						   mlxsw_sp, acl_block);
+						   mlxsw_sp, acl_block, extack);
 		if (IS_ERR(block_cb)) {
 			err = PTR_ERR(block_cb);
 			goto err_cb_register;
@@ -1596,11 +1597,12 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
-					    mlxsw_sp_port);
+					    mlxsw_sp_port, f->extack);
 		if (err)
 			return err;
 		err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port,
-							  f->block, ingress);
+							  f->block, ingress,
+							  f->extack);
 		if (err) {
 			tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
 			return err;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index fcdfb8e7fdea..bf46f7bff912 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -206,7 +206,7 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     nfp_bpf_setup_tc_block_cb,
-					     nn, nn);
+					     nn, nn, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					nfp_bpf_setup_tc_block_cb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index c0e74aa4cb5e..a427dab4bf49 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -627,7 +627,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
 					     nfp_flower_setup_tc_block_cb,
-					     repr, repr);
+					     repr, repr, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block,
 					nfp_flower_setup_tc_block_cb,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cba46b62a1cd..2354e30caa78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3776,7 +3776,7 @@ static int stmmac_setup_tc_block(struct stmmac_priv *priv,
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb,
-				priv, priv);
+				priv, priv, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv);
 		return 0;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index ec68f38213d9..c9dacc6fcd59 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -260,7 +260,7 @@ nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f)
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb,
-					     ns, ns);
+					     ns, ns, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns);
 		return 0;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3c1a2c47cd4..a2c6d35ba057 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -73,10 +73,11 @@ void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv);
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack);
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv);
+			  void *cb_priv, struct netlink_ext_ack *extack);
 void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
@@ -161,7 +162,8 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 static inline
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv)
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack)
 {
 	return NULL;
 }
@@ -169,7 +171,7 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 static inline
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv)
+			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	return 0;
 }
@@ -596,6 +598,7 @@ struct tc_block_offload {
 	enum tc_block_command command;
 	enum tcf_block_binder_type binder_type;
 	struct tcf_block *block;
+	struct netlink_ext_ack *extack;
 };
 
 struct tc_cls_common_offload {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1e3b6a6d8a40..71536c435132 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -900,7 +900,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 
 	switch (f->command) {
 	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, cb, dev, dev);
+		return tcf_block_cb_register(f->block, cb, dev, dev, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, cb, dev);
 		return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index cdc3c87c53e6..8c9fb4b827a1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -277,18 +277,21 @@ static bool tcf_block_offload_in_use(struct tcf_block *block)
 static int tcf_block_offload_cmd(struct tcf_block *block,
 				 struct net_device *dev,
 				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command)
+				 enum tc_block_command command,
+				 struct netlink_ext_ack *extack)
 {
 	struct tc_block_offload bo = {};
 
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
 	bo.block = block;
+	bo.extack = extack;
 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
-				  struct tcf_block_ext_info *ei)
+				  struct tcf_block_ext_info *ei,
+				  struct netlink_ext_ack *extack)
 {
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
@@ -299,10 +302,12 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
 		return -EOPNOTSUPP;
+	}
 
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
 	return err;
@@ -322,7 +327,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
 	return;
@@ -612,7 +617,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	if (err)
 		goto err_chain_head_change_cb_add;
 
-	err = tcf_block_offload_bind(block, q, ei);
+	err = tcf_block_offload_bind(block, q, ei, extack);
 	if (err)
 		goto err_block_offload_bind;
 
@@ -748,7 +753,8 @@ EXPORT_SYMBOL(tcf_block_cb_decref);
 
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv)
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
 
@@ -772,11 +778,12 @@ EXPORT_SYMBOL(__tcf_block_cb_register);
 
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv)
+			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
 
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
+					   extack);
 	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
-- 
cgit v1.2.3


From e56185c78b500ac4d08768278ad8a25d5b756942 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:05 -0700
Subject: net: sched: add tcf_proto_op to offload a rule

Create a new tcf_proto_op called 'reoffload' that generates a new offload
message for each node in a tcf_proto. Pointers to the tcf_proto and
whether the offload request is to add or delete the node are included.
Also included is a callback function to send the offload message to and
the option of priv data to go with the cb.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h     | 3 ---
 include/net/sch_generic.h | 6 ++++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 9e59ebfded62..5ff11adbe2a6 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -190,9 +190,6 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 #endif
 }
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-			  void *type_data, void *cb_priv);
-
 #ifdef CONFIG_NET_CLS_ACT
 int tc_setup_cb_egdev_register(const struct net_device *dev,
 			       tc_setup_cb_t *cb, void *cb_priv);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6488daa32f82..18adc9142b18 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -20,6 +20,9 @@ struct qdisc_walker;
 struct tcf_walker;
 struct module;
 
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+			  void *type_data, void *cb_priv);
+
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
@@ -256,6 +259,9 @@ struct tcf_proto_ops {
 					  bool *last,
 					  struct netlink_ext_ack *);
 	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
+	int			(*reoffload)(struct tcf_proto *tp, bool add,
+					     tc_setup_cb_t *cb, void *cb_priv,
+					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
 
 	/* rtnetlink specific */
-- 
cgit v1.2.3


From 31533cba4327aefeafe8a7d57de0c737a3b2faa6 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:06 -0700
Subject: net: sched: cls_flower: implement offload tcf_proto_op

Add the reoffload tcf_proto_op in flower to generate an offload message
for each filter in the given tcf_proto. Call the specified callback with
this new offload message. The function only returns an error if the
callback rejects adding a 'hardware only' rule.

A filter contains a flag to indicate if it is in hardware or not. To
ensure the reoffload function properly maintains this flag, keep a
reference counter for the number of instances of the filter that are in
hardware. Only update the flag when this counter changes from or to 0. Add
a generic helper function to implement this behaviour.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 15 +++++++++++++++
 net/sched/cls_flower.c    | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 18adc9142b18..7432100027b7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -336,6 +336,21 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
 	block->offloadcnt--;
 }
 
+static inline void
+tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt,
+			  u32 *flags, bool add)
+{
+	if (add) {
+		if (!*cnt)
+			tcf_block_offload_inc(block, flags);
+		(*cnt)++;
+	} else {
+		(*cnt)--;
+		if (!*cnt)
+			tcf_block_offload_dec(block, flags);
+	}
+}
+
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9e8b26a80fb3..352876bb901b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -87,6 +87,7 @@ struct cls_fl_filter {
 	struct list_head list;
 	u32 handle;
 	u32 flags;
+	unsigned int in_hw_count;
 	struct rcu_work rwork;
 	struct net_device *hw_dev;
 };
@@ -289,6 +290,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		fl_hw_destroy_filter(tp, f, NULL);
 		return err;
 	} else if (err > 0) {
+		f->in_hw_count = err;
 		tcf_block_offload_inc(block, &f->flags);
 	}
 
@@ -1087,6 +1089,47 @@ skip:
 	}
 }
 
+static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
+	struct fl_flow_mask *mask;
+	struct cls_fl_filter *f;
+	int err;
+
+	list_for_each_entry(mask, &head->masks, list) {
+		list_for_each_entry(f, &mask->filters, list) {
+			if (tc_skip_hw(f->flags))
+				continue;
+
+			tc_cls_common_offload_init(&cls_flower.common, tp,
+						   f->flags, extack);
+			cls_flower.command = add ?
+				TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
+			cls_flower.cookie = (unsigned long)f;
+			cls_flower.dissector = &mask->dissector;
+			cls_flower.mask = &f->mkey;
+			cls_flower.key = &f->key;
+			cls_flower.exts = &f->exts;
+			cls_flower.classid = f->res.classid;
+
+			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+			if (err) {
+				if (add && tc_skip_sw(f->flags))
+					return err;
+				continue;
+			}
+
+			tc_cls_offload_cnt_update(block, &f->in_hw_count,
+						  &f->flags, add);
+		}
+	}
+
+	return 0;
+}
+
 static int fl_dump_key_val(struct sk_buff *skb,
 			   void *val, int val_type,
 			   void *mask, int mask_type, int len)
@@ -1438,6 +1481,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,
+	.reoffload	= fl_reoffload,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
 	.owner		= THIS_MODULE,
-- 
cgit v1.2.3


From 326367427cc09d38e4c1d145131ee2e228ac94c5 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 14:30:10 -0700
Subject: net: sched: call reoffload op on block callback reg

Call the reoffload tcf_proto_op on all tcf_proto nodes in all chains of a
block when a callback tries to register to a block that already has
offloaded rules. If all existing rules cannot be offloaded then the
registration is rejected. This replaces the previous policy of rejecting
such callback registration outright.

On unregistration of a callback, the rules are flushed for that given cb.
The implementation of block sharing in the NFP driver, for example,
duplicates shared rules to all devs bound to a block. This meant that
rules could still exist in hw even after a device is unbound from a block
(assuming the block still remains active).

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  4 +-
 include/net/pkt_cls.h                          |  6 ++-
 net/sched/cls_api.c                            | 54 ++++++++++++++++++++++----
 3 files changed, 52 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d2bc335dda11..52437363766a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1542,7 +1542,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 
 err_block_bind:
 	if (!tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block_cb);
+		__tcf_block_cb_unregister(block, block_cb);
 err_cb_register:
 		mlxsw_sp_acl_block_destroy(acl_block);
 	}
@@ -1572,7 +1572,7 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 	err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
 					mlxsw_sp_port, ingress);
 	if (!err && !tcf_block_cb_decref(block_cb)) {
-		__tcf_block_cb_unregister(block_cb);
+		__tcf_block_cb_unregister(block, block_cb);
 		mlxsw_sp_acl_block_destroy(acl_block);
 	}
 }
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a2c6d35ba057..4070b8eb6d14 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -78,7 +78,8 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
 			  void *cb_priv, struct netlink_ext_ack *extack);
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb);
 void tcf_block_cb_unregister(struct tcf_block *block,
 			     tc_setup_cb_t *cb, void *cb_ident);
 
@@ -177,7 +178,8 @@ int tcf_block_cb_register(struct tcf_block *block,
 }
 
 static inline
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8c9fb4b827a1..bbf8dda96b0e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -751,19 +751,53 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 }
 EXPORT_SYMBOL(tcf_block_cb_decref);
 
+static int
+tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+			    void *cb_priv, bool add, bool offload_in_use,
+			    struct netlink_ext_ack *extack)
+{
+	struct tcf_chain *chain;
+	struct tcf_proto *tp;
+	int err;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		for (tp = rtnl_dereference(chain->filter_chain); tp;
+		     tp = rtnl_dereference(tp->next)) {
+			if (tp->ops->reoffload) {
+				err = tp->ops->reoffload(tp, add, cb, cb_priv,
+							 extack);
+				if (err && add)
+					goto err_playback_remove;
+			} else if (add && offload_in_use) {
+				err = -EOPNOTSUPP;
+				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
+				goto err_playback_remove;
+			}
+		}
+	}
+
+	return 0;
+
+err_playback_remove:
+	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
+				    extack);
+	return err;
+}
+
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
 					     void *cb_priv,
 					     struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
+	int err;
 
-	/* At this point, playback of previous block cb calls is not supported,
-	 * so forbid to register to block which already has some offloaded
-	 * filters present.
-	 */
-	if (tcf_block_offload_in_use(block))
-		return ERR_PTR(-EOPNOTSUPP);
+	/* Replay any already present rules */
+	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
+					  tcf_block_offload_in_use(block),
+					  extack);
+	if (err)
+		return ERR_PTR(err);
 
 	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
 	if (!block_cb)
@@ -788,8 +822,12 @@ int tcf_block_cb_register(struct tcf_block *block,
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
 
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
+	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
+				    false, tcf_block_offload_in_use(block),
+				    NULL);
 	list_del(&block_cb->list);
 	kfree(block_cb);
 }
@@ -803,7 +841,7 @@ void tcf_block_cb_unregister(struct tcf_block *block,
 	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
 	if (!block_cb)
 		return;
-	__tcf_block_cb_unregister(block_cb);
+	__tcf_block_cb_unregister(block, block_cb);
 }
 EXPORT_SYMBOL(tcf_block_cb_unregister);
 
-- 
cgit v1.2.3


From e2607484370ea1b3595a3c51485e66e0cbd17341 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Fri, 22 Jun 2018 16:40:23 -0500
Subject: scsi: target: remove target_find_device

target_find_device is no longer used, so remove it.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Tested-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_device.c  | 24 ------------------------
 include/target/target_core_backend.h |  2 --
 2 files changed, 26 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index e27db4d45a9d..a9ad6ecb1812 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -879,30 +879,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
 }
 EXPORT_SYMBOL(target_to_linux_sector);
 
-/**
- * target_find_device - find a se_device by its dev_index
- * @id: dev_index
- * @do_depend: true if caller needs target_depend_item to be done
- *
- * If do_depend is true, the caller must do a target_undepend_item
- * when finished using the device.
- *
- * If do_depend is false, the caller must be called in a configfs
- * callback or during removal.
- */
-struct se_device *target_find_device(int id, bool do_depend)
-{
-	struct se_device *dev;
-
-	mutex_lock(&device_mutex);
-	dev = idr_find(&devices_idr, id);
-	if (dev && do_depend && target_depend_item(&dev->dev_group.cg_item))
-		dev = NULL;
-	mutex_unlock(&device_mutex);
-	return dev;
-}
-EXPORT_SYMBOL(target_find_device);
-
 struct devices_idr_iter {
 	int (*fn)(struct se_device *dev, void *data);
 	void *data;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 34a15d59ed88..c3ac47255218 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -106,8 +106,6 @@ bool	target_lun_is_rdonly(struct se_cmd *);
 sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
 	sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
-struct	se_device *target_find_device(int id, bool do_depend);
-
 bool target_sense_desc_format(struct se_device *dev);
 sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
-- 
cgit v1.2.3


From c84b023a4c1461498abf0eda54f60e2fd64a1ca2 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sun, 24 Jun 2018 22:03:26 +0800
Subject: scsi: read host_busy via scsi_host_busy()

No functional change.

Just introduce scsi_host_busy() and replace the direct read of
scsi_host->host_busy with this new API.

Cc: Omar Sandoval <osandov@fb.com>,
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>,
Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
Cc: Christoph Hellwig <hch@lst.de>,
Cc: Don Brace <don.brace@microsemi.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/advansys.c                   |  8 ++++----
 drivers/scsi/hosts.c                      | 10 ++++++++++
 drivers/scsi/libsas/sas_scsi_host.c       |  4 ++--
 drivers/scsi/megaraid/megaraid_sas_base.c |  2 +-
 drivers/scsi/mpt3sas/mpt3sas_base.c       |  4 ++--
 drivers/scsi/qlogicpti.c                  |  2 +-
 drivers/scsi/scsi.c                       |  2 +-
 drivers/scsi/scsi_error.c                 |  6 +++---
 drivers/scsi/scsi_sysfs.c                 |  2 +-
 include/scsi/scsi_host.h                  |  1 +
 10 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index c9a52905070e..713f69033f20 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -2416,8 +2416,8 @@ static void asc_prt_scsi_host(struct Scsi_Host *s)
 	struct asc_board *boardp = shost_priv(s);
 
 	printk("Scsi_Host at addr 0x%p, device %s\n", s, dev_name(boardp->dev));
-	printk(" host_busy %u, host_no %d,\n",
-	       atomic_read(&s->host_busy), s->host_no);
+	printk(" host_busy %d, host_no %d,\n",
+	       scsi_host_busy(s), s->host_no);
 
 	printk(" base 0x%lx, io_port 0x%lx, irq %d,\n",
 	       (ulong)s->base, (ulong)s->io_port, boardp->irq);
@@ -3182,8 +3182,8 @@ static void asc_prt_driver_conf(struct seq_file *m, struct Scsi_Host *shost)
 		shost->host_no);
 
 	seq_printf(m,
-		   " host_busy %u, max_id %u, max_lun %llu, max_channel %u\n",
-		   atomic_read(&shost->host_busy), shost->max_id,
+		   " host_busy %d, max_id %u, max_lun %llu, max_channel %u\n",
+		   scsi_host_busy(shost), shost->max_id,
 		   shost->max_lun, shost->max_channel);
 
 	seq_printf(m,
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 3771e59a9fae..ea4b0bb0c1cd 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -563,6 +563,16 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
+/**
+ * scsi_host_busy - Return the host busy counter
+ * @shost:	Pointer to Scsi_Host to inc.
+ **/
+int scsi_host_busy(struct Scsi_Host *shost)
+{
+	return atomic_read(&shost->host_busy);
+}
+EXPORT_SYMBOL(scsi_host_busy);
+
 /**
  * scsi_host_put - dec a Scsi_Host ref count
  * @shost:	Pointer to Scsi_Host to dec.
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index ceab5e5c41c2..33229348dcb6 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -759,7 +759,7 @@ retry:
 	spin_unlock_irq(shost->host_lock);
 
 	SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
-		    __func__, atomic_read(&shost->host_busy), shost->host_failed);
+		    __func__, scsi_host_busy(shost), shost->host_failed);
 	/*
 	 * Deal with commands that still have SAS tasks (i.e. they didn't
 	 * complete via the normal sas_task completion mechanism),
@@ -801,7 +801,7 @@ out:
 		goto retry;
 
 	SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
-		    __func__, atomic_read(&shost->host_busy),
+		    __func__, scsi_host_busy(shost),
 		    shost->host_failed, tries);
 }
 
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index e6ba02793610..9aa9590c5373 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2834,7 +2834,7 @@ static int megasas_reset_bus_host(struct scsi_cmnd *scmd)
 		"SCSI command pointer: (%p)\t SCSI host state: %d\t"
 		" SCSI host busy: %d\t FW outstanding: %d\n",
 		scmd, scmd->device->host->shost_state,
-		atomic_read((atomic_t *)&scmd->device->host->host_busy),
+		scsi_host_busy(scmd->device->host),
 		atomic_read(&instance->fw_outstanding));
 
 	/*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 2053970fc9f8..dc41bd3de08a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3250,7 +3250,7 @@ _base_recovery_check(struct MPT3SAS_ADAPTER *ioc)
 	 * See _wait_for_commands_to_complete() call with regards to this code.
 	 */
 	if (ioc->shost_recovery && ioc->pending_io_count) {
-		ioc->pending_io_count = atomic_read(&ioc->shost->host_busy);
+		ioc->pending_io_count = scsi_host_busy(ioc->shost);
 		if (ioc->pending_io_count == 0)
 			wake_up(&ioc->reset_wq);
 	}
@@ -6857,7 +6857,7 @@ mpt3sas_wait_for_commands_to_complete(struct MPT3SAS_ADAPTER *ioc)
 		return;
 
 	/* pending command count */
-	ioc->pending_io_count = atomic_read(&ioc->shost->host_busy);
+	ioc->pending_io_count = scsi_host_busy(ioc->shost);
 
 	if (!ioc->pending_io_count)
 		return;
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index 8578e566ab41..9d09228eee28 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -959,7 +959,7 @@ static inline void update_can_queue(struct Scsi_Host *host, u_int in_ptr, u_int
 	/* Temporary workaround until bug is found and fixed (one bug has been found
 	   already, but fixing it makes things even worse) -jj */
 	int num_free = QLOGICPTI_REQ_QUEUE_LEN - REQ_QUEUE_DEPTH(in_ptr, out_ptr) - 64;
-	host->can_queue = atomic_read(&host->host_busy) + num_free;
+	host->can_queue = scsi_host_busy(host) + num_free;
 	host->sg_tablesize = QLOGICPTI_MAX_SG(num_free);
 }
 
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 70ef3c39061d..fc1356d101b0 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -167,7 +167,7 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 			if (level > 3)
 				scmd_printk(KERN_INFO, cmd,
 					    "scsi host busy %d failed %d\n",
-					    atomic_read(&cmd->device->host->host_busy),
+					    scsi_host_busy(cmd->device->host),
 					    cmd->device->host->host_failed);
 		}
 	}
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 8932ae81a15a..6a014fd15fe9 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -66,7 +66,7 @@ void scsi_eh_wakeup(struct Scsi_Host *shost)
 {
 	lockdep_assert_held(shost->host_lock);
 
-	if (atomic_read(&shost->host_busy) == shost->host_failed) {
+	if (scsi_host_busy(shost) == shost->host_failed) {
 		trace_scsi_eh_wakeup(shost);
 		wake_up_process(shost->ehandler);
 		SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost,
@@ -2155,7 +2155,7 @@ int scsi_error_handler(void *data)
 			break;
 
 		if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) ||
-		    shost->host_failed != atomic_read(&shost->host_busy)) {
+		    shost->host_failed != scsi_host_busy(shost)) {
 			SCSI_LOG_ERROR_RECOVERY(1,
 				shost_printk(KERN_INFO, shost,
 					     "scsi_eh_%d: sleeping\n",
@@ -2170,7 +2170,7 @@ int scsi_error_handler(void *data)
 				     "scsi_eh_%d: waking up %d/%d/%d\n",
 				     shost->host_no, shost->host_eh_scheduled,
 				     shost->host_failed,
-				     atomic_read(&shost->host_busy)));
+				     scsi_host_busy(shost)));
 
 		/*
 		 * We have a host that is failing for some reason.  Figure out
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 7943b762c12d..de122354d09a 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -382,7 +382,7 @@ static ssize_t
 show_host_busy(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct Scsi_Host *shost = class_to_shost(dev);
-	return snprintf(buf, 20, "%d\n", atomic_read(&shost->host_busy));
+	return snprintf(buf, 20, "%d\n", scsi_host_busy(shost));
 }
 static DEVICE_ATTR(host_busy, S_IRUGO, show_host_busy, NULL);
 
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 53b485fe9b67..5ea06d310a25 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -758,6 +758,7 @@ extern void scsi_scan_host(struct Scsi_Host *);
 extern void scsi_rescan_device(struct device *);
 extern void scsi_remove_host(struct Scsi_Host *);
 extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
+extern int scsi_host_busy(struct Scsi_Host *shost);
 extern void scsi_host_put(struct Scsi_Host *t);
 extern struct Scsi_Host *scsi_host_lookup(unsigned short);
 extern const char *scsi_host_state_name(enum scsi_host_state);
-- 
cgit v1.2.3


From 5560f70cad996e7d90d1c141bcbca0df214eefc9 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:08:58 +0800
Subject: hwspinlock: Add one new API to support getting a specific hwlock by
 the name

The hardware spinlock binding already supplied the 'hwlock-names' property
to match and get a specific hwlock, but did not supply one API for users
to get a specific hwlock by the hwlock name. So this patch introduces one
API to support this requirement.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 29 +++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  7 +++++++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index d16e6a3d38e8..bea358604bb2 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -367,6 +367,35 @@ out:
 }
 EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id);
 
+/**
+ * of_hwspin_lock_get_id_byname() - get lock id for an specified hwlock name
+ * @np: device node from which to request the specific hwlock
+ * @name: hwlock name
+ *
+ * This function provides a means for DT users of the hwspinlock module to
+ * get the global lock id of a specific hwspinlock using the specified name of
+ * the hwspinlock device, so that it can be requested using the normal
+ * hwspin_lock_request_specific() API.
+ *
+ * Returns the global lock id number on success, -EPROBE_DEFER if the hwspinlock
+ * device is not yet registered, -EINVAL on invalid args specifier value or an
+ * appropriate error as returned from the OF parsing of the DT client node.
+ */
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
+{
+	int index;
+
+	if (!name)
+		return -EINVAL;
+
+	index = of_property_match_string(np, "hwlock-names", name);
+	if (index < 0)
+		return index;
+
+	return of_hwspin_lock_get_id(np, index);
+}
+EXPORT_SYMBOL_GPL(of_hwspin_lock_get_id_byname);
+
 static int hwspin_lock_register_single(struct hwspinlock *hwlock, int id)
 {
 	struct hwspinlock *tmp;
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 57537e67b468..2b6f389a3133 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -66,6 +66,7 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int,
 							unsigned long *);
 int __hwspin_trylock(struct hwspinlock *, int, unsigned long *);
 void __hwspin_unlock(struct hwspinlock *, int, unsigned long *);
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name);
 
 #else /* !CONFIG_HWSPINLOCK */
 
@@ -125,6 +126,12 @@ static inline int hwspin_lock_get_id(struct hwspinlock *hwlock)
 	return 0;
 }
 
+static inline
+int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_HWSPINLOCK */
 
 /**
-- 
cgit v1.2.3


From 4f1acd758b08d93306d13010c74d38dd8d9f5768 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:08:59 +0800
Subject: hwspinlock: Add devm_xxx() APIs to request/free hwlock

This patch introduces some devm_xxx() APIs to help to request or free
the hwlocks, which will help to simplify the cleanup code for drivers
requesting one hwlock, ensuring that the hwlock is automatically freed
whenever the device is unbound.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 110 +++++++++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  22 +++++++
 2 files changed, 132 insertions(+)

(limited to 'include')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index bea358604bb2..d542b6fbf0f2 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -735,6 +735,116 @@ out:
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_free);
 
+static int devm_hwspin_lock_match(struct device *dev, void *res, void *data)
+{
+	struct hwspinlock **hwlock = res;
+
+	if (WARN_ON(!hwlock || !*hwlock))
+		return 0;
+
+	return *hwlock == data;
+}
+
+static void devm_hwspin_lock_release(struct device *dev, void *res)
+{
+	hwspin_lock_free(*(struct hwspinlock **)res);
+}
+
+/**
+ * devm_hwspin_lock_free() - free a specific hwspinlock for a managed device
+ * @dev: the device to free the specific hwspinlock
+ * @hwlock: the specific hwspinlock to free
+ *
+ * This function mark @hwlock as free again.
+ * Should only be called with an @hwlock that was retrieved from
+ * an earlier call to hwspin_lock_request{_specific}.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock)
+{
+	int ret;
+
+	ret = devres_release(dev, devm_hwspin_lock_release,
+			     devm_hwspin_lock_match, hwlock);
+	WARN_ON(ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_free);
+
+/**
+ * devm_hwspin_lock_request() - request an hwspinlock for a managed device
+ * @dev: the device to request an hwspinlock
+ *
+ * This function should be called by users of the hwspinlock device,
+ * in order to dynamically assign them an unused hwspinlock.
+ * Usually the user of this lock will then have to communicate the lock's id
+ * to the remote core before it can be used for synchronization (to get the
+ * id of a given hwlock, use hwspin_lock_get_id()).
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns the address of the assigned hwspinlock, or NULL on error
+ */
+struct hwspinlock *devm_hwspin_lock_request(struct device *dev)
+{
+	struct hwspinlock **ptr, *hwlock;
+
+	ptr = devres_alloc(devm_hwspin_lock_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwlock = hwspin_lock_request();
+	if (!IS_ERR(hwlock)) {
+		*ptr = hwlock;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return hwlock;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_request);
+
+/**
+ * devm_hwspin_lock_request_specific() - request for a specific hwspinlock for
+ *					 a managed device
+ * @dev: the device to request the specific hwspinlock
+ * @id: index of the specific hwspinlock that is requested
+ *
+ * This function should be called by users of the hwspinlock module,
+ * in order to assign them a specific hwspinlock.
+ * Usually early board code will be calling this function in order to
+ * reserve specific hwspinlock ids for predefined purposes.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns the address of the assigned hwspinlock, or NULL on error
+ */
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id)
+{
+	struct hwspinlock **ptr, *hwlock;
+
+	ptr = devres_alloc(devm_hwspin_lock_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwlock = hwspin_lock_request_specific(id);
+	if (!IS_ERR(hwlock)) {
+		*ptr = hwlock;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return hwlock;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_request_specific);
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Hardware spinlock interface");
 MODULE_AUTHOR("Ohad Ben-Cohen <ohad@wizery.com>");
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 2b6f389a3133..dfd05938a3cb 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -67,6 +67,10 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int,
 int __hwspin_trylock(struct hwspinlock *, int, unsigned long *);
 void __hwspin_unlock(struct hwspinlock *, int, unsigned long *);
 int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name);
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock);
+struct hwspinlock *devm_hwspin_lock_request(struct device *dev);
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id);
 
 #else /* !CONFIG_HWSPINLOCK */
 
@@ -132,6 +136,24 @@ int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name)
 	return 0;
 }
 
+static inline
+int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock)
+{
+	return 0;
+}
+
+static inline struct hwspinlock *devm_hwspin_lock_request(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline
+struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
+						     unsigned int id)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 #endif /* !CONFIG_HWSPINLOCK */
 
 /**
-- 
cgit v1.2.3


From b03799b0cb35dbc39e89602b1203863e2a6e06bf Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 25 Jun 2018 16:49:06 -0500
Subject: PCI: shpchp: Separate existence of SHPC and permission to use it

The shpchp driver registers for all PCI bridge devices.  Its probe method
should fail if either (1) the bridge doesn't have an SHPC or (2) the OS
isn't allowed to use it (the platform firmware may be operating the SHPC
itself).

Separate these two tests into:

  - A new shpc_capable() that looks for the SHPC hardware and is applicable
    on all systems (ACPI and non-ACPI), and

  - A simplified acpi_get_hp_hw_control_from_firmware() that we call only
    when we already know an SHPC exists and there may be ACPI methods to
    either request permission to use it (_OSC) or transfer control to the
    OS (OSHP).

acpi_get_hp_hw_control_from_firmware() is implemented when CONFIG_ACPI=y,
but does nothing if the current platform doesn't support ACPI.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/pci/hotplug/acpi_pcihp.c  | 36 +++++++++++++++++++-----------------
 drivers/pci/hotplug/shpchp_core.c | 21 +++++++++++++++++++++
 drivers/pci/pci-acpi.c            | 19 +------------------
 include/linux/pci.h               |  1 +
 4 files changed, 42 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 5bd6c1573295..6b7c1ed58e7e 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -73,20 +73,6 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 	acpi_handle chandle, handle;
 	struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL };
 
-	/*
-	 * Per PCI firmware specification, we should run the ACPI _OSC
-	 * method to get control of hotplug hardware before using it. If
-	 * an _OSC is missing, we look for an OSHP to do the same thing.
-	 * To handle different BIOS behavior, we look for _OSC on a root
-	 * bridge preferentially (according to PCI fw spec). Later for
-	 * OSHP within the scope of the hotplug controller and its parents,
-	 * up to the host bridge under which this controller exists.
-	 */
-	if (shpchp_is_native(pdev))
-		return 0;
-
-	/* If _OSC exists, we should not evaluate OSHP */
-
 	/*
 	 * If there's no ACPI host bridge (i.e., ACPI support is compiled
 	 * into the kernel but the hardware platform doesn't support ACPI),
@@ -97,9 +83,25 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 	if (!root)
 		return 0;
 
-	if (root->osc_support_set)
-		goto no_control;
+	/*
+	 * If _OSC exists, it determines whether we're allowed to manage
+	 * the SHPC.  We executed it while enumerating the host bridge.
+	 */
+	if (root->osc_support_set) {
+		if (host->native_shpc_hotplug)
+			return 0;
+		return -ENODEV;
+	}
 
+	/*
+	 * In the absence of _OSC, we're always allowed to manage the SHPC.
+	 * However, if an OSHP method is present, we must execute it so the
+	 * firmware can transfer control to the OS, e.g., direct interrupts
+	 * to the OS instead of to the firmware.
+	 *
+	 * N.B. The PCI Firmware Spec (r3.2, sec 4.8) does not endorse
+	 * searching up the ACPI hierarchy, so the loops below are suspect.
+	 */
 	handle = ACPI_HANDLE(&pdev->dev);
 	if (!handle) {
 		/*
@@ -128,7 +130,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev)
 		if (ACPI_FAILURE(status))
 			break;
 	}
-no_control:
+
 	pci_info(pdev, "Cannot get control of SHPC hotplug\n");
 	kfree(string.pointer);
 	return -ENODEV;
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index e91be287f292..8e3c6ce12f31 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -270,11 +270,30 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
+static bool shpc_capable(struct pci_dev *bridge)
+{
+	/*
+	 * It is assumed that AMD GOLAM chips support SHPC but they do not
+	 * have SHPC capability.
+	 */
+	if (bridge->vendor == PCI_VENDOR_ID_AMD &&
+	    bridge->device == PCI_DEVICE_ID_AMD_GOLAM_7450)
+		return true;
+
+	if (pci_find_capability(bridge, PCI_CAP_ID_SHPC))
+		return true;
+
+	return false;
+}
+
 static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int rc;
 	struct controller *ctrl;
 
+	if (!shpc_capable(pdev))
+		return -ENODEV;
+
 	if (acpi_get_hp_hw_control_from_firmware(pdev))
 		return -ENODEV;
 
@@ -303,6 +322,7 @@ static int shpc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto err_cleanup_slots;
 
+	pdev->shpc_managed = 1;
 	return 0;
 
 err_cleanup_slots:
@@ -319,6 +339,7 @@ static void shpc_remove(struct pci_dev *dev)
 {
 	struct controller *ctrl = pci_get_drvdata(dev);
 
+	dev->shpc_managed = 0;
 	shpchp_remove_ctrl_files(ctrl);
 	ctrl->hpc_ops->release_ctlr(ctrl);
 	kfree(ctrl);
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 65113b6eed14..5100fd2d5a75 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -403,24 +403,7 @@ bool pciehp_is_native(struct pci_dev *bridge)
  */
 bool shpchp_is_native(struct pci_dev *bridge)
 {
-	const struct pci_host_bridge *host;
-
-	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_SHPC))
-		return false;
-
-	/*
-	 * It is assumed that AMD GOLAM chips support SHPC but they do not
-	 * have SHPC capability.
-	 */
-	if (bridge->vendor == PCI_VENDOR_ID_AMD &&
-	    bridge->device == PCI_DEVICE_ID_AMD_GOLAM_7450)
-		return true;
-
-	if (!pci_find_capability(bridge, PCI_CAP_ID_SHPC))
-		return false;
-
-	host = pci_find_host_bridge(bridge->bus);
-	return host->native_shpc_hotplug;
+	return bridge->shpc_managed;
 }
 
 /**
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..f776a1cce120 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -388,6 +388,7 @@ struct pci_dev {
 	unsigned int	is_virtfn:1;
 	unsigned int	reset_fn:1;
 	unsigned int	is_hotplug_bridge:1;
+	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
 	unsigned int	__aer_firmware_first_valid:1;
 	unsigned int	__aer_firmware_first:1;
-- 
cgit v1.2.3


From c102780acdbc22e5f664cfaf760eaccffbd6c385 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:09:00 +0800
Subject: hwspinlock: Add devm_xxx() APIs to register/unregister one hwlock
 controller

This patch introduces devm_hwspin_lock_register() and devm_hwspin_lock_unregister()
interfaces to help to register or unregister one hardware spinlock controller, that
will help to simplify the cleanup code for hwspinlock drivers.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/hwspinlock/hwspinlock_core.c | 82 ++++++++++++++++++++++++++++++++++++
 include/linux/hwspinlock.h           |  6 +++
 2 files changed, 88 insertions(+)

(limited to 'include')

diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c
index d542b6fbf0f2..ba27465c9d9b 100644
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -529,6 +529,88 @@ int hwspin_lock_unregister(struct hwspinlock_device *bank)
 }
 EXPORT_SYMBOL_GPL(hwspin_lock_unregister);
 
+static void devm_hwspin_lock_unreg(struct device *dev, void *res)
+{
+	hwspin_lock_unregister(*(struct hwspinlock_device **)res);
+}
+
+static int devm_hwspin_lock_device_match(struct device *dev, void *res,
+					 void *data)
+{
+	struct hwspinlock_device **bank = res;
+
+	if (WARN_ON(!bank || !*bank))
+		return 0;
+
+	return *bank == data;
+}
+
+/**
+ * devm_hwspin_lock_unregister() - unregister an hw spinlock device for
+ *				   a managed device
+ * @dev: the backing device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to unregister an existing (and unused) hwspinlock.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_unregister(struct device *dev,
+				struct hwspinlock_device *bank)
+{
+	int ret;
+
+	ret = devres_release(dev, devm_hwspin_lock_unreg,
+			     devm_hwspin_lock_device_match, bank);
+	WARN_ON(ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_unregister);
+
+/**
+ * devm_hwspin_lock_register() - register a new hw spinlock device for
+ *				 a managed device
+ * @dev: the backing device
+ * @bank: the hwspinlock device, which usually provides numerous hw locks
+ * @ops: hwspinlock handlers for this device
+ * @base_id: id of the first hardware spinlock in this bank
+ * @num_locks: number of hwspinlocks provided by this device
+ *
+ * This function should be called from the underlying platform-specific
+ * implementation, to register a new hwspinlock device instance.
+ *
+ * Should be called from a process context (might sleep)
+ *
+ * Returns 0 on success, or an appropriate error code on failure
+ */
+int devm_hwspin_lock_register(struct device *dev,
+			      struct hwspinlock_device *bank,
+			      const struct hwspinlock_ops *ops,
+			      int base_id, int num_locks)
+{
+	struct hwspinlock_device **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_hwspin_lock_unreg, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = hwspin_lock_register(bank, dev, ops, base_id, num_locks);
+	if (!ret) {
+		*ptr = bank;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_hwspin_lock_register);
+
 /**
  * __hwspin_lock_request() - tag an hwspinlock as used and power it up
  *
diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index dfd05938a3cb..94064cce6962 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -71,6 +71,12 @@ int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock);
 struct hwspinlock *devm_hwspin_lock_request(struct device *dev);
 struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev,
 						     unsigned int id);
+int devm_hwspin_lock_unregister(struct device *dev,
+				struct hwspinlock_device *bank);
+int devm_hwspin_lock_register(struct device *dev,
+			      struct hwspinlock_device *bank,
+			      const struct hwspinlock_ops *ops,
+			      int base_id, int num_locks);
 
 #else /* !CONFIG_HWSPINLOCK */
 
-- 
cgit v1.2.3


From 2ceda54c44530b8e46363b0b12201855da5fd631 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Fri, 22 Jun 2018 16:09:01 +0800
Subject: hwspinlock: Remove redundant config

The hardware core can not be built as a module, so remove the redundant
CONFIG_HWSPINLOCK_MODULE config.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/hwspinlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h
index 94064cce6962..0afe693be5f4 100644
--- a/include/linux/hwspinlock.h
+++ b/include/linux/hwspinlock.h
@@ -52,7 +52,7 @@ struct hwspinlock_pdata {
 	int base_id;
 };
 
-#if defined(CONFIG_HWSPINLOCK) || defined(CONFIG_HWSPINLOCK_MODULE)
+#ifdef CONFIG_HWSPINLOCK
 
 int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
 		const struct hwspinlock_ops *ops, int base_id, int num_locks);
-- 
cgit v1.2.3


From 618fcff3742b4c62fea24bea1f01a2f002ed4b37 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:55 -0500
Subject: remoteproc: Rename subdev functions to start/stop

"start" and "stop" are more suitable names for how these two operations
are used, and they fit better with the upcoming introduction of two
additional operations in the struct.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: minor comment edits]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 30 +++++++++++++++---------------
 include/linux/remoteproc.h           | 14 +++++++-------
 2 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index a9609d971f7f..5dd58e6bea88 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -774,13 +774,13 @@ static int rproc_handle_resources(struct rproc *rproc,
 	return ret;
 }
 
-static int rproc_probe_subdevices(struct rproc *rproc)
+static int rproc_start_subdevices(struct rproc *rproc)
 {
 	struct rproc_subdev *subdev;
 	int ret;
 
 	list_for_each_entry(subdev, &rproc->subdevs, node) {
-		ret = subdev->probe(subdev);
+		ret = subdev->start(subdev);
 		if (ret)
 			goto unroll_registration;
 	}
@@ -789,17 +789,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
 	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev, true);
+		subdev->stop(subdev, true);
 
 	return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
+static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 {
 	struct rproc_subdev *subdev;
 
 	list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev, crashed);
+		subdev->stop(subdev, crashed);
 }
 
 /**
@@ -901,8 +901,8 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 		return ret;
 	}
 
-	/* probe any subdevices for the remote processor */
-	ret = rproc_probe_subdevices(rproc);
+	/* Start any subdevices for the remote processor */
+	ret = rproc_start_subdevices(rproc);
 	if (ret) {
 		dev_err(dev, "failed to probe subdevices for %s: %d\n",
 			rproc->name, ret);
@@ -1014,8 +1014,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
 	struct device *dev = &rproc->dev;
 	int ret;
 
-	/* remove any subdevices for the remote processor */
-	rproc_remove_subdevices(rproc, crashed);
+	/* Stop any subdevices for the remote processor */
+	rproc_stop_subdevices(rproc, crashed);
 
 	/* the installed resource table is no longer accessible */
 	rproc->table_ptr = rproc->cached_table;
@@ -1657,16 +1657,16 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @probe: function to call when the rproc boots
- * @remove: function to call when the rproc shuts down
+ * @start: function to call after the rproc is started
+ * @stop: function to call before the rproc is stopped
  */
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
-		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev, bool crashed))
+		      int (*start)(struct rproc_subdev *subdev),
+		      void (*stop)(struct rproc_subdev *subdev, bool crashed))
 {
-	subdev->probe = probe;
-	subdev->remove = remove;
+	subdev->start = start;
+	subdev->stop = stop;
 
 	list_add_tail(&subdev->node, &rproc->subdevs);
 }
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index dfdaede9139e..bf55bf2a5ee1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,15 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
- * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is being stopped, the @crashed
- *	    parameter indicates if this originates from the a recovery
+ * @start: start function, called after the rproc has been started
+ * @stop: stop function, called before the rproc is stopped; the @crashed
+ *	    parameter indicates if this originates from a recovery
  */
 struct rproc_subdev {
 	struct list_head node;
 
-	int (*probe)(struct rproc_subdev *subdev);
-	void (*remove)(struct rproc_subdev *subdev, bool crashed);
+	int (*start)(struct rproc_subdev *subdev);
+	void (*stop)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -568,8 +568,8 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
-		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev, bool crashed));
+		      int (*start)(struct rproc_subdev *subdev),
+		      void (*stop)(struct rproc_subdev *subdev, bool crashed));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
-- 
cgit v1.2.3


From 4902676f04acc20fe12e49f5f4916f2659c2a7bc Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:57 -0500
Subject: remoteproc: Make client initialize ops in rproc_subdev

In preparation of adding the additional prepare and unprepare operations
make the client responsible for filling out the function pointers of the
rproc_subdev. This makes the arguments to rproc_add_subdev() more
manageable, in particular when some of the functions are left out.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: added comment about assigning function pointers]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/qcom_common.c     | 18 ++++++++++--------
 drivers/remoteproc/qcom_sysmon.c     |  5 ++++-
 drivers/remoteproc/remoteproc_core.c | 18 +++++++-----------
 include/linux/remoteproc.h           |  5 +----
 4 files changed, 22 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index acfc99f82fb8..4ae87c5b8793 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -64,7 +64,10 @@ void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink)
 		return;
 
 	glink->dev = dev;
-	rproc_add_subdev(rproc, &glink->subdev, glink_subdev_probe, glink_subdev_remove);
+	glink->subdev.start = glink_subdev_probe;
+	glink->subdev.stop = glink_subdev_remove;
+
+	rproc_add_subdev(rproc, &glink->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_glink_subdev);
 
@@ -157,7 +160,10 @@ void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd)
 		return;
 
 	smd->dev = dev;
-	rproc_add_subdev(rproc, &smd->subdev, smd_subdev_probe, smd_subdev_remove);
+	smd->subdev.start = smd_subdev_probe;
+	smd->subdev.stop = smd_subdev_remove;
+
+	rproc_add_subdev(rproc, &smd->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_smd_subdev);
 
@@ -202,11 +208,6 @@ void qcom_unregister_ssr_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(qcom_unregister_ssr_notifier);
 
-static int ssr_notify_start(struct rproc_subdev *subdev)
-{
-	return  0;
-}
-
 static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
@@ -227,8 +228,9 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
 			 const char *ssr_name)
 {
 	ssr->name = ssr_name;
+	ssr->subdev.stop = ssr_notify_stop;
 
-	rproc_add_subdev(rproc, &ssr->subdev, ssr_notify_start, ssr_notify_stop);
+	rproc_add_subdev(rproc, &ssr->subdev);
 }
 EXPORT_SYMBOL_GPL(qcom_add_ssr_subdev);
 
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
index f085545d7da5..e976a602b015 100644
--- a/drivers/remoteproc/qcom_sysmon.c
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -469,7 +469,10 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
 
 	qmi_add_lookup(&sysmon->qmi, 43, 0, 0);
 
-	rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop);
+	sysmon->subdev.start = sysmon_start;
+	sysmon->subdev.stop = sysmon_stop;
+
+	rproc_add_subdev(rproc, &sysmon->subdev);
 
 	sysmon->nb.notifier_call = sysmon_notify;
 	blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb);
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 981ae6dff145..ca39fad175f2 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -399,8 +399,10 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 
 	list_add_tail(&rvdev->node, &rproc->rvdevs);
 
-	rproc_add_subdev(rproc, &rvdev->subdev,
-			 rproc_vdev_do_probe, rproc_vdev_do_remove);
+	rvdev->subdev.start = rproc_vdev_do_probe;
+	rvdev->subdev.stop = rproc_vdev_do_remove;
+
+	rproc_add_subdev(rproc, &rvdev->subdev);
 
 	return 0;
 
@@ -1663,17 +1665,11 @@ EXPORT_SYMBOL(rproc_del);
  * rproc_add_subdev() - add a subdevice to a remoteproc
  * @rproc: rproc handle to add the subdevice to
  * @subdev: subdev handle to register
- * @start: function to call after the rproc is started
- * @stop: function to call before the rproc is stopped
+ *
+ * Caller is responsible for populating optional subdevice function pointers.
  */
-void rproc_add_subdev(struct rproc *rproc,
-		      struct rproc_subdev *subdev,
-		      int (*start)(struct rproc_subdev *subdev),
-		      void (*stop)(struct rproc_subdev *subdev, bool crashed))
+void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev)
 {
-	subdev->start = start;
-	subdev->stop = stop;
-
 	list_add_tail(&subdev->node, &rproc->subdevs);
 }
 EXPORT_SYMBOL(rproc_add_subdev);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index bf55bf2a5ee1..8f1426330cca 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -566,10 +566,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 	return rvdev->rproc;
 }
 
-void rproc_add_subdev(struct rproc *rproc,
-		      struct rproc_subdev *subdev,
-		      int (*start)(struct rproc_subdev *subdev),
-		      void (*stop)(struct rproc_subdev *subdev, bool crashed));
+void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
-- 
cgit v1.2.3


From c455daa4af3c3ee6e841fc9ed42024d6b8d50d66 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 26 Jun 2018 07:11:59 -0500
Subject: remoteproc: Introduce prepare and unprepare for subdevices

On rare occasions a subdevice might need to prepare some hardware
resources before a remote processor is booted, and clean up some
state after it has been shut down.

One such example is the IP Accelerator found in various Qualcomm
platforms, which is accessed directly from both the modem remoteproc
and the application subsystem and requires an intricate lockstep
process when bringing the modem up and down.

Tested-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
[elder@linaro.org: minor description and comment edits]
Signed-off-by Alex Elder <elder@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 56 ++++++++++++++++++++++++++++++++++--
 include/linux/remoteproc.h           |  4 +++
 2 files changed, 57 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 2ede7ae6f5bc..283b258f5e0f 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -776,6 +776,30 @@ static int rproc_handle_resources(struct rproc *rproc,
 	return ret;
 }
 
+static int rproc_prepare_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+	int ret;
+
+	list_for_each_entry(subdev, &rproc->subdevs, node) {
+		if (subdev->prepare) {
+			ret = subdev->prepare(subdev);
+			if (ret)
+				goto unroll_preparation;
+		}
+	}
+
+	return 0;
+
+unroll_preparation:
+	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node) {
+		if (subdev->unprepare)
+			subdev->unprepare(subdev);
+	}
+
+	return ret;
+}
+
 static int rproc_start_subdevices(struct rproc *rproc)
 {
 	struct rproc_subdev *subdev;
@@ -810,6 +834,16 @@ static void rproc_stop_subdevices(struct rproc *rproc, bool crashed)
 	}
 }
 
+static void rproc_unprepare_subdevices(struct rproc *rproc)
+{
+	struct rproc_subdev *subdev;
+
+	list_for_each_entry_reverse(subdev, &rproc->subdevs, node) {
+		if (subdev->unprepare)
+			subdev->unprepare(subdev);
+	}
+}
+
 /**
  * rproc_coredump_cleanup() - clean up dump_segments list
  * @rproc: the remote processor handle
@@ -902,11 +936,18 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 		rproc->table_ptr = loaded_table;
 	}
 
+	ret = rproc_prepare_subdevices(rproc);
+	if (ret) {
+		dev_err(dev, "failed to prepare subdevices for %s: %d\n",
+			rproc->name, ret);
+		return ret;
+	}
+
 	/* power up the remote processor */
 	ret = rproc->ops->start(rproc);
 	if (ret) {
 		dev_err(dev, "can't start rproc %s: %d\n", rproc->name, ret);
-		return ret;
+		goto unprepare_subdevices;
 	}
 
 	/* Start any subdevices for the remote processor */
@@ -914,8 +955,7 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 	if (ret) {
 		dev_err(dev, "failed to probe subdevices for %s: %d\n",
 			rproc->name, ret);
-		rproc->ops->stop(rproc);
-		return ret;
+		goto stop_rproc;
 	}
 
 	rproc->state = RPROC_RUNNING;
@@ -923,6 +963,14 @@ static int rproc_start(struct rproc *rproc, const struct firmware *fw)
 	dev_info(dev, "remote processor %s is now up\n", rproc->name);
 
 	return 0;
+
+stop_rproc:
+	rproc->ops->stop(rproc);
+
+unprepare_subdevices:
+	rproc_unprepare_subdevices(rproc);
+
+	return ret;
 }
 
 /*
@@ -1035,6 +1083,8 @@ static int rproc_stop(struct rproc *rproc, bool crashed)
 		return ret;
 	}
 
+	rproc_unprepare_subdevices(rproc);
+
 	rproc->state = RPROC_OFFLINE;
 
 	dev_info(dev, "stopped remote processor %s\n", rproc->name);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8f1426330cca..e3c5d856b6da 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -477,15 +477,19 @@ struct rproc {
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
+ * @prepare: prepare function, called before the rproc is started
  * @start: start function, called after the rproc has been started
  * @stop: stop function, called before the rproc is stopped; the @crashed
  *	    parameter indicates if this originates from a recovery
+ * @unprepare: unprepare function, called after the rproc has been stopped
  */
 struct rproc_subdev {
 	struct list_head node;
 
+	int (*prepare)(struct rproc_subdev *subdev);
 	int (*start)(struct rproc_subdev *subdev);
 	void (*stop)(struct rproc_subdev *subdev, bool crashed);
+	void (*unprepare)(struct rproc_subdev *subdev);
 };
 
 /* we currently support only two vrings per rvdev */
-- 
cgit v1.2.3


From 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 31 May 2018 11:16:18 +0300
Subject: net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager

In smartnic env, the host (PF) driver might not be an e-switch
manager, hence the FW will err on driver attempts to deal with
setting/unsetting the eswitch and as a result the overall setup
of sriov will fail.

Fix that by avoiding the operation if e-switch management is not
allowed for this driver instance. While here, move to use the
correct name for the esw manager capability name.

Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Guy Kushnir <guyk@mellanox.com>
Reviewed-by: Eli Cohen <eli@melloanox.com>
Tested-by: Eli Cohen <eli@melloanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c      | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   | 7 ++++++-
 include/linux/mlx5/eswitch.h                      | 2 ++
 include/linux/mlx5/mlx5_ifc.h                     | 2 +-
 7 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 378ad74518ec..60236f73373c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -839,7 +839,7 @@ static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
 
-	if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table))
+	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
 		return false;
 
 	rep = rpriv->rep;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f63dfbcd29fe..103fd6a0cc65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1604,7 +1604,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	if (!ESW_ALLOWED(esw))
 		return 0;
 
-	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+	if (!MLX5_ESWITCH_MANAGER(esw->dev) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 49a75d31185e..f1a86cea86a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
 #include "fs_core.h"
@@ -2652,7 +2653,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
 			err = init_fdb_root_ns(steering);
 			if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index afd9f4fa22f4..41ad24f0de2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -32,6 +32,7 @@
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
@@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	}
 
 	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
-	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	    MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 2a8b529ce6dd..a0674962f02c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return -EBUSY;
 	}
 
+	if (!MLX5_ESWITCH_MANAGER(dev))
+		goto enable_vfs_hca;
+
 	err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
 	if (err) {
 		mlx5_core_warn(dev,
@@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return err;
 	}
 
+enable_vfs_hca:
 	for (vf = 0; vf < num_vfs; vf++) {
 		err = mlx5_core_enable_hca(dev, vf + 1);
 		if (err) {
@@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	}
 
 out:
-	mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+	if (MLX5_ESWITCH_MANAGER(dev))
+		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
 	if (mlx5_wait_for_vf_pages(dev))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index d3c9db492b30..fab5121ffb8f 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -8,6 +8,8 @@
 
 #include <linux/mlx5/driver.h>
 
+#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
+
 enum {
 	SRIOV_NONE,
 	SRIOV_LEGACY,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 27134c4fcb76..ac281f5ec9b8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         vnic_env_queue_counters[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
-	u8         eswitch_flow_table[0x1];
+	u8         eswitch_manager[0x1];
 	u8         device_memory[0x1];
 	u8         mcam_reg[0x1];
 	u8         pcam_reg[0x1];
-- 
cgit v1.2.3


From 951a8ee6def39e25d0e60b9394e5a249ba8b2390 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 20:36:28 -0700
Subject: nfp: reject binding to shared blocks

TC shared blocks allow multiple qdiscs to be grouped together and filters
shared between them. Currently the chains of filters attached to a block
are only flushed when the block is removed. If a qdisc is removed from a
block but the block still exists, flow del messages are not passed to the
callback registered for that qdisc. For the NFP, this presents the
possibility of rules still existing in hw when they should be removed.

Prevent binding to shared blocks until the kernel can send per qdisc del
messages when block unbinds occur.

tcf_block_shared() was not used outside of the core until now, so also
add an empty implementation for builds with CONFIG_NET_CLS=n.

Fixes: 4861738775d7 ("net: sched: introduce shared filter blocks infrastructure")
Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c       | 3 +++
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 +++
 include/net/pkt_cls.h                               | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index fcdfb8e7fdea..6b15e3b11956 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -202,6 +202,9 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	if (tcf_block_shared(f->block))
+		return -EOPNOTSUPP;
+
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 477f584f6d28..525057bee0ed 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -631,6 +631,9 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	if (tcf_block_shared(f->block))
+		return -EOPNOTSUPP;
+
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3c1a2c47cd4..20b059574e60 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -111,6 +111,11 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
+static inline bool tcf_block_shared(struct tcf_block *block)
+{
+	return false;
+}
+
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
 	return NULL;
-- 
cgit v1.2.3


From 77bc8c28ddac90287bc42c129002eb703288d550 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 18 Jun 2018 17:32:30 +0200
Subject: ARM: mvebu: convert secondary CPU clock sync to hotplug state

The current call site in boot_secondary is causing sleep in invalid context
warnings, as this part of the code is running with interrrupts disabled and
some of the calls into the clock framework might sleep on a mutex.

Convert the secondary CPU clock sync to a hotplug state, which allows to
call it from a sleepable context.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 arch/arm/mach-mvebu/platsmp.c | 49 ++++++++++++++++++++-----------------------
 include/linux/cpuhotplug.h    |  1 +
 2 files changed, 24 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-mvebu/platsmp.c b/arch/arm/mach-mvebu/platsmp.c
index 4ffbbd217e82..c130497dc6cc 100644
--- a/arch/arm/mach-mvebu/platsmp.c
+++ b/arch/arm/mach-mvebu/platsmp.c
@@ -35,6 +35,8 @@
 #define AXP_BOOTROM_BASE 0xfff00000
 #define AXP_BOOTROM_SIZE 0x100000
 
+static struct clk *boot_cpu_clk;
+
 static struct clk *get_cpu_clk(int cpu)
 {
 	struct clk *cpu_clk;
@@ -48,30 +50,6 @@ static struct clk *get_cpu_clk(int cpu)
 	return cpu_clk;
 }
 
-static void set_secondary_cpu_clock(unsigned int cpu)
-{
-	int thiscpu;
-	unsigned long rate;
-	struct clk *cpu_clk;
-
-	thiscpu = get_cpu();
-
-	cpu_clk = get_cpu_clk(thiscpu);
-	if (!cpu_clk)
-		goto out;
-	clk_prepare_enable(cpu_clk);
-	rate = clk_get_rate(cpu_clk);
-
-	cpu_clk = get_cpu_clk(cpu);
-	if (!cpu_clk)
-		goto out;
-	clk_set_rate(cpu_clk, rate);
-	clk_prepare_enable(cpu_clk);
-
-out:
-	put_cpu();
-}
-
 static int armada_xp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	int ret, hw_cpu;
@@ -79,7 +57,6 @@ static int armada_xp_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	pr_info("Booting CPU %d\n", cpu);
 
 	hw_cpu = cpu_logical_map(cpu);
-	set_secondary_cpu_clock(hw_cpu);
 	mvebu_pmsu_set_cpu_boot_addr(hw_cpu, armada_xp_secondary_startup);
 
 	/*
@@ -122,6 +99,19 @@ static void __init armada_xp_smp_init_cpus(void)
 		panic("Invalid number of CPUs in DT\n");
 }
 
+static int armada_xp_sync_secondary_clk(unsigned int cpu)
+{
+	struct clk *cpu_clk = get_cpu_clk(cpu);
+
+	if (!cpu_clk || !boot_cpu_clk)
+		return 0;
+
+	clk_prepare_enable(cpu_clk);
+	clk_set_rate(cpu_clk, clk_get_rate(boot_cpu_clk));
+
+	return 0;
+}
+
 static void __init armada_xp_smp_prepare_cpus(unsigned int max_cpus)
 {
 	struct device_node *node;
@@ -131,6 +121,14 @@ static void __init armada_xp_smp_prepare_cpus(unsigned int max_cpus)
 	flush_cache_all();
 	set_cpu_coherent();
 
+	boot_cpu_clk = get_cpu_clk(smp_processor_id());
+	if (boot_cpu_clk) {
+		clk_prepare_enable(boot_cpu_clk);
+		cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
+					  "arm/mvebu/sync_clocks:online",
+					  armada_xp_sync_secondary_clk, NULL);
+	}
+
 	/*
 	 * In order to boot the secondary CPUs we need to ensure
 	 * the bootROM is mapped at the correct address.
@@ -223,7 +221,6 @@ static int mv98dx3236_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	int ret, hw_cpu;
 
 	hw_cpu = cpu_logical_map(cpu);
-	set_secondary_cpu_clock(hw_cpu);
 	mv98dx3236_resume_set_cpu_boot_addr(hw_cpu,
 					    armada_xp_secondary_startup);
 
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..fa54e5fbea38 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -143,6 +143,7 @@ enum cpuhp_state {
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
 	CPUHP_AP_IRQ_AFFINITY_ONLINE,
+	CPUHP_AP_ARM_MVEBU_SYNC_CLOCKS,
 	CPUHP_AP_PERF_ONLINE,
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,
-- 
cgit v1.2.3


From 9b6e543450dc03635899ba56b1c39cc593694560 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:47 +0300
Subject: fsnotify: use typedef fsnotify_connp_t for brevity

The object marks manipulation functions fsnotify_destroy_marks()
fsnotify_find_mark() and their helpers take an argument of type
struct fsnotify_mark_connector __rcu ** to dereference the connector
pointer. use a typedef to describe this type for brevity.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.h             |  4 ++--
 fs/notify/mark.c                 | 20 +++++++++-----------
 include/linux/fsnotify_backend.h |  7 ++++---
 3 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 34515d2c4ba3..94cedf8264ba 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -19,8 +19,8 @@ extern struct srcu_struct fsnotify_mark_srcu;
 extern int fsnotify_compare_groups(struct fsnotify_group *a,
 				   struct fsnotify_group *b);
 
-/* Destroy all marks connected via given connector */
-extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp);
+/* Destroy all marks attached to an object via connector */
+extern void fsnotify_destroy_marks(fsnotify_connp_t *connp);
 /* run the list of all marks associated with inode and destroy them */
 static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 61f4c5fa34c7..7b595acd8ec9 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -436,10 +436,9 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 	return -1;
 }
 
-static int fsnotify_attach_connector_to_object(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct inode *inode,
-				struct vfsmount *mnt)
+static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
+					       struct inode *inode,
+					       struct vfsmount *mnt)
 {
 	struct fsnotify_mark_connector *conn;
 
@@ -476,7 +475,7 @@ static int fsnotify_attach_connector_to_object(
  * they are sure list cannot go away under them.
  */
 static struct fsnotify_mark_connector *fsnotify_grab_connector(
-				struct fsnotify_mark_connector __rcu **connp)
+						fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	int idx;
@@ -508,7 +507,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
-	struct fsnotify_mark_connector __rcu **connp;
+	fsnotify_connp_t *connp;
 	int cmp;
 	int err = 0;
 
@@ -629,9 +628,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
  * Given a list of marks, find the mark associated with given group. If found
  * take a reference to that mark and return it, else return NULL.
  */
-struct fsnotify_mark *fsnotify_find_mark(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct fsnotify_group *group)
+struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
+					 struct fsnotify_group *group)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark;
@@ -697,8 +695,8 @@ clear:
 	}
 }
 
-/* Destroy all marks attached to inode / vfsmount */
-void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
+/* Destroy all marks attached to an object via connector */
+void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark, *old_mark = NULL;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b38964a7a521..e7cd7fc6e3cf 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -268,6 +268,8 @@ struct fsnotify_mark_connector {
 	struct hlist_head list;
 };
 
+typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
+
 /*
  * A mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
@@ -394,9 +396,8 @@ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn);
 extern void fsnotify_init_mark(struct fsnotify_mark *mark,
 			       struct fsnotify_group *group);
 /* Find mark belonging to given group in the list of marks */
-extern struct fsnotify_mark *fsnotify_find_mark(
-				struct fsnotify_mark_connector __rcu **connp,
-				struct fsnotify_group *group);
+extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
+						struct fsnotify_group *group);
 /* attach the mark to the inode or vfsmount */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
 			     struct vfsmount *mnt, int allow_dups);
-- 
cgit v1.2.3


From b812a9f5896379b6cff2ac168ddb5b89037d8e78 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:48 +0300
Subject: fsnotify: pass connp and object type to fsnotify_add_mark()

Instead of passing inode and vfsmount arguments to fsnotify_add_mark()
and its _locked variant, pass an abstract object pointer and the object
type.

The helpers fsnotify_obj_{inode,mount} are added to get the concrete
object pointer from abstract object pointer.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 19 +++++++++--------
 fs/notify/fsnotify.h               | 10 +++++++++
 fs/notify/mark.c                   | 42 +++++++++++++++-----------------------
 include/linux/fsnotify_backend.h   | 20 ++++++++++++------
 4 files changed, 52 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..81212b251189 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -615,8 +615,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 }
 
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
-						   struct inode *inode,
-						   struct vfsmount *mnt)
+						   fsnotify_connp_t *connp,
+						   unsigned int type)
 {
 	struct fsnotify_mark *mark;
 	int ret;
@@ -629,7 +629,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 		return ERR_PTR(-ENOMEM);
 
 	fsnotify_init_mark(mark, group);
-	ret = fsnotify_add_mark_locked(mark, inode, mnt, 0);
+	ret = fsnotify_add_mark_locked(mark, connp, type, 0);
 	if (ret) {
 		fsnotify_put_mark(mark);
 		return ERR_PTR(ret);
@@ -643,14 +643,15 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 				      struct vfsmount *mnt, __u32 mask,
 				      unsigned int flags)
 {
+	fsnotify_connp_t *connp = &real_mount(mnt)->mnt_fsnotify_marks;
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
 
 	mutex_lock(&group->mark_mutex);
-	fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks,
-				      group);
+	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, NULL, mnt);
+		fsn_mark = fanotify_add_new_mark(group, connp,
+						 FSNOTIFY_OBJ_TYPE_VFSMOUNT);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
@@ -669,6 +670,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 				   struct inode *inode, __u32 mask,
 				   unsigned int flags)
 {
+	fsnotify_connp_t *connp = &inode->i_fsnotify_marks;
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
 
@@ -685,9 +687,10 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 		return 0;
 
 	mutex_lock(&group->mark_mutex);
-	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
+	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, inode, NULL);
+		fsn_mark = fanotify_add_new_mark(group, connp,
+						 FSNOTIFY_OBJ_TYPE_INODE);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 94cedf8264ba..caeee042d1cc 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -9,6 +9,16 @@
 
 #include "../mount.h"
 
+static inline struct inode *fsnotify_obj_inode(fsnotify_connp_t *connp)
+{
+	return container_of(connp, struct inode, i_fsnotify_marks);
+}
+
+static inline struct mount *fsnotify_obj_mount(fsnotify_connp_t *connp)
+{
+	return container_of(connp, struct mount, mnt_fsnotify_marks);
+}
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 7b595acd8ec9..7abb73b5beba 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -437,9 +437,9 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 }
 
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
-					       struct inode *inode,
-					       struct vfsmount *mnt)
+					       unsigned int type)
 {
+	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
 
 	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
@@ -447,13 +447,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		return -ENOMEM;
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
-	if (inode) {
-		conn->type = FSNOTIFY_OBJ_TYPE_INODE;
-		conn->inode = igrab(inode);
-	} else {
-		conn->type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
-		conn->mnt = mnt;
-	}
+	conn->type = type;
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
+		inode = conn->inode = igrab(fsnotify_obj_inode(connp));
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+		conn->mnt = &fsnotify_obj_mount(connp)->mnt;
 	/*
 	 * cmpxchg() provides the barrier so that readers of *connp can see
 	 * only initialized structure
@@ -502,27 +500,22 @@ out:
  * priority, highest number first, and then by the group's location in memory.
  */
 static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
-				  struct inode *inode, struct vfsmount *mnt,
+				  fsnotify_connp_t *connp, unsigned int type,
 				  int allow_dups)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
-	fsnotify_connp_t *connp;
 	int cmp;
 	int err = 0;
 
-	if (WARN_ON(!inode && !mnt))
+	if (WARN_ON(!fsnotify_valid_obj_type(type)))
 		return -EINVAL;
-	if (inode)
-		connp = &inode->i_fsnotify_marks;
-	else
-		connp = &real_mount(mnt)->mnt_fsnotify_marks;
 restart:
 	spin_lock(&mark->lock);
 	conn = fsnotify_grab_connector(connp);
 	if (!conn) {
 		spin_unlock(&mark->lock);
-		err = fsnotify_attach_connector_to_object(connp, inode, mnt);
+		err = fsnotify_attach_connector_to_object(connp, type);
 		if (err)
 			return err;
 		goto restart;
@@ -568,14 +561,13 @@ out_err:
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
  */
-int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
-			     struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups)
 {
 	struct fsnotify_group *group = mark->group;
 	int ret = 0;
 
-	BUG_ON(inode && mnt);
-	BUG_ON(!inode && !mnt);
 	BUG_ON(!mutex_is_locked(&group->mark_mutex));
 
 	/*
@@ -592,7 +584,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
 	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
-	ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
+	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
 	if (ret)
 		goto err;
 
@@ -612,14 +604,14 @@ err:
 	return ret;
 }
 
-int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
-		      struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
+		      unsigned int type, int allow_dups)
 {
 	int ret;
 	struct fsnotify_group *group = mark->group;
 
 	mutex_lock(&group->mark_mutex);
-	ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
+	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
 	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index e7cd7fc6e3cf..8efb8663453d 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -210,6 +210,11 @@ enum fsnotify_obj_type {
 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL	(1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
 #define FSNOTIFY_OBJ_ALL_TYPES_MASK	((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
 
+static inline bool fsnotify_valid_obj_type(unsigned int type)
+{
+	return (type < FSNOTIFY_OBJ_TYPE_COUNT);
+}
+
 struct fsnotify_iter_info {
 	struct fsnotify_mark *marks[FSNOTIFY_OBJ_TYPE_COUNT];
 	unsigned int report_mask;
@@ -398,24 +403,27 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark,
 /* Find mark belonging to given group in the list of marks */
 extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
 						struct fsnotify_group *group);
-/* attach the mark to the inode or vfsmount */
-extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
-			     struct vfsmount *mnt, int allow_dups);
+/* attach the mark to the object */
+extern int fsnotify_add_mark(struct fsnotify_mark *mark,
+			     fsnotify_connp_t *connp, unsigned int type,
+			     int allow_dups);
 extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
-				    struct inode *inode, struct vfsmount *mnt,
+				    fsnotify_connp_t *connp, unsigned int type,
 				    int allow_dups);
 /* attach the mark to the inode */
 static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 					  struct inode *inode,
 					  int allow_dups)
 {
-	return fsnotify_add_mark(mark, inode, NULL, allow_dups);
+	return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
+				 FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
 }
 static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
 						 struct inode *inode,
 						 int allow_dups)
 {
-	return fsnotify_add_mark_locked(mark, inode, NULL, allow_dups);
+	return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
+					FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
 }
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
-- 
cgit v1.2.3


From 36f10f55ff1d2867bfc48ed898a9cc0dc6b49dd2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:49 +0300
Subject: fsnotify: let connector point to an abstract object

Make the code to attach/detach a connector to object more generic
by letting the fsnotify connector point to an abstract fsnotify_connp_t.
Code that needs to dereference an inode or mount object now uses the
helpers fsnotify_conn_{inode,mount}.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fdinfo.c               |  8 ++++----
 fs/notify/fsnotify.h             | 10 ++++++----
 fs/notify/mark.c                 | 32 ++++++++++++++++----------------
 include/linux/fsnotify_backend.h | 15 ++++++++++-----
 kernel/audit_tree.c              | 17 +++++++++--------
 5 files changed, 45 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 10aac1942c9f..86fcf5814279 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -15,7 +15,7 @@
 #include <linux/exportfs.h>
 
 #include "inotify/inotify.h"
-#include "../fs/mount.h"
+#include "fsnotify.h"
 
 #if defined(CONFIG_PROC_FS)
 
@@ -81,7 +81,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		return;
 
 	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
-	inode = igrab(mark->connector->inode);
+	inode = igrab(fsnotify_conn_inode(mark->connector));
 	if (inode) {
 		/*
 		 * IN_ALL_EVENTS represents all of the mask bits
@@ -117,7 +117,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
 
 	if (mark->connector->type == FSNOTIFY_OBJ_TYPE_INODE) {
-		inode = igrab(mark->connector->inode);
+		inode = igrab(fsnotify_conn_inode(mark->connector));
 		if (!inode)
 			return;
 		seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
@@ -127,7 +127,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 		seq_putc(m, '\n');
 		iput(inode);
 	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
-		struct mount *mnt = real_mount(mark->connector->mnt);
+		struct mount *mnt = fsnotify_conn_mount(mark->connector);
 
 		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
 			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index caeee042d1cc..7902653dd577 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -9,14 +9,16 @@
 
 #include "../mount.h"
 
-static inline struct inode *fsnotify_obj_inode(fsnotify_connp_t *connp)
+static inline struct inode *fsnotify_conn_inode(
+				struct fsnotify_mark_connector *conn)
 {
-	return container_of(connp, struct inode, i_fsnotify_marks);
+	return container_of(conn->obj, struct inode, i_fsnotify_marks);
 }
 
-static inline struct mount *fsnotify_obj_mount(fsnotify_connp_t *connp)
+static inline struct mount *fsnotify_conn_mount(
+				struct fsnotify_mark_connector *conn)
 {
-	return container_of(connp, struct mount, mnt_fsnotify_marks);
+	return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
 }
 
 /* destroy all events sitting in this groups notification queue */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 7abb73b5beba..959bc73aaae7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -120,14 +120,14 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 			new_mask |= mark->mask;
 	}
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		conn->inode->i_fsnotify_mask = new_mask;
+		fsnotify_conn_inode(conn)->i_fsnotify_mask = new_mask;
 	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
+		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = new_mask;
 }
 
 /*
  * Calculate mask of events for a list of marks. The caller must make sure
- * connector and connector->inode cannot disappear under us.  Callers achieve
+ * connector and connector->obj cannot disappear under us.  Callers achieve
  * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
  * list.
  */
@@ -140,7 +140,8 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 	__fsnotify_recalc_mask(conn);
 	spin_unlock(&conn->lock);
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		__fsnotify_update_child_dentry_flags(conn->inode);
+		__fsnotify_update_child_dentry_flags(
+					fsnotify_conn_inode(conn));
 }
 
 /* Free all connectors queued for freeing once SRCU period ends */
@@ -166,20 +167,20 @@ static struct inode *fsnotify_detach_connector_from_object(
 {
 	struct inode *inode = NULL;
 
+	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
+		return NULL;
+
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
-		inode = conn->inode;
-		rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
+		inode = fsnotify_conn_inode(conn);
 		inode->i_fsnotify_mask = 0;
-		conn->inode = NULL;
-		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
-		rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
-				   NULL);
-		real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
-		conn->mnt = NULL;
-		conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
+		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	}
 
+	rcu_assign_pointer(*(conn->obj), NULL);
+	conn->obj = NULL;
+	conn->type = FSNOTIFY_OBJ_TYPE_DETACHED;
+
 	return inode;
 }
 
@@ -448,10 +449,9 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
 	conn->type = type;
+	conn->obj = connp;
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		inode = conn->inode = igrab(fsnotify_obj_inode(connp));
-	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		conn->mnt = &fsnotify_obj_mount(connp)->mnt;
+		inode = igrab(fsnotify_conn_inode(conn));
 	/*
 	 * cmpxchg() provides the barrier so that readers of *connp can see
 	 * only initialized structure
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 8efb8663453d..381cfb0e67fa 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -255,6 +255,13 @@ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 #define fsnotify_foreach_obj_type(type) \
 	for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
 
+/*
+ * fsnotify_connp_t is what we embed in objects which connector can be attached
+ * to. fsnotify_connp_t * is how we refer from connector back to object.
+ */
+struct fsnotify_mark_connector;
+typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
+
 /*
  * Inode / vfsmount point to this structure which tracks all marks attached to
  * the inode / vfsmount. The reference to inode / vfsmount is held by this
@@ -264,17 +271,15 @@ FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
 struct fsnotify_mark_connector {
 	spinlock_t lock;
 	unsigned int type;	/* Type of object [lock] */
-	union {	/* Object pointer [lock] */
-		struct inode *inode;
-		struct vfsmount *mnt;
+	union {
+		/* Object pointer [lock] */
+		fsnotify_connp_t *obj;
 		/* Used listing heads to free after srcu period expires */
 		struct fsnotify_mark_connector *destroy_next;
 	};
 	struct hlist_head list;
 };
 
-typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
-
 /*
  * A mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index c99ebaae5abc..02feef939560 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -168,7 +168,8 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
 /* Function to return search key in our hash from inode. */
 static unsigned long inode_to_key(const struct inode *inode)
 {
-	return (unsigned long)inode;
+	/* Use address pointed to by connector->obj as the key */
+	return (unsigned long)&inode->i_fsnotify_marks;
 }
 
 /*
@@ -183,7 +184,7 @@ static unsigned long chunk_to_key(struct audit_chunk *chunk)
 	 */
 	if (WARN_ON_ONCE(!chunk->mark.connector))
 		return 0;
-	return (unsigned long)chunk->mark.connector->inode;
+	return (unsigned long)chunk->mark.connector->obj;
 }
 
 static inline struct list_head *chunk_hash(unsigned long key)
@@ -258,7 +259,7 @@ static void untag_chunk(struct node *p)
 	spin_lock(&entry->lock);
 	/*
 	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->inode getting NULL.
+	 * mark->connector->obj getting NULL.
 	 */
 	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 		spin_unlock(&entry->lock);
@@ -288,8 +289,8 @@ static void untag_chunk(struct node *p)
 	if (!new)
 		goto Fallback;
 
-	if (fsnotify_add_inode_mark_locked(&new->mark, entry->connector->inode,
-					   1)) {
+	if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
+				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
 		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
@@ -423,7 +424,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_lock(&old_entry->lock);
 	/*
 	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->inode getting NULL.
+	 * mark->connector->obj getting NULL.
 	 */
 	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
 		/* old_entry is being shot, lets just lie */
@@ -434,8 +435,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		return -ENOENT;
 	}
 
-	if (fsnotify_add_inode_mark_locked(chunk_entry,
-			     old_entry->connector->inode, 1)) {
+	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
+				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
 		spin_unlock(&old_entry->lock);
 		mutex_unlock(&old_entry->group->mark_mutex);
 		fsnotify_put_mark(chunk_entry);
-- 
cgit v1.2.3


From 3ac70bfcde812b1b97d8a88a832df59941fa293f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 23 Jun 2018 17:54:50 +0300
Subject: fsnotify: add helper to get mask from connector

Use a helper to get the mask from the object (i.e. i_fsnotify_mask)
to generalize code of add/remove inode/vfsmount mark.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 16 ++++++++--------
 fs/notify/mark.c                   | 25 +++++++++++++++++++++----
 include/linux/fsnotify_backend.h   |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 81212b251189..3899ad177651 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -542,8 +542,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
-	if (removed & real_mount(mnt)->mnt_fsnotify_mask)
-		fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
+	if (removed & fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	if (destroy_mark)
 		fsnotify_detach_mark(fsn_mark);
 	mutex_unlock(&group->mark_mutex);
@@ -571,8 +571,8 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
 						 &destroy_mark);
-	if (removed & inode->i_fsnotify_mask)
-		fsnotify_recalc_mask(inode->i_fsnotify_marks);
+	if (removed & fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	if (destroy_mark)
 		fsnotify_detach_mark(fsn_mark);
 	mutex_unlock(&group->mark_mutex);
@@ -658,8 +658,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
-		fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
+	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
@@ -697,8 +697,8 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	if (added & ~inode->i_fsnotify_mask)
-		fsnotify_recalc_mask(inode->i_fsnotify_marks);
+	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
+		fsnotify_recalc_mask(fsn_mark->connector);
 	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 959bc73aaae7..05506d60131c 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,6 +109,23 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
 	refcount_inc(&mark->refcnt);
 }
 
+static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
+{
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
+		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+	return NULL;
+}
+
+__u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn)
+{
+	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
+		return 0;
+
+	return *fsnotify_conn_mask_p(conn);
+}
+
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -119,10 +136,10 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
 			new_mask |= mark->mask;
 	}
-	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
-		fsnotify_conn_inode(conn)->i_fsnotify_mask = new_mask;
-	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
-		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = new_mask;
+	if (WARN_ON(!fsnotify_valid_obj_type(conn->type)))
+		return;
+
+	*fsnotify_conn_mask_p(conn) = new_mask;
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 381cfb0e67fa..2b9b6f1ff5e0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -401,6 +401,8 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group
 
 /* functions used to manipulate the marks attached to inodes */
 
+/* Get mask of events for a list of marks */
+extern __u32 fsnotify_conn_mask(struct fsnotify_mark_connector *conn);
 /* Calculate mask of events for a list of marks */
 extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn);
 extern void fsnotify_init_mark(struct fsnotify_mark *mark,
-- 
cgit v1.2.3


From 22eceb8bf3e8f1f9b2f566062d06b25807725d7f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 19 Jun 2018 13:57:26 +0200
Subject: printk: Make CONSOLE_LOGLEVEL_QUIET configurable

The goal of passing the "quiet" option to the kernel is for the kernel
to be quiet unless something really is wrong.

Sofar passing quiet has been (mostly) equivalent to passing
loglevel=4 on the kernel commandline. Which means to show any messages
with a level of KERN_ERR or higher severity on the console.

In practice this often does not result in a quiet boot though, since
there are many false-positive or otherwise harmless error messages printed,
defeating the purpose of the quiet option. Esp. the ACPICA code is really
bad wrt this, but there are plenty of others too.

This commit makes CONSOLE_LOGLEVEL_QUIET configurable.

This for example will allow distros which want quiet to really mean quiet
to set CONSOLE_LOGLEVEL_QUIET so that only messages with a higher severity
then KERN_ERR (CRIT, ALERT, EMERG) get printed, avoiding an endless game
of whack-a-mole silencing harmless error messages.

Link: http://lkml.kernel.org/r/20180619115726.3098-1-hdegoede@redhat.com
To: Petr Mladek <pmladek@suse.com>
To: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h |  6 +++---
 lib/Kconfig.debug      | 11 +++++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6d7e800affd8..18602bb3eca8 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -50,15 +50,15 @@ static inline const char *printk_skip_headers(const char *buffer)
 /* We show everything that is MORE important than this.. */
 #define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
 #define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
-#define CONSOLE_LOGLEVEL_QUIET	 4 /* Shhh ..., when booted with "quiet" */
 #define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
 #define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
 
 /*
- * Default used to be hard-coded at 7, we're now allowing it to be set from
- * kernel config.
+ * Default used to be hard-coded at 7, quiet used to be hardcoded at 4,
+ * we're now allowing both to be set from kernel config.
  */
 #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
+#define CONSOLE_LOGLEVEL_QUIET	 CONFIG_CONSOLE_LOGLEVEL_QUIET
 
 extern int console_printk[];
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 76555479ae36..a7ef03009e9e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -30,6 +30,17 @@ config CONSOLE_LOGLEVEL_DEFAULT
 	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
 	  option.
 
+config CONSOLE_LOGLEVEL_QUIET
+	int "quiet console loglevel (1-15)"
+	range 1 15
+	default "4"
+	help
+	  loglevel to use when "quiet" is passed on the kernel commandline.
+
+	  When "quiet" is passed on the kernel commandline this loglevel
+	  will be used as the loglevel. IOW passing "quiet" will be the
+	  equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>"
+
 config MESSAGE_LOGLEVEL_DEFAULT
 	int "Default message log level (1-7)"
 	range 1 7
-- 
cgit v1.2.3


From e7d4a95da86e0b048702765bbdcdc968aaf312e7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 20 Jun 2018 08:58:28 +0200
Subject: bitfield: fix *_encode_bits()

There's a bug in *_encode_bits() in using ~field_multiplier() for
the check whether or not the constant value fits into the field,
this is wrong and clearly ~field_mask() was intended. This was
triggering for me for both constant and non-constant values.

Additionally, make this case actually into an compile error.
Declaring the extern function that will never exist with just a
warning is pointless as then later we'll just get a link error.

While at it, also fix the indentation in those lines I'm touching.

Finally, as suggested by Andy Shevchenko, add some tests and for
that introduce also u8 helpers. The tests don't compile without
the fix, showing that it's necessary.

Fixes: 00b0c9b82663 ("Add primitives for manipulating bitfields both in host- and fixed-endian.")
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bitfield.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index cf2588d81148..147a7bb341dd 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -104,7 +104,7 @@
 		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask));	\
 	})
 
-extern void __compiletime_warning("value doesn't fit into mask")
+extern void __compiletime_error("value doesn't fit into mask")
 __field_overflow(void);
 extern void __compiletime_error("bad bitfield mask")
 __bad_mask(void);
@@ -121,8 +121,8 @@ static __always_inline u64 field_mask(u64 field)
 #define ____MAKE_OP(type,base,to,from)					\
 static __always_inline __##type type##_encode_bits(base v, base field)	\
 {									\
-        if (__builtin_constant_p(v) &&	(v & ~field_multiplier(field)))	\
-			    __field_overflow();				\
+	if (__builtin_constant_p(v) && (v & ~field_mask(field)))	\
+		__field_overflow();					\
 	return to((v & field_mask(field)) * field_multiplier(field));	\
 }									\
 static __always_inline __##type type##_replace_bits(__##type old,	\
-- 
cgit v1.2.3


From 37a3862e1238262e9866d5d66e5f5f9069cee3a1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 20 Jun 2018 08:58:29 +0200
Subject: bitfield: add u8 helpers

There's no reason why we shouldn't pack/unpack bits into/from
u8 values/registers/etc., so add u8 helpers.

Use the ____MAKE_OP() macro directly to avoid having nonsense
le8_encode_bits() and similar functions.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/bitfield.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 147a7bb341dd..65a6981eef7b 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -143,6 +143,7 @@ static __always_inline base type##_get_bits(__##type v, base field)	\
 	____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu)	\
 	____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu)	\
 	____MAKE_OP(u##size,u##size,,)
+____MAKE_OP(u8,u8,,)
 __MAKE_OP(16)
 __MAKE_OP(32)
 __MAKE_OP(64)
-- 
cgit v1.2.3


From d18e85349f6a65baa69b8ee397efb93e4ca31909 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Mon, 25 Jun 2018 14:02:43 +0200
Subject: clk: sunxi-ng: r40: Export video PLLs

Video PLLs need to be referenced in R40 DT as possible HDMI PHY parent.

Export them.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-r40.h      | 8 ++++++--
 include/dt-bindings/clock/sun8i-r40-ccu.h | 4 ++++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-r40.h b/drivers/clk/sunxi-ng/ccu-sun8i-r40.h
index 0db8e1e97af8..db2a1243f9ff 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-r40.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-r40.h
@@ -25,7 +25,9 @@
 #define CLK_PLL_AUDIO_2X	4
 #define CLK_PLL_AUDIO_4X	5
 #define CLK_PLL_AUDIO_8X	6
-#define CLK_PLL_VIDEO0		7
+
+/* PLL_VIDEO0 is exported */
+
 #define CLK_PLL_VIDEO0_2X	8
 #define CLK_PLL_VE		9
 #define CLK_PLL_DDR0		10
@@ -34,7 +36,9 @@
 #define CLK_PLL_PERIPH0_2X	13
 #define CLK_PLL_PERIPH1		14
 #define CLK_PLL_PERIPH1_2X	15
-#define CLK_PLL_VIDEO1		16
+
+/* PLL_VIDEO1 is exported */
+
 #define CLK_PLL_VIDEO1_2X	17
 #define CLK_PLL_SATA		18
 #define CLK_PLL_SATA_OUT	19
diff --git a/include/dt-bindings/clock/sun8i-r40-ccu.h b/include/dt-bindings/clock/sun8i-r40-ccu.h
index 4fa5f69fc297..f9e15a235626 100644
--- a/include/dt-bindings/clock/sun8i-r40-ccu.h
+++ b/include/dt-bindings/clock/sun8i-r40-ccu.h
@@ -43,6 +43,10 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_R40_H_
 #define _DT_BINDINGS_CLK_SUN8I_R40_H_
 
+#define CLK_PLL_VIDEO0		7
+
+#define CLK_PLL_VIDEO1		16
+
 #define CLK_CPU			24
 
 #define CLK_BUS_MIPI_DSI	29
-- 
cgit v1.2.3


From 4d97f7d53da7dc830dbf416a3d2a6778d267ae68 Mon Sep 17 00:00:00 2001
From: Henry Wilson <henry.wilson@acentic.com>
Date: Thu, 31 May 2018 09:43:03 +0000
Subject: inotify: Add flag IN_MASK_CREATE for inotify_add_watch()

The flag IN_MASK_CREATE is introduced as a flag for inotiy_add_watch()
which prevents inotify from modifying any existing watches when invoked.
If the pathname specified in the call has a watched inode associated
with it and IN_MASK_CREATE is specified, fail with an errno of EEXIST.

Use of IN_MASK_CREATE with IN_MASK_ADD is reserved for future use and
will return EINVAL.

RATIONALE

In the current implementation, there is no way to prevent
inotify_add_watch() from modifying existing watch descriptors. Even if
the caller keeps a record of all watch descriptors collected, this is
only sufficient to detect that an existing watch descriptor may have
been modified.

The assumption that a particular path will map to the same inode over
multiple calls to inotify_add_watch() cannot be made as files can be
renamed or deleted.  It is also not possible to assume that two distinct
paths do no map to the same inode, due to hard-links or a dereferenced
symbolic link. Further uses of inotify_add_watch() to revert the change
may cause other watch descriptors to be modified or created, merely
compunding the problem. There is currently no system call such as
inotify_modify_watch() to explicity modify a watch descriptor, which
would be able to revert unwanted changes. Thus the caller cannot
guarantee to be able to revert any changes to existing watch decriptors.

Additionally the caller cannot assume that the events that are
associated with a watch descriptor are within the set requested, as any
future calls to inotify_add_watch() may unintentionally modify a watch
descriptor's mask. Thus it cannot currently be guaranteed that a watch
descriptor will only generate events which have been requested. The
program must filter events which come through its watch descriptor to
within its expected range.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Henry Wilson <henry.wilson@acentic.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/inotify/inotify_user.c | 9 ++++++++-
 include/linux/inotify.h          | 2 +-
 include/uapi/linux/inotify.h     | 1 +
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..6f48d325c350 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -510,6 +510,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	__u32 old_mask, new_mask;
 	__u32 mask;
 	int add = (arg & IN_MASK_ADD);
+	int create = (arg & IN_MASK_CREATE);
 	int ret;
 
 	mask = inotify_arg_to_mask(arg);
@@ -517,6 +518,8 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
 	if (!fsn_mark)
 		return -ENOENT;
+	else if (create)
+		return -EEXIST;
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
@@ -718,6 +721,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (unlikely(!f.file))
 		return -EBADF;
 
+	/* IN_MASK_ADD and IN_MASK_CREATE don't make sense together */
+	if (unlikely((mask & IN_MASK_ADD) && (mask & IN_MASK_CREATE)))
+		return -EINVAL;
+
 	/* verify that this is indeed an inotify instance */
 	if (unlikely(f.file->f_op != &inotify_fops)) {
 		ret = -EINVAL;
@@ -806,7 +813,7 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
-	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 22);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 44f9ffe72c87..6a24905f6e1e 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -18,6 +18,6 @@ extern struct ctl_table inotify_table[]; /* for sysctl */
 			  IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | \
 			  IN_Q_OVERFLOW | IN_IGNORED | IN_ONLYDIR | \
 			  IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
-			  IN_ISDIR | IN_ONESHOT)
+			  IN_MASK_CREATE | IN_ISDIR | IN_ONESHOT)
 
 #endif	/* _LINUX_INOTIFY_H */
diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h
index 4800bf2a531d..884b4846b630 100644
--- a/include/uapi/linux/inotify.h
+++ b/include/uapi/linux/inotify.h
@@ -53,6 +53,7 @@ struct inotify_event {
 #define IN_ONLYDIR		0x01000000	/* only watch the path if it is a directory */
 #define IN_DONT_FOLLOW		0x02000000	/* don't follow a sym link */
 #define IN_EXCL_UNLINK		0x04000000	/* exclude events on unlinked objects */
+#define IN_MASK_CREATE		0x10000000	/* only create watches */
 #define IN_MASK_ADD		0x20000000	/* add to the mask of an already existing watch */
 #define IN_ISDIR		0x40000000	/* event occurred against dir */
 #define IN_ONESHOT		0x80000000	/* only send event once */
-- 
cgit v1.2.3


From feb20faec73ba0b30f949d54c4153cf0ad3807c8 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 27 Jun 2018 09:03:51 +0200
Subject: ALSA: hda - Move in_pm accessors to HDA core

The in_pm atomic in hdac_device is an important field used as a flag
as well as a refcount for PM.  The existing snd_hdac_power_up/down
helpers already refer to it in the HD-audio core code, while the code
to actually setting the value (atomic_inc() / _dec()) is open-coded in
HDA legacy side, which is hard to find.

This patch adds the helper functions to set/reset the in_pm counter to
HDA core and use them in HDA legacy side, for making it clearer who /
where the PM is managed.

There is no functional changes, just code refactoring.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h    | 27 +++++++++++++++++++++++++++
 sound/pci/hda/hda_codec.c  | 21 ++++++---------------
 sound/pci/hda/patch_hdmi.c |  2 +-
 3 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index c052afc27547..294a5a21937b 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -8,6 +8,7 @@
 
 #include <linux/device.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 #include <linux/timecounter.h>
 #include <sound/core.h>
 #include <sound/memalloc.h>
@@ -171,12 +172,38 @@ int snd_hdac_power_down(struct hdac_device *codec);
 int snd_hdac_power_up_pm(struct hdac_device *codec);
 int snd_hdac_power_down_pm(struct hdac_device *codec);
 int snd_hdac_keep_power_up(struct hdac_device *codec);
+
+/* call this at entering into suspend/resume callbacks in codec driver */
+static inline void snd_hdac_enter_pm(struct hdac_device *codec)
+{
+	atomic_inc(&codec->in_pm);
+}
+
+/* call this at leaving from suspend/resume callbacks in codec driver */
+static inline void snd_hdac_leave_pm(struct hdac_device *codec)
+{
+	atomic_dec(&codec->in_pm);
+}
+
+static inline bool snd_hdac_is_in_pm(struct hdac_device *codec)
+{
+	return atomic_read(&codec->in_pm);
+}
+
+static inline bool snd_hdac_is_power_on(struct hdac_device *codec)
+{
+	return !pm_runtime_suspended(&codec->dev);
+}
 #else
 static inline int snd_hdac_power_up(struct hdac_device *codec) { return 0; }
 static inline int snd_hdac_power_down(struct hdac_device *codec) { return 0; }
 static inline int snd_hdac_power_up_pm(struct hdac_device *codec) { return 0; }
 static inline int snd_hdac_power_down_pm(struct hdac_device *codec) { return 0; }
 static inline int snd_hdac_keep_power_up(struct hdac_device *codec) { return 0; }
+static inline void snd_hdac_enter_pm(struct hdac_device *codec) {}
+static inline void snd_hdac_leave_pm(struct hdac_device *codec) {}
+static inline bool snd_hdac_is_in_pm(struct hdac_device *codec) { return 0; }
+static inline bool snd_hdac_is_power_on(struct hdac_device *codec) { return 1; }
 #endif
 
 /*
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index f42af88bd2de..decd46b51087 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -37,15 +37,8 @@
 #include "hda_jack.h"
 #include <sound/hda_hwdep.h>
 
-#ifdef CONFIG_PM
-#define codec_in_pm(codec)	atomic_read(&(codec)->core.in_pm)
-#define hda_codec_is_power_on(codec) \
-	(!pm_runtime_suspended(hda_codec_dev(codec)))
-#else
-#define codec_in_pm(codec)	0
-#define hda_codec_is_power_on(codec)	1
-#endif
-
+#define codec_in_pm(codec)		snd_hdac_is_in_pm(&codec->core)
+#define hda_codec_is_power_on(codec)	snd_hdac_is_power_on(&codec->core)
 #define codec_has_epss(codec) \
 	((codec)->core.power_caps & AC_PWRST_EPSS)
 #define codec_has_clkstop(codec) \
@@ -2846,14 +2839,13 @@ static unsigned int hda_call_codec_suspend(struct hda_codec *codec)
 {
 	unsigned int state;
 
-	atomic_inc(&codec->core.in_pm);
-
+	snd_hdac_enter_pm(&codec->core);
 	if (codec->patch_ops.suspend)
 		codec->patch_ops.suspend(codec);
 	hda_cleanup_all_streams(codec);
 	state = hda_set_power_state(codec, AC_PWRST_D3);
 	update_power_acct(codec, true);
-	atomic_dec(&codec->core.in_pm);
+	snd_hdac_leave_pm(&codec->core);
 	return state;
 }
 
@@ -2862,8 +2854,7 @@ static unsigned int hda_call_codec_suspend(struct hda_codec *codec)
  */
 static void hda_call_codec_resume(struct hda_codec *codec)
 {
-	atomic_inc(&codec->core.in_pm);
-
+	snd_hdac_enter_pm(&codec->core);
 	if (codec->core.regmap)
 		regcache_mark_dirty(codec->core.regmap);
 
@@ -2886,7 +2877,7 @@ static void hda_call_codec_resume(struct hda_codec *codec)
 		hda_jackpoll_work(&codec->jackpoll_work.work);
 	else
 		snd_hda_jack_report_sync(codec);
-	atomic_dec(&codec->core.in_pm);
+	snd_hdac_leave_pm(&codec->core);
 }
 
 static int hda_codec_runtime_suspend(struct device *dev)
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 8840daf9c6a3..ed2318f79e3c 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2489,7 +2489,7 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe)
 	if (snd_power_get_state(codec->card) != SNDRV_CTL_POWER_D0)
 		return;
 	/* ditto during suspend/resume process itself */
-	if (atomic_read(&(codec)->core.in_pm))
+	if (snd_hdac_is_in_pm(&codec->core))
 		return;
 
 	snd_hdac_i915_set_bclk(&codec->bus->core);
-- 
cgit v1.2.3


From 3787a39852b0d6a9e67336f8fb5815c13ab78bb6 Mon Sep 17 00:00:00 2001
From: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Date: Fri, 1 Jun 2018 22:53:49 -0500
Subject: ALSA: hdac: Remove usage of struct hdac_ext_device and use
 hdac_device instead

This patch removes the hdac_ext_device structure. The legacy and
enhanced HDaudio capabilities can be handled in a backward-compatible
way without separate definitions.

Follow-up patches in this series handle the bus and driver definitions.

Signed-off-by: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio_ext.h  |  36 +---
 sound/hda/ext/hdac_ext_bus.c |  25 ++-
 sound/soc/codecs/hdac_hdmi.c | 396 +++++++++++++++++++++----------------------
 3 files changed, 204 insertions(+), 253 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index 9c14e21dda85..c1a5ad0e6e39 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -181,48 +181,20 @@ struct hda_dai_map {
 	u32	maxbps;
 };
 
-#define HDA_MAX_NIDS 16
-
-/**
- * struct hdac_ext_device - HDAC Ext device
- *
- * @hdac: hdac core device
- * @nid_list - the dai map which matches the dai-name with the nid
- * @map_cur_idx - the idx in use in dai_map
- * @ops - the hda codec ops common to all codec drivers
- * @pvt_data - private data, for asoc contains asoc codec object
- */
-struct hdac_ext_device {
-	struct hdac_device hdev;
-	struct hdac_ext_bus *ebus;
-
-	/* soc-dai to nid map */
-	struct hda_dai_map nid_list[HDA_MAX_NIDS];
-	unsigned int map_cur_idx;
-
-	/* codec ops */
-	struct hdac_ext_codec_ops ops;
-
-	struct snd_card *card;
-	void *scodec;
-	void *private_data;
-};
-
 struct hdac_ext_dma_params {
 	u32 format;
 	u8 stream_tag;
 };
-#define to_ehdac_device(dev) (container_of((dev), \
-				 struct hdac_ext_device, hdev))
+
 /*
  * HD-audio codec base driver
  */
 struct hdac_ext_driver {
 	struct hdac_driver hdac;
 
-	int	(*probe)(struct hdac_ext_device *dev);
-	int	(*remove)(struct hdac_ext_device *dev);
-	void	(*shutdown)(struct hdac_ext_device *dev);
+	int	(*probe)(struct hdac_device *dev);
+	int	(*remove)(struct hdac_device *dev);
+	void	(*shutdown)(struct hdac_device *dev);
 };
 
 int snd_hda_ext_driver_register(struct hdac_ext_driver *drv);
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 0daf31383084..0e4823fdd411 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -137,17 +137,16 @@ static void default_release(struct device *dev)
  */
 int snd_hdac_ext_bus_device_init(struct hdac_ext_bus *ebus, int addr)
 {
-	struct hdac_ext_device *edev;
 	struct hdac_device *hdev = NULL;
 	struct hdac_bus *bus = ebus_to_hbus(ebus);
 	char name[15];
 	int ret;
 
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev)
+	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+	if (!hdev)
 		return -ENOMEM;
-	hdev = &edev->hdev;
-	edev->ebus = ebus;
+
+	hdev->bus = bus;
 
 	snprintf(name, sizeof(name), "ehdaudio%dD%d", ebus->idx, addr);
 
@@ -176,10 +175,8 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_init);
  */
 void snd_hdac_ext_bus_device_exit(struct hdac_device *hdev)
 {
-	struct hdac_ext_device *edev = to_ehdac_device(hdev);
-
 	snd_hdac_device_exit(hdev);
-	kfree(edev);
+	kfree(hdev);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_exit);
 
@@ -212,27 +209,25 @@ static inline struct hdac_ext_driver *get_edrv(struct device *dev)
 	return edrv;
 }
 
-static inline struct hdac_ext_device *get_edev(struct device *dev)
+static inline struct hdac_device *get_hdev(struct device *dev)
 {
 	struct hdac_device *hdev = dev_to_hdac_dev(dev);
-	struct hdac_ext_device *edev = to_ehdac_device(hdev);
-
-	return edev;
+	return hdev;
 }
 
 static int hda_ext_drv_probe(struct device *dev)
 {
-	return (get_edrv(dev))->probe(get_edev(dev));
+	return (get_edrv(dev))->probe(get_hdev(dev));
 }
 
 static int hdac_ext_drv_remove(struct device *dev)
 {
-	return (get_edrv(dev))->remove(get_edev(dev));
+	return (get_edrv(dev))->remove(get_hdev(dev));
 }
 
 static void hdac_ext_drv_shutdown(struct device *dev)
 {
-	return (get_edrv(dev))->shutdown(get_edev(dev));
+	return (get_edrv(dev))->shutdown(get_hdev(dev));
 }
 
 /**
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 84f7a7a36e4b..f1e235817a65 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -85,7 +85,7 @@ struct hdac_hdmi_pin {
 	bool mst_capable;
 	struct hdac_hdmi_port *ports;
 	int num_ports;
-	struct hdac_ext_device *edev;
+	struct hdac_device *hdev;
 };
 
 struct hdac_hdmi_port {
@@ -126,6 +126,9 @@ struct hdac_hdmi_drv_data {
 };
 
 struct hdac_hdmi_priv {
+	struct hdac_device *hdev;
+	struct snd_soc_component *component;
+	struct snd_card *card;
 	struct hdac_hdmi_dai_port_map dai_map[HDA_MAX_CVTS];
 	struct list_head pin_list;
 	struct list_head cvt_list;
@@ -139,7 +142,7 @@ struct hdac_hdmi_priv {
 	struct snd_soc_dai_driver *dai_drv;
 };
 
-#define hdev_to_hdmi_priv(_hdev) ((to_ehdac_device(_hdev))->private_data)
+#define hdev_to_hdmi_priv(_hdev) dev_get_drvdata(&(_hdev)->dev)
 
 static struct hdac_hdmi_pcm *
 hdac_hdmi_get_pcm_from_cvt(struct hdac_hdmi_priv *hdmi,
@@ -158,7 +161,7 @@ hdac_hdmi_get_pcm_from_cvt(struct hdac_hdmi_priv *hdmi,
 static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm,
 		struct hdac_hdmi_port *port, bool is_connect)
 {
-	struct hdac_ext_device *edev = port->pin->edev;
+	struct hdac_device *hdev = port->pin->hdev;
 
 	if (is_connect)
 		snd_soc_dapm_enable_pin(port->dapm, port->jack_pin);
@@ -172,7 +175,7 @@ static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm,
 		 * ports.
 		 */
 		if (pcm->jack_event == 0) {
-			dev_dbg(&edev->hdev.dev,
+			dev_dbg(&hdev->dev,
 					"jack report for pcm=%d\n",
 					pcm->pcm_id);
 			snd_soc_jack_report(pcm->jack, SND_JACK_AVOUT,
@@ -198,19 +201,18 @@ static void hdac_hdmi_jack_report(struct hdac_hdmi_pcm *pcm,
 /*
  * Get the no devices that can be connected to a port on the Pin widget.
  */
-static int hdac_hdmi_get_port_len(struct hdac_ext_device *edev, hda_nid_t nid)
+static int hdac_hdmi_get_port_len(struct hdac_device *hdev, hda_nid_t nid)
 {
 	unsigned int caps;
 	unsigned int type, param;
 
-	caps = get_wcaps(&edev->hdev, nid);
+	caps = get_wcaps(hdev, nid);
 	type = get_wcaps_type(caps);
 
 	if (!(caps & AC_WCAP_DIGITAL) || (type != AC_WID_PIN))
 		return 0;
 
-	param = snd_hdac_read_parm_uncached(&edev->hdev, nid,
-					AC_PAR_DEVLIST_LEN);
+	param = snd_hdac_read_parm_uncached(hdev, nid, AC_PAR_DEVLIST_LEN);
 	if (param == -1)
 		return param;
 
@@ -222,10 +224,10 @@ static int hdac_hdmi_get_port_len(struct hdac_ext_device *edev, hda_nid_t nid)
  * id selected on the pin. Return 0 means the first port entry
  * is selected or MST is not supported.
  */
-static int hdac_hdmi_port_select_get(struct hdac_ext_device *edev,
+static int hdac_hdmi_port_select_get(struct hdac_device *hdev,
 					struct hdac_hdmi_port *port)
 {
-	return snd_hdac_codec_read(&edev->hdev, port->pin->nid,
+	return snd_hdac_codec_read(hdev, port->pin->nid,
 				0, AC_VERB_GET_DEVICE_SEL, 0);
 }
 
@@ -233,7 +235,7 @@ static int hdac_hdmi_port_select_get(struct hdac_ext_device *edev,
  * Sets the selected port entry for the configuring Pin widget verb.
  * returns error if port set is not equal to port get otherwise success
  */
-static int hdac_hdmi_port_select_set(struct hdac_ext_device *edev,
+static int hdac_hdmi_port_select_set(struct hdac_device *hdev,
 					struct hdac_hdmi_port *port)
 {
 	int num_ports;
@@ -242,8 +244,7 @@ static int hdac_hdmi_port_select_set(struct hdac_ext_device *edev,
 		return 0;
 
 	/* AC_PAR_DEVLIST_LEN is 0 based. */
-	num_ports = hdac_hdmi_get_port_len(edev, port->pin->nid);
-
+	num_ports = hdac_hdmi_get_port_len(hdev, port->pin->nid);
 	if (num_ports < 0)
 		return -EIO;
 	/*
@@ -253,13 +254,13 @@ static int hdac_hdmi_port_select_set(struct hdac_ext_device *edev,
 	if (num_ports + 1  < port->id)
 		return 0;
 
-	snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0,
+	snd_hdac_codec_write(hdev, port->pin->nid, 0,
 			AC_VERB_SET_DEVICE_SEL, port->id);
 
-	if (port->id != hdac_hdmi_port_select_get(edev, port))
+	if (port->id != hdac_hdmi_port_select_get(hdev, port))
 		return -EIO;
 
-	dev_dbg(&edev->hdev.dev, "Selected the port=%d\n", port->id);
+	dev_dbg(&hdev->dev, "Selected the port=%d\n", port->id);
 
 	return 0;
 }
@@ -277,13 +278,6 @@ static struct hdac_hdmi_pcm *get_hdmi_pcm_from_id(struct hdac_hdmi_priv *hdmi,
 	return NULL;
 }
 
-static inline struct hdac_ext_device *to_hda_ext_device(struct device *dev)
-{
-	struct hdac_device *hdev = dev_to_hdac_dev(dev);
-
-	return to_ehdac_device(hdev);
-}
-
 static unsigned int sad_format(const u8 *sad)
 {
 	return ((sad[0] >> 0x3) & 0x1f);
@@ -324,15 +318,13 @@ format_constraint:
 }
 
 static void
-hdac_hdmi_set_dip_index(struct hdac_ext_device *edev, hda_nid_t pin_nid,
+hdac_hdmi_set_dip_index(struct hdac_device *hdev, hda_nid_t pin_nid,
 				int packet_index, int byte_index)
 {
 	int val;
 
 	val = (packet_index << 5) | (byte_index & 0x1f);
-
-	snd_hdac_codec_write(&edev->hdev, pin_nid, 0,
-				AC_VERB_SET_HDMI_DIP_INDEX, val);
+	snd_hdac_codec_write(hdev, pin_nid, 0, AC_VERB_SET_HDMI_DIP_INDEX, val);
 }
 
 struct dp_audio_infoframe {
@@ -347,14 +339,14 @@ struct dp_audio_infoframe {
 	u8 LFEPBL01_LSV36_DM_INH7;
 };
 
-static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev,
+static int hdac_hdmi_setup_audio_infoframe(struct hdac_device *hdev,
 		   struct hdac_hdmi_pcm *pcm, struct hdac_hdmi_port *port)
 {
 	uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE];
 	struct hdmi_audio_infoframe frame;
 	struct hdac_hdmi_pin *pin = port->pin;
 	struct dp_audio_infoframe dp_ai;
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_cvt *cvt = pcm->cvt;
 	u8 *dip;
 	int ret;
@@ -363,11 +355,11 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev,
 	u8 conn_type;
 	int channels, ca;
 
-	ca = snd_hdac_channel_allocation(&edev->hdev, port->eld.info.spk_alloc,
+	ca = snd_hdac_channel_allocation(hdev, port->eld.info.spk_alloc,
 			pcm->channels, pcm->chmap_set, true, pcm->chmap);
 
 	channels = snd_hdac_get_active_channels(ca);
-	hdmi->chmap.ops.set_channel_count(&edev->hdev, cvt->nid, channels);
+	hdmi->chmap.ops.set_channel_count(hdev, cvt->nid, channels);
 
 	snd_hdac_setup_channel_mapping(&hdmi->chmap, pin->nid, false, ca,
 				pcm->channels, pcm->chmap, pcm->chmap_set);
@@ -400,32 +392,31 @@ static int hdac_hdmi_setup_audio_infoframe(struct hdac_ext_device *edev,
 		break;
 
 	default:
-		dev_err(&edev->hdev.dev, "Invalid connection type: %d\n",
-						conn_type);
+		dev_err(&hdev->dev, "Invalid connection type: %d\n", conn_type);
 		return -EIO;
 	}
 
 	/* stop infoframe transmission */
-	hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0);
-	snd_hdac_codec_write(&edev->hdev, pin->nid, 0,
+	hdac_hdmi_set_dip_index(hdev, pin->nid, 0x0, 0x0);
+	snd_hdac_codec_write(hdev, pin->nid, 0,
 			AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_DISABLE);
 
 
 	/*  Fill infoframe. Index auto-incremented */
-	hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0);
+	hdac_hdmi_set_dip_index(hdev, pin->nid, 0x0, 0x0);
 	if (conn_type == DRM_ELD_CONN_TYPE_HDMI) {
 		for (i = 0; i < sizeof(buffer); i++)
-			snd_hdac_codec_write(&edev->hdev, pin->nid, 0,
+			snd_hdac_codec_write(hdev, pin->nid, 0,
 				AC_VERB_SET_HDMI_DIP_DATA, buffer[i]);
 	} else {
 		for (i = 0; i < sizeof(dp_ai); i++)
-			snd_hdac_codec_write(&edev->hdev, pin->nid, 0,
+			snd_hdac_codec_write(hdev, pin->nid, 0,
 				AC_VERB_SET_HDMI_DIP_DATA, dip[i]);
 	}
 
 	/* Start infoframe */
-	hdac_hdmi_set_dip_index(edev, pin->nid, 0x0, 0x0);
-	snd_hdac_codec_write(&edev->hdev, pin->nid, 0,
+	hdac_hdmi_set_dip_index(hdev, pin->nid, 0x0, 0x0);
+	snd_hdac_codec_write(hdev, pin->nid, 0,
 			AC_VERB_SET_HDMI_DIP_XMIT, AC_DIPXMIT_BEST);
 
 	return 0;
@@ -435,12 +426,12 @@ static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai,
 		unsigned int tx_mask, unsigned int rx_mask,
 		int slots, int slot_width)
 {
-	struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_pcm *pcm;
 
-	dev_dbg(&edev->hdev.dev, "%s: strm_tag: %d\n", __func__, tx_mask);
+	dev_dbg(&hdev->dev, "%s: strm_tag: %d\n", __func__, tx_mask);
 
 	dai_map = &hdmi->dai_map[dai->id];
 
@@ -455,8 +446,8 @@ static int hdac_hdmi_set_tdm_slot(struct snd_soc_dai *dai,
 static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream,
 	struct snd_pcm_hw_params *hparams, struct snd_soc_dai *dai)
 {
-	struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_port *port;
 	struct hdac_hdmi_pcm *pcm;
@@ -469,7 +460,7 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream,
 		return -ENODEV;
 
 	if ((!port->eld.monitor_present) || (!port->eld.eld_valid)) {
-		dev_err(&edev->hdev.dev,
+		dev_err(&hdev->dev,
 			"device is not configured for this pin:port%d:%d\n",
 					port->pin->nid, port->id);
 		return -ENODEV;
@@ -489,28 +480,28 @@ static int hdac_hdmi_set_hw_params(struct snd_pcm_substream *substream,
 	return 0;
 }
 
-static int hdac_hdmi_query_port_connlist(struct hdac_ext_device *edev,
+static int hdac_hdmi_query_port_connlist(struct hdac_device *hdev,
 					struct hdac_hdmi_pin *pin,
 					struct hdac_hdmi_port *port)
 {
-	if (!(get_wcaps(&edev->hdev, pin->nid) & AC_WCAP_CONN_LIST)) {
-		dev_warn(&edev->hdev.dev,
+	if (!(get_wcaps(hdev, pin->nid) & AC_WCAP_CONN_LIST)) {
+		dev_warn(&hdev->dev,
 			"HDMI: pin %d wcaps %#x does not support connection list\n",
-			pin->nid, get_wcaps(&edev->hdev, pin->nid));
+			pin->nid, get_wcaps(hdev, pin->nid));
 		return -EINVAL;
 	}
 
-	if (hdac_hdmi_port_select_set(edev, port) < 0)
+	if (hdac_hdmi_port_select_set(hdev, port) < 0)
 		return -EIO;
 
-	port->num_mux_nids = snd_hdac_get_connections(&edev->hdev, pin->nid,
+	port->num_mux_nids = snd_hdac_get_connections(hdev, pin->nid,
 			port->mux_nids, HDA_MAX_CONNECTIONS);
 	if (port->num_mux_nids == 0)
-		dev_warn(&edev->hdev.dev,
+		dev_warn(&hdev->dev,
 			"No connections found for pin:port %d:%d\n",
 						pin->nid, port->id);
 
-	dev_dbg(&edev->hdev.dev, "num_mux_nids %d for pin:port %d:%d\n",
+	dev_dbg(&hdev->dev, "num_mux_nids %d for pin:port %d:%d\n",
 			port->num_mux_nids, pin->nid, port->id);
 
 	return port->num_mux_nids;
@@ -526,7 +517,7 @@ static int hdac_hdmi_query_port_connlist(struct hdac_ext_device *edev,
  * connected.
  */
 static struct hdac_hdmi_port *hdac_hdmi_get_port_from_cvt(
-			struct hdac_ext_device *edev,
+			struct hdac_device *hdev,
 			struct hdac_hdmi_priv *hdmi,
 			struct hdac_hdmi_cvt *cvt)
 {
@@ -541,7 +532,7 @@ static struct hdac_hdmi_port *hdac_hdmi_get_port_from_cvt(
 
 			list_for_each_entry(port, &pcm->port_list, head) {
 				mutex_lock(&pcm->lock);
-				ret = hdac_hdmi_query_port_connlist(edev,
+				ret = hdac_hdmi_query_port_connlist(hdev,
 							port->pin, port);
 				mutex_unlock(&pcm->lock);
 				if (ret < 0)
@@ -568,8 +559,8 @@ static struct hdac_hdmi_port *hdac_hdmi_get_port_from_cvt(
 static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream,
 			struct snd_soc_dai *dai)
 {
-	struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_cvt *cvt;
 	struct hdac_hdmi_port *port;
@@ -578,7 +569,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream,
 	dai_map = &hdmi->dai_map[dai->id];
 
 	cvt = dai_map->cvt;
-	port = hdac_hdmi_get_port_from_cvt(edev, hdmi, cvt);
+	port = hdac_hdmi_get_port_from_cvt(hdev, hdmi, cvt);
 
 	/*
 	 * To make PA and other userland happy.
@@ -589,7 +580,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream,
 	if ((!port->eld.monitor_present) ||
 			(!port->eld.eld_valid)) {
 
-		dev_warn(&edev->hdev.dev,
+		dev_warn(&hdev->dev,
 			"Failed: present?:%d ELD valid?:%d pin:port: %d:%d\n",
 			port->eld.monitor_present, port->eld.eld_valid,
 			port->pin->nid, port->id);
@@ -611,8 +602,7 @@ static int hdac_hdmi_pcm_open(struct snd_pcm_substream *substream,
 static void hdac_hdmi_pcm_close(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
-	struct hdac_ext_device *edev = snd_soc_dai_get_drvdata(dai);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_dai_get_drvdata(dai);
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_pcm *pcm;
 
@@ -695,10 +685,10 @@ static void hdac_hdmi_fill_route(struct snd_soc_dapm_route *route,
 	route->connected = handler;
 }
 
-static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_ext_device *edev,
+static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_device *hdev,
 					struct hdac_hdmi_port *port)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pcm *pcm = NULL;
 	struct hdac_hdmi_port *p;
 
@@ -715,33 +705,32 @@ static struct hdac_hdmi_pcm *hdac_hdmi_get_pcm(struct hdac_ext_device *edev,
 	return NULL;
 }
 
-static void hdac_hdmi_set_power_state(struct hdac_ext_device *edev,
+static void hdac_hdmi_set_power_state(struct hdac_device *hdev,
 			     hda_nid_t nid, unsigned int pwr_state)
 {
 	int count;
 	unsigned int state;
 
-	if (get_wcaps(&edev->hdev, nid) & AC_WCAP_POWER) {
-		if (!snd_hdac_check_power_state(&edev->hdev, nid, pwr_state)) {
+	if (get_wcaps(hdev, nid) & AC_WCAP_POWER) {
+		if (!snd_hdac_check_power_state(hdev, nid, pwr_state)) {
 			for (count = 0; count < 10; count++) {
-				snd_hdac_codec_read(&edev->hdev, nid, 0,
+				snd_hdac_codec_read(hdev, nid, 0,
 						AC_VERB_SET_POWER_STATE,
 						pwr_state);
-				state = snd_hdac_sync_power_state(&edev->hdev,
+				state = snd_hdac_sync_power_state(hdev,
 						nid, pwr_state);
 				if (!(state & AC_PWRST_ERROR))
 					break;
 			}
 		}
-
 	}
 }
 
-static void hdac_hdmi_set_amp(struct hdac_ext_device *edev,
+static void hdac_hdmi_set_amp(struct hdac_device *hdev,
 				   hda_nid_t nid, int val)
 {
-	if (get_wcaps(&edev->hdev, nid) & AC_WCAP_OUT_AMP)
-		snd_hdac_codec_write(&edev->hdev, nid, 0,
+	if (get_wcaps(hdev, nid) & AC_WCAP_OUT_AMP)
+		snd_hdac_codec_write(hdev, nid, 0,
 					AC_VERB_SET_AMP_GAIN_MUTE, val);
 }
 
@@ -750,40 +739,40 @@ static int hdac_hdmi_pin_output_widget_event(struct snd_soc_dapm_widget *w,
 					struct snd_kcontrol *kc, int event)
 {
 	struct hdac_hdmi_port *port = w->priv;
-	struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev);
+	struct hdac_device *hdev = dev_to_hdac_dev(w->dapm->dev);
 	struct hdac_hdmi_pcm *pcm;
 
-	dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n",
+	dev_dbg(&hdev->dev, "%s: widget: %s event: %x\n",
 			__func__, w->name, event);
 
-	pcm = hdac_hdmi_get_pcm(edev, port);
+	pcm = hdac_hdmi_get_pcm(hdev, port);
 	if (!pcm)
 		return -EIO;
 
 	/* set the device if pin is mst_capable */
-	if (hdac_hdmi_port_select_set(edev, port) < 0)
+	if (hdac_hdmi_port_select_set(hdev, port) < 0)
 		return -EIO;
 
 	switch (event) {
 	case SND_SOC_DAPM_PRE_PMU:
-		hdac_hdmi_set_power_state(edev, port->pin->nid, AC_PWRST_D0);
+		hdac_hdmi_set_power_state(hdev, port->pin->nid, AC_PWRST_D0);
 
 		/* Enable out path for this pin widget */
-		snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0,
+		snd_hdac_codec_write(hdev, port->pin->nid, 0,
 				AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
 
-		hdac_hdmi_set_amp(edev, port->pin->nid, AMP_OUT_UNMUTE);
+		hdac_hdmi_set_amp(hdev, port->pin->nid, AMP_OUT_UNMUTE);
 
-		return hdac_hdmi_setup_audio_infoframe(edev, pcm, port);
+		return hdac_hdmi_setup_audio_infoframe(hdev, pcm, port);
 
 	case SND_SOC_DAPM_POST_PMD:
-		hdac_hdmi_set_amp(edev, port->pin->nid, AMP_OUT_MUTE);
+		hdac_hdmi_set_amp(hdev, port->pin->nid, AMP_OUT_MUTE);
 
 		/* Disable out path for this pin widget */
-		snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0,
+		snd_hdac_codec_write(hdev, port->pin->nid, 0,
 				AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
 
-		hdac_hdmi_set_power_state(edev, port->pin->nid, AC_PWRST_D3);
+		hdac_hdmi_set_power_state(hdev, port->pin->nid, AC_PWRST_D3);
 		break;
 
 	}
@@ -795,11 +784,11 @@ static int hdac_hdmi_cvt_output_widget_event(struct snd_soc_dapm_widget *w,
 					struct snd_kcontrol *kc, int event)
 {
 	struct hdac_hdmi_cvt *cvt = w->priv;
-	struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_device *hdev = dev_to_hdac_dev(w->dapm->dev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pcm *pcm;
 
-	dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n",
+	dev_dbg(&hdev->dev, "%s: widget: %s event: %x\n",
 			__func__, w->name, event);
 
 	pcm = hdac_hdmi_get_pcm_from_cvt(hdmi, cvt);
@@ -808,29 +797,29 @@ static int hdac_hdmi_cvt_output_widget_event(struct snd_soc_dapm_widget *w,
 
 	switch (event) {
 	case SND_SOC_DAPM_PRE_PMU:
-		hdac_hdmi_set_power_state(edev, cvt->nid, AC_PWRST_D0);
+		hdac_hdmi_set_power_state(hdev, cvt->nid, AC_PWRST_D0);
 
 		/* Enable transmission */
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 			AC_VERB_SET_DIGI_CONVERT_1, 1);
 
 		/* Category Code (CC) to zero */
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 			AC_VERB_SET_DIGI_CONVERT_2, 0);
 
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 				AC_VERB_SET_CHANNEL_STREAMID, pcm->stream_tag);
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 				AC_VERB_SET_STREAM_FORMAT, pcm->format);
 		break;
 
 	case SND_SOC_DAPM_POST_PMD:
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 				AC_VERB_SET_CHANNEL_STREAMID, 0);
-		snd_hdac_codec_write(&edev->hdev, cvt->nid, 0,
+		snd_hdac_codec_write(hdev, cvt->nid, 0,
 				AC_VERB_SET_STREAM_FORMAT, 0);
 
-		hdac_hdmi_set_power_state(edev, cvt->nid, AC_PWRST_D3);
+		hdac_hdmi_set_power_state(hdev, cvt->nid, AC_PWRST_D3);
 		break;
 
 	}
@@ -842,10 +831,10 @@ static int hdac_hdmi_pin_mux_widget_event(struct snd_soc_dapm_widget *w,
 					struct snd_kcontrol *kc, int event)
 {
 	struct hdac_hdmi_port *port = w->priv;
-	struct hdac_ext_device *edev = to_hda_ext_device(w->dapm->dev);
+	struct hdac_device *hdev = dev_to_hdac_dev(w->dapm->dev);
 	int mux_idx;
 
-	dev_dbg(&edev->hdev.dev, "%s: widget: %s event: %x\n",
+	dev_dbg(&hdev->dev, "%s: widget: %s event: %x\n",
 			__func__, w->name, event);
 
 	if (!kc)
@@ -854,11 +843,11 @@ static int hdac_hdmi_pin_mux_widget_event(struct snd_soc_dapm_widget *w,
 	mux_idx = dapm_kcontrol_get_value(kc);
 
 	/* set the device if pin is mst_capable */
-	if (hdac_hdmi_port_select_set(edev, port) < 0)
+	if (hdac_hdmi_port_select_set(hdev, port) < 0)
 		return -EIO;
 
 	if (mux_idx > 0) {
-		snd_hdac_codec_write(&edev->hdev, port->pin->nid, 0,
+		snd_hdac_codec_write(hdev, port->pin->nid, 0,
 			AC_VERB_SET_CONNECT_SEL, (mux_idx - 1));
 	}
 
@@ -877,8 +866,8 @@ static int hdac_hdmi_set_pin_port_mux(struct snd_kcontrol *kcontrol,
 	struct snd_soc_dapm_widget *w = snd_soc_dapm_kcontrol_widget(kcontrol);
 	struct snd_soc_dapm_context *dapm = w->dapm;
 	struct hdac_hdmi_port *port = w->priv;
-	struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_device *hdev = dev_to_hdac_dev(dapm->dev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pcm *pcm = NULL;
 	const char *cvt_name =  e->texts[ucontrol->value.enumerated.item[0]];
 
@@ -931,12 +920,12 @@ static int hdac_hdmi_set_pin_port_mux(struct snd_kcontrol *kcontrol,
  * care of selecting the right one and leaving all other inputs selected to
  * "NONE"
  */
-static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev,
+static int hdac_hdmi_create_pin_port_muxs(struct hdac_device *hdev,
 				struct hdac_hdmi_port *port,
 				struct snd_soc_dapm_widget *widget,
 				const char *widget_name)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pin *pin = port->pin;
 	struct snd_kcontrol_new *kc;
 	struct hdac_hdmi_cvt *cvt;
@@ -948,17 +937,17 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev,
 	int i = 0;
 	int num_items = hdmi->num_cvt + 1;
 
-	kc = devm_kzalloc(&edev->hdev.dev, sizeof(*kc), GFP_KERNEL);
+	kc = devm_kzalloc(&hdev->dev, sizeof(*kc), GFP_KERNEL);
 	if (!kc)
 		return -ENOMEM;
 
-	se = devm_kzalloc(&edev->hdev.dev, sizeof(*se), GFP_KERNEL);
+	se = devm_kzalloc(&hdev->dev, sizeof(*se), GFP_KERNEL);
 	if (!se)
 		return -ENOMEM;
 
 	snprintf(kc_name, NAME_SIZE, "Pin %d port %d Input",
 						pin->nid, port->id);
-	kc->name = devm_kstrdup(&edev->hdev.dev, kc_name, GFP_KERNEL);
+	kc->name = devm_kstrdup(&hdev->dev, kc_name, GFP_KERNEL);
 	if (!kc->name)
 		return -ENOMEM;
 
@@ -976,35 +965,35 @@ static int hdac_hdmi_create_pin_port_muxs(struct hdac_ext_device *edev,
 	se->mask = roundup_pow_of_two(se->items) - 1;
 
 	sprintf(mux_items, "NONE");
-	items[i] = devm_kstrdup(&edev->hdev.dev, mux_items, GFP_KERNEL);
+	items[i] = devm_kstrdup(&hdev->dev, mux_items, GFP_KERNEL);
 	if (!items[i])
 		return -ENOMEM;
 
 	list_for_each_entry(cvt, &hdmi->cvt_list, head) {
 		i++;
 		sprintf(mux_items, "cvt %d", cvt->nid);
-		items[i] = devm_kstrdup(&edev->hdev.dev, mux_items, GFP_KERNEL);
+		items[i] = devm_kstrdup(&hdev->dev, mux_items, GFP_KERNEL);
 		if (!items[i])
 			return -ENOMEM;
 	}
 
-	se->texts = devm_kmemdup(&edev->hdev.dev, items,
+	se->texts = devm_kmemdup(&hdev->dev, items,
 			(num_items  * sizeof(char *)), GFP_KERNEL);
 	if (!se->texts)
 		return -ENOMEM;
 
-	return hdac_hdmi_fill_widget_info(&edev->hdev.dev, widget,
+	return hdac_hdmi_fill_widget_info(&hdev->dev, widget,
 			snd_soc_dapm_mux, port, widget_name, NULL, kc, 1,
 			hdac_hdmi_pin_mux_widget_event,
 			SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_REG);
 }
 
 /* Add cvt <- input <- mux route map */
-static void hdac_hdmi_add_pinmux_cvt_route(struct hdac_ext_device *edev,
+static void hdac_hdmi_add_pinmux_cvt_route(struct hdac_device *hdev,
 			struct snd_soc_dapm_widget *widgets,
 			struct snd_soc_dapm_route *route, int rindex)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	const struct snd_kcontrol_new *kc;
 	struct soc_enum *se;
 	int mux_index = hdmi->num_cvt + hdmi->num_ports;
@@ -1046,8 +1035,8 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm)
 {
 	struct snd_soc_dapm_widget *widgets;
 	struct snd_soc_dapm_route *route;
-	struct hdac_ext_device *edev = to_hda_ext_device(dapm->dev);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_device *hdev = dev_to_hdac_dev(dapm->dev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct snd_soc_dai_driver *dai_drv = hdmi->dai_drv;
 	char widget_name[NAME_SIZE];
 	struct hdac_hdmi_cvt *cvt;
@@ -1099,7 +1088,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm)
 		for (j = 0; j < pin->num_ports; j++) {
 			sprintf(widget_name, "Pin%d-Port%d Mux",
 				pin->nid, pin->ports[j].id);
-			ret = hdac_hdmi_create_pin_port_muxs(edev,
+			ret = hdac_hdmi_create_pin_port_muxs(hdev,
 						&pin->ports[j], &widgets[i],
 						widget_name);
 			if (ret < 0)
@@ -1134,7 +1123,7 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm)
 		}
 	}
 
-	hdac_hdmi_add_pinmux_cvt_route(edev, widgets, route, i);
+	hdac_hdmi_add_pinmux_cvt_route(hdev, widgets, route, i);
 
 	snd_soc_dapm_new_controls(dapm, widgets,
 		((2 * hdmi->num_ports) + hdmi->num_cvt));
@@ -1146,9 +1135,9 @@ static int create_fill_widget_route_map(struct snd_soc_dapm_context *dapm)
 
 }
 
-static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev)
+static int hdac_hdmi_init_dai_map(struct hdac_device *hdev)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_dai_port_map *dai_map;
 	struct hdac_hdmi_cvt *cvt;
 	int dai_id = 0;
@@ -1164,7 +1153,7 @@ static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev)
 		dai_id++;
 
 		if (dai_id == HDA_MAX_CVTS) {
-			dev_warn(&edev->hdev.dev,
+			dev_warn(&hdev->dev,
 				"Max dais supported: %d\n", dai_id);
 			break;
 		}
@@ -1173,9 +1162,9 @@ static int hdac_hdmi_init_dai_map(struct hdac_ext_device *edev)
 	return 0;
 }
 
-static int hdac_hdmi_add_cvt(struct hdac_ext_device *edev, hda_nid_t nid)
+static int hdac_hdmi_add_cvt(struct hdac_device *hdev, hda_nid_t nid)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_cvt *cvt;
 	char name[NAME_SIZE];
 
@@ -1190,10 +1179,10 @@ static int hdac_hdmi_add_cvt(struct hdac_ext_device *edev, hda_nid_t nid)
 	list_add_tail(&cvt->head, &hdmi->cvt_list);
 	hdmi->num_cvt++;
 
-	return hdac_hdmi_query_cvt_params(&edev->hdev, cvt);
+	return hdac_hdmi_query_cvt_params(hdev, cvt);
 }
 
-static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev,
+static int hdac_hdmi_parse_eld(struct hdac_device *hdev,
 			struct hdac_hdmi_port *port)
 {
 	unsigned int ver, mnl;
@@ -1202,7 +1191,7 @@ static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev,
 						>> DRM_ELD_VER_SHIFT;
 
 	if (ver != ELD_VER_CEA_861D && ver != ELD_VER_PARTIAL) {
-		dev_err(&edev->hdev.dev, "HDMI: Unknown ELD version %d\n", ver);
+		dev_err(&hdev->dev, "HDMI: Unknown ELD version %d\n", ver);
 		return -EINVAL;
 	}
 
@@ -1210,7 +1199,7 @@ static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev,
 		DRM_ELD_MNL_MASK) >> DRM_ELD_MNL_SHIFT;
 
 	if (mnl > ELD_MAX_MNL) {
-		dev_err(&edev->hdev.dev, "HDMI: MNL Invalid %d\n", mnl);
+		dev_err(&hdev->dev, "HDMI: MNL Invalid %d\n", mnl);
 		return -EINVAL;
 	}
 
@@ -1222,8 +1211,8 @@ static int hdac_hdmi_parse_eld(struct hdac_ext_device *edev,
 static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin,
 				    struct hdac_hdmi_port *port)
 {
-	struct hdac_ext_device *edev = pin->edev;
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_device *hdev = pin->hdev;
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pcm *pcm;
 	int size = 0;
 	int port_id = -1;
@@ -1241,14 +1230,14 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin,
 	if (pin->mst_capable)
 		port_id = port->id;
 
-	size = snd_hdac_acomp_get_eld(&edev->hdev, pin->nid, port_id,
+	size = snd_hdac_acomp_get_eld(hdev, pin->nid, port_id,
 				&port->eld.monitor_present,
 				port->eld.eld_buffer,
 				ELD_MAX_SIZE);
 
 	if (size > 0) {
 		size = min(size, ELD_MAX_SIZE);
-		if (hdac_hdmi_parse_eld(edev, port) < 0)
+		if (hdac_hdmi_parse_eld(hdev, port) < 0)
 			size = -EINVAL;
 	}
 
@@ -1260,11 +1249,11 @@ static void hdac_hdmi_present_sense(struct hdac_hdmi_pin *pin,
 		port->eld.eld_size = 0;
 	}
 
-	pcm = hdac_hdmi_get_pcm(edev, port);
+	pcm = hdac_hdmi_get_pcm(hdev, port);
 
 	if (!port->eld.monitor_present || !port->eld.eld_valid) {
 
-		dev_err(&edev->hdev.dev, "%s: disconnect for pin:port %d:%d\n",
+		dev_err(&hdev->dev, "%s: disconnect for pin:port %d:%d\n",
 						__func__, pin->nid, port->id);
 
 		/*
@@ -1316,9 +1305,9 @@ static int hdac_hdmi_add_ports(struct hdac_hdmi_priv *hdmi,
 	return 0;
 }
 
-static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid)
+static int hdac_hdmi_add_pin(struct hdac_device *hdev, hda_nid_t nid)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pin *pin;
 	int ret;
 
@@ -1328,7 +1317,7 @@ static int hdac_hdmi_add_pin(struct hdac_ext_device *edev, hda_nid_t nid)
 
 	pin->nid = nid;
 	pin->mst_capable = false;
-	pin->edev = edev;
+	pin->hdev = hdev;
 	ret = hdac_hdmi_add_ports(hdmi, pin);
 	if (ret < 0)
 		return ret;
@@ -1459,15 +1448,14 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdev,
  * Parse all nodes and store the cvt/pin nids in array
  * Add one time initialization for pin and cvt widgets
  */
-static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev,
+static int hdac_hdmi_parse_and_map_nid(struct hdac_device *hdev,
 		struct snd_soc_dai_driver **dais, int *num_dais)
 {
 	hda_nid_t nid;
 	int i, num_nodes;
 	struct hdac_hdmi_cvt *temp_cvt, *cvt_next;
 	struct hdac_hdmi_pin *temp_pin, *pin_next;
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
-	struct hdac_device *hdev = &edev->hdev;
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	int ret;
 
 	hdac_hdmi_skl_enable_all_pins(hdev);
@@ -1492,13 +1480,13 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev,
 		switch (type) {
 
 		case AC_WID_AUD_OUT:
-			ret = hdac_hdmi_add_cvt(edev, nid);
+			ret = hdac_hdmi_add_cvt(hdev, nid);
 			if (ret < 0)
 				goto free_widgets;
 			break;
 
 		case AC_WID_PIN:
-			ret = hdac_hdmi_add_pin(edev, nid);
+			ret = hdac_hdmi_add_pin(hdev, nid);
 			if (ret < 0)
 				goto free_widgets;
 			break;
@@ -1518,7 +1506,7 @@ static int hdac_hdmi_parse_and_map_nid(struct hdac_ext_device *edev,
 	}
 
 	*num_dais = hdmi->num_cvt;
-	ret = hdac_hdmi_init_dai_map(edev);
+	ret = hdac_hdmi_init_dai_map(hdev);
 	if (ret < 0)
 		goto free_widgets;
 
@@ -1544,17 +1532,17 @@ free_widgets:
 
 static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
 {
-	struct hdac_ext_device *edev = aptr;
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_device *hdev = aptr;
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pin *pin = NULL;
 	struct hdac_hdmi_port *hport = NULL;
-	struct snd_soc_component *component = edev->scodec;
+	struct snd_soc_component *component = hdmi->component;
 	int i;
 
 	/* Don't know how this mapping is derived */
 	hda_nid_t pin_nid = port + 0x04;
 
-	dev_dbg(&edev->hdev.dev, "%s: for pin:%d port=%d\n", __func__,
+	dev_dbg(&hdev->dev, "%s: for pin:%d port=%d\n", __func__,
 							pin_nid, pipe);
 
 	/*
@@ -1567,7 +1555,7 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
 			SNDRV_CTL_POWER_D0)
 		return;
 
-	if (atomic_read(&edev->hdev.in_pm))
+	if (atomic_read(&hdev->in_pm))
 		return;
 
 	list_for_each_entry(pin, &hdmi->pin_list, head) {
@@ -1614,15 +1602,15 @@ static struct snd_pcm *hdac_hdmi_get_pcm_from_id(struct snd_soc_card *card,
 
 /* create jack pin kcontrols */
 static int create_fill_jack_kcontrols(struct snd_soc_card *card,
-				    struct hdac_ext_device *edev)
+				    struct hdac_device *hdev)
 {
 	struct hdac_hdmi_pin *pin;
 	struct snd_kcontrol_new *kc;
 	char kc_name[NAME_SIZE], xname[NAME_SIZE];
 	char *name;
 	int i = 0, j;
-	struct snd_soc_component *component = edev->scodec;
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
+	struct snd_soc_component *component = hdmi->component;
 
 	kc = devm_kcalloc(component->dev, hdmi->num_ports,
 				sizeof(*kc), GFP_KERNEL);
@@ -1659,8 +1647,8 @@ static int create_fill_jack_kcontrols(struct snd_soc_card *card,
 int hdac_hdmi_jack_port_init(struct snd_soc_component *component,
 			struct snd_soc_dapm_context *dapm)
 {
-	struct hdac_ext_device *edev = snd_soc_component_get_drvdata(component);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_component_get_drvdata(component);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_pin *pin;
 	struct snd_soc_dapm_widget *widgets;
 	struct snd_soc_dapm_route *route;
@@ -1715,7 +1703,7 @@ int hdac_hdmi_jack_port_init(struct snd_soc_component *component,
 		return ret;
 
 	/* Add Jack Pin switch Kcontrol */
-	ret = create_fill_jack_kcontrols(dapm->card, edev);
+	ret = create_fill_jack_kcontrols(dapm->card, hdev);
 
 	if (ret < 0)
 		return ret;
@@ -1735,8 +1723,8 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device,
 				struct snd_soc_jack *jack)
 {
 	struct snd_soc_component *component = dai->component;
-	struct hdac_ext_device *edev = snd_soc_component_get_drvdata(component);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_component_get_drvdata(component);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct hdac_hdmi_pcm *pcm;
 	struct snd_pcm *snd_pcm;
 	int err;
@@ -1758,7 +1746,7 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device,
 	if (snd_pcm) {
 		err = snd_hdac_add_chmap_ctls(snd_pcm, device, &hdmi->chmap);
 		if (err < 0) {
-			dev_err(&edev->hdev.dev,
+			dev_err(&hdev->dev,
 				"chmap control add failed with err: %d for pcm: %d\n",
 				err, device);
 			kfree(pcm);
@@ -1772,7 +1760,7 @@ int hdac_hdmi_jack_init(struct snd_soc_dai *dai, int device,
 }
 EXPORT_SYMBOL_GPL(hdac_hdmi_jack_init);
 
-static void hdac_hdmi_present_sense_all_pins(struct hdac_ext_device *edev,
+static void hdac_hdmi_present_sense_all_pins(struct hdac_device *hdev,
 			struct hdac_hdmi_priv *hdmi, bool detect_pin_caps)
 {
 	int i;
@@ -1781,7 +1769,7 @@ static void hdac_hdmi_present_sense_all_pins(struct hdac_ext_device *edev,
 	list_for_each_entry(pin, &hdmi->pin_list, head) {
 		if (detect_pin_caps) {
 
-			if (hdac_hdmi_get_port_len(edev, pin->nid)  == 0)
+			if (hdac_hdmi_get_port_len(hdev, pin->nid)  == 0)
 				pin->mst_capable = false;
 			else
 				pin->mst_capable = true;
@@ -1798,68 +1786,68 @@ static void hdac_hdmi_present_sense_all_pins(struct hdac_ext_device *edev,
 
 static int hdmi_codec_probe(struct snd_soc_component *component)
 {
-	struct hdac_ext_device *edev = snd_soc_component_get_drvdata(component);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = snd_soc_component_get_drvdata(component);
+	struct hdac_device *hdev = hdmi->hdev;
 	struct snd_soc_dapm_context *dapm =
 		snd_soc_component_get_dapm(component);
 	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
-	edev->scodec = component;
+	hdmi->component = component;
 
 	/*
 	 * hold the ref while we probe, also no need to drop the ref on
 	 * exit, we call pm_runtime_suspend() so that will do for us
 	 */
-	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdev.dev));
+	hlink = snd_hdac_ext_bus_get_link(hbus_to_ebus(hdev->bus),
+						dev_name(&hdev->dev));
 	if (!hlink) {
-		dev_err(&edev->hdev.dev, "hdac link not found\n");
+		dev_err(&hdev->dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
+	snd_hdac_ext_bus_link_get(hbus_to_ebus(hdev->bus), hlink);
 
 	ret = create_fill_widget_route_map(dapm);
 	if (ret < 0)
 		return ret;
 
-	aops.audio_ptr = edev;
+	aops.audio_ptr = hdev;
 	ret = snd_hdac_i915_register_notifier(&aops);
 	if (ret < 0) {
-		dev_err(&edev->hdev.dev, "notifier register failed: err: %d\n",
-				ret);
+		dev_err(&hdev->dev, "notifier register failed: err: %d\n", ret);
 		return ret;
 	}
 
-	hdac_hdmi_present_sense_all_pins(edev, hdmi, true);
+	hdac_hdmi_present_sense_all_pins(hdev, hdmi, true);
 	/* Imp: Store the card pointer in hda_codec */
-	edev->card = dapm->card->snd_card;
+	hdmi->card = dapm->card->snd_card;
 
 	/*
 	 * hdac_device core already sets the state to active and calls
 	 * get_noresume. So enable runtime and set the device to suspend.
 	 */
-	pm_runtime_enable(&edev->hdev.dev);
-	pm_runtime_put(&edev->hdev.dev);
-	pm_runtime_suspend(&edev->hdev.dev);
+	pm_runtime_enable(&hdev->dev);
+	pm_runtime_put(&hdev->dev);
+	pm_runtime_suspend(&hdev->dev);
 
 	return 0;
 }
 
 static void hdmi_codec_remove(struct snd_soc_component *component)
 {
-	struct hdac_ext_device *edev = snd_soc_component_get_drvdata(component);
+	struct hdac_hdmi_priv *hdmi = snd_soc_component_get_drvdata(component);
+	struct hdac_device *hdev = hdmi->hdev;
 
-	pm_runtime_disable(&edev->hdev.dev);
+	pm_runtime_disable(&hdev->dev);
 }
 
 #ifdef CONFIG_PM
 static int hdmi_codec_prepare(struct device *dev)
 {
-	struct hdac_ext_device *edev = to_hda_ext_device(dev);
-	struct hdac_device *hdev = &edev->hdev;
+	struct hdac_device *hdev = dev_to_hdac_dev(dev);
 
-	pm_runtime_get_sync(&edev->hdev.dev);
+	pm_runtime_get_sync(&hdev->dev);
 
 	/*
 	 * Power down afg.
@@ -1876,16 +1864,15 @@ static int hdmi_codec_prepare(struct device *dev)
 
 static void hdmi_codec_complete(struct device *dev)
 {
-	struct hdac_ext_device *edev = to_hda_ext_device(dev);
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
-	struct hdac_device *hdev = &edev->hdev;
+	struct hdac_device *hdev = dev_to_hdac_dev(dev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 
 	/* Power up afg */
 	snd_hdac_codec_read(hdev, hdev->afg, 0,	AC_VERB_SET_POWER_STATE,
 							AC_PWRST_D0);
 
-	hdac_hdmi_skl_enable_all_pins(&edev->hdev);
-	hdac_hdmi_skl_enable_dp12(&edev->hdev);
+	hdac_hdmi_skl_enable_all_pins(hdev);
+	hdac_hdmi_skl_enable_dp12(hdev);
 
 	/*
 	 * As the ELD notify callback request is not entertained while the
@@ -1893,9 +1880,9 @@ static void hdmi_codec_complete(struct device *dev)
 	 * all pins here. pin capablity change is not support, so use the
 	 * already set pin caps.
 	 */
-	hdac_hdmi_present_sense_all_pins(edev, hdmi, false);
+	hdac_hdmi_present_sense_all_pins(hdev, hdmi, false);
 
-	pm_runtime_put_sync(&edev->hdev.dev);
+	pm_runtime_put_sync(&hdev->dev);
 }
 #else
 #define hdmi_codec_prepare NULL
@@ -1922,7 +1909,6 @@ static void hdac_hdmi_get_chmap(struct hdac_device *hdev, int pcm_idx,
 static void hdac_hdmi_set_chmap(struct hdac_device *hdev, int pcm_idx,
 				unsigned char *chmap, int prepared)
 {
-	struct hdac_ext_device *edev = to_ehdac_device(hdev);
 	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pcm *pcm = get_hdmi_pcm_from_id(hdmi, pcm_idx);
 	struct hdac_hdmi_port *port;
@@ -1938,7 +1924,7 @@ static void hdac_hdmi_set_chmap(struct hdac_device *hdev, int pcm_idx,
 	memcpy(pcm->chmap, chmap, ARRAY_SIZE(pcm->chmap));
 	list_for_each_entry(port, &pcm->port_list, head)
 		if (prepared)
-			hdac_hdmi_setup_audio_infoframe(edev, pcm, port);
+			hdac_hdmi_setup_audio_infoframe(hdev, pcm, port);
 	mutex_unlock(&pcm->lock);
 }
 
@@ -1987,10 +1973,9 @@ static struct hdac_hdmi_drv_data intel_drv_data  = {
 	.vendor_nid = INTEL_VENDOR_NID,
 };
 
-static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
+static int hdac_hdmi_dev_probe(struct hdac_device *hdev)
 {
-	struct hdac_device *hdev = &edev->hdev;
-	struct hdac_hdmi_priv *hdmi_priv;
+	struct hdac_hdmi_priv *hdmi_priv = NULL;
 	struct snd_soc_dai_driver *hdmi_dais = NULL;
 	struct hdac_ext_link *hlink = NULL;
 	int num_dais = 0;
@@ -1999,24 +1984,25 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 	const struct hda_device_id *hdac_id = hdac_get_device_id(hdev, hdrv);
 
 	/* hold the ref while we probe */
-	hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdev.dev));
+	hlink = snd_hdac_ext_bus_get_link(hbus_to_ebus(hdev->bus),
+						dev_name(&hdev->dev));
 	if (!hlink) {
-		dev_err(&edev->hdev.dev, "hdac link not found\n");
+		dev_err(&hdev->dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_get(edev->ebus, hlink);
+	snd_hdac_ext_bus_link_get(hbus_to_ebus(hdev->bus), hlink);
 
 	hdmi_priv = devm_kzalloc(&hdev->dev, sizeof(*hdmi_priv), GFP_KERNEL);
 	if (hdmi_priv == NULL)
 		return -ENOMEM;
 
-	edev->private_data = hdmi_priv;
 	snd_hdac_register_chmap_ops(hdev, &hdmi_priv->chmap);
 	hdmi_priv->chmap.ops.get_chmap = hdac_hdmi_get_chmap;
 	hdmi_priv->chmap.ops.set_chmap = hdac_hdmi_set_chmap;
 	hdmi_priv->chmap.ops.is_pcm_attached = is_hdac_hdmi_pcm_attached;
 	hdmi_priv->chmap.ops.get_spk_alloc = hdac_hdmi_get_spk_alloc;
+	hdmi_priv->hdev = hdev;
 
 	if (!hdac_id)
 		return -ENODEV;
@@ -2027,7 +2013,7 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 	else
 		hdmi_priv->drv_data = &intel_drv_data;
 
-	dev_set_drvdata(&hdev->dev, edev);
+	dev_set_drvdata(&hdev->dev, hdmi_priv);
 
 	INIT_LIST_HEAD(&hdmi_priv->pin_list);
 	INIT_LIST_HEAD(&hdmi_priv->cvt_list);
@@ -2038,15 +2024,15 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 	 * Turned off in the runtime_suspend during the first explicit
 	 * pm_runtime_suspend call.
 	 */
-	ret = snd_hdac_display_power(edev->hdev.bus, true);
+	ret = snd_hdac_display_power(hdev->bus, true);
 	if (ret < 0) {
-		dev_err(&edev->hdev.dev,
+		dev_err(&hdev->dev,
 			"Cannot turn on display power on i915 err: %d\n",
 			ret);
 		return ret;
 	}
 
-	ret = hdac_hdmi_parse_and_map_nid(edev, &hdmi_dais, &num_dais);
+	ret = hdac_hdmi_parse_and_map_nid(hdev, &hdmi_dais, &num_dais);
 	if (ret < 0) {
 		dev_err(&hdev->dev,
 			"Failed in parse and map nid with err: %d\n", ret);
@@ -2058,14 +2044,14 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev)
 	ret = devm_snd_soc_register_component(&hdev->dev, &hdmi_hda_codec,
 					hdmi_dais, num_dais);
 
-	snd_hdac_ext_bus_link_put(edev->ebus, hlink);
+	snd_hdac_ext_bus_link_put(hbus_to_ebus(hdev->bus), hlink);
 
 	return ret;
 }
 
-static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev)
+static int hdac_hdmi_dev_remove(struct hdac_device *hdev)
 {
-	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(&edev->hdev);
+	struct hdac_hdmi_priv *hdmi = hdev_to_hdmi_priv(hdev);
 	struct hdac_hdmi_pin *pin, *pin_next;
 	struct hdac_hdmi_cvt *cvt, *cvt_next;
 	struct hdac_hdmi_pcm *pcm, *pcm_next;
@@ -2105,8 +2091,7 @@ static int hdac_hdmi_dev_remove(struct hdac_ext_device *edev)
 #ifdef CONFIG_PM
 static int hdac_hdmi_runtime_suspend(struct device *dev)
 {
-	struct hdac_ext_device *edev = to_hda_ext_device(dev);
-	struct hdac_device *hdev = &edev->hdev;
+	struct hdac_device *hdev = dev_to_hdac_dev(dev);
 	struct hdac_bus *bus = hdev->bus;
 	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 	struct hdac_ext_link *hlink = NULL;
@@ -2129,7 +2114,7 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 							AC_PWRST_D3);
 	err = snd_hdac_display_power(bus, false);
 	if (err < 0) {
-		dev_err(bus->dev, "Cannot turn on display power on i915\n");
+		dev_err(dev, "Cannot turn on display power on i915\n");
 		return err;
 	}
 
@@ -2146,8 +2131,7 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 
 static int hdac_hdmi_runtime_resume(struct device *dev)
 {
-	struct hdac_ext_device *edev = to_hda_ext_device(dev);
-	struct hdac_device *hdev = &edev->hdev;
+	struct hdac_device *hdev = dev_to_hdac_dev(dev);
 	struct hdac_bus *bus = hdev->bus;
 	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 	struct hdac_ext_link *hlink = NULL;
@@ -2169,12 +2153,12 @@ static int hdac_hdmi_runtime_resume(struct device *dev)
 
 	err = snd_hdac_display_power(bus, true);
 	if (err < 0) {
-		dev_err(bus->dev, "Cannot turn on display power on i915\n");
+		dev_err(dev, "Cannot turn on display power on i915\n");
 		return err;
 	}
 
-	hdac_hdmi_skl_enable_all_pins(&edev->hdev);
-	hdac_hdmi_skl_enable_dp12(&edev->hdev);
+	hdac_hdmi_skl_enable_all_pins(hdev);
+	hdac_hdmi_skl_enable_dp12(hdev);
 
 	/* Power up afg */
 	snd_hdac_codec_read(hdev, hdev->afg, 0,	AC_VERB_SET_POWER_STATE,
-- 
cgit v1.2.3


From 76f56fae1cf9040325a58d1375291baf71dfaf03 Mon Sep 17 00:00:00 2001
From: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Date: Fri, 1 Jun 2018 22:53:50 -0500
Subject: ALSA: hdac: Remove usage of struct hdac_ext_bus and use hdac_bus
 instead

This patch removes the hdac_ext_bus structure. The legacy and
enhanced HDaudio capabilities can be handled in a backward-compatible
way without separate definitions.

Follow-up patches in this series handle the driver definition.

Signed-off-by: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h                |  15 +++
 include/sound/hdaudio_ext.h            |  74 +++++--------
 sound/hda/ext/hdac_ext_bus.c           |  27 +++--
 sound/hda/ext/hdac_ext_controller.c    |  55 +++++-----
 sound/hda/ext/hdac_ext_stream.c        | 104 ++++++++-----------
 sound/soc/codecs/hdac_hdmi.c           |  22 ++--
 sound/soc/intel/skylake/skl-messages.c |  50 ++++-----
 sound/soc/intel/skylake/skl-nhlt.c     |   8 +-
 sound/soc/intel/skylake/skl-pcm.c      | 112 ++++++++++----------
 sound/soc/intel/skylake/skl-topology.c |  20 ++--
 sound/soc/intel/skylake/skl-topology.h |   6 +-
 sound/soc/intel/skylake/skl.c          | 184 +++++++++++++++------------------
 sound/soc/intel/skylake/skl.h          |   7 +-
 13 files changed, 308 insertions(+), 376 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index c052afc27547..9735b51aef08 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -250,6 +250,11 @@ struct hdac_rb {
  * @mlcap: MultiLink capabilities pointer
  * @gtscap: gts capabilities pointer
  * @drsmcap: dma resume capabilities pointer
+ * @num_streams: streams supported
+ * @idx: HDA link index
+ * @hlink_list: link list of HDA links
+ * @lock: lock for link mgmt
+ * @cmd_dma_state: state of cmd DMAs: CORB and RIRB
  */
 struct hdac_bus {
 	struct device *dev;
@@ -317,6 +322,16 @@ struct hdac_bus {
 	/* i915 component interface */
 	struct i915_audio_component *audio_component;
 	int i915_power_refcount;
+
+	/* parameters required for enhanced capabilities */
+	int num_streams;
+	int idx;
+
+	struct list_head hlink_list;
+
+	struct mutex lock;
+	bool cmd_dma_state;
+
 };
 
 int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev,
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index c1a5ad0e6e39..e5b0cd1ade19 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -4,38 +4,14 @@
 
 #include <sound/hdaudio.h>
 
-/**
- * hdac_ext_bus: HDAC extended bus for extended HDA caps
- *
- * @bus: hdac bus
- * @num_streams: streams supported
- * @hlink_list: link list of HDA links
- * @lock: lock for link mgmt
- * @cmd_dma_state: state of cmd DMAs: CORB and RIRB
- */
-struct hdac_ext_bus {
-	struct hdac_bus bus;
-	int num_streams;
-	int idx;
-
-	struct list_head hlink_list;
-
-	struct mutex lock;
-	bool cmd_dma_state;
-};
-
-int snd_hdac_ext_bus_init(struct hdac_ext_bus *sbus, struct device *dev,
+int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 		      const struct hdac_bus_ops *ops,
 		      const struct hdac_io_ops *io_ops);
 
-void snd_hdac_ext_bus_exit(struct hdac_ext_bus *sbus);
-int snd_hdac_ext_bus_device_init(struct hdac_ext_bus *sbus, int addr);
+void snd_hdac_ext_bus_exit(struct hdac_bus *bus);
+int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr);
 void snd_hdac_ext_bus_device_exit(struct hdac_device *hdev);
-void snd_hdac_ext_bus_device_remove(struct hdac_ext_bus *ebus);
-
-#define ebus_to_hbus(ebus)	(&(ebus)->bus)
-#define hbus_to_ebus(_bus) \
-	container_of(_bus, struct hdac_ext_bus, bus)
+void snd_hdac_ext_bus_device_remove(struct hdac_bus *bus);
 
 #define HDA_CODEC_REV_EXT_ENTRY(_vid, _rev, _name, drv_data) \
 	{ .vendor_id = (_vid), .rev_id = (_rev), .name = (_name), \
@@ -44,14 +20,14 @@ void snd_hdac_ext_bus_device_remove(struct hdac_ext_bus *ebus);
 #define HDA_CODEC_EXT_ENTRY(_vid, _revid, _name, _drv_data) \
 	HDA_CODEC_REV_EXT_ENTRY(_vid, _revid, _name, _drv_data)
 
-void snd_hdac_ext_bus_ppcap_enable(struct hdac_ext_bus *chip, bool enable);
-void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_ext_bus *chip, bool enable);
+void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *chip, bool enable);
+void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *chip, bool enable);
 
-void snd_hdac_ext_stream_spbcap_enable(struct hdac_ext_bus *chip,
+void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *chip,
 				 bool enable, int index);
 
-int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_ext_bus *bus);
-struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *bus,
+int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus);
+struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_bus *bus,
 						const char *codec_name);
 
 enum hdac_ext_stream_type {
@@ -100,28 +76,28 @@ struct hdac_ext_stream {
 #define stream_to_hdac_ext_stream(s) \
 	container_of(s, struct hdac_ext_stream, hstream)
 
-void snd_hdac_ext_stream_init(struct hdac_ext_bus *bus,
+void snd_hdac_ext_stream_init(struct hdac_bus *bus,
 				struct hdac_ext_stream *stream, int idx,
 				int direction, int tag);
-int snd_hdac_ext_stream_init_all(struct hdac_ext_bus *ebus, int start_idx,
+int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx,
 		int num_stream, int dir);
-void snd_hdac_stream_free_all(struct hdac_ext_bus *ebus);
-void snd_hdac_link_free_all(struct hdac_ext_bus *ebus);
-struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_ext_bus *bus,
+void snd_hdac_stream_free_all(struct hdac_bus *bus);
+void snd_hdac_link_free_all(struct hdac_bus *bus);
+struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus,
 					   struct snd_pcm_substream *substream,
 					   int type);
 void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type);
-void snd_hdac_ext_stream_decouple(struct hdac_ext_bus *bus,
+void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 				struct hdac_ext_stream *azx_dev, bool decouple);
-void snd_hdac_ext_stop_streams(struct hdac_ext_bus *sbus);
+void snd_hdac_ext_stop_streams(struct hdac_bus *bus);
 
-int snd_hdac_ext_stream_set_spib(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream, u32 value);
-int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream);
-void snd_hdac_ext_stream_drsm_enable(struct hdac_ext_bus *ebus,
+void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus,
 				bool enable, int index);
-int snd_hdac_ext_stream_set_dpibr(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus,
 				struct hdac_ext_stream *stream, u32 value);
 int snd_hdac_ext_stream_set_lpib(struct hdac_ext_stream *stream, u32 value);
 
@@ -144,17 +120,15 @@ struct hdac_ext_link {
 
 int snd_hdac_ext_bus_link_power_up(struct hdac_ext_link *link);
 int snd_hdac_ext_bus_link_power_down(struct hdac_ext_link *link);
-int snd_hdac_ext_bus_link_power_up_all(struct hdac_ext_bus *ebus);
-int snd_hdac_ext_bus_link_power_down_all(struct hdac_ext_bus *ebus);
+int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus);
+int snd_hdac_ext_bus_link_power_down_all(struct hdac_bus *bus);
 void snd_hdac_ext_link_set_stream_id(struct hdac_ext_link *link,
 				 int stream);
 void snd_hdac_ext_link_clear_stream_id(struct hdac_ext_link *link,
 				 int stream);
 
-int snd_hdac_ext_bus_link_get(struct hdac_ext_bus *ebus,
-				struct hdac_ext_link *link);
-int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
-				struct hdac_ext_link *link);
+int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, struct hdac_ext_link *link);
+int snd_hdac_ext_bus_link_put(struct hdac_bus *bus, struct hdac_ext_link *link);
 
 /* update register macro */
 #define snd_hdac_updatel(addr, reg, mask, val)		\
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 0e4823fdd411..77547ede9ae8 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -87,7 +87,7 @@ static const struct hdac_io_ops hdac_ext_default_io = {
  *
  * Returns 0 if successful, or a negative error code.
  */
-int snd_hdac_ext_bus_init(struct hdac_ext_bus *ebus, struct device *dev,
+int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 			const struct hdac_bus_ops *ops,
 			const struct hdac_io_ops *io_ops)
 {
@@ -98,15 +98,15 @@ int snd_hdac_ext_bus_init(struct hdac_ext_bus *ebus, struct device *dev,
 	if (io_ops == NULL)
 		io_ops = &hdac_ext_default_io;
 
-	ret = snd_hdac_bus_init(&ebus->bus, dev, ops, io_ops);
+	ret = snd_hdac_bus_init(bus, dev, ops, io_ops);
 	if (ret < 0)
 		return ret;
 
-	INIT_LIST_HEAD(&ebus->hlink_list);
-	ebus->idx = idx++;
+	INIT_LIST_HEAD(&bus->hlink_list);
+	bus->idx = idx++;
 
-	mutex_init(&ebus->lock);
-	ebus->cmd_dma_state = true;
+	mutex_init(&bus->lock);
+	bus->cmd_dma_state = true;
 
 	return 0;
 }
@@ -116,10 +116,10 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_init);
  * snd_hdac_ext_bus_exit - clean up a HD-audio extended bus
  * @ebus: the pointer to extended bus object
  */
-void snd_hdac_ext_bus_exit(struct hdac_ext_bus *ebus)
+void snd_hdac_ext_bus_exit(struct hdac_bus *bus)
 {
-	snd_hdac_bus_exit(&ebus->bus);
-	WARN_ON(!list_empty(&ebus->hlink_list));
+	snd_hdac_bus_exit(bus);
+	WARN_ON(!list_empty(&bus->hlink_list));
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_exit);
 
@@ -135,10 +135,9 @@ static void default_release(struct device *dev)
  *
  * Returns zero for success or a negative error code.
  */
-int snd_hdac_ext_bus_device_init(struct hdac_ext_bus *ebus, int addr)
+int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr)
 {
 	struct hdac_device *hdev = NULL;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 	char name[15];
 	int ret;
 
@@ -148,7 +147,7 @@ int snd_hdac_ext_bus_device_init(struct hdac_ext_bus *ebus, int addr)
 
 	hdev->bus = bus;
 
-	snprintf(name, sizeof(name), "ehdaudio%dD%d", ebus->idx, addr);
+	snprintf(name, sizeof(name), "ehdaudio%dD%d", bus->idx, addr);
 
 	ret  = snd_hdac_device_init(hdev, bus, name, addr);
 	if (ret < 0) {
@@ -185,14 +184,14 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_exit);
  *
  * @ebus: HD-audio extended bus
  */
-void snd_hdac_ext_bus_device_remove(struct hdac_ext_bus *ebus)
+void snd_hdac_ext_bus_device_remove(struct hdac_bus *bus)
 {
 	struct hdac_device *codec, *__codec;
 	/*
 	 * we need to remove all the codec devices objects created in the
 	 * snd_hdac_ext_bus_device_init
 	 */
-	list_for_each_entry_safe(codec, __codec, &ebus->bus.codec_list, list) {
+	list_for_each_entry_safe(codec, __codec, &bus->codec_list, list) {
 		snd_hdac_device_unregister(codec);
 		put_device(&codec->dev);
 	}
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 84f3b8168716..72774119dd11 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -39,9 +39,8 @@
  * @ebus: HD-audio extended core bus
  * @enable: flag to turn on/off the capability
  */
-void snd_hdac_ext_bus_ppcap_enable(struct hdac_ext_bus *ebus, bool enable)
+void snd_hdac_ext_bus_ppcap_enable(struct hdac_bus *bus, bool enable)
 {
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->ppcap) {
 		dev_err(bus->dev, "Address of PP capability is NULL");
@@ -60,9 +59,8 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_ppcap_enable);
  * @ebus: HD-audio extended core bus
  * @enable: flag to enable/disable interrupt
  */
-void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_ext_bus *ebus, bool enable)
+void snd_hdac_ext_bus_ppcap_int_enable(struct hdac_bus *bus, bool enable)
 {
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->ppcap) {
 		dev_err(bus->dev, "Address of PP capability is NULL\n");
@@ -89,12 +87,11 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_ppcap_int_enable);
  * in hlink_list of extended hdac bus
  * Note: this will be freed on bus exit by driver
  */
-int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_ext_bus *ebus)
+int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_bus *bus)
 {
 	int idx;
 	u32 link_count;
 	struct hdac_ext_link *hlink;
-	struct hdac_bus *bus = &ebus->bus;
 
 	link_count = readl(bus->mlcap + AZX_REG_ML_MLCD) + 1;
 
@@ -114,7 +111,7 @@ int snd_hdac_ext_bus_get_ml_capabilities(struct hdac_ext_bus *ebus)
 		/* since link in On, update the ref */
 		hlink->ref_count = 1;
 
-		list_add_tail(&hlink->list, &ebus->hlink_list);
+		list_add_tail(&hlink->list, &bus->hlink_list);
 	}
 
 	return 0;
@@ -127,12 +124,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_get_ml_capabilities);
  * @ebus: HD-audio ext core bus
  */
 
-void snd_hdac_link_free_all(struct hdac_ext_bus *ebus)
+void snd_hdac_link_free_all(struct hdac_bus *bus)
 {
 	struct hdac_ext_link *l;
 
-	while (!list_empty(&ebus->hlink_list)) {
-		l = list_first_entry(&ebus->hlink_list, struct hdac_ext_link, list);
+	while (!list_empty(&bus->hlink_list)) {
+		l = list_first_entry(&bus->hlink_list, struct hdac_ext_link, list);
 		list_del(&l->list);
 		kfree(l);
 	}
@@ -144,7 +141,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_link_free_all);
  * @ebus: HD-audio extended core bus
  * @codec_name: codec name
  */
-struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus,
+struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_bus *bus,
 						 const char *codec_name)
 {
 	int i;
@@ -153,10 +150,10 @@ struct hdac_ext_link *snd_hdac_ext_bus_get_link(struct hdac_ext_bus *ebus,
 
 	if (sscanf(codec_name, "ehdaudio%dD%d", &bus_idx, &addr) != 2)
 		return NULL;
-	if (ebus->idx != bus_idx)
+	if (bus->idx != bus_idx)
 		return NULL;
 
-	list_for_each_entry(hlink, &ebus->hlink_list, list) {
+	list_for_each_entry(hlink, &bus->hlink_list, list) {
 		for (i = 0; i < HDA_MAX_CODECS; i++) {
 			if (hlink->lsdiid & (0x1 << addr))
 				return hlink;
@@ -219,12 +216,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down);
  * snd_hdac_ext_bus_link_power_up_all -power up all hda link
  * @ebus: HD-audio extended bus
  */
-int snd_hdac_ext_bus_link_power_up_all(struct hdac_ext_bus *ebus)
+int snd_hdac_ext_bus_link_power_up_all(struct hdac_bus *bus)
 {
 	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
-	list_for_each_entry(hlink, &ebus->hlink_list, list) {
+	list_for_each_entry(hlink, &bus->hlink_list, list) {
 		snd_hdac_updatel(hlink->ml_addr,
 				AZX_REG_ML_LCTL, 0, AZX_MLCTL_SPA);
 		ret = check_hdac_link_power_active(hlink, true);
@@ -240,12 +237,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_up_all);
  * snd_hdac_ext_bus_link_power_down_all -power down all hda link
  * @ebus: HD-audio extended bus
  */
-int snd_hdac_ext_bus_link_power_down_all(struct hdac_ext_bus *ebus)
+int snd_hdac_ext_bus_link_power_down_all(struct hdac_bus *bus)
 {
 	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
-	list_for_each_entry(hlink, &ebus->hlink_list, list) {
+	list_for_each_entry(hlink, &bus->hlink_list, list) {
 		snd_hdac_updatel(hlink->ml_addr, AZX_REG_ML_LCTL, AZX_MLCTL_SPA, 0);
 		ret = check_hdac_link_power_active(hlink, false);
 		if (ret < 0)
@@ -256,39 +253,39 @@ int snd_hdac_ext_bus_link_power_down_all(struct hdac_ext_bus *ebus)
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down_all);
 
-int snd_hdac_ext_bus_link_get(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_bus_link_get(struct hdac_bus *bus,
 				struct hdac_ext_link *link)
 {
 	int ret = 0;
 
-	mutex_lock(&ebus->lock);
+	mutex_lock(&bus->lock);
 
 	/*
 	 * if we move from 0 to 1, count will be 1 so power up this link
 	 * as well, also check the dma status and trigger that
 	 */
 	if (++link->ref_count == 1) {
-		if (!ebus->cmd_dma_state) {
-			snd_hdac_bus_init_cmd_io(&ebus->bus);
-			ebus->cmd_dma_state = true;
+		if (!bus->cmd_dma_state) {
+			snd_hdac_bus_init_cmd_io(bus);
+			bus->cmd_dma_state = true;
 		}
 
 		ret = snd_hdac_ext_bus_link_power_up(link);
 	}
 
-	mutex_unlock(&ebus->lock);
+	mutex_unlock(&bus->lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_get);
 
-int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_bus_link_put(struct hdac_bus *bus,
 				struct hdac_ext_link *link)
 {
 	int ret = 0;
 	struct hdac_ext_link *hlink;
 	bool link_up = false;
 
-	mutex_lock(&ebus->lock);
+	mutex_lock(&bus->lock);
 
 	/*
 	 * if we move from 1 to 0, count will be 0
@@ -301,7 +298,7 @@ int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
 		 * now check if all links are off, if so turn off
 		 * cmd dma as well
 		 */
-		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+		list_for_each_entry(hlink, &bus->hlink_list, list) {
 			if (hlink->ref_count) {
 				link_up = true;
 				break;
@@ -309,12 +306,12 @@ int snd_hdac_ext_bus_link_put(struct hdac_ext_bus *ebus,
 		}
 
 		if (!link_up) {
-			snd_hdac_bus_stop_cmd_io(&ebus->bus);
-			ebus->cmd_dma_state = false;
+			snd_hdac_bus_stop_cmd_io(bus);
+			bus->cmd_dma_state = false;
 		}
 	}
 
-	mutex_unlock(&ebus->lock);
+	mutex_unlock(&bus->lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_put);
diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c
index c96d7a7a36af..1bd27576db98 100644
--- a/sound/hda/ext/hdac_ext_stream.c
+++ b/sound/hda/ext/hdac_ext_stream.c
@@ -25,7 +25,7 @@
 
 /**
  * snd_hdac_ext_stream_init - initialize each stream (aka device)
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: HD-audio ext core stream object to initialize
  * @idx: stream index number
  * @direction: stream direction (SNDRV_PCM_STREAM_PLAYBACK or SNDRV_PCM_STREAM_CAPTURE)
@@ -34,18 +34,16 @@
  * initialize the stream, if ppcap is enabled then init those and then
  * invoke hdac stream initialization routine
  */
-void snd_hdac_ext_stream_init(struct hdac_ext_bus *ebus,
+void snd_hdac_ext_stream_init(struct hdac_bus *bus,
 				struct hdac_ext_stream *stream,
 				int idx, int direction, int tag)
 {
-	struct hdac_bus *bus = &ebus->bus;
-
 	if (bus->ppcap) {
 		stream->pphc_addr = bus->ppcap + AZX_PPHC_BASE +
 				AZX_PPHC_INTERVAL * idx;
 
 		stream->pplc_addr = bus->ppcap + AZX_PPLC_BASE +
-				AZX_PPLC_MULTI * ebus->num_streams +
+				AZX_PPLC_MULTI * bus->num_streams +
 				AZX_PPLC_INTERVAL * idx;
 	}
 
@@ -71,12 +69,12 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_init);
 /**
  * snd_hdac_ext_stream_init_all - create and initialize the stream objects
  *   for an extended hda bus
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @start_idx: start index for streams
  * @num_stream: number of streams to initialize
  * @dir: direction of streams
  */
-int snd_hdac_ext_stream_init_all(struct hdac_ext_bus *ebus, int start_idx,
+int snd_hdac_ext_stream_init_all(struct hdac_bus *bus, int start_idx,
 		int num_stream, int dir)
 {
 	int stream_tag = 0;
@@ -88,7 +86,7 @@ int snd_hdac_ext_stream_init_all(struct hdac_ext_bus *ebus, int start_idx,
 		if (!stream)
 			return -ENOMEM;
 		tag = ++stream_tag;
-		snd_hdac_ext_stream_init(ebus, stream, idx, dir, tag);
+		snd_hdac_ext_stream_init(bus, stream, idx, dir, tag);
 		idx++;
 	}
 
@@ -100,17 +98,16 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_init_all);
 /**
  * snd_hdac_stream_free_all - free hdac extended stream objects
  *
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  */
-void snd_hdac_stream_free_all(struct hdac_ext_bus *ebus)
+void snd_hdac_stream_free_all(struct hdac_bus *bus)
 {
 	struct hdac_stream *s, *_s;
 	struct hdac_ext_stream *stream;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 
 	list_for_each_entry_safe(s, _s, &bus->stream_list, list) {
 		stream = stream_to_hdac_ext_stream(s);
-		snd_hdac_ext_stream_decouple(ebus, stream, false);
+		snd_hdac_ext_stream_decouple(bus, stream, false);
 		list_del(&s->list);
 		kfree(stream);
 	}
@@ -119,15 +116,14 @@ EXPORT_SYMBOL_GPL(snd_hdac_stream_free_all);
 
 /**
  * snd_hdac_ext_stream_decouple - decouple the hdac stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: HD-audio ext core stream object to initialize
  * @decouple: flag to decouple
  */
-void snd_hdac_ext_stream_decouple(struct hdac_ext_bus *ebus,
+void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 				struct hdac_ext_stream *stream, bool decouple)
 {
 	struct hdac_stream *hstream = &stream->hstream;
-	struct hdac_bus *bus = &ebus->bus;
 	u32 val;
 	int mask = AZX_PPCTL_PROCEN(hstream->index);
 
@@ -251,19 +247,18 @@ void snd_hdac_ext_link_clear_stream_id(struct hdac_ext_link *link,
 EXPORT_SYMBOL_GPL(snd_hdac_ext_link_clear_stream_id);
 
 static struct hdac_ext_stream *
-hdac_ext_link_stream_assign(struct hdac_ext_bus *ebus,
+hdac_ext_link_stream_assign(struct hdac_bus *bus,
 				struct snd_pcm_substream *substream)
 {
 	struct hdac_ext_stream *res = NULL;
 	struct hdac_stream *stream = NULL;
-	struct hdac_bus *hbus = &ebus->bus;
 
-	if (!hbus->ppcap) {
-		dev_err(hbus->dev, "stream type not supported\n");
+	if (!bus->ppcap) {
+		dev_err(bus->dev, "stream type not supported\n");
 		return NULL;
 	}
 
-	list_for_each_entry(stream, &hbus->stream_list, list) {
+	list_for_each_entry(stream, &bus->stream_list, list) {
 		struct hdac_ext_stream *hstream = container_of(stream,
 						struct hdac_ext_stream,
 						hstream);
@@ -277,34 +272,33 @@ hdac_ext_link_stream_assign(struct hdac_ext_bus *ebus,
 		}
 
 		if (!hstream->link_locked) {
-			snd_hdac_ext_stream_decouple(ebus, hstream, true);
+			snd_hdac_ext_stream_decouple(bus, hstream, true);
 			res = hstream;
 			break;
 		}
 	}
 	if (res) {
-		spin_lock_irq(&hbus->reg_lock);
+		spin_lock_irq(&bus->reg_lock);
 		res->link_locked = 1;
 		res->link_substream = substream;
-		spin_unlock_irq(&hbus->reg_lock);
+		spin_unlock_irq(&bus->reg_lock);
 	}
 	return res;
 }
 
 static struct hdac_ext_stream *
-hdac_ext_host_stream_assign(struct hdac_ext_bus *ebus,
+hdac_ext_host_stream_assign(struct hdac_bus *bus,
 				struct snd_pcm_substream *substream)
 {
 	struct hdac_ext_stream *res = NULL;
 	struct hdac_stream *stream = NULL;
-	struct hdac_bus *hbus = &ebus->bus;
 
-	if (!hbus->ppcap) {
-		dev_err(hbus->dev, "stream type not supported\n");
+	if (!bus->ppcap) {
+		dev_err(bus->dev, "stream type not supported\n");
 		return NULL;
 	}
 
-	list_for_each_entry(stream, &hbus->stream_list, list) {
+	list_for_each_entry(stream, &bus->stream_list, list) {
 		struct hdac_ext_stream *hstream = container_of(stream,
 						struct hdac_ext_stream,
 						hstream);
@@ -313,17 +307,17 @@ hdac_ext_host_stream_assign(struct hdac_ext_bus *ebus,
 
 		if (!stream->opened) {
 			if (!hstream->decoupled)
-				snd_hdac_ext_stream_decouple(ebus, hstream, true);
+				snd_hdac_ext_stream_decouple(bus, hstream, true);
 			res = hstream;
 			break;
 		}
 	}
 	if (res) {
-		spin_lock_irq(&hbus->reg_lock);
+		spin_lock_irq(&bus->reg_lock);
 		res->hstream.opened = 1;
 		res->hstream.running = 0;
 		res->hstream.substream = substream;
-		spin_unlock_irq(&hbus->reg_lock);
+		spin_unlock_irq(&bus->reg_lock);
 	}
 
 	return res;
@@ -331,7 +325,7 @@ hdac_ext_host_stream_assign(struct hdac_ext_bus *ebus,
 
 /**
  * snd_hdac_ext_stream_assign - assign a stream for the PCM
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @substream: PCM substream to assign
  * @type: type of stream (coupled, host or link stream)
  *
@@ -346,27 +340,26 @@ hdac_ext_host_stream_assign(struct hdac_ext_bus *ebus,
  * the same stream object when it's used beforehand.  when a stream is
  * decoupled, it becomes a host stream and link stream.
  */
-struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_ext_bus *ebus,
+struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus,
 					   struct snd_pcm_substream *substream,
 					   int type)
 {
 	struct hdac_ext_stream *hstream = NULL;
 	struct hdac_stream *stream = NULL;
-	struct hdac_bus *hbus = &ebus->bus;
 
 	switch (type) {
 	case HDAC_EXT_STREAM_TYPE_COUPLED:
-		stream = snd_hdac_stream_assign(hbus, substream);
+		stream = snd_hdac_stream_assign(bus, substream);
 		if (stream)
 			hstream = container_of(stream,
 					struct hdac_ext_stream, hstream);
 		return hstream;
 
 	case HDAC_EXT_STREAM_TYPE_HOST:
-		return hdac_ext_host_stream_assign(ebus, substream);
+		return hdac_ext_host_stream_assign(bus, substream);
 
 	case HDAC_EXT_STREAM_TYPE_LINK:
-		return hdac_ext_link_stream_assign(ebus, substream);
+		return hdac_ext_link_stream_assign(bus, substream);
 
 	default:
 		return NULL;
@@ -384,7 +377,6 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_assign);
 void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type)
 {
 	struct hdac_bus *bus = stream->hstream.bus;
-	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 
 	switch (type) {
 	case HDAC_EXT_STREAM_TYPE_COUPLED:
@@ -393,13 +385,13 @@ void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type)
 
 	case HDAC_EXT_STREAM_TYPE_HOST:
 		if (stream->decoupled && !stream->link_locked)
-			snd_hdac_ext_stream_decouple(ebus, stream, false);
+			snd_hdac_ext_stream_decouple(bus, stream, false);
 		snd_hdac_stream_release(&stream->hstream);
 		break;
 
 	case HDAC_EXT_STREAM_TYPE_LINK:
 		if (stream->decoupled && !stream->hstream.opened)
-			snd_hdac_ext_stream_decouple(ebus, stream, false);
+			snd_hdac_ext_stream_decouple(bus, stream, false);
 		spin_lock_irq(&bus->reg_lock);
 		stream->link_locked = 0;
 		stream->link_substream = NULL;
@@ -415,16 +407,15 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_release);
 
 /**
  * snd_hdac_ext_stream_spbcap_enable - enable SPIB for a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @enable: flag to enable/disable SPIB
  * @index: stream index for which SPIB need to be enabled
  */
-void snd_hdac_ext_stream_spbcap_enable(struct hdac_ext_bus *ebus,
+void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *bus,
 				 bool enable, int index)
 {
 	u32 mask = 0;
 	u32 register_mask = 0;
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->spbcap) {
 		dev_err(bus->dev, "Address of SPB capability is NULL\n");
@@ -446,14 +437,13 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_spbcap_enable);
 
 /**
  * snd_hdac_ext_stream_set_spib - sets the spib value of a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: hdac_ext_stream
  * @value: spib value to set
  */
-int snd_hdac_ext_stream_set_spib(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_set_spib(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream, u32 value)
 {
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->spbcap) {
 		dev_err(bus->dev, "Address of SPB capability is NULL\n");
@@ -468,15 +458,14 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_set_spib);
 
 /**
  * snd_hdac_ext_stream_get_spbmaxfifo - gets the spib value of a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: hdac_ext_stream
  *
  * Return maxfifo for the stream
  */
-int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_get_spbmaxfifo(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream)
 {
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->spbcap) {
 		dev_err(bus->dev, "Address of SPB capability is NULL\n");
@@ -490,11 +479,10 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_get_spbmaxfifo);
 
 /**
  * snd_hdac_ext_stop_streams - stop all stream if running
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  */
-void snd_hdac_ext_stop_streams(struct hdac_ext_bus *ebus)
+void snd_hdac_ext_stop_streams(struct hdac_bus *bus)
 {
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 	struct hdac_stream *stream;
 
 	if (bus->chip_init) {
@@ -507,16 +495,15 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stop_streams);
 
 /**
  * snd_hdac_ext_stream_drsm_enable - enable DMA resume for a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @enable: flag to enable/disable DRSM
  * @index: stream index for which DRSM need to be enabled
  */
-void snd_hdac_ext_stream_drsm_enable(struct hdac_ext_bus *ebus,
+void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus,
 				bool enable, int index)
 {
 	u32 mask = 0;
 	u32 register_mask = 0;
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->drsmcap) {
 		dev_err(bus->dev, "Address of DRSM capability is NULL\n");
@@ -538,14 +525,13 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_drsm_enable);
 
 /**
  * snd_hdac_ext_stream_set_dpibr - sets the dpibr value of a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: hdac_ext_stream
  * @value: dpib value to set
  */
-int snd_hdac_ext_stream_set_dpibr(struct hdac_ext_bus *ebus,
+int snd_hdac_ext_stream_set_dpibr(struct hdac_bus *bus,
 				 struct hdac_ext_stream *stream, u32 value)
 {
-	struct hdac_bus *bus = &ebus->bus;
 
 	if (!bus->drsmcap) {
 		dev_err(bus->dev, "Address of DRSM capability is NULL\n");
@@ -560,7 +546,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_set_dpibr);
 
 /**
  * snd_hdac_ext_stream_set_lpib - sets the lpib value of a stream
- * @ebus: HD-audio ext core bus
+ * @bus: HD-audio core bus
  * @stream: hdac_ext_stream
  * @value: lpib value to set
  */
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index f1e235817a65..c3ccc8d9c91d 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1799,14 +1799,13 @@ static int hdmi_codec_probe(struct snd_soc_component *component)
 	 * hold the ref while we probe, also no need to drop the ref on
 	 * exit, we call pm_runtime_suspend() so that will do for us
 	 */
-	hlink = snd_hdac_ext_bus_get_link(hbus_to_ebus(hdev->bus),
-						dev_name(&hdev->dev));
+	hlink = snd_hdac_ext_bus_get_link(hdev->bus, dev_name(&hdev->dev));
 	if (!hlink) {
 		dev_err(&hdev->dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_get(hbus_to_ebus(hdev->bus), hlink);
+	snd_hdac_ext_bus_link_get(hdev->bus, hlink);
 
 	ret = create_fill_widget_route_map(dapm);
 	if (ret < 0)
@@ -1984,14 +1983,13 @@ static int hdac_hdmi_dev_probe(struct hdac_device *hdev)
 	const struct hda_device_id *hdac_id = hdac_get_device_id(hdev, hdrv);
 
 	/* hold the ref while we probe */
-	hlink = snd_hdac_ext_bus_get_link(hbus_to_ebus(hdev->bus),
-						dev_name(&hdev->dev));
+	hlink = snd_hdac_ext_bus_get_link(hdev->bus, dev_name(&hdev->dev));
 	if (!hlink) {
 		dev_err(&hdev->dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_get(hbus_to_ebus(hdev->bus), hlink);
+	snd_hdac_ext_bus_link_get(hdev->bus, hlink);
 
 	hdmi_priv = devm_kzalloc(&hdev->dev, sizeof(*hdmi_priv), GFP_KERNEL);
 	if (hdmi_priv == NULL)
@@ -2044,7 +2042,7 @@ static int hdac_hdmi_dev_probe(struct hdac_device *hdev)
 	ret = devm_snd_soc_register_component(&hdev->dev, &hdmi_hda_codec,
 					hdmi_dais, num_dais);
 
-	snd_hdac_ext_bus_link_put(hbus_to_ebus(hdev->bus), hlink);
+	snd_hdac_ext_bus_link_put(hdev->bus, hlink);
 
 	return ret;
 }
@@ -2093,7 +2091,6 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 {
 	struct hdac_device *hdev = dev_to_hdac_dev(dev);
 	struct hdac_bus *bus = hdev->bus;
-	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 	struct hdac_ext_link *hlink = NULL;
 	int err;
 
@@ -2118,13 +2115,13 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 		return err;
 	}
 
-	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	hlink = snd_hdac_ext_bus_get_link(bus, dev_name(dev));
 	if (!hlink) {
 		dev_err(dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_put(ebus, hlink);
+	snd_hdac_ext_bus_link_put(bus, hlink);
 
 	return 0;
 }
@@ -2133,7 +2130,6 @@ static int hdac_hdmi_runtime_resume(struct device *dev)
 {
 	struct hdac_device *hdev = dev_to_hdac_dev(dev);
 	struct hdac_bus *bus = hdev->bus;
-	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 	struct hdac_ext_link *hlink = NULL;
 	int err;
 
@@ -2143,13 +2139,13 @@ static int hdac_hdmi_runtime_resume(struct device *dev)
 	if (!bus)
 		return 0;
 
-	hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev));
+	hlink = snd_hdac_ext_bus_get_link(bus, dev_name(dev));
 	if (!hlink) {
 		dev_err(dev, "hdac link not found\n");
 		return -EIO;
 	}
 
-	snd_hdac_ext_bus_link_get(ebus, hlink);
+	snd_hdac_ext_bus_link_get(bus, hlink);
 
 	err = snd_hdac_display_power(bus, true);
 	if (err < 0) {
diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c
index d5f9c30eba32..8bfb8b0fa3d5 100644
--- a/sound/soc/intel/skylake/skl-messages.c
+++ b/sound/soc/intel/skylake/skl-messages.c
@@ -33,8 +33,7 @@
 static int skl_alloc_dma_buf(struct device *dev,
 		struct snd_dma_buffer *dmab, size_t size)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 
 	if (!bus)
 		return -ENODEV;
@@ -44,8 +43,7 @@ static int skl_alloc_dma_buf(struct device *dev,
 
 static int skl_free_dma_buf(struct device *dev, struct snd_dma_buffer *dmab)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 
 	if (!bus)
 		return -ENODEV;
@@ -89,8 +87,7 @@ void skl_dsp_enable_notification(struct skl_sst *ctx, bool enable)
 static int skl_dsp_setup_spib(struct device *dev, unsigned int size,
 				int stream_tag, int enable)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	struct hdac_stream *stream = snd_hdac_get_stream(bus,
 			SNDRV_PCM_STREAM_PLAYBACK, stream_tag);
 	struct hdac_ext_stream *estream;
@@ -100,10 +97,10 @@ static int skl_dsp_setup_spib(struct device *dev, unsigned int size,
 
 	estream = stream_to_hdac_ext_stream(stream);
 	/* enable/disable SPIB for this hdac stream */
-	snd_hdac_ext_stream_spbcap_enable(ebus, enable, stream->index);
+	snd_hdac_ext_stream_spbcap_enable(bus, enable, stream->index);
 
 	/* set the spib value */
-	snd_hdac_ext_stream_set_spib(ebus, estream, size);
+	snd_hdac_ext_stream_set_spib(bus, estream, size);
 
 	return 0;
 }
@@ -111,8 +108,7 @@ static int skl_dsp_setup_spib(struct device *dev, unsigned int size,
 static int skl_dsp_prepare(struct device *dev, unsigned int format,
 			unsigned int size, struct snd_dma_buffer *dmab)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	struct hdac_ext_stream *estream;
 	struct hdac_stream *stream;
 	struct snd_pcm_substream substream;
@@ -124,7 +120,7 @@ static int skl_dsp_prepare(struct device *dev, unsigned int format,
 	memset(&substream, 0, sizeof(substream));
 	substream.stream = SNDRV_PCM_STREAM_PLAYBACK;
 
-	estream = snd_hdac_ext_stream_assign(ebus, &substream,
+	estream = snd_hdac_ext_stream_assign(bus, &substream,
 					HDAC_EXT_STREAM_TYPE_HOST);
 	if (!estream)
 		return -ENODEV;
@@ -143,9 +139,8 @@ static int skl_dsp_prepare(struct device *dev, unsigned int format,
 
 static int skl_dsp_trigger(struct device *dev, bool start, int stream_tag)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	struct hdac_stream *stream;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 
 	if (!bus)
 		return -ENODEV;
@@ -163,10 +158,9 @@ static int skl_dsp_trigger(struct device *dev, bool start, int stream_tag)
 static int skl_dsp_cleanup(struct device *dev,
 		struct snd_dma_buffer *dmab, int stream_tag)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	struct hdac_stream *stream;
 	struct hdac_ext_stream *estream;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 
 	if (!bus)
 		return -ENODEV;
@@ -270,8 +264,7 @@ const struct skl_dsp_ops *skl_get_dsp_ops(int pci_id)
 int skl_init_dsp(struct skl *skl)
 {
 	void __iomem *mmio_base;
-	struct hdac_ext_bus *ebus = &skl->ebus;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct skl_dsp_loader_ops loader_ops;
 	int irq = bus->irq;
 	const struct skl_dsp_ops *ops;
@@ -279,8 +272,8 @@ int skl_init_dsp(struct skl *skl)
 	int ret;
 
 	/* enable ppcap interrupt */
-	snd_hdac_ext_bus_ppcap_enable(&skl->ebus, true);
-	snd_hdac_ext_bus_ppcap_int_enable(&skl->ebus, true);
+	snd_hdac_ext_bus_ppcap_enable(bus, true);
+	snd_hdac_ext_bus_ppcap_int_enable(bus, true);
 
 	/* read the BAR of the ADSP MMIO */
 	mmio_base = pci_ioremap_bar(skl->pci, 4);
@@ -335,12 +328,11 @@ unmap_mmio:
 
 int skl_free_dsp(struct skl *skl)
 {
-	struct hdac_ext_bus *ebus = &skl->ebus;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct skl_sst *ctx = skl->skl_sst;
 
 	/* disable  ppcap interrupt */
-	snd_hdac_ext_bus_ppcap_int_enable(&skl->ebus, false);
+	snd_hdac_ext_bus_ppcap_int_enable(bus, false);
 
 	ctx->dsp_ops->cleanup(bus->dev, ctx);
 
@@ -383,10 +375,11 @@ int skl_suspend_late_dsp(struct skl *skl)
 int skl_suspend_dsp(struct skl *skl)
 {
 	struct skl_sst *ctx = skl->skl_sst;
+	struct hdac_bus *bus = skl_to_bus(skl);
 	int ret;
 
 	/* if ppcap is not supported return 0 */
-	if (!skl->ebus.bus.ppcap)
+	if (!bus->ppcap)
 		return 0;
 
 	ret = skl_dsp_sleep(ctx->dsp);
@@ -394,8 +387,8 @@ int skl_suspend_dsp(struct skl *skl)
 		return ret;
 
 	/* disable ppcap interrupt */
-	snd_hdac_ext_bus_ppcap_int_enable(&skl->ebus, false);
-	snd_hdac_ext_bus_ppcap_enable(&skl->ebus, false);
+	snd_hdac_ext_bus_ppcap_int_enable(bus, false);
+	snd_hdac_ext_bus_ppcap_enable(bus, false);
 
 	return 0;
 }
@@ -403,15 +396,16 @@ int skl_suspend_dsp(struct skl *skl)
 int skl_resume_dsp(struct skl *skl)
 {
 	struct skl_sst *ctx = skl->skl_sst;
+	struct hdac_bus *bus = skl_to_bus(skl);
 	int ret;
 
 	/* if ppcap is not supported return 0 */
-	if (!skl->ebus.bus.ppcap)
+	if (!bus->ppcap)
 		return 0;
 
 	/* enable ppcap interrupt */
-	snd_hdac_ext_bus_ppcap_enable(&skl->ebus, true);
-	snd_hdac_ext_bus_ppcap_int_enable(&skl->ebus, true);
+	snd_hdac_ext_bus_ppcap_enable(bus, true);
+	snd_hdac_ext_bus_ppcap_int_enable(bus, true);
 
 	/* check if DSP 1st boot is done */
 	if (skl->skl_sst->is_first_boot == true)
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index b9b140275be0..01a050cf8775 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -141,7 +141,7 @@ struct nhlt_specific_cfg
 {
 	struct nhlt_fmt *fmt;
 	struct nhlt_endpoint *epnt;
-	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct device *dev = bus->dev;
 	struct nhlt_specific_cfg *sp_config;
 	struct nhlt_acpi_table *nhlt = skl->nhlt;
@@ -228,7 +228,7 @@ static void skl_nhlt_trim_space(char *trim)
 int skl_nhlt_update_topology_bin(struct skl *skl)
 {
 	struct nhlt_acpi_table *nhlt = (struct nhlt_acpi_table *)skl->nhlt;
-	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct device *dev = bus->dev;
 
 	dev_dbg(dev, "oem_id %.6s, oem_table_id %8s oem_revision %d\n",
@@ -248,8 +248,8 @@ static ssize_t skl_nhlt_platform_id_show(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
+	struct skl *skl = bus_to_skl(bus);
 	struct nhlt_acpi_table *nhlt = (struct nhlt_acpi_table *)skl->nhlt;
 	char platform_id[32];
 
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index afa86b9e4dcf..d7fc3b2d3e68 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -67,16 +67,15 @@ struct hdac_ext_stream *get_hdac_ext_stream(struct snd_pcm_substream *substream)
 	return substream->runtime->private_data;
 }
 
-static struct hdac_ext_bus *get_bus_ctx(struct snd_pcm_substream *substream)
+static struct hdac_bus *get_bus_ctx(struct snd_pcm_substream *substream)
 {
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
 	struct hdac_stream *hstream = hdac_stream(stream);
 	struct hdac_bus *bus = hstream->bus;
-
-	return hbus_to_ebus(bus);
+	return bus;
 }
 
-static int skl_substream_alloc_pages(struct hdac_ext_bus *ebus,
+static int skl_substream_alloc_pages(struct hdac_bus *bus,
 				 struct snd_pcm_substream *substream,
 				 size_t size)
 {
@@ -95,7 +94,7 @@ static int skl_substream_free_pages(struct hdac_bus *bus,
 	return snd_pcm_lib_free_pages(substream);
 }
 
-static void skl_set_pcm_constrains(struct hdac_ext_bus *ebus,
+static void skl_set_pcm_constrains(struct hdac_bus *bus,
 				 struct snd_pcm_runtime *runtime)
 {
 	snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
@@ -105,9 +104,9 @@ static void skl_set_pcm_constrains(struct hdac_ext_bus *ebus,
 				     20, 178000000);
 }
 
-static enum hdac_ext_stream_type skl_get_host_stream_type(struct hdac_ext_bus *ebus)
+static enum hdac_ext_stream_type skl_get_host_stream_type(struct hdac_bus *bus)
 {
-	if ((ebus_to_hbus(ebus))->ppcap)
+	if (bus->ppcap)
 		return HDAC_EXT_STREAM_TYPE_HOST;
 	else
 		return HDAC_EXT_STREAM_TYPE_COUPLED;
@@ -123,9 +122,9 @@ static enum hdac_ext_stream_type skl_get_host_stream_type(struct hdac_ext_bus *e
 static void skl_set_suspend_active(struct snd_pcm_substream *substream,
 					 struct snd_soc_dai *dai, bool enable)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct snd_soc_dapm_widget *w;
-	struct skl *skl = ebus_to_skl(ebus);
+	struct skl *skl = bus_to_skl(bus);
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
 		w = dai->playback_widget;
@@ -140,8 +139,7 @@ static void skl_set_suspend_active(struct snd_pcm_substream *substream,
 
 int skl_pcm_host_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	unsigned int format_val;
 	struct hdac_stream *hstream;
 	struct hdac_ext_stream *stream;
@@ -153,7 +151,7 @@ int skl_pcm_host_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 		return -EINVAL;
 
 	stream = stream_to_hdac_ext_stream(hstream);
-	snd_hdac_ext_stream_decouple(ebus, stream, true);
+	snd_hdac_ext_stream_decouple(bus, stream, true);
 
 	format_val = snd_hdac_calc_stream_format(params->s_freq,
 			params->ch, params->format, params->host_bps, 0);
@@ -177,8 +175,7 @@ int skl_pcm_host_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 
 int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 	unsigned int format_val;
 	struct hdac_stream *hstream;
 	struct hdac_ext_stream *stream;
@@ -190,7 +187,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 		return -EINVAL;
 
 	stream = stream_to_hdac_ext_stream(hstream);
-	snd_hdac_ext_stream_decouple(ebus, stream, true);
+	snd_hdac_ext_stream_decouple(bus, stream, true);
 	format_val = snd_hdac_calc_stream_format(params->s_freq, params->ch,
 					params->format, params->link_bps, 0);
 
@@ -201,7 +198,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 
 	snd_hdac_ext_link_stream_setup(stream, format_val);
 
-	list_for_each_entry(link, &ebus->hlink_list, list) {
+	list_for_each_entry(link, &bus->hlink_list, list) {
 		if (link->index == params->link_index)
 			snd_hdac_ext_link_set_stream_id(link,
 					hstream->stream_tag);
@@ -215,7 +212,7 @@ int skl_pcm_link_dma_prepare(struct device *dev, struct skl_pipe_params *params)
 static int skl_pcm_open(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct hdac_ext_stream *stream;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct skl_dma_params *dma_params;
@@ -224,12 +221,12 @@ static int skl_pcm_open(struct snd_pcm_substream *substream,
 
 	dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
 
-	stream = snd_hdac_ext_stream_assign(ebus, substream,
-					skl_get_host_stream_type(ebus));
+	stream = snd_hdac_ext_stream_assign(bus, substream,
+					skl_get_host_stream_type(bus));
 	if (stream == NULL)
 		return -EBUSY;
 
-	skl_set_pcm_constrains(ebus, runtime);
+	skl_set_pcm_constrains(bus, runtime);
 
 	/*
 	 * disable WALLCLOCK timestamps for capture streams
@@ -301,7 +298,7 @@ static int skl_pcm_hw_params(struct snd_pcm_substream *substream,
 				struct snd_pcm_hw_params *params,
 				struct snd_soc_dai *dai)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct skl_pipe_params p_params = {0};
@@ -309,7 +306,7 @@ static int skl_pcm_hw_params(struct snd_pcm_substream *substream,
 	int ret, dma_id;
 
 	dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
-	ret = skl_substream_alloc_pages(ebus, substream,
+	ret = skl_substream_alloc_pages(bus, substream,
 					  params_buffer_bytes(params));
 	if (ret < 0)
 		return ret;
@@ -343,14 +340,14 @@ static void skl_pcm_close(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct skl_dma_params *dma_params = NULL;
-	struct skl *skl = ebus_to_skl(ebus);
+	struct skl *skl = bus_to_skl(bus);
 	struct skl_module_cfg *mconfig;
 
 	dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
 
-	snd_hdac_ext_stream_release(stream, skl_get_host_stream_type(ebus));
+	snd_hdac_ext_stream_release(stream, skl_get_host_stream_type(bus));
 
 	dma_params = snd_soc_dai_get_dma_data(dai, substream);
 	/*
@@ -380,7 +377,7 @@ static void skl_pcm_close(struct snd_pcm_substream *substream,
 static int skl_pcm_hw_free(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
 	struct skl *skl = get_skl_ctx(dai->dev);
 	struct skl_module_cfg *mconfig;
@@ -400,7 +397,7 @@ static int skl_pcm_hw_free(struct snd_pcm_substream *substream,
 	snd_hdac_stream_cleanup(hdac_stream(stream));
 	hdac_stream(stream)->prepared = 0;
 
-	return skl_substream_free_pages(ebus_to_hbus(ebus), substream);
+	return skl_substream_free_pages(bus, substream);
 }
 
 static int skl_be_hw_params(struct snd_pcm_substream *substream,
@@ -420,8 +417,7 @@ static int skl_be_hw_params(struct snd_pcm_substream *substream,
 static int skl_decoupled_trigger(struct snd_pcm_substream *substream,
 		int cmd)
 {
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 	struct hdac_ext_stream *stream;
 	int start;
 	unsigned long cookie;
@@ -470,7 +466,7 @@ static int skl_pcm_trigger(struct snd_pcm_substream *substream, int cmd,
 	struct skl *skl = get_skl_ctx(dai->dev);
 	struct skl_sst *ctx = skl->skl_sst;
 	struct skl_module_cfg *mconfig;
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
 	struct snd_soc_dapm_widget *w;
 	int ret;
@@ -492,9 +488,9 @@ static int skl_pcm_trigger(struct snd_pcm_substream *substream, int cmd,
 			 * dpib & lpib position to resume before starting the
 			 * DMA
 			 */
-			snd_hdac_ext_stream_drsm_enable(ebus, true,
+			snd_hdac_ext_stream_drsm_enable(bus, true,
 						hdac_stream(stream)->index);
-			snd_hdac_ext_stream_set_dpibr(ebus, stream,
+			snd_hdac_ext_stream_set_dpibr(bus, stream,
 							stream->lpib);
 			snd_hdac_ext_stream_set_lpib(stream, stream->lpib);
 		}
@@ -528,14 +524,14 @@ static int skl_pcm_trigger(struct snd_pcm_substream *substream, int cmd,
 		ret = skl_decoupled_trigger(substream, cmd);
 		if ((cmd == SNDRV_PCM_TRIGGER_SUSPEND) && !w->ignore_suspend) {
 			/* save the dpib and lpib positions */
-			stream->dpib = readl(ebus->bus.remap_addr +
+			stream->dpib = readl(bus->remap_addr +
 					AZX_REG_VS_SDXDPIB_XBASE +
 					(AZX_REG_VS_SDXDPIB_XINTERVAL *
 					hdac_stream(stream)->index));
 
 			stream->lpib = snd_hdac_stream_get_pos_lpib(
 							hdac_stream(stream));
-			snd_hdac_ext_stream_decouple(ebus, stream, false);
+			snd_hdac_ext_stream_decouple(bus, stream, false);
 		}
 		break;
 
@@ -546,11 +542,12 @@ static int skl_pcm_trigger(struct snd_pcm_substream *substream, int cmd,
 	return 0;
 }
 
+
 static int skl_link_hw_params(struct snd_pcm_substream *substream,
 				struct snd_pcm_hw_params *params,
 				struct snd_soc_dai *dai)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct hdac_ext_stream *link_dev;
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
@@ -558,14 +555,14 @@ static int skl_link_hw_params(struct snd_pcm_substream *substream,
 	struct hdac_ext_link *link;
 	int stream_tag;
 
-	link_dev = snd_hdac_ext_stream_assign(ebus, substream,
+	link_dev = snd_hdac_ext_stream_assign(bus, substream,
 					HDAC_EXT_STREAM_TYPE_LINK);
 	if (!link_dev)
 		return -EBUSY;
 
 	snd_soc_dai_set_dma_data(dai, substream, (void *)link_dev);
 
-	link = snd_hdac_ext_bus_get_link(ebus, codec_dai->component->name);
+	link = snd_hdac_ext_bus_get_link(bus, codec_dai->component->name);
 	if (!link)
 		return -EINVAL;
 
@@ -610,7 +607,7 @@ static int skl_link_pcm_trigger(struct snd_pcm_substream *substream,
 {
 	struct hdac_ext_stream *link_dev =
 				snd_soc_dai_get_dma_data(dai, substream);
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
 
 	dev_dbg(dai->dev, "In %s cmd=%d\n", __func__, cmd);
@@ -626,7 +623,7 @@ static int skl_link_pcm_trigger(struct snd_pcm_substream *substream,
 	case SNDRV_PCM_TRIGGER_STOP:
 		snd_hdac_ext_link_stream_clear(link_dev);
 		if (cmd == SNDRV_PCM_TRIGGER_SUSPEND)
-			snd_hdac_ext_stream_decouple(ebus, stream, false);
+			snd_hdac_ext_stream_decouple(bus, stream, false);
 		break;
 
 	default:
@@ -638,7 +635,7 @@ static int skl_link_pcm_trigger(struct snd_pcm_substream *substream,
 static int skl_link_hw_free(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream);
 	struct hdac_ext_stream *link_dev =
 				snd_soc_dai_get_dma_data(dai, substream);
@@ -648,7 +645,7 @@ static int skl_link_hw_free(struct snd_pcm_substream *substream,
 
 	link_dev->link_prepared = 0;
 
-	link = snd_hdac_ext_bus_get_link(ebus, rtd->codec_dai->component->name);
+	link = snd_hdac_ext_bus_get_link(bus, rtd->codec_dai->component->name);
 	if (!link)
 		return -EINVAL;
 
@@ -1041,8 +1038,7 @@ static int skl_platform_open(struct snd_pcm_substream *substream)
 static int skl_coupled_trigger(struct snd_pcm_substream *substream,
 					int cmd)
 {
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 	struct hdac_ext_stream *stream;
 	struct snd_pcm_substream *s;
 	bool start;
@@ -1115,9 +1111,9 @@ static int skl_coupled_trigger(struct snd_pcm_substream *substream,
 static int skl_platform_pcm_trigger(struct snd_pcm_substream *substream,
 					int cmd)
 {
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 
-	if (!(ebus_to_hbus(ebus))->ppcap)
+	if (!bus->ppcap)
 		return skl_coupled_trigger(substream, cmd);
 
 	return 0;
@@ -1127,7 +1123,7 @@ static snd_pcm_uframes_t skl_platform_pcm_pointer
 			(struct snd_pcm_substream *substream)
 {
 	struct hdac_ext_stream *hstream = get_hdac_ext_stream(substream);
-	struct hdac_ext_bus *ebus = get_bus_ctx(substream);
+	struct hdac_bus *bus = get_bus_ctx(substream);
 	unsigned int pos;
 
 	/*
@@ -1152,12 +1148,12 @@ static snd_pcm_uframes_t skl_platform_pcm_pointer
 	 */
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		pos = readl(ebus->bus.remap_addr + AZX_REG_VS_SDXDPIB_XBASE +
+		pos = readl(bus->remap_addr + AZX_REG_VS_SDXDPIB_XBASE +
 				(AZX_REG_VS_SDXDPIB_XINTERVAL *
 				hdac_stream(hstream)->index));
 	} else {
 		udelay(20);
-		readl(ebus->bus.remap_addr +
+		readl(bus->remap_addr +
 				AZX_REG_VS_SDXDPIB_XBASE +
 				(AZX_REG_VS_SDXDPIB_XINTERVAL *
 				 hdac_stream(hstream)->index));
@@ -1242,11 +1238,11 @@ static void skl_pcm_free(struct snd_pcm *pcm)
 static int skl_pcm_new(struct snd_soc_pcm_runtime *rtd)
 {
 	struct snd_soc_dai *dai = rtd->cpu_dai;
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
+	struct hdac_bus *bus = dev_get_drvdata(dai->dev);
 	struct snd_pcm *pcm = rtd->pcm;
 	unsigned int size;
 	int retval = 0;
-	struct skl *skl = ebus_to_skl(ebus);
+	struct skl *skl = bus_to_skl(bus);
 
 	if (dai->driver->playback.channels_min ||
 		dai->driver->capture.channels_min) {
@@ -1356,19 +1352,19 @@ static int skl_populate_modules(struct skl *skl)
 
 static int skl_platform_soc_probe(struct snd_soc_component *component)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(component->dev);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(component->dev);
+	struct skl *skl = bus_to_skl(bus);
 	const struct skl_dsp_ops *ops;
 	int ret;
 
 	pm_runtime_get_sync(component->dev);
-	if ((ebus_to_hbus(ebus))->ppcap) {
+	if (bus->ppcap) {
 		skl->component = component;
 
 		/* init debugfs */
 		skl->debugfs = skl_debugfs_init(skl);
 
-		ret = skl_tplg_init(component, ebus);
+		ret = skl_tplg_init(component, bus);
 		if (ret < 0) {
 			dev_err(component->dev, "Failed to init topology!\n");
 			return ret;
@@ -1425,10 +1421,10 @@ static const struct snd_soc_component_driver skl_component  = {
 int skl_platform_register(struct device *dev)
 {
 	int ret;
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct skl *skl = ebus_to_skl(ebus);
 	struct snd_soc_dai_driver *dais;
 	int num_dais = ARRAY_SIZE(skl_platform_dai);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
+	struct skl *skl = bus_to_skl(bus);
 
 	INIT_LIST_HEAD(&skl->ppl_list);
 	INIT_LIST_HEAD(&skl->bind_list);
@@ -1464,8 +1460,8 @@ err:
 
 int skl_platform_unregister(struct device *dev)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
+	struct skl *skl = bus_to_skl(bus);
 	struct skl_module_deferred_bind *modules, *tmp;
 
 	if (!list_empty(&skl->bind_list)) {
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index fcdc716754b6..abfdb67c05cc 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -934,7 +934,7 @@ static int skl_tplg_find_moduleid_from_uuid(struct skl *skl,
 	struct soc_bytes_ext *sb = (void *) k->private_value;
 	struct skl_algo_data *bc = (struct skl_algo_data *)sb->dobj.private;
 	struct skl_kpb_params *uuid_params, *params;
-	struct hdac_bus *bus = ebus_to_hbus(skl_to_ebus(skl));
+	struct hdac_bus *bus = skl_to_bus(skl);
 	int i, size, module_id;
 
 	if (bc->set_params == SKL_PARAM_BIND && bc->max) {
@@ -3029,9 +3029,8 @@ static int skl_tplg_widget_load(struct snd_soc_component *cmpnt,
 				struct snd_soc_tplg_dapm_widget *tplg_w)
 {
 	int ret;
-	struct hdac_ext_bus *ebus = snd_soc_component_get_drvdata(cmpnt);
-	struct skl *skl = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = snd_soc_component_get_drvdata(cmpnt);
+	struct skl *skl = bus_to_skl(bus);
 	struct skl_module_cfg *mconfig;
 
 	if (!tplg_w->priv.size)
@@ -3137,8 +3136,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
 	struct soc_bytes_ext *sb;
 	struct snd_soc_tplg_bytes_control *tplg_bc;
 	struct snd_soc_tplg_enum_control *tplg_ec;
-	struct hdac_ext_bus *ebus  = snd_soc_component_get_drvdata(cmpnt);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus  = snd_soc_component_get_drvdata(cmpnt);
 	struct soc_enum *se;
 
 	switch (hdr->ops.info) {
@@ -3622,9 +3620,8 @@ static int skl_tplg_get_manifest_data(struct snd_soc_tplg_manifest *manifest,
 static int skl_manifest_load(struct snd_soc_component *cmpnt,
 				struct snd_soc_tplg_manifest *manifest)
 {
-	struct hdac_ext_bus *ebus = snd_soc_component_get_drvdata(cmpnt);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = snd_soc_component_get_drvdata(cmpnt);
+	struct skl *skl = bus_to_skl(bus);
 
 	/* proceed only if we have private data defined */
 	if (manifest->priv.size == 0)
@@ -3713,12 +3710,11 @@ static void skl_tplg_set_pipe_type(struct skl *skl, struct skl_pipe *pipe)
 /*
  * SKL topology init routine
  */
-int skl_tplg_init(struct snd_soc_component *component, struct hdac_ext_bus *ebus)
+int skl_tplg_init(struct snd_soc_component *component, struct hdac_bus *bus)
 {
 	int ret;
 	const struct firmware *fw;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct skl *skl = bus_to_skl(bus);
 	struct skl_pipeline *ppl;
 
 	ret = request_firmware(&fw, skl->tplg_name, bus->dev);
diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h
index 6d7e0569695f..daeb6d2bb7fc 100644
--- a/sound/soc/intel/skylake/skl-topology.h
+++ b/sound/soc/intel/skylake/skl-topology.h
@@ -458,9 +458,9 @@ enum skl_channel {
 
 static inline struct skl *get_skl_ctx(struct device *dev)
 {
-	struct hdac_ext_bus *ebus = dev_get_drvdata(dev);
+	struct hdac_bus *bus = dev_get_drvdata(dev);
 
-	return ebus_to_skl(ebus);
+	return bus_to_skl(bus);
 }
 
 int skl_tplg_be_update_params(struct snd_soc_dai *dai,
@@ -470,7 +470,7 @@ int skl_dsp_set_dma_control(struct skl_sst *ctx, u32 *caps,
 void skl_tplg_set_be_dmic_config(struct snd_soc_dai *dai,
 	struct skl_pipe_params *params, int stream);
 int skl_tplg_init(struct snd_soc_component *component,
-				struct hdac_ext_bus *ebus);
+				struct hdac_bus *ebus);
 struct skl_module_cfg *skl_tplg_fe_get_cpr_module(
 		struct snd_soc_dai *dai, int stream);
 int skl_tplg_update_pipe_params(struct device *dev,
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index f0d9793f872a..9c5a701d68ac 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -54,7 +54,7 @@ static void skl_update_pci_byte(struct pci_dev *pci, unsigned int reg,
 
 static void skl_init_pci(struct skl *skl)
 {
-	struct hdac_ext_bus *ebus = &skl->ebus;
+	struct hdac_bus *bus = skl_to_bus(skl);
 
 	/*
 	 * Clear bits 0-2 of PCI register TCSEL (at offset 0x44)
@@ -63,7 +63,7 @@ static void skl_init_pci(struct skl *skl)
 	 * codecs.
 	 * The PCI register TCSEL is defined in the Intel manuals.
 	 */
-	dev_dbg(ebus_to_hbus(ebus)->dev, "Clearing TCSEL\n");
+	dev_dbg(bus->dev, "Clearing TCSEL\n");
 	skl_update_pci_byte(skl->pci, AZX_PCIREG_TCSEL, 0x07, 0);
 }
 
@@ -103,8 +103,7 @@ static void skl_enable_miscbdcge(struct device *dev, bool enable)
 static void skl_clock_power_gating(struct device *dev, bool enable)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
 	u32 val;
 
 	/* Update PDCGE bit of CGCTL register */
@@ -127,7 +126,6 @@ static void skl_clock_power_gating(struct device *dev, bool enable)
  */
 static int skl_init_chip(struct hdac_bus *bus, bool full_reset)
 {
-	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
 	struct hdac_ext_link *hlink;
 	int ret;
 
@@ -135,7 +133,7 @@ static int skl_init_chip(struct hdac_bus *bus, bool full_reset)
 	ret = snd_hdac_bus_init_chip(bus, full_reset);
 
 	/* Reset stream-to-link mapping */
-	list_for_each_entry(hlink, &ebus->hlink_list, list)
+	list_for_each_entry(hlink, &bus->hlink_list, list)
 		bus->io_ops->reg_writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
 
 	skl_enable_miscbdcge(bus->dev, true);
@@ -146,8 +144,7 @@ static int skl_init_chip(struct hdac_bus *bus, bool full_reset)
 void skl_update_d0i3c(struct device *dev, bool enable)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
 	u8 reg;
 	int timeout = 50;
 
@@ -197,8 +194,7 @@ static void skl_stream_update(struct hdac_bus *bus, struct hdac_stream *hstr)
 
 static irqreturn_t skl_interrupt(int irq, void *dev_id)
 {
-	struct hdac_ext_bus *ebus = dev_id;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_id;
 	u32 status;
 
 	if (!pm_runtime_active(bus->dev))
@@ -227,8 +223,7 @@ static irqreturn_t skl_interrupt(int irq, void *dev_id)
 
 static irqreturn_t skl_threaded_handler(int irq, void *dev_id)
 {
-	struct hdac_ext_bus *ebus = dev_id;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = dev_id;
 	u32 status;
 
 	status = snd_hdac_chip_readl(bus, INTSTS);
@@ -238,16 +233,15 @@ static irqreturn_t skl_threaded_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int skl_acquire_irq(struct hdac_ext_bus *ebus, int do_disconnect)
+static int skl_acquire_irq(struct hdac_bus *bus, int do_disconnect)
 {
-	struct skl *skl = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct skl *skl = bus_to_skl(bus);
 	int ret;
 
 	ret = request_threaded_irq(skl->pci->irq, skl_interrupt,
 			skl_threaded_handler,
 			IRQF_SHARED,
-			KBUILD_MODNAME, ebus);
+			KBUILD_MODNAME, bus);
 	if (ret) {
 		dev_err(bus->dev,
 			"unable to grab IRQ %d, disabling device\n",
@@ -264,21 +258,20 @@ static int skl_acquire_irq(struct hdac_ext_bus *ebus, int do_disconnect)
 static int skl_suspend_late(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
+	struct skl *skl = bus_to_skl(bus);
 
 	return skl_suspend_late_dsp(skl);
 }
 
 #ifdef CONFIG_PM
-static int _skl_suspend(struct hdac_ext_bus *ebus)
+static int _skl_suspend(struct hdac_bus *bus)
 {
-	struct skl *skl = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct skl *skl = bus_to_skl(bus);
 	struct pci_dev *pci = to_pci_dev(bus->dev);
 	int ret;
 
-	snd_hdac_ext_bus_link_power_down_all(ebus);
+	snd_hdac_ext_bus_link_power_down_all(bus);
 
 	ret = skl_suspend_dsp(skl);
 	if (ret < 0)
@@ -295,10 +288,9 @@ static int _skl_suspend(struct hdac_ext_bus *ebus)
 	return 0;
 }
 
-static int _skl_resume(struct hdac_ext_bus *ebus)
+static int _skl_resume(struct hdac_bus *bus)
 {
-	struct skl *skl = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct skl *skl = bus_to_skl(bus);
 
 	skl_init_pci(skl);
 	skl_init_chip(bus, true);
@@ -314,9 +306,8 @@ static int _skl_resume(struct hdac_ext_bus *ebus)
 static int skl_suspend(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct skl *skl  = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
+	struct skl *skl  = bus_to_skl(bus);
 	int ret = 0;
 
 	/*
@@ -325,15 +316,15 @@ static int skl_suspend(struct device *dev)
 	 */
 	if (skl->supend_active) {
 		/* turn off the links and stop the CORB/RIRB DMA if it is On */
-		snd_hdac_ext_bus_link_power_down_all(ebus);
+		snd_hdac_ext_bus_link_power_down_all(bus);
 
-		if (ebus->cmd_dma_state)
-			snd_hdac_bus_stop_cmd_io(&ebus->bus);
+		if (bus->cmd_dma_state)
+			snd_hdac_bus_stop_cmd_io(bus);
 
 		enable_irq_wake(bus->irq);
 		pci_save_state(pci);
 	} else {
-		ret = _skl_suspend(ebus);
+		ret = _skl_suspend(bus);
 		if (ret < 0)
 			return ret;
 		skl->skl_sst->fw_loaded = false;
@@ -352,9 +343,8 @@ static int skl_suspend(struct device *dev)
 static int skl_resume(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct skl *skl  = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
+	struct skl *skl  = bus_to_skl(bus);
 	struct hdac_ext_link *hlink = NULL;
 	int ret;
 
@@ -374,32 +364,32 @@ static int skl_resume(struct device *dev)
 	 */
 	if (skl->supend_active) {
 		pci_restore_state(pci);
-		snd_hdac_ext_bus_link_power_up_all(ebus);
+		snd_hdac_ext_bus_link_power_up_all(bus);
 		disable_irq_wake(bus->irq);
 		/*
 		 * turn On the links which are On before active suspend
 		 * and start the CORB/RIRB DMA if On before
 		 * active suspend.
 		 */
-		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+		list_for_each_entry(hlink, &bus->hlink_list, list) {
 			if (hlink->ref_count)
 				snd_hdac_ext_bus_link_power_up(hlink);
 		}
 
-		if (ebus->cmd_dma_state)
-			snd_hdac_bus_init_cmd_io(&ebus->bus);
 		ret = 0;
+		if (bus->cmd_dma_state)
+			snd_hdac_bus_init_cmd_io(bus);
 	} else {
-		ret = _skl_resume(ebus);
+		ret = _skl_resume(bus);
 
 		/* turn off the links which are off before suspend */
-		list_for_each_entry(hlink, &ebus->hlink_list, list) {
+		list_for_each_entry(hlink, &bus->hlink_list, list) {
 			if (!hlink->ref_count)
 				snd_hdac_ext_bus_link_power_down(hlink);
 		}
 
-		if (!ebus->cmd_dma_state)
-			snd_hdac_bus_stop_cmd_io(&ebus->bus);
+		if (!bus->cmd_dma_state)
+			snd_hdac_bus_stop_cmd_io(bus);
 	}
 
 	return ret;
@@ -410,23 +400,21 @@ static int skl_resume(struct device *dev)
 static int skl_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
 
 	dev_dbg(bus->dev, "in %s\n", __func__);
 
-	return _skl_suspend(ebus);
+	return _skl_suspend(bus);
 }
 
 static int skl_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pci = to_pci_dev(dev);
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
 
 	dev_dbg(bus->dev, "in %s\n", __func__);
 
-	return _skl_resume(ebus);
+	return _skl_resume(bus);
 }
 #endif /* CONFIG_PM */
 
@@ -439,20 +427,19 @@ static const struct dev_pm_ops skl_pm = {
 /*
  * destructor
  */
-static int skl_free(struct hdac_ext_bus *ebus)
+static int skl_free(struct hdac_bus *bus)
 {
-	struct skl *skl  = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct skl *skl  = bus_to_skl(bus);
 
 	skl->init_done = 0; /* to be sure */
 
-	snd_hdac_ext_stop_streams(ebus);
+	snd_hdac_ext_stop_streams(bus);
 
 	if (bus->irq >= 0)
-		free_irq(bus->irq, (void *)ebus);
+		free_irq(bus->irq, (void *)bus);
 	snd_hdac_bus_free_stream_pages(bus);
-	snd_hdac_stream_free_all(ebus);
-	snd_hdac_link_free_all(ebus);
+	snd_hdac_stream_free_all(bus);
+	snd_hdac_link_free_all(bus);
 
 	if (bus->remap_addr)
 		iounmap(bus->remap_addr);
@@ -460,11 +447,11 @@ static int skl_free(struct hdac_ext_bus *ebus)
 	pci_release_regions(skl->pci);
 	pci_disable_device(skl->pci);
 
-	snd_hdac_ext_bus_exit(ebus);
+	snd_hdac_ext_bus_exit(bus);
 
 	cancel_work_sync(&skl->probe_work);
 	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
-		snd_hdac_i915_exit(&ebus->bus);
+		snd_hdac_i915_exit(bus);
 
 	return 0;
 }
@@ -488,8 +475,8 @@ static struct skl_ssp_clk skl_ssp_clks[] = {
 
 static int skl_find_machine(struct skl *skl, void *driver_data)
 {
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct snd_soc_acpi_mach *mach = driver_data;
-	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
 	struct skl_machine_pdata *pdata;
 
 	mach = snd_soc_acpi_find_machine(mach);
@@ -510,7 +497,7 @@ static int skl_find_machine(struct skl *skl, void *driver_data)
 
 static int skl_machine_device_register(struct skl *skl)
 {
-	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct snd_soc_acpi_mach *mach = skl->mach;
 	struct platform_device *pdev;
 	int ret;
@@ -544,7 +531,7 @@ static void skl_machine_device_unregister(struct skl *skl)
 
 static int skl_dmic_device_register(struct skl *skl)
 {
-	struct hdac_bus *bus = ebus_to_hbus(&skl->ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct platform_device *pdev;
 	int ret;
 
@@ -643,9 +630,8 @@ static void skl_clock_device_unregister(struct skl *skl)
 /*
  * Probe the given codec address
  */
-static int probe_codec(struct hdac_ext_bus *ebus, int addr)
+static int probe_codec(struct hdac_bus *bus, int addr)
 {
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 	unsigned int cmd = (addr << 28) | (AC_NODE_ROOT << 20) |
 		(AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
 	unsigned int res = -1;
@@ -658,13 +644,12 @@ static int probe_codec(struct hdac_ext_bus *ebus, int addr)
 		return -EIO;
 	dev_dbg(bus->dev, "codec #%d probed OK\n", addr);
 
-	return snd_hdac_ext_bus_device_init(ebus, addr);
+	return snd_hdac_ext_bus_device_init(bus, addr);
 }
 
 /* Codec initialization */
-static void skl_codec_create(struct hdac_ext_bus *ebus)
+static void skl_codec_create(struct hdac_bus *bus)
 {
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
 	int c, max_slots;
 
 	max_slots = HDA_MAX_CODECS;
@@ -672,7 +657,7 @@ static void skl_codec_create(struct hdac_ext_bus *ebus)
 	/* First try to probe all given codec slots */
 	for (c = 0; c < max_slots; c++) {
 		if ((bus->codec_mask & (1 << c))) {
-			if (probe_codec(ebus, c) < 0) {
+			if (probe_codec(bus, c) < 0) {
 				/*
 				 * Some BIOSen give you wrong codec addresses
 				 * that don't exist
@@ -722,8 +707,7 @@ static int skl_i915_init(struct hdac_bus *bus)
 static void skl_probe_work(struct work_struct *work)
 {
 	struct skl *skl = container_of(work, struct skl, probe_work);
-	struct hdac_ext_bus *ebus = &skl->ebus;
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = skl_to_bus(skl);
 	struct hdac_ext_link *hlink = NULL;
 	int err;
 
@@ -744,7 +728,7 @@ static void skl_probe_work(struct work_struct *work)
 		dev_info(bus->dev, "no hda codecs found!\n");
 
 	/* create codec instances */
-	skl_codec_create(ebus);
+	skl_codec_create(bus);
 
 	/* register platform dai and controls */
 	err = skl_platform_register(bus->dev);
@@ -773,8 +757,8 @@ static void skl_probe_work(struct work_struct *work)
 	/*
 	 * we are done probing so decrement link counts
 	 */
-	list_for_each_entry(hlink, &ebus->hlink_list, list)
-		snd_hdac_ext_bus_link_put(ebus, hlink);
+	list_for_each_entry(hlink, &bus->hlink_list, list)
+		snd_hdac_ext_bus_link_put(bus, hlink);
 
 	/* configure PM */
 	pm_runtime_put_noidle(bus->dev);
@@ -796,7 +780,7 @@ static int skl_create(struct pci_dev *pci,
 		      struct skl **rskl)
 {
 	struct skl *skl;
-	struct hdac_ext_bus *ebus;
+	struct hdac_bus *bus;
 
 	int err;
 
@@ -811,23 +795,22 @@ static int skl_create(struct pci_dev *pci,
 		pci_disable_device(pci);
 		return -ENOMEM;
 	}
-	ebus = &skl->ebus;
-	snd_hdac_ext_bus_init(ebus, &pci->dev, &bus_core_ops, io_ops);
-	ebus->bus.use_posbuf = 1;
+
+	bus = skl_to_bus(skl);
+	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, io_ops);
+	bus->use_posbuf = 1;
 	skl->pci = pci;
 	INIT_WORK(&skl->probe_work, skl_probe_work);
-
-	ebus->bus.bdl_pos_adj = 0;
+	bus->bdl_pos_adj = 0;
 
 	*rskl = skl;
 
 	return 0;
 }
 
-static int skl_first_init(struct hdac_ext_bus *ebus)
+static int skl_first_init(struct hdac_bus *bus)
 {
-	struct skl *skl = ebus_to_skl(ebus);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct skl *skl = bus_to_skl(bus);
 	struct pci_dev *pci = skl->pci;
 	int err;
 	unsigned short gcap;
@@ -848,7 +831,7 @@ static int skl_first_init(struct hdac_ext_bus *ebus)
 
 	snd_hdac_bus_parse_capabilities(bus);
 
-	if (skl_acquire_irq(ebus, 0) < 0)
+	if (skl_acquire_irq(bus, 0) < 0)
 		return -EBUSY;
 
 	pci_set_master(pci);
@@ -872,14 +855,14 @@ static int skl_first_init(struct hdac_ext_bus *ebus)
 	if (!pb_streams && !cp_streams)
 		return -EIO;
 
-	ebus->num_streams = cp_streams + pb_streams;
+	bus->num_streams = cp_streams + pb_streams;
 
 	/* initialize streams */
 	snd_hdac_ext_stream_init_all
-		(ebus, 0, cp_streams, SNDRV_PCM_STREAM_CAPTURE);
+		(bus, 0, cp_streams, SNDRV_PCM_STREAM_CAPTURE);
 	start_idx = cp_streams;
 	snd_hdac_ext_stream_init_all
-		(ebus, start_idx, pb_streams, SNDRV_PCM_STREAM_PLAYBACK);
+		(bus, start_idx, pb_streams, SNDRV_PCM_STREAM_PLAYBACK);
 
 	err = snd_hdac_bus_alloc_stream_pages(bus);
 	if (err < 0)
@@ -895,7 +878,6 @@ static int skl_probe(struct pci_dev *pci,
 		     const struct pci_device_id *pci_id)
 {
 	struct skl *skl;
-	struct hdac_ext_bus *ebus = NULL;
 	struct hdac_bus *bus = NULL;
 	int err;
 
@@ -904,10 +886,9 @@ static int skl_probe(struct pci_dev *pci,
 	if (err < 0)
 		return err;
 
-	ebus = &skl->ebus;
-	bus = ebus_to_hbus(ebus);
+	bus = skl_to_bus(skl);
 
-	err = skl_first_init(ebus);
+	err = skl_first_init(bus);
 	if (err < 0)
 		goto out_free;
 
@@ -928,7 +909,7 @@ static int skl_probe(struct pci_dev *pci,
 
 	skl_nhlt_update_topology_bin(skl);
 
-	pci_set_drvdata(skl->pci, ebus);
+	pci_set_drvdata(skl->pci, bus);
 
 	skl_dmic_data.dmic_num = skl_get_dmic_geo(skl);
 
@@ -952,7 +933,7 @@ static int skl_probe(struct pci_dev *pci,
 		skl->skl_sst->clock_power_gating = skl_clock_power_gating;
 	}
 	if (bus->mlcap)
-		snd_hdac_ext_bus_get_ml_capabilities(ebus);
+		snd_hdac_ext_bus_get_ml_capabilities(bus);
 
 	snd_hdac_bus_stop_chip(bus);
 
@@ -972,31 +953,30 @@ out_clk_free:
 out_nhlt_free:
 	skl_nhlt_free(skl->nhlt);
 out_free:
-	skl_free(ebus);
+	skl_free(bus);
 
 	return err;
 }
 
 static void skl_shutdown(struct pci_dev *pci)
 {
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct hdac_bus *bus = ebus_to_hbus(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
 	struct hdac_stream *s;
 	struct hdac_ext_stream *stream;
 	struct skl *skl;
 
-	if (ebus == NULL)
+	if (!bus)
 		return;
 
-	skl = ebus_to_skl(ebus);
+	skl = bus_to_skl(bus);
 
 	if (!skl->init_done)
 		return;
 
-	snd_hdac_ext_stop_streams(ebus);
+	snd_hdac_ext_stop_streams(bus);
 	list_for_each_entry(s, &bus->stream_list, list) {
 		stream = stream_to_hdac_ext_stream(s);
-		snd_hdac_ext_stream_decouple(ebus, stream, false);
+		snd_hdac_ext_stream_decouple(bus, stream, false);
 	}
 
 	snd_hdac_bus_stop_chip(bus);
@@ -1004,15 +984,15 @@ static void skl_shutdown(struct pci_dev *pci)
 
 static void skl_remove(struct pci_dev *pci)
 {
-	struct hdac_ext_bus *ebus = pci_get_drvdata(pci);
-	struct skl *skl = ebus_to_skl(ebus);
+	struct hdac_bus *bus = pci_get_drvdata(pci);
+	struct skl *skl = bus_to_skl(bus);
 
 	release_firmware(skl->tplg);
 
 	pm_runtime_get_noresume(&pci->dev);
 
 	/* codec removal, invoke bus_device_remove */
-	snd_hdac_ext_bus_device_remove(ebus);
+	snd_hdac_ext_bus_device_remove(bus);
 
 	skl->debugfs = NULL;
 	skl_platform_unregister(&pci->dev);
@@ -1022,7 +1002,7 @@ static void skl_remove(struct pci_dev *pci)
 	skl_clock_device_unregister(skl);
 	skl_nhlt_remove_sysfs(skl);
 	skl_nhlt_free(skl->nhlt);
-	skl_free(ebus);
+	skl_free(bus);
 	dev_set_drvdata(&pci->dev, NULL);
 }
 
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index 0d5375cbcf6e..78aa8bdcb619 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -71,7 +71,7 @@ struct skl_fw_config {
 };
 
 struct skl {
-	struct hdac_ext_bus ebus;
+	struct hdac_bus hbus;
 	struct pci_dev *pci;
 
 	unsigned int init_done:1; /* delayed init status */
@@ -105,9 +105,8 @@ struct skl {
 	struct snd_soc_acpi_mach *mach;
 };
 
-#define skl_to_ebus(s)	(&(s)->ebus)
-#define ebus_to_skl(sbus) \
-	container_of(sbus, struct skl, sbus)
+#define skl_to_bus(s)  (&(s)->hbus)
+#define bus_to_skl(bus) container_of(bus, struct skl, hbus)
 
 /* to pass dai dma data */
 struct skl_dma_params {
-- 
cgit v1.2.3


From e1df9317cbb192582ed7aa88c5f294c2336a3c75 Mon Sep 17 00:00:00 2001
From: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Date: Fri, 1 Jun 2018 22:53:51 -0500
Subject: ALSA: hdac: Remove usage of struct hdac_ext_driver, use hdac_driver
 instead

This patch removes the hdac_ext_driver structure. The legacy and
enhanced HDaudio capabilities can be handled in a backward-compatible
way without separate definitions.

Signed-off-by: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h      |  5 +++++
 include/sound/hdaudio_ext.h  | 17 ++---------------
 sound/hda/ext/hdac_ext_bus.c | 30 ++++++++++++++----------------
 sound/soc/codecs/hdac_hdmi.c | 12 +++++-------
 4 files changed, 26 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 9735b51aef08..59ffe63cf194 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -188,6 +188,11 @@ struct hdac_driver {
 	const struct hda_device_id *id_table;
 	int (*match)(struct hdac_device *dev, struct hdac_driver *drv);
 	void (*unsol_event)(struct hdac_device *dev, unsigned int event);
+
+	/* fields used by ext bus APIs */
+	int (*probe)(struct hdac_device *dev);
+	int (*remove)(struct hdac_device *dev);
+	void (*shutdown)(struct hdac_device *dev);
 };
 
 #define drv_to_hdac_driver(_drv) container_of(_drv, struct hdac_driver, driver)
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index e5b0cd1ade19..3c302477750b 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -160,20 +160,7 @@ struct hdac_ext_dma_params {
 	u8 stream_tag;
 };
 
-/*
- * HD-audio codec base driver
- */
-struct hdac_ext_driver {
-	struct hdac_driver hdac;
-
-	int	(*probe)(struct hdac_device *dev);
-	int	(*remove)(struct hdac_device *dev);
-	void	(*shutdown)(struct hdac_device *dev);
-};
-
-int snd_hda_ext_driver_register(struct hdac_ext_driver *drv);
-void snd_hda_ext_driver_unregister(struct hdac_ext_driver *drv);
-
-#define to_ehdac_driver(_drv) container_of(_drv, struct hdac_ext_driver, hdac)
+int snd_hda_ext_driver_register(struct hdac_driver *drv);
+void snd_hda_ext_driver_unregister(struct hdac_driver *drv);
 
 #endif /* __SOUND_HDAUDIO_EXT_H */
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 77547ede9ae8..52f07766fff3 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -200,12 +200,10 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_remove);
 #define dev_to_hdac(dev) (container_of((dev), \
 			struct hdac_device, dev))
 
-static inline struct hdac_ext_driver *get_edrv(struct device *dev)
+static inline struct hdac_driver *get_hdrv(struct device *dev)
 {
 	struct hdac_driver *hdrv = drv_to_hdac_driver(dev->driver);
-	struct hdac_ext_driver *edrv = to_ehdac_driver(hdrv);
-
-	return edrv;
+	return hdrv;
 }
 
 static inline struct hdac_device *get_hdev(struct device *dev)
@@ -216,17 +214,17 @@ static inline struct hdac_device *get_hdev(struct device *dev)
 
 static int hda_ext_drv_probe(struct device *dev)
 {
-	return (get_edrv(dev))->probe(get_hdev(dev));
+	return (get_hdrv(dev))->probe(get_hdev(dev));
 }
 
 static int hdac_ext_drv_remove(struct device *dev)
 {
-	return (get_edrv(dev))->remove(get_hdev(dev));
+	return (get_hdrv(dev))->remove(get_hdev(dev));
 }
 
 static void hdac_ext_drv_shutdown(struct device *dev)
 {
-	return (get_edrv(dev))->shutdown(get_hdev(dev));
+	return (get_hdrv(dev))->shutdown(get_hdev(dev));
 }
 
 /**
@@ -234,20 +232,20 @@ static void hdac_ext_drv_shutdown(struct device *dev)
  *
  * @drv: ext hda driver structure
  */
-int snd_hda_ext_driver_register(struct hdac_ext_driver *drv)
+int snd_hda_ext_driver_register(struct hdac_driver *drv)
 {
-	drv->hdac.type = HDA_DEV_ASOC;
-	drv->hdac.driver.bus = &snd_hda_bus_type;
+	drv->type = HDA_DEV_ASOC;
+	drv->driver.bus = &snd_hda_bus_type;
 	/* we use default match */
 
 	if (drv->probe)
-		drv->hdac.driver.probe = hda_ext_drv_probe;
+		drv->driver.probe = hda_ext_drv_probe;
 	if (drv->remove)
-		drv->hdac.driver.remove = hdac_ext_drv_remove;
+		drv->driver.remove = hdac_ext_drv_remove;
 	if (drv->shutdown)
-		drv->hdac.driver.shutdown = hdac_ext_drv_shutdown;
+		drv->driver.shutdown = hdac_ext_drv_shutdown;
 
-	return driver_register(&drv->hdac.driver);
+	return driver_register(&drv->driver);
 }
 EXPORT_SYMBOL_GPL(snd_hda_ext_driver_register);
 
@@ -256,8 +254,8 @@ EXPORT_SYMBOL_GPL(snd_hda_ext_driver_register);
  *
  * @drv: ext hda driver structure
  */
-void snd_hda_ext_driver_unregister(struct hdac_ext_driver *drv)
+void snd_hda_ext_driver_unregister(struct hdac_driver *drv)
 {
-	driver_unregister(&drv->hdac.driver);
+	driver_unregister(&drv->driver);
 }
 EXPORT_SYMBOL_GPL(snd_hda_ext_driver_unregister);
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index c3ccc8d9c91d..3e3a2a9ef310 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -2186,14 +2186,12 @@ static const struct hda_device_id hdmi_list[] = {
 
 MODULE_DEVICE_TABLE(hdaudio, hdmi_list);
 
-static struct hdac_ext_driver hdmi_driver = {
-	. hdac = {
-		.driver = {
-			.name   = "HDMI HDA Codec",
-			.pm = &hdac_hdmi_pm,
-		},
-		.id_table       = hdmi_list,
+static struct hdac_driver hdmi_driver = {
+	.driver = {
+		.name   = "HDMI HDA Codec",
+		.pm = &hdac_hdmi_pm,
 	},
+	.id_table       = hdmi_list,
 	.probe          = hdac_hdmi_dev_probe,
 	.remove         = hdac_hdmi_dev_remove,
 };
-- 
cgit v1.2.3


From 6298542fa33b6ba0e3effbace5b99b70b93ed9ae Mon Sep 17 00:00:00 2001
From: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Date: Fri, 1 Jun 2018 22:53:57 -0500
Subject: ALSA: hdac: remove memory allocation from
 snd_hdac_ext_bus_device_init

Remove memory allocation within snd_hdac_ext_bus_device_init, to make
its behaviour identical to snd_hdac_bus_device_init. So that caller
can allocate the parent data structure containing hdac_device.
This API change helps in reusing the legacy HDA codec drivers with
ASoC platform drivers.

Signed-off-by: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio_ext.h   | 3 ++-
 sound/hda/ext/hdac_ext_bus.c  | 8 ++------
 sound/soc/intel/skylake/skl.c | 8 +++++++-
 3 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index 3c302477750b..c188b801239f 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -9,7 +9,8 @@ int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 		      const struct hdac_io_ops *io_ops);
 
 void snd_hdac_ext_bus_exit(struct hdac_bus *bus);
-int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr);
+int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr,
+						struct hdac_device *hdev);
 void snd_hdac_ext_bus_device_exit(struct hdac_device *hdev);
 void snd_hdac_ext_bus_device_remove(struct hdac_bus *bus);
 
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 52f07766fff3..1eb58244688e 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -135,16 +135,12 @@ static void default_release(struct device *dev)
  *
  * Returns zero for success or a negative error code.
  */
-int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr)
+int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr,
+					struct hdac_device *hdev)
 {
-	struct hdac_device *hdev = NULL;
 	char name[15];
 	int ret;
 
-	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
-	if (!hdev)
-		return -ENOMEM;
-
 	hdev->bus = bus;
 
 	snprintf(name, sizeof(name), "ehdaudio%dD%d", bus->idx, addr);
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 9c5a701d68ac..3a7f5eb4902b 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -635,6 +635,8 @@ static int probe_codec(struct hdac_bus *bus, int addr)
 	unsigned int cmd = (addr << 28) | (AC_NODE_ROOT << 20) |
 		(AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
 	unsigned int res = -1;
+	struct skl *skl = bus_to_skl(bus);
+	struct hdac_device *hdev;
 
 	mutex_lock(&bus->cmd_mutex);
 	snd_hdac_bus_send_cmd(bus, cmd);
@@ -644,7 +646,11 @@ static int probe_codec(struct hdac_bus *bus, int addr)
 		return -EIO;
 	dev_dbg(bus->dev, "codec #%d probed OK\n", addr);
 
-	return snd_hdac_ext_bus_device_init(bus, addr);
+	hdev = devm_kzalloc(&skl->pci->dev, sizeof(*hdev), GFP_KERNEL);
+	if (!hdev)
+		return -ENOMEM;
+
+	return snd_hdac_ext_bus_device_init(bus, addr, hdev);
 }
 
 /* Codec initialization */
-- 
cgit v1.2.3


From cb04ba33187ca571142b67c2fb60d0a8c24994c8 Mon Sep 17 00:00:00 2001
From: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Date: Fri, 1 Jun 2018 22:53:58 -0500
Subject: ALSA: hdac: add extended ops in the hdac_bus

Add extended ops in the hdac_bus to allow calling the ASoC HDAC library
ops to reuse the legacy HDA codec drivers with ASoC framework.
Extended ops are used by the legacy codec drivers to call into
hdac_hda library, in the subsequent patches..

Signed-off-by: Rakesh Ughreja <rakesh.a.ughreja@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h       | 9 +++++++++
 include/sound/hdaudio_ext.h   | 3 ++-
 sound/hda/ext/hdac_ext_bus.c  | 4 +++-
 sound/soc/intel/skylake/skl.c | 2 +-
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 59ffe63cf194..f1baaa88e766 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -213,6 +213,14 @@ struct hdac_bus_ops {
 	int (*link_power)(struct hdac_bus *bus, bool enable);
 };
 
+/*
+ * ops used for ASoC HDA codec drivers
+ */
+struct hdac_ext_bus_ops {
+	int (*hdev_attach)(struct hdac_device *hdev);
+	int (*hdev_detach)(struct hdac_device *hdev);
+};
+
 /*
  * Lowlevel I/O operators
  */
@@ -265,6 +273,7 @@ struct hdac_bus {
 	struct device *dev;
 	const struct hdac_bus_ops *ops;
 	const struct hdac_io_ops *io_ops;
+	const struct hdac_ext_bus_ops *ext_ops;
 
 	/* h/w resources */
 	unsigned long addr;
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index c188b801239f..f34aced69ca8 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -6,7 +6,8 @@
 
 int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 		      const struct hdac_bus_ops *ops,
-		      const struct hdac_io_ops *io_ops);
+		      const struct hdac_io_ops *io_ops,
+		      const struct hdac_ext_bus_ops *ext_ops);
 
 void snd_hdac_ext_bus_exit(struct hdac_bus *bus);
 int snd_hdac_ext_bus_device_init(struct hdac_bus *bus, int addr,
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 1eb58244688e..9c37d9af3023 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -89,7 +89,8 @@ static const struct hdac_io_ops hdac_ext_default_io = {
  */
 int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 			const struct hdac_bus_ops *ops,
-			const struct hdac_io_ops *io_ops)
+			const struct hdac_io_ops *io_ops,
+			const struct hdac_ext_bus_ops *ext_ops)
 {
 	int ret;
 	static int idx;
@@ -102,6 +103,7 @@ int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	bus->ext_ops = ext_ops;
 	INIT_LIST_HEAD(&bus->hlink_list);
 	bus->idx = idx++;
 
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 3a7f5eb4902b..00e051467a40 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -803,7 +803,7 @@ static int skl_create(struct pci_dev *pci,
 	}
 
 	bus = skl_to_bus(skl);
-	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, io_ops);
+	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, io_ops, NULL);
 	bus->use_posbuf = 1;
 	skl->pci = pci;
 	INIT_WORK(&skl->probe_work, skl_probe_work);
-- 
cgit v1.2.3


From 7a846d3c43b0b6d04300be9ba666b102b57a391a Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 27 Jun 2018 07:45:19 +0800
Subject: dt-bindings: connector: add properties for typec

Add bindings supported by current typec driver, so user can pass
all those properties via dt.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Li Jun <jun.li@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/connector/usb-connector.txt           | 44 +++++++++++++++
 include/dt-bindings/usb/pd.h                       | 62 ++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 include/dt-bindings/usb/pd.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/connector/usb-connector.txt b/Documentation/devicetree/bindings/connector/usb-connector.txt
index e1463f14af38..8855bfcfd778 100644
--- a/Documentation/devicetree/bindings/connector/usb-connector.txt
+++ b/Documentation/devicetree/bindings/connector/usb-connector.txt
@@ -15,6 +15,33 @@ Optional properties:
 - type: size of the connector, should be specified in case of USB-A, USB-B
   non-fullsize connectors: "mini", "micro".
 
+Optional properties for usb-c-connector:
+- power-role: should be one of "source", "sink" or "dual"(DRP) if typec
+  connector has power support.
+- try-power-role: preferred power role if "dual"(DRP) can support Try.SNK
+  or Try.SRC, should be "sink" for Try.SNK or "source" for Try.SRC.
+- data-role: should be one of "host", "device", "dual"(DRD) if typec
+  connector supports USB data.
+
+Required properties for usb-c-connector with power delivery support:
+- source-pdos: An array of u32 with each entry providing supported power
+  source data object(PDO), the detailed bit definitions of PDO can be found
+  in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.2
+  Source_Capabilities Message, the order of each entry(PDO) should follow
+  the PD spec chapter 6.4.1. Required for power source and power dual role.
+  User can specify the source PDO array via PDO_FIXED/BATT/VAR() defined in
+  dt-bindings/usb/pd.h.
+- sink-pdos: An array of u32 with each entry providing supported power
+  sink data object(PDO), the detailed bit definitions of PDO can be found
+  in "Universal Serial Bus Power Delivery Specification" chapter 6.4.1.3
+  Sink Capabilities Message, the order of each entry(PDO) should follow
+  the PD spec chapter 6.4.1. Required for power sink and power dual role.
+  User can specify the sink PDO array via PDO_FIXED/BATT/VAR() defined in
+  dt-bindings/usb/pd.h.
+- op-sink-microwatt: Sink required operating power in microwatt, if source
+  can't offer the power, Capability Mismatch is set. Required for power
+  sink and power dual role.
+
 Required nodes:
 - any data bus to the connector should be modeled using the OF graph bindings
   specified in bindings/graph.txt, unless the bus is between parent node and
@@ -73,3 +100,20 @@ ccic: s2mm005@33 {
 		};
 	};
 };
+
+3. USB-C connector attached to a typec port controller(ptn5110), which has
+power delivery support and enables drp.
+
+typec: ptn5110@50 {
+	...
+	usb_con: connector {
+		compatible = "usb-c-connector";
+		label = "USB-C";
+		power-role = "dual";
+		try-power-role = "sink";
+		source-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)>;
+		sink-pdos = <PDO_FIXED(5000, 2000, PDO_FIXED_USB_COMM)
+			     PDO_VAR(5000, 12000, 2000)>;
+		op-sink-microwatt = <10000000>;
+	};
+};
diff --git a/include/dt-bindings/usb/pd.h b/include/dt-bindings/usb/pd.h
new file mode 100644
index 000000000000..7b7a92fefa0a
--- /dev/null
+++ b/include/dt-bindings/usb/pd.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_POWER_DELIVERY_H
+#define __DT_POWER_DELIVERY_H
+
+/* Power delivery Power Data Object definitions */
+#define PDO_TYPE_FIXED		0
+#define PDO_TYPE_BATT		1
+#define PDO_TYPE_VAR		2
+#define PDO_TYPE_APDO		3
+
+#define PDO_TYPE_SHIFT		30
+#define PDO_TYPE_MASK		0x3
+
+#define PDO_TYPE(t)	((t) << PDO_TYPE_SHIFT)
+
+#define PDO_VOLT_MASK		0x3ff
+#define PDO_CURR_MASK		0x3ff
+#define PDO_PWR_MASK		0x3ff
+
+#define PDO_FIXED_DUAL_ROLE	(1 << 29) /* Power role swap supported */
+#define PDO_FIXED_SUSPEND	(1 << 28) /* USB Suspend supported (Source) */
+#define PDO_FIXED_HIGHER_CAP	(1 << 28) /* Requires more than vSafe5V (Sink) */
+#define PDO_FIXED_EXTPOWER	(1 << 27) /* Externally powered */
+#define PDO_FIXED_USB_COMM	(1 << 26) /* USB communications capable */
+#define PDO_FIXED_DATA_SWAP	(1 << 25) /* Data role swap supported */
+#define PDO_FIXED_VOLT_SHIFT	10	/* 50mV units */
+#define PDO_FIXED_CURR_SHIFT	0	/* 10mA units */
+
+#define PDO_FIXED_VOLT(mv)	((((mv) / 50) & PDO_VOLT_MASK) << PDO_FIXED_VOLT_SHIFT)
+#define PDO_FIXED_CURR(ma)	((((ma) / 10) & PDO_CURR_MASK) << PDO_FIXED_CURR_SHIFT)
+
+#define PDO_FIXED(mv, ma, flags)			\
+	(PDO_TYPE(PDO_TYPE_FIXED) | (flags) |		\
+	 PDO_FIXED_VOLT(mv) | PDO_FIXED_CURR(ma))
+
+#define VSAFE5V 5000 /* mv units */
+
+#define PDO_BATT_MAX_VOLT_SHIFT	20	/* 50mV units */
+#define PDO_BATT_MIN_VOLT_SHIFT	10	/* 50mV units */
+#define PDO_BATT_MAX_PWR_SHIFT	0	/* 250mW units */
+
+#define PDO_BATT_MIN_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_BATT_MIN_VOLT_SHIFT)
+#define PDO_BATT_MAX_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_BATT_MAX_VOLT_SHIFT)
+#define PDO_BATT_MAX_POWER(mw) ((((mw) / 250) & PDO_PWR_MASK) << PDO_BATT_MAX_PWR_SHIFT)
+
+#define PDO_BATT(min_mv, max_mv, max_mw)			\
+	(PDO_TYPE(PDO_TYPE_BATT) | PDO_BATT_MIN_VOLT(min_mv) |	\
+	 PDO_BATT_MAX_VOLT(max_mv) | PDO_BATT_MAX_POWER(max_mw))
+
+#define PDO_VAR_MAX_VOLT_SHIFT	20	/* 50mV units */
+#define PDO_VAR_MIN_VOLT_SHIFT	10	/* 50mV units */
+#define PDO_VAR_MAX_CURR_SHIFT	0	/* 10mA units */
+
+#define PDO_VAR_MIN_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_VAR_MIN_VOLT_SHIFT)
+#define PDO_VAR_MAX_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_VAR_MAX_VOLT_SHIFT)
+#define PDO_VAR_MAX_CURR(ma) ((((ma) / 10) & PDO_CURR_MASK) << PDO_VAR_MAX_CURR_SHIFT)
+
+#define PDO_VAR(min_mv, max_mv, max_ma)				\
+	(PDO_TYPE(PDO_TYPE_VAR) | PDO_VAR_MIN_VOLT(min_mv) |	\
+	 PDO_VAR_MAX_VOLT(max_mv) | PDO_VAR_MAX_CURR(max_ma))
+
+ #endif /* __DT_POWER_DELIVERY_H */
-- 
cgit v1.2.3


From 5e85a04c8c0d271d7561a770b85741f186398868 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 27 Jun 2018 07:45:22 +0800
Subject: usb: typec: add fwnode to tcpc

Add fwnode handle to get the fwnode so we can get typec configs
it contains.

Suggested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Li Jun <jun.li@nxp.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/typec/tcpci.c | 7 +++++++
 drivers/usb/typec/tcpm.c      | 1 +
 include/linux/usb/tcpm.h      | 2 ++
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/drivers/staging/typec/tcpci.c b/drivers/staging/typec/tcpci.c
index dd2928824ef6..e59547a32605 100644
--- a/drivers/staging/typec/tcpci.c
+++ b/drivers/staging/typec/tcpci.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
+#include <linux/property.h>
 #include <linux/regmap.h>
 #include <linux/usb/pd.h>
 #include <linux/usb/tcpm.h>
@@ -474,6 +475,12 @@ static int tcpci_parse_config(struct tcpci *tcpci)
 
 	/* TODO: Populate struct tcpc_config from ACPI/device-tree */
 	tcpci->tcpc.config = &tcpci_tcpc_config;
+	tcpci->tcpc.fwnode = device_get_named_child_node(tcpci->dev,
+							 "connector");
+	if (!tcpci->tcpc.fwnode) {
+		dev_err(tcpci->dev, "Can't find connector node.\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 8a201dd53d36..d22b37bf4e1a 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -4576,6 +4576,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 	else
 		port->try_role = TYPEC_NO_PREFERRED_ROLE;
 
+	port->typec_caps.fwnode = tcpc->fwnode;
 	port->typec_caps.prefer_role = tcpc->config->default_role;
 	port->typec_caps.type = tcpc->config->type;
 	port->typec_caps.data = tcpc->config->data;
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index b231b9314240..193920a2e05f 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -110,6 +110,7 @@ enum tcpc_mux_mode {
 /**
  * struct tcpc_dev - Port configuration and callback functions
  * @config:	Pointer to port configuration
+ * @fwnode:	Pointer to port fwnode
  * @get_vbus:	Called to read current VBUS state
  * @get_current_limit:
  *		Optional; called by the tcpm core when configured as a snk
@@ -138,6 +139,7 @@ enum tcpc_mux_mode {
  */
 struct tcpc_dev {
 	const struct tcpc_config *config;
+	struct fwnode_handle *fwnode;
 
 	int (*init)(struct tcpc_dev *dev);
 	int (*get_vbus)(struct tcpc_dev *dev);
-- 
cgit v1.2.3


From 9c90e02434b66f9bcfc5c9a91a808eee5b7aa21b Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 27 Jun 2018 07:45:23 +0800
Subject: usb: typec: add API to get typec basic port power and data config

This patch adds 3 APIs to get the typec port power and data type,
and preferred power role by its name string.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Li Jun <jun.li@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 58 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/usb/typec.h |  3 +++
 2 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 53df10df2f9d..633105917fa6 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -796,12 +796,18 @@ static const char * const typec_data_roles[] = {
 	[TYPEC_HOST]	= "host",
 };
 
-static const char * const typec_port_types[] = {
+static const char * const typec_port_power_roles[] = {
 	[TYPEC_PORT_SRC] = "source",
 	[TYPEC_PORT_SNK] = "sink",
 	[TYPEC_PORT_DRP] = "dual",
 };
 
+static const char * const typec_port_data_roles[] = {
+	[TYPEC_PORT_DFP] = "host",
+	[TYPEC_PORT_UFP] = "device",
+	[TYPEC_PORT_DRD] = "dual",
+};
+
 static const char * const typec_port_types_drp[] = {
 	[TYPEC_PORT_SRC] = "dual [source] sink",
 	[TYPEC_PORT_SNK] = "dual source [sink]",
@@ -932,7 +938,7 @@ static ssize_t power_role_store(struct device *dev,
 	mutex_lock(&port->port_type_lock);
 	if (port->port_type != TYPEC_PORT_DRP) {
 		dev_dbg(dev, "port type fixed at \"%s\"",
-			     typec_port_types[port->port_type]);
+			     typec_port_power_roles[port->port_type]);
 		ret = -EOPNOTSUPP;
 		goto unlock_and_ret;
 	}
@@ -973,7 +979,7 @@ port_type_store(struct device *dev, struct device_attribute *attr,
 		return -EOPNOTSUPP;
 	}
 
-	ret = sysfs_match_string(typec_port_types, buf);
+	ret = sysfs_match_string(typec_port_power_roles, buf);
 	if (ret < 0)
 		return ret;
 
@@ -1007,7 +1013,7 @@ port_type_show(struct device *dev, struct device_attribute *attr,
 		return sprintf(buf, "%s\n",
 			       typec_port_types_drp[port->port_type]);
 
-	return sprintf(buf, "[%s]\n", typec_port_types[port->cap->type]);
+	return sprintf(buf, "[%s]\n", typec_port_power_roles[port->cap->type]);
 }
 static DEVICE_ATTR_RW(port_type);
 
@@ -1252,6 +1258,50 @@ void typec_set_pwr_opmode(struct typec_port *port,
 }
 EXPORT_SYMBOL_GPL(typec_set_pwr_opmode);
 
+/**
+ * typec_find_port_power_role - Get the typec port power capability
+ * @name: port power capability string
+ *
+ * This routine is used to find the typec_port_type by its string name.
+ *
+ * Returns typec_port_type if success, otherwise negative error code.
+ */
+int typec_find_port_power_role(const char *name)
+{
+	return match_string(typec_port_power_roles,
+			    ARRAY_SIZE(typec_port_power_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_port_power_role);
+
+/**
+ * typec_find_power_role - Find the typec one specific power role
+ * @name: power role string
+ *
+ * This routine is used to find the typec_role by its string name.
+ *
+ * Returns typec_role if success, otherwise negative error code.
+ */
+int typec_find_power_role(const char *name)
+{
+	return match_string(typec_roles, ARRAY_SIZE(typec_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_power_role);
+
+/**
+ * typec_find_port_data_role - Get the typec port data capability
+ * @name: port data capability string
+ *
+ * This routine is used to find the typec_port_data by its string name.
+ *
+ * Returns typec_port_data if success, otherwise negative error code.
+ */
+int typec_find_port_data_role(const char *name)
+{
+	return match_string(typec_port_data_roles,
+			    ARRAY_SIZE(typec_port_data_roles), name);
+}
+EXPORT_SYMBOL_GPL(typec_find_port_data_role);
+
 /* ------------------------------------------ */
 /* API for Multiplexer/DeMultiplexer Switches */
 
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 672b39bb0adc..15f8d9a50b31 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -267,4 +267,7 @@ int typec_set_orientation(struct typec_port *port,
 			  enum typec_orientation orientation);
 int typec_set_mode(struct typec_port *port, int mode);
 
+int typec_find_port_power_role(const char *name);
+int typec_find_power_role(const char *name);
+int typec_find_port_data_role(const char *name);
 #endif /* __LINUX_USB_TYPEC_H */
-- 
cgit v1.2.3


From 82f5d7749fa4f3851f705c75b33babf68edb90b8 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:46 +0300
Subject: usb: pd: include kernel.h

This makes life a bit easier for the drivers that include
pd.h. All pd_header_*_le() inline functions defined in pd.h
call le16_to_cpu(), and all *_LE() macros in pd.h call
cpu_to_le16().

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/pd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index 09b570feb297..f2162e0fe531 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -15,6 +15,7 @@
 #ifndef __LINUX_USB_PD_H
 #define __LINUX_USB_PD_H
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/usb/typec.h>
 
-- 
cgit v1.2.3


From aaf3f4e925dc2bdc4715142103660285632a245c Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:47 +0300
Subject: usb: typec: function for checking cable plug orientation

This adds function typec_get_orientation() that can be used
for checking the current cable plug orientation.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 12 ++++++++++++
 include/linux/usb/typec.h |  1 +
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 633105917fa6..3ef7b99b080f 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1329,6 +1329,18 @@ int typec_set_orientation(struct typec_port *port,
 }
 EXPORT_SYMBOL_GPL(typec_set_orientation);
 
+/**
+ * typec_get_orientation - Get USB Type-C cable plug orientation
+ * @port: USB Type-C Port
+ *
+ * Get current cable plug orientation for @port.
+ */
+enum typec_orientation typec_get_orientation(struct typec_port *port)
+{
+	return port->orientation;
+}
+EXPORT_SYMBOL_GPL(typec_get_orientation);
+
 /**
  * typec_set_mode - Set mode of operation for USB Type-C connector
  * @port: USB Type-C port for the connector
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 15f8d9a50b31..6c33b53d1162 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -265,6 +265,7 @@ void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode);
 
 int typec_set_orientation(struct typec_port *port,
 			  enum typec_orientation orientation);
+enum typec_orientation typec_get_orientation(struct typec_port *port);
 int typec_set_mode(struct typec_port *port, int mode);
 
 int typec_find_port_power_role(const char *name);
-- 
cgit v1.2.3


From 653d500ccaadd76ccade9f07469cdc66759315b6 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Sun, 6 May 2018 04:06:07 -0400
Subject: media: i2c: lm3560: add support for lm3559 chip

Add support for LM3559, as found in Motorola Droid 4 phone, for
example. SW interface seems to be identical.

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/i2c/lm3560.c | 3 ++-
 include/media/i2c/lm3560.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/i2c/lm3560.c b/drivers/media/i2c/lm3560.c
index b600e03aa94b..49c6644cbba7 100644
--- a/drivers/media/i2c/lm3560.c
+++ b/drivers/media/i2c/lm3560.c
@@ -1,6 +1,6 @@
 /*
  * drivers/media/i2c/lm3560.c
- * General device driver for TI lm3560, FLASH LED Driver
+ * General device driver for TI lm3559, lm3560, FLASH LED Driver
  *
  * Copyright (C) 2013 Texas Instruments
  *
@@ -465,6 +465,7 @@ static int lm3560_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id lm3560_id_table[] = {
+	{LM3559_NAME, 0},
 	{LM3560_NAME, 0},
 	{}
 };
diff --git a/include/media/i2c/lm3560.h b/include/media/i2c/lm3560.h
index a5bd310c9e1e..0e2b1c751a5d 100644
--- a/include/media/i2c/lm3560.h
+++ b/include/media/i2c/lm3560.h
@@ -22,6 +22,7 @@
 
 #include <media/v4l2-subdev.h>
 
+#define LM3559_NAME	"lm3559"
 #define LM3560_NAME	"lm3560"
 #define LM3560_I2C_ADDR	(0x53)
 
-- 
cgit v1.2.3


From aa2f88712718d045855acc6686dbce6a7286d010 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 17 May 2018 10:30:15 -0400
Subject: media: v4l2-ioctl: create helper to fill in v4l2_standard for ENUMSTD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Prepare for adding a new IOCTL VIDIOC_SUBDEV_ENUMSTD which would
enumerate the standards for a subdevice by breaking out the code which
could be shared between the video and subdevice versions of this IOCTL.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
[hans.verkuil@cisco.com: fixed 'sdandard' typos in v4l2-ioctl.h]
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 66 ++++++++++++++++++++----------------
 include/media/v4l2-ioctl.h           | 15 ++++++--
 2 files changed, 50 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index dd210067151f..eeed14468a17 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -125,6 +125,42 @@ int v4l2_video_std_construct(struct v4l2_standard *vs,
 }
 EXPORT_SYMBOL(v4l2_video_std_construct);
 
+/* Fill in the fields of a v4l2_standard structure according to the
+ * 'id' and 'vs->index' parameters. Returns negative on error. */
+int v4l_video_std_enumstd(struct v4l2_standard *vs, v4l2_std_id id)
+{
+	v4l2_std_id curr_id = 0;
+	unsigned int index = vs->index, i, j = 0;
+	const char *descr = "";
+
+	/* Return -ENODATA if the id for the current input
+	   or output is 0, meaning that it doesn't support this API. */
+	if (id == 0)
+		return -ENODATA;
+
+	/* Return norm array in a canonical way */
+	for (i = 0; i <= index && id; i++) {
+		/* last std value in the standards array is 0, so this
+		   while always ends there since (id & 0) == 0. */
+		while ((id & standards[j].std) != standards[j].std)
+			j++;
+		curr_id = standards[j].std;
+		descr = standards[j].descr;
+		j++;
+		if (curr_id == 0)
+			break;
+		if (curr_id != V4L2_STD_PAL &&
+				curr_id != V4L2_STD_SECAM &&
+				curr_id != V4L2_STD_NTSC)
+			id &= ~curr_id;
+	}
+	if (i <= index)
+		return -EINVAL;
+
+	v4l2_video_std_construct(vs, curr_id, descr);
+	return 0;
+}
+
 /* ----------------------------------------------------------------- */
 /* some arrays for pretty-printing debug messages of enum types      */
 
@@ -1753,36 +1789,8 @@ static int v4l_enumstd(const struct v4l2_ioctl_ops *ops,
 {
 	struct video_device *vfd = video_devdata(file);
 	struct v4l2_standard *p = arg;
-	v4l2_std_id id = vfd->tvnorms, curr_id = 0;
-	unsigned int index = p->index, i, j = 0;
-	const char *descr = "";
-
-	/* Return -ENODATA if the tvnorms for the current input
-	   or output is 0, meaning that it doesn't support this API. */
-	if (id == 0)
-		return -ENODATA;
 
-	/* Return norm array in a canonical way */
-	for (i = 0; i <= index && id; i++) {
-		/* last std value in the standards array is 0, so this
-		   while always ends there since (id & 0) == 0. */
-		while ((id & standards[j].std) != standards[j].std)
-			j++;
-		curr_id = standards[j].std;
-		descr = standards[j].descr;
-		j++;
-		if (curr_id == 0)
-			break;
-		if (curr_id != V4L2_STD_PAL &&
-				curr_id != V4L2_STD_SECAM &&
-				curr_id != V4L2_STD_NTSC)
-			id &= ~curr_id;
-	}
-	if (i <= index)
-		return -EINVAL;
-
-	v4l2_video_std_construct(p, curr_id, descr);
-	return 0;
+	return v4l_video_std_enumstd(p, vfd->tvnorms);
 }
 
 static int v4l_s_std(const struct v4l2_ioctl_ops *ops,
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index a8dbf5b54b5c..5848d92c30da 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -621,7 +621,7 @@ const char *v4l2_norm_to_name(v4l2_std_id id);
  * v4l2_video_std_frame_period - Ancillary routine that fills a
  *	struct &v4l2_fract pointer with the default framerate fraction.
  *
- * @id: analog TV sdandard ID.
+ * @id: analog TV standard ID.
  * @frameperiod: struct &v4l2_fract pointer to be filled
  *
  */
@@ -632,7 +632,7 @@ void v4l2_video_std_frame_period(int id, struct v4l2_fract *frameperiod);
  *	a &v4l2_standard structure according to the @id parameter.
  *
  * @vs: struct &v4l2_standard pointer to be filled
- * @id: analog TV sdandard ID.
+ * @id: analog TV standard ID.
  * @name: name of the standard to be used
  *
  * .. note::
@@ -642,6 +642,17 @@ void v4l2_video_std_frame_period(int id, struct v4l2_fract *frameperiod);
 int v4l2_video_std_construct(struct v4l2_standard *vs,
 				    int id, const char *name);
 
+/**
+ * v4l_video_std_enumstd - Ancillary routine that fills in the fields of
+ *	a &v4l2_standard structure according to the @id and @vs->index
+ *	parameters.
+ *
+ * @vs: struct &v4l2_standard pointer to be filled.
+ * @id: analog TV standard ID.
+ *
+ */
+int v4l_video_std_enumstd(struct v4l2_standard *vs, v4l2_std_id id);
+
 /**
  * v4l_printk_ioctl - Ancillary routine that prints the ioctl in a
  *	human-readable format.
-- 
cgit v1.2.3


From 2b6a440351436d792b1960822da4b7d6e673f568 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 1 Jun 2018 10:22:52 +0200
Subject: gnss: add GNSS receiver subsystem

Add a new subsystem for GNSS (e.g. GPS) receivers.

While GNSS receivers are typically accessed using a UART interface they
often also support other I/O interfaces such as I2C, SPI and USB, while
yet other devices use iomem or even some form of remote-processor
messaging (rpmsg).

The new GNSS subsystem abstracts the underlying interface and provides a
new "gnss" class type, which exposes a character-device interface (e.g.
/dev/gnss0) to user space. This allows GNSS receivers to have a
representation in the Linux device model, something which is important
not least for power management purposes.

Note that the character-device interface provides raw access to whatever
protocol the receiver is (currently) using, such as NMEA 0183, UBX or
SiRF Binary. These protocols are expected to be continued to be handled
by user space for the time being, even if some hybrid solutions are also
conceivable (e.g. to have kernel drivers issue management commands).

This will still allow for better platform integration by allowing GNSS
devices and their resources (e.g. regulators and enable-gpios) to be
described by firmware and managed by kernel drivers rather than
platform-specific scripts and services.

While the current interface is kept minimal, it could be extended using
IOCTLs, sysfs or uevents as needs and proper abstraction levels are
identified and determined (e.g. for device and feature identification).

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS           |   6 +
 drivers/Kconfig       |   2 +
 drivers/Makefile      |   1 +
 drivers/gnss/Kconfig  |  11 ++
 drivers/gnss/Makefile |   7 +
 drivers/gnss/core.c   | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/gnss.h  |  66 +++++++++
 7 files changed, 464 insertions(+)
 create mode 100644 drivers/gnss/Kconfig
 create mode 100644 drivers/gnss/Makefile
 create mode 100644 drivers/gnss/core.c
 create mode 100644 include/linux/gnss.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6cfd16790add..436e6dcabf34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6038,6 +6038,12 @@ F:	Documentation/isdn/README.gigaset
 F:	drivers/isdn/gigaset/
 F:	include/uapi/linux/gigaset_dev.h
 
+GNSS SUBSYSTEM
+M:	Johan Hovold <johan@kernel.org>
+S:	Maintained
+F:	drivers/gnss/
+F:	include/linux/gnss.h
+
 GO7007 MPEG CODEC
 M:	Hans Verkuil <hans.verkuil@cisco.com>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 95b9ccc08165..ab4d43923c4d 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -9,6 +9,8 @@ source "drivers/bus/Kconfig"
 
 source "drivers/connector/Kconfig"
 
+source "drivers/gnss/Kconfig"
+
 source "drivers/mtd/Kconfig"
 
 source "drivers/of/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 24cd47014657..cc9a7c5f7d2c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -185,3 +185,4 @@ obj-$(CONFIG_TEE)		+= tee/
 obj-$(CONFIG_MULTIPLEXER)	+= mux/
 obj-$(CONFIG_UNISYS_VISORBUS)	+= visorbus/
 obj-$(CONFIG_SIOX)		+= siox/
+obj-$(CONFIG_GNSS)		+= gnss/
diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig
new file mode 100644
index 000000000000..103fcc70992e
--- /dev/null
+++ b/drivers/gnss/Kconfig
@@ -0,0 +1,11 @@
+#
+# GNSS receiver configuration
+#
+
+menuconfig GNSS
+	tristate "GNSS receiver support"
+	---help---
+	  Say Y here if you have a GNSS receiver (e.g. a GPS receiver).
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called gnss.
diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile
new file mode 100644
index 000000000000..1f7a7baab1d9
--- /dev/null
+++ b/drivers/gnss/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the GNSS subsystem.
+#
+
+obj-$(CONFIG_GNSS)			+= gnss.o
+gnss-y := core.o
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
new file mode 100644
index 000000000000..307894ca2725
--- /dev/null
+++ b/drivers/gnss/core.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GNSS receiver core
+ *
+ * Copyright (C) 2018 Johan Hovold <johan@kernel.org>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/gnss.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
+#define GNSS_FLAG_HAS_WRITE_RAW		BIT(0)
+
+#define GNSS_MINORS	16
+
+static DEFINE_IDA(gnss_minors);
+static dev_t gnss_first;
+
+/* FIFO size must be a power of two */
+#define GNSS_READ_FIFO_SIZE	4096
+#define GNSS_WRITE_BUF_SIZE	1024
+
+#define to_gnss_device(d) container_of((d), struct gnss_device, dev)
+
+static int gnss_open(struct inode *inode, struct file *file)
+{
+	struct gnss_device *gdev;
+	int ret = 0;
+
+	gdev = container_of(inode->i_cdev, struct gnss_device, cdev);
+
+	get_device(&gdev->dev);
+
+	nonseekable_open(inode, file);
+	file->private_data = gdev;
+
+	down_write(&gdev->rwsem);
+	if (gdev->disconnected) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
+	if (gdev->count++ == 0) {
+		ret = gdev->ops->open(gdev);
+		if (ret)
+			gdev->count--;
+	}
+unlock:
+	up_write(&gdev->rwsem);
+
+	if (ret)
+		put_device(&gdev->dev);
+
+	return ret;
+}
+
+static int gnss_release(struct inode *inode, struct file *file)
+{
+	struct gnss_device *gdev = file->private_data;
+
+	down_write(&gdev->rwsem);
+	if (gdev->disconnected)
+		goto unlock;
+
+	if (--gdev->count == 0) {
+		gdev->ops->close(gdev);
+		kfifo_reset(&gdev->read_fifo);
+	}
+unlock:
+	up_write(&gdev->rwsem);
+
+	put_device(&gdev->dev);
+
+	return 0;
+}
+
+static ssize_t gnss_read(struct file *file, char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct gnss_device *gdev = file->private_data;
+	unsigned int copied;
+	int ret;
+
+	mutex_lock(&gdev->read_mutex);
+	while (kfifo_is_empty(&gdev->read_fifo)) {
+		mutex_unlock(&gdev->read_mutex);
+
+		if (gdev->disconnected)
+			return 0;
+
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(gdev->read_queue,
+				gdev->disconnected ||
+				!kfifo_is_empty(&gdev->read_fifo));
+		if (ret)
+			return -ERESTARTSYS;
+
+		mutex_lock(&gdev->read_mutex);
+	}
+
+	ret = kfifo_to_user(&gdev->read_fifo, buf, count, &copied);
+	if (ret == 0)
+		ret = copied;
+
+	mutex_unlock(&gdev->read_mutex);
+
+	return ret;
+}
+
+static ssize_t gnss_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct gnss_device *gdev = file->private_data;
+	size_t written = 0;
+	int ret;
+
+	if (gdev->disconnected)
+		return -EIO;
+
+	if (!count)
+		return 0;
+
+	if (!(gdev->flags & GNSS_FLAG_HAS_WRITE_RAW))
+		return -EIO;
+
+	/* Ignoring O_NONBLOCK, write_raw() is synchronous. */
+
+	ret = mutex_lock_interruptible(&gdev->write_mutex);
+	if (ret)
+		return -ERESTARTSYS;
+
+	for (;;) {
+		size_t n = count - written;
+
+		if (n > GNSS_WRITE_BUF_SIZE)
+			n = GNSS_WRITE_BUF_SIZE;
+
+		if (copy_from_user(gdev->write_buf, buf, n)) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+
+		/*
+		 * Assumes write_raw can always accept GNSS_WRITE_BUF_SIZE
+		 * bytes.
+		 *
+		 * FIXME: revisit
+		 */
+		down_read(&gdev->rwsem);
+		if (!gdev->disconnected)
+			ret = gdev->ops->write_raw(gdev, gdev->write_buf, n);
+		else
+			ret = -EIO;
+		up_read(&gdev->rwsem);
+
+		if (ret < 0)
+			break;
+
+		written += ret;
+		buf += ret;
+
+		if (written == count)
+			break;
+	}
+
+	if (written)
+		ret = written;
+out_unlock:
+	mutex_unlock(&gdev->write_mutex);
+
+	return ret;
+}
+
+static __poll_t gnss_poll(struct file *file, poll_table *wait)
+{
+	struct gnss_device *gdev = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &gdev->read_queue, wait);
+
+	if (!kfifo_is_empty(&gdev->read_fifo))
+		mask |= EPOLLIN | EPOLLRDNORM;
+	if (gdev->disconnected)
+		mask |= EPOLLHUP;
+
+	return mask;
+}
+
+static const struct file_operations gnss_fops = {
+	.owner		= THIS_MODULE,
+	.open		= gnss_open,
+	.release	= gnss_release,
+	.read		= gnss_read,
+	.write		= gnss_write,
+	.poll		= gnss_poll,
+	.llseek		= no_llseek,
+};
+
+static struct class *gnss_class;
+
+static void gnss_device_release(struct device *dev)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+
+	kfree(gdev->write_buf);
+	kfifo_free(&gdev->read_fifo);
+	ida_simple_remove(&gnss_minors, gdev->id);
+	kfree(gdev);
+}
+
+struct gnss_device *gnss_allocate_device(struct device *parent)
+{
+	struct gnss_device *gdev;
+	struct device *dev;
+	int id;
+	int ret;
+
+	gdev = kzalloc(sizeof(*gdev), GFP_KERNEL);
+	if (!gdev)
+		return NULL;
+
+	id = ida_simple_get(&gnss_minors, 0, GNSS_MINORS, GFP_KERNEL);
+	if (id < 0) {
+		kfree(gdev);
+		return ERR_PTR(id);
+	}
+
+	gdev->id = id;
+
+	dev = &gdev->dev;
+	device_initialize(dev);
+	dev->devt = gnss_first + id;
+	dev->class = gnss_class;
+	dev->parent = parent;
+	dev->release = gnss_device_release;
+	dev_set_drvdata(dev, gdev);
+	dev_set_name(dev, "gnss%d", id);
+
+	init_rwsem(&gdev->rwsem);
+	mutex_init(&gdev->read_mutex);
+	mutex_init(&gdev->write_mutex);
+	init_waitqueue_head(&gdev->read_queue);
+
+	ret = kfifo_alloc(&gdev->read_fifo, GNSS_READ_FIFO_SIZE, GFP_KERNEL);
+	if (ret)
+		goto err_put_device;
+
+	gdev->write_buf = kzalloc(GNSS_WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!gdev->write_buf)
+		goto err_put_device;
+
+	cdev_init(&gdev->cdev, &gnss_fops);
+	gdev->cdev.owner = THIS_MODULE;
+
+	return gdev;
+
+err_put_device:
+	put_device(dev);
+
+	return ERR_PTR(-ENOMEM);
+}
+EXPORT_SYMBOL_GPL(gnss_allocate_device);
+
+void gnss_put_device(struct gnss_device *gdev)
+{
+	put_device(&gdev->dev);
+}
+EXPORT_SYMBOL_GPL(gnss_put_device);
+
+int gnss_register_device(struct gnss_device *gdev)
+{
+	int ret;
+
+	/* Set a flag which can be accessed without holding the rwsem. */
+	if (gdev->ops->write_raw != NULL)
+		gdev->flags |= GNSS_FLAG_HAS_WRITE_RAW;
+
+	ret = cdev_device_add(&gdev->cdev, &gdev->dev);
+	if (ret) {
+		dev_err(&gdev->dev, "failed to add device: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnss_register_device);
+
+void gnss_deregister_device(struct gnss_device *gdev)
+{
+	down_write(&gdev->rwsem);
+	gdev->disconnected = true;
+	if (gdev->count) {
+		wake_up_interruptible(&gdev->read_queue);
+		gdev->ops->close(gdev);
+	}
+	up_write(&gdev->rwsem);
+
+	cdev_device_del(&gdev->cdev, &gdev->dev);
+}
+EXPORT_SYMBOL_GPL(gnss_deregister_device);
+
+/*
+ * Caller guarantees serialisation.
+ *
+ * Must not be called for a closed device.
+ */
+int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
+				size_t count)
+{
+	int ret;
+
+	ret = kfifo_in(&gdev->read_fifo, buf, count);
+
+	wake_up_interruptible(&gdev->read_queue);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnss_insert_raw);
+
+static int __init gnss_module_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&gnss_first, 0, GNSS_MINORS, "gnss");
+	if (ret < 0) {
+		pr_err("failed to allocate device numbers: %d\n", ret);
+		return ret;
+	}
+
+	gnss_class = class_create(THIS_MODULE, "gnss");
+	if (IS_ERR(gnss_class)) {
+		ret = PTR_ERR(gnss_class);
+		pr_err("failed to create class: %d\n", ret);
+		goto err_unregister_chrdev;
+	}
+
+	pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first));
+
+	return 0;
+
+err_unregister_chrdev:
+	unregister_chrdev_region(gnss_first, GNSS_MINORS);
+
+	return ret;
+}
+module_init(gnss_module_init);
+
+static void __exit gnss_module_exit(void)
+{
+	class_destroy(gnss_class);
+	unregister_chrdev_region(gnss_first, GNSS_MINORS);
+	ida_destroy(&gnss_minors);
+}
+module_exit(gnss_module_exit);
+
+MODULE_AUTHOR("Johan Hovold <johan@kernel.org>");
+MODULE_DESCRIPTION("GNSS receiver core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/gnss.h b/include/linux/gnss.h
new file mode 100644
index 000000000000..e26aeac1e0e2
--- /dev/null
+++ b/include/linux/gnss.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GNSS receiver support
+ *
+ * Copyright (C) 2018 Johan Hovold <johan@kernel.org>
+ */
+
+#ifndef _LINUX_GNSS_H
+#define _LINUX_GNSS_H
+
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/kfifo.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct gnss_device;
+
+struct gnss_operations {
+	int (*open)(struct gnss_device *gdev);
+	void (*close)(struct gnss_device *gdev);
+	int (*write_raw)(struct gnss_device *gdev, const unsigned char *buf,
+				size_t count);
+};
+
+struct gnss_device {
+	struct device dev;
+	struct cdev cdev;
+	int id;
+
+	unsigned long flags;
+
+	struct rw_semaphore rwsem;
+	const struct gnss_operations *ops;
+	unsigned int count;
+	unsigned int disconnected:1;
+
+	struct mutex read_mutex;
+	struct kfifo read_fifo;
+	wait_queue_head_t read_queue;
+
+	struct mutex write_mutex;
+	char *write_buf;
+};
+
+struct gnss_device *gnss_allocate_device(struct device *parent);
+void gnss_put_device(struct gnss_device *gdev);
+int gnss_register_device(struct gnss_device *gdev);
+void gnss_deregister_device(struct gnss_device *gdev);
+
+int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
+			size_t count);
+
+static inline void gnss_set_drvdata(struct gnss_device *gdev, void *data)
+{
+	dev_set_drvdata(&gdev->dev, data);
+}
+
+static inline void *gnss_get_drvdata(struct gnss_device *gdev)
+{
+	return dev_get_drvdata(&gdev->dev);
+}
+
+#endif /* _LINUX_GNSS_H */
-- 
cgit v1.2.3


From 10f146639fee5ffaf7cf0081c1af518f7d0c533c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 1 Jun 2018 10:22:59 +0200
Subject: gnss: add receiver type support

Add a "type" device attribute and a "GNSS_TYPE" uevent variable which
can be used to determine the type of a GNSS receiver. The currently
identified types reflect the protocol(s) supported by a receiver:

	"NMEA"	NMEA 0183
	"SiRF"	SiRF Binary
	"UBX"	UBX

Note that both SiRF and UBX type receivers typically support a subset of
NMEA 0183 with vendor extensions (e.g. to allow switching to the vendor
protocol).

Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-gnss | 15 +++++++++
 MAINTAINERS                                |  1 +
 drivers/gnss/core.c                        | 49 ++++++++++++++++++++++++++++++
 drivers/gnss/sirf.c                        |  1 +
 drivers/gnss/ubx.c                         |  2 ++
 include/linux/gnss.h                       |  9 ++++++
 6 files changed, 77 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-gnss

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-gnss b/Documentation/ABI/testing/sysfs-class-gnss
new file mode 100644
index 000000000000..2467b6900eae
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-gnss
@@ -0,0 +1,15 @@
+What:		/sys/class/gnss/gnssN/type
+Date:		May 2018
+KernelVersion:	4.18
+Contact:	Johan Hovold <johan@kernel.org>
+Description:
+		The GNSS receiver type. The currently identified types reflect
+		the protocol(s) supported by the receiver:
+
+			"NMEA"		NMEA 0183
+			"SiRF"		SiRF Binary
+			"UBX"		UBX
+
+		Note that also non-"NMEA" type receivers typically support a
+		subset of NMEA 0183 with vendor extensions (e.g. to allow
+		switching to a vendor protocol).
diff --git a/MAINTAINERS b/MAINTAINERS
index f980c6186094..e01d220a6f05 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6041,6 +6041,7 @@ F:	include/uapi/linux/gigaset_dev.h
 GNSS SUBSYSTEM
 M:	Johan Hovold <johan@kernel.org>
 S:	Maintained
+F:	Documentation/ABI/testing/sysfs-class-gnss
 F:	Documentation/devicetree/bindings/gnss/
 F:	drivers/gnss/
 F:	include/linux/gnss.h
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
index 307894ca2725..f30ef8338b3a 100644
--- a/drivers/gnss/core.c
+++ b/drivers/gnss/core.c
@@ -330,6 +330,52 @@ int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(gnss_insert_raw);
 
+static const char * const gnss_type_names[GNSS_TYPE_COUNT] = {
+	[GNSS_TYPE_NMEA]	= "NMEA",
+	[GNSS_TYPE_SIRF]	= "SiRF",
+	[GNSS_TYPE_UBX]		= "UBX",
+};
+
+static const char *gnss_type_name(struct gnss_device *gdev)
+{
+	const char *name = NULL;
+
+	if (gdev->type < GNSS_TYPE_COUNT)
+		name = gnss_type_names[gdev->type];
+
+	if (!name)
+		dev_WARN(&gdev->dev, "type name not defined\n");
+
+	return name;
+}
+
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+
+	return sprintf(buf, "%s\n", gnss_type_name(gdev));
+}
+static DEVICE_ATTR_RO(type);
+
+static struct attribute *gnss_attrs[] = {
+	&dev_attr_type.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(gnss);
+
+static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct gnss_device *gdev = to_gnss_device(dev);
+	int ret;
+
+	ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev));
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int __init gnss_module_init(void)
 {
 	int ret;
@@ -347,6 +393,9 @@ static int __init gnss_module_init(void)
 		goto err_unregister_chrdev;
 	}
 
+	gnss_class->dev_groups = gnss_groups;
+	gnss_class->dev_uevent = gnss_uevent;
+
 	pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first));
 
 	return 0;
diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c
index 5fb0f730db48..79cb98950013 100644
--- a/drivers/gnss/sirf.c
+++ b/drivers/gnss/sirf.c
@@ -267,6 +267,7 @@ static int sirf_probe(struct serdev_device *serdev)
 	if (!gdev)
 		return -ENOMEM;
 
+	gdev->type = GNSS_TYPE_SIRF;
 	gdev->ops = &sirf_gnss_ops;
 	gnss_set_drvdata(gdev, data);
 
diff --git a/drivers/gnss/ubx.c b/drivers/gnss/ubx.c
index ecddfb362a6f..902b6854b7db 100644
--- a/drivers/gnss/ubx.c
+++ b/drivers/gnss/ubx.c
@@ -77,6 +77,8 @@ static int ubx_probe(struct serdev_device *serdev)
 
 	gserial->ops = &ubx_gserial_ops;
 
+	gserial->gdev->type = GNSS_TYPE_UBX;
+
 	data = gnss_serial_get_drvdata(gserial);
 
 	data->vcc = devm_regulator_get(&serdev->dev, "vcc");
diff --git a/include/linux/gnss.h b/include/linux/gnss.h
index e26aeac1e0e2..43546977098c 100644
--- a/include/linux/gnss.h
+++ b/include/linux/gnss.h
@@ -18,6 +18,14 @@
 
 struct gnss_device;
 
+enum gnss_type {
+	GNSS_TYPE_NMEA = 0,
+	GNSS_TYPE_SIRF,
+	GNSS_TYPE_UBX,
+
+	GNSS_TYPE_COUNT
+};
+
 struct gnss_operations {
 	int (*open)(struct gnss_device *gdev);
 	void (*close)(struct gnss_device *gdev);
@@ -30,6 +38,7 @@ struct gnss_device {
 	struct cdev cdev;
 	int id;
 
+	enum gnss_type type;
 	unsigned long flags;
 
 	struct rw_semaphore rwsem;
-- 
cgit v1.2.3


From d7e913cc58c5f6db5159c72e8f8d1f2be9e2fe2f Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 14 Jun 2018 11:34:03 -0400
Subject: media: mem2mem: Remove unused v4l2_m2m_ops .lock/.unlock

Commit f1a81afc98e3 ("[media] m2m: fix bad unlock balance")
removed the last use of v4l2_m2m_ops.lock and
v4l2_m2m_ops.unlock hooks. They are not actually
used anymore. Remove them.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/coda/coda-common.c          | 26 ++++------------------
 drivers/media/platform/m2m-deinterlace.c           | 20 ++---------------
 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c | 18 ---------------
 drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c | 16 -------------
 drivers/media/platform/mx2_emmaprp.c               | 16 -------------
 drivers/media/platform/sti/delta/delta-v4l2.c      | 18 ---------------
 drivers/media/platform/ti-vpe/vpe.c                | 19 ----------------
 include/media/v4l2-mem2mem.h                       |  6 -----
 8 files changed, 6 insertions(+), 133 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
index c7631e117dd3..b86d704ae10c 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1297,28 +1297,10 @@ static void coda_job_abort(void *priv)
 		 "Aborting task\n");
 }
 
-static void coda_lock(void *m2m_priv)
-{
-	struct coda_ctx *ctx = m2m_priv;
-	struct coda_dev *pcdev = ctx->dev;
-
-	mutex_lock(&pcdev->dev_mutex);
-}
-
-static void coda_unlock(void *m2m_priv)
-{
-	struct coda_ctx *ctx = m2m_priv;
-	struct coda_dev *pcdev = ctx->dev;
-
-	mutex_unlock(&pcdev->dev_mutex);
-}
-
 static const struct v4l2_m2m_ops coda_m2m_ops = {
 	.device_run	= coda_device_run,
 	.job_ready	= coda_job_ready,
 	.job_abort	= coda_job_abort,
-	.lock		= coda_lock,
-	.unlock		= coda_unlock,
 };
 
 static void set_default_params(struct coda_ctx *ctx)
@@ -2092,9 +2074,9 @@ static int coda_open(struct file *file)
 	INIT_LIST_HEAD(&ctx->buffer_meta_list);
 	spin_lock_init(&ctx->buffer_meta_lock);
 
-	coda_lock(ctx);
+	mutex_lock(&dev->dev_mutex);
 	list_add(&ctx->list, &dev->instances);
-	coda_unlock(ctx);
+	mutex_unlock(&dev->dev_mutex);
 
 	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "Created instance %d (%p)\n",
 		 ctx->idx, ctx);
@@ -2142,9 +2124,9 @@ static int coda_release(struct file *file)
 		flush_work(&ctx->seq_end_work);
 	}
 
-	coda_lock(ctx);
+	mutex_lock(&dev->dev_mutex);
 	list_del(&ctx->list);
-	coda_unlock(ctx);
+	mutex_unlock(&dev->dev_mutex);
 
 	if (ctx->dev->devtype->product == CODA_DX6)
 		coda_free_aux_buf(dev, &ctx->workbuf);
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c
index 3008892eb8dd..a566ec566cec 100644
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -181,20 +181,6 @@ static void deinterlace_job_abort(void *priv)
 	v4l2_m2m_job_finish(pcdev->m2m_dev, ctx->m2m_ctx);
 }
 
-static void deinterlace_lock(void *priv)
-{
-	struct deinterlace_ctx *ctx = priv;
-	struct deinterlace_dev *pcdev = ctx->dev;
-	mutex_lock(&pcdev->dev_mutex);
-}
-
-static void deinterlace_unlock(void *priv)
-{
-	struct deinterlace_ctx *ctx = priv;
-	struct deinterlace_dev *pcdev = ctx->dev;
-	mutex_unlock(&pcdev->dev_mutex);
-}
-
 static void dma_callback(void *data)
 {
 	struct deinterlace_ctx *curr_ctx = data;
@@ -956,9 +942,9 @@ static __poll_t deinterlace_poll(struct file *file,
 	struct deinterlace_ctx *ctx = file->private_data;
 	__poll_t ret;
 
-	deinterlace_lock(ctx);
+	mutex_lock(&ctx->dev->dev_mutex);
 	ret = v4l2_m2m_poll(file, ctx->m2m_ctx, wait);
-	deinterlace_unlock(ctx);
+	mutex_unlock(&ctx->dev->dev_mutex);
 
 	return ret;
 }
@@ -992,8 +978,6 @@ static const struct v4l2_m2m_ops m2m_ops = {
 	.device_run	= deinterlace_device_run,
 	.job_ready	= deinterlace_job_ready,
 	.job_abort	= deinterlace_job_abort,
-	.lock		= deinterlace_lock,
-	.unlock		= deinterlace_unlock,
 };
 
 static int deinterlace_probe(struct platform_device *pdev)
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
index 86f0a7134365..5523edadb86c 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec.c
@@ -1411,28 +1411,10 @@ int mtk_vcodec_dec_ctrls_setup(struct mtk_vcodec_ctx *ctx)
 	return 0;
 }
 
-static void m2mops_vdec_lock(void *m2m_priv)
-{
-	struct mtk_vcodec_ctx *ctx = m2m_priv;
-
-	mtk_v4l2_debug(3, "[%d]", ctx->id);
-	mutex_lock(&ctx->dev->dev_mutex);
-}
-
-static void m2mops_vdec_unlock(void *m2m_priv)
-{
-	struct mtk_vcodec_ctx *ctx = m2m_priv;
-
-	mtk_v4l2_debug(3, "[%d]", ctx->id);
-	mutex_unlock(&ctx->dev->dev_mutex);
-}
-
 const struct v4l2_m2m_ops mtk_vdec_m2m_ops = {
 	.device_run	= m2mops_vdec_device_run,
 	.job_ready	= m2mops_vdec_job_ready,
 	.job_abort	= m2mops_vdec_job_abort,
-	.lock		= m2mops_vdec_lock,
-	.unlock		= m2mops_vdec_unlock,
 };
 
 static const struct vb2_ops mtk_vdec_vb2_ops = {
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
index 1b1a28abbf1f..6ad408514a99 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
@@ -1181,26 +1181,10 @@ static void m2mops_venc_job_abort(void *priv)
 	ctx->state = MTK_STATE_ABORT;
 }
 
-static void m2mops_venc_lock(void *m2m_priv)
-{
-	struct mtk_vcodec_ctx *ctx = m2m_priv;
-
-	mutex_lock(&ctx->dev->dev_mutex);
-}
-
-static void m2mops_venc_unlock(void *m2m_priv)
-{
-	struct mtk_vcodec_ctx *ctx = m2m_priv;
-
-	mutex_unlock(&ctx->dev->dev_mutex);
-}
-
 const struct v4l2_m2m_ops mtk_venc_m2m_ops = {
 	.device_run	= m2mops_venc_device_run,
 	.job_ready	= m2mops_venc_job_ready,
 	.job_abort	= m2mops_venc_job_abort,
-	.lock		= m2mops_venc_lock,
-	.unlock		= m2mops_venc_unlock,
 };
 
 void mtk_vcodec_enc_set_default_params(struct mtk_vcodec_ctx *ctx)
diff --git a/drivers/media/platform/mx2_emmaprp.c b/drivers/media/platform/mx2_emmaprp.c
index 2e64729134b2..83b9db895a64 100644
--- a/drivers/media/platform/mx2_emmaprp.c
+++ b/drivers/media/platform/mx2_emmaprp.c
@@ -250,20 +250,6 @@ static void emmaprp_job_abort(void *priv)
 	v4l2_m2m_job_finish(pcdev->m2m_dev, ctx->m2m_ctx);
 }
 
-static void emmaprp_lock(void *priv)
-{
-	struct emmaprp_ctx *ctx = priv;
-	struct emmaprp_dev *pcdev = ctx->dev;
-	mutex_lock(&pcdev->dev_mutex);
-}
-
-static void emmaprp_unlock(void *priv)
-{
-	struct emmaprp_ctx *ctx = priv;
-	struct emmaprp_dev *pcdev = ctx->dev;
-	mutex_unlock(&pcdev->dev_mutex);
-}
-
 static inline void emmaprp_dump_regs(struct emmaprp_dev *pcdev)
 {
 	dprintk(pcdev,
@@ -885,8 +871,6 @@ static const struct video_device emmaprp_videodev = {
 static const struct v4l2_m2m_ops m2m_ops = {
 	.device_run	= emmaprp_device_run,
 	.job_abort	= emmaprp_job_abort,
-	.lock		= emmaprp_lock,
-	.unlock		= emmaprp_unlock,
 };
 
 static int emmaprp_probe(struct platform_device *pdev)
diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c
index 232d508c5b66..0b42acd4e3a6 100644
--- a/drivers/media/platform/sti/delta/delta-v4l2.c
+++ b/drivers/media/platform/sti/delta/delta-v4l2.c
@@ -339,22 +339,6 @@ static void register_decoders(struct delta_dev *delta)
 	}
 }
 
-static void delta_lock(void *priv)
-{
-	struct delta_ctx *ctx = priv;
-	struct delta_dev *delta = ctx->dev;
-
-	mutex_lock(&delta->lock);
-}
-
-static void delta_unlock(void *priv)
-{
-	struct delta_ctx *ctx = priv;
-	struct delta_dev *delta = ctx->dev;
-
-	mutex_unlock(&delta->lock);
-}
-
 static int delta_open_decoder(struct delta_ctx *ctx, u32 streamformat,
 			      u32 pixelformat, const struct delta_dec **pdec)
 {
@@ -1099,8 +1083,6 @@ static const struct v4l2_m2m_ops delta_m2m_ops = {
 	.device_run     = delta_device_run,
 	.job_ready	= delta_job_ready,
 	.job_abort      = delta_job_abort,
-	.lock		= delta_lock,
-	.unlock		= delta_unlock,
 };
 
 /*
diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c
index de968295ca7d..d70871d0ad2d 100644
--- a/drivers/media/platform/ti-vpe/vpe.c
+++ b/drivers/media/platform/ti-vpe/vpe.c
@@ -953,23 +953,6 @@ static void job_abort(void *priv)
 	ctx->aborting = 1;
 }
 
-/*
- * Lock access to the device
- */
-static void vpe_lock(void *priv)
-{
-	struct vpe_ctx *ctx = priv;
-	struct vpe_dev *dev = ctx->dev;
-	mutex_lock(&dev->dev_mutex);
-}
-
-static void vpe_unlock(void *priv)
-{
-	struct vpe_ctx *ctx = priv;
-	struct vpe_dev *dev = ctx->dev;
-	mutex_unlock(&dev->dev_mutex);
-}
-
 static void vpe_dump_regs(struct vpe_dev *dev)
 {
 #define DUMPREG(r) vpe_dbg(dev, "%-35s %08x\n", #r, read_reg(dev, VPE_##r))
@@ -2434,8 +2417,6 @@ static const struct v4l2_m2m_ops m2m_ops = {
 	.device_run	= device_run,
 	.job_ready	= job_ready,
 	.job_abort	= job_abort,
-	.lock		= vpe_lock,
-	.unlock		= vpe_unlock,
 };
 
 static int vpe_runtime_get(struct platform_device *pdev)
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 3d07ba3a8262..8f4b208cfee7 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -40,17 +40,11 @@
  *		v4l2_m2m_job_finish() (as if the transaction ended normally).
  *		This function does not have to (and will usually not) wait
  *		until the device enters a state when it can be stopped.
- * @lock:	optional. Define a driver's own lock callback, instead of using
- *		&v4l2_m2m_ctx->q_lock.
- * @unlock:	optional. Define a driver's own unlock callback, instead of
- *		using &v4l2_m2m_ctx->q_lock.
  */
 struct v4l2_m2m_ops {
 	void (*device_run)(void *priv);
 	int (*job_ready)(void *priv);
 	void (*job_abort)(void *priv);
-	void (*lock)(void *priv);
-	void (*unlock)(void *priv);
 };
 
 struct v4l2_m2m_dev;
-- 
cgit v1.2.3


From 5fd691afdf929061c391d897fa627822c3b2fd5a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 5 Jun 2018 16:53:34 +0200
Subject: atomic/tty: Fix up atomic abuse in ldsem

Mark found ldsem_cmpxchg() needed an (atomic_long_t *) cast to keep
working after making the atomic_long interface type safe.

Needing casts is bad form, which made me look at the code. There are no
ld_semaphore::count users outside of these functions so there is no
reason why it can not be an atomic_long_t in the first place, obviating
the need for this cast.

That also ensures the loads use atomic_long_read(), which implies (at
least) READ_ONCE() in order to guarantee single-copy-atomic loads.

When using atomic_long_try_cmpxchg() the ldsem_cmpxchg() wrapper gets
very thin (the only difference is not changing *old on success, which
most callers don't seem to care about).

So rework the whole thing to use atomic_long_t and its accessors
directly.

While there, fixup all the horrible comment styles.

Cc: Peter Hurley <peter@hurleysoftware.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_ldsem.c   | 82 ++++++++++++++++++++---------------------------
 include/linux/tty_ldisc.h |  4 +--
 2 files changed, 37 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c
index 37a91b3df980..0c98d88f795a 100644
--- a/drivers/tty/tty_ldsem.c
+++ b/drivers/tty/tty_ldsem.c
@@ -74,28 +74,6 @@ struct ldsem_waiter {
 	struct task_struct *task;
 };
 
-static inline long ldsem_atomic_update(long delta, struct ld_semaphore *sem)
-{
-	return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
-}
-
-/*
- * ldsem_cmpxchg() updates @*old with the last-known sem->count value.
- * Returns 1 if count was successfully changed; @*old will have @new value.
- * Returns 0 if count was not changed; @*old will have most recent sem->count
- */
-static inline int ldsem_cmpxchg(long *old, long new, struct ld_semaphore *sem)
-{
-	long tmp = atomic_long_cmpxchg(&sem->count, *old, new);
-	if (tmp == *old) {
-		*old = new;
-		return 1;
-	} else {
-		*old = tmp;
-		return 0;
-	}
-}
-
 /*
  * Initialize an ldsem:
  */
@@ -109,7 +87,7 @@ void __init_ldsem(struct ld_semaphore *sem, const char *name,
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-	sem->count = LDSEM_UNLOCKED;
+	atomic_long_set(&sem->count, LDSEM_UNLOCKED);
 	sem->wait_readers = 0;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->read_wait);
@@ -122,16 +100,17 @@ static void __ldsem_wake_readers(struct ld_semaphore *sem)
 	struct task_struct *tsk;
 	long adjust, count;
 
-	/* Try to grant read locks to all readers on the read wait list.
+	/*
+	 * Try to grant read locks to all readers on the read wait list.
 	 * Note the 'active part' of the count is incremented by
 	 * the number of readers before waking any processes up.
 	 */
 	adjust = sem->wait_readers * (LDSEM_ACTIVE_BIAS - LDSEM_WAIT_BIAS);
-	count = ldsem_atomic_update(adjust, sem);
+	count = atomic_long_add_return(adjust, &sem->count);
 	do {
 		if (count > 0)
 			break;
-		if (ldsem_cmpxchg(&count, count - adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count - adjust))
 			return;
 	} while (1);
 
@@ -148,14 +127,15 @@ static void __ldsem_wake_readers(struct ld_semaphore *sem)
 
 static inline int writer_trylock(struct ld_semaphore *sem)
 {
-	/* only wake this writer if the active part of the count can be
+	/*
+	 * Only wake this writer if the active part of the count can be
 	 * transitioned from 0 -> 1
 	 */
-	long count = ldsem_atomic_update(LDSEM_ACTIVE_BIAS, sem);
+	long count = atomic_long_add_return(LDSEM_ACTIVE_BIAS, &sem->count);
 	do {
 		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS)
 			return 1;
-		if (ldsem_cmpxchg(&count, count - LDSEM_ACTIVE_BIAS, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count - LDSEM_ACTIVE_BIAS))
 			return 0;
 	} while (1);
 }
@@ -205,12 +185,16 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
 	/* set up my own style of waitqueue */
 	raw_spin_lock_irq(&sem->wait_lock);
 
-	/* Try to reverse the lock attempt but if the count has changed
+	/*
+	 * Try to reverse the lock attempt but if the count has changed
 	 * so that reversing fails, check if there are are no waiters,
-	 * and early-out if not */
+	 * and early-out if not
+	 */
 	do {
-		if (ldsem_cmpxchg(&count, count + adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust)) {
+			count += adjust;
 			break;
+		}
 		if (count > 0) {
 			raw_spin_unlock_irq(&sem->wait_lock);
 			return sem;
@@ -243,12 +227,14 @@ down_read_failed(struct ld_semaphore *sem, long count, long timeout)
 	__set_current_state(TASK_RUNNING);
 
 	if (!timeout) {
-		/* lock timed out but check if this task was just
+		/*
+		 * Lock timed out but check if this task was just
 		 * granted lock ownership - if so, pretend there
-		 * was no timeout; otherwise, cleanup lock wait */
+		 * was no timeout; otherwise, cleanup lock wait.
+		 */
 		raw_spin_lock_irq(&sem->wait_lock);
 		if (waiter.task) {
-			ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
+			atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
 			list_del(&waiter.list);
 			raw_spin_unlock_irq(&sem->wait_lock);
 			put_task_struct(waiter.task);
@@ -273,11 +259,13 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
 	/* set up my own style of waitqueue */
 	raw_spin_lock_irq(&sem->wait_lock);
 
-	/* Try to reverse the lock attempt but if the count has changed
+	/*
+	 * Try to reverse the lock attempt but if the count has changed
 	 * so that reversing fails, check if the lock is now owned,
-	 * and early-out if so */
+	 * and early-out if so.
+	 */
 	do {
-		if (ldsem_cmpxchg(&count, count + adjust, sem))
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + adjust))
 			break;
 		if ((count & LDSEM_ACTIVE_MASK) == LDSEM_ACTIVE_BIAS) {
 			raw_spin_unlock_irq(&sem->wait_lock);
@@ -303,7 +291,7 @@ down_write_failed(struct ld_semaphore *sem, long count, long timeout)
 	}
 
 	if (!locked)
-		ldsem_atomic_update(-LDSEM_WAIT_BIAS, sem);
+		atomic_long_add_return(-LDSEM_WAIT_BIAS, &sem->count);
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
 
@@ -324,7 +312,7 @@ static int __ldsem_down_read_nested(struct ld_semaphore *sem,
 
 	lockdep_acquire_read(sem, subclass, 0, _RET_IP_);
 
-	count = ldsem_atomic_update(LDSEM_READ_BIAS, sem);
+	count = atomic_long_add_return(LDSEM_READ_BIAS, &sem->count);
 	if (count <= 0) {
 		lock_stat(sem, contended);
 		if (!down_read_failed(sem, count, timeout)) {
@@ -343,7 +331,7 @@ static int __ldsem_down_write_nested(struct ld_semaphore *sem,
 
 	lockdep_acquire(sem, subclass, 0, _RET_IP_);
 
-	count = ldsem_atomic_update(LDSEM_WRITE_BIAS, sem);
+	count = atomic_long_add_return(LDSEM_WRITE_BIAS, &sem->count);
 	if ((count & LDSEM_ACTIVE_MASK) != LDSEM_ACTIVE_BIAS) {
 		lock_stat(sem, contended);
 		if (!down_write_failed(sem, count, timeout)) {
@@ -370,10 +358,10 @@ int __sched ldsem_down_read(struct ld_semaphore *sem, long timeout)
  */
 int ldsem_down_read_trylock(struct ld_semaphore *sem)
 {
-	long count = sem->count;
+	long count = atomic_long_read(&sem->count);
 
 	while (count >= 0) {
-		if (ldsem_cmpxchg(&count, count + LDSEM_READ_BIAS, sem)) {
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_READ_BIAS)) {
 			lockdep_acquire_read(sem, 0, 1, _RET_IP_);
 			lock_stat(sem, acquired);
 			return 1;
@@ -396,10 +384,10 @@ int __sched ldsem_down_write(struct ld_semaphore *sem, long timeout)
  */
 int ldsem_down_write_trylock(struct ld_semaphore *sem)
 {
-	long count = sem->count;
+	long count = atomic_long_read(&sem->count);
 
 	while ((count & LDSEM_ACTIVE_MASK) == 0) {
-		if (ldsem_cmpxchg(&count, count + LDSEM_WRITE_BIAS, sem)) {
+		if (atomic_long_try_cmpxchg(&sem->count, &count, count + LDSEM_WRITE_BIAS)) {
 			lockdep_acquire(sem, 0, 1, _RET_IP_);
 			lock_stat(sem, acquired);
 			return 1;
@@ -417,7 +405,7 @@ void ldsem_up_read(struct ld_semaphore *sem)
 
 	lockdep_release(sem, 1, _RET_IP_);
 
-	count = ldsem_atomic_update(-LDSEM_READ_BIAS, sem);
+	count = atomic_long_add_return(-LDSEM_READ_BIAS, &sem->count);
 	if (count < 0 && (count & LDSEM_ACTIVE_MASK) == 0)
 		ldsem_wake(sem);
 }
@@ -431,7 +419,7 @@ void ldsem_up_write(struct ld_semaphore *sem)
 
 	lockdep_release(sem, 1, _RET_IP_);
 
-	count = ldsem_atomic_update(-LDSEM_WRITE_BIAS, sem);
+	count = atomic_long_add_return(-LDSEM_WRITE_BIAS, &sem->count);
 	if (count < 0)
 		ldsem_wake(sem);
 }
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 1ef64d4ad887..840894ca3fc0 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -119,13 +119,13 @@
 
 #include <linux/fs.h>
 #include <linux/wait.h>
-
+#include <linux/atomic.h>
 
 /*
  * the semaphore definition
  */
 struct ld_semaphore {
-	long			count;
+	atomic_long_t		count;
 	raw_spinlock_t		wait_lock;
 	unsigned int		wait_readers;
 	struct list_head	read_wait;
-- 
cgit v1.2.3


From e1a98c163eb276b5b5e1bece560ed7f0b9eb3b49 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Fri, 15 Jun 2018 09:19:46 -0400
Subject: media: media.h: remove __NEED_MEDIA_LEGACY_API

The __NEED_MEDIA_LEGACY_API define is 1) ugly and 2) dangerous
since it is all too easy for drivers to define it to get hold of
legacy defines. Instead just define what we need in media-device.c
which is the only place where we need the legacy define
(MEDIA_ENT_T_DEVNODE_UNKNOWN).

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/media-device.c | 13 ++++++++++---
 include/uapi/linux/media.h   |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index ae59c3177555..47bb2254fbfd 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -16,9 +16,6 @@
  * GNU General Public License for more details.
  */
 
-/* We need to access legacy defines from linux/media.h */
-#define __NEED_MEDIA_LEGACY_API
-
 #include <linux/compat.h>
 #include <linux/export.h>
 #include <linux/idr.h>
@@ -35,6 +32,16 @@
 
 #ifdef CONFIG_MEDIA_CONTROLLER
 
+/*
+ * Legacy defines from linux/media.h. This is the only place we need this
+ * so we just define it here. The media.h header doesn't expose it to the
+ * kernel to prevent it from being used by drivers, but here (and only here!)
+ * we need it to handle the legacy behavior.
+ */
+#define MEDIA_ENT_SUBTYPE_MASK			0x0000ffff
+#define MEDIA_ENT_T_DEVNODE_UNKNOWN		(MEDIA_ENT_F_OLD_BASE | \
+						 MEDIA_ENT_SUBTYPE_MASK)
+
 /* -----------------------------------------------------------------------------
  * Userspace API
  */
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index c7e9a5cba24e..86c7dcc9cba3 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -348,7 +348,7 @@ struct media_v2_topology {
 #define MEDIA_IOC_SETUP_LINK	_IOWR('|', 0x03, struct media_link_desc)
 #define MEDIA_IOC_G_TOPOLOGY	_IOWR('|', 0x04, struct media_v2_topology)
 
-#if !defined(__KERNEL__) || defined(__NEED_MEDIA_LEGACY_API)
+#ifndef __KERNEL__
 
 /*
  * Legacy symbols used to avoid userspace compilation breakages.
-- 
cgit v1.2.3


From d8ae7242718738ee1bf9bfdd632d2a4b150fdd26 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:40 -0400
Subject: vt: preserve unicode values corresponding to screen characters

The vt code translates UTF-8 strings into glyph index values and stores
those glyph values directly in the screen buffer. Because there can only
be at most 512 glyphs, it is impossible to represent most unicode
characters, in which case a default glyph (often '?') is displayed
instead. The original unicode value is then lost.

This patch implements the basic screen buffer handling to preserve unicode
values alongside corresponding display glyphs.  It is not activated by
default, meaning that people not relying on that functionality won't get
the implied overhead.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c            | 220 ++++++++++++++++++++++++++++++++++++++---
 include/linux/console_struct.h |   2 +
 2 files changed, 211 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 1eb1a376a041..7b636638b36d 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -317,6 +317,171 @@ void schedule_console_callback(void)
 	schedule_work(&console_work);
 }
 
+/*
+ * Code to manage unicode-based screen buffers
+ */
+
+#ifdef NO_VC_UNI_SCREEN
+/* this disables and optimizes related code away at compile time */
+#define get_vc_uniscr(vc) NULL
+#else
+#define get_vc_uniscr(vc) vc->vc_uni_screen
+#endif
+
+typedef uint32_t char32_t;
+
+/*
+ * Our screen buffer is preceded by an array of line pointers so that
+ * scrolling only implies some pointer shuffling.
+ */
+struct uni_screen {
+	char32_t *lines[0];
+};
+
+static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
+{
+	struct uni_screen *uniscr;
+	void *p;
+	unsigned int memsize, i;
+
+	/* allocate everything in one go */
+	memsize = cols * rows * sizeof(char32_t);
+	memsize += rows * sizeof(char32_t *);
+	p = kmalloc(memsize, GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	/* initial line pointers */
+	uniscr = p;
+	p = uniscr->lines + rows;
+	for (i = 0; i < rows; i++) {
+		uniscr->lines[i] = p;
+		p += cols * sizeof(char32_t);
+	}
+	return uniscr;
+}
+
+static void vc_uniscr_set(struct vc_data *vc, struct uni_screen *new_uniscr)
+{
+	kfree(vc->vc_uni_screen);
+	vc->vc_uni_screen = new_uniscr;
+}
+
+static void vc_uniscr_putc(struct vc_data *vc, char32_t uc)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr)
+		uniscr->lines[vc->vc_y][vc->vc_x] = uc;
+}
+
+static void vc_uniscr_insert(struct vc_data *vc, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+		unsigned int x = vc->vc_x, cols = vc->vc_cols;
+
+		memmove(&ln[x + nr], &ln[x], (cols - x - nr) * sizeof(*ln));
+		memset32(&ln[x], ' ', nr);
+	}
+}
+
+static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+		unsigned int x = vc->vc_x, cols = vc->vc_cols;
+
+		memcpy(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln));
+		memset32(&ln[cols - nr], ' ', nr);
+	}
+}
+
+static void vc_uniscr_clear_line(struct vc_data *vc, unsigned int x,
+				 unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		char32_t *ln = uniscr->lines[vc->vc_y];
+
+		memset32(&ln[x], ' ', nr);
+	}
+}
+
+static void vc_uniscr_clear_lines(struct vc_data *vc, unsigned int y,
+				  unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		unsigned int cols = vc->vc_cols;
+
+		while (nr--)
+			memset32(uniscr->lines[y++], ' ', cols);
+	}
+}
+
+static void vc_uniscr_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+			     enum con_scroll dir, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr) {
+		unsigned int s, d, rescue, clear;
+		char32_t *save[nr];
+
+		s = clear = t;
+		d = t + nr;
+		rescue = b - nr;
+		if (dir == SM_UP) {
+			swap(s, d);
+			swap(clear, rescue);
+		}
+		memcpy(save, uniscr->lines + rescue, nr * sizeof(*save));
+		memmove(uniscr->lines + d, uniscr->lines + s,
+			(b - t - nr) * sizeof(*uniscr->lines));
+		memcpy(uniscr->lines + clear, save, nr * sizeof(*save));
+		vc_uniscr_clear_lines(vc, clear, nr);
+	}
+}
+
+static void vc_uniscr_copy_area(struct uni_screen *dst,
+				unsigned int dst_cols,
+				unsigned int dst_rows,
+				struct uni_screen *src,
+				unsigned int src_cols,
+				unsigned int src_top_row,
+				unsigned int src_bot_row)
+{
+	unsigned int dst_row = 0;
+
+	if (!dst)
+		return;
+
+	while (src_top_row < src_bot_row) {
+		char32_t *src_line = src->lines[src_top_row];
+		char32_t *dst_line = dst->lines[dst_row];
+
+		memcpy(dst_line, src_line, src_cols * sizeof(char32_t));
+		if (dst_cols - src_cols)
+			memset32(dst_line + src_cols, ' ', dst_cols - src_cols);
+		src_top_row++;
+		dst_row++;
+	}
+	while (dst_row < dst_rows) {
+		char32_t *dst_line = dst->lines[dst_row];
+
+		memset32(dst_line, ' ', dst_cols);
+		dst_row++;
+	}
+}
+
+
 static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		enum con_scroll dir, unsigned int nr)
 {
@@ -326,6 +491,7 @@ static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		nr = b - t - 1;
 	if (b > vc->vc_rows || t >= b || nr < 1)
 		return;
+	vc_uniscr_scroll(vc, t, b, dir, nr);
 	if (con_is_visible(vc) && vc->vc_sw->con_scroll(vc, t, b, dir, nr))
 		return;
 
@@ -533,6 +699,7 @@ static void insert_char(struct vc_data *vc, unsigned int nr)
 {
 	unsigned short *p = (unsigned short *) vc->vc_pos;
 
+	vc_uniscr_insert(vc, nr);
 	scr_memmovew(p + nr, p, (vc->vc_cols - vc->vc_x - nr) * 2);
 	scr_memsetw(p, vc->vc_video_erase_char, nr * 2);
 	vc->vc_need_wrap = 0;
@@ -545,6 +712,7 @@ static void delete_char(struct vc_data *vc, unsigned int nr)
 {
 	unsigned short *p = (unsigned short *) vc->vc_pos;
 
+	vc_uniscr_delete(vc, nr);
 	scr_memcpyw(p, p + nr, (vc->vc_cols - vc->vc_x - nr) * 2);
 	scr_memsetw(p + vc->vc_cols - vc->vc_x - nr, vc->vc_video_erase_char,
 			nr * 2);
@@ -845,10 +1013,11 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 {
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned long end;
-	unsigned int old_rows, old_row_size;
+	unsigned int old_rows, old_row_size, first_copied_row;
 	unsigned int new_cols, new_rows, new_row_size, new_screen_size;
 	unsigned int user;
 	unsigned short *newscreen;
+	struct uni_screen *new_uniscr = NULL;
 
 	WARN_CONSOLE_UNLOCKED();
 
@@ -875,6 +1044,14 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 	if (!newscreen)
 		return -ENOMEM;
 
+	if (get_vc_uniscr(vc)) {
+		new_uniscr = vc_uniscr_alloc(new_cols, new_rows);
+		if (!new_uniscr) {
+			kfree(newscreen);
+			return -ENOMEM;
+		}
+	}
+
 	if (vc == sel_cons)
 		clear_selection();
 
@@ -884,6 +1061,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 	err = resize_screen(vc, new_cols, new_rows, user);
 	if (err) {
 		kfree(newscreen);
+		kfree(new_uniscr);
 		return err;
 	}
 
@@ -904,18 +1082,24 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 			 * Cursor near the bottom, copy contents from the
 			 * bottom of buffer
 			 */
-			old_origin += (old_rows - new_rows) * old_row_size;
+			first_copied_row = (old_rows - new_rows);
 		} else {
 			/*
 			 * Cursor is in no man's land, copy 1/2 screenful
 			 * from the top and bottom of cursor position
 			 */
-			old_origin += (vc->vc_y - new_rows/2) * old_row_size;
+			first_copied_row = (vc->vc_y - new_rows/2);
 		}
-	}
-
+		old_origin += first_copied_row * old_row_size;
+	} else
+		first_copied_row = 0;
 	end = old_origin + old_row_size * min(old_rows, new_rows);
 
+	vc_uniscr_copy_area(new_uniscr, new_cols, new_rows,
+			    get_vc_uniscr(vc), rlth/2, first_copied_row,
+			    min(old_rows, new_rows));
+	vc_uniscr_set(vc, new_uniscr);
+
 	update_attr(vc);
 
 	while (old_origin < end) {
@@ -1013,6 +1197,7 @@ struct vc_data *vc_deallocate(unsigned int currcons)
 		vc->vc_sw->con_deinit(vc);
 		put_pid(vc->vt_pid);
 		module_put(vc->vc_sw->owner);
+		vc_uniscr_set(vc, NULL);
 		kfree(vc->vc_screenbuf);
 		vc_cons[currcons].d = NULL;
 	}
@@ -1171,15 +1356,22 @@ static void csi_J(struct vc_data *vc, int vpar)
 
 	switch (vpar) {
 		case 0:	/* erase from cursor to end of display */
+			vc_uniscr_clear_line(vc, vc->vc_x,
+					     vc->vc_cols - vc->vc_x);
+			vc_uniscr_clear_lines(vc, vc->vc_y + 1,
+					      vc->vc_rows - vc->vc_y - 1);
 			count = (vc->vc_scr_end - vc->vc_pos) >> 1;
 			start = (unsigned short *)vc->vc_pos;
 			break;
 		case 1:	/* erase from start to cursor */
+			vc_uniscr_clear_line(vc, 0, vc->vc_x + 1);
+			vc_uniscr_clear_lines(vc, 0, vc->vc_y);
 			count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
 			start = (unsigned short *)vc->vc_origin;
 			break;
 		case 2: /* erase whole display */
 		case 3: /* (and scrollback buffer later) */
+			vc_uniscr_clear_lines(vc, 0, vc->vc_rows);
 			count = vc->vc_cols * vc->vc_rows;
 			start = (unsigned short *)vc->vc_origin;
 			break;
@@ -1200,25 +1392,27 @@ static void csi_J(struct vc_data *vc, int vpar)
 static void csi_K(struct vc_data *vc, int vpar)
 {
 	unsigned int count;
-	unsigned short * start;
+	unsigned short *start = (unsigned short *)vc->vc_pos;
+	int offset;
 
 	switch (vpar) {
 		case 0:	/* erase from cursor to end of line */
+			offset = 0;
 			count = vc->vc_cols - vc->vc_x;
-			start = (unsigned short *)vc->vc_pos;
 			break;
 		case 1:	/* erase from start of line to cursor */
-			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+			offset = -vc->vc_x;
 			count = vc->vc_x + 1;
 			break;
 		case 2: /* erase whole line */
-			start = (unsigned short *)(vc->vc_pos - (vc->vc_x << 1));
+			offset = -vc->vc_x;
 			count = vc->vc_cols;
 			break;
 		default:
 			return;
 	}
-	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
+	vc_uniscr_clear_line(vc, vc->vc_x + offset, count);
+	scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count);
 	vc->vc_need_wrap = 0;
 	if (con_should_update(vc))
 		do_update_region(vc, (unsigned long) start, count);
@@ -1232,6 +1426,7 @@ static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar posi
 		vpar++;
 	count = (vpar > vc->vc_cols - vc->vc_x) ? (vc->vc_cols - vc->vc_x) : vpar;
 
+	vc_uniscr_clear_line(vc, vc->vc_x, count);
 	scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count);
 	if (con_should_update(vc))
 		vc->vc_sw->con_clear(vc, vc->vc_y, vc->vc_x, 1, count);
@@ -2188,7 +2383,7 @@ static void con_flush(struct vc_data *vc, unsigned long draw_from,
 /* acquires console_lock */
 static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
 {
-	int c, tc, ok, n = 0, draw_x = -1;
+	int c, next_c, tc, ok, n = 0, draw_x = -1;
 	unsigned int currcons;
 	unsigned long draw_from = 0, draw_to = 0;
 	struct vc_data *vc;
@@ -2382,6 +2577,7 @@ rescan_last_byte:
 				con_flush(vc, draw_from, draw_to, &draw_x);
 			}
 
+			next_c = c;
 			while (1) {
 				if (vc->vc_need_wrap || vc->vc_decim)
 					con_flush(vc, draw_from, draw_to,
@@ -2392,6 +2588,7 @@ rescan_last_byte:
 				}
 				if (vc->vc_decim)
 					insert_char(vc, 1);
+				vc_uniscr_putc(vc, next_c);
 				scr_writew(himask ?
 					     ((vc_attr << 8) & ~himask) + ((tc & 0x100) ? himask : 0) + (tc & 0xff) :
 					     (vc_attr << 8) + tc,
@@ -2412,6 +2609,7 @@ rescan_last_byte:
 
 				tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
 				if (tc < 0) tc = ' ';
+				next_c = ' ';
 			}
 			notify_write(vc, c);
 
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index c0ec478ea5bf..2c8d3239899b 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -19,6 +19,7 @@
 
 struct vt_struct;
 struct uni_pagedir;
+struct uni_screen;
 
 #define NPAR 16
 
@@ -140,6 +141,7 @@ struct vc_data {
 	struct vc_data **vc_display_fg;		/* [!] Ptr to var holding fg console for this display */
 	struct uni_pagedir *vc_uni_pagedir;
 	struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
+	struct uni_screen *vc_uni_screen;	/* unicode screen content */
 	bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
 	/* additional information is in vt_kern.h */
 };
-- 
cgit v1.2.3


From d21b0be246bf3bbf569e6e239f56abb529c7154e Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:41 -0400
Subject: vt: introduce unicode mode for /dev/vcs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the core vt code knows how to preserve unicode values for each
displayed character, it is then possible to let user space access it via
/dev/vcs*.

Unicode characters are presented as 32 bit values in native endianity
via the /dev/vcsu* devices, mimicking the simple /dev/vcs* devices.
Unicode with attributes (similarly to /dev/vcsa*) is not supported at
the moment.

Data is available only as long as the console is in UTF-8 mode. ENODATA
is returned otherwise.

This was tested with the latest development version (to become
version 5.7) of BRLTTY. Amongst other things, this allows ⠋⠕⠗ ⠞⠓⠊⠎
⠃⠗⠁⠊⠇⠇⠑⠀⠞⠑⠭⠞⠀to appear directly on braille displays regardless of the
console font being used.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vc_screen.c | 89 ++++++++++++++++++++++++++++++++++++++--------
 drivers/tty/vt/vt.c        | 61 +++++++++++++++++++++++++++++++
 include/linux/selection.h  |  5 +++
 3 files changed, 141 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index e4a66e1fd05f..9c44252e52a3 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -10,6 +10,12 @@
  *	Attribute/character pair is in native endianity.
  *            [minor: N+128]
  *
+ * /dev/vcsuN: similar to /dev/vcsaN but using 4-byte unicode values
+ *	instead of 1-byte screen glyph values.
+ *            [minor: N+64]
+ *
+ * /dev/vcsuaN: same idea as /dev/vcsaN for unicode (not yet implemented).
+ *
  * This replaces screendump and part of selection, so that the system
  * administrator can control access using file system permissions.
  *
@@ -51,6 +57,26 @@
 
 #define CON_BUF_SIZE (CONFIG_BASE_SMALL ? 256 : PAGE_SIZE)
 
+/*
+ * Our minor space:
+ *
+ *   0 ... 63	glyph mode without attributes
+ *  64 ... 127	unicode mode without attributes
+ * 128 ... 191	glyph mode with attributes
+ * 192 ... 255	unused (reserved for unicode with attributes)
+ *
+ * This relies on MAX_NR_CONSOLES being  <= 63, meaning 63 actual consoles
+ * with minors 0, 64, 128 and 192 being proxies for the foreground console.
+ */
+#if MAX_NR_CONSOLES > 63
+#warning "/dev/vcs* devices may not accommodate more than 63 consoles"
+#endif
+
+#define console(inode)		(iminor(inode) & 63)
+#define use_unicode(inode)	(iminor(inode) & 64)
+#define use_attributes(inode)	(iminor(inode) & 128)
+
+
 struct vcs_poll_data {
 	struct notifier_block notifier;
 	unsigned int cons_num;
@@ -102,7 +128,7 @@ vcs_poll_data_get(struct file *file)
 	poll = kzalloc(sizeof(*poll), GFP_KERNEL);
 	if (!poll)
 		return NULL;
-	poll->cons_num = iminor(file_inode(file)) & 127;
+	poll->cons_num = console(file_inode(file));
 	init_waitqueue_head(&poll->waitq);
 	poll->notifier.notifier_call = vcs_notifier;
 	if (register_vt_notifier(&poll->notifier) != 0) {
@@ -140,7 +166,7 @@ vcs_poll_data_get(struct file *file)
 static struct vc_data*
 vcs_vc(struct inode *inode, int *viewed)
 {
-	unsigned int currcons = iminor(inode) & 127;
+	unsigned int currcons = console(inode);
 
 	WARN_CONSOLE_UNLOCKED();
 
@@ -164,7 +190,6 @@ static int
 vcs_size(struct inode *inode)
 {
 	int size;
-	int minor = iminor(inode);
 	struct vc_data *vc;
 
 	WARN_CONSOLE_UNLOCKED();
@@ -175,8 +200,12 @@ vcs_size(struct inode *inode)
 
 	size = vc->vc_rows * vc->vc_cols;
 
-	if (minor & 128)
+	if (use_attributes(inode)) {
+		if (use_unicode(inode))
+			return -EOPNOTSUPP;
 		size = 2*size + HEADER_SIZE;
+	} else if (use_unicode(inode))
+		size *= 4;
 	return size;
 }
 
@@ -197,12 +226,10 @@ static ssize_t
 vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file_inode(file);
-	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	struct vcs_poll_data *poll;
-	long pos;
-	long attr, read;
-	int col, maxcol, viewed;
+	long pos, read;
+	int attr, uni_mode, row, col, maxcol, viewed;
 	unsigned short *org = NULL;
 	ssize_t ret;
 	char *con_buf;
@@ -218,7 +245,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	 */
 	console_lock();
 
-	attr = (currcons & 128);
+	uni_mode = use_unicode(inode);
+	attr = use_attributes(inode);
 	ret = -ENXIO;
 	vc = vcs_vc(inode, &viewed);
 	if (!vc)
@@ -227,6 +255,10 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	ret = -EINVAL;
 	if (pos < 0)
 		goto unlock_out;
+	/* we enforce 32-bit alignment for pos and count in unicode mode */
+	if (uni_mode && (pos | count) & 3)
+		goto unlock_out;
+
 	poll = file->private_data;
 	if (count && poll)
 		poll->seen_last_update = true;
@@ -266,7 +298,27 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		con_buf_start = con_buf0 = con_buf;
 		orig_count = this_round;
 		maxcol = vc->vc_cols;
-		if (!attr) {
+		if (uni_mode) {
+			unsigned int nr;
+
+			ret = vc_uniscr_check(vc);
+			if (ret)
+				break;
+			p /= 4;
+			row = p / vc->vc_cols;
+			col = p % maxcol;
+			nr = maxcol - col;
+			do {
+				if (nr > this_round/4)
+					nr = this_round/4;
+				vc_uniscr_copy_line(vc, con_buf0, row, col, nr);
+				con_buf0 += nr * 4;
+				this_round -= nr * 4;
+				row++;
+				col = 0;
+				nr = maxcol;
+			} while (this_round);
+		} else if (!attr) {
 			org = screen_pos(vc, p, viewed);
 			col = p % maxcol;
 			p += maxcol - col;
@@ -375,7 +427,6 @@ static ssize_t
 vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
 	struct inode *inode = file_inode(file);
-	unsigned int currcons = iminor(inode);
 	struct vc_data *vc;
 	long pos;
 	long attr, size, written;
@@ -396,7 +447,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 	 */
 	console_lock();
 
-	attr = (currcons & 128);
+	attr = use_attributes(inode);
 	ret = -ENXIO;
 	vc = vcs_vc(inode, &viewed);
 	if (!vc)
@@ -593,9 +644,15 @@ vcs_fasync(int fd, struct file *file, int on)
 static int
 vcs_open(struct inode *inode, struct file *filp)
 {
-	unsigned int currcons = iminor(inode) & 127;
+	unsigned int currcons = console(inode);
+	bool attr = use_attributes(inode);
+	bool uni_mode = use_unicode(inode);
 	int ret = 0;
-	
+
+	/* we currently don't support attributes in unicode mode */
+	if (attr && uni_mode)
+		return -EOPNOTSUPP;
+
 	console_lock();
 	if(currcons && !vc_cons_allocated(currcons-1))
 		ret = -ENXIO;
@@ -628,6 +685,8 @@ void vcs_make_sysfs(int index)
 {
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 1), NULL,
 		      "vcs%u", index + 1);
+	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 65), NULL,
+		      "vcsu%u", index + 1);
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, index + 129), NULL,
 		      "vcsa%u", index + 1);
 }
@@ -635,6 +694,7 @@ void vcs_make_sysfs(int index)
 void vcs_remove_sysfs(int index)
 {
 	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 1));
+	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 65));
 	device_destroy(vc_class, MKDEV(VCS_MAJOR, index + 129));
 }
 
@@ -647,6 +707,7 @@ int __init vcs_init(void)
 	vc_class = class_create(THIS_MODULE, "vc");
 
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 0), NULL, "vcs");
+	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 64), NULL, "vcsu");
 	device_create(vc_class, NULL, MKDEV(VCS_MAJOR, 128), NULL, "vcsa");
 	for (i = 0; i < MIN_NR_CONSOLES; i++)
 		vcs_make_sysfs(i);
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 7b636638b36d..062ce6be7957 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -481,6 +481,67 @@ static void vc_uniscr_copy_area(struct uni_screen *dst,
 	}
 }
 
+/*
+ * Called from vcs_read() to make sure unicode screen retrieval is possible.
+ * This will initialize the unicode screen buffer if not already done.
+ * This returns 0 if OK, or a negative error code otherwise.
+ * In particular, -ENODATA is returned if the console is not in UTF-8 mode.
+ */
+int vc_uniscr_check(struct vc_data *vc)
+{
+	struct uni_screen *uniscr;
+	unsigned short *p;
+	int x, y, mask;
+
+	if (__is_defined(NO_VC_UNI_SCREEN))
+		return -EOPNOTSUPP;
+
+	WARN_CONSOLE_UNLOCKED();
+
+	if (!vc->vc_utf)
+		return -ENODATA;
+
+	if (vc->vc_uni_screen)
+		return 0;
+
+	uniscr = vc_uniscr_alloc(vc->vc_cols, vc->vc_rows);
+	if (!uniscr)
+		return -ENOMEM;
+
+	/*
+	 * Let's populate it initially with (imperfect) reverse translation.
+	 * This is the next best thing we can do short of having it enabled
+	 * from the start even when no users rely on this functionality. True
+	 * unicode content will be available after a complete screen refresh.
+	 */
+	p = (unsigned short *)vc->vc_origin;
+	mask = vc->vc_hi_font_mask | 0xff;
+	for (y = 0; y < vc->vc_rows; y++) {
+		char32_t *line = uniscr->lines[y];
+		for (x = 0; x < vc->vc_cols; x++) {
+			u16 glyph = scr_readw(p++) & mask;
+			line[x] = inverse_translate(vc, glyph, true);
+		}
+	}
+
+	vc->vc_uni_screen = uniscr;
+	return 0;
+}
+
+/*
+ * Called from vcs_read() to get the unicode data from the screen.
+ * This must be preceded by a successful call to vc_uniscr_check() once
+ * the console lock has been taken.
+ */
+void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+			 unsigned int row, unsigned int col, unsigned int nr)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	BUG_ON(!uniscr);
+	memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+}
+
 
 static void con_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 		enum con_scroll dir, unsigned int nr)
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 5b278ce99d8d..2b34df9f1e26 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -42,4 +42,9 @@ extern u16 vcs_scr_readw(struct vc_data *vc, const u16 *org);
 extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
 extern void vcs_scr_updated(struct vc_data *vc);
 
+extern int vc_uniscr_check(struct vc_data *vc);
+extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+				unsigned int row, unsigned int col,
+				unsigned int nr);
+
 #endif
-- 
cgit v1.2.3


From 708d0bff9121506db08adb73845a3c70312fadf3 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 26 Jun 2018 23:56:42 -0400
Subject: vt: unicode fallback for scrollback

There is currently no provision for scrollback content in the core code,
leaving that to backend video drivers where this can be highly optimized.
There is currently no common method for those drivers to tell the core
what part of the scrollback is actually displayed and what size the
scrollback buffer is either. Because of that, the unicode screen buffer
has no provision for any scrollback.

At least we can provide backtranslated glyph values when the scrollback
is active which should be plenty good enough for now.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Dave Mielke <Dave@mielke.cc>
Acked-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vc_screen.c |  3 ++-
 drivers/tty/vt/vt.c        | 31 +++++++++++++++++++++++++++++--
 include/linux/selection.h  |  2 +-
 3 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c
index 9c44252e52a3..2384ea85ffaf 100644
--- a/drivers/tty/vt/vc_screen.c
+++ b/drivers/tty/vt/vc_screen.c
@@ -311,7 +311,8 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 			do {
 				if (nr > this_round/4)
 					nr = this_round/4;
-				vc_uniscr_copy_line(vc, con_buf0, row, col, nr);
+				vc_uniscr_copy_line(vc, con_buf0, viewed,
+						    row, col, nr);
 				con_buf0 += nr * 4;
 				this_round -= nr * 4;
 				row++;
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 062ce6be7957..2d14bb195d98 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -533,13 +533,40 @@ int vc_uniscr_check(struct vc_data *vc)
  * This must be preceded by a successful call to vc_uniscr_check() once
  * the console lock has been taken.
  */
-void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+void vc_uniscr_copy_line(struct vc_data *vc, void *dest, int viewed,
 			 unsigned int row, unsigned int col, unsigned int nr)
 {
 	struct uni_screen *uniscr = get_vc_uniscr(vc);
+	int offset = row * vc->vc_size_row + col * 2;
+	unsigned long pos;
 
 	BUG_ON(!uniscr);
-	memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+
+	pos = (unsigned long)screenpos(vc, offset, viewed);
+	if (pos >= vc->vc_origin && pos < vc->vc_scr_end) {
+		/*
+		 * Desired position falls in the main screen buffer.
+		 * However the actual row/col might be different if
+		 * scrollback is active.
+		 */
+		row = (pos - vc->vc_origin) / vc->vc_size_row;
+		col = ((pos - vc->vc_origin) % vc->vc_size_row) / 2;
+		memcpy(dest, &uniscr->lines[row][col], nr * sizeof(char32_t));
+	} else {
+		/*
+		 * Scrollback is active. For now let's simply backtranslate
+		 * the screen glyphs until the unicode screen buffer does
+		 * synchronize with console display drivers for a scrollback
+		 * buffer of its own.
+		 */
+		u16 *p = (u16 *)pos;
+		int mask = vc->vc_hi_font_mask | 0xff;
+		char32_t *uni_buf = dest;
+		while (nr--) {
+			u16 glyph = scr_readw(p++) & mask;
+			*uni_buf++ = inverse_translate(vc, glyph, true);
+		}
+	}
 }
 
 
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 2b34df9f1e26..067d2e99c79f 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -43,7 +43,7 @@ extern void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org);
 extern void vcs_scr_updated(struct vc_data *vc);
 
 extern int vc_uniscr_check(struct vc_data *vc);
-extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest,
+extern void vc_uniscr_copy_line(struct vc_data *vc, void *dest, int viewed,
 				unsigned int row, unsigned int col,
 				unsigned int nr);
 
-- 
cgit v1.2.3


From 0a9fe5c375b57fab6d18ed0a6a7f935eefb09db3 Mon Sep 17 00:00:00 2001
From: Yousuk Seung <ysseung@google.com>
Date: Wed, 27 Jun 2018 10:32:19 -0700
Subject: netem: slotting with non-uniform distribution

Extend slotting with support for non-uniform distributions. This is
similar to netem's non-uniform distribution delay feature.

Commit f043efeae2f1 ("netem: support delivering packets in delayed
time slots") added the slotting feature to approximate the behaviors
of media with packet aggregation but only supported a uniform
distribution for delays between transmission attempts. Tests with TCP
BBR with emulated wifi links with non-uniform distributions produced
more useful results.

Syntax:
   slot dist DISTRIBUTION DELAY JITTER [packets MAX_PACKETS] \
      [bytes MAX_BYTES]

The syntax and use of the distribution table is the same as in the
non-uniform distribution delay feature. A file DISTRIBUTION must be
present in TC_LIB_DIR (e.g. /usr/lib/tc) containing numbers scaled by
NETEM_DIST_SCALE. A random value x is selected from the table and it
takes DELAY + ( x * JITTER ) as delay. Correlation between values is not
supported.

Examples:
  Normal distribution delay with mean = 800us and stdev = 100us.
  > tc qdisc add dev eth0 root netem slot dist normal 800us 100us

  Optionally set the max slot size in bytes and/or packets.
  > tc qdisc add dev eth0 root netem slot dist normal 800us 100us \
    bytes 64k packets 42

Signed-off-by: Yousuk Seung <ysseung@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  3 ++
 net/sched/sch_netem.c          | 73 ++++++++++++++++++++++++++++--------------
 2 files changed, 52 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 37b5096ae97b..bad3c03bcf43 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -539,6 +539,7 @@ enum {
 	TCA_NETEM_LATENCY64,
 	TCA_NETEM_JITTER64,
 	TCA_NETEM_SLOT,
+	TCA_NETEM_SLOT_DIST,
 	__TCA_NETEM_MAX,
 };
 
@@ -581,6 +582,8 @@ struct tc_netem_slot {
 	__s64   max_delay;
 	__s32   max_packets;
 	__s32   max_bytes;
+	__s64	dist_delay; /* nsec */
+	__s64	dist_jitter; /* nsec */
 };
 
 enum {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7d6801fc5340..ad18a2052416 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -68,6 +68,11 @@
 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
 */
 
+struct disttable {
+	u32  size;
+	s16 table[0];
+};
+
 struct netem_sched_data {
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
@@ -99,10 +104,7 @@ struct netem_sched_data {
 		u32 rho;
 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
-	struct disttable {
-		u32  size;
-		s16 table[0];
-	} *delay_dist;
+	struct disttable *delay_dist;
 
 	enum  {
 		CLG_RANDOM,
@@ -142,6 +144,7 @@ struct netem_sched_data {
 		s32 bytes_left;
 	} slot;
 
+	struct disttable *slot_dist;
 };
 
 /* Time stamp put into socket buffer control block
@@ -180,7 +183,7 @@ static u32 get_crandom(struct crndstate *state)
 	u64 value, rho;
 	unsigned long answer;
 
-	if (state->rho == 0)	/* no correlation */
+	if (!state || state->rho == 0)	/* no correlation */
 		return prandom_u32();
 
 	value = prandom_u32();
@@ -601,10 +604,19 @@ finish_segs:
 
 static void get_slot_next(struct netem_sched_data *q, u64 now)
 {
-	q->slot.slot_next = now + q->slot_config.min_delay +
-		(prandom_u32() *
-			(q->slot_config.max_delay -
-				q->slot_config.min_delay) >> 32);
+	s64 next_delay;
+
+	if (!q->slot_dist)
+		next_delay = q->slot_config.min_delay +
+				(prandom_u32() *
+				 (q->slot_config.max_delay -
+				  q->slot_config.min_delay) >> 32);
+	else
+		next_delay = tabledist(q->slot_config.dist_delay,
+				       (s32)(q->slot_config.dist_jitter),
+				       NULL, q->slot_dist);
+
+	q->slot.slot_next = now + next_delay;
 	q->slot.packets_left = q->slot_config.max_packets;
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
@@ -721,9 +733,9 @@ static void dist_free(struct disttable *d)
  * signed 16 bit values.
  */
 
-static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
+static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
+			  const struct nlattr *attr)
 {
-	struct netem_sched_data *q = qdisc_priv(sch);
 	size_t n = nla_len(attr)/sizeof(__s16);
 	const __s16 *data = nla_data(attr);
 	spinlock_t *root_lock;
@@ -744,7 +756,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
-	swap(q->delay_dist, d);
+	swap(*tbl, d);
 	spin_unlock_bh(root_lock);
 
 	dist_free(d);
@@ -762,7 +774,8 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
 		q->slot_config.max_bytes = INT_MAX;
 	q->slot.packets_left = q->slot_config.max_packets;
 	q->slot.bytes_left = q->slot_config.max_bytes;
-	if (q->slot_config.min_delay | q->slot_config.max_delay)
+	if (q->slot_config.min_delay | q->slot_config.max_delay |
+	    q->slot_config.dist_jitter)
 		q->slot.slot_next = ktime_get_ns();
 	else
 		q->slot.slot_next = 0;
@@ -926,16 +939,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_NETEM_DELAY_DIST]) {
-		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
-		if (ret) {
-			/* recover clg and loss_model, in case of
-			 * q->clg and q->loss_model were modified
-			 * in get_loss_clg()
-			 */
-			q->clg = old_clg;
-			q->loss_model = old_loss_model;
-			return ret;
-		}
+		ret = get_dist_table(sch, &q->delay_dist,
+				     tb[TCA_NETEM_DELAY_DIST]);
+		if (ret)
+			goto get_table_failure;
+	}
+
+	if (tb[TCA_NETEM_SLOT_DIST]) {
+		ret = get_dist_table(sch, &q->slot_dist,
+				     tb[TCA_NETEM_SLOT_DIST]);
+		if (ret)
+			goto get_table_failure;
 	}
 
 	sch->limit = qopt->limit;
@@ -983,6 +997,15 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 		get_slot(q, tb[TCA_NETEM_SLOT]);
 
 	return ret;
+
+get_table_failure:
+	/* recover clg and loss_model, in case of
+	 * q->clg and q->loss_model were modified
+	 * in get_loss_clg()
+	 */
+	q->clg = old_clg;
+	q->loss_model = old_loss_model;
+	return ret;
 }
 
 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1011,6 +1034,7 @@ static void netem_destroy(struct Qdisc *sch)
 	if (q->qdisc)
 		qdisc_destroy(q->qdisc);
 	dist_free(q->delay_dist);
+	dist_free(q->slot_dist);
 }
 
 static int dump_loss_model(const struct netem_sched_data *q,
@@ -1127,7 +1151,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
-	if (q->slot_config.min_delay | q->slot_config.max_delay) {
+	if (q->slot_config.min_delay | q->slot_config.max_delay |
+	    q->slot_config.dist_jitter) {
 		slot = q->slot_config;
 		if (slot.max_packets == INT_MAX)
 			slot.max_packets = 0;
-- 
cgit v1.2.3


From d020d4559de9baf47cafa2669f29ea59d11a914c Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Wed, 27 Jun 2018 13:33:31 -0400
Subject: net sched actions: fix coding style in pedit headers

Fix coding style issues in tc pedit headers detected by the
checkpatch script.

Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_pedit.h        | 1 +
 include/uapi/linux/tc_act/tc_pedit.h | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index 227a6f1d02f4..fac3ad4a86de 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -17,6 +17,7 @@ struct tcf_pedit {
 	struct tc_pedit_key	*tcfp_keys;
 	struct tcf_pedit_key_ex	*tcfp_keys_ex;
 };
+
 #define to_pedit(a) ((struct tcf_pedit *)a)
 
 static inline bool is_tcf_pedit(const struct tc_action *a)
diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
index 162d1094c41c..24ec792dacc1 100644
--- a/include/uapi/linux/tc_act/tc_pedit.h
+++ b/include/uapi/linux/tc_act/tc_pedit.h
@@ -17,13 +17,15 @@ enum {
 	TCA_PEDIT_KEY_EX,
 	__TCA_PEDIT_MAX
 };
+
 #define TCA_PEDIT_MAX (__TCA_PEDIT_MAX - 1)
-                                                                                
+
 enum {
 	TCA_PEDIT_KEY_EX_HTYPE = 1,
 	TCA_PEDIT_KEY_EX_CMD = 2,
 	__TCA_PEDIT_KEY_EX_MAX
 };
+
 #define TCA_PEDIT_KEY_EX_MAX (__TCA_PEDIT_KEY_EX_MAX - 1)
 
  /* TCA_PEDIT_KEY_EX_HDR_TYPE_NETWROK is a special case for legacy users. It
@@ -38,6 +40,7 @@ enum pedit_header_type {
 	TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
 	__PEDIT_HDR_TYPE_MAX,
 };
+
 #define TCA_PEDIT_HDR_TYPE_MAX (__PEDIT_HDR_TYPE_MAX - 1)
 
 enum pedit_cmd {
@@ -45,6 +48,7 @@ enum pedit_cmd {
 	TCA_PEDIT_KEY_EX_CMD_ADD = 1,
 	__PEDIT_CMD_MAX,
 };
+
 #define TCA_PEDIT_CMD_MAX (__PEDIT_CMD_MAX - 1)
 
 struct tc_pedit_key {
@@ -55,13 +59,14 @@ struct tc_pedit_key {
 	__u32           offmask;
 	__u32           shift;
 };
-                                                                                
+
 struct tc_pedit_sel {
 	tc_gen;
 	unsigned char           nkeys;
 	unsigned char           flags;
 	struct tc_pedit_key     keys[0];
 };
+
 #define tc_pedit tc_pedit_sel
 
 #endif
-- 
cgit v1.2.3


From f564650106a6e85702660fefd59fdff0877ab46a Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fbl@redhat.com>
Date: Wed, 27 Jun 2018 10:34:25 -0300
Subject: netfilter: check if the socket netns is correct.

Netfilter assumes that if the socket is present in the skb, then
it can be used because that reference is cleaned up while the skb
is crossing netns.

We want to change that to preserve the socket reference in a future
patch, so this is a preparation updating netfilter to check if the
socket netns matches before use it.

Signed-off-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_log.h         | 3 ++-
 net/ipv4/netfilter/nf_log_ipv4.c       | 8 ++++----
 net/ipv6/netfilter/nf_log_ipv6.c       | 8 ++++----
 net/netfilter/nf_conntrack_broadcast.c | 2 +-
 net/netfilter/nf_log_common.c          | 5 +++--
 net/netfilter/nf_nat_core.c            | 6 +++++-
 net/netfilter/nft_meta.c               | 9 ++++++---
 net/netfilter/nft_socket.c             | 5 ++++-
 net/netfilter/xt_cgroup.c              | 6 ++++--
 net/netfilter/xt_owner.c               | 2 +-
 net/netfilter/xt_recent.c              | 3 ++-
 net/netfilter/xt_socket.c              | 8 ++++++++
 12 files changed, 44 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index e811ac07ea94..0d3920896d50 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -106,7 +106,8 @@ int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 			   u8 proto, int fragment, unsigned int offset,
 			   unsigned int logflags);
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk);
 void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 			       unsigned int hooknum, const struct sk_buff *skb,
 			       const struct net_device *in,
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..1e6f28c97d3a 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int iphoff)
 {
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
 				nf_log_buf_add(m, "[");
-				dump_ipv4_packet(m, info, skb,
+				dump_ipv4_packet(net, m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
 				nf_log_buf_add(m, "] ");
 			}
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && !iphoff)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv4_mac_header(m, loginfo, skb);
 
-	dump_ipv4_packet(m, loginfo, skb, 0);
+	dump_ipv4_packet(net, m, loginfo, skb, 0);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..c6bf580d0f33 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct nf_log_buf *m,
+static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int ip6hoff,
 			     int recurse)
@@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (recurse) {
 				nf_log_buf_add(m, "[");
-				dump_ipv6_packet(m, info, skb,
+				dump_ipv6_packet(net, m, info, skb,
 						 ptr + sizeof(_icmp6h), 0);
 				nf_log_buf_add(m, "] ");
 			}
@@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && recurse)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (recurse && skb->mark)
@@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv6_mac_header(m, loginfo, skb);
 
-	dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a1086bdec242..5423b197d98a 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
+	if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index dc61399e30be..a8c5c846aec1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
 
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk)
 {
-	if (!sk || !sk_fullsock(sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
 		return;
 
 	read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 46f9df99d276..86df2a1666fd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -108,6 +108,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	struct sock *sk = skb->sk;
 	int err;
 
 	err = xfrm_decode_session(skb, &fl, family);
@@ -119,7 +120,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+	if (sk && !net_eq(net, sock_net(sk)))
+		sk = NULL;
+
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1105a23bda5e..2b94dcc43456 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKUID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKGID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	case NFT_META_CGROUP:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
 		break;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 74e1b3bd6954..998c2b546f6d 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -23,6 +23,9 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	struct sock *sk = skb->sk;
 	u32 *dest = &regs->data[priv->dreg];
 
+	if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		switch(nft_pf(pkt)) {
 		case NFPROTO_IPV4:
@@ -39,7 +42,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 			return;
 		}
 
-	if(!sk) {
+	if (!sk) {
 		nft_reg_store8(dest, 0);
 		return;
 	}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7df2dece57d3..5d92e1781980 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -72,8 +72,9 @@ static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_cgroup_info_v0 *info = par->matchinfo;
+	struct sock *sk = skb->sk;
 
-	if (skb->sk == NULL || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
@@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_cgroup_info_v1 *info = par->matchinfo;
 	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
 	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
 
-	if (!skb->sk || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	if (ancestor)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3d705c688a27..46686fb73784 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sock *sk = skb_to_full_sk(skb);
 	struct net *net = xt_net(par);
 
-	if (sk == NULL || sk->sk_socket == NULL)
+	if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
 		return (info->match ^ info->invert) == 0;
 	else if (info->match & info->invert & XT_OWNER_SOCKET)
 		/*
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 07085c22b19c..f44de4bc2100 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	/* use TTL as seen before forwarding */
-	if (xt_out(par) != NULL && skb->sk == NULL)
+	if (xt_out(par) != NULL &&
+	    (!skb->sk || !net_eq(net, sock_net(skb->sk))))
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5c0779c4fa3c..0472f3472842 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -56,8 +56,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
@@ -113,8 +117,12 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
-- 
cgit v1.2.3


From 2bbc46e811f0d40ed92ff9c104fce6618049f726 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Thu, 21 Jun 2018 05:41:21 -0400
Subject: media: v4l-common: Make v4l2_find_nearest_size more sparse-friendly

This sparse warning is emitted by using v4l2_find_nearest_size in some
cases. Fix it in the framework.

>> drivers/media/i2c/ov5640.c:1394:14: sparse: incorrect type in assignment
+(different base types) @@    expected struct ov5640_mode_info const *mode @@
+got ststruct ov5640_mode_info const *mode @@
   drivers/media/i2c/ov5640.c:1394:14:    expected struct ov5640_mode_info const
+*mode
   drivers/media/i2c/ov5640.c:1394:14:    got struct ov5640_mode_info const ( *<
+noident> )[9]

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/v4l2-common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 160bca96d524..cdc87ec61e54 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -338,7 +338,7 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
 	({								\
 		BUILD_BUG_ON(sizeof((array)->width_field) != sizeof(u32) || \
 			     sizeof((array)->height_field) != sizeof(u32)); \
-		(typeof(&(*(array))))__v4l2_find_nearest_size(		\
+		(typeof(&(array)[0]))__v4l2_find_nearest_size(		\
 			(array), array_size, sizeof(*(array)),		\
 			offsetof(typeof(*(array)), width_field),	\
 			offsetof(typeof(*(array)), height_field),	\
-- 
cgit v1.2.3


From 4b09791ba059cc5a5ec7d69049f5d05da65b6418 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnáček <omosnace@redhat.com>
Date: Tue, 26 Jun 2018 13:04:42 +0200
Subject: cred: conditionally declare groups-related functions

The groups-related functions declared in include/linux/cred.h are
defined in kernel/groups.c, which is compiled only when
CONFIG_MULTIUSER=y. Move all these function declarations under #ifdef
CONFIG_MULTIUSER to help avoid accidental usage in contexts where
CONFIG_MULTIUSER might be disabled.

This patch also adds a fallback for groups_search(). Currently this
function is only called from kernel/groups.c itself and
security/keys/permissions.c, where the call is (by coincidence)
optimized away in case CONFIG_MULTIUSER=n. However, the audit subsystem
(which does not depend on CONFIG_MULTIUSER) calls this function in
-next, so the fallback will be needed to avoid compilation errors or
ugly workarounds.

See also:
https://lkml.org/lkml/2018/6/20/670
https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git/commit/?h=next&id=af85d1772e31fed34165a1b3decef340cf4080c0

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/cred.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 631286535d0f..7eed6101c791 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -65,6 +65,12 @@ extern void groups_free(struct group_info *);
 
 extern int in_group_p(kgid_t);
 extern int in_egroup_p(kgid_t);
+extern int groups_search(const struct group_info *, kgid_t);
+
+extern int set_current_groups(struct group_info *);
+extern void set_groups(struct cred *, struct group_info *);
+extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
 #else
 static inline void groups_free(struct group_info *group_info)
 {
@@ -78,12 +84,11 @@ static inline int in_egroup_p(kgid_t grp)
 {
         return 1;
 }
+static inline int groups_search(const struct group_info *group_info, kgid_t grp)
+{
+	return 1;
+}
 #endif
-extern int set_current_groups(struct group_info *);
-extern void set_groups(struct cred *, struct group_info *);
-extern int groups_search(const struct group_info *, kgid_t);
-extern bool may_setgroups(void);
-extern void groups_sort(struct group_info *);
 
 /*
  * The security context of a task
-- 
cgit v1.2.3


From 4c79579b44b1876444f4d04de31c1a37098a0350 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 26 Jun 2018 16:21:18 -0700
Subject: bpf: Change bpf_fib_lookup to return lookup status

For ACLs implemented using either FIB rules or FIB entries, the BPF
program needs the FIB lookup status to be able to drop the packet.
Since the bpf_fib_lookup API has not reached a released kernel yet,
change the return code to contain an encoding of the FIB lookup
result and return the nexthop device index in the params struct.

In addition, inform the BPF program of any post FIB lookup reason as
to why the packet needs to go up the stack.

The fib result for unicast routes must have an egress device, so remove
the check that it is non-NULL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h   | 28 ++++++++++++---
 net/core/filter.c          | 86 +++++++++++++++++++++++++++++-----------------
 samples/bpf/xdp_fwd_kern.c |  8 ++---
 3 files changed, 81 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59b19b6a40d7..b7db3261c62d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1857,7 +1857,8 @@ union bpf_attr {
  *		is resolved), the nexthop address is returned in ipv4_dst
  *		or ipv6_dst based on family, smac is set to mac address of
  *		egress device, dmac is set to nexthop mac address, rt_metric
- *		is set to metric from route (IPv4/IPv6 only).
+ *		is set to metric from route (IPv4/IPv6 only), and ifindex
+ *		is set to the device index of the nexthop from the FIB lookup.
  *
  *             *plen* argument is the size of the passed in struct.
  *             *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
  *             *ctx* is either **struct xdp_md** for XDP programs or
  *             **struct sk_buff** tc cls_act programs.
  *     Return
- *             Egress device index on success, 0 if packet needs to continue
- *             up the stack for further processing or a negative error in case
- *             of failure.
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (packet is forwarded, nexthop neighbor exists)
+ *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ *		*     packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
 #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
 #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
 
+enum {
+	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
+	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
+	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
+	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
+	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+};
+
 struct bpf_fib_lookup {
 	/* input:  network family for lookup (AF_INET, AF_INET6)
 	 * output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
 
 	/* total length of packet from network header - used for MTU check */
 	__u16	tot_len;
-	__u32	ifindex;  /* L3 device index for lookup */
+
+	/* input: L3 device index for lookup
+	 * output: device index from FIB lookup
+	 */
+	__u32	ifindex;
 
 	union {
 		/* inputs to lookup */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..0ca6907d7efe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
 	params->h_vlan_TCI = 0;
 	params->h_vlan_proto = 0;
+	params->ifindex = dev->ifindex;
 
-	return dev->ifindex;
+	return 0;
 }
 #endif
 
@@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	/* verify forwarding is enabled on this interface */
 	in_dev = __in_dev_get_rcu(dev);
 	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
-		return 0;
+		return BPF_FIB_LKUP_RET_FWD_DISABLED;
 
 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
 		fl4.flowi4_iif = 1;
@@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		tb = fib_get_table(net, tbid);
 		if (unlikely(!tb))
-			return 0;
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
 	} else {
@@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
 	}
 
-	if (err || res.type != RTN_UNICAST)
-		return 0;
+	if (err) {
+		/* map fib lookup errors to RTN_ type */
+		if (err == -EINVAL)
+			return BPF_FIB_LKUP_RET_BLACKHOLE;
+		if (err == -EHOSTUNREACH)
+			return BPF_FIB_LKUP_RET_UNREACHABLE;
+		if (err == -EACCES)
+			return BPF_FIB_LKUP_RET_PROHIBIT;
+
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
+	}
+
+	if (res.type != RTN_UNICAST)
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	if (res.fi->fib_nhs > 1)
 		fib_select_path(net, &res, &fl4, NULL);
@@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (check_mtu) {
 		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
 		if (params->tot_len > mtu)
-			return 0;
+			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
 	nh = &res.fi->fib_nh[res.nh_sel];
 
 	/* do not handle lwt encaps right now */
 	if (nh->nh_lwtstate)
-		return 0;
+		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
 	dev = nh->nh_dev;
-	if (unlikely(!dev))
-		return 0;
-
 	if (nh->nh_gw)
 		params->ipv4_dst = nh->nh_gw;
 
@@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	 * rcu_read_lock_bh is not needed here
 	 */
 	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
-	if (neigh)
-		return bpf_fib_set_fwd_params(params, neigh, dev);
+	if (!neigh)
+		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return 0;
+	return bpf_fib_set_fwd_params(params, neigh, dev);
 }
 #endif
 
@@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	/* link local addresses are never forwarded */
 	if (rt6_need_strict(dst) || rt6_need_strict(src))
-		return 0;
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	dev = dev_get_by_index_rcu(net, params->ifindex);
 	if (unlikely(!dev))
@@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	idev = __in6_dev_get_safely(dev);
 	if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
-		return 0;
+		return BPF_FIB_LKUP_RET_FWD_DISABLED;
 
 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
 		fl6.flowi6_iif = 1;
@@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		tb = ipv6_stub->fib6_get_table(net, tbid);
 		if (unlikely(!tb))
-			return 0;
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
 	} else {
@@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	}
 
 	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
-		return 0;
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
+
+	if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
+		switch (f6i->fib6_type) {
+		case RTN_BLACKHOLE:
+			return BPF_FIB_LKUP_RET_BLACKHOLE;
+		case RTN_UNREACHABLE:
+			return BPF_FIB_LKUP_RET_UNREACHABLE;
+		case RTN_PROHIBIT:
+			return BPF_FIB_LKUP_RET_PROHIBIT;
+		default:
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
+		}
+	}
 
-	if (unlikely(f6i->fib6_flags & RTF_REJECT ||
-	    f6i->fib6_type != RTN_UNICAST))
-		return 0;
+	if (f6i->fib6_type != RTN_UNICAST)
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
 		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (check_mtu) {
 		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
 		if (params->tot_len > mtu)
-			return 0;
+			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
 	if (f6i->fib6_nh.nh_lwtstate)
-		return 0;
+		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
 	if (f6i->fib6_flags & RTF_GATEWAY)
 		*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	 */
 	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
 				      ndisc_hashfn, dst, dev);
-	if (neigh)
-		return bpf_fib_set_fwd_params(params, neigh, dev);
+	if (!neigh)
+		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return 0;
+	return bpf_fib_set_fwd_params(params, neigh, dev);
 }
 #endif
 
@@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
 {
 	struct net *net = dev_net(skb->dev);
-	int index = -EAFNOSUPPORT;
+	int rc = -EAFNOSUPPORT;
 
 	if (plen < sizeof(*params))
 		return -EINVAL;
@@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	switch (params->family) {
 #if IS_ENABLED(CONFIG_INET)
 	case AF_INET:
-		index = bpf_ipv4_fib_lookup(net, params, flags, false);
+		rc = bpf_ipv4_fib_lookup(net, params, flags, false);
 		break;
 #endif
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		index = bpf_ipv6_fib_lookup(net, params, flags, false);
+		rc = bpf_ipv6_fib_lookup(net, params, flags, false);
 		break;
 #endif
 	}
 
-	if (index > 0) {
+	if (!rc) {
 		struct net_device *dev;
 
-		dev = dev_get_by_index_rcu(net, index);
+		dev = dev_get_by_index_rcu(net, params->ifindex);
 		if (!is_skb_forwardable(dev, skb))
-			index = 0;
+			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	return index;
+	return rc;
 }
 
 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index 6673cdb9f55c..a7e94e7ff87d 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 	struct ethhdr *eth = data;
 	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
-	int out_index;
 	u16 h_proto;
 	u64 nh_off;
+	int rc;
 
 	nh_off = sizeof(*eth);
 	if (data + nh_off > data_end)
@@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 
 	fib_params.ifindex = ctx->ingress_ifindex;
 
-	out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+	rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
 
 	/* verify egress index has xdp support
 	 * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
@@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 	 * NOTE: without verification that egress index supports XDP
 	 *       forwarding packets are dropped.
 	 */
-	if (out_index > 0) {
+	if (rc == 0) {
 		if (h_proto == htons(ETH_P_IP))
 			ip_decrease_ttl(iph);
 		else if (h_proto == htons(ETH_P_IPV6))
@@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 
 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
-		return bpf_redirect_map(&tx_port, out_index, 0);
+		return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
 	}
 
 	return XDP_PASS;
-- 
cgit v1.2.3


From b6e71bdebb12cb79f931db358066a33f5f526b6a Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Wed, 27 Jun 2018 14:39:02 -0700
Subject: ila: Flush netlink command to clear xlat table

Add ILA_CMD_FLUSH netlink command to clear the ILA translation table.

Signed-off-by: Tom Herbert <tom@quantonium.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ila.h |  1 +
 net/ipv6/ila/ila.h       |  1 +
 net/ipv6/ila/ila_main.c  |  6 +++++
 net/ipv6/ila/ila_xlat.c  | 62 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 68 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ila.h b/include/uapi/linux/ila.h
index 483b77af4eb8..db45d3e49a12 100644
--- a/include/uapi/linux/ila.h
+++ b/include/uapi/linux/ila.h
@@ -30,6 +30,7 @@ enum {
 	ILA_CMD_ADD,
 	ILA_CMD_DEL,
 	ILA_CMD_GET,
+	ILA_CMD_FLUSH,
 
 	__ILA_CMD_MAX,
 };
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index faba7824ea56..1f747bcbec29 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -123,6 +123,7 @@ void ila_xlat_exit_net(struct net *net);
 int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
 int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info);
 int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info);
 int ila_xlat_nl_dump_start(struct netlink_callback *cb);
 int ila_xlat_nl_dump_done(struct netlink_callback *cb);
 int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index f6ac6b14577e..18fac76b9520 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -26,6 +26,12 @@ static const struct genl_ops ila_nl_ops[] = {
 		.policy = ila_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = ILA_CMD_FLUSH,
+		.doit = ila_xlat_nl_cmd_flush,
+		.policy = ila_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 	{
 		.cmd = ILA_CMD_GET,
 		.doit = ila_xlat_nl_cmd_get_mapping,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index d05de891dfb6..51a15ce50a64 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -164,9 +164,9 @@ static inline void ila_release(struct ila_map *ila)
 	kfree_rcu(ila, rcu);
 }
 
-static void ila_free_cb(void *ptr, void *arg)
+static void ila_free_node(struct ila_map *ila)
 {
-	struct ila_map *ila = (struct ila_map *)ptr, *next;
+	struct ila_map *next;
 
 	/* Assume rcu_readlock held */
 	while (ila) {
@@ -176,6 +176,11 @@ static void ila_free_cb(void *ptr, void *arg)
 	}
 }
 
+static void ila_free_cb(void *ptr, void *arg)
+{
+	ila_free_node((struct ila_map *)ptr);
+}
+
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
 
 static unsigned int
@@ -365,6 +370,59 @@ int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan,
+					    struct ila_map *ila)
+{
+	return ila_get_lock(ilan, ila->xp.ip.locator_match);
+}
+
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct ila_net *ilan = net_generic(net, ila_net_id);
+	struct rhashtable_iter iter;
+	struct ila_map *ila;
+	spinlock_t *lock;
+	int ret;
+
+	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
+	if (ret)
+		goto done;
+
+	rhashtable_walk_start(&iter);
+
+	for (;;) {
+		ila = rhashtable_walk_next(&iter);
+
+		if (IS_ERR(ila)) {
+			if (PTR_ERR(ila) == -EAGAIN)
+				continue;
+			ret = PTR_ERR(ila);
+			goto done;
+		} else if (!ila) {
+			break;
+		}
+
+		lock = lock_from_ila_map(ilan, ila);
+
+		spin_lock(lock);
+
+		ret = rhashtable_remove_fast(&ilan->xlat.rhash_table,
+					     &ila->node, rht_params);
+		if (!ret)
+			ila_free_node(ila);
+
+		spin_unlock(lock);
+
+		if (ret)
+			break;
+	}
+
+done:
+	rhashtable_walk_stop(&iter);
+	return ret;
+}
+
 static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
 {
 	if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
-- 
cgit v1.2.3


From 0894aa28aece680a81e28906e2f328084a3d55c3 Mon Sep 17 00:00:00 2001
From: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Date: Wed, 13 Jun 2018 13:04:48 +0530
Subject: dmaengine: xilinx_dma: Enable VDMA S2MM vertical flip support

Vertical flip state is exported in xilinx_vdma_config and depending
on IP configuration(c_enable_vert_flip) vertical flip state is
programmed in hardware.

Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Kedareswara rao Appana <appanad@xilinx.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 22 ++++++++++++++++++++++
 include/linux/dma/xilinx_dma.h  |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 27b523530c4a..c12442312595 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -115,6 +115,9 @@
 #define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
 #define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
 
+#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP	0x00ec
+#define XILINX_VDMA_ENABLE_VERTICAL_FLIP	BIT(0)
+
 /* HW specific definitions */
 #define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
 
@@ -340,6 +343,7 @@ struct xilinx_dma_tx_descriptor {
  * @start_transfer: Differentiate b/w DMA IP's transfer
  * @stop_transfer: Differentiate b/w DMA IP's quiesce
  * @tdest: TDEST value for mcdma
+ * @has_vflip: S2MM vertical flip
  */
 struct xilinx_dma_chan {
 	struct xilinx_dma_device *xdev;
@@ -376,6 +380,7 @@ struct xilinx_dma_chan {
 	void (*start_transfer)(struct xilinx_dma_chan *chan);
 	int (*stop_transfer)(struct xilinx_dma_chan *chan);
 	u16 tdest;
+	bool has_vflip;
 };
 
 /**
@@ -1092,6 +1097,14 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 				desc->async_tx.phys);
 
 	/* Configure the hardware using info in the config structure */
+	if (chan->has_vflip) {
+		reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
+		reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		reg |= config->vflip_en;
+		dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP,
+			  reg);
+	}
+
 	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
 
 	if (config->frm_cnt_en)
@@ -2105,6 +2118,8 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
 	}
 
 	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	chan->config.vflip_en = cfg->vflip_en;
+
 	if (cfg->park)
 		chan->config.park_frm = cfg->park_frm;
 	else
@@ -2428,6 +2443,13 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		chan->direction = DMA_DEV_TO_MEM;
 		chan->id = chan_id;
 		chan->tdest = chan_id - xdev->nr_channels;
+		chan->has_vflip = of_property_read_bool(node,
+					"xlnx,enable-vert-flip");
+		if (chan->has_vflip) {
+			chan->config.vflip_en = dma_read(chan,
+				XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) &
+				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		}
 
 		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h
index 34b98f276ed0..5b6e61e4b3aa 100644
--- a/include/linux/dma/xilinx_dma.h
+++ b/include/linux/dma/xilinx_dma.h
@@ -27,6 +27,7 @@
  * @delay: Delay counter
  * @reset: Reset Channel
  * @ext_fsync: External Frame Sync source
+ * @vflip_en:  Vertical Flip enable
  */
 struct xilinx_vdma_config {
 	int frm_dly;
@@ -39,6 +40,7 @@ struct xilinx_vdma_config {
 	int delay;
 	int reset;
 	int ext_fsync;
+	bool vflip_en;
 };
 
 int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
-- 
cgit v1.2.3


From a7160670b5e2d6b59e0f7a5b7e5bcef3b532c24c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Wed, 27 Jun 2018 21:33:54 +0200
Subject: ASoC: pxa: clean up function names in pxa2xx-lib

Clean up the namespace a bit and drop the __ prefix of all functions
exported by pxa2xx-lib. This improves the readability of the code.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/pxa2xx-lib.h | 10 +++++-----
 sound/arm/pxa2xx-ac97.c    | 10 +++++-----
 sound/arm/pxa2xx-pcm-lib.c | 22 +++++++++++-----------
 sound/soc/pxa/pxa2xx-pcm.c | 21 +++++++--------------
 4 files changed, 28 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/sound/pxa2xx-lib.h b/include/sound/pxa2xx-lib.h
index 63f75450d3db..b43de38de8b2 100644
--- a/include/sound/pxa2xx-lib.h
+++ b/include/sound/pxa2xx-lib.h
@@ -10,14 +10,14 @@ struct snd_pcm_substream;
 struct snd_pcm_hw_params;
 struct snd_pcm;
 
-extern int __pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
+extern int pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
 				struct snd_pcm_hw_params *params);
-extern int __pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream);
+extern int pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream);
 extern int pxa2xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd);
 extern snd_pcm_uframes_t pxa2xx_pcm_pointer(struct snd_pcm_substream *substream);
-extern int __pxa2xx_pcm_prepare(struct snd_pcm_substream *substream);
-extern int __pxa2xx_pcm_open(struct snd_pcm_substream *substream);
-extern int __pxa2xx_pcm_close(struct snd_pcm_substream *substream);
+extern int pxa2xx_pcm_prepare(struct snd_pcm_substream *substream);
+extern int pxa2xx_pcm_open(struct snd_pcm_substream *substream);
+extern int pxa2xx_pcm_close(struct snd_pcm_substream *substream);
 extern int pxa2xx_pcm_mmap(struct snd_pcm_substream *substream,
 	struct vm_area_struct *vma);
 extern int pxa2xx_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream);
diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c
index 7d8d7b7199dc..0d624337857b 100644
--- a/sound/arm/pxa2xx-ac97.c
+++ b/sound/arm/pxa2xx-ac97.c
@@ -70,7 +70,7 @@ static int pxa2xx_ac97_pcm_open(struct snd_pcm_substream *substream)
 	pxa2xx_audio_ops_t *platform_ops;
 	int ret, i;
 
-	ret = __pxa2xx_pcm_open(substream);
+	ret = pxa2xx_pcm_open(substream);
 	if (ret)
 		return ret;
 
@@ -86,7 +86,7 @@ static int pxa2xx_ac97_pcm_open(struct snd_pcm_substream *substream)
 	if (platform_ops && platform_ops->startup) {
 		ret = platform_ops->startup(substream, platform_ops->priv);
 		if (ret < 0)
-			__pxa2xx_pcm_close(substream);
+			pxa2xx_pcm_close(substream);
 	}
 
 	return ret;
@@ -110,7 +110,7 @@ static int pxa2xx_ac97_pcm_prepare(struct snd_pcm_substream *substream)
 		  AC97_PCM_FRONT_DAC_RATE : AC97_PCM_LR_ADC_RATE;
 	int ret;
 
-	ret = __pxa2xx_pcm_prepare(substream);
+	ret = pxa2xx_pcm_prepare(substream);
 	if (ret < 0)
 		return ret;
 
@@ -178,8 +178,8 @@ static const struct snd_pcm_ops pxa2xx_pcm_ops = {
 	.open		= pxa2xx_ac97_pcm_open,
 	.close		= pxa2xx_ac97_pcm_close,
 	.ioctl		= snd_pcm_lib_ioctl,
-	.hw_params	= __pxa2xx_pcm_hw_params,
-	.hw_free	= __pxa2xx_pcm_hw_free,
+	.hw_params	= pxa2xx_pcm_hw_params,
+	.hw_free	= pxa2xx_pcm_hw_free,
 	.prepare	= pxa2xx_ac97_pcm_prepare,
 	.trigger	= pxa2xx_pcm_trigger,
 	.pointer	= pxa2xx_pcm_pointer,
diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c
index b927fa5ddbc0..dc56dbebf441 100644
--- a/sound/arm/pxa2xx-pcm-lib.c
+++ b/sound/arm/pxa2xx-pcm-lib.c
@@ -33,8 +33,8 @@ static const struct snd_pcm_hardware pxa2xx_pcm_hardware = {
 	.fifo_size		= 32,
 };
 
-int __pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
-				struct snd_pcm_hw_params *params)
+int pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
+			 struct snd_pcm_hw_params *params)
 {
 	struct dma_chan *chan = snd_dmaengine_pcm_get_chan(substream);
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -62,14 +62,14 @@ int __pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
 
 	return 0;
 }
-EXPORT_SYMBOL(__pxa2xx_pcm_hw_params);
+EXPORT_SYMBOL(pxa2xx_pcm_hw_params);
 
-int __pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream)
+int pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream)
 {
 	snd_pcm_set_runtime_buffer(substream, NULL);
 	return 0;
 }
-EXPORT_SYMBOL(__pxa2xx_pcm_hw_free);
+EXPORT_SYMBOL(pxa2xx_pcm_hw_free);
 
 int pxa2xx_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
@@ -84,13 +84,13 @@ pxa2xx_pcm_pointer(struct snd_pcm_substream *substream)
 }
 EXPORT_SYMBOL(pxa2xx_pcm_pointer);
 
-int __pxa2xx_pcm_prepare(struct snd_pcm_substream *substream)
+int pxa2xx_pcm_prepare(struct snd_pcm_substream *substream)
 {
 	return 0;
 }
-EXPORT_SYMBOL(__pxa2xx_pcm_prepare);
+EXPORT_SYMBOL(pxa2xx_pcm_prepare);
 
-int __pxa2xx_pcm_open(struct snd_pcm_substream *substream)
+int pxa2xx_pcm_open(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -127,13 +127,13 @@ int __pxa2xx_pcm_open(struct snd_pcm_substream *substream)
 		substream, dma_request_slave_channel(rtd->cpu_dai->dev,
 						     dma_params->chan_name));
 }
-EXPORT_SYMBOL(__pxa2xx_pcm_open);
+EXPORT_SYMBOL(pxa2xx_pcm_open);
 
-int __pxa2xx_pcm_close(struct snd_pcm_substream *substream)
+int pxa2xx_pcm_close(struct snd_pcm_substream *substream)
 {
 	return snd_dmaengine_pcm_close_release_chan(substream);
 }
-EXPORT_SYMBOL(__pxa2xx_pcm_close);
+EXPORT_SYMBOL(pxa2xx_pcm_close);
 
 int pxa2xx_pcm_mmap(struct snd_pcm_substream *substream,
 	struct vm_area_struct *vma)
diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c
index 445e691126e5..da252d1f732e 100644
--- a/sound/soc/pxa/pxa2xx-pcm.c
+++ b/sound/soc/pxa/pxa2xx-pcm.c
@@ -20,8 +20,8 @@
 #include <sound/pxa2xx-lib.h>
 #include <sound/dmaengine_pcm.h>
 
-static int pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
-	struct snd_pcm_hw_params *params)
+static int __pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
+				  struct snd_pcm_hw_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_dmaengine_dai_dma_data *dma;
@@ -33,23 +33,16 @@ static int pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
 	if (!dma)
 		return 0;
 
-	return __pxa2xx_pcm_hw_params(substream, params);
-}
-
-static int pxa2xx_pcm_hw_free(struct snd_pcm_substream *substream)
-{
-	__pxa2xx_pcm_hw_free(substream);
-
-	return 0;
+	return pxa2xx_pcm_hw_params(substream, params);
 }
 
 static const struct snd_pcm_ops pxa2xx_pcm_ops = {
-	.open		= __pxa2xx_pcm_open,
-	.close		= __pxa2xx_pcm_close,
+	.open		= pxa2xx_pcm_open,
+	.close		= pxa2xx_pcm_close,
 	.ioctl		= snd_pcm_lib_ioctl,
-	.hw_params	= pxa2xx_pcm_hw_params,
+	.hw_params	= __pxa2xx_pcm_hw_params,
 	.hw_free	= pxa2xx_pcm_hw_free,
-	.prepare	= __pxa2xx_pcm_prepare,
+	.prepare	= pxa2xx_pcm_prepare,
 	.trigger	= pxa2xx_pcm_trigger,
 	.pointer	= pxa2xx_pcm_pointer,
 	.mmap		= pxa2xx_pcm_mmap,
-- 
cgit v1.2.3


From 7afd1b0b2ef9a8120951188a955010ef92bdf885 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Wed, 27 Jun 2018 21:33:55 +0200
Subject: ASoC: pxa: move some functions to pxa2xx-lib

To get rid of some intermediate platform layers, move pxa2xx_soc_pcm_new()
and pxa2xx_pcm_ops in pxa2xx-lib.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/pxa2xx-lib.h |  3 +++
 sound/arm/pxa2xx-ac97.c    |  6 ++---
 sound/arm/pxa2xx-pcm-lib.c | 41 ++++++++++++++++++++++++++++++++++
 sound/soc/pxa/pxa2xx-pcm.c | 55 ----------------------------------------------
 4 files changed, 47 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/sound/pxa2xx-lib.h b/include/sound/pxa2xx-lib.h
index b43de38de8b2..6758fc12fa84 100644
--- a/include/sound/pxa2xx-lib.h
+++ b/include/sound/pxa2xx-lib.h
@@ -8,6 +8,7 @@
 /* PCM */
 struct snd_pcm_substream;
 struct snd_pcm_hw_params;
+struct snd_soc_pcm_runtime;
 struct snd_pcm;
 
 extern int pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
@@ -22,6 +23,8 @@ extern int pxa2xx_pcm_mmap(struct snd_pcm_substream *substream,
 	struct vm_area_struct *vma);
 extern int pxa2xx_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream);
 extern void pxa2xx_pcm_free_dma_buffers(struct snd_pcm *pcm);
+extern int pxa2xx_soc_pcm_new(struct snd_soc_pcm_runtime *rtd);
+extern const struct snd_pcm_ops pxa2xx_pcm_ops;
 
 /* AC97 */
 
diff --git a/sound/arm/pxa2xx-ac97.c b/sound/arm/pxa2xx-ac97.c
index 0d624337857b..1f72672262d0 100644
--- a/sound/arm/pxa2xx-ac97.c
+++ b/sound/arm/pxa2xx-ac97.c
@@ -174,7 +174,7 @@ static int pxa2xx_ac97_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(pxa2xx_ac97_pm_ops, pxa2xx_ac97_suspend, pxa2xx_ac97_resume);
 #endif
 
-static const struct snd_pcm_ops pxa2xx_pcm_ops = {
+static const struct snd_pcm_ops pxa2xx_ac97_pcm_ops = {
 	.open		= pxa2xx_ac97_pcm_open,
 	.close		= pxa2xx_ac97_pcm_close,
 	.ioctl		= snd_pcm_lib_ioctl,
@@ -203,13 +203,13 @@ static int pxa2xx_ac97_pcm_new(struct snd_card *card)
 		goto out;
 
 	stream = SNDRV_PCM_STREAM_PLAYBACK;
-	snd_pcm_set_ops(pcm, stream, &pxa2xx_pcm_ops);
+	snd_pcm_set_ops(pcm, stream, &pxa2xx_ac97_pcm_ops);
 	ret = pxa2xx_pcm_preallocate_dma_buffer(pcm, stream);
 	if (ret)
 		goto out;
 
 	stream = SNDRV_PCM_STREAM_CAPTURE;
-	snd_pcm_set_ops(pcm, stream, &pxa2xx_pcm_ops);
+	snd_pcm_set_ops(pcm, stream, &pxa2xx_ac97_pcm_ops);
 	ret = pxa2xx_pcm_preallocate_dma_buffer(pcm, stream);
 	if (ret)
 		goto out;
diff --git a/sound/arm/pxa2xx-pcm-lib.c b/sound/arm/pxa2xx-pcm-lib.c
index dc56dbebf441..add23d9b4ef6 100644
--- a/sound/arm/pxa2xx-pcm-lib.c
+++ b/sound/arm/pxa2xx-pcm-lib.c
@@ -179,6 +179,47 @@ void pxa2xx_pcm_free_dma_buffers(struct snd_pcm *pcm)
 }
 EXPORT_SYMBOL(pxa2xx_pcm_free_dma_buffers);
 
+int pxa2xx_soc_pcm_new(struct snd_soc_pcm_runtime *rtd)
+{
+	struct snd_card *card = rtd->card->snd_card;
+	struct snd_pcm *pcm = rtd->pcm;
+	int ret;
+
+	ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
+		ret = pxa2xx_pcm_preallocate_dma_buffer(pcm,
+			SNDRV_PCM_STREAM_PLAYBACK);
+		if (ret)
+			goto out;
+	}
+
+	if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
+		ret = pxa2xx_pcm_preallocate_dma_buffer(pcm,
+			SNDRV_PCM_STREAM_CAPTURE);
+		if (ret)
+			goto out;
+	}
+ out:
+	return ret;
+}
+EXPORT_SYMBOL(pxa2xx_soc_pcm_new);
+
+const struct snd_pcm_ops pxa2xx_pcm_ops = {
+	.open		= pxa2xx_pcm_open,
+	.close		= pxa2xx_pcm_close,
+	.ioctl		= snd_pcm_lib_ioctl,
+	.hw_params	= pxa2xx_pcm_hw_params,
+	.hw_free	= pxa2xx_pcm_hw_free,
+	.prepare	= pxa2xx_pcm_prepare,
+	.trigger	= pxa2xx_pcm_trigger,
+	.pointer	= pxa2xx_pcm_pointer,
+	.mmap		= pxa2xx_pcm_mmap,
+};
+EXPORT_SYMBOL(pxa2xx_pcm_ops);
+
 MODULE_AUTHOR("Nicolas Pitre");
 MODULE_DESCRIPTION("Intel PXA2xx sound library");
 MODULE_LICENSE("GPL");
diff --git a/sound/soc/pxa/pxa2xx-pcm.c b/sound/soc/pxa/pxa2xx-pcm.c
index da252d1f732e..a1df4ec76cbd 100644
--- a/sound/soc/pxa/pxa2xx-pcm.c
+++ b/sound/soc/pxa/pxa2xx-pcm.c
@@ -20,61 +20,6 @@
 #include <sound/pxa2xx-lib.h>
 #include <sound/dmaengine_pcm.h>
 
-static int __pxa2xx_pcm_hw_params(struct snd_pcm_substream *substream,
-				  struct snd_pcm_hw_params *params)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_dmaengine_dai_dma_data *dma;
-
-	dma = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
-
-	/* return if this is a bufferless transfer e.g.
-	 * codec <--> BT codec or GSM modem -- lg FIXME */
-	if (!dma)
-		return 0;
-
-	return pxa2xx_pcm_hw_params(substream, params);
-}
-
-static const struct snd_pcm_ops pxa2xx_pcm_ops = {
-	.open		= pxa2xx_pcm_open,
-	.close		= pxa2xx_pcm_close,
-	.ioctl		= snd_pcm_lib_ioctl,
-	.hw_params	= __pxa2xx_pcm_hw_params,
-	.hw_free	= pxa2xx_pcm_hw_free,
-	.prepare	= pxa2xx_pcm_prepare,
-	.trigger	= pxa2xx_pcm_trigger,
-	.pointer	= pxa2xx_pcm_pointer,
-	.mmap		= pxa2xx_pcm_mmap,
-};
-
-static int pxa2xx_soc_pcm_new(struct snd_soc_pcm_runtime *rtd)
-{
-	struct snd_card *card = rtd->card->snd_card;
-	struct snd_pcm *pcm = rtd->pcm;
-	int ret;
-
-	ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
-	if (ret)
-		return ret;
-
-	if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
-		ret = pxa2xx_pcm_preallocate_dma_buffer(pcm,
-			SNDRV_PCM_STREAM_PLAYBACK);
-		if (ret)
-			goto out;
-	}
-
-	if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
-		ret = pxa2xx_pcm_preallocate_dma_buffer(pcm,
-			SNDRV_PCM_STREAM_CAPTURE);
-		if (ret)
-			goto out;
-	}
- out:
-	return ret;
-}
-
 static const struct snd_soc_component_driver pxa2xx_soc_platform = {
 	.ops		= &pxa2xx_pcm_ops,
 	.pcm_new	= pxa2xx_soc_pcm_new,
-- 
cgit v1.2.3


From b0e9a2fe3ff971950833bc0ffc383babd9443bc4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 28 Jun 2018 15:31:00 +0800
Subject: sctp: add support for SCTP_REUSE_PORT sockopt

This feature is actually already supported by sk->sk_reuse which can be
set by socket level opt SO_REUSEADDR. But it's not working exactly as
RFC6458 demands in section 8.1.27, like:

  - This option only supports one-to-one style SCTP sockets
  - This socket option must not be used after calling bind()
    or sctp_bindx().

Besides, SCTP_REUSE_PORT sockopt should be provided for user's programs.
Otherwise, the programs with SCTP_REUSE_PORT from other systems will not
work in linux.

To separate it from the socket level version, this patch adds 'reuse' in
sctp_sock and it works pretty much as sk->sk_reuse, but with some extra
setup limitations that are needed when it is being enabled.

"It should be noted that the behavior of the socket-level socket option
to reuse ports and/or addresses for SCTP sockets is unspecified", so it
leaves SO_REUSEADDR as is for the compatibility.

Note that the name SCTP_REUSE_PORT is somewhat confusing, as its
functionality is nearly identical to SO_REUSEADDR, but with some
extra restrictions. Here it uses 'reuse' in sctp_sock instead of
'reuseport'. As for sk->sk_reuseport support for SCTP, it will be
added in another patch.

Thanks to Neil to make this clear.

v1->v2:
  - add sctp_sk->reuse to separate it from the socket level version.
v2->v3:
  - improve changelog according to Marcelo's suggestion.

Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 include/uapi/linux/sctp.h  |  1 +
 net/sctp/socket.c          | 62 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 57 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e0f962d27386..701a51736fa5 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -220,6 +220,7 @@ struct sctp_sock {
 	__u32 adaptation_ind;
 	__u32 pd_point;
 	__u16	nodelay:1,
+		reuse:1,
 		disable_fragments:1,
 		v4mapped:1,
 		frag_interleave:1,
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index b64d583bf053..c02986a284db 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -100,6 +100,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_RECVNXTINFO	33
 #define SCTP_DEFAULT_SNDINFO	34
 #define SCTP_AUTH_DEACTIVATE_KEY	35
+#define SCTP_REUSE_PORT		36
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0e91e83eea5a..bf11f9cacb63 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4170,6 +4170,28 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
+				      unsigned int optlen)
+{
+	int val;
+
+	if (!sctp_style(sk, TCP))
+		return -EOPNOTSUPP;
+
+	if (sctp_sk(sk)->ep->base.bind_addr.port)
+		return -EFAULT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->reuse = !!val;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4364,6 +4386,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_interleaving_supported(sk, optval,
 								optlen);
 		break;
+	case SCTP_REUSE_PORT:
+		retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7197,6 +7222,26 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_reuse_port(struct sock *sk, int len,
+				      char __user *optval,
+				      int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = sctp_sk(sk)->reuse;
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7392,6 +7437,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
 								optlen);
 		break;
+	case SCTP_REUSE_PORT:
+		retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7429,6 +7477,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
 
 static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
+	bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
 	struct sctp_bind_hashbucket *head; /* hash list */
 	struct sctp_bind_bucket *pp;
 	unsigned short snum;
@@ -7501,13 +7550,11 @@ pp_found:
 		 * used by other socket (pp->owner not empty); that other
 		 * socket is going to be sk2.
 		 */
-		int reuse = sk->sk_reuse;
 		struct sock *sk2;
 
 		pr_debug("%s: found a possible match\n", __func__);
 
-		if (pp->fastreuse && sk->sk_reuse &&
-			sk->sk_state != SCTP_SS_LISTENING)
+		if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
 			goto success;
 
 		/* Run through the list of sockets bound to the port
@@ -7525,7 +7572,7 @@ pp_found:
 			ep2 = sctp_sk(sk2)->ep;
 
 			if (sk == sk2 ||
-			    (reuse && sk2->sk_reuse &&
+			    (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
 			     sk2->sk_state != SCTP_SS_LISTENING))
 				continue;
 
@@ -7549,12 +7596,12 @@ pp_not_found:
 	 * SO_REUSEADDR on this socket -sk-).
 	 */
 	if (hlist_empty(&pp->owner)) {
-		if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
+		if (reuse && sk->sk_state != SCTP_SS_LISTENING)
 			pp->fastreuse = 1;
 		else
 			pp->fastreuse = 0;
 	} else if (pp->fastreuse &&
-		(!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
+		   (!reuse || sk->sk_state == SCTP_SS_LISTENING))
 		pp->fastreuse = 0;
 
 	/* We are set, so fill up all the data in the hash table
@@ -7685,7 +7732,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		err = 0;
 		sctp_unhash_endpoint(ep);
 		sk->sk_state = SCTP_SS_CLOSED;
-		if (sk->sk_reuse)
+		if (sk->sk_reuse || sctp_sk(sk)->reuse)
 			sctp_sk(sk)->bind_hash->fastreuse = 1;
 		goto out;
 	}
@@ -8550,6 +8597,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newsk->sk_no_check_tx = sk->sk_no_check_tx;
 	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
+	sctp_sk(newsk)->reuse = sp->reuse;
 
 	newsk->sk_shutdown = sk->sk_shutdown;
 	newsk->sk_destruct = sctp_destruct_sock;
-- 
cgit v1.2.3


From 256c87c17c53e60882a43dcf3e98f3bf859eaf6f Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 26 Jun 2018 21:39:36 -0700
Subject: net: check tunnel option type in tunnel flags

Check the tunnel option type stored in tunnel flags when creating options
for tunnels. Thereby ensuring we do not set geneve, vxlan or erspan tunnel
options on interfaces that are not associated with them.

Make sure all users of the infrastructure set correct flags, for the BPF
helper we have to set all bits to keep backward compatibility.

Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c           | 6 ++++--
 drivers/net/vxlan.c            | 3 ++-
 include/net/ip_tunnels.h       | 8 ++++++--
 net/core/filter.c              | 2 +-
 net/ipv4/ip_gre.c              | 2 ++
 net/ipv6/ip6_gre.c             | 2 ++
 net/openvswitch/flow_netlink.c | 7 ++++++-
 7 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 3e94375b9b01..471edd76ff55 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -236,7 +236,8 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
 		}
 		/* Update tunnel dst according to Geneve options. */
 		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
-					gnvh->options, gnvh->opt_len * 4);
+					gnvh->options, gnvh->opt_len * 4,
+					TUNNEL_GENEVE_OPT);
 	} else {
 		/* Drop packets w/ critical options,
 		 * since we don't support any...
@@ -675,7 +676,8 @@ static void geneve_build_header(struct genevehdr *geneveh,
 	geneveh->proto_type = htons(ETH_P_TEB);
 	geneveh->rsvd2 = 0;
 
-	ip_tunnel_info_opts_get(geneveh->options, info);
+	if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
+		ip_tunnel_info_opts_get(geneveh->options, info);
 }
 
 static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index cc14e0cd5647..7eb30d7c8bd7 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2122,7 +2122,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		vni = tunnel_id_to_key32(info->key.tun_id);
 		ifindex = 0;
 		dst_cache = &info->dst_cache;
-		if (info->options_len)
+		if (info->options_len &&
+		    info->key.tun_flags & TUNNEL_VXLAN_OPT)
 			md = ip_tunnel_info_opts(info);
 		ttl = info->key.ttl;
 		tos = info->key.tos;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 90ff430f5e9d..b0d022ff6ea1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -466,10 +466,12 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-					   const void *from, int len)
+					   const void *from, int len,
+					   __be16 flags)
 {
 	memcpy(ip_tunnel_info_opts(info), from, len);
 	info->options_len = len;
+	info->key.tun_flags |= flags;
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -511,9 +513,11 @@ static inline void ip_tunnel_info_opts_get(void *to,
 }
 
 static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
-					   const void *from, int len)
+					   const void *from, int len,
+					   __be16 flags)
 {
 	info->options_len = 0;
+	info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..dade922678f6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3582,7 +3582,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
 		return -ENOMEM;
 
-	ip_tunnel_info_opts_set(info, from, size);
+	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
 
 	return 0;
 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2d8efeecf619..c8ca5d8f0f75 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -587,6 +587,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto err_free_skb;
 
 	key = &tun_info->key;
+	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+		goto err_free_rt;
 	md = ip_tunnel_info_opts(tun_info);
 	if (!md)
 		goto err_free_rt;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c8cf2fdbb13b..367177786e34 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -990,6 +990,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
 		dsfield = key->tos;
+		if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+			goto tx_err;
 		md = ip_tunnel_info_opts(tun_info);
 		if (!md)
 			goto tx_err;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 492ab0c36f7c..391c4073a6dc 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2516,7 +2516,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct ovs_tunnel_info *ovs_tun;
 	struct nlattr *a;
 	int err = 0, start, opts_type;
+	__be16 dst_opt_type;
 
+	dst_opt_type = 0;
 	ovs_match_init(&match, &key, true, NULL);
 	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
 	if (opts_type < 0)
@@ -2528,10 +2530,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 			err = validate_geneve_opts(&key);
 			if (err < 0)
 				return err;
+			dst_opt_type = TUNNEL_GENEVE_OPT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			dst_opt_type = TUNNEL_VXLAN_OPT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			dst_opt_type = TUNNEL_ERSPAN_OPT;
 			break;
 		}
 	}
@@ -2574,7 +2579,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	 */
 	ip_tunnel_info_opts_set(tun_info,
 				TUN_METADATA_OPTS(&key, key.tun_opts_len),
-				key.tun_opts_len);
+				key.tun_opts_len, dst_opt_type);
 	add_nested_action_end(*sfa, start);
 
 	return err;
-- 
cgit v1.2.3


From 0ed5269f9e41f495c8e9020c85f5e1644c1afc57 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Tue, 26 Jun 2018 21:39:37 -0700
Subject: net/sched: add tunnel option support to act_tunnel_key

Allow setting tunnel options using the act_tunnel_key action.

Options are expressed as class:type:data and multiple options
may be listed using a comma delimiter.

 # ip link add name geneve0 type geneve dstport 0 external
 # tc qdisc add dev eth0 ingress
 # tc filter add dev eth0 protocol ip parent ffff: \
     flower indev eth0 \
        ip_proto udp \
        action tunnel_key \
            set src_ip 10.0.99.192 \
            dst_ip 10.0.99.193 \
            dst_port 6081 \
            id 11 \
            geneve_opts 0102:80:00800022,0102:80:00800022 \
    action mirred egress redirect dev geneve0

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_tunnel_key.h |  26 ++++
 net/sched/act_tunnel_key.c                | 214 +++++++++++++++++++++++++++++-
 2 files changed, 236 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index 72bbefe5d1d1..e284fec8c467 100644
--- a/include/uapi/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -36,9 +36,35 @@ enum {
 	TCA_TUNNEL_KEY_PAD,
 	TCA_TUNNEL_KEY_ENC_DST_PORT,	/* be16 */
 	TCA_TUNNEL_KEY_NO_CSUM,		/* u8 */
+	TCA_TUNNEL_KEY_ENC_OPTS,	/* Nested TCA_TUNNEL_KEY_ENC_OPTS_
+					 * attributes
+					 */
 	__TCA_TUNNEL_KEY_MAX,
 };
 
 #define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1)
 
+enum {
+	TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC,
+	TCA_TUNNEL_KEY_ENC_OPTS_GENEVE,		/* Nested
+						 * TCA_TUNNEL_KEY_ENC_OPTS_
+						 * attributes
+						 */
+	__TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_TUNNEL_KEY_ENC_OPTS_MAX (__TCA_TUNNEL_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+	TCA_TUNNEL_KEY_ENC_OPT_GENEVE_UNSPEC,
+	TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,		/* be16 */
+	TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,		/* u8 */
+	TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,		/* 4 to 128 bytes */
+
+	__TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX \
+	(__TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX - 1)
+
 #endif
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 20e98ed8d498..ea203e386a92 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/geneve.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -57,6 +58,135 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
 	return action;
 }
 
+static const struct nla_policy
+enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
+	[TCA_TUNNEL_KEY_ENC_OPTS_GENEVE]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]	   = { .type = NLA_U16 },
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]	   = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]	   = { .type = NLA_BINARY,
+						       .len = 128 },
+};
+
+static int
+tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
+			   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
+	int err, data_len, opt_len;
+	u8 *data;
+
+	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+			       nla, geneve_opt_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
+	    !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
+	    !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
+		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+		return -EINVAL;
+	}
+
+	data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+	data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+	if (data_len < 4) {
+		NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+		return -ERANGE;
+	}
+	if (data_len % 4) {
+		NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+		return -ERANGE;
+	}
+
+	opt_len = sizeof(struct geneve_opt) + data_len;
+	if (dst) {
+		struct geneve_opt *opt = dst;
+
+		WARN_ON(dst_len < opt_len);
+
+		opt->opt_class =
+			nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
+		opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
+		opt->length = data_len / 4; /* length is in units of 4 bytes */
+		opt->r1 = 0;
+		opt->r2 = 0;
+		opt->r3 = 0;
+
+		memcpy(opt + 1, data, data_len);
+	}
+
+	return opt_len;
+}
+
+static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
+				int dst_len, struct netlink_ext_ack *extack)
+{
+	int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
+	const struct nlattr *attr, *head = nla_data(nla);
+
+	err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+			   enc_opts_policy, extack);
+	if (err)
+		return err;
+
+	nla_for_each_attr(attr, head, len, rem) {
+		switch (nla_type(attr)) {
+		case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+			opt_len = tunnel_key_copy_geneve_opt(attr, dst,
+							     dst_len, extack);
+			if (opt_len < 0)
+				return opt_len;
+			opts_len += opt_len;
+			if (dst) {
+				dst_len -= opt_len;
+				dst += opt_len;
+			}
+			break;
+		}
+	}
+
+	if (!opts_len) {
+		NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
+		return -EINVAL;
+	}
+
+	if (rem > 0) {
+		NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
+		return -EINVAL;
+	}
+
+	return opts_len;
+}
+
+static int tunnel_key_get_opts_len(struct nlattr *nla,
+				   struct netlink_ext_ack *extack)
+{
+	return tunnel_key_copy_opts(nla, NULL, 0, extack);
+}
+
+static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
+			       int opts_len, struct netlink_ext_ack *extack)
+{
+	info->options_len = opts_len;
+	switch (nla_type(nla_data(nla))) {
+	case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+#if IS_ENABLED(CONFIG_INET)
+		info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+		return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+					    opts_len, extack);
+#else
+		return -EAFNOSUPPORT;
+#endif
+	default:
+		NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
+		return -EINVAL;
+	}
+}
+
 static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 	[TCA_TUNNEL_KEY_PARMS]	    = { .len = sizeof(struct tc_tunnel_key) },
 	[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
@@ -66,6 +196,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 	[TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
 	[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
 	[TCA_TUNNEL_KEY_NO_CSUM]      = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_OPTS]     = { .type = NLA_NESTED },
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -81,6 +212,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	struct tcf_tunnel_key *t;
 	bool exists = false;
 	__be16 dst_port = 0;
+	int opts_len = 0;
 	__be64 key_id;
 	__be16 flags;
 	int ret = 0;
@@ -128,6 +260,15 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
 			dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
 
+		if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
+			opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+							   extack);
+			if (opts_len < 0) {
+				ret = opts_len;
+				goto err_out;
+			}
+		}
+
 		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
 		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
 			__be32 saddr;
@@ -138,7 +279,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
 			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
 						    dst_port, flags,
-						    key_id, 0);
+						    key_id, opts_len);
 		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
 			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
 			struct in6_addr saddr;
@@ -162,6 +303,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			goto err_out;
 		}
 
+		if (opts_len) {
+			ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+						  &metadata->u.tun_info,
+						  opts_len, extack);
+			if (ret < 0)
+				goto err_out;
+		}
+
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
 		break;
 	default:
@@ -234,6 +383,61 @@ static void tunnel_key_release(struct tc_action *a)
 	}
 }
 
+static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
+				       const struct ip_tunnel_info *info)
+{
+	int len = info->options_len;
+	u8 *src = (u8 *)(info + 1);
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
+	if (!start)
+		return -EMSGSIZE;
+
+	while (len > 0) {
+		struct geneve_opt *opt = (struct geneve_opt *)src;
+
+		if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
+				 opt->opt_class) ||
+		    nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
+			       opt->type) ||
+		    nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
+			    opt->length * 4, opt + 1))
+			return -EMSGSIZE;
+
+		len -= sizeof(struct geneve_opt) + opt->length * 4;
+		src += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
+static int tunnel_key_opts_dump(struct sk_buff *skb,
+				const struct ip_tunnel_info *info)
+{
+	struct nlattr *start;
+	int err;
+
+	if (!info->options_len)
+		return 0;
+
+	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+		err = tunnel_key_geneve_opts_dump(skb, info);
+		if (err)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
 				     const struct ip_tunnel_info *info)
 {
@@ -284,8 +488,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 		goto nla_put_failure;
 
 	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-		struct ip_tunnel_key *key =
-			&params->tcft_enc_metadata->u.tun_info.key;
+		struct ip_tunnel_info *info =
+			&params->tcft_enc_metadata->u.tun_info;
+		struct ip_tunnel_key *key = &info->key;
 		__be32 key_id = tunnel_id_to_key32(key->tun_id);
 
 		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
@@ -293,7 +498,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 					      &params->tcft_enc_metadata->u.tun_info) ||
 		    nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
 		    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
-			       !(key->tun_flags & TUNNEL_CSUM)))
+			       !(key->tun_flags & TUNNEL_CSUM)) ||
+		    tunnel_key_opts_dump(skb, info))
 			goto nla_put_failure;
 	}
 
-- 
cgit v1.2.3


From 85782e037f8aba8922dadb24a1523ca0b82ab8bc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 28 Jun 2018 23:34:59 +0200
Subject: bpf: undo prog rejection on read-only lock failure

Partially undo commit 9facc336876f ("bpf: reject any prog that failed
read-only lock") since it caused a regression, that is, syzkaller was
able to manage to cause a panic via fault injection deep in set_memory_ro()
path by letting an allocation fail: In x86's __change_page_attr_set_clr()
it was able to change the attributes of the primary mapping but not in
the alias mapping via cpa_process_alias(), so the second, inner call
to the __change_page_attr() via __change_page_attr_set_clr() had to split
a larger page and failed in the alloc_pages() with the artifically triggered
allocation error which is then propagated down to the call site.

Thus, for set_memory_ro() this means that it returned with an error, but
from debugging a probe_kernel_write() revealed EFAULT on that memory since
the primary mapping succeeded to get changed. Therefore the subsequent
hdr->locked = 0 reset triggered the panic as it was performed on read-only
memory, so call-site assumptions were infact wrong to assume that it would
either succeed /or/ not succeed at all since there's no such rollback in
set_memory_*() calls from partial change of mappings, in other words, we're
left in a state that is "half done". A later undo via set_memory_rw() is
succeeding though due to matching permissions on that part (aka due to the
try_preserve_large_page() succeeding). While reproducing locally with
explicitly triggering this error, the initial splitting only happens on
rare occasions and in real world it would additionally need oom conditions,
but that said, it could partially fail. Therefore, it is definitely wrong
to bail out on set_memory_ro() error and reject the program with the
set_memory_*() semantics we have today. Shouldn't have gone the extra mile
since no other user in tree today infact checks for any set_memory_*()
errors, e.g. neither module_enable_ro() / module_disable_ro() for module
RO/NX handling which is mostly default these days nor kprobes core with
alloc_insn_page() / free_insn_page() as examples that could be invoked long
after bootup and original 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks") did neither when it got first introduced to BPF
so "improving" with bailing out was clearly not right when set_memory_*()
cannot handle it today.

Kees suggested that if set_memory_*() can fail, we should annotate it with
__must_check, and all callers need to deal with it gracefully given those
set_memory_*() markings aren't "advisory", but they're expected to actually
do what they say. This might be an option worth to move forward in future
but would at the same time require that set_memory_*() calls from supporting
archs are guaranteed to be "atomic" in that they provide rollback if part
of the range fails, once that happened, the transition from RW -> RO could
be made more robust that way, while subsequent RO -> RW transition /must/
continue guaranteeing to always succeed the undo part.

Reported-by: syzbot+a4eb8c7766952a1ca872@syzkaller.appspotmail.com
Reported-by: syzbot+d866d1925855328eac3b@syzkaller.appspotmail.com
Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock")
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 56 ++++++++------------------------------------------
 kernel/bpf/core.c      | 30 +--------------------------
 2 files changed, 9 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..300baad62c88 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -470,9 +470,7 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	u16 pages;
-	u16 locked:1;
-
+	u32 pages;
 	/* Some arches need word alignment for their instructions */
 	u8 image[] __aligned(4);
 };
@@ -481,7 +479,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				jit_requested:1,/* archs need to JIT the prog */
-				locked:1,	/* Program image locked? */
+				undo_set_mem:1,	/* Passed set_memory_ro() checkpoint */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	fp->locked = 1;
-	if (set_memory_ro((unsigned long)fp, fp->pages))
-		fp->locked = 0;
-#endif
+	fp->undo_set_mem = 1;
+	set_memory_ro((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (fp->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		fp->locked = 0;
-	}
-#endif
+	if (fp->undo_set_mem)
+		set_memory_rw((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	hdr->locked = 1;
-	if (set_memory_ro((unsigned long)hdr, hdr->pages))
-		hdr->locked = 0;
-#endif
+	set_memory_ro((unsigned long)hdr, hdr->pages);
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (hdr->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		hdr->locked = 0;
-	}
-#endif
+	set_memory_rw((unsigned long)hdr, hdr->pages);
 }
 
 static inline struct bpf_binary_header *
@@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
-{
-	if (!fp->locked)
-		return -ENOLCK;
-	if (fp->jited) {
-		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
-
-		if (!hdr->locked)
-			return -ENOLCK;
-	}
-
-	return 0;
-}
-#endif
-
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..1e5625d46414 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
-	hdr->locked = 0;
-
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
-{
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	int i, err;
-
-	for (i = 0; i < fp->aux->func_cnt; i++) {
-		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
-		if (err)
-			return err;
-	}
-
-	return bpf_prog_check_pages_ro_single(fp);
-#endif
-	return 0;
-}
-
 static void bpf_prog_select_func(struct bpf_prog *fp)
 {
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ finalize:
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-	if (*err)
-		return fp;
-
-	/* Checkpoint: at this point onwards any cBPF -> eBPF or
-	 * native eBPF program is read-only. If we failed to change
-	 * the page attributes (e.g. allocation failure from
-	 * splitting large pages), then reject the whole program
-	 * in order to guarantee not ending up with any W+X pages
-	 * from BPF side in kernel.
-	 */
-	*err = bpf_prog_check_pages_ro_locked(fp);
+
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
-- 
cgit v1.2.3


From 55c5e0c602c20cb6f350e5ae357cfd7e04ebb189 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Sat, 2 Jun 2018 11:02:02 -0300
Subject: dt-bindings: clock: imx6ul: Do not change the clock definition order

Commit f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled
by CCOSR") introduced the CLK_CLKO definitions, but didn't put them
at the end of the list, which may cause dtb breakage when running an old
dtb with a newer kernel.

In order to avoid that, simply add the new CLK_CKO clock definitions
at the end of the list.

Fixes: f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled by CCOSR")
Reported-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/imx6ul-clock.h | 40 +++++++++++++++-----------------
 1 file changed, 19 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx6ul-clock.h b/include/dt-bindings/clock/imx6ul-clock.h
index 9564597cbfac..0aa1d9c3e0b9 100644
--- a/include/dt-bindings/clock/imx6ul-clock.h
+++ b/include/dt-bindings/clock/imx6ul-clock.h
@@ -235,27 +235,25 @@
 #define IMX6UL_CLK_CSI_PODF		222
 #define IMX6UL_CLK_PLL3_120M		223
 #define IMX6UL_CLK_KPP			224
-#define IMX6UL_CLK_CKO1_SEL		225
-#define IMX6UL_CLK_CKO1_PODF		226
-#define IMX6UL_CLK_CKO1			227
-#define IMX6UL_CLK_CKO2_SEL		228
-#define IMX6UL_CLK_CKO2_PODF		229
-#define IMX6UL_CLK_CKO2			230
-#define IMX6UL_CLK_CKO			231
-
-/* For i.MX6ULL */
-#define IMX6ULL_CLK_ESAI_PRED		232
-#define IMX6ULL_CLK_ESAI_PODF		233
-#define IMX6ULL_CLK_ESAI_EXTAL		234
-#define IMX6ULL_CLK_ESAI_MEM		235
-#define IMX6ULL_CLK_ESAI_IPG		236
-#define IMX6ULL_CLK_DCP_CLK		237
-#define IMX6ULL_CLK_EPDC_PRE_SEL	238
-#define IMX6ULL_CLK_EPDC_SEL		239
-#define IMX6ULL_CLK_EPDC_PODF		240
-#define IMX6ULL_CLK_EPDC_ACLK		241
-#define IMX6ULL_CLK_EPDC_PIX		242
-#define IMX6ULL_CLK_ESAI_SEL		243
+#define IMX6ULL_CLK_ESAI_PRED		225
+#define IMX6ULL_CLK_ESAI_PODF		226
+#define IMX6ULL_CLK_ESAI_EXTAL		227
+#define IMX6ULL_CLK_ESAI_MEM		228
+#define IMX6ULL_CLK_ESAI_IPG		229
+#define IMX6ULL_CLK_DCP_CLK		230
+#define IMX6ULL_CLK_EPDC_PRE_SEL	231
+#define IMX6ULL_CLK_EPDC_SEL		232
+#define IMX6ULL_CLK_EPDC_PODF		233
+#define IMX6ULL_CLK_EPDC_ACLK		234
+#define IMX6ULL_CLK_EPDC_PIX		235
+#define IMX6ULL_CLK_ESAI_SEL		236
+#define IMX6UL_CLK_CKO1_SEL		237
+#define IMX6UL_CLK_CKO1_PODF		238
+#define IMX6UL_CLK_CKO1			239
+#define IMX6UL_CLK_CKO2_SEL		240
+#define IMX6UL_CLK_CKO2_PODF		241
+#define IMX6UL_CLK_CKO2			242
+#define IMX6UL_CLK_CKO			243
 #define IMX6UL_CLK_END			244
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6UL_H */
-- 
cgit v1.2.3


From debef195bd5c79210d60f1d6dec38bde3dae2b88 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Sun, 3 Jun 2018 09:44:04 +0800
Subject: clk: imx6ul: add GPIO clock gates

i.MX6UL has GPIO clock gates in CCM CCGR,
add them into clock tree for clock management.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/imx/clk-imx6ul.c             | 5 +++++
 include/dt-bindings/clock/imx6ul-clock.h | 8 +++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index ba563ba50b40..3ea2d97562fd 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -360,6 +360,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_UART2_SERIAL]	= imx_clk_gate2("uart2_serial",	"uart_podf",	base + 0x68,	28);
 	if (clk_on_imx6ull())
 		clks[IMX6UL_CLK_AIPSTZ3]	= imx_clk_gate2("aips_tz3",	"ahb",		 base + 0x80,	18);
+	clks[IMX6UL_CLK_GPIO2]		= imx_clk_gate2("gpio2",	"ipg",		base + 0x68,	30);
 
 	/* CCGR1 */
 	clks[IMX6UL_CLK_ECSPI1]		= imx_clk_gate2("ecspi1",	"ecspi_podf",	base + 0x6c,	0);
@@ -376,6 +377,8 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_GPT1_SERIAL]	= imx_clk_gate2("gpt1_serial",	"perclk",	base + 0x6c,	22);
 	clks[IMX6UL_CLK_UART4_IPG]	= imx_clk_gate2("uart4_ipg",	"ipg",		base + 0x6c,	24);
 	clks[IMX6UL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serial",	"uart_podf",	base + 0x6c,	24);
+	clks[IMX6UL_CLK_GPIO1]		= imx_clk_gate2("gpio1",	"ipg",		base + 0x6c,	26);
+	clks[IMX6UL_CLK_GPIO5]		= imx_clk_gate2("gpio5",	"ipg",		base + 0x6c,	30);
 
 	/* CCGR2 */
 	if (clk_on_imx6ull()) {
@@ -389,6 +392,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_I2C3]		= imx_clk_gate2("i2c3",		"perclk",	base + 0x70,	10);
 	clks[IMX6UL_CLK_OCOTP]		= imx_clk_gate2("ocotp",	"ipg",		base + 0x70,	12);
 	clks[IMX6UL_CLK_IOMUXC]		= imx_clk_gate2("iomuxc",	"lcdif_podf",	base + 0x70,	14);
+	clks[IMX6UL_CLK_GPIO3]		= imx_clk_gate2("gpio3",	"ipg",		base + 0x70,	26);
 	clks[IMX6UL_CLK_LCDIF_APB]	= imx_clk_gate2("lcdif_apb",	"axi",		base + 0x70,	28);
 	clks[IMX6UL_CLK_PXP]		= imx_clk_gate2("pxp",		"axi",		base + 0x70,	30);
 
@@ -405,6 +409,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_UART6_IPG]	= imx_clk_gate2("uart6_ipg",	"ipg",		base + 0x74,	6);
 	clks[IMX6UL_CLK_UART6_SERIAL]	= imx_clk_gate2("uart6_serial",	"uart_podf",	base + 0x74,	6);
 	clks[IMX6UL_CLK_LCDIF_PIX]	= imx_clk_gate2("lcdif_pix",	"lcdif_podf",	base + 0x74,	10);
+	clks[IMX6UL_CLK_GPIO4]		= imx_clk_gate2("gpio4",	"ipg",		base + 0x74,	12);
 	clks[IMX6UL_CLK_QSPI]		= imx_clk_gate2("qspi1",	"qspi1_podf",	base + 0x74,	14);
 	clks[IMX6UL_CLK_WDOG1]		= imx_clk_gate2("wdog1",	"ipg",		base + 0x74,	16);
 	clks[IMX6UL_CLK_MMDC_P0_FAST]	= imx_clk_gate("mmdc_p0_fast", "mmdc_podf", base + 0x74,	20);
diff --git a/include/dt-bindings/clock/imx6ul-clock.h b/include/dt-bindings/clock/imx6ul-clock.h
index 0aa1d9c3e0b9..f8e0476a3a0e 100644
--- a/include/dt-bindings/clock/imx6ul-clock.h
+++ b/include/dt-bindings/clock/imx6ul-clock.h
@@ -254,6 +254,12 @@
 #define IMX6UL_CLK_CKO2_PODF		241
 #define IMX6UL_CLK_CKO2			242
 #define IMX6UL_CLK_CKO			243
-#define IMX6UL_CLK_END			244
+#define IMX6UL_CLK_GPIO1		244
+#define IMX6UL_CLK_GPIO2		245
+#define IMX6UL_CLK_GPIO3		246
+#define IMX6UL_CLK_GPIO4		247
+#define IMX6UL_CLK_GPIO5		248
+
+#define IMX6UL_CLK_END			249
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6UL_H */
-- 
cgit v1.2.3


From 1c77483e4c50339b0306572167ccbff6b55d051b Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 20 Jun 2018 17:11:39 +0300
Subject: IB: Improve uverbs_cleanup_ucontext algorithm

Improve uverbs_cleanup_ucontext algorithm to work properly when the
topology graph of the objects cannot be determined at compile time.  This
is the case with objects created via the devx interface in mlx5.

Typically uverbs objects must be created in a strict topologically sorted
order, so that LIFO ordering will generally cause them to be freed
properly. There are only a few cases (eg memory windows) where objects can
point to things out of the strict LIFO order.

Instead of using an explicit ordering scheme where the HW destroy is not
allowed to fail, go over the list multiple times and allow the destroy
function to fail. If progress halts then a final, desperate, cleanup is
done before leaking the memory. This indicates a driver bug.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c                | 113 ++++++++++++---------
 drivers/infiniband/core/uverbs.h                   |   2 +-
 drivers/infiniband/core/uverbs_cmd.c               |  11 +-
 drivers/infiniband/core/uverbs_std_types.c         |  62 ++++++-----
 .../infiniband/core/uverbs_std_types_counters.c    |   9 +-
 drivers/infiniband/core/uverbs_std_types_cq.c      |  18 ++--
 drivers/infiniband/core/uverbs_std_types_dm.c      |   9 +-
 .../infiniband/core/uverbs_std_types_flow_action.c |   9 +-
 drivers/infiniband/core/uverbs_std_types_mr.c      |   3 +-
 drivers/infiniband/hw/mlx5/devx.c                  |   8 +-
 include/rdma/ib_verbs.h                            |  46 ++++++++-
 include/rdma/uverbs_types.h                        |  11 +-
 12 files changed, 190 insertions(+), 111 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index df3c40533252..2ddf1c716ba8 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -360,9 +360,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 
 	/*
 	 * We can only fail gracefully if the user requested to destroy the
-	 * object. In the rest of the cases, just remove whatever you can.
+	 * object or when a retry may be called upon an error.
+	 * In the rest of the cases, just remove whatever you can.
 	 */
-	if (why == RDMA_REMOVE_DESTROY && ret)
+	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
 
 	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
@@ -393,7 +394,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 		container_of(uobj, struct ib_uobject_file, uobj);
 	int ret = fd_type->context_closed(uobj_file, why);
 
-	if (why == RDMA_REMOVE_DESTROY && ret)
+	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
 
 	if (why == RDMA_REMOVE_DURING_CLEANUP) {
@@ -422,7 +423,7 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
 	struct ib_ucontext *ucontext = uobj->context;
 
 	ret = uobj->type->type_class->remove_commit(uobj, why);
-	if (ret && why == RDMA_REMOVE_DESTROY) {
+	if (ib_is_destroy_retryable(ret, why, uobj)) {
 		/* We couldn't remove the object, so just unlock the uobject */
 		atomic_set(&uobj->usecnt, 0);
 		uobj->type->type_class->lookup_put(uobj, true);
@@ -645,61 +646,77 @@ void uverbs_close_fd(struct file *f)
 	kref_put(uverbs_file_ref, ib_uverbs_release_file);
 }
 
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
+				    enum rdma_remove_reason reason)
 {
-	enum rdma_remove_reason reason = device_removed ?
-		RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
-	unsigned int cur_order = 0;
+	struct ib_uobject *obj, *next_obj;
+	int ret = -EINVAL;
+	int err = 0;
 
+	/*
+	 * This shouldn't run while executing other commands on this
+	 * context. Thus, the only thing we should take care of is
+	 * releasing a FD while traversing this list. The FD could be
+	 * closed and released from the _release fop of this FD.
+	 * In order to mitigate this, we add a lock.
+	 * We take and release the lock per traversal in order to let
+	 * other threads (which might still use the FDs) chance to run.
+	 */
+	mutex_lock(&ucontext->uobjects_lock);
 	ucontext->cleanup_reason = reason;
+	list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, list) {
+		/*
+		 * if we hit this WARN_ON, that means we are
+		 * racing with a lookup_get.
+		 */
+		WARN_ON(uverbs_try_lock_object(obj, true));
+		err = obj->type->type_class->remove_commit(obj, reason);
+
+		if (ib_is_destroy_retryable(err, reason, obj)) {
+			pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n",
+				 obj->id, err);
+			atomic_set(&obj->usecnt, 0);
+			continue;
+		}
+
+		if (err)
+			pr_err("ib_uverbs: unable to remove uobject id %d err %d\n",
+				obj->id, err);
+
+		list_del(&obj->list);
+		/* put the ref we took when we created the object */
+		uverbs_uobject_put(obj);
+		ret = 0;
+	}
+	mutex_unlock(&ucontext->uobjects_lock);
+	return ret;
+}
+
+void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+{
+	enum rdma_remove_reason reason = device_removed ?
+					RDMA_REMOVE_DRIVER_REMOVE :
+					RDMA_REMOVE_CLOSE;
 	/*
 	 * Waits for all remove_commit and alloc_commit to finish. Logically, We
 	 * want to hold this forever as the context is going to be destroyed,
 	 * but we'll release it since it causes a "held lock freed" BUG message.
 	 */
 	down_write(&ucontext->cleanup_rwsem);
+	ucontext->cleanup_retryable = true;
+	while (!list_empty(&ucontext->uobjects))
+		if (__uverbs_cleanup_ucontext(ucontext, reason)) {
+			/*
+			 * No entry was cleaned-up successfully during this
+			 * iteration
+			 */
+			break;
+		}
 
-	while (!list_empty(&ucontext->uobjects)) {
-		struct ib_uobject *obj, *next_obj;
-		unsigned int next_order = UINT_MAX;
+	ucontext->cleanup_retryable = false;
+	if (!list_empty(&ucontext->uobjects))
+		__uverbs_cleanup_ucontext(ucontext, reason);
 
-		/*
-		 * This shouldn't run while executing other commands on this
-		 * context. Thus, the only thing we should take care of is
-		 * releasing a FD while traversing this list. The FD could be
-		 * closed and released from the _release fop of this FD.
-		 * In order to mitigate this, we add a lock.
-		 * We take and release the lock per order traversal in order
-		 * to let other threads (which might still use the FDs) chance
-		 * to run.
-		 */
-		mutex_lock(&ucontext->uobjects_lock);
-		list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
-					 list) {
-			if (obj->type->destroy_order == cur_order) {
-				int ret;
-
-				/*
-				 * if we hit this WARN_ON, that means we are
-				 * racing with a lookup_get.
-				 */
-				WARN_ON(uverbs_try_lock_object(obj, true));
-				ret = obj->type->type_class->remove_commit(obj,
-									   reason);
-				list_del(&obj->list);
-				if (ret)
-					pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
-						obj->id, cur_order);
-				/* put the ref we took when we created the object */
-				uverbs_uobject_put(obj);
-			} else {
-				next_order = min(next_order,
-						 obj->type->destroy_order);
-			}
-		}
-		mutex_unlock(&ucontext->uobjects_lock);
-		cur_order = next_order;
-	}
 	up_write(&ucontext->cleanup_rwsem);
 }
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c0d40fc3a53a..f9f0bcf76812 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -230,7 +230,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
 			   enum rdma_remove_reason why);
 
 int uverbs_dealloc_mw(struct ib_mw *mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5fc14fde274c..5d0fd36b009d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -116,6 +116,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
 	rcu_read_unlock();
 	ucontext->closing = 0;
+	ucontext->cleanup_retryable = false;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	ucontext->umem_tree = RB_ROOT_CACHED;
@@ -611,12 +612,13 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 	return ret ?: in_len;
 }
 
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
 			   struct ib_xrcd *xrcd,
 			   enum rdma_remove_reason why)
 {
 	struct inode *inode;
 	int ret;
+	struct ib_uverbs_device *dev = uobject->context->ufile->device;
 
 	inode = xrcd->inode;
 	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
@@ -624,9 +626,12 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
 
 	ret = ib_dealloc_xrcd(xrcd);
 
-	if (why == RDMA_REMOVE_DESTROY && ret)
+	if (ib_is_destroy_retryable(ret, why, uobject)) {
 		atomic_inc(&xrcd->usecnt);
-	else if (inode)
+		return ret;
+	}
+
+	if (inode)
 		xrcd_table_delete(dev, inode);
 
 	return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c50d73845a2a..c7f93b205c70 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -77,6 +77,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
 		container_of(uobject, struct ib_uqp_object, uevent.uobject);
 	int ret;
 
+	/*
+	 * If this is a user triggered destroy then do not allow destruction
+	 * until the user cleans up all the mcast bindings. Unlike in other
+	 * places we forcibly clean up the mcast attachments for !DESTROY
+	 * because the mcast attaches are not ubojects and will not be
+	 * destroyed by anything else during cleanup processing.
+	 */
 	if (why == RDMA_REMOVE_DESTROY) {
 		if (!list_empty(&uqp->mcast_list))
 			return -EBUSY;
@@ -85,7 +92,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
 	}
 
 	ret = ib_destroy_qp(qp);
-	if (ret && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
 	if (uqp->uxrcd)
@@ -103,8 +110,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		kfree(ind_tbl);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	kfree(ind_tbl);
 	return ret;
 }
 
@@ -117,8 +126,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_wq(wq);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
 	return ret;
 }
 
@@ -132,8 +143,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_srq(srq);
-
-	if (ret && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
 	if (srq_type == IB_SRQT_XRC) {
@@ -155,12 +165,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
 		container_of(uobject, struct ib_uxrcd_object, uobject);
 	int ret;
 
+	ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
+	if (ret)
+		return ret;
+
 	mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
-		ret = -EBUSY;
-	else
-		ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
-					     xrcd, why);
+	ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
 	mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
 
 	return ret;
@@ -170,9 +180,11 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 			  enum rdma_remove_reason why)
 {
 	struct ib_pd *pd = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	ib_dealloc_pd((struct ib_pd *)uobject->object);
 	return 0;
@@ -249,44 +261,42 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 }
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
-			    &UVERBS_TYPE_ALLOC_FD(0,
-						  sizeof(struct ib_uverbs_completion_event_file),
+			    &UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
 						  uverbs_hot_unplug_completion_event_file,
 						  &uverbs_event_fops,
 						  "[infinibandevent]", O_RDONLY));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object),
 						      uverbs_free_qp));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
 						      uverbs_free_srq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
 			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
-						      0, uverbs_free_flow));
+						      uverbs_free_flow));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object),
 						      uverbs_free_wq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
 						      uverbs_free_xrcd));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
-			    /* 2 is used in order to free the PD after MRs */
-			    &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
 
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 03b182a684a6..6d0b1ce9fc1f 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -38,10 +38,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
 				enum rdma_remove_reason why)
 {
 	struct ib_counters *counters = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY &&
-	    atomic_read(&counters->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return counters->device->destroy_counters(counters);
 }
@@ -150,7 +151,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 3d293d01afea..f67b0895b48c 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -44,12 +44,16 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_cq(cq);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
-				      container_of(ev_queue,
-						   struct ib_uverbs_completion_event_file,
-						   ev_queue) : NULL,
-				      ucq);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	ib_uverbs_release_ucq(
+		uobject->context->ufile,
+		ev_queue ? container_of(ev_queue,
+					struct ib_uverbs_completion_event_file,
+					ev_queue) :
+			   NULL,
+		ucq);
 	return ret;
 }
 
@@ -201,7 +205,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object),
 						      uverbs_free_cq),
 #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
 			    &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 8b681575b615..d294660a2e06 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -37,9 +37,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
 			  enum rdma_remove_reason why)
 {
 	struct ib_dm *dm = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return dm->device->dealloc_dm(dm);
 }
@@ -102,7 +104,6 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
-			    /* 1 is used in order to free the DM after MRs */
-			    &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index a7be51cf2e42..c1875657bc99 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
 				   enum rdma_remove_reason why)
 {
 	struct ib_flow_action *action = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY &&
-	    atomic_read(&action->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return action->device->destroy_flow_action(action);
 }
@@ -428,7 +429,7 @@ static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTRO
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
 			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
 			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
 			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 68f7cadf088f..d7f7ba3802af 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -142,6 +142,5 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
-			    /* 1 is used in order to free the MR after all the MWs */
-			    &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
+			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index f535e7da2dc5..30f6b612547f 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -675,7 +675,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	int ret;
 
 	ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
-	if (ret && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
 	kfree(obj);
@@ -976,7 +976,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
 	int err;
 
 	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
-	if (err && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(err, why, uobject))
 		return err;
 
 	ib_umem_release(obj->umem);
@@ -1085,14 +1085,14 @@ static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
-	&UVERBS_TYPE_ALLOC_IDR(0, devx_obj_cleanup),
+	&UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
 static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
-	&UVERBS_TYPE_ALLOC_IDR(0, devx_umem_cleanup),
+	&UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8e726fff30fe..e1130c6c1377 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1476,7 +1476,10 @@ struct ib_fmr_attr {
 struct ib_umem;
 
 enum rdma_remove_reason {
-	/* Userspace requested uobject deletion. Call could fail */
+	/*
+	 * Userspace requested uobject deletion or initial try
+	 * to remove uobject via cleanup. Call could fail
+	 */
 	RDMA_REMOVE_DESTROY,
 	/* Context deletion. This call should delete the actual object itself */
 	RDMA_REMOVE_CLOSE,
@@ -1503,6 +1506,7 @@ struct ib_ucontext {
 	/* protects cleanup process from other actions */
 	struct rw_semaphore	cleanup_rwsem;
 	enum rdma_remove_reason cleanup_reason;
+	bool cleanup_retryable;
 
 	struct pid             *tgid;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -2684,6 +2688,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
 	return ib_is_buffer_cleared(udata->inbuf + offset, len);
 }
 
+/**
+ * ib_is_destroy_retryable - Check whether the uobject destruction
+ * is retryable.
+ * @ret: The initial destruction return code
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * This function is a helper function that IB layer and low-level drivers
+ * can use to consider whether the destruction of the given uobject is
+ * retry-able.
+ * It checks the original return code, if it wasn't success the destruction
+ * is retryable according to the ucontext state (i.e. cleanup_retryable) and
+ * the remove reason. (i.e. why).
+ * Must be called with the object locked for destroy.
+ */
+static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
+					   struct ib_uobject *uobj)
+{
+	return ret && (why == RDMA_REMOVE_DESTROY ||
+		       uobj->context->cleanup_retryable);
+}
+
+/**
+ * ib_destroy_usecnt - Called during destruction to check the usecnt
+ * @usecnt: The usecnt atomic
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Non-zero usecnts will block destruction unless destruction was triggered by
+ * a ucontext cleanup.
+ */
+static inline int ib_destroy_usecnt(atomic_t *usecnt,
+				    enum rdma_remove_reason why,
+				    struct ib_uobject *uobj)
+{
+	if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
+		return -EBUSY;
+	return 0;
+}
+
 /**
  * ib_modify_qp_is_ok - Check that the supplied attribute mask
  * contains all required attributes and no attributes not allowed for
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index cc04ec65588d..175495d1b0b8 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -93,7 +93,6 @@ struct uverbs_obj_type_class {
 struct uverbs_obj_type {
 	const struct uverbs_obj_type_class * const type_class;
 	size_t	     obj_size;
-	unsigned int destroy_order;
 };
 
 /*
@@ -152,10 +151,9 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))
-#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\
 	((&((const struct uverbs_obj_fd_type)				\
 	 {.type = {							\
-		.destroy_order = _order,				\
 		.type_class = &uverbs_fd_class,				\
 		.obj_size = (_obj_size) +				\
 			UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \
@@ -164,18 +162,17 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
 	 .fops = _fops,							\
 	 .name = _name,							\
 	 .flags = _flags}))->type)
-#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object)	\
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object)	\
 	((&((const struct uverbs_obj_idr_type)				\
 	 {.type = {							\
-		.destroy_order = _order,				\
 		.type_class = &uverbs_idr_class,			\
 		.obj_size = (_size) +					\
 			UVERBS_BUILD_BUG_ON((_size) <			\
 					    sizeof(struct ib_uobject))	\
 	 },								\
 	 .destroy_object = _destroy_object,}))->type)
-#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object)			\
-	 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order,	\
+#define UVERBS_TYPE_ALLOC_IDR(_destroy_object)			\
+	 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject),	\
 				  _destroy_object)
 
 #endif
-- 
cgit v1.2.3


From b1277a226d8c519b8c33e23fe68b4e1658f15963 Mon Sep 17 00:00:00 2001
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Date: Fri, 29 Jun 2018 19:55:03 -0500
Subject: PCI: Cleanup PCI_REBAR_CTRL_BAR_SHIFT handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cleanup PCI_REBAR_CTRL_BAR_SHIFT handling.  That was hard coded instead of
properly defined in the header for some reason.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c             | 6 +++---
 include/uapi/linux/pci_regs.h | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 44ccfb31363e..1b20c4392f09 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1193,7 +1193,7 @@ static void pci_restore_rebar_state(struct pci_dev *pdev)
 		res = pdev->resource + bar_idx;
 		size = order_base_2((resource_size(res) >> 20) | 1) - 1;
 		ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
-		ctrl |= size << 8;
+		ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
 		pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
 	}
 }
@@ -3098,7 +3098,7 @@ int pci_rebar_get_current_size(struct pci_dev *pdev, int bar)
 		return pos;
 
 	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
-	return (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> 8;
+	return (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT;
 }
 
 /**
@@ -3121,7 +3121,7 @@ int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size)
 
 	pci_read_config_dword(pdev, pos + PCI_REBAR_CTRL, &ctrl);
 	ctrl &= ~PCI_REBAR_CTRL_BAR_SIZE;
-	ctrl |= size << 8;
+	ctrl |= size << PCI_REBAR_CTRL_BAR_SHIFT;
 	pci_write_config_dword(pdev, pos + PCI_REBAR_CTRL, ctrl);
 	return 0;
 }
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4da87e2ef8a8..82e6b361204e 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -960,8 +960,9 @@
 #define PCI_REBAR_CTRL		8	/* control register */
 #define  PCI_REBAR_CTRL_BAR_IDX		0x00000007  /* BAR index */
 #define  PCI_REBAR_CTRL_NBAR_MASK	0x000000E0  /* # of resizable BARs */
-#define  PCI_REBAR_CTRL_NBAR_SHIFT	5  	    /* shift for # of BARs */
+#define  PCI_REBAR_CTRL_NBAR_SHIFT	5	    /* shift for # of BARs */
 #define  PCI_REBAR_CTRL_BAR_SIZE	0x00001F00  /* BAR size */
+#define  PCI_REBAR_CTRL_BAR_SHIFT	8	    /* shift for BAR size */
 
 /* Dynamic Power Allocation */
 #define PCI_DPA_CAP		4	/* capability register */
-- 
cgit v1.2.3


From cfdfc14e7fb8ae77290e9d5afaeecc0a234a3846 Mon Sep 17 00:00:00 2001
From: Doug Meyer <dmeyer@gigaio.com>
Date: Wed, 23 May 2018 13:18:05 -0700
Subject: switchtec: Use generic PCI Vendor ID and Class Code

Move the Microsemi Switchtec PCI Vendor ID (same as
PCI_VENDOR_ID_PMC_Sierra) to pci_ids.h.   Also, replace Microsemi class
constants with the standard PCI definitions.

Signed-off-by: Doug Meyer <dmeyer@gigaio.com>
[bhelgaas: restore SPDX (I assume it was removed by mistake), remove
device ID definitions]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/ntb/hw/mscc/ntb_hw_switchtec.c |  3 ++-
 drivers/pci/switch/switchtec.c         | 14 +++++++-------
 include/linux/pci_ids.h                |  1 +
 include/linux/switchtec.h              |  4 ----
 4 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index f624ae27eabe..5ee5f40b4dfc 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -19,6 +19,7 @@
 #include <linux/kthread.h>
 #include <linux/interrupt.h>
 #include <linux/ntb.h>
+#include <linux/pci.h>
 
 MODULE_DESCRIPTION("Microsemi Switchtec(tm) NTB Driver");
 MODULE_VERSION("0.1");
@@ -1487,7 +1488,7 @@ static int switchtec_ntb_add(struct device *dev,
 
 	stdev->sndev = NULL;
 
-	if (stdev->pdev->class != MICROSEMI_NTB_CLASSCODE)
+	if (stdev->pdev->class != (PCI_CLASS_BRIDGE_OTHER << 8))
 		return -ENODEV;
 
 	sndev = kzalloc_node(sizeof(*sndev), GFP_KERNEL, dev_to_node(dev));
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 47cd0c037433..9940cc70f38b 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -641,7 +641,7 @@ static int ioctl_event_summary(struct switchtec_dev *stdev,
 
 	for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
 		reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
-		if (reg != MICROSEMI_VENDOR_ID)
+		if (reg != PCI_VENDOR_ID_MICROSEMI)
 			break;
 
 		reg = ioread32(&stdev->mmio_pff_csr[i].pff_event_summary);
@@ -1203,7 +1203,7 @@ static void init_pff(struct switchtec_dev *stdev)
 
 	for (i = 0; i < SWITCHTEC_MAX_PFF_CSR; i++) {
 		reg = ioread16(&stdev->mmio_pff_csr[i].vendor_id);
-		if (reg != MICROSEMI_VENDOR_ID)
+		if (reg != PCI_VENDOR_ID_MICROSEMI)
 			break;
 	}
 
@@ -1267,7 +1267,7 @@ static int switchtec_pci_probe(struct pci_dev *pdev,
 	struct switchtec_dev *stdev;
 	int rc;
 
-	if (pdev->class == MICROSEMI_NTB_CLASSCODE)
+	if (pdev->class == (PCI_CLASS_BRIDGE_OTHER << 8))
 		request_module_nowait("ntb_hw_switchtec");
 
 	stdev = stdev_create(pdev);
@@ -1321,19 +1321,19 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
 
 #define SWITCHTEC_PCI_DEVICE(device_id) \
 	{ \
-		.vendor     = MICROSEMI_VENDOR_ID, \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
 		.device     = device_id, \
 		.subvendor  = PCI_ANY_ID, \
 		.subdevice  = PCI_ANY_ID, \
-		.class      = MICROSEMI_MGMT_CLASSCODE, \
+		.class      = (PCI_CLASS_MEMORY_OTHER << 8), \
 		.class_mask = 0xFFFFFFFF, \
 	}, \
 	{ \
-		.vendor     = MICROSEMI_VENDOR_ID, \
+		.vendor     = PCI_VENDOR_ID_MICROSEMI, \
 		.device     = device_id, \
 		.subvendor  = PCI_ANY_ID, \
 		.subdevice  = PCI_ANY_ID, \
-		.class      = MICROSEMI_NTB_CLASSCODE, \
+		.class      = (PCI_CLASS_BRIDGE_OTHER << 8), \
 		.class_mask = 0xFFFFFFFF, \
 	}
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 29502238e510..80aec5b9a6c1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1668,6 +1668,7 @@
 #define PCI_DEVICE_ID_COMPEX_ENET100VG4	0x0112
 
 #define PCI_VENDOR_ID_PMC_Sierra	0x11f8
+#define PCI_VENDOR_ID_MICROSEMI		0x11f8
 
 #define PCI_VENDOR_ID_RP		0x11fe
 #define PCI_DEVICE_ID_RP32INTF		0x0001
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index ec93e93371fa..ab400af6f0ce 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -19,10 +19,6 @@
 #include <linux/pci.h>
 #include <linux/cdev.h>
 
-#define MICROSEMI_VENDOR_ID         0x11f8
-#define MICROSEMI_NTB_CLASSCODE     0x068000
-#define MICROSEMI_MGMT_CLASSCODE    0x058000
-
 #define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024
 #define SWITCHTEC_MAX_PFF_CSR 48
 
-- 
cgit v1.2.3


From 783e84961b1d7a75045383586b8c49e02b7704cd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 29 Jun 2018 21:17:26 -0500
Subject: PCI: Make pci_get_rom_size() static

pci_get_rom_size() is called only from pci_map_rom(), so it can be static.
Make it static and remove the declaration from include/linux/pci.h.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/rom.c   | 3 ++-
 include/linux/pci.h | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 3a33f5ce314a..137bf0cee897 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -80,7 +80,8 @@ EXPORT_SYMBOL_GPL(pci_disable_rom);
  * The PCI window size could be much larger than the
  * actual image size.
  */
-size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size)
+static size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom,
+			       size_t size)
 {
 	void __iomem *image;
 	int last_image;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..f40d3e05ccd1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1122,7 +1122,6 @@ int pci_enable_rom(struct pci_dev *pdev);
 void pci_disable_rom(struct pci_dev *pdev);
 void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size);
 void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom);
-size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size);
 void __iomem __must_check *pci_platform_rom(struct pci_dev *pdev, size_t *size);
 
 /* Power management related routines */
-- 
cgit v1.2.3


From ea5d0c32498e1a08ff5f3dbeafa4d74895851b0d Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Thu, 28 Jun 2018 00:22:56 -0400
Subject: tcp: add new SNMP counter for drops when try to queue in rcv queue

When sk_rmem_alloc is larger than the receive buffer and we can't
schedule more memory for it, the skb will be dropped.

In above situation, if this skb is put into the ofo queue,
LINUX_MIB_TCPOFODROP is incremented to track it.

While if this skb is put into the receive queue, there's no record.
So a new SNMP counter is introduced to track this behavior.

LINUX_MIB_TCPRCVQDROP:  Number of packets meant to be queued in rcv queue
			but dropped because socket rcvbuf limit hit.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/tcp_input.c      | 8 ++++++--
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 97517f36a5f9..e5ebc83827ab 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -280,6 +280,7 @@ enum
 	LINUX_MIB_TCPDELIVEREDCE,		/* TCPDeliveredCE */
 	LINUX_MIB_TCPACKCOMPRESSED,		/* TCPAckCompressed */
 	LINUX_MIB_TCPZEROWINDOWDROP,		/* TCPZeroWindowDrop */
+	LINUX_MIB_TCPRCVQDROP,			/* TCPRcvQDrop */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 225ef3433fe5..b46e4cf9a55a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -288,6 +288,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
 	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
+	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c5b3415413f..eecd359595fc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4611,8 +4611,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 	skb->data_len = data_len;
 	skb->len = size;
 
-	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+	if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 		goto err_free;
+	}
 
 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
 	if (err)
@@ -4677,8 +4679,10 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 queue_and_out:
 		if (skb_queue_len(&sk->sk_receive_queue) == 0)
 			sk_forced_mem_schedule(sk, skb->truesize);
-		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 			goto drop;
+		}
 
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
-- 
cgit v1.2.3


From 0afff91c6f5ecef27715ea71e34dc2baacba1060 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:05 +0200
Subject: net/smc: add pnetid support

s390 hardware supports the definition of a so-call Physical NETwork
IDentifier (short PNETID) per network device port. These PNETIDS
can be used to identify network devices that are attached to the same
physical network (broadcast domain).

On s390 try to use the PNETID of the ethernet device port used for
initial connecting, and derive the IB device port used for SMC RDMA
traffic.

On platforms without PNETID support fall back to the existing
solution of a configured pnet table.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h  |   2 +
 net/smc/smc_ib.c   |   6 ++-
 net/smc/smc_ib.h   |   3 ++
 net/smc/smc_pnet.c | 109 +++++++++++++++++++++++++++++++++++++++++++----------
 net/smc/smc_pnet.h |  14 +++++++
 5 files changed, 114 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index 8381d163fefa..2173932fab9d 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -11,6 +11,8 @@
 #ifndef _SMC_H
 #define _SMC_H
 
+#define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
+
 struct smc_hashinfo {
 	rwlock_t lock;
 	struct hlist_head ht;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index f8b159ced032..36de2fd76170 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -504,8 +504,12 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 	port_cnt = smcibdev->ibdev->phys_port_cnt;
 	for (i = 0;
 	     i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
-	     i++)
+	     i++) {
 		set_bit(i, &smcibdev->port_event_mask);
+		/* determine pnetids of the port */
+		smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+				       smcibdev->pnetid[i]);
+	}
 	schedule_work(&smcibdev->port_event_work);
 }
 
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 2c480b352928..7c1223c91229 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/if_ether.h>
 #include <rdma/ib_verbs.h>
+#include <net/smc.h>
 
 #define SMC_MAX_PORTS			2	/* Max # of ports */
 #define SMC_GID_SIZE			sizeof(union ib_gid)
@@ -40,6 +41,8 @@ struct smc_ib_device {				/* ib-device infos for smc */
 	char			mac[SMC_MAX_PORTS][ETH_ALEN];
 						/* mac address per port*/
 	union ib_gid		gid[SMC_MAX_PORTS]; /* gid per port */
+	u8			pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
+						/* pnetid per port */
 	u8			initialized : 1; /* ib dev CQ, evthdl done */
 	struct work_struct	port_event_work;
 	unsigned long		port_event_mask;
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index a82a5cad0282..cdc6e23b6ce1 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -23,12 +23,10 @@
 #include "smc_pnet.h"
 #include "smc_ib.h"
 
-#define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
-
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 	[SMC_PNETID_NAME] = {
 		.type = NLA_NUL_STRING,
-		.len = SMC_MAX_PNET_ID_LEN - 1
+		.len = SMC_MAX_PNETID_LEN - 1
 	},
 	[SMC_PNETID_ETHNAME] = {
 		.type = NLA_NUL_STRING,
@@ -65,7 +63,7 @@ static struct smc_pnettable {
  */
 struct smc_pnetentry {
 	struct list_head list;
-	char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
+	char pnet_name[SMC_MAX_PNETID_LEN + 1];
 	struct net_device *ndev;
 	struct smc_ib_device *smcibdev;
 	u8 ib_port;
@@ -209,7 +207,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
 		return false;
 	while (--end >= bf && isspace(*end))
 		;
-	if (end - bf >= SMC_MAX_PNET_ID_LEN)
+	if (end - bf >= SMC_MAX_PNETID_LEN)
 		return false;
 	while (bf <= end) {
 		if (!isalnum(*bf))
@@ -512,26 +510,70 @@ void smc_pnet_exit(void)
 	genl_unregister_family(&smc_pnet_nl_family);
 }
 
-/* PNET table analysis for a given sock:
- * determine ib_device and port belonging to used internal TCP socket
- * ethernet interface.
+/* Determine one base device for stacked net devices.
+ * If the lower device level contains more than one devices
+ * (for instance with bonding slaves), just the first device
+ * is used to reach a base device.
  */
-void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport)
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 {
-	struct dst_entry *dst = sk_dst_get(sk);
-	struct smc_pnetentry *pnetelem;
+	int i, nest_lvl;
 
-	*smcibdev = NULL;
-	*ibport = 0;
+	rtnl_lock();
+	nest_lvl = dev_get_nest_level(ndev);
+	for (i = 0; i < nest_lvl; i++) {
+		struct list_head *lower = &ndev->adj_list.lower;
+
+		if (list_empty(lower))
+			break;
+		lower = lower->next;
+		ndev = netdev_lower_get_next(ndev, &lower);
+	}
+	rtnl_unlock();
+	return ndev;
+}
+
+/* Determine the corresponding IB device port based on the hardware PNETID.
+ * Searching stops at the first matching active IB device port.
+ */
+static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
+					 struct smc_ib_device **smcibdev,
+					 u8 *ibport)
+{
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	struct smc_ib_device *ibdev;
+	int i;
+
+	ndev = pnet_find_base_ndev(ndev);
+	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+				   ndev_pnetid))
+		return; /* pnetid could not be determined */
+
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		for (i = 1; i <= SMC_MAX_PORTS; i++) {
+			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
+				    SMC_MAX_PNETID_LEN) &&
+			    smc_ib_port_active(ibdev, i)) {
+				*smcibdev = ibdev;
+				*ibport = i;
+				break;
+			}
+		}
+	}
+	spin_unlock(&smc_ib_devices.lock);
+}
+
+/* Lookup of coupled ib_device via SMC pnet table */
+static void smc_pnet_find_roce_by_table(struct net_device *netdev,
+					struct smc_ib_device **smcibdev,
+					u8 *ibport)
+{
+	struct smc_pnetentry *pnetelem;
 
-	if (!dst)
-		return;
-	if (!dst->dev)
-		goto out_rel;
 	read_lock(&smc_pnettable.lock);
 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (dst->dev == pnetelem->ndev) {
+		if (netdev == pnetelem->ndev) {
 			if (smc_ib_port_active(pnetelem->smcibdev,
 					       pnetelem->ib_port)) {
 				*smcibdev = pnetelem->smcibdev;
@@ -541,6 +583,35 @@ void smc_pnet_find_roce_resource(struct sock *sk,
 		}
 	}
 	read_unlock(&smc_pnettable.lock);
+}
+
+/* PNET table analysis for a given sock:
+ * determine ib_device and port belonging to used internal TCP socket
+ * ethernet interface.
+ */
+void smc_pnet_find_roce_resource(struct sock *sk,
+				 struct smc_ib_device **smcibdev, u8 *ibport)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+
+	*smcibdev = NULL;
+	*ibport = 0;
+
+	if (!dst)
+		goto out;
+	if (!dst->dev)
+		goto out_rel;
+
+	/* if possible, lookup via hardware-defined pnetid */
+	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport);
+	if (*smcibdev)
+		goto out_rel;
+
+	/* lookup via SMC PNET table */
+	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport);
+
 out_rel:
 	dst_release(dst);
+out:
+	return;
 }
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 5a29519db976..ad4455cde9e7 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -12,8 +12,22 @@
 #ifndef _SMC_PNET_H
 #define _SMC_PNET_H
 
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+#include <asm/pnet.h>
+#endif
+
 struct smc_ib_device;
 
+static inline int smc_pnetid_by_dev_port(struct device *dev,
+					 unsigned short port, u8 *pnetid)
+{
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+	return pnet_id_by_dev_port(dev, port, pnetid);
+#else
+	return -ENOENT;
+#endif
+}
+
 int smc_pnet_init(void) __init;
 void smc_pnet_exit(void);
 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
-- 
cgit v1.2.3


From c6ba7c9ba43de1b57e9a53946e7ff988554c84ed Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:07 +0200
Subject: net/smc: add base infrastructure for SMC-D and ISM

SMC supports two variants: SMC-R and SMC-D. For data transport, SMC-R
uses RDMA devices, SMC-D uses so-called Internal Shared Memory (ISM)
devices. An ISM device only allows shared memory communication between
SMC instances on the same machine. For example, this allows virtual
machines on the same host to communicate via SMC without RDMA devices.

This patch adds the base infrastructure for SMC-D and ISM devices to
the existing SMC code. It contains the following:

* ISM driver interface:
  This interface allows an ISM driver to register ISM devices in SMC. In
  the process, the driver provides a set of device ops for each device.
  SMC uses these ops to execute SMC specific operations on or transfer
  data over the device.

* Core SMC-D link group, connection, and buffer support:
  Link groups, SMC connections and SMC buffers (in smc_core) are
  extended to support SMC-D.

* SMC type checks:
  Some type checks are added to prevent using SMC-R specific code for
  SMC-D and vice versa.

To actually use SMC-D, additional changes to pnetid, CLC, CDC, etc. are
required. These are added in follow-up patches.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h  |  62 +++++++++++
 net/smc/Makefile   |   2 +-
 net/smc/af_smc.c   |  11 +-
 net/smc/smc_core.c | 270 +++++++++++++++++++++++++++++++++++------------
 net/smc/smc_core.h |  71 +++++++++----
 net/smc/smc_diag.c |   3 +-
 net/smc/smc_ism.c  | 304 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_ism.h  |  48 +++++++++
 8 files changed, 679 insertions(+), 92 deletions(-)
 create mode 100644 net/smc/smc_ism.c
 create mode 100644 net/smc/smc_ism.h

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index 2173932fab9d..824a7af8d654 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -20,4 +20,66 @@ struct smc_hashinfo {
 
 int smc_hash_sk(struct sock *sk);
 void smc_unhash_sk(struct sock *sk);
+
+/* SMCD/ISM device driver interface */
+struct smcd_dmb {
+	u64 dmb_tok;
+	u64 rgid;
+	u32 dmb_len;
+	u32 sba_idx;
+	u32 vlan_valid;
+	u32 vlan_id;
+	void *cpu_addr;
+	dma_addr_t dma_addr;
+};
+
+#define ISM_EVENT_DMB	0
+#define ISM_EVENT_GID	1
+#define ISM_EVENT_SWR	2
+
+struct smcd_event {
+	u32 type;
+	u32 code;
+	u64 tok;
+	u64 time;
+	u64 info;
+};
+
+struct smcd_dev;
+
+struct smcd_ops {
+	int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
+				u32 vid);
+	int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+	int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
+	int (*add_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+	int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
+	int (*set_vlan_required)(struct smcd_dev *dev);
+	int (*reset_vlan_required)(struct smcd_dev *dev);
+	int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
+			    u32 event_code, u64 info);
+	int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
+			 bool sf, unsigned int offset, void *data,
+			 unsigned int size);
+};
+
+struct smcd_dev {
+	const struct smcd_ops *ops;
+	struct device dev;
+	void *priv;
+	u64 local_gid;
+	struct list_head list;
+	spinlock_t lock;
+	struct smc_connection **conn;
+	struct list_head vlan;
+	struct workqueue_struct *event_wq;
+};
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+				const struct smcd_ops *ops, int max_dmbs);
+int smcd_register_dev(struct smcd_dev *smcd);
+void smcd_unregister_dev(struct smcd_dev *smcd);
+void smcd_free_dev(struct smcd_dev *smcd);
+void smcd_handle_event(struct smcd_dev *dev, struct smcd_event *event);
+void smcd_handle_irq(struct smcd_dev *dev, unsigned int bit);
 #endif	/* _SMC_H */
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 188104654b54..4df96b4b8130 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da7f02edcd37..8ce48799cf68 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -475,8 +475,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	int reason_code = 0;
 
 	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
-					aclc->hdr.flag);
+	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
+					ibport, &aclc->lcl, NULL, 0);
 	if (local_contact < 0) {
 		if (local_contact == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -491,7 +491,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	smc_conn_save_peer_info(smc, aclc);
 
 	/* create send buffer and rmb */
-	if (smc_buf_create(smc))
+	if (smc_buf_create(smc, false))
 		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
 
 	if (local_contact == SMC_FIRST_CONTACT)
@@ -894,7 +894,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 				int *local_contact)
 {
 	/* allocate connection / link group */
-	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
+	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+					 &pclc->lcl, NULL, 0);
 	if (*local_contact < 0) {
 		if (*local_contact == -ENOMEM)
 			return SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -902,7 +903,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 	}
 
 	/* create send buffer and rmb */
-	if (smc_buf_create(new_smc))
+	if (smc_buf_create(new_smc, false))
 		return SMC_CLC_DECL_MEM;
 
 	return 0;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index add82b0266f3..daa88db1841a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -25,6 +25,7 @@
 #include "smc_llc.h"
 #include "smc_cdc.h"
 #include "smc_close.h"
+#include "smc_ism.h"
 
 #define SMC_LGR_NUM_INCR		256
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
@@ -46,8 +47,8 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 	 * otherwise there is a risk of out-of-sync link groups.
 	 */
 	mod_delayed_work(system_wq, &lgr->free_work,
-			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
-						 SMC_LGR_FREE_DELAY_SERV);
+			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
 }
 
 /* Register connection's alert token in our lookup structure.
@@ -153,16 +154,18 @@ static void smc_lgr_free_work(struct work_struct *work)
 free:
 	spin_unlock_bh(&smc_lgr_list.lock);
 	if (!delayed_work_pending(&lgr->free_work)) {
-		if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
+		if (!lgr->is_smcd &&
+		    lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
 			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 		smc_lgr_free(lgr);
 	}
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc,
+static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
 			  struct smc_ib_device *smcibdev, u8 ibport,
-			  char *peer_systemid, unsigned short vlan_id)
+			  char *peer_systemid, unsigned short vlan_id,
+			  struct smcd_dev *smcismdev, u64 peer_gid)
 {
 	struct smc_link_group *lgr;
 	struct smc_link *lnk;
@@ -170,17 +173,23 @@ static int smc_lgr_create(struct smc_sock *smc,
 	int rc = 0;
 	int i;
 
+	if (is_smcd && vlan_id) {
+		rc = smc_ism_get_vlan(smcismdev, vlan_id);
+		if (rc)
+			goto out;
+	}
+
 	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
 	if (!lgr) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+	lgr->is_smcd = is_smcd;
 	lgr->sync_err = 0;
-	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
 	lgr->vlan_id = vlan_id;
 	rwlock_init(&lgr->sndbufs_lock);
 	rwlock_init(&lgr->rmbs_lock);
+	rwlock_init(&lgr->conns_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
 		INIT_LIST_HEAD(&lgr->rmbs[i]);
@@ -189,36 +198,44 @@ static int smc_lgr_create(struct smc_sock *smc,
 	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
 	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
 	lgr->conns_all = RB_ROOT;
-
-	lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	/* initialize link */
-	lnk->state = SMC_LNK_ACTIVATING;
-	lnk->link_id = SMC_SINGLE_LINK;
-	lnk->smcibdev = smcibdev;
-	lnk->ibport = ibport;
-	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
-	if (!smcibdev->initialized)
-		smc_ib_setup_per_ibdev(smcibdev);
-	get_random_bytes(rndvec, sizeof(rndvec));
-	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
-	rc = smc_llc_link_init(lnk);
-	if (rc)
-		goto free_lgr;
-	rc = smc_wr_alloc_link_mem(lnk);
-	if (rc)
-		goto clear_llc_lnk;
-	rc = smc_ib_create_protection_domain(lnk);
-	if (rc)
-		goto free_link_mem;
-	rc = smc_ib_create_queue_pair(lnk);
-	if (rc)
-		goto dealloc_pd;
-	rc = smc_wr_create_link(lnk);
-	if (rc)
-		goto destroy_qp;
-
+	if (is_smcd) {
+		/* SMC-D specific settings */
+		lgr->peer_gid = peer_gid;
+		lgr->smcd = smcismdev;
+	} else {
+		/* SMC-R specific settings */
+		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+
+		lnk = &lgr->lnk[SMC_SINGLE_LINK];
+		/* initialize link */
+		lnk->state = SMC_LNK_ACTIVATING;
+		lnk->link_id = SMC_SINGLE_LINK;
+		lnk->smcibdev = smcibdev;
+		lnk->ibport = ibport;
+		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
+		if (!smcibdev->initialized)
+			smc_ib_setup_per_ibdev(smcibdev);
+		get_random_bytes(rndvec, sizeof(rndvec));
+		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+			(rndvec[2] << 16);
+		rc = smc_llc_link_init(lnk);
+		if (rc)
+			goto free_lgr;
+		rc = smc_wr_alloc_link_mem(lnk);
+		if (rc)
+			goto clear_llc_lnk;
+		rc = smc_ib_create_protection_domain(lnk);
+		if (rc)
+			goto free_link_mem;
+		rc = smc_ib_create_queue_pair(lnk);
+		if (rc)
+			goto dealloc_pd;
+		rc = smc_wr_create_link(lnk);
+		if (rc)
+			goto destroy_qp;
+	}
 	smc->conn.lgr = lgr;
-	rwlock_init(&lgr->conns_lock);
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_add(&lgr->list, &smc_lgr_list.list);
 	spin_unlock_bh(&smc_lgr_list.lock);
@@ -264,7 +281,10 @@ void smc_conn_free(struct smc_connection *conn)
 {
 	if (!conn->lgr)
 		return;
-	smc_cdc_tx_dismiss_slots(conn);
+	if (conn->lgr->is_smcd)
+		smc_ism_unset_conn(conn);
+	else
+		smc_cdc_tx_dismiss_slots(conn);
 	smc_lgr_unregister_conn(conn);
 	smc_buf_unuse(conn);
 }
@@ -280,8 +300,8 @@ static void smc_link_clear(struct smc_link *lnk)
 	smc_wr_free_link_mem(lnk);
 }
 
-static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
-			 struct smc_buf_desc *buf_desc)
+static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
+			  struct smc_buf_desc *buf_desc)
 {
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
 
@@ -301,6 +321,25 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
 	kfree(buf_desc);
 }
 
+static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
+			  struct smc_buf_desc *buf_desc)
+{
+	if (is_dmb)
+		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
+	else
+		kfree(buf_desc->cpu_addr);
+	kfree(buf_desc);
+}
+
+static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
+			 struct smc_buf_desc *buf_desc)
+{
+	if (lgr->is_smcd)
+		smcd_buf_free(lgr, is_rmb, buf_desc);
+	else
+		smcr_buf_free(lgr, is_rmb, buf_desc);
+}
+
 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
 	struct smc_buf_desc *buf_desc, *bf_desc;
@@ -332,7 +371,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 void smc_lgr_free(struct smc_link_group *lgr)
 {
 	smc_lgr_free_bufs(lgr);
-	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+	if (lgr->is_smcd)
+		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+	else
+		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
 	kfree(lgr);
 }
 
@@ -357,7 +399,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
 	lgr->terminating = 1;
 	if (!list_empty(&lgr->list)) /* forget lgr */
 		list_del_init(&lgr->list);
-	smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+	if (!lgr->is_smcd)
+		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
 	write_lock_bh(&lgr->conns_lock);
 	node = rb_first(&lgr->conns_all);
@@ -374,7 +417,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
 		node = rb_first(&lgr->conns_all);
 	}
 	write_unlock_bh(&lgr->conns_lock);
-	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+	if (!lgr->is_smcd)
+		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
 	smc_lgr_schedule_free_work(lgr);
 }
 
@@ -392,13 +436,40 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-		if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+		if (!lgr->is_smcd &&
+		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
 		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
 			__smc_lgr_terminate(lgr);
 	}
 	spin_unlock_bh(&smc_lgr_list.lock);
 }
 
+/* Called when SMC-D device is terminated or peer is lost */
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+{
+	struct smc_link_group *lgr, *l;
+	LIST_HEAD(lgr_free_list);
+
+	/* run common cleanup function and build free list */
+	spin_lock_bh(&smc_lgr_list.lock);
+	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+		if (lgr->is_smcd && lgr->smcd == dev &&
+		    (!peer_gid || lgr->peer_gid == peer_gid) &&
+		    !list_empty(&lgr->list)) {
+			__smc_lgr_terminate(lgr);
+			list_move(&lgr->list, &lgr_free_list);
+		}
+	}
+	spin_unlock_bh(&smc_lgr_list.lock);
+
+	/* cancel the regular free workers and actually free lgrs */
+	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+		list_del_init(&lgr->list);
+		cancel_delayed_work_sync(&lgr->free_work);
+		smc_lgr_free(lgr);
+	}
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
@@ -477,10 +548,30 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 	return -ENODEV;
 }
 
+static bool smcr_lgr_match(struct smc_link_group *lgr,
+			   struct smc_clc_msg_local *lcl,
+			   enum smc_lgr_role role)
+{
+	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
+		       SMC_SYSTEMID_LEN) &&
+		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
+			SMC_GID_SIZE) &&
+		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
+			sizeof(lcl->mac)) &&
+		lgr->role == role;
+}
+
+static bool smcd_lgr_match(struct smc_link_group *lgr,
+			   struct smcd_dev *smcismdev, u64 peer_gid)
+{
+	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
+}
+
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, int srv_first_contact)
+		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+		    u64 peer_gid)
 {
 	struct smc_connection *conn = &smc->conn;
 	int local_contact = SMC_FIRST_CONTACT;
@@ -502,17 +593,12 @@ int smc_conn_create(struct smc_sock *smc,
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
 		write_lock_bh(&lgr->conns_lock);
-		if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
-			    SMC_SYSTEMID_LEN) &&
-		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
-			    SMC_GID_SIZE) &&
-		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
-			    sizeof(lcl->mac)) &&
+		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
+		     smcr_lgr_match(lgr, lcl, role)) &&
 		    !lgr->sync_err &&
-		    (lgr->role == role) &&
-		    (lgr->vlan_id == vlan_id) &&
-		    ((role == SMC_CLNT) ||
-		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
+		    lgr->vlan_id == vlan_id &&
+		    (role == SMC_CLNT ||
+		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
 			/* link group found */
 			local_contact = SMC_REUSE_CONTACT;
 			conn->lgr = lgr;
@@ -535,12 +621,13 @@ int smc_conn_create(struct smc_sock *smc,
 
 create:
 	if (local_contact == SMC_FIRST_CONTACT) {
-		rc = smc_lgr_create(smc, smcibdev, ibport,
-				    lcl->id_for_peer, vlan_id);
+		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
+				    lcl->id_for_peer, vlan_id, smcd, peer_gid);
 		if (rc)
 			goto out;
 		smc_lgr_register_conn(conn); /* add smc conn to lgr */
-		rc = smc_link_determine_gid(conn->lgr);
+		if (!is_smcd)
+			rc = smc_link_determine_gid(conn->lgr);
 	}
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
@@ -609,8 +696,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
-static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
-					       bool is_rmb, int bufsize)
+static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
+						bool is_rmb, int bufsize)
 {
 	struct smc_buf_desc *buf_desc;
 	struct smc_link *lnk;
@@ -668,7 +755,43 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
 	return buf_desc;
 }
 
-static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
+#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+
+static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
+						bool is_dmb, int bufsize)
+{
+	struct smc_buf_desc *buf_desc;
+	int rc;
+
+	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
+		return ERR_PTR(-EAGAIN);
+
+	/* try to alloc a new DMB */
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return ERR_PTR(-ENOMEM);
+	if (is_dmb) {
+		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
+		if (rc) {
+			kfree(buf_desc);
+			return ERR_PTR(-EAGAIN);
+		}
+		memset(buf_desc->cpu_addr, 0, bufsize);
+		buf_desc->len = bufsize;
+	} else {
+		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
+					     __GFP_NOWARN | __GFP_NORETRY |
+					     __GFP_NOMEMALLOC);
+		if (!buf_desc->cpu_addr) {
+			kfree(buf_desc);
+			return ERR_PTR(-EAGAIN);
+		}
+		buf_desc->len = bufsize;
+	}
+	return buf_desc;
+}
+
+static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 {
 	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
 	struct smc_connection *conn = &smc->conn;
@@ -706,7 +829,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 			break; /* found reusable slot */
 		}
 
-		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+		if (is_smcd)
+			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
+		else
+			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+
 		if (PTR_ERR(buf_desc) == -ENOMEM)
 			break;
 		if (IS_ERR(buf_desc))
@@ -728,6 +855,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 		smc->sk.sk_rcvbuf = bufsize * 2;
 		atomic_set(&conn->bytes_to_rcv, 0);
 		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
+		if (is_smcd)
+			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
 	} else {
 		conn->sndbuf_desc = buf_desc;
 		smc->sk.sk_sndbuf = bufsize * 2;
@@ -740,6 +869,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 			       conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -748,6 +879,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 				  conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -756,6 +889,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 			       conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -764,6 +899,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 				  conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -774,16 +911,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
  * extra RMB for every connection in a link group
  */
-int smc_buf_create(struct smc_sock *smc)
+int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
 	int rc;
 
 	/* create send buffer */
-	rc = __smc_buf_create(smc, false);
+	rc = __smc_buf_create(smc, is_smcd, false);
 	if (rc)
 		return rc;
 	/* create rmb */
-	rc = __smc_buf_create(smc, true);
+	rc = __smc_buf_create(smc, is_smcd, true);
 	if (rc)
 		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
 	return rc;
@@ -865,7 +1002,8 @@ void smc_core_exit(void)
 	spin_unlock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
 		list_del_init(&lgr->list);
-		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+		if (!lgr->is_smcd)
+			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 		cancel_delayed_work_sync(&lgr->free_work);
 		smc_lgr_free(lgr); /* free link group */
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 93cb3523bf50..cd9268a9570e 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -124,15 +124,28 @@ struct smc_buf_desc {
 	void			*cpu_addr;	/* virtual address of buffer */
 	struct page		*pages;
 	int			len;		/* length of buffer */
-	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
-	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
-						/* for rmb only: memory region
-						 * incl. rkey provided to peer
-						 */
-	u32			order;		/* allocation order */
 	u32			used;		/* currently used / unused */
 	u8			reused	: 1;	/* new created / reused */
 	u8			regerr	: 1;	/* err during registration */
+	union {
+		struct { /* SMC-R */
+			struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];
+						/* virtual buffer */
+			struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+						/* for rmb only: memory region
+						 * incl. rkey provided to peer
+						 */
+			u32			order;	/* allocation order */
+		};
+		struct { /* SMC-D */
+			unsigned short		sba_idx;
+						/* SBA index number */
+			u64			token;
+						/* DMB token number */
+			dma_addr_t		dma_addr;
+						/* DMA address */
+		};
+	};
 };
 
 struct smc_rtoken {				/* address/key of remote RMB */
@@ -148,12 +161,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
  * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
  */
 
+struct smcd_dev;
+
 struct smc_link_group {
 	struct list_head	list;
-	enum smc_lgr_role	role;		/* client or server */
-	struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];	/* smc link */
-	char			peer_systemid[SMC_SYSTEMID_LEN];
-						/* unique system_id of peer */
 	struct rb_root		conns_all;	/* connection tree */
 	rwlock_t		conns_lock;	/* protects conns_all */
 	unsigned int		conns_num;	/* current # of connections */
@@ -163,17 +174,35 @@ struct smc_link_group {
 	rwlock_t		sndbufs_lock;	/* protects tx buffers */
 	struct list_head	rmbs[SMC_RMBE_SIZES];	/* rx buffers */
 	rwlock_t		rmbs_lock;	/* protects rx buffers */
-	struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX]
-				       [SMC_LINKS_PER_LGR_MAX];
-						/* remote addr/key pairs */
-	unsigned long		rtokens_used_mask[BITS_TO_LONGS(
-							SMC_RMBS_PER_LGR_MAX)];
-						/* used rtoken elements */
 
 	u8			id[SMC_LGR_ID_SIZE];	/* unique lgr id */
 	struct delayed_work	free_work;	/* delayed freeing of an lgr */
 	u8			sync_err : 1;	/* lgr no longer fits to peer */
 	u8			terminating : 1;/* lgr is terminating */
+
+	bool			is_smcd;	/* SMC-R or SMC-D */
+	union {
+		struct { /* SMC-R */
+			enum smc_lgr_role	role;
+						/* client or server */
+			struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];
+						/* smc link */
+			char			peer_systemid[SMC_SYSTEMID_LEN];
+						/* unique system_id of peer */
+			struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX]
+						[SMC_LINKS_PER_LGR_MAX];
+						/* remote addr/key pairs */
+			unsigned long		rtokens_used_mask[BITS_TO_LONGS
+							(SMC_RMBS_PER_LGR_MAX)];
+						/* used rtoken elements */
+		};
+		struct { /* SMC-D */
+			u64			peer_gid;
+						/* Peer GID (remote) */
+			struct smcd_dev		*smcd;
+						/* ISM device for VLAN reg. */
+		};
+	};
 };
 
 /* Find the connection associated with the given alert token in the link group.
@@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_buf_create(struct smc_sock *smc);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
@@ -227,9 +257,12 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
+
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, int srv_first_contact);
+		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+		    u64 peer_gid);
+void smcd_conn_free(struct smc_connection *conn);
 void smc_core_exit(void);
 #endif
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 839354402215..64ce107c24d9 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			goto errout;
 	}
 
-	if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+	if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+	    (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
 	    !list_empty(&smc->conn.lgr->list)) {
 		struct smc_diag_lgrinfo linfo = {
 			.role = smc->conn.lgr->role,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
new file mode 100644
index 000000000000..ca1ce42fd49f
--- /dev/null
+++ b/net/smc/smc_ism.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * Functions for ISM device.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_ism.h"
+
+struct smcd_dev_list smcd_dev_list = {
+	.list = LIST_HEAD_INIT(smcd_dev_list.list),
+	.lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
+};
+
+/* Test if an ISM communication is possible. */
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
+{
+	return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
+					   vlan_id);
+}
+
+int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
+		  void *data, size_t len)
+{
+	int rc;
+
+	rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
+				  pos->offset, data, len);
+
+	return rc < 0 ? rc : 0;
+}
+
+/* Set a connection using this DMBE. */
+void smc_ism_set_conn(struct smc_connection *conn)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
+	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Unset a connection using this DMBE. */
+void smc_ism_unset_conn(struct smc_connection *conn)
+{
+	unsigned long flags;
+
+	if (!conn->rmb_desc)
+		return;
+
+	spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
+	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Register a VLAN identifier with the ISM device. Use a reference count
+ * and add a VLAN identifier only when the first DMB using this VLAN is
+ * registered.
+ */
+int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+	struct smc_ism_vlanid *new_vlan, *vlan;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!vlanid)			/* No valid vlan id */
+		return -EINVAL;
+
+	/* create new vlan entry, in case we need it */
+	new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
+	if (!new_vlan)
+		return -ENOMEM;
+	new_vlan->vlanid = vlanid;
+	refcount_set(&new_vlan->refcnt, 1);
+
+	/* if there is an existing entry, increase count and return */
+	spin_lock_irqsave(&smcd->lock, flags);
+	list_for_each_entry(vlan, &smcd->vlan, list) {
+		if (vlan->vlanid == vlanid) {
+			refcount_inc(&vlan->refcnt);
+			kfree(new_vlan);
+			goto out;
+		}
+	}
+
+	/* no existing entry found.
+	 * add new entry to device; might fail, e.g., if HW limit reached
+	 */
+	if (smcd->ops->add_vlan_id(smcd, vlanid)) {
+		kfree(new_vlan);
+		rc = -EIO;
+		goto out;
+	}
+	list_add_tail(&new_vlan->list, &smcd->vlan);
+out:
+	spin_unlock_irqrestore(&smcd->lock, flags);
+	return rc;
+}
+
+/* Unregister a VLAN identifier with the ISM device. Use a reference count
+ * and remove a VLAN identifier only when the last DMB using this VLAN is
+ * unregistered.
+ */
+int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+	struct smc_ism_vlanid *vlan;
+	unsigned long flags;
+	bool found = false;
+	int rc = 0;
+
+	if (!vlanid)			/* No valid vlan id */
+		return -EINVAL;
+
+	spin_lock_irqsave(&smcd->lock, flags);
+	list_for_each_entry(vlan, &smcd->vlan, list) {
+		if (vlan->vlanid == vlanid) {
+			if (!refcount_dec_and_test(&vlan->refcnt))
+				goto out;
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		rc = -ENOENT;
+		goto out;		/* VLAN id not in table */
+	}
+
+	/* Found and the last reference just gone */
+	if (smcd->ops->del_vlan_id(smcd, vlanid))
+		rc = -EIO;
+	list_del(&vlan->list);
+	kfree(vlan);
+out:
+	spin_unlock_irqrestore(&smcd->lock, flags);
+	return rc;
+}
+
+int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
+{
+	struct smcd_dmb dmb;
+
+	memset(&dmb, 0, sizeof(dmb));
+	dmb.dmb_tok = dmb_desc->token;
+	dmb.sba_idx = dmb_desc->sba_idx;
+	dmb.cpu_addr = dmb_desc->cpu_addr;
+	dmb.dma_addr = dmb_desc->dma_addr;
+	dmb.dmb_len = dmb_desc->len;
+	return smcd->ops->unregister_dmb(smcd, &dmb);
+}
+
+int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
+			 struct smc_buf_desc *dmb_desc)
+{
+	struct smcd_dmb dmb;
+	int rc;
+
+	memset(&dmb, 0, sizeof(dmb));
+	dmb.dmb_len = dmb_len;
+	dmb.sba_idx = dmb_desc->sba_idx;
+	dmb.vlan_id = lgr->vlan_id;
+	dmb.rgid = lgr->peer_gid;
+	rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
+	if (!rc) {
+		dmb_desc->sba_idx = dmb.sba_idx;
+		dmb_desc->token = dmb.dmb_tok;
+		dmb_desc->cpu_addr = dmb.cpu_addr;
+		dmb_desc->dma_addr = dmb.dma_addr;
+		dmb_desc->len = dmb.dmb_len;
+	}
+	return rc;
+}
+
+struct smc_ism_event_work {
+	struct work_struct work;
+	struct smcd_dev *smcd;
+	struct smcd_event event;
+};
+
+/* worker for SMC-D events */
+static void smc_ism_event_work(struct work_struct *work)
+{
+	struct smc_ism_event_work *wrk =
+		container_of(work, struct smc_ism_event_work, work);
+
+	switch (wrk->event.type) {
+	case ISM_EVENT_GID:	/* GID event, token is peer GID */
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+		break;
+	case ISM_EVENT_DMB:
+		break;
+	}
+	kfree(wrk);
+}
+
+static void smcd_release(struct device *dev)
+{
+	struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
+
+	kfree(smcd->conn);
+	kfree(smcd);
+}
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+				const struct smcd_ops *ops, int max_dmbs)
+{
+	struct smcd_dev *smcd;
+
+	smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+	if (!smcd)
+		return NULL;
+	smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+			     GFP_KERNEL);
+	if (!smcd->conn) {
+		kfree(smcd);
+		return NULL;
+	}
+
+	smcd->dev.parent = parent;
+	smcd->dev.release = smcd_release;
+	device_initialize(&smcd->dev);
+	dev_set_name(&smcd->dev, name);
+	smcd->ops = ops;
+
+	spin_lock_init(&smcd->lock);
+	INIT_LIST_HEAD(&smcd->vlan);
+	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
+						 WQ_MEM_RECLAIM, name);
+	return smcd;
+}
+EXPORT_SYMBOL_GPL(smcd_alloc_dev);
+
+int smcd_register_dev(struct smcd_dev *smcd)
+{
+	spin_lock(&smcd_dev_list.lock);
+	list_add_tail(&smcd->list, &smcd_dev_list.list);
+	spin_unlock(&smcd_dev_list.lock);
+
+	return device_add(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_register_dev);
+
+void smcd_unregister_dev(struct smcd_dev *smcd)
+{
+	spin_lock(&smcd_dev_list.lock);
+	list_del(&smcd->list);
+	spin_unlock(&smcd_dev_list.lock);
+	flush_workqueue(smcd->event_wq);
+	destroy_workqueue(smcd->event_wq);
+	smc_smcd_terminate(smcd, 0);
+
+	device_del(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_unregister_dev);
+
+void smcd_free_dev(struct smcd_dev *smcd)
+{
+	put_device(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_free_dev);
+
+/* SMCD Device event handler. Called from ISM device interrupt handler.
+ * Parameters are smcd device pointer,
+ * - event->type (0 --> DMB, 1 --> GID),
+ * - event->code (event code),
+ * - event->tok (either DMB token when event type 0, or GID when event type 1)
+ * - event->time (time of day)
+ * - event->info (debug info).
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver event handler.
+ */
+void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
+{
+	struct smc_ism_event_work *wrk;
+
+	/* copy event to event work queue, and let it be handled there */
+	wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
+	if (!wrk)
+		return;
+	INIT_WORK(&wrk->work, smc_ism_event_work);
+	wrk->smcd = smcd;
+	wrk->event = *event;
+	queue_work(smcd->event_wq, &wrk->work);
+}
+EXPORT_SYMBOL_GPL(smcd_handle_event);
+
+/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
+ * Parameters are smcd device pointer and DMB number. Find the connection and
+ * schedule the tasklet for this connection.
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver IRQ handler.
+ */
+void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
+{
+}
+EXPORT_SYMBOL_GPL(smcd_handle_irq);
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
new file mode 100644
index 000000000000..aee45b860b79
--- /dev/null
+++ b/net/smc/smc_ism.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * SMC-D ISM device structure definitions.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef SMCD_ISM_H
+#define SMCD_ISM_H
+
+#include <linux/uio.h>
+
+#include "smc.h"
+
+struct smcd_dev_list {	/* List of SMCD devices */
+	struct list_head list;
+	spinlock_t lock;	/* Protects list of devices */
+};
+
+extern struct smcd_dev_list	smcd_dev_list; /* list of smcd devices */
+
+struct smc_ism_vlanid {			/* VLAN id set on ISM device */
+	struct list_head list;
+	unsigned short vlanid;		/* Vlan id */
+	refcount_t refcnt;		/* Reference count */
+};
+
+struct smc_ism_position {	/* ISM device position to write to */
+	u64 token;		/* Token of DMB */
+	u32 offset;		/* Offset into DMBE */
+	u8 index;		/* Index of DMBE */
+	u8 signal;		/* Generate interrupt on owner side */
+};
+
+struct smcd_dev;
+
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
+void smc_ism_set_conn(struct smc_connection *conn);
+void smc_ism_unset_conn(struct smc_connection *conn);
+int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
+			 struct smc_buf_desc *dmb_desc);
+int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
+		  void *data, size_t len);
+#endif
-- 
cgit v1.2.3


From 1619f770589a183af56f248de261534b255122de Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:08 +0200
Subject: net/smc: add pnetid support for SMC-D and ISM

SMC-D relies on PNETIDs to find usable SMC-D/ISM devices for a SMC
connection. This patch adds SMC-D/ISM support to the current PNETID
implementation.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h  |  1 +
 net/smc/smc_ism.c  |  2 ++
 net/smc/smc_pnet.c | 41 +++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_pnet.h |  2 ++
 4 files changed, 46 insertions(+)

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index 824a7af8d654..9ef49f8b1002 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -73,6 +73,7 @@ struct smcd_dev {
 	struct smc_connection **conn;
 	struct list_head vlan;
 	struct workqueue_struct *event_wq;
+	u8 pnetid[SMC_MAX_PNETID_LEN];
 };
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index ca1ce42fd49f..f44e4dff244a 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -13,6 +13,7 @@
 #include "smc.h"
 #include "smc_core.h"
 #include "smc_ism.h"
+#include "smc_pnet.h"
 
 struct smcd_dev_list smcd_dev_list = {
 	.list = LIST_HEAD_INIT(smcd_dev_list.list),
@@ -227,6 +228,7 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	device_initialize(&smcd->dev);
 	dev_set_name(&smcd->dev, name);
 	smcd->ops = ops;
+	smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
 
 	spin_lock_init(&smcd->lock);
 	INIT_LIST_HEAD(&smcd->vlan);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index cdc6e23b6ce1..1b6c066d3495 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -22,6 +22,7 @@
 
 #include "smc_pnet.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 	[SMC_PNETID_NAME] = {
@@ -564,6 +565,27 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 	spin_unlock(&smc_ib_devices.lock);
 }
 
+static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
+					struct smcd_dev **smcismdev)
+{
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	struct smcd_dev *ismdev;
+
+	ndev = pnet_find_base_ndev(ndev);
+	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+				   ndev_pnetid))
+		return; /* pnetid could not be determined */
+
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
+		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
+			*smcismdev = ismdev;
+			break;
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
+}
+
 /* Lookup of coupled ib_device via SMC pnet table */
 static void smc_pnet_find_roce_by_table(struct net_device *netdev,
 					struct smc_ib_device **smcibdev,
@@ -615,3 +637,22 @@ out_rel:
 out:
 	return;
 }
+
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+
+	*smcismdev = NULL;
+	if (!dst)
+		goto out;
+	if (!dst->dev)
+		goto out_rel;
+
+	/* if possible, lookup via hardware-defined pnetid */
+	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+
+out_rel:
+	dst_release(dst);
+out:
+	return;
+}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index ad4455cde9e7..1e94fd4df7bc 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -17,6 +17,7 @@
 #endif
 
 struct smc_ib_device;
+struct smcd_dev;
 
 static inline int smc_pnetid_by_dev_port(struct device *dev,
 					 unsigned short port, u8 *pnetid)
@@ -33,5 +34,6 @@ void smc_pnet_exit(void);
 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
 void smc_pnet_find_roce_resource(struct sock *sk,
 				 struct smc_ib_device **smcibdev, u8 *ibport);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
 
 #endif
-- 
cgit v1.2.3


From 4b1b7d3b30a6d32ac1a1dcede284e76ef8a8542d Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 28 Jun 2018 19:05:12 +0200
Subject: net/smc: add SMC-D diag support

This patch adds diag support for SMC-D.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Suggested-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc_diag.h | 10 ++++++++++
 net/smc/smc_diag.c            | 15 +++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 0ae5d4685ba3..92be255e534c 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -35,6 +35,7 @@ enum {
 	SMC_DIAG_CONNINFO,
 	SMC_DIAG_LGRINFO,
 	SMC_DIAG_SHUTDOWN,
+	SMC_DIAG_DMBINFO,
 	__SMC_DIAG_MAX,
 };
 
@@ -83,4 +84,13 @@ struct smc_diag_lgrinfo {
 	struct smc_diag_linkinfo	lnk[1];
 	__u8				role;
 };
+
+struct smcd_diag_dmbinfo {		/* SMC-D Socket internals */
+	__u32 linkid;			/* Link identifier */
+	__u64 peer_gid;			/* Peer GID */
+	__u64 my_gid;			/* My GID */
+	__u64 token;			/* Token of DMB */
+	__u64 peer_token;		/* Token of remote DMBE */
+};
+
 #endif /* _UAPI_SMC_DIAG_H_ */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 64ce107c24d9..6d83eef1b743 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -156,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 		if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
 			goto errout;
 	}
+	if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+	    (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
+	    !list_empty(&smc->conn.lgr->list)) {
+		struct smc_connection *conn = &smc->conn;
+		struct smcd_diag_dmbinfo dinfo = {
+			.linkid = *((u32 *)conn->lgr->id),
+			.peer_gid = conn->lgr->peer_gid,
+			.my_gid = conn->lgr->smcd->local_gid,
+			.token = conn->rmb_desc->token,
+			.peer_token = conn->peer_token
+		};
+
+		if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
+			goto errout;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;
-- 
cgit v1.2.3


From a1be5a20f137bdf436bab86c18998229908ce951 Mon Sep 17 00:00:00 2001
From: GhantaKrishnamurthy MohanKrishna
 <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Date: Fri, 29 Jun 2018 13:26:18 +0200
Subject: tipc: extend sock diag for group communication

This commit extends the existing TIPC socket diagnostics framework
for information related to TIPC group communication.

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: GhantaKrishnamurthy MohanKrishna <mohan.krishna.ghanta.krishnamurthy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h | 14 ++++++++++++++
 net/tipc/group.c                  | 32 ++++++++++++++++++++++++++++++++
 net/tipc/group.h                  |  1 +
 net/tipc/socket.c                 |  5 +++++
 4 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 85c11982c89b..0ebe02ef1a86 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -121,6 +121,7 @@ enum {
 	TIPC_NLA_SOCK_TIPC_STATE,	/* u32 */
 	TIPC_NLA_SOCK_COOKIE,		/* u64 */
 	TIPC_NLA_SOCK_PAD,		/* flag */
+	TIPC_NLA_SOCK_GROUP,		/* nest */
 
 	__TIPC_NLA_SOCK_MAX,
 	TIPC_NLA_SOCK_MAX = __TIPC_NLA_SOCK_MAX - 1
@@ -233,6 +234,19 @@ enum {
 	TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1
 };
 
+/* Nest, socket group info */
+enum {
+	TIPC_NLA_SOCK_GROUP_ID,			/* u32 */
+	TIPC_NLA_SOCK_GROUP_OPEN,		/* flag */
+	TIPC_NLA_SOCK_GROUP_NODE_SCOPE,		/* flag */
+	TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE,	/* flag */
+	TIPC_NLA_SOCK_GROUP_INSTANCE,		/* u32 */
+	TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,	/* u32 */
+
+	__TIPC_NLA_SOCK_GROUP_MAX,
+	TIPC_NLA_SOCK_GROUP_MAX = __TIPC_NLA_SOCK_GROUP_MAX - 1
+};
+
 /* Nest, connection info */
 enum {
 	TIPC_NLA_CON_UNSPEC,
diff --git a/net/tipc/group.c b/net/tipc/group.c
index d7a7befeddd4..cbe39e8db39c 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -918,3 +918,35 @@ void tipc_group_member_evt(struct tipc_group *grp,
 	}
 	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
 }
+
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
+{
+	struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
+			grp->type) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
+			grp->instance) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,
+			grp->bc_snd_nxt))
+		goto group_msg_cancel;
+
+	if (grp->scope == TIPC_NODE_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE))
+			goto group_msg_cancel;
+
+	if (grp->scope == TIPC_CLUSTER_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE))
+			goto group_msg_cancel;
+
+	if (*grp->open)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN))
+			goto group_msg_cancel;
+
+	nla_nest_end(skb, group);
+	return 0;
+
+group_msg_cancel:
+	nla_nest_cancel(skb, group);
+	return -1;
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
index 5996af6e9f1d..76b4e5a7b39d 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -72,4 +72,5 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			       u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb);
 #endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 14a5d055717d..840dd995f631 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3316,6 +3316,11 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
 		goto stat_msg_cancel;
 
 	nla_nest_end(skb, stat);
+
+	if (tsk->group)
+		if (tipc_group_fill_sock_diag(tsk->group, skb))
+			goto stat_msg_cancel;
+
 	nla_nest_end(skb, attrs);
 
 	return 0;
-- 
cgit v1.2.3


From 7ce3f912ae0a79e5d738a3ae1f158b281973e849 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Sat, 30 Jun 2018 11:24:24 -0400
Subject: PCI: Enable PASID only if entire path supports End-End TLP prefixes

A PCIe endpoint carries the process address space identifier (PASID) in
the TLP prefix as part of the memory read/write transaction. The address
information in the TLP is relevant only for a given PASID context.

An IOMMU takes PASID value and the address information from the
TLP to look up the physical address in the system.

PASID is an End-End TLP Prefix (PCIe r4.0, sec 6.20).  Sec 2.2.10.2 says

  It is an error to receive a TLP with an End-End TLP Prefix by a
  Receiver that does not support End-End TLP Prefixes. A TLP in
  violation of this rule is handled as a Malformed TLP. This is a
  reported error associated with the Receiving Port (see Section 6.2).

Prevent error condition by proactively requiring End-End TLP prefix to be
supported on the entire data path between the endpoint and the root port
before enabling PASID.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/ats.c             |  3 +++
 drivers/pci/probe.c           | 24 ++++++++++++++++++++++++
 include/linux/pci.h           |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 4 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 4923a2a8e14b..5b78f3b1b918 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -273,6 +273,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (WARN_ON(pdev->pasid_enabled))
 		return -EBUSY;
 
+	if (!pdev->eetlp_prefix_path)
+		return -EINVAL;
+
 	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
 	if (!pos)
 		return -EINVAL;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..4c35c2909d57 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2042,6 +2042,29 @@ static void pci_configure_ltr(struct pci_dev *dev)
 #endif
 }
 
+static void pci_configure_eetlp_prefix(struct pci_dev *dev)
+{
+#ifdef CONFIG_PCI_PASID
+	struct pci_dev *bridge;
+	u32 cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap);
+	if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX))
+		return;
+
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+		dev->eetlp_prefix_path = 1;
+	else {
+		bridge = pci_upstream_bridge(dev);
+		if (bridge && bridge->eetlp_prefix_path)
+			dev->eetlp_prefix_path = 1;
+	}
+#endif
+}
+
 static void pci_configure_device(struct pci_dev *dev)
 {
 	struct hotplug_params hpp;
@@ -2051,6 +2074,7 @@ static void pci_configure_device(struct pci_dev *dev)
 	pci_configure_extended_tags(dev, NULL);
 	pci_configure_relaxed_ordering(dev);
 	pci_configure_ltr(dev);
+	pci_configure_eetlp_prefix(dev);
 
 	memset(&hpp, 0, sizeof(hpp));
 	ret = pci_get_hp_params(dev, &hpp);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..6ba818449095 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -350,6 +350,7 @@ struct pci_dev {
 	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
 					   supported from root to here */
 #endif
+	unsigned int	eetlp_prefix_path:1;	/* End-to-End TLP Prefix */
 
 	pci_channel_state_t error_state;	/* Current connectivity state */
 	struct device	dev;			/* Generic device interface */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4da87e2ef8a8..04d7480db714 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -636,6 +636,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MASK	0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG	0x00040000 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE	0x00080000 /* Re-use WAKE# for OBFF */
+#define PCI_EXP_DEVCAP2_EE_PREFIX	0x00200000 /* End-End TLP Prefix */
 #define PCI_EXP_DEVCTL2		40	/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT	0x000f	/* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	/* Completion Timeout Disable */
-- 
cgit v1.2.3


From bb29648102335586e9a66289a1d98a0cb392b6e5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 10:22:38 -0700
Subject: crypto: vmac - separate tfm and request context

syzbot reported a crash in vmac_final() when multiple threads
concurrently use the same "vmac(aes)" transform through AF_ALG.  The bug
is pretty fundamental: the VMAC template doesn't separate per-request
state from per-tfm (per-key) state like the other hash algorithms do,
but rather stores it all in the tfm context.  That's wrong.

Also, vmac_final() incorrectly zeroes most of the state including the
derived keys and cached pseudorandom pad.  Therefore, only the first
VMAC invocation with a given key calculates the correct digest.

Fix these bugs by splitting the per-tfm state from the per-request state
and using the proper init/update/final sequencing for requests.

Reproducer for the crash:

    #include <linux/if_alg.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main()
    {
            int fd;
            struct sockaddr_alg addr = {
                    .salg_type = "hash",
                    .salg_name = "vmac(aes)",
            };
            char buf[256] = { 0 };

            fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
            bind(fd, (void *)&addr, sizeof(addr));
            setsockopt(fd, SOL_ALG, ALG_SET_KEY, buf, 16);
            fork();
            fd = accept(fd, NULL, NULL);
            for (;;)
                    write(fd, buf, 256);
    }

The immediate cause of the crash is that vmac_ctx_t.partial_size exceeds
VMAC_NHBYTES, causing vmac_final() to memset() a negative length.

Reported-by: syzbot+264bca3a6e8d645550d3@syzkaller.appspotmail.com
Fixes: f1939f7c5645 ("crypto: vmac - New hash algorithm for intel_txt support")
Cc: <stable@vger.kernel.org> # v2.6.32+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/vmac.c         | 408 ++++++++++++++++++++++----------------------------
 include/crypto/vmac.h |  63 --------
 2 files changed, 181 insertions(+), 290 deletions(-)
 delete mode 100644 include/crypto/vmac.h

(limited to 'include')

diff --git a/crypto/vmac.c b/crypto/vmac.c
index 3034454a3713..bb2fc787d615 100644
--- a/crypto/vmac.c
+++ b/crypto/vmac.c
@@ -1,6 +1,10 @@
 /*
- * Modified to interface to the Linux kernel
+ * VMAC: Message Authentication Code using Universal Hashing
+ *
+ * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01
+ *
  * Copyright (c) 2009, Intel Corporation.
+ * Copyright (c) 2018, Google Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -16,14 +20,15 @@
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
 
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
+/*
+ * Derived from:
+ *	VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
+ *	This implementation is herby placed in the public domain.
+ *	The authors offers no warranty. Use at your own risk.
+ *	Last modified: 17 APR 08, 1700 PDT
+ */
 
+#include <asm/unaligned.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/crypto.h>
@@ -31,9 +36,35 @@
 #include <linux/scatterlist.h>
 #include <asm/byteorder.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/vmac.h>
 #include <crypto/internal/hash.h>
 
+/*
+ * User definable settings.
+ */
+#define VMAC_TAG_LEN	64
+#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
+#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
+#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
+
+/* per-transform (per-key) context */
+struct vmac_tfm_ctx {
+	struct crypto_cipher *cipher;
+	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
+	u64 polykey[2*VMAC_TAG_LEN/64];
+	u64 l3key[2*VMAC_TAG_LEN/64];
+};
+
+/* per-request context */
+struct vmac_desc_ctx {
+	union {
+		u8 partial[VMAC_NHBYTES];	/* partial block */
+		__le64 partial_words[VMAC_NHBYTES / 8];
+	};
+	unsigned int partial_size;	/* size of the partial block */
+	bool first_block_processed;
+	u64 polytmp[2*VMAC_TAG_LEN/64];	/* running total of L2-hash */
+};
+
 /*
  * Constants and masks
  */
@@ -318,13 +349,6 @@ static void poly_step_func(u64 *ahi, u64 *alo,
 	} while (0)
 #endif
 
-static void vhash_abort(struct vmac_ctx *ctx)
-{
-	ctx->polytmp[0] = ctx->polykey[0] ;
-	ctx->polytmp[1] = ctx->polykey[1] ;
-	ctx->first_block_processed = 0;
-}
-
 static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 {
 	u64 rh, rl, t, z = 0;
@@ -364,280 +388,209 @@ static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
 	return rl;
 }
 
-static void vhash_update(const unsigned char *m,
-			unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */
-			struct vmac_ctx *ctx)
+/* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */
+static void vhash_blocks(const struct vmac_tfm_ctx *tctx,
+			 struct vmac_desc_ctx *dctx,
+			 const __le64 *mptr, unsigned int blocks)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
-
-	if (!mbytes)
-		return;
-
-	BUG_ON(mbytes % VMAC_NHBYTES);
-
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;  /* Must be non-zero */
-
-	ch = ctx->polytmp[0];
-	cl = ctx->polytmp[1];
-
-	if (!ctx->first_block_processed) {
-		ctx->first_block_processed = 1;
+	const u64 *kptr = tctx->nhkey;
+	const u64 pkh = tctx->polykey[0];
+	const u64 pkl = tctx->polykey[1];
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+	u64 rh, rl;
+
+	if (!dctx->first_block_processed) {
+		dctx->first_block_processed = true;
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		ADD128(ch, cl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
+		blocks--;
 	}
 
-	while (i--) {
+	while (blocks--) {
 		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
 		rh &= m62;
 		poly_step(ch, cl, pkh, pkl, rh, rl);
 		mptr += (VMAC_NHBYTES/sizeof(u64));
 	}
 
-	ctx->polytmp[0] = ch;
-	ctx->polytmp[1] = cl;
+	dctx->polytmp[0] = ch;
+	dctx->polytmp[1] = cl;
 }
 
-static u64 vhash(unsigned char m[], unsigned int mbytes,
-			u64 *tagl, struct vmac_ctx *ctx)
+static int vmac_setkey(struct crypto_shash *tfm,
+		       const u8 *key, unsigned int keylen)
 {
-	u64 rh, rl, *mptr;
-	const u64 *kptr = (u64 *)ctx->nhkey;
-	int i, remaining;
-	u64 ch, cl;
-	u64 pkh = ctx->polykey[0];
-	u64 pkl = ctx->polykey[1];
-
-	mptr = (u64 *)m;
-	i = mbytes / VMAC_NHBYTES;
-	remaining = mbytes % VMAC_NHBYTES;
-
-	if (ctx->first_block_processed) {
-		ch = ctx->polytmp[0];
-		cl = ctx->polytmp[1];
-	} else if (i) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		i--;
-	} else if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), ch, cl);
-		ch &= m62;
-		ADD128(ch, cl, pkh, pkl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-		goto do_l3;
-	} else {/* Empty String */
-		ch = pkh; cl = pkl;
-		goto do_l3;
-	}
-
-	while (i--) {
-		nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-		mptr += (VMAC_NHBYTES/sizeof(u64));
-	}
-	if (remaining) {
-		nh_16(mptr, kptr, 2*((remaining+15)/16), rh, rl);
-		rh &= m62;
-		poly_step(ch, cl, pkh, pkl, rh, rl);
-	}
-
-do_l3:
-	vhash_abort(ctx);
-	remaining *= 8;
-	return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining);
-}
+	struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+	__be64 out[2];
+	u8 in[16] = { 0 };
+	unsigned int i;
+	int err;
 
-static u64 vmac(unsigned char m[], unsigned int mbytes,
-			const unsigned char n[16], u64 *tagl,
-			struct vmac_ctx_t *ctx)
-{
-	u64 *in_n, *out_p;
-	u64 p, h;
-	int i;
-
-	in_n = ctx->__vmac_ctx.cached_nonce;
-	out_p = ctx->__vmac_ctx.cached_aes;
-
-	i = n[15] & 1;
-	if ((*(u64 *)(n+8) != in_n[1]) || (*(u64 *)(n) != in_n[0])) {
-		in_n[0] = *(u64 *)(n);
-		in_n[1] = *(u64 *)(n+8);
-		((unsigned char *)in_n)[15] &= 0xFE;
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out_p, (unsigned char *)in_n);
-
-		((unsigned char *)in_n)[15] |= (unsigned char)(1-i);
+	if (keylen != VMAC_KEY_LEN) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
 	}
-	p = be64_to_cpup(out_p + i);
-	h = vhash(m, mbytes, (u64 *)0, &ctx->__vmac_ctx);
-	return le64_to_cpu(p + h);
-}
 
-static int vmac_set_key(unsigned char user_key[], struct vmac_ctx_t *ctx)
-{
-	u64 in[2] = {0}, out[2];
-	unsigned i;
-	int err = 0;
-
-	err = crypto_cipher_setkey(ctx->child, user_key, VMAC_KEY_LEN);
+	err = crypto_cipher_setkey(tctx->cipher, key, keylen);
 	if (err)
 		return err;
 
 	/* Fill nh key */
-	((unsigned char *)in)[0] = 0x80;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.nhkey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.nhkey[i] = be64_to_cpup(out);
-		ctx->__vmac_ctx.nhkey[i+1] = be64_to_cpup(out+1);
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0x80;
+	for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->nhkey[i] = be64_to_cpu(out[0]);
+		tctx->nhkey[i+1] = be64_to_cpu(out[1]);
+		in[15]++;
 	}
 
 	/* Fill poly key */
-	((unsigned char *)in)[0] = 0xC0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.polykey)/8; i += 2) {
-		crypto_cipher_encrypt_one(ctx->child,
-			(unsigned char *)out, (unsigned char *)in);
-		ctx->__vmac_ctx.polytmp[i] =
-			ctx->__vmac_ctx.polykey[i] =
-				be64_to_cpup(out) & mpoly;
-		ctx->__vmac_ctx.polytmp[i+1] =
-			ctx->__vmac_ctx.polykey[i+1] =
-				be64_to_cpup(out+1) & mpoly;
-		((unsigned char *)in)[15] += 1;
+	in[0] = 0xC0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) {
+		crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+		tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly;
+		tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly;
+		in[15]++;
 	}
 
 	/* Fill ip key */
-	((unsigned char *)in)[0] = 0xE0;
-	in[1] = 0;
-	for (i = 0; i < sizeof(ctx->__vmac_ctx.l3key)/8; i += 2) {
+	in[0] = 0xE0;
+	in[15] = 0;
+	for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) {
 		do {
-			crypto_cipher_encrypt_one(ctx->child,
-				(unsigned char *)out, (unsigned char *)in);
-			ctx->__vmac_ctx.l3key[i] = be64_to_cpup(out);
-			ctx->__vmac_ctx.l3key[i+1] = be64_to_cpup(out+1);
-			((unsigned char *)in)[15] += 1;
-		} while (ctx->__vmac_ctx.l3key[i] >= p64
-			|| ctx->__vmac_ctx.l3key[i+1] >= p64);
+			crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
+			tctx->l3key[i] = be64_to_cpu(out[0]);
+			tctx->l3key[i+1] = be64_to_cpu(out[1]);
+			in[15]++;
+		} while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64);
 	}
 
-	/* Invalidate nonce/aes cache and reset other elements */
-	ctx->__vmac_ctx.cached_nonce[0] = (u64)-1; /* Ensure illegal nonce */
-	ctx->__vmac_ctx.cached_nonce[1] = (u64)0;  /* Ensure illegal nonce */
-	ctx->__vmac_ctx.first_block_processed = 0;
-
-	return err;
+	return 0;
 }
 
-static int vmac_setkey(struct crypto_shash *parent,
-		const u8 *key, unsigned int keylen)
+static int vmac_init(struct shash_desc *desc)
 {
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-
-	if (keylen != VMAC_KEY_LEN) {
-		crypto_shash_set_flags(parent, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return vmac_set_key((u8 *)key, ctx);
-}
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
 
-static int vmac_init(struct shash_desc *pdesc)
-{
+	dctx->partial_size = 0;
+	dctx->first_block_processed = false;
+	memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp));
 	return 0;
 }
 
-static int vmac_update(struct shash_desc *pdesc, const u8 *p,
-		unsigned int len)
+static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	int expand;
-	int min;
-
-	expand = VMAC_NHBYTES - ctx->partial_size > 0 ?
-			VMAC_NHBYTES - ctx->partial_size : 0;
-
-	min = len < expand ? len : expand;
-
-	memcpy(ctx->partial + ctx->partial_size, p, min);
-	ctx->partial_size += min;
-
-	if (len < expand)
-		return 0;
-
-	vhash_update(ctx->partial, VMAC_NHBYTES, &ctx->__vmac_ctx);
-	ctx->partial_size = 0;
-
-	len -= expand;
-	p += expand;
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+
+	if (dctx->partial_size) {
+		n = min(len, VMAC_NHBYTES - dctx->partial_size);
+		memcpy(&dctx->partial[dctx->partial_size], p, n);
+		dctx->partial_size += n;
+		p += n;
+		len -= n;
+		if (dctx->partial_size == VMAC_NHBYTES) {
+			vhash_blocks(tctx, dctx, dctx->partial_words, 1);
+			dctx->partial_size = 0;
+		}
+	}
 
-	if (len % VMAC_NHBYTES) {
-		memcpy(ctx->partial, p + len - (len % VMAC_NHBYTES),
-			len % VMAC_NHBYTES);
-		ctx->partial_size = len % VMAC_NHBYTES;
+	if (len >= VMAC_NHBYTES) {
+		n = round_down(len, VMAC_NHBYTES);
+		/* TODO: 'p' may be misaligned here */
+		vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES);
+		p += n;
+		len -= n;
 	}
 
-	vhash_update(p, len - len % VMAC_NHBYTES, &ctx->__vmac_ctx);
+	if (len) {
+		memcpy(dctx->partial, p, len);
+		dctx->partial_size = len;
+	}
 
 	return 0;
 }
 
-static int vmac_final(struct shash_desc *pdesc, u8 *out)
+static u64 vhash_final(const struct vmac_tfm_ctx *tctx,
+		       struct vmac_desc_ctx *dctx)
 {
-	struct crypto_shash *parent = pdesc->tfm;
-	struct vmac_ctx_t *ctx = crypto_shash_ctx(parent);
-	vmac_t mac;
-	u8 nonce[16] = {};
-
-	/* vmac() ends up accessing outside the array bounds that
-	 * we specify.  In appears to access up to the next 2-word
-	 * boundary.  We'll just be uber cautious and zero the
-	 * unwritten bytes in the buffer.
-	 */
-	if (ctx->partial_size) {
-		memset(ctx->partial + ctx->partial_size, 0,
-			VMAC_NHBYTES - ctx->partial_size);
+	unsigned int partial = dctx->partial_size;
+	u64 ch = dctx->polytmp[0];
+	u64 cl = dctx->polytmp[1];
+
+	/* L1 and L2-hash the final block if needed */
+	if (partial) {
+		/* Zero-pad to next 128-bit boundary */
+		unsigned int n = round_up(partial, 16);
+		u64 rh, rl;
+
+		memset(&dctx->partial[partial], 0, n - partial);
+		nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl);
+		rh &= m62;
+		if (dctx->first_block_processed)
+			poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1],
+				  rh, rl);
+		else
+			ADD128(ch, cl, rh, rl);
 	}
-	mac = vmac(ctx->partial, ctx->partial_size, nonce, NULL, ctx);
-	memcpy(out, &mac, sizeof(vmac_t));
-	memzero_explicit(&mac, sizeof(vmac_t));
-	memset(&ctx->__vmac_ctx, 0, sizeof(struct vmac_ctx));
-	ctx->partial_size = 0;
+
+	/* L3-hash the 128-bit output of L2-hash */
+	return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8);
+}
+
+static int vmac_final(struct shash_desc *desc, u8 *out)
+{
+	const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
+	static const u8 nonce[16] = {}; /* TODO: this is insecure */
+	union {
+		u8 bytes[16];
+		__be64 pads[2];
+	} block;
+	int index;
+	u64 hash, pad;
+
+	/* Finish calculating the VHASH of the message */
+	hash = vhash_final(tctx, dctx);
+
+	/* Generate pseudorandom pad by encrypting the nonce */
+	memcpy(&block, nonce, 16);
+	index = block.bytes[15] & 1;
+	block.bytes[15] &= ~1;
+	crypto_cipher_encrypt_one(tctx->cipher, block.bytes, block.bytes);
+	pad = be64_to_cpu(block.pads[index]);
+
+	/* The VMAC is the sum of VHASH and the pseudorandom pad */
+	put_unaligned_le64(hash + pad, out);
 	return 0;
 }
 
 static int vmac_init_tfm(struct crypto_tfm *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
 	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = cipher;
+	tctx->cipher = cipher;
 	return 0;
 }
 
 static void vmac_exit_tfm(struct crypto_tfm *tfm)
 {
-	struct vmac_ctx_t *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_cipher(ctx->child);
+	struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(tctx->cipher);
 }
 
 static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
@@ -674,11 +627,12 @@ static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	inst->alg.digestsize = sizeof(vmac_t);
-	inst->alg.base.cra_ctxsize = sizeof(struct vmac_ctx_t);
+	inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx);
 	inst->alg.base.cra_init = vmac_init_tfm;
 	inst->alg.base.cra_exit = vmac_exit_tfm;
 
+	inst->alg.descsize = sizeof(struct vmac_desc_ctx);
+	inst->alg.digestsize = VMAC_TAG_LEN / 8;
 	inst->alg.init = vmac_init;
 	inst->alg.update = vmac_update;
 	inst->alg.final = vmac_final;
diff --git a/include/crypto/vmac.h b/include/crypto/vmac.h
deleted file mode 100644
index 6b700c7b2fe1..000000000000
--- a/include/crypto/vmac.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Modified to interface to the Linux kernel
- * Copyright (c) 2009, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#ifndef __CRYPTO_VMAC_H
-#define __CRYPTO_VMAC_H
-
-/* --------------------------------------------------------------------------
- * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
- * This implementation is herby placed in the public domain.
- * The authors offers no warranty. Use at your own risk.
- * Please send bug reports to the authors.
- * Last modified: 17 APR 08, 1700 PDT
- * ----------------------------------------------------------------------- */
-
-/*
- * User definable settings.
- */
-#define VMAC_TAG_LEN	64
-#define VMAC_KEY_SIZE	128/* Must be 128, 192 or 256			*/
-#define VMAC_KEY_LEN	(VMAC_KEY_SIZE/8)
-#define VMAC_NHBYTES	128/* Must 2^i for any 3 < i < 13 Standard = 128*/
-
-/*
- * This implementation uses u32 and u64 as names for unsigned 32-
- * and 64-bit integer types. These are defined in C99 stdint.h. The
- * following may need adaptation if you are not running a C99 or
- * Microsoft C environment.
- */
-struct vmac_ctx {
-	u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
-	u64 polykey[2*VMAC_TAG_LEN/64];
-	u64 l3key[2*VMAC_TAG_LEN/64];
-	u64 polytmp[2*VMAC_TAG_LEN/64];
-	u64 cached_nonce[2];
-	u64 cached_aes[2];
-	int first_block_processed;
-};
-
-typedef u64 vmac_t;
-
-struct vmac_ctx_t {
-	struct crypto_cipher *child;
-	struct vmac_ctx __vmac_ctx;
-	u8 partial[VMAC_NHBYTES];	/* partial block */
-	int partial_size;		/* size of the partial block */
-};
-
-#endif /* __CRYPTO_VMAC_H */
-- 
cgit v1.2.3


From 80d19669ecd34423e85ca04f2210b0e42a47cb16 Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:41 -0700
Subject: net: Refactor XPS for CPUs and Rx queues

Refactor XPS code to support Tx queue selection based on
CPU(s) map or Rx queue(s) map.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h   |  11 ++-
 include/linux/netdevice.h |  98 ++++++++++++++++++++-
 net/core/dev.c            | 211 ++++++++++++++++++++++++++++++----------------
 net/core/net-sysfs.c      |   4 +-
 4 files changed, 244 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index bf53d893ad02..57f20a0a7794 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,12 +115,17 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)		((cpu) == 0)
 #endif
 
-/* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
-	WARN_ON_ONCE(cpu >= nr_cpumask_bits);
+	WARN_ON_ONCE(cpu >= bits);
 #endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+}
+
+/* verify cpu argument to cpumask_* operators */
+static inline unsigned int cpumask_check(unsigned int cpu)
+{
+	cpu_max_bits_warn(cpu, nr_cpumask_bits);
 	return cpu;
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c6b377a15869..8bf8d6149f79 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -731,10 +731,15 @@ struct xps_map {
  */
 struct xps_dev_maps {
 	struct rcu_head rcu;
-	struct xps_map __rcu *cpu_map[0];
+	struct xps_map __rcu *attr_map[0]; /* Either CPUs map or RXQs map */
 };
-#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +		\
+
+#define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +	\
 	(nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
+
+#define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\
+	(_rxqs * (_tcs) * sizeof(struct xps_map *)))
+
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE	16
@@ -1910,7 +1915,8 @@ struct net_device {
 	int			watchdog_timeo;
 
 #ifdef CONFIG_XPS
-	struct xps_dev_maps __rcu *xps_maps;
+	struct xps_dev_maps __rcu *xps_cpus_map;
+	struct xps_dev_maps __rcu *xps_rxqs_map;
 #endif
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
@@ -3259,6 +3265,92 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 #ifdef CONFIG_XPS
 int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index);
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map);
+
+/**
+ *	netif_attr_test_mask - Test a CPU or Rx queue set in a mask
+ *	@j: CPU/Rx queue index
+ *	@mask: bitmask of all cpus/rx queues
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues.
+ */
+static inline bool netif_attr_test_mask(unsigned long j,
+					const unsigned long *mask,
+					unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+	return test_bit(j, mask);
+}
+
+/**
+ *	netif_attr_test_online - Test for online CPU/Rx queue
+ *	@j: CPU/Rx queue index
+ *	@online_mask: bitmask for CPUs/Rx queues that are online
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns true if a CPU/Rx queue is online.
+ */
+static inline bool netif_attr_test_online(unsigned long j,
+					  const unsigned long *online_mask,
+					  unsigned int nr_bits)
+{
+	cpu_max_bits_warn(j, nr_bits);
+
+	if (online_mask)
+		return test_bit(j, online_mask);
+
+	return (j < nr_bits);
+}
+
+/**
+ *	netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask
+ *	@n: CPU/Rx queue index
+ *	@srcp: the cpumask/Rx queue mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set.
+ */
+static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp,
+					       unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (srcp)
+		return find_next_bit(srcp, nr_bits, n + 1);
+
+	return n + 1;
+}
+
+/**
+ *	netif_attrmask_next_and - get the next CPU/Rx queue in *src1p & *src2p
+ *	@n: CPU/Rx queue index
+ *	@src1p: the first CPUs/Rx queues mask pointer
+ *	@src2p: the second CPUs/Rx queues mask pointer
+ *	@nr_bits: number of bits in the bitmask
+ *
+ * Returns >= nr_bits if no further CPUs/Rx queues set in both.
+ */
+static inline int netif_attrmask_next_and(int n, const unsigned long *src1p,
+					  const unsigned long *src2p,
+					  unsigned int nr_bits)
+{
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpu_max_bits_warn(n, nr_bits);
+
+	if (src1p && src2p)
+		return find_next_and_bit(src1p, src2p, nr_bits, n + 1);
+	else if (src1p)
+		return find_next_bit(src1p, nr_bits, n + 1);
+	else if (src2p)
+		return find_next_bit(src2p, nr_bits, n + 1);
+
+	return n + 1;
+}
 #else
 static inline int netif_set_xps_queue(struct net_device *dev,
 				      const struct cpumask *mask,
diff --git a/net/core/dev.c b/net/core/dev.c
index dffed642e686..71059558dc39 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2092,7 +2092,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[tci]);
+		map = xmap_dereference(dev_maps->attr_map[tci]);
 	if (!map)
 		return false;
 
@@ -2105,7 +2105,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
-		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
 	}
@@ -2135,31 +2135,58 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 	return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+			   u16 offset, u16 count, bool is_rxqs_map)
+{
+	bool active = false;
+	int i, j;
+
+	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+	     j < nr_ids;)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+					       count);
+	if (!active) {
+		if (is_rxqs_map) {
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		} else {
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+			for (i = offset + (count - 1); count--; i--)
+				netdev_queue_numa_node_write(
+					netdev_get_tx_queue(dev, i),
+							NUMA_NO_NODE);
+		}
+		kfree_rcu(dev_maps, rcu);
+	}
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
+	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
-	int cpu, i;
-	bool active = false;
+	unsigned int nr_ids;
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
 
-	if (!dev_maps)
-		goto out_no_maps;
-
-	for_each_possible_cpu(cpu)
-		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-					       offset, count);
+	dev_maps = xmap_dereference(dev->xps_rxqs_map);
+	if (dev_maps) {
+		nr_ids = dev->num_rx_queues;
+		clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset,
+			       count, true);
 
-	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-		kfree_rcu(dev_maps, rcu);
 	}
 
-	for (i = offset + (count - 1); count--; i--)
-		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-					     NUMA_NO_NODE);
+	dev_maps = xmap_dereference(dev->xps_cpus_map);
+	if (!dev_maps)
+		goto out_no_maps;
+
+	if (num_possible_cpus() > 1)
+		possible_mask = cpumask_bits(cpu_possible_mask);
+	nr_ids = nr_cpu_ids;
+	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+		       false);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
@@ -2170,8 +2197,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-				      int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+				      u16 index, bool is_rxqs_map)
 {
 	struct xps_map *new_map;
 	int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2210,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		return map;
 	}
 
-	/* Need to add queue to this CPU's existing map */
+	/* Need to add tx-queue to this CPU's/rx-queue's existing map */
 	if (map) {
 		if (pos < map->alloc_len)
 			return map;
@@ -2191,9 +2218,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		alloc_len = map->alloc_len * 2;
 	}
 
-	/* Need to allocate new map to store queue on this CPU's map */
-	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-			       cpu_to_node(cpu));
+	/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+	 *  map
+	 */
+	if (is_rxqs_map)
+		new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+	else
+		new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+				       cpu_to_node(attr_index));
 	if (!new_map)
 		return NULL;
 
@@ -2205,14 +2237,16 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-			u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map)
 {
+	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, cpu, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
+	unsigned int nr_ids;
 
 	if (dev->num_tc) {
 		num_tc = dev->num_tc;
@@ -2221,16 +2255,27 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -EINVAL;
 	}
 
-	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-	if (maps_sz < L1_CACHE_BYTES)
-		maps_sz = L1_CACHE_BYTES;
-
 	mutex_lock(&xps_map_mutex);
+	if (is_rxqs_map) {
+		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		nr_ids = dev->num_rx_queues;
+	} else {
+		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+		if (num_possible_cpus() > 1) {
+			online_mask = cpumask_bits(cpu_online_mask);
+			possible_mask = cpumask_bits(cpu_possible_mask);
+		}
+		dev_maps = xmap_dereference(dev->xps_cpus_map);
+		nr_ids = nr_cpu_ids;
+	}
 
-	dev_maps = xmap_dereference(dev->xps_maps);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
 
 	/* allocate memory for queue storage */
-	for_each_cpu_and(cpu, cpu_online_mask, mask) {
+	for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
+	     j < nr_ids;) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2238,73 +2283,81 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		tci = cpu * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+		tci = j * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 				 NULL;
 
-		map = expand_xps_map(map, cpu, index);
+		map = expand_xps_map(map, j, index, is_rxqs_map);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	for_each_possible_cpu(cpu) {
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* We need to explicitly update tci as prevous loop
 		 * could break out early if dev_maps is NULL.
 		 */
-		tci = cpu * num_tc + tc;
+		tci = j * num_tc + tc;
 
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-			/* add queue to CPU maps */
+		if (netif_attr_test_mask(j, mask, nr_ids) &&
+		    netif_attr_test_online(j, online_mask, nr_ids)) {
+			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
+			if (!is_rxqs_map) {
+				if (numa_node_id == -2)
+					numa_node_id = cpu_to_node(j);
+				else if (numa_node_id != cpu_to_node(j))
+					numa_node_id = -1;
+			}
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
 		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 	}
 
-	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	if (is_rxqs_map)
+		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+	else
+		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
@@ -2317,19 +2370,23 @@ out_no_old_maps:
 	active = true;
 
 out_no_new_maps:
-	/* update Tx queue numa node */
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     (numa_node_id >= 0) ? numa_node_id :
-				     NUMA_NO_NODE);
+	if (!is_rxqs_map) {
+		/* update Tx queue numa node */
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+					     (numa_node_id >= 0) ?
+					     numa_node_id : NUMA_NO_NODE);
+	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
-	/* removes queue from unused CPUs */
-	for_each_possible_cpu(cpu) {
-		for (i = tc, tci = cpu * num_tc; i--; tci++)
+	/* removes tx-queue from unused CPUs/rx-queues */
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = tc, tci = j * num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+		if (!netif_attr_test_mask(j, mask, nr_ids) ||
+		    !netif_attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2394,10 @@ out_no_new_maps:
 
 	/* free map if not active */
 	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		if (is_rxqs_map)
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		else
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
@@ -2347,11 +2407,12 @@ out_no_maps:
 	return 0;
 error:
 	/* remove any maps that we added */
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = dev_maps ?
-			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
 				kfree(new_map);
@@ -2363,6 +2424,12 @@ error:
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
+{
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
@@ -3384,7 +3451,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	int queue_index = -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		unsigned int tci = skb->sender_cpu - 1;
 
@@ -3393,7 +3460,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 			tci += netdev_get_prio_tc_map(dev, skb->priority);
 		}
 
-		map = rcu_dereference(dev_maps->cpu_map[tci]);
+		map = rcu_dereference(dev_maps->attr_map[tci]);
 		if (map) {
 			if (map->len == 1)
 				queue_index = map->queues[0];
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb7e80f4ced3..b39987c81d53 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1227,13 +1227,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		for_each_possible_cpu(cpu) {
 			int i, tci = cpu * num_tc + tc;
 			struct xps_map *map;
 
-			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			map = rcu_dereference(dev_maps->attr_map[tci]);
 			if (!map)
 				continue;
 
-- 
cgit v1.2.3


From 755c31cd85aea35cf7a5e7253851b52c08eff6e9 Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:51 -0700
Subject: net: sock: Change tx_queue_mapping in sock_common to unsigned short

Change 'skc_tx_queue_mapping' field in sock_common structure from
'int' to 'unsigned short' type with ~0 indicating unset and
other positive queue values being set. This will accommodate adding
a new 'unsigned short' field in sock_common in the next patch for
rx_queue_mapping.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b3b75419eafe..37b09c84504b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -214,7 +214,7 @@ struct sock_common {
 		struct hlist_node	skc_node;
 		struct hlist_nulls_node skc_nulls_node;
 	};
-	int			skc_tx_queue_mapping;
+	unsigned short		skc_tx_queue_mapping;
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -1681,17 +1681,25 @@ static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
 {
+	/* sk_tx_queue_mapping accept only upto a 16-bit value */
+	if (WARN_ON_ONCE((unsigned short)tx_queue >= USHRT_MAX))
+		return;
 	sk->sk_tx_queue_mapping = tx_queue;
 }
 
+#define NO_QUEUE_MAPPING	USHRT_MAX
+
 static inline void sk_tx_queue_clear(struct sock *sk)
 {
-	sk->sk_tx_queue_mapping = -1;
+	sk->sk_tx_queue_mapping = NO_QUEUE_MAPPING;
 }
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk ? sk->sk_tx_queue_mapping : -1;
+	if (sk && sk->sk_tx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_tx_queue_mapping;
+
+	return -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
-- 
cgit v1.2.3


From c6345ce7d361dce1b5d02a2181ccb598c27fd7ae Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:26:57 -0700
Subject: net: Record receive queue number for a connection

This patch adds a new field to sock_common 'skc_rx_queue_mapping'
which holds the receive queue number for the connection. The Rx queue
is marked in tcp_finish_connect() to allow a client app to do
SO_INCOMING_NAPI_ID after a connect() call to get the right queue
association for a socket. Rx queue is also marked in tcp_conn_request()
to allow syn-ack to go on the right tx-queue associated with
the queue on which syn is received.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h |  1 +
 include/net/sock.h      | 28 ++++++++++++++++++++++++++++
 net/core/sock.c         |  2 ++
 net/ipv4/tcp_input.c    |  3 +++
 4 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c5187438af38..9e36fda652b7 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -151,6 +151,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id = skb->napi_id;
 #endif
+	sk_rx_queue_set(sk, skb);
 }
 
 /* variant used for unconnected sockets */
diff --git a/include/net/sock.h b/include/net/sock.h
index 37b09c84504b..2b097cc89727 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_rx_queue_mapping: rx queue number for this connection
  *	@skc_flags: place holder for sk_flags
  *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -215,6 +216,9 @@ struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	unsigned short		skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	unsigned short		skc_rx_queue_mapping;
+#endif
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -326,6 +330,9 @@ struct sock {
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping	__sk_common.skc_rx_queue_mapping
+#endif
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
@@ -1702,6 +1709,27 @@ static inline int sk_tx_queue_get(const struct sock *sk)
 	return -1;
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	if (skb_rx_queue_recorded(skb)) {
+		u16 rx_queue = skb_get_rx_queue(skb);
+
+		if (WARN_ON_ONCE(rx_queue == NO_QUEUE_MAPPING))
+			return;
+
+		sk->sk_rx_queue_mapping = rx_queue;
+	}
+#endif
+}
+
+static inline void sk_rx_queue_clear(struct sock *sk)
+{
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = NO_QUEUE_MAPPING;
+#endif
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index bcc41829a16d..dac6d785186b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2818,6 +2818,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0U;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
+
+	sk_rx_queue_clear(sk);
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eecd359595fc..a4731995e899 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -5592,6 +5593,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
+		sk_mark_napi_id(sk, skb);
 	}
 
 	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6420,6 +6422,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
+	sk_rx_queue_set(req_to_sk(req), skb);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
-- 
cgit v1.2.3


From fc9bab24e9c654f62f3d411fc0b041be9e487e9d Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Fri, 29 Jun 2018 21:27:02 -0700
Subject: net: Enable Tx queue selection based on Rx queues

This patch adds support to pick Tx queue based on the Rx queue(s) map
configuration set by the admin through the sysfs attribute
for each Tx queue. If the user configuration for receive queue(s) map
does not apply, then the Tx queue selection falls back to CPU(s) map
based selection and finally to hashing.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 +++++++++
 net/core/dev.c     | 62 +++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2b097cc89727..2ed99bfa4595 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1730,6 +1730,16 @@ static inline void sk_rx_queue_clear(struct sock *sk)
 #endif
 }
 
+#ifdef CONFIG_XPS
+static inline int sk_rx_queue_get(const struct sock *sk)
+{
+	if (sk && sk->sk_rx_queue_mapping != NO_QUEUE_MAPPING)
+		return sk->sk_rx_queue_mapping;
+
+	return -1;
+}
+#endif
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index 43b5575e40c5..08d58e0debe5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3459,35 +3459,63 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+#ifdef CONFIG_XPS
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+			       struct xps_dev_maps *dev_maps, unsigned int tci)
+{
+	struct xps_map *map;
+	int queue_index = -1;
+
+	if (dev->num_tc) {
+		tci *= dev->num_tc;
+		tci += netdev_get_prio_tc_map(dev, skb->priority);
+	}
+
+	map = rcu_dereference(dev_maps->attr_map[tci]);
+	if (map) {
+		if (map->len == 1)
+			queue_index = map->queues[0];
+		else
+			queue_index = map->queues[reciprocal_scale(
+						skb_get_hash(skb), map->len)];
+		if (unlikely(queue_index >= dev->real_num_tx_queues))
+			queue_index = -1;
+	}
+	return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
+	struct sock *sk = skb->sk;
 	int queue_index = -1;
 
 	if (!static_key_false(&xps_needed))
 		return -1;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
+	if (!static_key_false(&xps_rxqs_needed))
+		goto get_cpus_map;
+
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
 	if (dev_maps) {
-		unsigned int tci = skb->sender_cpu - 1;
+		int tci = sk_rx_queue_get(sk);
 
-		if (dev->num_tc) {
-			tci *= dev->num_tc;
-			tci += netdev_get_prio_tc_map(dev, skb->priority);
-		}
+		if (tci >= 0 && tci < dev->num_rx_queues)
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
+	}
 
-		map = rcu_dereference(dev_maps->attr_map[tci]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else
-				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-									   map->len)];
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
+get_cpus_map:
+	if (queue_index < 0) {
+		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		if (dev_maps) {
+			unsigned int tci = skb->sender_cpu - 1;
+
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
 		}
 	}
 	rcu_read_unlock();
-- 
cgit v1.2.3


From f308d7353d1f5c3ddedc784a367edc72fc51bbaa Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 24 Jun 2018 23:27:22 +0200
Subject: mtd: rawnand: add Reed-Solomon error correction algorithm

Add Reed-Solomon (RS) to the enumeration of ECC algorithms.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/nand.txt | 2 +-
 drivers/mtd/nand/raw/nand_base.c               | 1 +
 include/linux/mtd/rawnand.h                    | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index 8bb11d809429..eaef8c657aa5 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -25,7 +25,7 @@ Optional NAND chip properties:
 		  Deprecated values:
 		  "soft_bch": use "soft" and nand-ecc-algo instead
 - nand-ecc-algo: string, algorithm of NAND ECC.
-		 Supported values are: "hamming", "bch".
+		 Valid values are: "hamming", "bch", "rs".
 - nand-bus-width : 8 or 16 bus width if not present 8
 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index b01d15ec4c56..d0af5347f89d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5777,6 +5777,7 @@ static int of_get_nand_ecc_mode(struct device_node *np)
 static const char * const nand_ecc_algos[] = {
 	[NAND_ECC_HAMMING]	= "hamming",
 	[NAND_ECC_BCH]		= "bch",
+	[NAND_ECC_RS]		= "rs",
 };
 
 static int of_get_nand_ecc_algo(struct device_node *np)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 3e8ec3b8a39c..2d9cb7acbc3d 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -121,6 +121,7 @@ enum nand_ecc_algo {
 	NAND_ECC_UNKNOWN,
 	NAND_ECC_HAMMING,
 	NAND_ECC_BCH,
+	NAND_ECC_RS,
 };
 
 /*
-- 
cgit v1.2.3


From f922bd798bb94e0adcce26ddd811245443173268 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 24 Jun 2018 23:27:23 +0200
Subject: mtd: rawnand: add an option to specify NAND chip as a boot device

Allow to define a NAND chip as a boot device. This can be helpful
for the selection of the ECC algorithm and strength in case the boot
ROM supports only a subset of controller provided options.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/devicetree/bindings/mtd/nand.txt | 4 ++++
 drivers/mtd/nand/raw/nand_base.c               | 3 +++
 include/linux/mtd/rawnand.h                    | 6 ++++++
 3 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
index eaef8c657aa5..e949c778e983 100644
--- a/Documentation/devicetree/bindings/mtd/nand.txt
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -43,6 +43,10 @@ Optional NAND chip properties:
 		     This is particularly useful when only the in-band area is
 		     used by the upper layers, and you want to make your NAND
 		     as reliable as possible.
+- nand-is-boot-medium: Whether the NAND chip is a boot medium. Drivers might use
+		       this information to select ECC algorithms supported by
+		       the boot ROM or similar restrictions.
+
 - nand-rb: shall contain the native Ready/Busy ids.
 
 The ECC strength and ECC step size properties define the correction capability
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d0af5347f89d..1c633f80c3e1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5859,6 +5859,9 @@ static int nand_dt_init(struct nand_chip *chip)
 	if (of_get_nand_bus_width(dn) == 16)
 		chip->options |= NAND_BUSWIDTH_16;
 
+	if (of_property_read_bool(dn, "nand-is-boot-medium"))
+		chip->options |= NAND_IS_BOOT_MEDIUM;
+
 	if (of_get_nand_on_flash_bbt(dn))
 		chip->bbt_options |= NAND_BBT_USE_FLASH;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 2d9cb7acbc3d..80aeeca03f36 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -219,6 +219,12 @@ enum nand_ecc_algo {
  */
 #define NAND_WAIT_TCCS		0x00200000
 
+/*
+ * Whether the NAND chip is a boot medium. Drivers might use this information
+ * to select ECC algorithms supported by the boot ROM or similar restrictions.
+ */
+#define NAND_IS_BOOT_MEDIUM	0x00400000
+
 /* Options set by nand scan */
 /* Nand scan has allocated controller struct */
 #define NAND_CONTROLLER_ALLOC	0x80000000
-- 
cgit v1.2.3


From 00ce4e039ad5bded462931606c3063ff691964b7 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 25 Jun 2018 10:44:44 +1200
Subject: mtd: rawnand: add manufacturer fixup for ONFI parameter page

This is called after the ONFI parameter page checksum is verified
and allows us to override the contents of the parameter page.

Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 4 ++++
 include/linux/mtd/rawnand.h      | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 1c633f80c3e1..f362c09a71d7 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5168,6 +5168,10 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		}
 	}
 
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->fixup_onfi_param_page)
+		chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
+
 	/* Check version */
 	val = le16_to_cpu(p->revision);
 	if (val & (1 << 5))
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 80aeeca03f36..dc4538b58b84 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -785,11 +785,15 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
  *	  implementation) if any.
  * @cleanup: the ->init() function may have allocated resources, ->cleanup()
  *	     is here to let vendor specific code release those resources.
+ * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter
+ *			   page. This is called after the checksum is verified.
  */
 struct nand_manufacturer_ops {
 	void (*detect)(struct nand_chip *chip);
 	int (*init)(struct nand_chip *chip);
 	void (*cleanup)(struct nand_chip *chip);
+	void (*fixup_onfi_param_page)(struct nand_chip *chip,
+				      struct nand_onfi_params *p);
 };
 
 /**
-- 
cgit v1.2.3


From 872b71ff084ab125c68073d9e69acfd7095f2015 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Mon, 25 Jun 2018 10:44:45 +1200
Subject: mtd: rawnand: add defines for ONFI version bits

Add defines for the ONFI version bits and use them in
nand_flash_detect_onfi().

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 10 +++++-----
 include/linux/mtd/rawnand.h      | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f362c09a71d7..dbf6e80e9ab5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5174,15 +5174,15 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 
 	/* Check version */
 	val = le16_to_cpu(p->revision);
-	if (val & (1 << 5))
+	if (val & ONFI_VERSION_2_3)
 		chip->parameters.onfi.version = 23;
-	else if (val & (1 << 4))
+	else if (val & ONFI_VERSION_2_2)
 		chip->parameters.onfi.version = 22;
-	else if (val & (1 << 3))
+	else if (val & ONFI_VERSION_2_1)
 		chip->parameters.onfi.version = 21;
-	else if (val & (1 << 2))
+	else if (val & ONFI_VERSION_2_0)
 		chip->parameters.onfi.version = 20;
-	else if (val & (1 << 1))
+	else if (val & ONFI_VERSION_1_0)
 		chip->parameters.onfi.version = 10;
 
 	if (!chip->parameters.onfi.version) {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index dc4538b58b84..fbac4741da52 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -237,6 +237,17 @@ enum nand_ecc_algo {
 /* Keep gcc happy */
 struct nand_chip;
 
+/* ONFI version bits */
+#define ONFI_VERSION_1_0		BIT(1)
+#define ONFI_VERSION_2_0		BIT(2)
+#define ONFI_VERSION_2_1		BIT(3)
+#define ONFI_VERSION_2_2		BIT(4)
+#define ONFI_VERSION_2_3		BIT(5)
+#define ONFI_VERSION_3_0		BIT(6)
+#define ONFI_VERSION_3_1		BIT(7)
+#define ONFI_VERSION_3_2		BIT(8)
+#define ONFI_VERSION_4_0		BIT(9)
+
 /* ONFI features */
 #define ONFI_FEATURE_16_BIT_BUS		(1 << 0)
 #define ONFI_FEATURE_EXT_PARAM_PAGE	(1 << 7)
-- 
cgit v1.2.3


From 78c9c4dfbf8c04883941445a195276bb4bb92c76 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 26 Jun 2018 15:21:32 +0200
Subject: posix-timers: Sanitize overrun handling

The posix timer overrun handling is broken because the forwarding functions
can return a huge number of overruns which does not fit in an int. As a
consequence timer_getoverrun(2) and siginfo::si_overrun can turn into
random number generators.

The k_clock::timer_forward() callbacks return a 64 bit value now. Make
k_itimer::ti_overrun[_last] 64bit as well, so the kernel internal
accounting is correct. 3Remove the temporary (int) casts.

Add a helper function which clamps the overrun value returned to user space
via timer_getoverrun(2) or siginfo::si_overrun limited to a positive value
between 0 and INT_MAX. INT_MAX is an indicator for user space that the
overrun value has been clamped.

Reported-by: Team OWL337 <icytxw@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Link: https://lkml.kernel.org/r/20180626132705.018623573@linutronix.de
---
 include/linux/posix-timers.h   |  4 ++--
 kernel/time/posix-cpu-timers.c |  2 +-
 kernel/time/posix-timers.c     | 31 ++++++++++++++++++++-----------
 3 files changed, 23 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index c85704fcdbd2..ee7e987ea1b4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -95,8 +95,8 @@ struct k_itimer {
 	clockid_t		it_clock;
 	timer_t			it_id;
 	int			it_active;
-	int			it_overrun;
-	int			it_overrun_last;
+	s64			it_overrun;
+	s64			it_overrun_last;
 	int			it_requeue_pending;
 	int			it_sigev_notify;
 	ktime_t			it_interval;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..562cc3891b57 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -85,7 +85,7 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 			continue;
 
 		timer->it.cpu.expires += incr;
-		timer->it_overrun += 1 << i;
+		timer->it_overrun += 1LL << i;
 		delta -= incr;
 	}
 }
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index db1d65963a57..3ac7295306dc 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -283,6 +283,17 @@ static __init int init_posix_timers(void)
 }
 __initcall(init_posix_timers);
 
+/*
+ * The siginfo si_overrun field and the return value of timer_getoverrun(2)
+ * are of type int. Clamp the overrun value to INT_MAX
+ */
+static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
+{
+	s64 sum = timr->it_overrun_last + (s64)baseval;
+
+	return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
+}
+
 static void common_hrtimer_rearm(struct k_itimer *timr)
 {
 	struct hrtimer *timer = &timr->it.real.timer;
@@ -290,9 +301,8 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
 	if (!timr->it_interval)
 		return;
 
-	timr->it_overrun += (unsigned int) hrtimer_forward(timer,
-						timer->base->get_time(),
-						timr->it_interval);
+	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
+					    timr->it_interval);
 	hrtimer_restart(timer);
 }
 
@@ -321,10 +331,10 @@ void posixtimer_rearm(struct siginfo *info)
 
 		timr->it_active = 1;
 		timr->it_overrun_last = timr->it_overrun;
-		timr->it_overrun = -1;
+		timr->it_overrun = -1LL;
 		++timr->it_requeue_pending;
 
-		info->si_overrun += timr->it_overrun_last;
+		info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
 	}
 
 	unlock_timer(timr, flags);
@@ -418,9 +428,8 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
 					now = ktime_add(now, kj);
 			}
 #endif
-			timr->it_overrun += (unsigned int)
-				hrtimer_forward(timer, now,
-						timr->it_interval);
+			timr->it_overrun += hrtimer_forward(timer, now,
+							    timr->it_interval);
 			ret = HRTIMER_RESTART;
 			++timr->it_requeue_pending;
 			timr->it_active = 1;
@@ -524,7 +533,7 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event,
 	new_timer->it_id = (timer_t) new_timer_id;
 	new_timer->it_clock = which_clock;
 	new_timer->kclock = kc;
-	new_timer->it_overrun = -1;
+	new_timer->it_overrun = -1LL;
 
 	if (event) {
 		rcu_read_lock();
@@ -702,7 +711,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
 	 * expiry time forward by intervals, so expiry is > now.
 	 */
 	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
-		timr->it_overrun += (int)kc->timer_forward(timr, now);
+		timr->it_overrun += kc->timer_forward(timr, now);
 
 	remaining = kc->timer_remaining(timr, now);
 	/* Return 0 only, when the timer is expired and not pending */
@@ -791,7 +800,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
 	if (!timr)
 		return -EINVAL;
 
-	overrun = timr->it_overrun_last;
+	overrun = timer_overrun_to_int(timr, 0);
 	unlock_timer(timr, flags);
 
 	return overrun;
-- 
cgit v1.2.3


From d573454d9b4f00061da314460908e19476d2ff6d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:30:28 +0000
Subject: ASoC: simple-card: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card.h     |  7 ++-----
 sound/soc/generic/simple-card.c | 17 +++++++----------
 2 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/simple_card.h b/include/sound/simple_card.h
index a6a2e1547092..d264e5463f22 100644
--- a/include/sound/simple_card.h
+++ b/include/sound/simple_card.h
@@ -1,12 +1,9 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * ASoC simple sound card support
  *
  * Copyright (C) 2012 Renesas Solutions Corp.
  * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __SIMPLE_CARD_H
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index c5b6e04cd926..64bf3560c1d1 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -1,13 +1,10 @@
-/*
- * ASoC simple sound card support
- *
- * Copyright (C) 2012 Renesas Solutions Corp.
- * Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// ASoC simple sound card support
+//
+// Copyright (C) 2012 Renesas Solutions Corp.
+// Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/module.h>
-- 
cgit v1.2.3


From d613a7f45ebb2f113444630fcbbb8a074c741998 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:30:44 +0000
Subject: ASoC: simple-card-utils: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h     |  8 +++-----
 sound/soc/generic/simple-card-utils.c | 15 ++++++---------
 2 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index f82acef3b992..8bc5e2d8b13c 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -1,12 +1,10 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * simple_card_utils.h
  *
  * Copyright (c) 2016 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
+
 #ifndef __SIMPLE_CARD_UTILS_H
 #define __SIMPLE_CARD_UTILS_H
 
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 4398c9580929..d3f3f0fec74c 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -1,12 +1,9 @@
-/*
- * simple-card-utils.c
- *
- * Copyright (c) 2016 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// simple-card-utils.c
+//
+// Copyright (c) 2016 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+
 #include <linux/clk.h>
 #include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
-- 
cgit v1.2.3


From e2cfd2c9673f4e9d08971fed53fe1f41d8270ef0 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:22:15 +0000
Subject: ASoC: soc-dai.h: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index e6f8c40ed43c..a14bc0608ae9 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -1,12 +1,9 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * linux/sound/soc-dai.h -- ALSA SoC Layer
  *
  * Copyright:	2005-2008 Wolfson Microelectronics. PLC.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Digital Audio Interface (DAI) API.
  */
 
-- 
cgit v1.2.3


From b53c34b4b73eb65e6480ada6ec8ddbcc2d4e9817 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:22:58 +0000
Subject: ASoC: soc-dpcm.h: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dpcm.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
index 806059052bfc..9bb92f187af8 100644
--- a/include/sound/soc-dpcm.h
+++ b/include/sound/soc-dpcm.h
@@ -1,11 +1,8 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * linux/sound/soc-dpcm.h -- ALSA SoC Dynamic PCM Support
  *
  * Author:		Liam Girdwood <lrg@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_SND_SOC_DPCM_H
-- 
cgit v1.2.3


From 7730bb13c7472620b585783f248b2dccd09d1819 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:24:04 +0000
Subject: ASoC: soc-acpi: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi-intel-match.h | 14 ++------------
 include/sound/soc-acpi.h             | 13 ++-----------
 sound/soc/soc-acpi.c                 | 20 +++++---------------
 3 files changed, 9 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h
index 917ddd0f2762..bb1d24b703fb 100644
--- a/include/sound/soc-acpi-intel-match.h
+++ b/include/sound/soc-acpi-intel-match.h
@@ -1,16 +1,6 @@
-
-/*
- * Copyright (C) 2017, Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0
  *
+ * Copyright (C) 2017, Intel Corporation. All rights reserved.
  */
 
 #ifndef __LINUX_SND_SOC_ACPI_INTEL_MATCH_H
diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 082224275f52..e45b2330d16a 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -1,15 +1,6 @@
-/*
- * Copyright (C) 2013-15, Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0
  *
+ * Copyright (C) 2013-15, Intel Corporation. All rights reserved.
  */
 
 #ifndef __LINUX_SND_SOC_ACPI_H
diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c
index 3d7e1ff79139..b8e72b52db30 100644
--- a/sound/soc/soc-acpi.c
+++ b/sound/soc/soc-acpi.c
@@ -1,18 +1,8 @@
-/*
- * soc-apci.c - support for ACPI enumeration.
- *
- * Copyright (c) 2013-15, Intel Corporation.
- *
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// soc-apci.c - support for ACPI enumeration.
+//
+// Copyright (c) 2013-15, Intel Corporation.
 
 #include <sound/soc-acpi.h>
 
-- 
cgit v1.2.3


From 873486ed4af3e11bfc20832dff7b124ba652bf77 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:24:18 +0000
Subject: ASoC: soc-core: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 11 ++++-------
 sound/soc/soc-core.c | 41 ++++++++++++++++++-----------------------
 2 files changed, 22 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index f7579f82cc00..16f0bf10cc42 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1,13 +1,10 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * linux/sound/soc.h -- ALSA SoC Layer
  *
- * Author:		Liam Girdwood
- * Created:		Aug 11th 2005
+ * Author:	Liam Girdwood
+ * Created:	Aug 11th 2005
  * Copyright:	Wolfson Microelectronics. PLC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_SND_SOC_H
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 4663de3cf495..68b08781c832 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1,26 +1,21 @@
-/*
- * soc-core.c  --  ALSA SoC Audio Layer
- *
- * Copyright 2005 Wolfson Microelectronics PLC.
- * Copyright 2005 Openedhand Ltd.
- * Copyright (C) 2010 Slimlogic Ltd.
- * Copyright (C) 2010 Texas Instruments Inc.
- *
- * Author: Liam Girdwood <lrg@slimlogic.co.uk>
- *         with code, comments and ideas from :-
- *         Richard Purdie <richard@openedhand.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  TODO:
- *   o Add hw rules to enforce rates, etc.
- *   o More testing with other codecs/machines.
- *   o Add more codecs and platforms to ensure good API coverage.
- *   o Support TDM on PCM and I2S
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// soc-core.c  --  ALSA SoC Audio Layer
+//
+// Copyright 2005 Wolfson Microelectronics PLC.
+// Copyright 2005 Openedhand Ltd.
+// Copyright (C) 2010 Slimlogic Ltd.
+// Copyright (C) 2010 Texas Instruments Inc.
+//
+// Author: Liam Girdwood <lrg@slimlogic.co.uk>
+//         with code, comments and ideas from :-
+//         Richard Purdie <richard@openedhand.com>
+//
+//  TODO:
+//   o Add hw rules to enforce rates, etc.
+//   o More testing with other codecs/machines.
+//   o Add more codecs and platforms to ensure good API coverage.
+//   o Support TDM on PCM and I2S
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-- 
cgit v1.2.3


From c01f3af4d32071915119ffcb933e75d7c165378e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:24:31 +0000
Subject: ASoC: soc-dapm: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 11 ++++-------
 sound/soc/soc-dapm.c     | 42 ++++++++++++++++++------------------------
 2 files changed, 22 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index a6ce2de4e20a..af9ef16cc34d 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -1,13 +1,10 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * linux/sound/soc-dapm.h -- ALSA SoC Dynamic Audio Power Management
  *
- * Author:		Liam Girdwood
- * Created:		Aug 11th 2005
+ * Author:	Liam Girdwood
+ * Created:	Aug 11th 2005
  * Copyright:	Wolfson Microelectronics. PLC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_SND_SOC_DAPM_H
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index a099c3e45504..0602b2888d52 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1,27 +1,21 @@
-/*
- * soc-dapm.c  --  ALSA SoC Dynamic Audio Power Management
- *
- * Copyright 2005 Wolfson Microelectronics PLC.
- * Author: Liam Girdwood <lrg@slimlogic.co.uk>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  Features:
- *    o Changes power status of internal codec blocks depending on the
- *      dynamic configuration of codec internal audio paths and active
- *      DACs/ADCs.
- *    o Platform power domain - can support external components i.e. amps and
- *      mic/headphone insertion events.
- *    o Automatic Mic Bias support
- *    o Jack insertion power event initiation - e.g. hp insertion will enable
- *      sinks, dacs, etc
- *    o Delayed power down of audio subsystem to reduce pops between a quick
- *      device reopen.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// soc-dapm.c  --  ALSA SoC Dynamic Audio Power Management
+//
+// Copyright 2005 Wolfson Microelectronics PLC.
+// Author: Liam Girdwood <lrg@slimlogic.co.uk>
+//
+//  Features:
+//    o Changes power status of internal codec blocks depending on the
+//      dynamic configuration of codec internal audio paths and active
+//      DACs/ADCs.
+//    o Platform power domain - can support external components i.e. amps and
+//      mic/headphone insertion events.
+//    o Automatic Mic Bias support
+//    o Jack insertion power event initiation - e.g. hp insertion will enable
+//      sinks, dacs, etc
+//    o Delayed power down of audio subsystem to reduce pops between a quick
+//      device reopen.
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-- 
cgit v1.2.3


From f2b6a1b25fecc48a46c8a41636101af8a41c88a8 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:24:45 +0000
Subject: ASoC: soc-topology: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-topology.h |  7 ++-----
 sound/soc/soc-topology.c     | 47 ++++++++++++++++++++------------------------
 2 files changed, 23 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index 401ef2c45d6c..fa4b8413d2e2 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -1,13 +1,10 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * linux/sound/soc-topology.h -- ALSA SoC Firmware Controls and DAPM
  *
  * Copyright (C) 2012 Texas Instruments Inc.
  * Copyright (C) 2015 Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Simple file API to load FW that includes mixers, coefficients, DAPM graphs,
  * algorithms, equalisers, DAIs, widgets, FE caps, BE caps, codec link caps etc.
  */
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 05d177d689e2..66e77e020745 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1,29 +1,24 @@
-/*
- * soc-topology.c  --  ALSA SoC Topology
- *
- * Copyright (C) 2012 Texas Instruments Inc.
- * Copyright (C) 2015 Intel Corporation.
- *
- * Authors: Liam Girdwood <liam.r.girdwood@linux.intel.com>
- *		K, Mythri P <mythri.p.k@intel.com>
- *		Prusty, Subhransu S <subhransu.s.prusty@intel.com>
- *		B, Jayachandran <jayachandran.b@intel.com>
- *		Abdullah, Omair M <omair.m.abdullah@intel.com>
- *		Jin, Yao <yao.jin@intel.com>
- *		Lin, Mengdong <mengdong.lin@intel.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  Add support to read audio firmware topology alongside firmware text. The
- *  topology data can contain kcontrols, DAPM graphs, widgets, DAIs, DAI links,
- *  equalizers, firmware, coefficients etc.
- *
- *  This file only manages the core ALSA and ASoC components, all other bespoke
- *  firmware topology data is passed to component drivers for bespoke handling.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// soc-topology.c  --  ALSA SoC Topology
+//
+// Copyright (C) 2012 Texas Instruments Inc.
+// Copyright (C) 2015 Intel Corporation.
+//
+// Authors: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//		K, Mythri P <mythri.p.k@intel.com>
+//		Prusty, Subhransu S <subhransu.s.prusty@intel.com>
+//		B, Jayachandran <jayachandran.b@intel.com>
+//		Abdullah, Omair M <omair.m.abdullah@intel.com>
+//		Jin, Yao <yao.jin@intel.com>
+//		Lin, Mengdong <mengdong.lin@intel.com>
+//
+//  Add support to read audio firmware topology alongside firmware text. The
+//  topology data can contain kcontrols, DAPM graphs, widgets, DAIs, DAI links,
+//  equalizers, firmware, coefficients etc.
+//
+//  This file only manages the core ALSA and ASoC components, all other bespoke
+//  firmware topology data is passed to component drivers for bespoke handling.
 
 #include <linux/kernel.h>
 #include <linux/export.h>
-- 
cgit v1.2.3


From b3ed4c86a700b494fc5058a52531eeb14d6fe00f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:24:57 +0000
Subject: ASoC: soc-compress: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/compress_driver.h | 21 +++------------------
 sound/soc/soc-compress.c        | 24 +++++++++---------------
 2 files changed, 12 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h
index 9924bc9cbc7c..ea8c93bbb0e0 100644
--- a/include/sound/compress_driver.h
+++ b/include/sound/compress_driver.h
@@ -1,27 +1,12 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  *  compress_driver.h - compress offload driver definations
  *
  *  Copyright (C) 2011 Intel Corporation
  *  Authors:	Vinod Koul <vinod.koul@linux.intel.com>
  *		Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
  */
+
 #ifndef __COMPRESS_DRIVER_H
 #define __COMPRESS_DRIVER_H
 
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index e095115fa9f9..b9e1673fea51 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -1,18 +1,12 @@
-/*
- * soc-compress.c  --  ALSA SoC Compress
- *
- * Copyright (C) 2012 Intel Corp.
- *
- * Authors: Namarta Kohli <namartax.kohli@intel.com>
- *          Ramesh Babu K V <ramesh.babu@linux.intel.com>
- *          Vinod Koul <vinod.koul@linux.intel.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// soc-compress.c  --  ALSA SoC Compress
+//
+// Copyright (C) 2012 Intel Corp.
+//
+// Authors: Namarta Kohli <namartax.kohli@intel.com>
+//          Ramesh Babu K V <ramesh.babu@linux.intel.com>
+//          Vinod Koul <vinod.koul@linux.intel.com>
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-- 
cgit v1.2.3


From 1356a6071cf4d7187652cd2b18dfab4763e0dba6 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:25:11 +0000
Subject: ASoC: soc-generic-dmaengine-pcm: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/dmaengine_pcm.h         | 14 +++-----------
 sound/soc/soc-generic-dmaengine-pcm.c | 19 +++++--------------
 2 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index e3481eebdd98..2c4cfaa135a6 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -1,17 +1,9 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0+
+ *
  *  Copyright (C) 2012, Analog Devices Inc.
  *	Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
+
 #ifndef __SOUND_DMAENGINE_PCM_H__
 #define __SOUND_DMAENGINE_PCM_H__
 
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 56a541b9ff9e..13bdca6e41c5 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -1,17 +1,8 @@
-/*
- *  Copyright (C) 2013, Analog Devices Inc.
- *	Author: Lars-Peter Clausen <lars@metafoo.de>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+//  Copyright (C) 2013, Analog Devices Inc.
+//	Author: Lars-Peter Clausen <lars@metafoo.de>
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/dmaengine.h>
-- 
cgit v1.2.3


From 1a8f0a3c13c136951de7ea24ccb148e745db98a2 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 2 Jul 2018 06:26:27 +0000
Subject: ASoC: ac97: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/ac97/codec.h      |  8 +++-----
 include/sound/ac97/compat.h     |  9 +++------
 include/sound/ac97/controller.h |  8 +++-----
 include/sound/ac97/regs.h       | 20 ++------------------
 include/sound/ac97_codec.h      | 25 +++++--------------------
 sound/soc/soc-ac97.c            | 29 ++++++++++++-----------------
 6 files changed, 28 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/include/sound/ac97/codec.h b/include/sound/ac97/codec.h
index ec04be9ab119..9792d25fa369 100644
--- a/include/sound/ac97/codec.h
+++ b/include/sound/ac97/codec.h
@@ -1,10 +1,8 @@
-/*
- *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
+/* SPDX-License-Identifier: GPL-2.0
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
  */
+
 #ifndef __SOUND_AC97_CODEC2_H
 #define __SOUND_AC97_CODEC2_H
 
diff --git a/include/sound/ac97/compat.h b/include/sound/ac97/compat.h
index 1351cba40048..57e19afa31ab 100644
--- a/include/sound/ac97/compat.h
+++ b/include/sound/ac97/compat.h
@@ -1,14 +1,11 @@
-/*
- *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
+/* SPDX-License-Identifier: GPL-2.0
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
  *
  * This file is for backward compatibility with snd_ac97 structure and its
  * multiple usages, such as the snd_ac97_bus and snd_ac97_build_ops.
- *
  */
+
 #ifndef AC97_COMPAT_H
 #define AC97_COMPAT_H
 
diff --git a/include/sound/ac97/controller.h b/include/sound/ac97/controller.h
index b36ecdd64f14..06b5afb7fa6b 100644
--- a/include/sound/ac97/controller.h
+++ b/include/sound/ac97/controller.h
@@ -1,10 +1,8 @@
-/*
- *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
+/* SPDX-License-Identifier: GPL-2.0
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
  */
+
 #ifndef AC97_CONTROLLER_H
 #define AC97_CONTROLLER_H
 
diff --git a/include/sound/ac97/regs.h b/include/sound/ac97/regs.h
index 9a4fa0c3264a..843f73f3705a 100644
--- a/include/sound/ac97/regs.h
+++ b/include/sound/ac97/regs.h
@@ -1,27 +1,11 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0+
+ *
  *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>
  *  Universal interface for Audio Codec '97
  *
  *  For more details look to AC '97 component specification revision 2.1
  *  by Intel Corporation (http://developer.intel.com).
- *
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
  */
-
 /*
  *  AC'97 codec registers
  */
diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 89d311a503d3..cc383991c0fe 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -1,30 +1,15 @@
-#ifndef __SOUND_AC97_CODEC_H
-#define __SOUND_AC97_CODEC_H
-
-/*
+/* SPDX-License-Identifier: GPL-2.0+
+ *
  *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>
  *  Universal interface for Audio Codec '97
  *
  *  For more details look to AC '97 component specification revision 2.1
  *  by Intel Corporation (http://developer.intel.com).
- *
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
  */
 
+#ifndef __SOUND_AC97_CODEC_H
+#define __SOUND_AC97_CODEC_H
+
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/workqueue.h>
diff --git a/sound/soc/soc-ac97.c b/sound/soc/soc-ac97.c
index 3f424f214bca..c086786e4471 100644
--- a/sound/soc/soc-ac97.c
+++ b/sound/soc/soc-ac97.c
@@ -1,20 +1,15 @@
-/*
- * soc-ac97.c  --  ALSA SoC Audio Layer AC97 support
- *
- * Copyright 2005 Wolfson Microelectronics PLC.
- * Copyright 2005 Openedhand Ltd.
- * Copyright (C) 2010 Slimlogic Ltd.
- * Copyright (C) 2010 Texas Instruments Inc.
- *
- * Author: Liam Girdwood <lrg@slimlogic.co.uk>
- *         with code, comments and ideas from :-
- *         Richard Purdie <richard@openedhand.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// soc-ac97.c  --  ALSA SoC Audio Layer AC97 support
+//
+// Copyright 2005 Wolfson Microelectronics PLC.
+// Copyright 2005 Openedhand Ltd.
+// Copyright (C) 2010 Slimlogic Ltd.
+// Copyright (C) 2010 Texas Instruments Inc.
+//
+// Author: Liam Girdwood <lrg@slimlogic.co.uk>
+//         with code, comments and ideas from :-
+//         Richard Purdie <richard@openedhand.com>
 
 #include <linux/ctype.h>
 #include <linux/delay.h>
-- 
cgit v1.2.3


From 05739375f1c0a1048fea8b9c4cb54d9e4a891938 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 29 Jun 2018 14:59:40 +0200
Subject: ASoC: pxa-ssp: remove .set_pll() and .set_clkdiv() callbacks

The .set_pll() and .set_clkdiv() callbacks are considered legacy and should
not be used anymore. In order to support PXA boards on DT platforms, remove
them and let the code figure out the correct dividers and PLL base
frequencies itself.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h |   8 +++
 sound/soc/pxa/pxa-ssp.c    | 146 ++++++++++++++++++++++-----------------------
 2 files changed, 81 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 03a7ca46735b..13b4244d44c1 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -171,6 +171,14 @@
 #define SSACD_SCDB		(1 << 3)	/* SSPSYSCLK Divider Bypass */
 #define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
 #define SSACD_ACDS(x)		((x) << 0)	/* Audio clock divider select */
+#define SSACD_ACDS_1		(0)
+#define SSACD_ACDS_2		(1)
+#define SSACD_ACDS_4		(2)
+#define SSACD_ACDS_8		(3)
+#define SSACD_ACDS_16		(4)
+#define SSACD_ACDS_32		(5)
+#define SSACD_SCDB_4X		(0)
+#define SSACD_SCDB_1X		(1)
 #define SSACD_SCDX8		(1 << 7)	/* SYSCLK division ratio select */
 
 /* LPSS SSP */
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 01d54697ede4..f8339bb01251 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -41,6 +41,7 @@
  */
 struct ssp_priv {
 	struct ssp_device *ssp;
+	unsigned long ssp_clk;
 	unsigned int sysclk;
 	unsigned int dai_fmt;
 	unsigned int configured_dai_fmt;
@@ -192,21 +193,6 @@ static void pxa_ssp_set_scr(struct ssp_device *ssp, u32 div)
 	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
 }
 
-/**
- * pxa_ssp_get_clkdiv - get SSP clock divider
- */
-static u32 pxa_ssp_get_scr(struct ssp_device *ssp)
-{
-	u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0);
-	u32 div;
-
-	if (ssp->type == PXA25x_SSP)
-		div = ((sscr0 >> 8) & 0xff) * 2 + 2;
-	else
-		div = ((sscr0 >> 8) & 0xfff) + 1;
-	return div;
-}
-
 /*
  * Set the SSP ports SYSCLK.
  */
@@ -262,67 +248,18 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 	return 0;
 }
 
-/*
- * Set the SSP clock dividers.
- */
-static int pxa_ssp_set_dai_clkdiv(struct snd_soc_dai *cpu_dai,
-	int div_id, int div)
-{
-	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
-	struct ssp_device *ssp = priv->ssp;
-	int val;
-
-	switch (div_id) {
-	case PXA_SSP_AUDIO_DIV_ACDS:
-		val = (pxa_ssp_read_reg(ssp, SSACD) & ~0x7) | SSACD_ACDS(div);
-		pxa_ssp_write_reg(ssp, SSACD, val);
-		break;
-	case PXA_SSP_AUDIO_DIV_SCDB:
-		val = pxa_ssp_read_reg(ssp, SSACD);
-		val &= ~SSACD_SCDB;
-		if (ssp->type == PXA3xx_SSP)
-			val &= ~SSACD_SCDX8;
-		switch (div) {
-		case PXA_SSP_CLK_SCDB_1:
-			val |= SSACD_SCDB;
-			break;
-		case PXA_SSP_CLK_SCDB_4:
-			break;
-		case PXA_SSP_CLK_SCDB_8:
-			if (ssp->type == PXA3xx_SSP)
-				val |= SSACD_SCDX8;
-			else
-				return -EINVAL;
-			break;
-		default:
-			return -EINVAL;
-		}
-		pxa_ssp_write_reg(ssp, SSACD, val);
-		break;
-	case PXA_SSP_DIV_SCR:
-		pxa_ssp_set_scr(ssp, div);
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
 /*
  * Configure the PLL frequency pxa27x and (afaik - pxa320 only)
  */
-static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
-	int source, unsigned int freq_in, unsigned int freq_out)
+static int pxa_ssp_set_pll(struct ssp_priv *priv, unsigned int freq)
 {
-	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
 	u32 ssacd = pxa_ssp_read_reg(ssp, SSACD) & ~0x70;
 
 	if (ssp->type == PXA3xx_SSP)
 		pxa_ssp_write_reg(ssp, SSACDD, 0);
 
-	switch (freq_out) {
+	switch (freq) {
 	case 5622000:
 		break;
 	case 11345000:
@@ -353,7 +290,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 			u64 tmp = 19968;
 
 			tmp *= 1000000;
-			do_div(tmp, freq_out);
+			do_div(tmp, freq);
 			val = tmp;
 
 			val = (val << 16) | 64;
@@ -363,7 +300,7 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 
 			dev_dbg(&ssp->pdev->dev,
 				"Using SSACDD %x to supply %uHz\n",
-				val, freq_out);
+				val, freq);
 			break;
 		}
 
@@ -568,6 +505,24 @@ static int pxa_ssp_configure_dai_fmt(struct ssp_priv *priv)
 	return 0;
 }
 
+struct pxa_ssp_clock_mode {
+	int rate;
+	int pll;
+	u8 acds;
+	u8 scdb;
+};
+
+static const struct pxa_ssp_clock_mode pxa_ssp_clock_modes[] = {
+	{ .rate =  8000, .pll = 32842000, .acds = SSACD_ACDS_32, .scdb = SSACD_SCDB_4X },
+	{ .rate = 11025, .pll =  5622000, .acds = SSACD_ACDS_4,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 16000, .pll = 32842000, .acds = SSACD_ACDS_16, .scdb = SSACD_SCDB_4X },
+	{ .rate = 22050, .pll =  5622000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 44100, .pll = 11345000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 48000, .pll = 12235000, .acds = SSACD_ACDS_2,  .scdb = SSACD_SCDB_4X },
+	{ .rate = 96000, .pll = 12235000, .acds = SSACD_ACDS_4,  .scdb = SSACD_SCDB_1X },
+	{}
+};
+
 /*
  * Set the SSP audio DMA parameters and sample size.
  * Can be called multiple times by oss emulation.
@@ -579,11 +534,12 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
 	int chn = params_channels(params);
-	u32 sscr0;
-	u32 sspsp;
+	u32 sscr0, sspsp;
 	int width = snd_pcm_format_physical_width(params_format(params));
 	int ttsa = pxa_ssp_read_reg(ssp, SSTSA) & 0xf;
 	struct snd_dmaengine_dai_dma_data *dma_data;
+	int rate = params_rate(params);
+	int bclk = rate * chn * (width / 8);
 	int ret;
 
 	dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream);
@@ -623,11 +579,57 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	}
 	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
 
+	if (sscr0 & SSCR0_ACS) {
+		ret = pxa_ssp_set_pll(priv, bclk);
+
+		/*
+		 * If we were able to generate the bclk directly,
+		 * all is fine. Otherwise, look up the closest rate
+		 * from the table and also set the dividers.
+		 */
+
+		if (ret < 0) {
+			const struct pxa_ssp_clock_mode *m;
+			int ssacd, acds;
+
+			for (m = pxa_ssp_clock_modes; m->rate; m++) {
+				if (m->rate == rate)
+					break;
+			}
+
+			if (!m->rate)
+				return -EINVAL;
+
+			acds = m->acds;
+
+			/* The values in the table are for 16 bits */
+			if (width == 32)
+				acds--;
+
+			ret = pxa_ssp_set_pll(priv, bclk);
+			if (ret < 0)
+				return ret;
+
+			ssacd = pxa_ssp_read_reg(ssp, SSACD);
+			ssacd &= ~(SSACD_ACDS(7) | SSACD_SCDB_1X);
+			ssacd |= SSACD_ACDS(m->acds);
+			ssacd |= m->scdb;
+			pxa_ssp_write_reg(ssp, SSACD, ssacd);
+		}
+	} else if (sscr0 & SSCR0_ECS) {
+		/*
+		 * For setups with external clocking, the PLL and its diviers
+		 * are not active. Instead, the SCR bits in SSCR0 can be used
+		 * to divide the clock.
+		 */
+		pxa_ssp_set_scr(ssp, bclk / rate);
+	}
+
 	switch (priv->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
 	       sspsp = pxa_ssp_read_reg(ssp, SSPSP);
 
-		if ((pxa_ssp_get_scr(ssp) == 4) && (width == 16)) {
+		if (((priv->sysclk / bclk) == 64) && (width == 16)) {
 			/* This is a special case where the bitclk is 64fs
 			 * and we're not dealing with 2*32 bits of audio
 			 * samples.
@@ -812,8 +814,6 @@ static const struct snd_soc_dai_ops pxa_ssp_dai_ops = {
 	.trigger	= pxa_ssp_trigger,
 	.hw_params	= pxa_ssp_hw_params,
 	.set_sysclk	= pxa_ssp_set_dai_sysclk,
-	.set_clkdiv	= pxa_ssp_set_dai_clkdiv,
-	.set_pll	= pxa_ssp_set_dai_pll,
 	.set_fmt	= pxa_ssp_set_dai_fmt,
 	.set_tdm_slot	= pxa_ssp_set_dai_tdm_slot,
 	.set_tristate	= pxa_ssp_set_dai_tristate,
-- 
cgit v1.2.3


From 88763a5cf80ca59a7c3bea32681ce8f697d9995f Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Tue, 26 Jun 2018 12:53:29 +0200
Subject: powercap / idle_inject: Add an idle injection framework

Initially, the cpu_cooling device for ARM was changed by adding a new
policy inserting idle cycles. The intel_powerclamp driver does a
similar action.

Instead of implementing idle injections privately in the cpu_cooling
device, move the idle injection code in a dedicated framework and give
the opportunity to other frameworks to make use of it.

The framework relies on the smpboot kthreads which handles via its
main loop the common code for hotplugging and [un]parking.

This code was previously tested with the cpu cooling device and went
through several iterations. It results now in split code and API
exported in the header file. It was tested with the cpu cooling device
with success.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Rewrite of all comments ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/Kconfig       |  10 ++
 drivers/powercap/Makefile      |   1 +
 drivers/powercap/idle_inject.c | 356 +++++++++++++++++++++++++++++++++++++++++
 include/linux/idle_inject.h    |  29 ++++
 4 files changed, 396 insertions(+)
 create mode 100644 drivers/powercap/idle_inject.c
 create mode 100644 include/linux/idle_inject.h

(limited to 'include')

diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 85727ef6ce8e..6ac27e5908f5 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -29,4 +29,14 @@ config INTEL_RAPL
 	  controller, CPU core (Power Plance 0), graphics uncore (Power Plane
 	  1), etc.
 
+config IDLE_INJECT
+	bool "Idle injection framework"
+	depends on CPU_IDLE
+	default n
+	help
+	  This enables support for the idle injection framework. It
+	  provides a way to force idle periods on a set of specified
+	  CPUs for power capping. Idle period can be injected
+	  synchronously on a set of specified CPUs or alternatively
+	  on a per CPU basis.
 endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 0a21ef31372b..1b328854b36e 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_POWERCAP)	+= powercap_sys.o
 obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o
+obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
new file mode 100644
index 000000000000..24ff2a068978
--- /dev/null
+++ b/drivers/powercap/idle_inject.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The idle injection framework provides a way to force CPUs to enter idle
+ * states for a specified fraction of time over a specified period.
+ *
+ * It relies on the smpboot kthreads feature providing common code for CPU
+ * hotplug and thread [un]parking.
+ *
+ * All of the kthreads used for idle injection are created at init time.
+ *
+ * Next, the users of the the idle injection framework provide a cpumask via
+ * its register function. The kthreads will be synchronized with respect to
+ * this cpumask.
+ *
+ * The idle + run duration is specified via separate helpers and that allows
+ * idle injection to be started.
+ *
+ * The idle injection kthreads will call play_idle() with the idle duration
+ * specified as per the above.
+ *
+ * After all of them have been woken up, a timer is set to start the next idle
+ * injection cycle.
+ *
+ * The timer interrupt handler will wake up the idle injection kthreads for
+ * all of the CPUs in the cpumask provided by the user.
+ *
+ * Idle injection is stopped synchronously and no leftover idle injection
+ * kthread activity after its completion is guaranteed.
+ *
+ * It is up to the user of this framework to provide a lock for higher-level
+ * synchronization to prevent race conditions like starting idle injection
+ * while unregistering from the framework.
+ */
+#define pr_fmt(fmt) "ii_dev: " fmt
+
+#include <linux/cpu.h>
+#include <linux/hrtimer.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include <uapi/linux/sched/types.h>
+
+/**
+ * struct idle_inject_thread - task on/off switch structure
+ * @tsk: task injecting the idle cycles
+ * @should_run: whether or not to run the task (for the smpboot kthread API)
+ */
+struct idle_inject_thread {
+	struct task_struct *tsk;
+	int should_run;
+};
+
+/**
+ * struct idle_inject_device - idle injection data
+ * @timer: idle injection period timer
+ * @idle_duration_ms: duration of CPU idle time to inject
+ * @run_duration_ms: duration of CPU run time to allow
+ * @cpumask: mask of CPUs affected by idle injection
+ */
+struct idle_inject_device {
+	struct hrtimer timer;
+	unsigned int idle_duration_ms;
+	unsigned int run_duration_ms;
+	unsigned long int cpumask[0];
+};
+
+static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
+static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device);
+
+/**
+ * idle_inject_wakeup - Wake up idle injection threads
+ * @ii_dev: target idle injection device
+ *
+ * Every idle injection task associated with the given idle injection device
+ * and running on an online CPU will be woken up.
+ */
+static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
+{
+	struct idle_inject_thread *iit;
+	unsigned int cpu;
+
+	for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) {
+		iit = per_cpu_ptr(&idle_inject_thread, cpu);
+		iit->should_run = 1;
+		wake_up_process(iit->tsk);
+	}
+}
+
+/**
+ * idle_inject_timer_fn - idle injection timer function
+ * @timer: idle injection hrtimer
+ *
+ * This function is called when the idle injection timer expires.  It wakes up
+ * idle injection tasks associated with the timer and they, in turn, invoke
+ * play_idle() to inject a specified amount of CPU idle time.
+ *
+ * Return: HRTIMER_RESTART.
+ */
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+	unsigned int duration_ms;
+	struct idle_inject_device *ii_dev =
+		container_of(timer, struct idle_inject_device, timer);
+
+	duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+
+	idle_inject_wakeup(ii_dev);
+
+	hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+
+	return HRTIMER_RESTART;
+}
+
+/**
+ * idle_inject_fn - idle injection work function
+ * @cpu: the CPU owning the task
+ *
+ * This function calls play_idle() to inject a specified amount of CPU idle
+ * time.
+ */
+static void idle_inject_fn(unsigned int cpu)
+{
+	struct idle_inject_device *ii_dev;
+	struct idle_inject_thread *iit;
+
+	ii_dev = per_cpu(idle_inject_device, cpu);
+	iit = per_cpu_ptr(&idle_inject_thread, cpu);
+
+	/*
+	 * Let the smpboot main loop know that the task should not run again.
+	 */
+	iit->should_run = 0;
+
+	play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+}
+
+/**
+ * idle_inject_set_duration - idle and run duration update helper
+ * @run_duration_ms: CPU run time to allow in milliseconds
+ * @idle_duration_ms: CPU idle time to inject in milliseconds
+ */
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+			      unsigned int run_duration_ms,
+			      unsigned int idle_duration_ms)
+{
+	if (run_duration_ms && idle_duration_ms) {
+		WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
+		WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+	}
+}
+
+/**
+ * idle_inject_get_duration - idle and run duration retrieval helper
+ * @run_duration_ms: memory location to store the current CPU run time
+ * @idle_duration_ms: memory location to store the current CPU idle time
+ */
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+			      unsigned int *run_duration_ms,
+			      unsigned int *idle_duration_ms)
+{
+	*run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	*idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+}
+
+/**
+ * idle_inject_start - start idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function starts idle injection by first waking up all of the idle
+ * injection kthreads associated with @ii_dev to let them inject CPU idle time
+ * sets up a timer to start the next idle injection period.
+ *
+ * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success.
+ */
+int idle_inject_start(struct idle_inject_device *ii_dev)
+{
+	unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+	unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+
+	if (!idle_duration_ms || !run_duration_ms)
+		return -EINVAL;
+
+	pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
+		 cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+	idle_inject_wakeup(ii_dev);
+
+	hrtimer_start(&ii_dev->timer,
+		      ms_to_ktime(idle_duration_ms + run_duration_ms),
+		      HRTIMER_MODE_REL);
+
+	return 0;
+}
+
+/**
+ * idle_inject_stop - stops idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function stops idle injection and waits for the threads to finish work.
+ * If CPU idle time is being injected when this function runs, then it will
+ * wait until the end of the cycle.
+ *
+ * When it returns, there is no more idle injection kthread activity.  The
+ * kthreads are scheduled out and the periodic timer is off.
+ */
+void idle_inject_stop(struct idle_inject_device *ii_dev)
+{
+	struct idle_inject_thread *iit;
+	unsigned int cpu;
+
+	pr_debug("Stopping idle injection on CPUs '%*pbl'\n",
+		 cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+	hrtimer_cancel(&ii_dev->timer);
+
+	/*
+	 * Stopping idle injection requires all of the idle injection kthreads
+	 * associated with the given cpumask to be parked and stay that way, so
+	 * prevent CPUs from going online at this point.  Any CPUs going online
+	 * after the loop below will be covered by clearing the should_run flag
+	 * that will cause the smpboot main loop to schedule them out.
+	 */
+	cpu_hotplug_disable();
+
+	/*
+	 * Iterate over all (online + offline) CPUs here in case one of them
+	 * goes offline with the should_run flag set so as to prevent its idle
+	 * injection kthread from running when the CPU goes online again after
+	 * the ii_dev has been freed.
+	 */
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+		iit = per_cpu_ptr(&idle_inject_thread, cpu);
+		iit->should_run = 0;
+
+		wait_task_inactive(iit->tsk, 0);
+	}
+
+	cpu_hotplug_enable();
+}
+
+/**
+ * idle_inject_setup - prepare the current task for idle injection
+ * @cpu: not used
+ *
+ * Called once, this function is in charge of setting the current task's
+ * scheduler parameters to make it an RT task.
+ */
+static void idle_inject_setup(unsigned int cpu)
+{
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+}
+
+/**
+ * idle_inject_should_run - function helper for the smpboot API
+ * @cpu: CPU the kthread is running on
+ *
+ * Return: whether or not the thread can run.
+ */
+static int idle_inject_should_run(unsigned int cpu)
+{
+	struct idle_inject_thread *iit =
+		per_cpu_ptr(&idle_inject_thread, cpu);
+
+	return iit->should_run;
+}
+
+/**
+ * idle_inject_register - initialize idle injection on a set of CPUs
+ * @cpumask: CPUs to be affected by idle injection
+ *
+ * This function creates an idle injection control device structure for the
+ * given set of CPUs and initializes the timer associated with it.  It does not
+ * start any injection cycles.
+ *
+ * Return: NULL if memory allocation fails, idle injection control device
+ * pointer on success.
+ */
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
+{
+	struct idle_inject_device *ii_dev;
+	int cpu, cpu_rb;
+
+	ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL);
+	if (!ii_dev)
+		return NULL;
+
+	cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
+	hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ii_dev->timer.function = idle_inject_timer_fn;
+
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+
+		if (per_cpu(idle_inject_device, cpu)) {
+			pr_err("cpu%d is already registered\n", cpu);
+			goto out_rollback;
+		}
+
+		per_cpu(idle_inject_device, cpu) = ii_dev;
+	}
+
+	return ii_dev;
+
+out_rollback:
+	for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) {
+		if (cpu == cpu_rb)
+			break;
+		per_cpu(idle_inject_device, cpu_rb) = NULL;
+	}
+
+	kfree(ii_dev);
+
+	return NULL;
+}
+
+/**
+ * idle_inject_unregister - unregister idle injection control device
+ * @ii_dev: idle injection control device to unregister
+ *
+ * The function stops idle injection for the given control device,
+ * unregisters its kthreads and frees memory allocated when that device was
+ * created.
+ */
+void idle_inject_unregister(struct idle_inject_device *ii_dev)
+{
+	unsigned int cpu;
+
+	idle_inject_stop(ii_dev);
+
+	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask))
+		per_cpu(idle_inject_device, cpu) = NULL;
+
+	kfree(ii_dev);
+}
+
+static struct smp_hotplug_thread idle_inject_threads = {
+	.store = &idle_inject_thread.tsk,
+	.setup = idle_inject_setup,
+	.thread_fn = idle_inject_fn,
+	.thread_comm = "idle_inject/%u",
+	.thread_should_run = idle_inject_should_run,
+};
+
+static int __init idle_inject_init(void)
+{
+	return smpboot_register_percpu_thread(&idle_inject_threads);
+}
+early_initcall(idle_inject_init);
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
new file mode 100644
index 000000000000..bdc0293fb6cb
--- /dev/null
+++ b/include/linux/idle_inject.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ */
+#ifndef __IDLE_INJECT_H__
+#define __IDLE_INJECT_H__
+
+/* private idle injection device structure */
+struct idle_inject_device;
+
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);
+
+void idle_inject_unregister(struct idle_inject_device *ii_dev);
+
+int idle_inject_start(struct idle_inject_device *ii_dev);
+
+void idle_inject_stop(struct idle_inject_device *ii_dev);
+
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+				 unsigned int run_duration_ms,
+				 unsigned int idle_duration_ms);
+
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+				 unsigned int *run_duration_ms,
+				 unsigned int *idle_duration_ms);
+#endif /* __IDLE_INJECT_H__ */
-- 
cgit v1.2.3


From 603d4cf8fe095b1ee78f423d514427be507fb513 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Sat, 30 Jun 2018 17:38:55 +0200
Subject: net: fix use-after-free in GRO with ESP

Since the addition of GRO for ESP, gro_receive can consume the skb and
return -EINPROGRESS. In that case, the lower layer GRO handler cannot
touch the skb anymore.

Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted
some of the gro_receive handlers that can lead to ESP's gro_receive so
that they wouldn't access the skb when -EINPROGRESS is returned, but
missed other spots, mainly in tunneling protocols.

This patch finishes the conversion to using skb_gro_flush_final(), and
adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and
GUE.

Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c      |  2 +-
 drivers/net/vxlan.c       |  4 +---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 net/8021q/vlan.c          |  2 +-
 net/ipv4/fou.c            |  4 +---
 net/ipv4/gre_offload.c    |  2 +-
 net/ipv4/udp_offload.c    |  2 +-
 7 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 750eaa53bf0c..ada33c2d9ac2 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -476,7 +476,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aee0e60471f1..f6bb1d54d4bd 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 	flush = 0;
 
 out:
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec9850c7936..3d0cc0b5cec2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp,
 	if (PTR_ERR(pp) != -EINPROGRESS)
 		NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS) {
+		NAPI_GRO_CB(skb)->flush |= flush;
+		skb_gro_remcsum_cleanup(skb, grc);
+		skb->remcsum_offload = 0;
+	}
+}
 #else
 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_remcsum_cleanup(skb, grc);
+	skb->remcsum_offload = 0;
+}
 #endif
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ next_proto:
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ unflush:
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
 EXPORT_SYMBOL(udp_gro_receive);
-- 
cgit v1.2.3


From d6f19938eb031ee2158272757db33258153ae59c Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sun, 1 Jul 2018 23:31:30 +0800
Subject: net: expose sk wmem in sock_exceed_buf_limit tracepoint

Currently trace_sock_exceed_buf_limit() only show rmem info,
but wmem limit may also be hit.
So expose wmem info in this tracepoint as well.

Regarding memcg, I think it is better to introduce a new tracepoint(if
that is needed), i.e. trace_memcg_limit_hit other than show memcg info in
trace_sock_exceed_buf_limit.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/sock.h | 30 +++++++++++++++++++++++++-----
 net/core/sock.c             |  6 ++++--
 2 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 3176a3931107..a0c4b8a30966 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -35,6 +35,10 @@
 		EM(TCP_CLOSING)			\
 		EMe(TCP_NEW_SYN_RECV)
 
+#define skmem_kind_names			\
+		EM(SK_MEM_SEND)			\
+		EMe(SK_MEM_RECV)
+
 /* enums need to be exported to user space */
 #undef EM
 #undef EMe
@@ -44,6 +48,7 @@
 family_names
 inet_protocol_names
 tcp_state_names
+skmem_kind_names
 
 #undef EM
 #undef EMe
@@ -59,6 +64,9 @@ tcp_state_names
 #define show_tcp_state_name(val)        \
 	__print_symbolic(val, tcp_state_names)
 
+#define show_skmem_kind_names(val)	\
+	__print_symbolic(val, skmem_kind_names)
+
 TRACE_EVENT(sock_rcvqueue_full,
 
 	TP_PROTO(struct sock *sk, struct sk_buff *skb),
@@ -83,9 +91,9 @@ TRACE_EVENT(sock_rcvqueue_full,
 
 TRACE_EVENT(sock_exceed_buf_limit,
 
-	TP_PROTO(struct sock *sk, struct proto *prot, long allocated),
+	TP_PROTO(struct sock *sk, struct proto *prot, long allocated, int kind),
 
-	TP_ARGS(sk, prot, allocated),
+	TP_ARGS(sk, prot, allocated, kind),
 
 	TP_STRUCT__entry(
 		__array(char, name, 32)
@@ -93,6 +101,10 @@ TRACE_EVENT(sock_exceed_buf_limit,
 		__field(long, allocated)
 		__field(int, sysctl_rmem)
 		__field(int, rmem_alloc)
+		__field(int, sysctl_wmem)
+		__field(int, wmem_alloc)
+		__field(int, wmem_queued)
+		__field(int, kind)
 	),
 
 	TP_fast_assign(
@@ -101,17 +113,25 @@ TRACE_EVENT(sock_exceed_buf_limit,
 		__entry->allocated = allocated;
 		__entry->sysctl_rmem = sk_get_rmem0(sk, prot);
 		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
+		__entry->sysctl_wmem = sk_get_wmem0(sk, prot);
+		__entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
+		__entry->wmem_queued = sk->sk_wmem_queued;
+		__entry->kind = kind;
 	),
 
-	TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld "
-		"sysctl_rmem=%d rmem_alloc=%d",
+	TP_printk("proto:%s sysctl_mem=%ld,%ld,%ld allocated=%ld sysctl_rmem=%d rmem_alloc=%d sysctl_wmem=%d wmem_alloc=%d wmem_queued=%d kind=%s",
 		__entry->name,
 		__entry->sysctl_mem[0],
 		__entry->sysctl_mem[1],
 		__entry->sysctl_mem[2],
 		__entry->allocated,
 		__entry->sysctl_rmem,
-		__entry->rmem_alloc)
+		__entry->rmem_alloc,
+		__entry->sysctl_wmem,
+		__entry->wmem_alloc,
+		__entry->wmem_queued,
+		show_skmem_kind_names(__entry->kind)
+	)
 );
 
 TRACE_EVENT(inet_sock_set_state,
diff --git a/net/core/sock.c b/net/core/sock.c
index dac6d785186b..8b69ac96a850 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2401,9 +2401,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	long allocated = sk_memory_allocated_add(sk, amt);
+	bool charged = true;
 
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
+	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
 		goto suppress_allocation;
 
 	/* Under limit. */
@@ -2461,7 +2462,8 @@ suppress_allocation:
 			return 1;
 	}
 
-	trace_sock_exceed_buf_limit(sk, prot, allocated);
+	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
+		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
 
 	sk_memory_allocated_sub(sk, amt);
 
-- 
cgit v1.2.3


From c5828150067c47a97f30e690a472e0548d3ac97d Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Thu, 28 Jun 2018 12:05:00 +0200
Subject: Revert "cxl: Add kernel API to allow a context to operate with
 relocate disabled"

Remove abandonned capi support for the Mellanox CX4.
The symbol 'cxl_set_translation_mode' is never called, so
ctx->real_mode is always false.

This reverts commit 7a0d85d313c2066712e530e668bc02bb741a685c.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/api.c    | 19 -------------------
 drivers/misc/cxl/cxl.h    |  1 -
 drivers/misc/cxl/guest.c  |  3 ---
 drivers/misc/cxl/native.c |  3 ++-
 include/misc/cxl.h        |  8 --------
 5 files changed, 2 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 753b1a698fc4..21d620e29fea 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -324,7 +324,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 	if (task) {
 		ctx->pid = get_task_pid(task, PIDTYPE_PID);
 		kernel = false;
-		ctx->real_mode = false;
 
 		/* acquire a reference to the task's mm */
 		ctx->mm = get_task_mm(current);
@@ -388,24 +387,6 @@ void cxl_set_master(struct cxl_context *ctx)
 }
 EXPORT_SYMBOL_GPL(cxl_set_master);
 
-int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
-{
-	if (ctx->status == STARTED) {
-		/*
-		 * We could potentially update the PE and issue an update LLCMD
-		 * to support this, but it doesn't seem to have a good use case
-		 * since it's trivial to just create a second kernel context
-		 * with different translation modes, so until someone convinces
-		 * me otherwise:
-		 */
-		return -EBUSY;
-	}
-
-	ctx->real_mode = real_mode;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cxl_set_translation_mode);
-
 /* wrappers around afu_* file ops which are EXPORTED */
 int cxl_fd_open(struct inode *inode, struct file *file)
 {
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 918d4fb742d1..af8794719956 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -613,7 +613,6 @@ struct cxl_context {
 	bool pe_inserted;
 	bool master;
 	bool kernel;
-	bool real_mode;
 	bool pending_irq;
 	bool pending_fault;
 	bool pending_afu_err;
diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 4644f16606a3..f5dc740fcd13 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -623,9 +623,6 @@ static int guest_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u
 {
 	pr_devel("in %s\n", __func__);
 
-	if (ctx->real_mode)
-		return -EPERM;
-
 	ctx->kernel = kernel;
 	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
 		return attach_afu_directed(ctx, wed, amr);
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 98f867fcef24..c9d5d82dce8e 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -605,6 +605,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9)
 		sr |= CXL_PSL_SR_An_MP;
 	if (mfspr(SPRN_LPCR) & LPCR_TC)
 		sr |= CXL_PSL_SR_An_TC;
+
 	if (kernel) {
 		if (!real_mode)
 			sr |= CXL_PSL_SR_An_R;
@@ -629,7 +630,7 @@ u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9)
 
 static u64 calculate_sr(struct cxl_context *ctx)
 {
-	return cxl_calculate_sr(ctx->master, ctx->kernel, ctx->real_mode,
+	return cxl_calculate_sr(ctx->master, ctx->kernel, false,
 				cxl_is_power9());
 }
 
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index b712be544f8c..82cc6ffafe2d 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -173,14 +173,6 @@ int cxl_afu_reset(struct cxl_context *ctx);
  */
 void cxl_set_master(struct cxl_context *ctx);
 
-/*
- * Sets the context to use real mode memory accesses to operate with
- * translation disabled. Note that this only makes sense for kernel contexts
- * under bare metal, and will not work with virtualisation. May only be
- * performed on stopped contexts.
- */
-int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode);
-
 /*
  * Map and unmap the AFU Problem Space area. The amount and location mapped
  * depends on if this context is a master or slave.
-- 
cgit v1.2.3


From 0cfd7335d1ebea42cf113fd22452f6a10d3960fe Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Thu, 28 Jun 2018 12:05:01 +0200
Subject: Revert "cxl: Add support for interrupts on the Mellanox CX4"

Remove abandonned capi support for the Mellanox CX4.

This reverts commit a2f67d5ee8d950caaa7a6144cf0bfb256500b73e.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-cxl.c  | 84 -------------------------------
 arch/powerpc/platforms/powernv/pci-ioda.c |  4 --
 arch/powerpc/platforms/powernv/pci.h      |  2 -
 drivers/misc/cxl/api.c                    | 71 --------------------------
 drivers/misc/cxl/base.c                   | 31 ------------
 drivers/misc/cxl/cxl.h                    |  4 --
 drivers/misc/cxl/main.c                   |  2 -
 include/misc/cxl-base.h                   |  4 --
 8 files changed, 202 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index cee003de63af..c447b7f03c09 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/msi.h>
 #include <asm/pci-bridge.h>
 #include <asm/pnv-pci.h>
 #include <asm/opal.h>
@@ -292,86 +291,3 @@ void pnv_cxl_disable_device(struct pci_dev *dev)
 	cxl_pci_disable_device(dev);
 	cxl_afu_put(afu);
 }
-
-/*
- * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This
- * function handles setting up the IVTE entries for the XSL to use.
- *
- * We are currently not filling out the MSIX table, since the only currently
- * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it
- * is up to their driver to fill that out. In the future we may fill out the
- * MSIX table (and change the IVTE entries to be an index to the MSIX table)
- * for adapters implementing the Full MSI-X mode described in the CAIA.
- */
-int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	struct msi_desc *entry;
-	struct cxl_context *ctx = NULL;
-	unsigned int virq;
-	int hwirq;
-	int afu_irq = 0;
-	int rc;
-
-	if (WARN_ON(!phb) || !phb->msi_bmp.bitmap)
-		return -ENODEV;
-
-	if (pdev->no_64bit_msi && !phb->msi32_support)
-		return -ENODEV;
-
-	rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type);
-	if (rc)
-		return rc;
-
-	for_each_pci_msi_entry(entry, pdev) {
-		if (!entry->msi_attrib.is_64 && !phb->msi32_support) {
-			pr_warn("%s: Supports only 64-bit MSIs\n",
-				pci_name(pdev));
-			return -ENXIO;
-		}
-
-		hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq);
-		if (WARN_ON(hwirq <= 0))
-			return (hwirq ? hwirq : -ENOMEM);
-
-		virq = irq_create_mapping(NULL, hwirq);
-		if (!virq) {
-			pr_warn("%s: Failed to map cxl mode MSI to linux irq\n",
-				pci_name(pdev));
-			return -ENOMEM;
-		}
-
-		rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq);
-		if (rc) {
-			pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev));
-			irq_dispose_mapping(virq);
-			return rc;
-		}
-
-		irq_set_msi_desc(virq, entry);
-	}
-
-	return 0;
-}
-
-void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	struct msi_desc *entry;
-	irq_hw_number_t hwirq;
-
-	if (WARN_ON(!phb))
-		return;
-
-	for_each_pci_msi_entry(entry, pdev) {
-		if (!entry->irq)
-			continue;
-		hwirq = virq_to_hw(entry->irq);
-		irq_set_msi_desc(entry->irq, NULL);
-		irq_dispose_mapping(entry->irq);
-	}
-
-	cxl_cx4_teardown_msi_irqs(pdev);
-}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index ab678177d36e..5b819c55868d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3847,10 +3847,6 @@ static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
 const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = {
 	.dma_dev_setup		= pnv_pci_dma_dev_setup,
 	.dma_bus_setup		= pnv_pci_dma_bus_setup,
-#ifdef CONFIG_PCI_MSI
-	.setup_msi_irqs		= pnv_cxl_cx4_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_cxl_cx4_teardown_msi_irqs,
-#endif
 	.enable_device_hook	= pnv_cxl_enable_device_hook,
 	.disable_device		= pnv_cxl_disable_device,
 	.release_device		= pnv_pci_release_device,
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index eada4b6068cb..ba41913c7e21 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -265,8 +265,6 @@ extern int pnv_npu2_init(struct pnv_phb *phb);
 /* cxl functions */
 extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev);
 extern void pnv_cxl_disable_device(struct pci_dev *dev);
-extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
 
 
 /* phb ops (cxl switches these when enabling the kernel api on the phb) */
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 21d620e29fea..2e5862b7a074 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <misc/cxl.h>
-#include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/sched/mm.h>
@@ -595,73 +594,3 @@ int cxl_get_max_irqs_per_process(struct pci_dev *dev)
 	return afu->irqs_max;
 }
 EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
-
-/*
- * This is a special interrupt allocation routine called from the PHB's MSI
- * setup function. When capi interrupts are allocated in this manner they must
- * still be associated with a running context, but since the MSI APIs have no
- * way to specify this we use the default context associated with the device.
- *
- * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
- * interrupt number, so in order to overcome this their driver informs us of
- * the restriction by setting the maximum interrupts per context, and we
- * allocate additional contexts as necessary so that we can keep the AFU
- * interrupt number within the supported range.
- */
-int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	struct cxl_context *ctx, *new_ctx, *default_ctx;
-	int remaining;
-	int rc;
-
-	ctx = default_ctx = cxl_get_context(pdev);
-	if (WARN_ON(!default_ctx))
-		return -ENODEV;
-
-	remaining = nvec;
-	while (remaining > 0) {
-		rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
-		if (rc) {
-			pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
-			return rc;
-		}
-		remaining -= ctx->afu->irqs_max;
-
-		if (ctx != default_ctx && default_ctx->status == STARTED) {
-			WARN_ON(cxl_start_context(ctx,
-				be64_to_cpu(default_ctx->elem->common.wed),
-				NULL));
-		}
-
-		if (remaining > 0) {
-			new_ctx = cxl_dev_context_init(pdev);
-			if (IS_ERR(new_ctx)) {
-				pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
-				return -ENOSPC;
-			}
-			list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
-			ctx = new_ctx;
-		}
-	}
-
-	return 0;
-}
-/* Exported via cxl_base */
-
-void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	struct cxl_context *ctx, *pos, *tmp;
-
-	ctx = cxl_get_context(pdev);
-	if (WARN_ON(!ctx))
-		return;
-
-	cxl_free_afu_irqs(ctx);
-	list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
-		cxl_stop_context(pos);
-		cxl_free_afu_irqs(pos);
-		list_del(&pos->extra_irq_contexts);
-		cxl_release_context(pos);
-	}
-}
-/* Exported via cxl_base */
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
index cd54ce6f6230..fe90f895bb10 100644
--- a/drivers/misc/cxl/base.c
+++ b/drivers/misc/cxl/base.c
@@ -158,37 +158,6 @@ int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_
 }
 EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq);
 
-int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
-	int ret;
-	struct cxl_calls *calls;
-
-	calls = cxl_calls_get();
-	if (!calls)
-		return false;
-
-	ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type);
-
-	cxl_calls_put(calls);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs);
-
-void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
-{
-	struct cxl_calls *calls;
-
-	calls = cxl_calls_get();
-	if (!calls)
-		return;
-
-	calls->cxl_cx4_teardown_msi_irqs(pdev);
-
-	cxl_calls_put(calls);
-}
-EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs);
-
 static int __init cxl_base_init(void)
 {
 	struct device_node *np;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index af8794719956..9688fe8b4d80 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -879,16 +879,12 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
 bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
 void _cxl_pci_disable_device(struct pci_dev *dev);
 int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
-int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
 
 struct cxl_calls {
 	void (*cxl_slbia)(struct mm_struct *mm);
 	bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu);
 	void (*cxl_pci_disable_device)(struct pci_dev *dev);
 	int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
-	int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type);
-	void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev);
 
 	struct module *owner;
 };
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index c1ba0d42cbc8..59a904efd104 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -107,8 +107,6 @@ static struct cxl_calls cxl_calls = {
 	.cxl_pci_associate_default_context = _cxl_pci_associate_default_context,
 	.cxl_pci_disable_device = _cxl_pci_disable_device,
 	.cxl_next_msi_hwirq = _cxl_next_msi_hwirq,
-	.cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs,
-	.cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs,
 	.owner = THIS_MODULE,
 };
 
diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h
index b2ebc91fe09a..bb7e629ae492 100644
--- a/include/misc/cxl-base.h
+++ b/include/misc/cxl-base.h
@@ -43,8 +43,6 @@ void cxl_afu_put(struct cxl_afu *afu);
 void cxl_slbia(struct mm_struct *mm);
 bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
 void cxl_pci_disable_device(struct pci_dev *dev);
-int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
-void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev);
 
 #else /* CONFIG_CXL_BASE */
 
@@ -54,8 +52,6 @@ static inline void cxl_afu_put(struct cxl_afu *afu) {}
 static inline void cxl_slbia(struct mm_struct *mm) {}
 static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; }
 static inline void cxl_pci_disable_device(struct pci_dev *dev) {}
-static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; }
-static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {}
 
 #endif /* CONFIG_CXL_BASE */
 
-- 
cgit v1.2.3


From 17d29039388807305ab02a4d6eae7cbe09f81f90 Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Thu, 28 Jun 2018 12:05:02 +0200
Subject: Revert "cxl: Add preliminary workaround for CX4 interrupt limitation"

Remove abandonned capi support for the Mellanox CX4.

This reverts commit cbce0917e2e47d4bf5aa3b5fd6b1247f33e1a126.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/api.c     | 15 ---------------
 drivers/misc/cxl/base.c    | 17 -----------------
 drivers/misc/cxl/context.c |  1 -
 drivers/misc/cxl/cxl.h     | 10 ----------
 drivers/misc/cxl/main.c    |  1 -
 include/misc/cxl.h         | 20 --------------------
 6 files changed, 64 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 2e5862b7a074..34ba67bc41bd 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -181,21 +181,6 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
 	return 0;
 }
 
-int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
-{
-	if (*ctx == NULL || *afu_irq == 0) {
-		*afu_irq = 1;
-		*ctx = cxl_get_context(pdev);
-	} else {
-		(*afu_irq)++;
-		if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
-			*ctx = list_next_entry(*ctx, extra_irq_contexts);
-			*afu_irq = 1;
-		}
-	}
-	return cxl_find_afu_irq(*ctx, *afu_irq);
-}
-/* Exported via cxl_base */
 
 int cxl_set_priv(struct cxl_context *ctx, void *priv)
 {
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
index fe90f895bb10..e1e80cb99ad9 100644
--- a/drivers/misc/cxl/base.c
+++ b/drivers/misc/cxl/base.c
@@ -141,23 +141,6 @@ void cxl_pci_disable_device(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(cxl_pci_disable_device);
 
-int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
-{
-	int ret;
-	struct cxl_calls *calls;
-
-	calls = cxl_calls_get();
-	if (!calls)
-		return -EBUSY;
-
-	ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq);
-
-	cxl_calls_put(calls);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq);
-
 static int __init cxl_base_init(void)
 {
 	struct device_node *np;
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index c6ec872800a2..0355d42d367f 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -74,7 +74,6 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	ctx->pending_afu_err = false;
 
 	INIT_LIST_HEAD(&ctx->irq_names);
-	INIT_LIST_HEAD(&ctx->extra_irq_contexts);
 
 	/*
 	 * When we have to destroy all contexts in cxl_context_detach_all() we
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 9688fe8b4d80..d95c2c98f2ab 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -623,14 +623,6 @@ struct cxl_context {
 
 	struct rcu_head rcu;
 
-	/*
-	 * Only used when more interrupts are allocated via
-	 * pci_enable_msix_range than are supported in the default context, to
-	 * use additional contexts to overcome the limitation. i.e. Mellanox
-	 * CX4 only:
-	 */
-	struct list_head extra_irq_contexts;
-
 	struct mm_struct *mm;
 
 	u16 tidr;
@@ -878,13 +870,11 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
 /* Internal functions wrapped in cxl_base to allow PHB to call them */
 bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
 void _cxl_pci_disable_device(struct pci_dev *dev);
-int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
 
 struct cxl_calls {
 	void (*cxl_slbia)(struct mm_struct *mm);
 	bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu);
 	void (*cxl_pci_disable_device)(struct pci_dev *dev);
-	int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
 
 	struct module *owner;
 };
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 59a904efd104..a7e83624034b 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -106,7 +106,6 @@ static struct cxl_calls cxl_calls = {
 	.cxl_slbia = cxl_slbia_core,
 	.cxl_pci_associate_default_context = _cxl_pci_associate_default_context,
 	.cxl_pci_disable_device = _cxl_pci_disable_device,
-	.cxl_next_msi_hwirq = _cxl_next_msi_hwirq,
 	.owner = THIS_MODULE,
 };
 
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 82cc6ffafe2d..6a3711a2e217 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -183,26 +183,6 @@ void cxl_psa_unmap(void __iomem *addr);
 /*  Get the process element for this context */
 int cxl_process_element(struct cxl_context *ctx);
 
-/*
- * Limit the number of interrupts that a single context can allocate via
- * cxl_start_work. If using the api with a real phb, this may be used to
- * request that additional default contexts be created when allocating
- * interrupts via pci_enable_msix_range. These will be set to the same running
- * state as the default context, and if that is running it will reuse the
- * parameters previously passed to cxl_start_context for the default context.
- */
-int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs);
-int cxl_get_max_irqs_per_process(struct pci_dev *dev);
-
-/*
- * Use to simultaneously iterate over hardware interrupt numbers, contexts and
- * afu interrupt numbers allocated for the device via pci_enable_msix_range and
- * is a useful convenience function when working with hardware that has
- * limitations on the number of interrupts per process. *ctx and *afu_irq
- * should be NULL and 0 to start the iteration.
- */
-int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq);
-
 /*
  * These calls allow drivers to create their own file descriptors and make them
  * identical to the cxl file descriptor user API. An example use case:
-- 
cgit v1.2.3


From 29fea8aa21a69418386e3e08fa546a0ba9bee96d Mon Sep 17 00:00:00 2001
From: Alastair D'Silva <alastair@d-silva.org>
Date: Thu, 28 Jun 2018 12:05:04 +0200
Subject: Revert "cxl: Add cxl_check_and_switch_mode() API to switch bi-modal
 cards"

Remove abandonned capi support for the Mellanox CX4.

This reverts commit b0b5e5918ad1babfd1d43d98c7281926a7b57b9f.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/Kconfig |   8 --
 drivers/misc/cxl/pci.c   | 236 ++++-------------------------------------------
 include/misc/cxl.h       |  25 -----
 3 files changed, 18 insertions(+), 251 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 93397cb05b15..3ce933707828 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -33,11 +33,3 @@ config CXL
 	  CAPI adapters are found in POWER8 based systems.
 
 	  If unsure, say N.
-
-config CXL_BIMODAL
-	bool "Support for bi-modal CAPI cards"
-	depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m
-	default y
-	help
-	  Select this option to enable support for bi-modal CAPI cards, such as
-	  the Mellanox CX-4.
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 429d6de1dde7..9c5a21fee835 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -55,8 +55,6 @@
 	pci_read_config_byte(dev, vsec + 0xa, dest)
 #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \
 	pci_write_config_byte(dev, vsec + 0xa, val)
-#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \
-	pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val)
 #define CXL_VSEC_PROTOCOL_MASK   0xe0
 #define CXL_VSEC_PROTOCOL_1024TB 0x80
 #define CXL_VSEC_PROTOCOL_512TB  0x40
@@ -800,234 +798,36 @@ static int setup_cxl_bars(struct pci_dev *dev)
 	return 0;
 }
 
-#ifdef CONFIG_CXL_BIMODAL
-
-struct cxl_switch_work {
-	struct pci_dev *dev;
-	struct work_struct work;
-	int vsec;
-	int mode;
-};
-
-static void switch_card_to_cxl(struct work_struct *work)
+/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */
+static int switch_card_to_cxl(struct pci_dev *dev)
 {
-	struct cxl_switch_work *switch_work =
-		container_of(work, struct cxl_switch_work, work);
-	struct pci_dev *dev = switch_work->dev;
-	struct pci_bus *bus = dev->bus;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pci_dev *bridge;
-	struct pnv_php_slot *php_slot;
-	unsigned int devfn;
+	int vsec;
 	u8 val;
 	int rc;
 
-	dev_info(&bus->dev, "cxl: Preparing for mode switch...\n");
-	bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev,
-					  bus_list);
-	if (!bridge) {
-		dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n");
-		goto err_dev_put;
-	}
+	dev_info(&dev->dev, "switch card to CXL\n");
 
-	php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge));
-	if (!php_slot) {
-		dev_err(&bus->dev, "cxl: Failed to find slot hotplug "
-			           "information. You may need to upgrade "
-			           "skiboot. Aborting.\n");
-		goto err_dev_put;
-	}
-
-	rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val);
-	if (rc) {
-		dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc);
-		goto err_dev_put;
-	}
-	devfn = dev->devfn;
-
-	/* Release the reference obtained in cxl_check_and_switch_mode() */
-	pci_dev_put(dev);
-
-	dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n");
-	pci_lock_rescan_remove();
-	pci_hp_remove_devices(bridge->subordinate);
-	pci_unlock_rescan_remove();
-
-	/* Switch the CXL protocol on the card */
-	if (switch_work->mode == CXL_BIMODE_CXL) {
-		dev_info(&bus->dev, "cxl: Switching card to CXL mode\n");
-		val &= ~CXL_VSEC_PROTOCOL_MASK;
-		val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
-		rc = pnv_cxl_enable_phb_kernel_api(hose, true);
-		if (rc) {
-			dev_err(&bus->dev, "cxl: Failed to enable kernel API"
-				           " on real PHB, aborting\n");
-			goto err_free_work;
-		}
-	} else {
-		dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n");
-		goto err_free_work;
-	}
-
-	rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val);
-	if (rc) {
-		dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc);
-		goto err_free_work;
-	}
-
-	/*
-	 * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states
-	 * we must wait 100ms after this mode switch before touching PCIe config
-	 * space.
-	 */
-	msleep(100);
-
-	/*
-	 * Hot reset to cause the card to come back in cxl mode. A
-	 * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support
-	 * in skiboot, so we use a hot reset instead.
-	 *
-	 * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is
-	 * guaranteed to sit directly under the root port, and setting the reset
-	 * state on a device directly under the root port is equivalent to doing
-	 * it on the root port iself.
-	 */
-	dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n");
-	pci_set_pcie_reset_state(bridge, pcie_hot_reset);
-	pci_set_pcie_reset_state(bridge, pcie_deassert_reset);
-
-	dev_dbg(&bus->dev, "cxl: Offlining slot\n");
-	rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE);
-	if (rc) {
-		dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc);
-		goto err_free_work;
-	}
-
-	dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n");
-	rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE);
-	if (rc) {
-		dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc);
-		goto err_free_work;
-	}
-
-	pci_lock_rescan_remove();
-	pci_hp_add_devices(bridge->subordinate);
-	pci_unlock_rescan_remove();
-
-	dev_info(&bus->dev, "cxl: CAPI mode switch completed\n");
-	kfree(switch_work);
-	return;
-
-err_dev_put:
-	/* Release the reference obtained in cxl_check_and_switch_mode() */
-	pci_dev_put(dev);
-err_free_work:
-	kfree(switch_work);
-}
-
-int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec)
-{
-	struct cxl_switch_work *work;
-	u8 val;
-	int rc;
-
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
+	if (!(vsec = find_cxl_vsec(dev))) {
+		dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n");
 		return -ENODEV;
-
-	if (!vsec) {
-		vsec = find_cxl_vsec(dev);
-		if (!vsec) {
-			dev_info(&dev->dev, "CXL VSEC not found\n");
-			return -ENODEV;
-		}
 	}
 
-	rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val);
-	if (rc) {
-		dev_err(&dev->dev, "Failed to read current mode control: %i", rc);
+	if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) {
+		dev_err(&dev->dev, "failed to read current mode control: %i", rc);
 		return rc;
 	}
-
-	if (mode == CXL_BIMODE_PCI) {
-		if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) {
-			dev_info(&dev->dev, "Card is already in PCI mode\n");
-			return 0;
-		}
-		/*
-		 * TODO: Before it's safe to switch the card back to PCI mode
-		 * we need to disable the CAPP and make sure any cachelines the
-		 * card holds have been flushed out. Needs skiboot support.
-		 */
-		dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n");
-		return -EIO;
-	}
-
-	if (val & CXL_VSEC_PROTOCOL_ENABLE) {
-		dev_info(&dev->dev, "Card is already in CXL mode\n");
-		return 0;
+	val &= ~CXL_VSEC_PROTOCOL_MASK;
+	val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE;
+	if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) {
+		dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc);
+		return rc;
 	}
-
-	dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread "
-			    "to switch to CXL mode\n");
-
-	work = kmalloc(sizeof(struct cxl_switch_work), GFP_KERNEL);
-	if (!work)
-		return -ENOMEM;
-
-	pci_dev_get(dev);
-	work->dev = dev;
-	work->vsec = vsec;
-	work->mode = mode;
-	INIT_WORK(&work->work, switch_card_to_cxl);
-
-	schedule_work(&work->work);
-
 	/*
-	 * We return a failure now to abort the driver init. Once the
-	 * link has been cycled and the card is in cxl mode we will
-	 * come back (possibly using the generic cxl driver), but
-	 * return success as the card should then be in cxl mode.
-	 *
-	 * TODO: What if the card comes back in PCI mode even after
-	 *       the switch?  Don't want to spin endlessly.
+	 * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states
+	 * we must wait 100ms after this mode switch before touching
+	 * PCIe config space.
 	 */
-	return -EBUSY;
-}
-EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode);
-
-#endif /* CONFIG_CXL_BIMODAL */
-
-static int setup_cxl_protocol_area(struct pci_dev *dev)
-{
-	u8 val;
-	int rc;
-	int vsec = find_cxl_vsec(dev);
-
-	if (!vsec) {
-		dev_info(&dev->dev, "CXL VSEC not found\n");
-		return -ENODEV;
-	}
-
-	rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val);
-	if (rc) {
-		dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc);
-		return rc;
-	}
-
-	if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) {
-		dev_err(&dev->dev, "Card not in CAPI mode!\n");
-		return -EIO;
-	}
-
-	if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) {
-		val &= ~CXL_VSEC_PROTOCOL_MASK;
-		val |= CXL_VSEC_PROTOCOL_256TB;
-		rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val);
-		if (rc) {
-			dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc);
-			return rc;
-		}
-	}
+	msleep(100);
 
 	return 0;
 }
@@ -1724,7 +1524,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
 	if ((rc = setup_cxl_bars(dev)))
 		return rc;
 
-	if ((rc = setup_cxl_protocol_area(dev)))
+	if ((rc = switch_card_to_cxl(dev)))
 		return rc;
 
 	if ((rc = cxl_update_image_control(adapter)))
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 6a3711a2e217..74da2e440763 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -39,31 +39,6 @@
 bool cxl_slot_is_supported(struct pci_dev *dev, int flags);
 
 
-#define CXL_BIMODE_CXL 1
-#define CXL_BIMODE_PCI 2
-
-/*
- * Check the mode that the given bi-modal CXL adapter is currently in and
- * change it if necessary. This does not apply to AFU drivers.
- *
- * If the mode matches the requested mode this function will return 0 - if the
- * driver was expecting the generic CXL driver to have bound to the adapter and
- * it gets this return value it should fail the probe function to give the CXL
- * driver a chance to probe it.
- *
- * If the mode does not match it will start a background task to unplug the
- * device from Linux and switch its mode, and will return -EBUSY. At this
- * point the calling driver should make sure it has released the device and
- * fail its probe function.
- *
- * The offset of the CXL VSEC can be provided to this function. If 0 is passed,
- * this function will search for a CXL VSEC with ID 0x1280 and return -ENODEV
- * if it is not found.
- */
-#ifdef CONFIG_CXL_BIMODAL
-int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec);
-#endif
-
 /* Get the AFU associated with a pci_dev */
 struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From 322dc4af6c95cddc4f9d806197fe6b376cfae350 Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.ibm.com>
Date: Thu, 28 Jun 2018 12:05:07 +0200
Subject: Revert "cxl: Add cxl_slot_is_supported API"

Remove abandonned capi support for the Mellanox CX4.

This reverts commit 4e56f858bdde5cbfb70f61baddfaa56a8ed851bf.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/pci.c | 37 -------------------------------------
 include/misc/cxl.h     | 15 ---------------
 2 files changed, 52 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 193ff22f610b..0ca818396524 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1808,43 +1808,6 @@ int cxl_slot_is_switched(struct pci_dev *dev)
 	return (depth > CXL_MAX_PCIEX_PARENT);
 }
 
-bool cxl_slot_is_supported(struct pci_dev *dev, int flags)
-{
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
-		return false;
-
-	if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) {
-		/*
-		 * CAPP DMA mode is technically supported on regular P8, but
-		 * will EEH if the card attempts to access memory < 4GB, which
-		 * we cannot realistically avoid. We might be able to work
-		 * around the issue, but until then return unsupported:
-		 */
-		return false;
-	}
-
-	if (cxl_slot_is_switched(dev))
-		return false;
-
-	/*
-	 * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since
-	 * the CAPP can be connected to PHB 0, 1 or 2 on a first come first
-	 * served basis, which is racy to check from here. If we need to
-	 * support this in future we might need to consider having this
-	 * function effectively reserve it ahead of time.
-	 *
-	 * Currently, the only user of this API is the Mellanox CX4, which is
-	 * only supported on P8NVL due to the above mentioned limitation of
-	 * CAPP DMA mode and therefore does not need to worry about this. If the
-	 * issue with CAPP DMA mode is later worked around on P8 we might need
-	 * to revisit this.
-	 */
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(cxl_slot_is_supported);
-
-
 static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct cxl *adapter;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 74da2e440763..ea9ff4a1a9ca 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -24,21 +24,6 @@
  * generic PCI API. This API is agnostic to the actual AFU.
  */
 
-#define CXL_SLOT_FLAG_DMA 0x1
-
-/*
- * Checks if the given card is in a cxl capable slot. Pass CXL_SLOT_FLAG_DMA if
- * the card requires CAPP DMA mode to also check if the system supports it.
- * This is intended to be used by bi-modal devices to determine if they can use
- * cxl mode or if they should continue running in PCI mode.
- *
- * Note that this only checks if the slot is cxl capable - it does not
- * currently check if the CAPP is currently available for chips where it can be
- * assigned to different PHBs on a first come first serve basis (i.e. P8)
- */
-bool cxl_slot_is_supported(struct pci_dev *dev, int flags);
-
-
 /* Get the AFU associated with a pci_dev */
 struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From f18a4e1d973bc69a50419eb8918f458ea89c6c3f Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.ibm.com>
Date: Thu, 28 Jun 2018 12:05:08 +0200
Subject: Revert "cxl: Allow a default context to be associated with an
 external pci_dev"

Remove abandonned capi support for the Mellanox CX4.

This reverts commit a19bd79e31769626d288cc016e21a31b6f47bf6f.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 drivers/misc/cxl/Makefile |  2 +-
 drivers/misc/cxl/base.c   | 35 -----------------------------------
 drivers/misc/cxl/cxl.h    |  6 ------
 drivers/misc/cxl/main.c   |  2 --
 drivers/misc/cxl/phb.c    | 44 --------------------------------------------
 drivers/misc/cxl/vphb.c   | 30 +++++++++++++++++++++++++++---
 include/misc/cxl-base.h   |  6 ------
 7 files changed, 28 insertions(+), 97 deletions(-)
 delete mode 100644 drivers/misc/cxl/phb.c

(limited to 'include')

diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index 502d41fc9ea5..5eea61b9584f 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -4,7 +4,7 @@ ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
 
 cxl-y				+= main.o file.o irq.o fault.o native.o
 cxl-y				+= context.o sysfs.o pci.o trace.o
-cxl-y				+= vphb.o phb.o api.o cxllib.o
+cxl-y				+= vphb.o api.o cxllib.o
 cxl-$(CONFIG_PPC_PSERIES)	+= flash.o guest.o of.o hcalls.o
 cxl-$(CONFIG_DEBUG_FS)		+= debugfs.o
 obj-$(CONFIG_CXL)		+= cxl.o
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
index e1e80cb99ad9..7557835cdfcd 100644
--- a/drivers/misc/cxl/base.c
+++ b/drivers/misc/cxl/base.c
@@ -106,41 +106,6 @@ int cxl_update_properties(struct device_node *dn,
 }
 EXPORT_SYMBOL_GPL(cxl_update_properties);
 
-/*
- * API calls into the driver that may be called from the PHB code and must be
- * built in.
- */
-bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu)
-{
-	bool ret;
-	struct cxl_calls *calls;
-
-	calls = cxl_calls_get();
-	if (!calls)
-		return false;
-
-	ret = calls->cxl_pci_associate_default_context(dev, afu);
-
-	cxl_calls_put(calls);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(cxl_pci_associate_default_context);
-
-void cxl_pci_disable_device(struct pci_dev *dev)
-{
-	struct cxl_calls *calls;
-
-	calls = cxl_calls_get();
-	if (!calls)
-		return;
-
-	calls->cxl_pci_disable_device(dev);
-
-	cxl_calls_put(calls);
-}
-EXPORT_SYMBOL_GPL(cxl_pci_disable_device);
-
 static int __init cxl_base_init(void)
 {
 	struct device_node *np;
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index d95c2c98f2ab..aa453448201d 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -867,15 +867,9 @@ static inline bool cxl_is_power9_dd1(void)
 ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf,
 				loff_t off, size_t count);
 
-/* Internal functions wrapped in cxl_base to allow PHB to call them */
-bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
-void _cxl_pci_disable_device(struct pci_dev *dev);
 
 struct cxl_calls {
 	void (*cxl_slbia)(struct mm_struct *mm);
-	bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu);
-	void (*cxl_pci_disable_device)(struct pci_dev *dev);
-
 	struct module *owner;
 };
 int register_cxl_calls(struct cxl_calls *calls);
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index a7e83624034b..334223b802ee 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -104,8 +104,6 @@ static inline void cxl_slbia_core(struct mm_struct *mm)
 
 static struct cxl_calls cxl_calls = {
 	.cxl_slbia = cxl_slbia_core,
-	.cxl_pci_associate_default_context = _cxl_pci_associate_default_context,
-	.cxl_pci_disable_device = _cxl_pci_disable_device,
 	.owner = THIS_MODULE,
 };
 
diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c
deleted file mode 100644
index 6ec69ada19f4..000000000000
--- a/drivers/misc/cxl/phb.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2014-2016 IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/pci.h>
-#include "cxl.h"
-
-bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu)
-{
-	struct cxl_context *ctx;
-
-	/*
-	 * Allocate a context to do cxl things to. This is used for interrupts
-	 * in the peer model using a real phb, and if we eventually do DMA ops
-	 * in the virtual phb, we'll need a default context to attach them to.
-	 */
-	ctx = cxl_dev_context_init(dev);
-	if (IS_ERR(ctx))
-		return false;
-	dev->dev.archdata.cxl_ctx = ctx;
-
-	return (cxl_ops->afu_check_and_enable(afu) == 0);
-}
-/* exported via cxl_base */
-
-void _cxl_pci_disable_device(struct pci_dev *dev)
-{
-	struct cxl_context *ctx = cxl_get_context(dev);
-
-	if (ctx) {
-		if (ctx->status == STARTED) {
-			dev_err(&dev->dev, "Default context started\n");
-			return;
-		}
-		dev->dev.archdata.cxl_ctx = NULL;
-		cxl_release_context(ctx);
-	}
-}
-/* exported via cxl_base */
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 1a99c9c7a6fb..7908633d9204 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -44,6 +44,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
 {
 	struct pci_controller *phb;
 	struct cxl_afu *afu;
+	struct cxl_context *ctx;
 
 	phb = pci_bus_to_host(dev->bus);
 	afu = (struct cxl_afu *)phb->private_data;
@@ -56,7 +57,30 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev)
 	set_dma_ops(&dev->dev, &dma_nommu_ops);
 	set_dma_offset(&dev->dev, PAGE_OFFSET);
 
-	return _cxl_pci_associate_default_context(dev, afu);
+	/*
+	 * Allocate a context to do cxl things too.  If we eventually do real
+	 * DMA ops, we'll need a default context to attach them to
+	 */
+	ctx = cxl_dev_context_init(dev);
+	if (IS_ERR(ctx))
+		return false;
+	dev->dev.archdata.cxl_ctx = ctx;
+
+	return (cxl_ops->afu_check_and_enable(afu) == 0);
+}
+
+static void cxl_pci_disable_device(struct pci_dev *dev)
+{
+	struct cxl_context *ctx = cxl_get_context(dev);
+
+	if (ctx) {
+		if (ctx->status == STARTED) {
+			dev_err(&dev->dev, "Default context started\n");
+			return;
+		}
+		dev->dev.archdata.cxl_ctx = NULL;
+		cxl_release_context(ctx);
+	}
 }
 
 static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
@@ -190,8 +214,8 @@ static struct pci_controller_ops cxl_pci_controller_ops =
 {
 	.probe_mode = cxl_pci_probe_mode,
 	.enable_device_hook = cxl_pci_enable_device_hook,
-	.disable_device = _cxl_pci_disable_device,
-	.release_device = _cxl_pci_disable_device,
+	.disable_device = cxl_pci_disable_device,
+	.release_device = cxl_pci_disable_device,
 	.window_alignment = cxl_pci_window_alignment,
 	.reset_secondary_bus = cxl_pci_reset_secondary_bus,
 	.setup_msi_irqs = cxl_setup_msi_irqs,
diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h
index bb7e629ae492..f53808fa638a 100644
--- a/include/misc/cxl-base.h
+++ b/include/misc/cxl-base.h
@@ -10,8 +10,6 @@
 #ifndef _MISC_CXL_BASE_H
 #define _MISC_CXL_BASE_H
 
-#include <misc/cxl.h>
-
 #ifdef CONFIG_CXL_BASE
 
 #define CXL_IRQ_RANGES 4
@@ -41,8 +39,6 @@ static inline void cxl_ctx_put(void)
 struct cxl_afu *cxl_afu_get(struct cxl_afu *afu);
 void cxl_afu_put(struct cxl_afu *afu);
 void cxl_slbia(struct mm_struct *mm);
-bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu);
-void cxl_pci_disable_device(struct pci_dev *dev);
 
 #else /* CONFIG_CXL_BASE */
 
@@ -50,8 +46,6 @@ static inline bool cxl_ctx_in_use(void) { return false; }
 static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) { return NULL; }
 static inline void cxl_afu_put(struct cxl_afu *afu) {}
 static inline void cxl_slbia(struct mm_struct *mm) {}
-static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; }
-static inline void cxl_pci_disable_device(struct pci_dev *dev) {}
 
 #endif /* CONFIG_CXL_BASE */
 
-- 
cgit v1.2.3


From a7ca13826e478f9b201eb2f9f20de0b978a82ad9 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 29 Jun 2018 14:11:19 +1000
Subject: gpio: aspeed: Add interfaces for co-processor to grab GPIOs

On the Aspeed chip, the GPIOs can be under control of the ARM
chip or of the ColdFire coprocessor. (There's a third command
source, the LPC bus, which we don't use or support yet).

The control of which master is allowed to modify a given
GPIO is per-bank (8 GPIOs).

Unfortunately, systems already exist for which we want to
use GPIOs of both sources in the same bank.

This provides an API exported by the gpio-aspeed driver
that an aspeed coprocessor driver can use to "grab" some
GPIOs for use by the coprocessor, and allow the coprocessor
driver to provide callbacks for arbitrating access.

Once at least one GPIO of a given bank has been "grabbed"
by the coprocessor, the entire bank is marked as being
under coprocessor control. It's command source is switched
to the coprocessor.

If the ARM then tries to write to a GPIO in such a marked bank,
the provided callbacks are used to request access from the
coprocessor driver, which is responsible to doing whatever
is necessary to "pause" the coprocessor or prevent it from
trying to use the GPIOs while the ARM is doing its accesses.

During that time, the command source for the bank is temporarily
switched back to the ARM.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-aspeed.c  | 251 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/gpio/aspeed.h |  15 +++
 2 files changed, 246 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/gpio/aspeed.h

(limited to 'include')

diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index b3968f66b1d2..1e00f4045f9d 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -12,6 +12,7 @@
 #include <asm/div64.h>
 #include <linux/clk.h>
 #include <linux/gpio/driver.h>
+#include <linux/gpio/aspeed.h>
 #include <linux/hashtable.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -22,6 +23,15 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+/*
+ * These two headers aren't meant to be used by GPIO drivers. We need
+ * them in order to access gpio_chip_hwgpio() which we need to implement
+ * the aspeed specific API which allows the coprocessor to request
+ * access to some GPIOs and to arbitrate between coprocessor and ARM.
+ */
+#include <linux/gpio/consumer.h>
+#include "gpiolib.h"
+
 struct aspeed_bank_props {
 	unsigned int bank;
 	u32 input;
@@ -56,6 +66,7 @@ struct aspeed_gpio {
 	struct clk *clk;
 
 	u32 *dcache;
+	u8 *cf_copro_bankmap;
 };
 
 struct aspeed_gpio_bank {
@@ -83,6 +94,9 @@ struct aspeed_gpio_bank {
 
 static const int debounce_timers[4] = { 0x00, 0x50, 0x54, 0x58 };
 
+static const struct aspeed_gpio_copro_ops *copro_ops;
+static void *copro_data;
+
 static const struct aspeed_gpio_bank aspeed_gpio_banks[] = {
 	{
 		.val_regs = 0x0000,
@@ -323,6 +337,50 @@ static void aspeed_gpio_change_cmd_source(struct aspeed_gpio *gpio,
 	iowrite32(reg, c0);
 }
 
+static bool aspeed_gpio_copro_request(struct aspeed_gpio *gpio,
+				      unsigned int offset)
+{
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+
+	if (!copro_ops || !gpio->cf_copro_bankmap)
+		return false;
+	if (!gpio->cf_copro_bankmap[offset >> 3])
+		return false;
+	if (!copro_ops->request_access)
+		return false;
+
+	/* Pause the coprocessor */
+	copro_ops->request_access(copro_data);
+
+	/* Change command source back to ARM */
+	aspeed_gpio_change_cmd_source(gpio, bank, offset >> 3, GPIO_CMDSRC_ARM);
+
+	/* Update cache */
+	gpio->dcache[GPIO_BANK(offset)] = ioread32(bank_reg(gpio, bank, reg_rdata));
+
+	return true;
+}
+
+static void aspeed_gpio_copro_release(struct aspeed_gpio *gpio,
+				      unsigned int offset)
+{
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+
+	if (!copro_ops || !gpio->cf_copro_bankmap)
+		return;
+	if (!gpio->cf_copro_bankmap[offset >> 3])
+		return;
+	if (!copro_ops->release_access)
+		return;
+
+	/* Change command source back to ColdFire */
+	aspeed_gpio_change_cmd_source(gpio, bank, offset >> 3,
+				      GPIO_CMDSRC_COLDFIRE);
+
+	/* Restart the coprocessor */
+	copro_ops->release_access(copro_data);
+}
+
 static int aspeed_gpio_get(struct gpio_chip *gc, unsigned int offset)
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
@@ -356,11 +414,15 @@ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset,
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	unsigned long flags;
+	bool copro;
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	__aspeed_gpio_set(gc, offset, val);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -368,7 +430,9 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	void __iomem *addr = bank_reg(gpio, bank, reg_dir);
 	unsigned long flags;
+	bool copro;
 	u32 reg;
 
 	if (!have_input(gpio, offset))
@@ -376,8 +440,13 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset)
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
-	reg = ioread32(bank_reg(gpio, bank, reg_dir));
-	iowrite32(reg & ~GPIO_BIT(offset), bank_reg(gpio, bank, reg_dir));
+	reg = ioread32(addr);
+	reg &= ~GPIO_BIT(offset);
+
+	copro = aspeed_gpio_copro_request(gpio, offset);
+	iowrite32(reg, addr);
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
@@ -389,7 +458,9 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
 {
 	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	void __iomem *addr = bank_reg(gpio, bank, reg_dir);
 	unsigned long flags;
+	bool copro;
 	u32 reg;
 
 	if (!have_output(gpio, offset))
@@ -397,10 +468,15 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc,
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
+	reg = ioread32(addr);
+	reg |= GPIO_BIT(offset);
+
+	copro = aspeed_gpio_copro_request(gpio, offset);
 	__aspeed_gpio_set(gc, offset, val);
-	reg = ioread32(bank_reg(gpio, bank, reg_dir));
-	iowrite32(reg | GPIO_BIT(offset), bank_reg(gpio, bank, reg_dir));
+	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	return 0;
@@ -430,24 +506,23 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 }
 
 static inline int irqd_to_aspeed_gpio_data(struct irq_data *d,
-		struct aspeed_gpio **gpio,
-		const struct aspeed_gpio_bank **bank,
-		u32 *bit)
+					   struct aspeed_gpio **gpio,
+					   const struct aspeed_gpio_bank **bank,
+					   u32 *bit, int *offset)
 {
-	int offset;
 	struct aspeed_gpio *internal;
 
-	offset = irqd_to_hwirq(d);
+	*offset = irqd_to_hwirq(d);
 
 	internal = irq_data_get_irq_chip_data(d);
 
 	/* This might be a bit of a questionable place to check */
-	if (!have_irq(internal, offset))
+	if (!have_irq(internal, *offset))
 		return -ENOTSUPP;
 
 	*gpio = internal;
-	*bank = to_bank(offset);
-	*bit = GPIO_BIT(offset);
+	*bank = to_bank(*offset);
+	*bit = GPIO_BIT(*offset);
 
 	return 0;
 }
@@ -458,17 +533,23 @@ static void aspeed_gpio_irq_ack(struct irq_data *d)
 	struct aspeed_gpio *gpio;
 	unsigned long flags;
 	void __iomem *status_addr;
+	int rc, offset;
+	bool copro;
 	u32 bit;
-	int rc;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return;
 
 	status_addr = bank_reg(gpio, bank, reg_irq_status);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
+
 	iowrite32(bit, status_addr);
+
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -479,15 +560,17 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
 	unsigned long flags;
 	u32 reg, bit;
 	void __iomem *addr;
-	int rc;
+	int rc, offset;
+	bool copro;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return;
 
 	addr = bank_reg(gpio, bank, reg_irq_enable);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	reg = ioread32(addr);
 	if (set)
@@ -496,6 +579,8 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set)
 		reg &= ~bit;
 	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 }
 
@@ -520,9 +605,10 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	struct aspeed_gpio *gpio;
 	unsigned long flags;
 	void __iomem *addr;
-	int rc;
+	int rc, offset;
+	bool copro;
 
-	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit);
+	rc = irqd_to_aspeed_gpio_data(d, &gpio, &bank, &bit, &offset);
 	if (rc)
 		return -EINVAL;
 
@@ -548,6 +634,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	}
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
 
 	addr = bank_reg(gpio, bank, reg_irq_type0);
 	reg = ioread32(addr);
@@ -564,6 +651,8 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type)
 	reg = (reg & ~bit) | type2;
 	iowrite32(reg, addr);
 
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	irq_set_handler_locked(d, handler);
@@ -658,11 +747,14 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
 	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
 	unsigned long flags;
 	void __iomem *treg;
+	bool copro;
 	u32 val;
 
 	treg = bank_reg(gpio, to_bank(offset), reg_tolerance);
 
 	spin_lock_irqsave(&gpio->lock, flags);
+	copro = aspeed_gpio_copro_request(gpio, offset);
+
 	val = readl(treg);
 
 	if (enable)
@@ -671,6 +763,9 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip,
 		val &= ~GPIO_BIT(offset);
 
 	writel(val, treg);
+
+	if (copro)
+		aspeed_gpio_copro_release(gpio, offset);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
 	return 0;
@@ -766,6 +861,9 @@ static void configure_timer(struct aspeed_gpio *gpio, unsigned int offset,
 	void __iomem *addr;
 	u32 val;
 
+	/* Note: Debounce timer isn't under control of the command
+	 * source registers, so no need to sync with the coprocessor
+	 */
 	addr = bank_reg(gpio, bank, reg_debounce_sel1);
 	val = ioread32(addr);
 	iowrite32((val & ~mask) | GPIO_SET_DEBOUNCE1(timer, offset), addr);
@@ -912,6 +1010,111 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
 	return -ENOTSUPP;
 }
 
+/**
+ * aspeed_gpio_copro_set_ops - Sets the callbacks used for handhsaking with
+ *                             the coprocessor for shared GPIO banks
+ * @ops: The callbacks
+ * @data: Pointer passed back to the callbacks
+ */
+int aspeed_gpio_copro_set_ops(const struct aspeed_gpio_copro_ops *ops, void *data)
+{
+	copro_data = data;
+	copro_ops = ops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_set_ops);
+
+/**
+ * aspeed_gpio_copro_grab_gpio - Mark a GPIO used by the coprocessor. The entire
+ *                               bank gets marked and any access from the ARM will
+ *                               result in handshaking via callbacks.
+ * @desc: The GPIO to be marked
+ * @vreg_offset: If non-NULL, returns the value register offset in the GPIO space
+ * @dreg_offset: If non-NULL, returns the data latch register offset in the GPIO space
+ * @bit: If non-NULL, returns the bit number of the GPIO in the registers
+ */
+int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+				u16 *vreg_offset, u16 *dreg_offset, u8 *bit)
+{
+	struct gpio_chip *chip = gpiod_to_chip(desc);
+	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
+	int rc = 0, bindex, offset = gpio_chip_hwgpio(desc);
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	unsigned long flags;
+
+	if (!gpio->cf_copro_bankmap)
+		gpio->cf_copro_bankmap = kzalloc(gpio->config->nr_gpios >> 3, GFP_KERNEL);
+	if (!gpio->cf_copro_bankmap)
+		return -ENOMEM;
+	if (offset < 0 || offset > gpio->config->nr_gpios)
+		return -EINVAL;
+	bindex = offset >> 3;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	/* Sanity check, this shouldn't happen */
+	if (gpio->cf_copro_bankmap[bindex] == 0xff) {
+		rc = -EIO;
+		goto bail;
+	}
+	gpio->cf_copro_bankmap[bindex]++;
+
+	/* Switch command source */
+	if (gpio->cf_copro_bankmap[bindex] == 1)
+		aspeed_gpio_change_cmd_source(gpio, bank, bindex,
+					      GPIO_CMDSRC_COLDFIRE);
+
+	if (vreg_offset)
+		*vreg_offset = bank->val_regs;
+	if (dreg_offset)
+		*dreg_offset = bank->rdata_reg;
+	if (bit)
+		*bit = GPIO_OFFSET(offset);
+ bail:
+	spin_unlock_irqrestore(&gpio->lock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_grab_gpio);
+
+/**
+ * aspeed_gpio_copro_release_gpio - Unmark a GPIO used by the coprocessor.
+ * @desc: The GPIO to be marked
+ */
+int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc)
+{
+	struct gpio_chip *chip = gpiod_to_chip(desc);
+	struct aspeed_gpio *gpio = gpiochip_get_data(chip);
+	int rc = 0, bindex, offset = gpio_chip_hwgpio(desc);
+	const struct aspeed_gpio_bank *bank = to_bank(offset);
+	unsigned long flags;
+
+	if (!gpio->cf_copro_bankmap)
+		return -ENXIO;
+
+	if (offset < 0 || offset > gpio->config->nr_gpios)
+		return -EINVAL;
+	bindex = offset >> 3;
+
+	spin_lock_irqsave(&gpio->lock, flags);
+
+	/* Sanity check, this shouldn't happen */
+	if (gpio->cf_copro_bankmap[bindex] == 0) {
+		rc = -EIO;
+		goto bail;
+	}
+	gpio->cf_copro_bankmap[bindex]--;
+
+	/* Switch command source */
+	if (gpio->cf_copro_bankmap[bindex] == 0)
+		aspeed_gpio_change_cmd_source(gpio, bank, bindex,
+					      GPIO_CMDSRC_ARM);
+ bail:
+	spin_unlock_irqrestore(&gpio->lock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(aspeed_gpio_copro_release_gpio);
+
 /*
  * Any banks not specified in a struct aspeed_bank_props array are assumed to
  * have the properties:
@@ -1002,10 +1205,18 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
 	if (!gpio->dcache)
 		return -ENOMEM;
 
-	/* Populate it with initial values read from the HW */
+	/*
+	 * Populate it with initial values read from the HW and switch
+	 * all command sources to the ARM by default
+	 */
 	for (i = 0; i < banks; i++) {
-		void __iomem *addr = bank_reg(gpio, &aspeed_gpio_banks[i], reg_rdata);
+		const struct aspeed_gpio_bank *bank = &aspeed_gpio_banks[i];
+		void __iomem *addr = bank_reg(gpio, bank, reg_rdata);
 		gpio->dcache[i] = ioread32(addr);
+		aspeed_gpio_change_cmd_source(gpio, bank, 0, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 1, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 2, GPIO_CMDSRC_ARM);
+		aspeed_gpio_change_cmd_source(gpio, bank, 3, GPIO_CMDSRC_ARM);
 	}
 
 	rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio);
diff --git a/include/linux/gpio/aspeed.h b/include/linux/gpio/aspeed.h
new file mode 100644
index 000000000000..1bfb3cdc86d0
--- /dev/null
+++ b/include/linux/gpio/aspeed.h
@@ -0,0 +1,15 @@
+#ifndef __GPIO_ASPEED_H
+#define __GPIO_ASPEED_H
+
+struct aspeed_gpio_copro_ops {
+	int (*request_access)(void *data);
+	int (*release_access)(void *data);
+};
+
+int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc,
+				u16 *vreg_offset, u16 *dreg_offset, u8 *bit);
+int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc);
+int aspeed_gpio_copro_set_ops(const struct aspeed_gpio_copro_ops *ops, void *data);
+
+
+#endif /* __GPIO_ASPEED_H */
-- 
cgit v1.2.3


From 240630e61870e62e39a97225048f9945848fa5f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 12:15:46 +0200
Subject: ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS

There have been several reports of LPM related hard freezes about once
a day on multiple Lenovo 50 series models. Strange enough these reports
where not disk model specific as LPM issues usually are and some users
with the exact same disk + laptop where seeing them while other users
where not seeing these issues.

It turns out that enabling LPM triggers a firmware bug somewhere, which
has been fixed in later BIOS versions.

This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM
for dealing with this.

The ahci_broken_lpm() function contains DMI match info for the 4 models
which are known to be affected by this and the DMI BIOS date field for
known good BIOS versions. If the BIOS date is older then the one in the
table LPM will be disabled and a warning will be printed.

Note the BIOS dates are for known good versions, some older versions may
work too, but we don't know for sure, the table is using dates from BIOS
versions for which users have confirmed that upgrading to that version
makes the problem go away.

Unfortunately I've been unable to get hold of the reporter who reported
that BIOS version 2.35 fixed the problems on the W541 for him. I've been
able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older
dmidecode, but I don't know the exact BIOS date as reported in the DMI.
Lenovo keeps a changelog with dates in their release notes, but the
dates there are the release dates not the build dates which are in DMI.
So I've chosen to set the date to which we compare to one day past the
release date of the 2.34 BIOS. I plan to fix this with a follow up
commit once I've the necessary info.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci.c        | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-core.c |  3 +++
 include/linux/libata.h    |  1 +
 3 files changed, 63 insertions(+)

(limited to 'include')

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 738fb22978dd..fdeb3b4d0f4a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1280,6 +1280,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
 	return strcmp(buf, dmi->driver_data) < 0;
 }
 
+static bool ahci_broken_lpm(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/* Various Lenovo 50 series have LPM issues with older BIOSen */
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"),
+			},
+			.driver_data = "20180406", /* 1.31 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"),
+			},
+			.driver_data = "20180420", /* 1.28 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"),
+			},
+			.driver_data = "20180315", /* 1.33 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"),
+			},
+			/*
+			 * Note date based on release notes, 2.35 has been
+			 * reported to be good, but I've been unable to get
+			 * a hold of the reporter to get the DMI BIOS date.
+			 * TODO: fix this.
+			 */
+			.driver_data = "20180310", /* 2.35 */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	int year, month, date;
+	char buf[9];
+
+	if (!dmi)
+		return false;
+
+	dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
+	snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
+
+	return strcmp(buf, dmi->driver_data) < 0;
+}
+
 static bool ahci_broken_online(struct pci_dev *pdev)
 {
 #define ENCODE_BUSDEVFN(bus, slot, func)			\
@@ -1694,6 +1747,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_lpm(pdev)) {
+		pi.flags |= ATA_FLAG_NO_LPM;
+		dev_warn(&pdev->dev,
+			 "BIOS update required for Link Power Management support\n");
+	}
+
 	if (ahci_broken_suspend(pdev)) {
 		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
 		dev_warn(&pdev->dev,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 27d15ed7fa3d..cc71c63df381 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2493,6 +2493,9 @@ int ata_dev_configure(struct ata_device *dev)
 	    (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2)
 		dev->horkage |= ATA_HORKAGE_NOLPM;
 
+	if (ap->flags & ATA_FLAG_NO_LPM)
+		dev->horkage |= ATA_HORKAGE_NOLPM;
+
 	if (dev->horkage & ATA_HORKAGE_NOLPM) {
 		ata_dev_warn(dev, "LPM support broken, forcing max_power\n");
 		dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index a2257e380789..32f247cb5e9e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LPM		= (1 << 2), /* host not happy with LPM */
 	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */
-- 
cgit v1.2.3


From 93dd2112c7b2fa5512cc4aff2c449420487fcb68 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:48 +0300
Subject: usb: typec: mux: Get the mux identifier from function parameter

In order for the muxes to be usable with alternate modes,
the alternate mode devices will need also to be able to get
a handle to the muxes on top of the port devices. To make
that possible, the muxes need to be possible to request with
an identifier.

This will change the API so that the mux identifier is given
as a function parameter to typec_mux_get(), and the hard-coded
"typec-mux" is replaced with that value.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c     | 2 +-
 drivers/usb/typec/mux.c       | 6 +++---
 include/linux/usb/typec_mux.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 3ef7b99b080f..784e928303d7 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1407,7 +1407,7 @@ struct typec_port *typec_register_port(struct device *parent,
 		goto err_switch;
 	}
 
-	port->mux = typec_mux_get(cap->fwnode ? &port->dev : parent);
+	port->mux = typec_mux_get(parent, "typec-mux");
 	if (IS_ERR(port->mux)) {
 		ret = PTR_ERR(port->mux);
 		goto err_mux;
diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index 9d8330e9c431..ddaac63ecf12 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -123,19 +123,19 @@ static void *typec_mux_match(struct device_connection *con, int ep, void *data)
 /**
  * typec_mux_get - Find USB Type-C Multiplexer
  * @dev: The caller device
+ * @name: Mux identifier
  *
  * Finds a mux linked to the caller. This function is primarily meant for the
  * Type-C drivers. Returns a reference to the mux on success, NULL if no
  * matching connection was found, or ERR_PTR(-EPROBE_DEFER) when a connection
  * was found but the mux has not been enumerated yet.
  */
-struct typec_mux *typec_mux_get(struct device *dev)
+struct typec_mux *typec_mux_get(struct device *dev, const char *name)
 {
 	struct typec_mux *mux;
 
 	mutex_lock(&mux_lock);
-	mux = device_connection_find_match(dev, "typec-mux", NULL,
-					   typec_mux_match);
+	mux = device_connection_find_match(dev, name, NULL, typec_mux_match);
 	if (!IS_ERR_OR_NULL(mux))
 		get_device(mux->dev);
 	mutex_unlock(&mux_lock);
diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h
index 12c1b057834b..79293f630ee1 100644
--- a/include/linux/usb/typec_mux.h
+++ b/include/linux/usb/typec_mux.h
@@ -47,7 +47,7 @@ void typec_switch_put(struct typec_switch *sw);
 int typec_switch_register(struct typec_switch *sw);
 void typec_switch_unregister(struct typec_switch *sw);
 
-struct typec_mux *typec_mux_get(struct device *dev);
+struct typec_mux *typec_mux_get(struct device *dev, const char *name);
 void typec_mux_put(struct typec_mux *mux);
 int typec_mux_register(struct typec_mux *mux);
 void typec_mux_unregister(struct typec_mux *mux);
-- 
cgit v1.2.3


From 4ab8c18d4d67321cc7b660559de17511d4fc0237 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:49 +0300
Subject: usb: typec: Register a device for every mode

Before a device was created for every discovered SVID, but
this will create a device for every discovered mode of every
SVID. The idea is to make it easier to create mode specific
drivers once a bus for the alternate mode is added.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 172 +++++++++++++++-------------------------------
 drivers/usb/typec/tcpm.c  |  45 ++++++------
 include/linux/usb/typec.h |  37 +++-------
 3 files changed, 83 insertions(+), 171 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 784e928303d7..96dc9c4f73f0 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -13,31 +13,20 @@
 #include <linux/usb/typec.h>
 #include <linux/usb/typec_mux.h>
 
-struct typec_mode {
-	int				index;
+struct typec_altmode {
+	struct device			dev;
+	u16				svid;
+	u8				mode;
+
 	u32				vdo;
 	char				*desc;
 	enum typec_port_type		roles;
-
-	struct typec_altmode		*alt_mode;
-
 	unsigned int			active:1;
 
+	struct attribute		*attrs[5];
 	char				group_name[6];
 	struct attribute_group		group;
-	struct attribute		*attrs[5];
-	struct device_attribute		vdo_attr;
-	struct device_attribute		desc_attr;
-	struct device_attribute		active_attr;
-	struct device_attribute		roles_attr;
-};
-
-struct typec_altmode {
-	struct device			dev;
-	u16				svid;
-	int				n_modes;
-	struct typec_mode		modes[ALTMODE_MAX_MODES];
-	const struct attribute_group	*mode_groups[ALTMODE_MAX_MODES];
+	const struct attribute_group	*groups[2];
 };
 
 struct typec_plug {
@@ -177,23 +166,20 @@ static void typec_report_identity(struct device *dev)
 /**
  * typec_altmode_update_active - Report Enter/Exit mode
  * @alt: Handle to the alternate mode
- * @mode: Mode index
  * @active: True when the mode has been entered
  *
  * If a partner or cable plug executes Enter/Exit Mode command successfully, the
  * drivers use this routine to report the updated state of the mode.
  */
-void typec_altmode_update_active(struct typec_altmode *alt, int mode,
-				 bool active)
+void typec_altmode_update_active(struct typec_altmode *alt, bool active)
 {
-	struct typec_mode *m = &alt->modes[mode];
 	char dir[6];
 
-	if (m->active == active)
+	if (alt->active == active)
 		return;
 
-	m->active = active;
-	snprintf(dir, sizeof(dir), "mode%d", mode);
+	alt->active = active;
+	snprintf(dir, sizeof(dir), "mode%d", alt->mode);
 	sysfs_notify(&alt->dev.kobj, dir, "active");
 	kobject_uevent(&alt->dev.kobj, KOBJ_CHANGE);
 }
@@ -220,42 +206,36 @@ struct typec_port *typec_altmode2port(struct typec_altmode *alt)
 EXPORT_SYMBOL_GPL(typec_altmode2port);
 
 static ssize_t
-typec_altmode_vdo_show(struct device *dev, struct device_attribute *attr,
-		       char *buf)
+vdo_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       vdo_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "0x%08x\n", mode->vdo);
+	return sprintf(buf, "0x%08x\n", alt->vdo);
 }
+static DEVICE_ATTR_RO(vdo);
 
 static ssize_t
-typec_altmode_desc_show(struct device *dev, struct device_attribute *attr,
-			char *buf)
+description_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       desc_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "%s\n", mode->desc ? mode->desc : "");
+	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
 }
+static DEVICE_ATTR_RO(description);
 
 static ssize_t
-typec_altmode_active_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
+active_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       active_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 
-	return sprintf(buf, "%s\n", mode->active ? "yes" : "no");
+	return sprintf(buf, "%s\n", alt->active ? "yes" : "no");
 }
 
-static ssize_t
-typec_altmode_active_store(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t size)
+static ssize_t active_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t size)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       active_attr);
-	struct typec_port *port = typec_altmode2port(mode->alt_mode);
+	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_port *port = typec_altmode2port(alt);
 	bool activate;
 	int ret;
 
@@ -266,22 +246,22 @@ typec_altmode_active_store(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
-	ret = port->cap->activate_mode(port->cap, mode->index, activate);
+	ret = port->cap->activate_mode(port->cap, alt->mode, activate);
 	if (ret)
 		return ret;
 
 	return size;
 }
+static DEVICE_ATTR_RW(active);
 
 static ssize_t
-typec_altmode_roles_show(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+supported_roles_show(struct device *dev, struct device_attribute *attr,
+		     char *buf)
 {
-	struct typec_mode *mode = container_of(attr, struct typec_mode,
-					       roles_attr);
+	struct typec_altmode *alt = to_altmode(dev);
 	ssize_t ret;
 
-	switch (mode->roles) {
+	switch (alt->roles) {
 	case TYPEC_PORT_SRC:
 		ret = sprintf(buf, "source\n");
 		break;
@@ -295,61 +275,13 @@ typec_altmode_roles_show(struct device *dev, struct device_attribute *attr,
 	}
 	return ret;
 }
+static DEVICE_ATTR_RO(supported_roles);
 
-static void typec_init_modes(struct typec_altmode *alt,
-			     const struct typec_mode_desc *desc, bool is_port)
+static void typec_altmode_release(struct device *dev)
 {
-	int i;
-
-	for (i = 0; i < alt->n_modes; i++, desc++) {
-		struct typec_mode *mode = &alt->modes[i];
-
-		/* Not considering the human readable description critical */
-		mode->desc = kstrdup(desc->desc, GFP_KERNEL);
-		if (desc->desc && !mode->desc)
-			dev_err(&alt->dev, "failed to copy mode%d desc\n", i);
-
-		mode->alt_mode = alt;
-		mode->vdo = desc->vdo;
-		mode->roles = desc->roles;
-		mode->index = desc->index;
-		sprintf(mode->group_name, "mode%d", desc->index);
-
-		sysfs_attr_init(&mode->vdo_attr.attr);
-		mode->vdo_attr.attr.name = "vdo";
-		mode->vdo_attr.attr.mode = 0444;
-		mode->vdo_attr.show = typec_altmode_vdo_show;
-
-		sysfs_attr_init(&mode->desc_attr.attr);
-		mode->desc_attr.attr.name = "description";
-		mode->desc_attr.attr.mode = 0444;
-		mode->desc_attr.show = typec_altmode_desc_show;
-
-		sysfs_attr_init(&mode->active_attr.attr);
-		mode->active_attr.attr.name = "active";
-		mode->active_attr.attr.mode = 0644;
-		mode->active_attr.show = typec_altmode_active_show;
-		mode->active_attr.store = typec_altmode_active_store;
-
-		mode->attrs[0] = &mode->vdo_attr.attr;
-		mode->attrs[1] = &mode->desc_attr.attr;
-		mode->attrs[2] = &mode->active_attr.attr;
-
-		/* With ports, list the roles that the mode is supported with */
-		if (is_port) {
-			sysfs_attr_init(&mode->roles_attr.attr);
-			mode->roles_attr.attr.name = "supported_roles";
-			mode->roles_attr.attr.mode = 0444;
-			mode->roles_attr.show = typec_altmode_roles_show;
-
-			mode->attrs[3] = &mode->roles_attr.attr;
-		}
-
-		mode->group.attrs = mode->attrs;
-		mode->group.name = mode->group_name;
+	struct typec_altmode *alt = to_altmode(dev);
 
-		alt->mode_groups[i] = &mode->group;
-	}
+	kfree(alt);
 }
 
 static ssize_t svid_show(struct device *dev, struct device_attribute *attr,
@@ -367,16 +299,6 @@ static struct attribute *typec_altmode_attrs[] = {
 };
 ATTRIBUTE_GROUPS(typec_altmode);
 
-static void typec_altmode_release(struct device *dev)
-{
-	struct typec_altmode *alt = to_altmode(dev);
-	int i;
-
-	for (i = 0; i < alt->n_modes; i++)
-		kfree(alt->modes[i].desc);
-	kfree(alt);
-}
-
 static const struct device_type typec_altmode_dev_type = {
 	.name = "typec_alternate_mode",
 	.groups = typec_altmode_groups,
@@ -395,13 +317,27 @@ typec_register_altmode(struct device *parent,
 		return ERR_PTR(-ENOMEM);
 
 	alt->svid = desc->svid;
-	alt->n_modes = desc->n_modes;
-	typec_init_modes(alt, desc->modes, is_typec_port(parent));
+	alt->mode = desc->mode;
+	alt->vdo = desc->vdo;
+	alt->roles = desc->roles;
+
+	alt->attrs[0] = &dev_attr_vdo.attr;
+	alt->attrs[1] = &dev_attr_description.attr;
+	alt->attrs[2] = &dev_attr_active.attr;
+
+	if (is_typec_port(parent))
+		alt->attrs[3] = &dev_attr_supported_roles.attr;
+
+	sprintf(alt->group_name, "mode%d", desc->mode);
+	alt->group.name = alt->group_name;
+	alt->group.attrs = alt->attrs;
+	alt->groups[0] = &alt->group;
 
 	alt->dev.parent = parent;
-	alt->dev.groups = alt->mode_groups;
+	alt->dev.groups = alt->groups;
 	alt->dev.type = &typec_altmode_dev_type;
-	dev_set_name(&alt->dev, "svid-%04x", alt->svid);
+	dev_set_name(&alt->dev, "%s-%04x:%u", dev_name(parent),
+		     alt->svid, alt->mode);
 
 	ret = device_register(&alt->dev);
 	if (ret) {
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index a18af298d96d..6b57e7132e64 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -310,8 +310,8 @@ struct tcpm_port {
 
 	/* Alternate mode data */
 	struct pd_mode_data mode_data;
-	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX];
-	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX];
+	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX * 6];
+	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX * 6];
 
 	/* Deadline in jiffies to exit src_try_wait state */
 	unsigned long max_wait;
@@ -995,7 +995,6 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 {
 	struct pd_mode_data *pmdata = &port->mode_data;
 	struct typec_altmode_desc *paltmode;
-	struct typec_mode_desc *pmode;
 	int i;
 
 	if (pmdata->altmodes >= ARRAY_SIZE(port->partner_altmode)) {
@@ -1003,32 +1002,28 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 		return;
 	}
 
-	paltmode = &pmdata->altmode_desc[pmdata->altmodes];
-	memset(paltmode, 0, sizeof(*paltmode));
+	for (i = 1; i < cnt; i++) {
+		paltmode = &pmdata->altmode_desc[pmdata->altmodes];
+		memset(paltmode, 0, sizeof(*paltmode));
 
-	paltmode->svid = pmdata->svids[pmdata->svid_index];
+		paltmode->svid = pmdata->svids[pmdata->svid_index];
+		paltmode->mode = i;
+		paltmode->vdo = le32_to_cpu(payload[i]);
 
-	tcpm_log(port, " Alternate mode %d: SVID 0x%04x",
-		 pmdata->altmodes, paltmode->svid);
+		tcpm_log(port, " Alternate mode %d: SVID 0x%04x, VDO %d: 0x%08x",
+			 pmdata->altmodes, paltmode->svid,
+			 paltmode->mode, paltmode->vdo);
 
-	for (i = 1; i < cnt && paltmode->n_modes < ALTMODE_MAX_MODES; i++) {
-		pmode = &paltmode->modes[paltmode->n_modes];
-		memset(pmode, 0, sizeof(*pmode));
-		pmode->vdo = le32_to_cpu(payload[i]);
-		pmode->index = i - 1;
-		paltmode->n_modes++;
-		tcpm_log(port, "  VDO %d: 0x%08x",
-			 pmode->index, pmode->vdo);
-	}
-	port->partner_altmode[pmdata->altmodes] =
-		typec_partner_register_altmode(port->partner, paltmode);
-	if (!port->partner_altmode[pmdata->altmodes]) {
-		tcpm_log(port,
-			 "Failed to register alternate modes for SVID 0x%04x",
-			 paltmode->svid);
-		return;
+		port->partner_altmode[pmdata->altmodes] =
+			typec_partner_register_altmode(port->partner, paltmode);
+		if (!port->partner_altmode[pmdata->altmodes]) {
+			tcpm_log(port,
+				 "Failed to register modes for SVID 0x%04x",
+				 paltmode->svid);
+			return;
+		}
+		pmdata->altmodes++;
 	}
-	pmdata->altmodes++;
 }
 
 #define supports_modal(port)	PD_IDH_MODAL_SUPP((port)->partner_ident.id_header)
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 6c33b53d1162..2dcb1683075f 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -93,41 +93,23 @@ int typec_partner_set_identity(struct typec_partner *partner);
 int typec_cable_set_identity(struct typec_cable *cable);
 
 /*
- * struct typec_mode_desc - Individual Mode of an Alternate Mode
- * @index: Index of the Mode within the SVID
+ * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor
+ * @svid: Standard or Vendor ID
+ * @mode: Index of the Mode
  * @vdo: VDO returned by Discover Modes USB PD command
- * @desc: Optional human readable description of the mode
  * @roles: Only for ports. DRP if the mode is available in both roles
  *
- * Description of a mode of an Alternate Mode which a connector, cable plug or
- * partner supports. Every mode will have it's own sysfs group. The details are
- * the VDO returned by discover modes command, description for the mode and
- * active flag telling has the mode being entered or not.
+ * Description of an Alternate Mode which a connector, cable plug or partner
+ * supports.
  */
-struct typec_mode_desc {
-	int			index;
+struct typec_altmode_desc {
+	u16			svid;
+	u8			mode;
 	u32			vdo;
-	char			*desc;
 	/* Only used with ports */
 	enum typec_port_type	roles;
 };
 
-/*
- * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor
- * @svid: Standard or Vendor ID
- * @n_modes: Number of modes
- * @modes: Array of modes supported by the Alternate Mode
- *
- * Representation of an Alternate Mode that has SVID assigned by USB-IF. The
- * array of modes will list the modes of a particular SVID that are supported by
- * a connector, partner of a cable plug.
- */
-struct typec_altmode_desc {
-	u16			svid;
-	int			n_modes;
-	struct typec_mode_desc	modes[ALTMODE_MAX_MODES];
-};
-
 struct typec_altmode
 *typec_partner_register_altmode(struct typec_partner *partner,
 				const struct typec_altmode_desc *desc);
@@ -141,8 +123,7 @@ void typec_unregister_altmode(struct typec_altmode *altmode);
 
 struct typec_port *typec_altmode2port(struct typec_altmode *alt);
 
-void typec_altmode_update_active(struct typec_altmode *alt, int mode,
-				 bool active);
+void typec_altmode_update_active(struct typec_altmode *alt, bool active);
 
 enum typec_plug_index {
 	TYPEC_PLUG_SOP_P,
-- 
cgit v1.2.3


From 8a37d87d72f0c69f837229c04d2fcd7117ea57e7 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:50 +0300
Subject: usb: typec: Bus type for alternate modes

Introducing a simple bus for the alternate modes. Bus allows
binding drivers to the discovered alternate modes the
partners support.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/obsolete/sysfs-class-typec |  48 ++++
 Documentation/ABI/testing/sysfs-bus-typec    |  51 ++++
 Documentation/ABI/testing/sysfs-class-typec  |  62 +----
 Documentation/driver-api/usb/typec_bus.rst   | 136 +++++++++
 MAINTAINERS                                  |  11 +-
 drivers/usb/typec/Makefile                   |   2 +-
 drivers/usb/typec/bus.c                      | 401 +++++++++++++++++++++++++++
 drivers/usb/typec/bus.h                      |  38 +++
 drivers/usb/typec/class.c                    | 366 +++++++++++++++++++-----
 include/linux/mod_devicetable.h              |  15 +
 include/linux/usb/typec.h                    |  14 +-
 include/linux/usb/typec_altmode.h            | 160 +++++++++++
 scripts/mod/devicetable-offsets.c            |   4 +
 scripts/mod/file2alias.c                     |  13 +
 14 files changed, 1174 insertions(+), 147 deletions(-)
 create mode 100644 Documentation/ABI/obsolete/sysfs-class-typec
 create mode 100644 Documentation/ABI/testing/sysfs-bus-typec
 create mode 100644 Documentation/driver-api/usb/typec_bus.rst
 create mode 100644 drivers/usb/typec/bus.c
 create mode 100644 drivers/usb/typec/bus.h
 create mode 100644 include/linux/usb/typec_altmode.h

(limited to 'include')

diff --git a/Documentation/ABI/obsolete/sysfs-class-typec b/Documentation/ABI/obsolete/sysfs-class-typec
new file mode 100644
index 000000000000..32623514ee87
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-class-typec
@@ -0,0 +1,48 @@
+These files are deprecated and will be removed. The same files are available
+under /sys/bus/typec (see Documentation/ABI/testing/sysfs-bus-typec).
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/svid
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The SVID (Standard or Vendor ID) assigned by USB-IF for this
+		alternate mode.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Every supported mode will have its own directory. The name of
+		a mode will be "mode<index>" (for example mode1), where <index>
+		is the actual index to the mode VDO returned by Discover Modes
+		USB power delivery command.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/description
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows description of the mode. The description is optional for
+		the drivers, just like with the Billboard Devices.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/vdo
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the VDO in hexadecimal returned by Discover Modes command
+		for this mode.
+
+What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/active
+Date:		April 2017
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows if the mode is active or not. The attribute can be used
+		for entering/exiting the mode with partners and cable plugs, and
+		with the port alternate modes it can be used for disabling
+		support for specific alternate modes. Entering/exiting modes is
+		supported as synchronous operation so write(2) to the attribute
+		does not return until the enter/exit mode operation has
+		finished. The attribute is notified when the mode is
+		entered/exited so poll(2) on the attribute wakes up.
+		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
+
+		Valid values: yes, no
diff --git a/Documentation/ABI/testing/sysfs-bus-typec b/Documentation/ABI/testing/sysfs-bus-typec
new file mode 100644
index 000000000000..205d9c91e2e1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-typec
@@ -0,0 +1,51 @@
+What:		/sys/bus/typec/devices/.../active
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows if the mode is active or not. The attribute can be used
+		for entering/exiting the mode. Entering/exiting modes is
+		supported as synchronous operation so write(2) to the attribute
+		does not return until the enter/exit mode operation has
+		finished. The attribute is notified when the mode is
+		entered/exited so poll(2) on the attribute wakes up.
+		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
+
+		Valid values are boolean.
+
+What:		/sys/bus/typec/devices/.../description
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows description of the mode. The description is optional for
+		the drivers, just like with the Billboard Devices.
+
+What:		/sys/bus/typec/devices/.../mode
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The index number of the mode returned by Discover Modes USB
+		Power Delivery command. Depending on the alternate mode, the
+		mode index may be significant.
+
+		With some alternate modes (SVIDs), the mode index is assigned
+		for specific functionality in the specification for that
+		alternate mode.
+
+		With other alternate modes, the mode index values are not
+		assigned, and can not be therefore used for identification. When
+		the mode index is not assigned, identifying the alternate mode
+		must be done with either mode VDO or the description.
+
+What:		/sys/bus/typec/devices/.../svid
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		The Standard or Vendor ID (SVID) assigned by USB-IF for this
+		alternate mode.
+
+What:		/sys/bus/typec/devices/.../vdo
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the VDO in hexadecimal returned by Discover Modes command
+		for this mode.
diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec
index 5be552e255e9..d7647b258c3c 100644
--- a/Documentation/ABI/testing/sysfs-class-typec
+++ b/Documentation/ABI/testing/sysfs-class-typec
@@ -222,70 +222,12 @@ Description:
 		available. The value can be polled.
 
 
-Alternate Mode devices.
+USB Type-C port alternate mode devices.
 
-The alternate modes will have Standard or Vendor ID (SVID) assigned by USB-IF.
-The ports, partners and cable plugs can have alternate modes. A supported SVID
-will consist of a set of modes. Every SVID a port/partner/plug supports will
-have a device created for it, and every supported mode for a supported SVID will
-have its own directory under that device. Below <dev> refers to the device for
-the alternate mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/svid
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		The SVID (Standard or Vendor ID) assigned by USB-IF for this
-		alternate mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Every supported mode will have its own directory. The name of
-		a mode will be "mode<index>" (for example mode1), where <index>
-		is the actual index to the mode VDO returned by Discover Modes
-		USB power delivery command.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/description
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows description of the mode. The description is optional for
-		the drivers, just like with the Billboard Devices.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/vdo
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows the VDO in hexadecimal returned by Discover Modes command
-		for this mode.
-
-What:		/sys/class/typec/<port|partner|cable>/<dev>/mode<index>/active
-Date:		April 2017
-Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
-Description:
-		Shows if the mode is active or not. The attribute can be used
-		for entering/exiting the mode with partners and cable plugs, and
-		with the port alternate modes it can be used for disabling
-		support for specific alternate modes. Entering/exiting modes is
-		supported as synchronous operation so write(2) to the attribute
-		does not return until the enter/exit mode operation has
-		finished. The attribute is notified when the mode is
-		entered/exited so poll(2) on the attribute wakes up.
-		Entering/exiting a mode will also generate uevent KOBJ_CHANGE.
-
-		Valid values: yes, no
-
-What:		/sys/class/typec/<port>/<dev>/mode<index>/supported_roles
+What:		/sys/class/typec/<port>/<alt mode>/supported_roles
 Date:		April 2017
 Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 Description:
 		Space separated list of the supported roles.
 
-		This attribute is available for the devices describing the
-		alternate modes a port supports, and it will not be exposed with
-		the devices presenting the alternate modes the partners or cable
-		plugs support.
-
 		Valid values: source, sink
diff --git a/Documentation/driver-api/usb/typec_bus.rst b/Documentation/driver-api/usb/typec_bus.rst
new file mode 100644
index 000000000000..d5eec1715b5b
--- /dev/null
+++ b/Documentation/driver-api/usb/typec_bus.rst
@@ -0,0 +1,136 @@
+
+API for USB Type-C Alternate Mode drivers
+=========================================
+
+Introduction
+------------
+
+Alternate modes require communication with the partner using Vendor Defined
+Messages (VDM) as defined in USB Type-C and USB Power Delivery Specifications.
+The communication is SVID (Standard or Vendor ID) specific, i.e. specific for
+every alternate mode, so every alternate mode will need a custom driver.
+
+USB Type-C bus allows binding a driver to the discovered partner alternate
+modes by using the SVID and the mode number.
+
+USB Type-C Connector Class provides a device for every alternate mode a port
+supports, and separate device for every alternate mode the partner supports.
+The drivers for the alternate modes are bound to the partner alternate mode
+devices, and the port alternate mode devices must be handled by the port
+drivers.
+
+When a new partner alternate mode device is registered, it is linked to the
+alternate mode device of the port that the partner is attached to, that has
+matching SVID and mode. Communication between the port driver and alternate mode
+driver will happen using the same API.
+
+The port alternate mode devices are used as a proxy between the partner and the
+alternate mode drivers, so the port drivers are only expected to pass the SVID
+specific commands from the alternate mode drivers to the partner, and from the
+partners to the alternate mode drivers. No direct SVID specific communication is
+needed from the port drivers, but the port drivers need to provide the operation
+callbacks for the port alternate mode devices, just like the alternate mode
+drivers need to provide them for the partner alternate mode devices.
+
+Usage:
+------
+
+General
+~~~~~~~
+
+By default, the alternate mode drivers are responsible for entering the mode.
+It is also possible to leave the decision about entering the mode to the user
+space (See Documentation/ABI/testing/sysfs-class-typec). Port drivers should not
+enter any modes on their own.
+
+``->vdm`` is the most important callback in the operation callbacks vector. It
+will be used to deliver all the SVID specific commands from the partner to the
+alternate mode driver, and vice versa in case of port drivers. The drivers send
+the SVID specific commands to each other using :c:func:`typec_altmode_vmd()`.
+
+If the communication with the partner using the SVID specific commands results
+in need to reconfigure the pins on the connector, the alternate mode driver
+needs to notify the bus using :c:func:`typec_altmode_notify()`. The driver
+passes the negotiated SVID specific pin configuration value to the function as
+parameter. The bus driver will then configure the mux behind the connector using
+that value as the state value for the mux, and also call blocking notification
+chain to notify the external drivers about the state of the connector that need
+to know it.
+
+NOTE: The SVID specific pin configuration values must always start from
+``TYPEC_STATE_MODAL``. USB Type-C specification defines two default states for
+the connector: ``TYPEC_STATE_USB`` and ``TYPEC_STATE_SAFE``. These values are
+reserved by the bus as the first possible values for the state. When the
+alternate mode is entered, the bus will put the connector into
+``TYPEC_STATE_SAFE`` before sending Enter or Exit Mode command as defined in USB
+Type-C Specification, and also put the connector back to ``TYPEC_STATE_USB``
+after the mode has been exited.
+
+An example of working definitions for SVID specific pin configurations would
+look like this:
+
+enum {
+	ALTMODEX_CONF_A = TYPEC_STATE_MODAL,
+	ALTMODEX_CONF_B,
+	...
+};
+
+Helper macro ``TYPEC_MODAL_STATE()`` can also be used:
+
+#define ALTMODEX_CONF_A = TYPEC_MODAL_STATE(0);
+#define ALTMODEX_CONF_B = TYPEC_MODAL_STATE(1);
+
+Notification chain
+~~~~~~~~~~~~~~~~~~
+
+The drivers for the components that the alternate modes are designed for need to
+get details regarding the results of the negotiation with the partner, and the
+pin configuration of the connector. In case of DisplayPort alternate mode for
+example, the GPU drivers will need to know those details. In case of
+Thunderbolt alternate mode, the thunderbolt drivers will need to know them, and
+so on.
+
+The notification chain is designed for this purpose. The drivers can register
+notifiers with :c:func:`typec_altmode_register_notifier()`.
+
+Cable plug alternate modes
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The alternate mode drivers are not bound to cable plug alternate mode devices,
+only to the partner alternate mode devices. If the alternate mode supports, or
+requires, a cable that responds to SOP Prime, and optionally SOP Double Prime
+messages, the driver for that alternate mode must request handle to the cable
+plug alternate modes using :c:func:`typec_altmode_get_plug()`, and take over
+their control.
+
+Driver API
+----------
+
+Alternate mode driver registering/unregistering
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_register_driver typec_altmode_unregister_driver
+
+Alternate mode driver operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_enter typec_altmode_exit typec_altmode_attention typec_altmode_vdm typec_altmode_notify
+
+API for the port drivers
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_match_altmode
+
+Cable Plug operations
+~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/usb/typec/bus.c
+   :functions: typec_altmode_get_plug typec_altmode_put_plug
+
+Notifications
+~~~~~~~~~~~~~
+.. kernel-doc:: drivers/usb/typec/class.c
+   :functions: typec_altmode_register_notifier typec_altmode_unregister_notifier
diff --git a/MAINTAINERS b/MAINTAINERS
index 07d1576fc766..f35f39f2072e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14955,7 +14955,7 @@ L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	drivers/usb/typec/mux/pi3usb30532.c
 
-USB TYPEC SUBSYSTEM
+USB TYPEC CLASS
 M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
@@ -14964,6 +14964,15 @@ F:	Documentation/driver-api/usb/typec.rst
 F:	drivers/usb/typec/
 F:	include/linux/usb/typec.h
 
+USB TYPEC BUS FOR ALTERNATE MODES
+M:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	Documentation/ABI/testing/sysfs-bus-typec
+F:	Documentation/driver-api/usb/typec_bus.rst
+F:	drivers/usb/typec/altmodes/
+F:	include/linux/usb/typec_altmode.h
+
 USB UHCI DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
 L:	linux-usb@vger.kernel.org
diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile
index 46f86ee134a2..335ee06748fc 100644
--- a/drivers/usb/typec/Makefile
+++ b/drivers/usb/typec/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TYPEC)		+= typec.o
-typec-y				:= class.o mux.o
+typec-y				:= class.o mux.o bus.o
 obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
 obj-y				+= fusb302/
 obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
new file mode 100644
index 000000000000..999d7904172a
--- /dev/null
+++ b/drivers/usb/typec/bus.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Bus for USB Type-C Alternate Modes
+ *
+ * Copyright (C) 2018 Intel Corporation
+ * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+ */
+
+#include <linux/usb/pd_vdo.h>
+
+#include "bus.h"
+
+static inline int typec_altmode_set_mux(struct altmode *alt, u8 state)
+{
+	return alt->mux ? alt->mux->set(alt->mux, state) : 0;
+}
+
+static int typec_altmode_set_state(struct typec_altmode *adev, int state)
+{
+	bool is_port = is_typec_port(adev->dev.parent);
+	struct altmode *port_altmode;
+	int ret;
+
+	port_altmode = is_port ? to_altmode(adev) : to_altmode(adev)->partner;
+
+	ret = typec_altmode_set_mux(port_altmode, state);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(&port_altmode->nh, state, NULL);
+
+	return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+/* Common API */
+
+/**
+ * typec_altmode_notify - Communication between the OS and alternate mode driver
+ * @adev: Handle to the alternate mode
+ * @conf: Alternate mode specific configuration value
+ * @data: Alternate mode specific data
+ *
+ * The primary purpose for this function is to allow the alternate mode drivers
+ * to tell which pin configuration has been negotiated with the partner. That
+ * information will then be used for example to configure the muxes.
+ * Communication to the other direction is also possible, and low level device
+ * drivers can also send notifications to the alternate mode drivers. The actual
+ * communication will be specific for every SVID.
+ */
+int typec_altmode_notify(struct typec_altmode *adev,
+			 unsigned long conf, void *data)
+{
+	bool is_port = is_typec_port(adev->dev.parent);
+	struct altmode *altmode;
+	struct altmode *partner;
+	int ret;
+
+	if (!adev)
+		return 0;
+
+	altmode = to_altmode(adev);
+
+	if (!altmode->partner)
+		return -ENODEV;
+
+	partner = altmode->partner;
+
+	ret = typec_altmode_set_mux(is_port ? altmode : partner, (u8)conf);
+	if (ret)
+		return ret;
+
+	blocking_notifier_call_chain(is_port ? &altmode->nh : &partner->nh,
+				     conf, data);
+
+	if (partner->adev.ops && partner->adev.ops->notify)
+		return partner->adev.ops->notify(&partner->adev, conf, data);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_notify);
+
+/**
+ * typec_altmode_enter - Enter Mode
+ * @adev: The alternate mode
+ *
+ * The alternate mode drivers use this function to enter mode. The port drivers
+ * use this to inform the alternate mode drivers that the partner has initiated
+ * Enter Mode command.
+ */
+int typec_altmode_enter(struct typec_altmode *adev)
+{
+	struct altmode *partner = to_altmode(adev)->partner;
+	struct typec_altmode *pdev = &partner->adev;
+	int ret;
+
+	if (!adev || adev->active)
+		return 0;
+
+	if (!pdev->ops || !pdev->ops->enter)
+		return -EOPNOTSUPP;
+
+	/* Moving to USB Safe State */
+	ret = typec_altmode_set_state(adev, TYPEC_STATE_SAFE);
+	if (ret)
+		return ret;
+
+	/* Enter Mode */
+	return pdev->ops->enter(pdev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_enter);
+
+/**
+ * typec_altmode_exit - Exit Mode
+ * @adev: The alternate mode
+ *
+ * The partner of @adev has initiated Exit Mode command.
+ */
+int typec_altmode_exit(struct typec_altmode *adev)
+{
+	struct altmode *partner = to_altmode(adev)->partner;
+	struct typec_altmode *pdev = &partner->adev;
+	int ret;
+
+	if (!adev || !adev->active)
+		return 0;
+
+	if (!pdev->ops || !pdev->ops->enter)
+		return -EOPNOTSUPP;
+
+	/* Moving to USB Safe State */
+	ret = typec_altmode_set_state(adev, TYPEC_STATE_SAFE);
+	if (ret)
+		return ret;
+
+	/* Exit Mode command */
+	return pdev->ops->exit(pdev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_exit);
+
+/**
+ * typec_altmode_attention - Attention command
+ * @adev: The alternate mode
+ * @vdo: VDO for the Attention command
+ *
+ * Notifies the partner of @adev about Attention command.
+ */
+void typec_altmode_attention(struct typec_altmode *adev, u32 vdo)
+{
+	struct typec_altmode *pdev = &to_altmode(adev)->partner->adev;
+
+	if (pdev->ops && pdev->ops->attention)
+		pdev->ops->attention(pdev, vdo);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_attention);
+
+/**
+ * typec_altmode_vdm - Send Vendor Defined Messages (VDM) to the partner
+ * @adev: Alternate mode handle
+ * @header: VDM Header
+ * @vdo: Array of Vendor Defined Data Objects
+ * @count: Number of Data Objects
+ *
+ * The alternate mode drivers use this function for SVID specific communication
+ * with the partner. The port drivers use it to deliver the Structured VDMs
+ * received from the partners to the alternate mode drivers.
+ */
+int typec_altmode_vdm(struct typec_altmode *adev,
+		      const u32 header, const u32 *vdo, int count)
+{
+	struct typec_altmode *pdev;
+	struct altmode *altmode;
+
+	if (!adev)
+		return 0;
+
+	altmode = to_altmode(adev);
+
+	if (!altmode->partner)
+		return -ENODEV;
+
+	pdev = &altmode->partner->adev;
+
+	if (!pdev->ops || !pdev->ops->vdm)
+		return -EOPNOTSUPP;
+
+	return pdev->ops->vdm(pdev, header, vdo, count);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_vdm);
+
+const struct typec_altmode *
+typec_altmode_get_partner(struct typec_altmode *adev)
+{
+	return &to_altmode(adev)->partner->adev;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_get_partner);
+
+/* -------------------------------------------------------------------------- */
+/* API for the alternate mode drivers */
+
+/**
+ * typec_altmode_get_plug - Find cable plug alternate mode
+ * @adev: Handle to partner alternate mode
+ * @index: Cable plug index
+ *
+ * Increment reference count for cable plug alternate mode device. Returns
+ * handle to the cable plug alternate mode, or NULL if none is found.
+ */
+struct typec_altmode *typec_altmode_get_plug(struct typec_altmode *adev,
+					     enum typec_plug_index index)
+{
+	struct altmode *port = to_altmode(adev)->partner;
+
+	if (port->plug[index]) {
+		get_device(&port->plug[index]->adev.dev);
+		return &port->plug[index]->adev;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_get_plug);
+
+/**
+ * typec_altmode_put_plug - Decrement cable plug alternate mode reference count
+ * @plug: Handle to the cable plug alternate mode
+ */
+void typec_altmode_put_plug(struct typec_altmode *plug)
+{
+	if (plug)
+		put_device(&plug->dev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_put_plug);
+
+int __typec_altmode_register_driver(struct typec_altmode_driver *drv,
+				    struct module *module)
+{
+	if (!drv->probe)
+		return -EINVAL;
+
+	drv->driver.owner = module;
+	drv->driver.bus = &typec_bus;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(__typec_altmode_register_driver);
+
+void typec_altmode_unregister_driver(struct typec_altmode_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_unregister_driver);
+
+/* -------------------------------------------------------------------------- */
+/* API for the port drivers */
+
+/**
+ * typec_match_altmode - Match SVID to an array of alternate modes
+ * @altmodes: Array of alternate modes
+ * @n: Number of elements in the array, or -1 for NULL termiated arrays
+ * @svid: Standard or Vendor ID to match with
+ *
+ * Return pointer to an alternate mode with SVID mathing @svid, or NULL when no
+ * match is found.
+ */
+struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes,
+					  size_t n, u16 svid, u8 mode)
+{
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (!altmodes[i])
+			break;
+		if (altmodes[i]->svid == svid && altmodes[i]->mode == mode)
+			return altmodes[i];
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(typec_match_altmode);
+
+/* -------------------------------------------------------------------------- */
+
+static ssize_t
+description_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct typec_altmode *alt = to_typec_altmode(dev);
+
+	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
+}
+static DEVICE_ATTR_RO(description);
+
+static struct attribute *typec_attrs[] = {
+	&dev_attr_description.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(typec);
+
+static int typec_match(struct device *dev, struct device_driver *driver)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(driver);
+	struct typec_altmode *altmode = to_typec_altmode(dev);
+	const struct typec_device_id *id;
+
+	for (id = drv->id_table; id->svid; id++)
+		if (id->svid == altmode->svid &&
+		    (id->mode == TYPEC_ANY_MODE || id->mode == altmode->mode))
+			return 1;
+	return 0;
+}
+
+static int typec_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct typec_altmode *altmode = to_typec_altmode(dev);
+
+	if (add_uevent_var(env, "SVID=%04X", altmode->svid))
+		return -ENOMEM;
+
+	if (add_uevent_var(env, "MODE=%u", altmode->mode))
+		return -ENOMEM;
+
+	return add_uevent_var(env, "MODALIAS=typec:id%04Xm%02X",
+			      altmode->svid, altmode->mode);
+}
+
+static int typec_altmode_create_links(struct altmode *alt)
+{
+	struct device *port_dev = &alt->partner->adev.dev;
+	struct device *dev = &alt->adev.dev;
+	int err;
+
+	err = sysfs_create_link(&dev->kobj, &port_dev->kobj, "port");
+	if (err)
+		return err;
+
+	err = sysfs_create_link(&port_dev->kobj, &dev->kobj, "partner");
+	if (err)
+		sysfs_remove_link(&dev->kobj, "port");
+
+	return err;
+}
+
+static void typec_altmode_remove_links(struct altmode *alt)
+{
+	sysfs_remove_link(&alt->partner->adev.dev.kobj, "partner");
+	sysfs_remove_link(&alt->adev.dev.kobj, "port");
+}
+
+static int typec_probe(struct device *dev)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(dev->driver);
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+	int ret;
+
+	/* Fail if the port does not support the alternate mode */
+	if (!altmode->partner)
+		return -ENODEV;
+
+	ret = typec_altmode_create_links(altmode);
+	if (ret) {
+		dev_warn(dev, "failed to create symlinks\n");
+		return ret;
+	}
+
+	ret = drv->probe(adev);
+	if (ret)
+		typec_altmode_remove_links(altmode);
+
+	return ret;
+}
+
+static int typec_remove(struct device *dev)
+{
+	struct typec_altmode_driver *drv = to_altmode_driver(dev->driver);
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+
+	typec_altmode_remove_links(altmode);
+
+	if (drv->remove)
+		drv->remove(to_typec_altmode(dev));
+
+	if (adev->active) {
+		WARN_ON(typec_altmode_set_state(adev, TYPEC_STATE_SAFE));
+		typec_altmode_update_active(adev, false);
+	}
+
+	adev->desc = NULL;
+	adev->ops = NULL;
+
+	return 0;
+}
+
+struct bus_type typec_bus = {
+	.name = "typec",
+	.dev_groups = typec_groups,
+	.match = typec_match,
+	.uevent = typec_uevent,
+	.probe = typec_probe,
+	.remove = typec_remove,
+};
diff --git a/drivers/usb/typec/bus.h b/drivers/usb/typec/bus.h
new file mode 100644
index 000000000000..62aaf8b56bde
--- /dev/null
+++ b/drivers/usb/typec/bus.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __USB_TYPEC_ALTMODE_H__
+#define __USB_TYPEC_ALTMODE_H__
+
+#include <linux/usb/typec_altmode.h>
+#include <linux/usb/typec_mux.h>
+
+struct bus_type;
+
+struct altmode {
+	unsigned int			id;
+	struct typec_altmode		adev;
+	struct typec_mux		*mux;
+
+	enum typec_port_data		roles;
+
+	struct attribute		*attrs[5];
+	char				group_name[6];
+	struct attribute_group		group;
+	const struct attribute_group	*groups[2];
+
+	struct altmode			*partner;
+	struct altmode			*plug[2];
+
+	struct blocking_notifier_head	nh;
+};
+
+#define to_altmode(d) container_of(d, struct altmode, adev)
+
+extern struct bus_type typec_bus;
+extern const struct device_type typec_altmode_dev_type;
+extern const struct device_type typec_port_dev_type;
+
+#define is_typec_altmode(_dev_) (_dev_->type == &typec_altmode_dev_type)
+#define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type)
+
+#endif /* __USB_TYPEC_ALTMODE_H__ */
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 96dc9c4f73f0..c202975f8097 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -10,28 +10,13 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-#include <linux/usb/typec.h>
-#include <linux/usb/typec_mux.h>
 
-struct typec_altmode {
-	struct device			dev;
-	u16				svid;
-	u8				mode;
-
-	u32				vdo;
-	char				*desc;
-	enum typec_port_type		roles;
-	unsigned int			active:1;
-
-	struct attribute		*attrs[5];
-	char				group_name[6];
-	struct attribute_group		group;
-	const struct attribute_group	*groups[2];
-};
+#include "bus.h"
 
 struct typec_plug {
 	struct device			dev;
 	enum typec_plug_index		index;
+	struct ida			mode_ids;
 };
 
 struct typec_cable {
@@ -46,11 +31,13 @@ struct typec_partner {
 	unsigned int			usb_pd:1;
 	struct usb_pd_identity		*identity;
 	enum typec_accessory		accessory;
+	struct ida			mode_ids;
 };
 
 struct typec_port {
 	unsigned int			id;
 	struct device			dev;
+	struct ida			mode_ids;
 
 	int				prefer_role;
 	enum typec_data_role		data_role;
@@ -71,17 +58,14 @@ struct typec_port {
 #define to_typec_plug(_dev_) container_of(_dev_, struct typec_plug, dev)
 #define to_typec_cable(_dev_) container_of(_dev_, struct typec_cable, dev)
 #define to_typec_partner(_dev_) container_of(_dev_, struct typec_partner, dev)
-#define to_altmode(_dev_) container_of(_dev_, struct typec_altmode, dev)
 
 static const struct device_type typec_partner_dev_type;
 static const struct device_type typec_cable_dev_type;
 static const struct device_type typec_plug_dev_type;
-static const struct device_type typec_port_dev_type;
 
 #define is_typec_partner(_dev_) (_dev_->type == &typec_partner_dev_type)
 #define is_typec_cable(_dev_) (_dev_->type == &typec_cable_dev_type)
 #define is_typec_plug(_dev_) (_dev_->type == &typec_plug_dev_type)
-#define is_typec_port(_dev_) (_dev_->type == &typec_port_dev_type)
 
 static DEFINE_IDA(typec_index_ida);
 static struct class *typec_class;
@@ -163,25 +147,148 @@ static void typec_report_identity(struct device *dev)
 /* ------------------------------------------------------------------------- */
 /* Alternate Modes */
 
+static int altmode_match(struct device *dev, void *data)
+{
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct typec_device_id *id = data;
+
+	if (!is_typec_altmode(dev))
+		return 0;
+
+	return ((adev->svid == id->svid) && (adev->mode == id->mode));
+}
+
+static void typec_altmode_set_partner(struct altmode *altmode)
+{
+	struct typec_altmode *adev = &altmode->adev;
+	struct typec_device_id id = { adev->svid, adev->mode, };
+	struct typec_port *port = typec_altmode2port(adev);
+	struct altmode *partner;
+	struct device *dev;
+
+	dev = device_find_child(&port->dev, &id, altmode_match);
+	if (!dev)
+		return;
+
+	/* Bind the port alt mode to the partner/plug alt mode. */
+	partner = to_altmode(to_typec_altmode(dev));
+	altmode->partner = partner;
+
+	/* Bind the partner/plug alt mode to the port alt mode. */
+	if (is_typec_plug(adev->dev.parent)) {
+		struct typec_plug *plug = to_typec_plug(adev->dev.parent);
+
+		partner->plug[plug->index] = altmode;
+	} else {
+		partner->partner = altmode;
+	}
+}
+
+static void typec_altmode_put_partner(struct altmode *altmode)
+{
+	struct altmode *partner = altmode->partner;
+	struct typec_altmode *adev;
+
+	if (!partner)
+		return;
+
+	adev = &partner->adev;
+
+	if (is_typec_plug(adev->dev.parent)) {
+		struct typec_plug *plug = to_typec_plug(adev->dev.parent);
+
+		partner->plug[plug->index] = NULL;
+	} else {
+		partner->partner = NULL;
+	}
+	put_device(&adev->dev);
+}
+
+static int __typec_port_match(struct device *dev, const void *name)
+{
+	return !strcmp((const char *)name, dev_name(dev));
+}
+
+static void *typec_port_match(struct device_connection *con, int ep, void *data)
+{
+	return class_find_device(typec_class, NULL, con->endpoint[ep],
+				 __typec_port_match);
+}
+
+struct typec_altmode *
+typec_altmode_register_notifier(struct device *dev, u16 svid, u8 mode,
+				struct notifier_block *nb)
+{
+	struct typec_device_id id = { svid, mode, };
+	struct device *altmode_dev;
+	struct device *port_dev;
+	struct altmode *altmode;
+	int ret;
+
+	/* Find the port linked to the caller */
+	port_dev = device_connection_find_match(dev, NULL, NULL,
+						typec_port_match);
+	if (IS_ERR_OR_NULL(port_dev))
+		return port_dev ? ERR_CAST(port_dev) : ERR_PTR(-ENODEV);
+
+	/* Find the altmode with matching svid */
+	altmode_dev = device_find_child(port_dev, &id, altmode_match);
+
+	put_device(port_dev);
+
+	if (!altmode_dev)
+		return ERR_PTR(-ENODEV);
+
+	altmode = to_altmode(to_typec_altmode(altmode_dev));
+
+	/* Register notifier */
+	ret = blocking_notifier_chain_register(&altmode->nh, nb);
+	if (ret) {
+		put_device(altmode_dev);
+		return ERR_PTR(ret);
+	}
+
+	return &altmode->adev;
+}
+EXPORT_SYMBOL_GPL(typec_altmode_register_notifier);
+
+void typec_altmode_unregister_notifier(struct typec_altmode *adev,
+				       struct notifier_block *nb)
+{
+	struct altmode *altmode = to_altmode(adev);
+
+	blocking_notifier_chain_unregister(&altmode->nh, nb);
+	put_device(&adev->dev);
+}
+EXPORT_SYMBOL_GPL(typec_altmode_unregister_notifier);
+
 /**
  * typec_altmode_update_active - Report Enter/Exit mode
- * @alt: Handle to the alternate mode
+ * @adev: Handle to the alternate mode
  * @active: True when the mode has been entered
  *
  * If a partner or cable plug executes Enter/Exit Mode command successfully, the
  * drivers use this routine to report the updated state of the mode.
  */
-void typec_altmode_update_active(struct typec_altmode *alt, bool active)
+void typec_altmode_update_active(struct typec_altmode *adev, bool active)
 {
 	char dir[6];
 
-	if (alt->active == active)
+	if (adev->active == active)
 		return;
 
-	alt->active = active;
-	snprintf(dir, sizeof(dir), "mode%d", alt->mode);
-	sysfs_notify(&alt->dev.kobj, dir, "active");
-	kobject_uevent(&alt->dev.kobj, KOBJ_CHANGE);
+	if (!is_typec_port(adev->dev.parent)) {
+		if (!active)
+			module_put(adev->dev.driver->owner);
+		else
+			WARN_ON(!try_module_get(adev->dev.driver->owner));
+	}
+
+	adev->active = active;
+	snprintf(dir, sizeof(dir), "mode%d", adev->mode);
+	sysfs_notify(&adev->dev.kobj, dir, "active");
+	sysfs_notify(&adev->dev.kobj, NULL, "active");
+	kobject_uevent(&adev->dev.kobj, KOBJ_CHANGE);
 }
 EXPORT_SYMBOL_GPL(typec_altmode_update_active);
 
@@ -208,7 +315,7 @@ EXPORT_SYMBOL_GPL(typec_altmode2port);
 static ssize_t
 vdo_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "0x%08x\n", alt->vdo);
 }
@@ -217,7 +324,7 @@ static DEVICE_ATTR_RO(vdo);
 static ssize_t
 description_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "%s\n", alt->desc ? alt->desc : "");
 }
@@ -226,7 +333,7 @@ static DEVICE_ATTR_RO(description);
 static ssize_t
 active_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *alt = to_typec_altmode(dev);
 
 	return sprintf(buf, "%s\n", alt->active ? "yes" : "no");
 }
@@ -234,21 +341,37 @@ active_show(struct device *dev, struct device_attribute *attr, char *buf)
 static ssize_t active_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t size)
 {
-	struct typec_altmode *alt = to_altmode(dev);
-	struct typec_port *port = typec_altmode2port(alt);
-	bool activate;
+	struct typec_altmode *adev = to_typec_altmode(dev);
+	struct altmode *altmode = to_altmode(adev);
+	bool enter;
 	int ret;
 
-	if (!port->cap->activate_mode)
-		return -EOPNOTSUPP;
-
-	ret = kstrtobool(buf, &activate);
+	ret = kstrtobool(buf, &enter);
 	if (ret)
 		return ret;
 
-	ret = port->cap->activate_mode(port->cap, alt->mode, activate);
-	if (ret)
-		return ret;
+	if (adev->active == enter)
+		return size;
+
+	if (is_typec_port(adev->dev.parent)) {
+		typec_altmode_update_active(adev, enter);
+
+		/* Make sure that the partner exits the mode before disabling */
+		if (altmode->partner && !enter && altmode->partner->adev.active)
+			typec_altmode_exit(&altmode->partner->adev);
+	} else if (altmode->partner) {
+		if (enter && !altmode->partner->adev.active) {
+			dev_warn(dev, "port has the mode disabled\n");
+			return -EPERM;
+		}
+	}
+
+	/* Note: If there is no driver, the mode will not be entered */
+	if (adev->ops && adev->ops->activate) {
+		ret = adev->ops->activate(adev, enter);
+		if (ret)
+			return ret;
+	}
 
 	return size;
 }
@@ -258,7 +381,7 @@ static ssize_t
 supported_roles_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct altmode *alt = to_altmode(to_typec_altmode(dev));
 	ssize_t ret;
 
 	switch (alt->roles) {
@@ -277,29 +400,72 @@ supported_roles_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(supported_roles);
 
-static void typec_altmode_release(struct device *dev)
+static ssize_t
+mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *adev = to_typec_altmode(dev);
 
-	kfree(alt);
+	return sprintf(buf, "%u\n", adev->mode);
 }
+static DEVICE_ATTR_RO(mode);
 
-static ssize_t svid_show(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static ssize_t
+svid_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-	struct typec_altmode *alt = to_altmode(dev);
+	struct typec_altmode *adev = to_typec_altmode(dev);
 
-	return sprintf(buf, "%04x\n", alt->svid);
+	return sprintf(buf, "%04x\n", adev->svid);
 }
 static DEVICE_ATTR_RO(svid);
 
 static struct attribute *typec_altmode_attrs[] = {
+	&dev_attr_active.attr,
+	&dev_attr_mode.attr,
 	&dev_attr_svid.attr,
+	&dev_attr_vdo.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(typec_altmode);
 
-static const struct device_type typec_altmode_dev_type = {
+static int altmode_id_get(struct device *dev)
+{
+	struct ida *ids;
+
+	if (is_typec_partner(dev))
+		ids = &to_typec_partner(dev)->mode_ids;
+	else if (is_typec_plug(dev))
+		ids = &to_typec_plug(dev)->mode_ids;
+	else
+		ids = &to_typec_port(dev)->mode_ids;
+
+	return ida_simple_get(ids, 0, 0, GFP_KERNEL);
+}
+
+static void altmode_id_remove(struct device *dev, int id)
+{
+	struct ida *ids;
+
+	if (is_typec_partner(dev))
+		ids = &to_typec_partner(dev)->mode_ids;
+	else if (is_typec_plug(dev))
+		ids = &to_typec_plug(dev)->mode_ids;
+	else
+		ids = &to_typec_port(dev)->mode_ids;
+
+	ida_simple_remove(ids, id);
+}
+
+static void typec_altmode_release(struct device *dev)
+{
+	struct altmode *alt = to_altmode(to_typec_altmode(dev));
+
+	typec_altmode_put_partner(alt);
+
+	altmode_id_remove(alt->adev.dev.parent, alt->id);
+	kfree(alt);
+}
+
+const struct device_type typec_altmode_dev_type = {
 	.name = "typec_alternate_mode",
 	.groups = typec_altmode_groups,
 	.release = typec_altmode_release,
@@ -309,58 +475,74 @@ static struct typec_altmode *
 typec_register_altmode(struct device *parent,
 		       const struct typec_altmode_desc *desc)
 {
-	struct typec_altmode *alt;
+	unsigned int id = altmode_id_get(parent);
+	bool is_port = is_typec_port(parent);
+	struct altmode *alt;
 	int ret;
 
 	alt = kzalloc(sizeof(*alt), GFP_KERNEL);
 	if (!alt)
 		return ERR_PTR(-ENOMEM);
 
-	alt->svid = desc->svid;
-	alt->mode = desc->mode;
-	alt->vdo = desc->vdo;
+	alt->adev.svid = desc->svid;
+	alt->adev.mode = desc->mode;
+	alt->adev.vdo = desc->vdo;
 	alt->roles = desc->roles;
+	alt->id = id;
 
 	alt->attrs[0] = &dev_attr_vdo.attr;
 	alt->attrs[1] = &dev_attr_description.attr;
 	alt->attrs[2] = &dev_attr_active.attr;
 
-	if (is_typec_port(parent))
+	if (is_port) {
 		alt->attrs[3] = &dev_attr_supported_roles.attr;
+		alt->adev.active = true; /* Enabled by default */
+	}
 
 	sprintf(alt->group_name, "mode%d", desc->mode);
 	alt->group.name = alt->group_name;
 	alt->group.attrs = alt->attrs;
 	alt->groups[0] = &alt->group;
 
-	alt->dev.parent = parent;
-	alt->dev.groups = alt->groups;
-	alt->dev.type = &typec_altmode_dev_type;
-	dev_set_name(&alt->dev, "%s-%04x:%u", dev_name(parent),
-		     alt->svid, alt->mode);
+	alt->adev.dev.parent = parent;
+	alt->adev.dev.groups = alt->groups;
+	alt->adev.dev.type = &typec_altmode_dev_type;
+	dev_set_name(&alt->adev.dev, "%s.%u", dev_name(parent), id);
+
+	/* Link partners and plugs with the ports */
+	if (is_port)
+		BLOCKING_INIT_NOTIFIER_HEAD(&alt->nh);
+	else
+		typec_altmode_set_partner(alt);
+
+	/* The partners are bind to drivers */
+	if (is_typec_partner(parent))
+		alt->adev.dev.bus = &typec_bus;
 
-	ret = device_register(&alt->dev);
+	ret = device_register(&alt->adev.dev);
 	if (ret) {
 		dev_err(parent, "failed to register alternate mode (%d)\n",
 			ret);
-		put_device(&alt->dev);
+		put_device(&alt->adev.dev);
 		return ERR_PTR(ret);
 	}
 
-	return alt;
+	return &alt->adev;
 }
 
 /**
  * typec_unregister_altmode - Unregister Alternate Mode
- * @alt: The alternate mode to be unregistered
+ * @adev: The alternate mode to be unregistered
  *
  * Unregister device created with typec_partner_register_altmode(),
  * typec_plug_register_altmode() or typec_port_register_altmode().
  */
-void typec_unregister_altmode(struct typec_altmode *alt)
+void typec_unregister_altmode(struct typec_altmode *adev)
 {
-	if (!IS_ERR_OR_NULL(alt))
-		device_unregister(&alt->dev);
+	if (IS_ERR_OR_NULL(adev))
+		return;
+	typec_mux_put(to_altmode(adev)->mux);
+	device_unregister(&adev->dev);
 }
 EXPORT_SYMBOL_GPL(typec_unregister_altmode);
 
@@ -398,6 +580,7 @@ static void typec_partner_release(struct device *dev)
 {
 	struct typec_partner *partner = to_typec_partner(dev);
 
+	ida_destroy(&partner->mode_ids);
 	kfree(partner);
 }
 
@@ -463,6 +646,7 @@ struct typec_partner *typec_register_partner(struct typec_port *port,
 	if (!partner)
 		return ERR_PTR(-ENOMEM);
 
+	ida_init(&partner->mode_ids);
 	partner->usb_pd = desc->usb_pd;
 	partner->accessory = desc->accessory;
 
@@ -511,6 +695,7 @@ static void typec_plug_release(struct device *dev)
 {
 	struct typec_plug *plug = to_typec_plug(dev);
 
+	ida_destroy(&plug->mode_ids);
 	kfree(plug);
 }
 
@@ -563,6 +748,7 @@ struct typec_plug *typec_register_plug(struct typec_cable *cable,
 
 	sprintf(name, "plug%d", desc->index);
 
+	ida_init(&plug->mode_ids);
 	plug->index = desc->index;
 	plug->dev.class = typec_class;
 	plug->dev.parent = &cable->dev;
@@ -1083,12 +1269,13 @@ static void typec_release(struct device *dev)
 	struct typec_port *port = to_typec_port(dev);
 
 	ida_simple_remove(&typec_index_ida, port->id);
+	ida_destroy(&port->mode_ids);
 	typec_switch_put(port->sw);
 	typec_mux_put(port->mux);
 	kfree(port);
 }
 
-static const struct device_type typec_port_dev_type = {
+const struct device_type typec_port_dev_type = {
 	.name = "typec_port",
 	.groups = typec_groups,
 	.uevent = typec_uevent,
@@ -1279,11 +1466,11 @@ EXPORT_SYMBOL_GPL(typec_get_orientation);
 
 /**
  * typec_set_mode - Set mode of operation for USB Type-C connector
- * @port: USB Type-C port for the connector
- * @mode: Operation mode for the connector
+ * @port: USB Type-C connector
+ * @mode: Accessory Mode, USB Operation or Safe State
  *
- * Set mode @mode for @port. This function will configure the muxes needed to
- * enter @mode.
+ * Configure @port for Accessory Mode @mode. This function will configure the
+ * muxes needed for @mode.
  */
 int typec_set_mode(struct typec_port *port, int mode)
 {
@@ -1297,6 +1484,7 @@ EXPORT_SYMBOL_GPL(typec_set_mode);
  * typec_port_register_altmode - Register USB Type-C Port Alternate Mode
  * @port: USB Type-C Port that supports the alternate mode
  * @desc: Description of the alternate mode
+ * @drvdata: Private pointer to driver specific info
  *
  * This routine is used to register an alternate mode that @port is capable of
  * supporting.
@@ -1307,7 +1495,23 @@ struct typec_altmode *
 typec_port_register_altmode(struct typec_port *port,
 			    const struct typec_altmode_desc *desc)
 {
-	return typec_register_altmode(&port->dev, desc);
+	struct typec_altmode *adev;
+	struct typec_mux *mux;
+	char id[10];
+
+	sprintf(id, "id%04xm%02x", desc->svid, desc->mode);
+
+	mux = typec_mux_get(port->dev.parent, id);
+	if (IS_ERR(mux))
+		return ERR_CAST(mux);
+
+	adev = typec_register_altmode(&port->dev, desc);
+	if (IS_ERR(adev))
+		typec_mux_put(mux);
+	else
+		to_altmode(adev)->mux = mux;
+
+	return adev;
 }
 EXPORT_SYMBOL_GPL(typec_port_register_altmode);
 
@@ -1381,10 +1585,12 @@ struct typec_port *typec_register_port(struct device *parent,
 		break;
 	}
 
+	ida_init(&port->mode_ids);
+	mutex_init(&port->port_type_lock);
+
 	port->id = id;
 	port->cap = cap;
 	port->port_type = cap->type;
-	mutex_init(&port->port_type_lock);
 	port->prefer_role = cap->prefer_role;
 
 	port->dev.class = typec_class;
@@ -1428,8 +1634,19 @@ EXPORT_SYMBOL_GPL(typec_unregister_port);
 
 static int __init typec_init(void)
 {
+	int ret;
+
+	ret = bus_register(&typec_bus);
+	if (ret)
+		return ret;
+
 	typec_class = class_create(THIS_MODULE, "typec");
-	return PTR_ERR_OR_ZERO(typec_class);
+	if (IS_ERR(typec_class)) {
+		bus_unregister(&typec_bus);
+		return PTR_ERR(typec_class);
+	}
+
+	return 0;
 }
 subsys_initcall(typec_init);
 
@@ -1437,6 +1654,7 @@ static void __exit typec_exit(void)
 {
 	class_destroy(typec_class);
 	ida_destroy(&typec_index_ida);
+	bus_unregister(&typec_bus);
 }
 module_exit(typec_exit);
 
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 96a71a648eed..1298a7daa57d 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -746,4 +746,19 @@ struct tb_service_id {
 #define TBSVC_MATCH_PROTOCOL_VERSION	0x0004
 #define TBSVC_MATCH_PROTOCOL_REVISION	0x0008
 
+/* USB Type-C Alternate Modes */
+
+#define TYPEC_ANY_MODE	0x7
+
+/**
+ * struct typec_device_id - USB Type-C alternate mode identifiers
+ * @svid: Standard or Vendor ID
+ * @mode: Mode index
+ */
+struct typec_device_id {
+	__u16 svid;
+	__u8 mode;
+	kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 2dcb1683075f..7df4ecabc78a 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -5,21 +5,18 @@
 
 #include <linux/types.h>
 
-/* XXX: Once we have a header for USB Power Delivery, this belongs there */
-#define ALTMODE_MAX_MODES	6
-
 /* USB Type-C Specification releases */
 #define USB_TYPEC_REV_1_0	0x100 /* 1.0 */
 #define USB_TYPEC_REV_1_1	0x110 /* 1.1 */
 #define USB_TYPEC_REV_1_2	0x120 /* 1.2 */
 
-struct typec_altmode;
 struct typec_partner;
 struct typec_cable;
 struct typec_plug;
 struct typec_port;
 
 struct fwnode_handle;
+struct device;
 
 enum typec_port_type {
 	TYPEC_PORT_SRC,
@@ -107,7 +104,7 @@ struct typec_altmode_desc {
 	u8			mode;
 	u32			vdo;
 	/* Only used with ports */
-	enum typec_port_type	roles;
+	enum typec_port_data	roles;
 };
 
 struct typec_altmode
@@ -186,7 +183,6 @@ struct typec_partner_desc {
  * @dr_set: Set Data Role
  * @pr_set: Set Power Role
  * @vconn_set: Set VCONN Role
- * @activate_mode: Enter/exit given Alternate Mode
  * @port_type_set: Set port type
  *
  * Static capabilities of a single USB Type-C port.
@@ -212,12 +208,8 @@ struct typec_capability {
 				  enum typec_role);
 	int		(*vconn_set)(const struct typec_capability *,
 				     enum typec_role);
-
-	int		(*activate_mode)(const struct typec_capability *,
-					 int mode, int activate);
 	int		(*port_type_set)(const struct typec_capability *,
-					enum typec_port_type);
-
+					 enum typec_port_type);
 };
 
 /* Specific to try_role(). Indicates the user want's to clear the preference. */
diff --git a/include/linux/usb/typec_altmode.h b/include/linux/usb/typec_altmode.h
new file mode 100644
index 000000000000..9a88c74a1d0d
--- /dev/null
+++ b/include/linux/usb/typec_altmode.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __USB_TYPEC_ALTMODE_H
+#define __USB_TYPEC_ALTMODE_H
+
+#include <linux/mod_devicetable.h>
+#include <linux/usb/typec.h>
+#include <linux/device.h>
+
+#define MODE_DISCOVERY_MAX	6
+
+struct typec_altmode_ops;
+
+/**
+ * struct typec_altmode - USB Type-C alternate mode device
+ * @dev: Driver model's view of this device
+ * @svid: Standard or Vendor ID (SVID) of the alternate mode
+ * @mode: Index of the Mode
+ * @vdo: VDO returned by Discover Modes USB PD command
+ * @active: Tells has the mode been entered or not
+ * @desc: Optional human readable description of the mode
+ * @ops: Operations vector from the driver
+ */
+struct typec_altmode {
+	struct device			dev;
+	u16				svid;
+	int				mode;
+	u32				vdo;
+	unsigned int			active:1;
+
+	char				*desc;
+	const struct typec_altmode_ops	*ops;
+};
+
+#define to_typec_altmode(d) container_of(d, struct typec_altmode, dev)
+
+static inline void typec_altmode_set_drvdata(struct typec_altmode *altmode,
+					     void *data)
+{
+	dev_set_drvdata(&altmode->dev, data);
+}
+
+static inline void *typec_altmode_get_drvdata(struct typec_altmode *altmode)
+{
+	return dev_get_drvdata(&altmode->dev);
+}
+
+/**
+ * struct typec_altmode_ops - Alternate mode specific operations vector
+ * @enter: Operations to be executed with Enter Mode Command
+ * @exit: Operations to be executed with Exit Mode Command
+ * @attention: Callback for Attention Command
+ * @vdm: Callback for SVID specific commands
+ * @notify: Communication channel for platform and the alternate mode
+ * @activate: User callback for Enter/Exit Mode
+ */
+struct typec_altmode_ops {
+	int (*enter)(struct typec_altmode *altmode);
+	int (*exit)(struct typec_altmode *altmode);
+	void (*attention)(struct typec_altmode *altmode, u32 vdo);
+	int (*vdm)(struct typec_altmode *altmode, const u32 hdr,
+		   const u32 *vdo, int cnt);
+	int (*notify)(struct typec_altmode *altmode, unsigned long conf,
+		      void *data);
+	int (*activate)(struct typec_altmode *altmode, int activate);
+};
+
+int typec_altmode_enter(struct typec_altmode *altmode);
+int typec_altmode_exit(struct typec_altmode *altmode);
+void typec_altmode_attention(struct typec_altmode *altmode, u32 vdo);
+int typec_altmode_vdm(struct typec_altmode *altmode,
+		      const u32 header, const u32 *vdo, int count);
+int typec_altmode_notify(struct typec_altmode *altmode, unsigned long conf,
+			 void *data);
+const struct typec_altmode *
+typec_altmode_get_partner(struct typec_altmode *altmode);
+
+/*
+ * These are the connector states (USB, Safe and Alt Mode) defined in USB Type-C
+ * Specification. SVID specific connector states are expected to follow and
+ * start from the value TYPEC_STATE_MODAL.
+ */
+enum {
+	TYPEC_STATE_SAFE,	/* USB Safe State */
+	TYPEC_STATE_USB,	/* USB Operation */
+	TYPEC_STATE_MODAL,	/* Alternate Modes */
+};
+
+/*
+ * For the muxes there is no difference between Accessory Modes and Alternate
+ * Modes, so the Accessory Modes are supplied with specific modal state values
+ * here. Unlike with Alternate Modes, where the mux will be linked with the
+ * alternate mode device, the mux for Accessory Modes will be linked with the
+ * port device instead.
+ *
+ * Port drivers can use TYPEC_MODE_AUDIO and TYPEC_MODE_DEBUG as the mode
+ * value for typec_set_mode() when accessory modes are supported.
+ */
+enum {
+	TYPEC_MODE_AUDIO = TYPEC_STATE_MODAL,	/* Audio Accessory */
+	TYPEC_MODE_DEBUG,			/* Debug Accessory */
+};
+
+#define TYPEC_MODAL_STATE(_state_)	((_state_) + TYPEC_STATE_MODAL)
+
+struct typec_altmode *typec_altmode_get_plug(struct typec_altmode *altmode,
+					     enum typec_plug_index index);
+void typec_altmode_put_plug(struct typec_altmode *plug);
+
+struct typec_altmode *typec_match_altmode(struct typec_altmode **altmodes,
+					  size_t n, u16 svid, u8 mode);
+
+struct typec_altmode *
+typec_altmode_register_notifier(struct device *dev, u16 svid, u8 mode,
+				struct notifier_block *nb);
+
+void typec_altmode_unregister_notifier(struct typec_altmode *adev,
+				       struct notifier_block *nb);
+
+/**
+ * typec_altmode_get_orientation - Get cable plug orientation
+ * altmode: Handle to the alternate mode
+ */
+static inline enum typec_orientation
+typec_altmode_get_orientation(struct typec_altmode *altmode)
+{
+	return typec_get_orientation(typec_altmode2port(altmode));
+}
+
+/**
+ * struct typec_altmode_driver - USB Type-C alternate mode device driver
+ * @id_table: Null terminated array of SVIDs
+ * @probe: Callback for device binding
+ * @remove: Callback for device unbinding
+ * @driver: Device driver model driver
+ *
+ * These drivers will be bind to the partner alternate mode devices. They will
+ * handle all SVID specific communication.
+ */
+struct typec_altmode_driver {
+	const struct typec_device_id *id_table;
+	int (*probe)(struct typec_altmode *altmode);
+	void (*remove)(struct typec_altmode *altmode);
+	struct device_driver driver;
+};
+
+#define to_altmode_driver(d) container_of(d, struct typec_altmode_driver, \
+					  driver)
+
+#define typec_altmode_register_driver(drv) \
+		__typec_altmode_register_driver(drv, THIS_MODULE)
+int __typec_altmode_register_driver(struct typec_altmode_driver *drv,
+				    struct module *module);
+void typec_altmode_unregister_driver(struct typec_altmode_driver *drv);
+
+#define module_typec_altmode_driver(__typec_altmode_driver) \
+	module_driver(__typec_altmode_driver, typec_altmode_register_driver, \
+		      typec_altmode_unregister_driver)
+
+#endif /* __USB_TYPEC_ALTMODE_H */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 6667f7b491d6..293004499b4d 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -221,5 +221,9 @@ int main(void)
 	DEVID_FIELD(tb_service_id, protocol_version);
 	DEVID_FIELD(tb_service_id, protocol_revision);
 
+	DEVID(typec_device_id);
+	DEVID_FIELD(typec_device_id, svid);
+	DEVID_FIELD(typec_device_id, mode);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 52fd54a8fe39..7be43697ff84 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1352,6 +1352,19 @@ static int do_tbsvc_entry(const char *filename, void *symval, char *alias)
 }
 ADD_TO_DEVTABLE("tbsvc", tb_service_id, do_tbsvc_entry);
 
+/* Looks like: typec:idNmN */
+static int do_typec_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, typec_device_id, svid);
+	DEF_FIELD(symval, typec_device_id, mode);
+
+	sprintf(alias, "typec:id%04X", svid);
+	ADD(alias, "m", mode != TYPEC_ANY_MODE, mode);
+
+	return 1;
+}
+ADD_TO_DEVTABLE("typec", typec_device_id, do_typec_entry);
+
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
 {
-- 
cgit v1.2.3


From 0e3bb7d6894d9b6e67d6382bb03a46a1dc989588 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:51 +0300
Subject: usb: typec: Add driver for DisplayPort alternate mode

DisplayPort USB Type-C Alt Mode allows DisplayPort displays
and adapters to be attached to the USB Type-C ports on the
system.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-driver-typec-displayport     |  49 ++
 drivers/usb/typec/Kconfig                          |   2 +
 drivers/usb/typec/Makefile                         |   1 +
 drivers/usb/typec/altmodes/Kconfig                 |  14 +
 drivers/usb/typec/altmodes/Makefile                |   2 +
 drivers/usb/typec/altmodes/displayport.c           | 578 +++++++++++++++++++++
 include/linux/usb/typec_dp.h                       |  95 ++++
 7 files changed, 741 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-driver-typec-displayport
 create mode 100644 drivers/usb/typec/altmodes/Kconfig
 create mode 100644 drivers/usb/typec/altmodes/Makefile
 create mode 100644 drivers/usb/typec/altmodes/displayport.c
 create mode 100644 include/linux/usb/typec_dp.h

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-driver-typec-displayport b/Documentation/ABI/testing/sysfs-driver-typec-displayport
new file mode 100644
index 000000000000..231471ad0d4b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-typec-displayport
@@ -0,0 +1,49 @@
+What:		/sys/bus/typec/devices/.../displayport/configuration
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		Shows the current DisplayPort configuration for the connector.
+		Valid values are USB, source and sink. Source means DisplayPort
+		source, and sink means DisplayPort sink.
+
+		All supported configurations are listed as space separated list
+		with the active one wrapped in square brackets.
+
+		Source example:
+
+			USB [source] sink
+
+		The configuration can be changed by writing to the file
+
+		Note. USB configuration does not equal to Exit Mode. It is
+		separate configuration defined in VESA DisplayPort Alt Mode on
+		USB Type-C Standard. Functionally it equals to the situation
+		where the mode has been exited (to exit the mode, see
+		Documentation/ABI/testing/sysfs-bus-typec, and use file
+		/sys/bus/typec/devices/.../active).
+
+What:		/sys/bus/typec/devices/.../displayport/pin_assignment
+Date:		July 2018
+Contact:	Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Description:
+		VESA DisplayPort Alt Mode on USB Type-C Standard defines six
+		different pin assignments for USB Type-C connector that are
+		labeled A, B, C, D, E, and F. The supported pin assignments are
+		listed as space separated list with the active one wrapped in
+		square brackets.
+
+		Example:
+
+			C [D]
+
+		Pin assignment can be changed by writing to the file. It is
+		possible to set pin assignment before configuration has been
+		set, but the assignment will not be active before the
+		connector is actually configured.
+
+		Note. As of VESA DisplayPort Alt Mode on USB Type-C Standard
+		version 1.0b, pin assignments A, B, and F are deprecated. Only
+		pin assignment D can now carry simultaneously one channel of
+		USB SuperSpeed protocol. From user perspective pin assignments C
+		and E are equal, where all channels on the connector are used
+		for carrying DisplayPort protocol (allowing higher resolutions).
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index ee808903983f..00878c386dd0 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -104,4 +104,6 @@ config TYPEC_TPS6598X
 
 source "drivers/usb/typec/mux/Kconfig"
 
+source "drivers/usb/typec/altmodes/Kconfig"
+
 endif # TYPEC
diff --git a/drivers/usb/typec/Makefile b/drivers/usb/typec/Makefile
index 335ee06748fc..45b0aef428a8 100644
--- a/drivers/usb/typec/Makefile
+++ b/drivers/usb/typec/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_TYPEC)		+= typec.o
 typec-y				:= class.o mux.o bus.o
+obj-$(CONFIG_TYPEC)		+= altmodes/
 obj-$(CONFIG_TYPEC_TCPM)	+= tcpm.o
 obj-y				+= fusb302/
 obj-$(CONFIG_TYPEC_WCOVE)	+= typec_wcove.o
diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig
new file mode 100644
index 000000000000..efef2a64bc51
--- /dev/null
+++ b/drivers/usb/typec/altmodes/Kconfig
@@ -0,0 +1,14 @@
+
+menu "USB Type-C Alternate Mode drivers"
+
+config TYPEC_DP_ALTMODE
+	tristate "DisplayPort Alternate Mode driver"
+	help
+	  DisplayPort USB Type-C Alternate Mode allows DisplayPort
+	  displays and adapters to be attached to the USB Type-C
+	  connectors on the system.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called typec_displayport.
+
+endmenu
diff --git a/drivers/usb/typec/altmodes/Makefile b/drivers/usb/typec/altmodes/Makefile
new file mode 100644
index 000000000000..5caf094ef71a
--- /dev/null
+++ b/drivers/usb/typec/altmodes/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_TYPEC_DP_ALTMODE)		+= typec_displayport.o
+typec_displayport-y			:= displayport.o
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
new file mode 100644
index 000000000000..ef12b15bd484
--- /dev/null
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * USB Typec-C DisplayPort Alternate Mode driver
+ *
+ * Copyright (C) 2018 Intel Corporation
+ * Author: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+ *
+ * DisplayPort is trademark of VESA (www.vesa.org)
+ */
+
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/usb/pd_vdo.h>
+#include <linux/usb/typec_dp.h>
+
+#define DP_HEADER(cmd)			(VDO(USB_TYPEC_DP_SID, 1, cmd) | \
+					 VDO_OPOS(USB_TYPEC_DP_MODE))
+
+enum {
+	DP_CONF_USB,
+	DP_CONF_DFP_D,
+	DP_CONF_UFP_D,
+	DP_CONF_DUAL_D,
+};
+
+/* Helper for setting/getting the pin assignement value to the configuration */
+#define DP_CONF_SET_PIN_ASSIGN(_a_)	((_a_) << 8)
+#define DP_CONF_GET_PIN_ASSIGN(_conf_)	(((_conf_) & GENMASK(15, 8)) >> 8)
+
+/* Pin assignments that use USB3.1 Gen2 signaling to carry DP protocol */
+#define DP_PIN_ASSIGN_GEN2_BR_MASK	(BIT(DP_PIN_ASSIGN_A) | \
+					 BIT(DP_PIN_ASSIGN_B))
+
+/* Pin assignments that use DP v1.3 signaling to carry DP protocol */
+#define DP_PIN_ASSIGN_DP_BR_MASK	(BIT(DP_PIN_ASSIGN_C) | \
+					 BIT(DP_PIN_ASSIGN_D) | \
+					 BIT(DP_PIN_ASSIGN_E) | \
+					 BIT(DP_PIN_ASSIGN_F))
+
+/* DP only pin assignments */
+#define DP_PIN_ASSIGN_DP_ONLY_MASK	(BIT(DP_PIN_ASSIGN_A) | \
+					 BIT(DP_PIN_ASSIGN_C) | \
+					 BIT(DP_PIN_ASSIGN_E))
+
+/* Pin assignments where one channel is for USB */
+#define DP_PIN_ASSIGN_MULTI_FUNC_MASK	(BIT(DP_PIN_ASSIGN_B) | \
+					 BIT(DP_PIN_ASSIGN_D) | \
+					 BIT(DP_PIN_ASSIGN_F))
+
+enum dp_state {
+	DP_STATE_IDLE,
+	DP_STATE_ENTER,
+	DP_STATE_UPDATE,
+	DP_STATE_CONFIGURE,
+	DP_STATE_EXIT,
+};
+
+struct dp_altmode {
+	struct typec_displayport_data data;
+
+	enum dp_state state;
+
+	struct mutex lock; /* device lock */
+	struct work_struct work;
+	struct typec_altmode *alt;
+	const struct typec_altmode *port;
+};
+
+static int dp_altmode_notify(struct dp_altmode *dp)
+{
+	u8 state = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
+
+	return typec_altmode_notify(dp->alt, TYPEC_MODAL_STATE(state),
+				   &dp->data);
+}
+
+static int dp_altmode_configure(struct dp_altmode *dp, u8 con)
+{
+	u32 conf = DP_CONF_SIGNALING_DP; /* Only DP signaling supported */
+	u8 pin_assign = 0;
+
+	switch (con) {
+	case DP_STATUS_CON_DISABLED:
+		return 0;
+	case DP_STATUS_CON_DFP_D:
+		conf |= DP_CONF_UFP_U_AS_DFP_D;
+		pin_assign = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo) &
+			     DP_CAP_DFP_D_PIN_ASSIGN(dp->port->vdo);
+		break;
+	case DP_STATUS_CON_UFP_D:
+	case DP_STATUS_CON_BOTH: /* NOTE: First acting as DP source */
+		conf |= DP_CONF_UFP_U_AS_UFP_D;
+		pin_assign = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo) &
+			     DP_CAP_UFP_D_PIN_ASSIGN(dp->port->vdo);
+		break;
+	default:
+		break;
+	}
+
+	/* Determining the initial pin assignment. */
+	if (!DP_CONF_GET_PIN_ASSIGN(dp->data.conf)) {
+		/* Is USB together with DP preferred */
+		if (dp->data.status & DP_STATUS_PREFER_MULTI_FUNC &&
+		    pin_assign & DP_PIN_ASSIGN_MULTI_FUNC_MASK)
+			pin_assign &= DP_PIN_ASSIGN_MULTI_FUNC_MASK;
+		else
+			pin_assign &= DP_PIN_ASSIGN_DP_ONLY_MASK;
+
+		if (!pin_assign)
+			return -EINVAL;
+
+		conf |= DP_CONF_SET_PIN_ASSIGN(pin_assign);
+	}
+
+	dp->data.conf = conf;
+
+	return 0;
+}
+
+static int dp_altmode_status_update(struct dp_altmode *dp)
+{
+	bool configured = !!DP_CONF_GET_PIN_ASSIGN(dp->data.conf);
+	u8 con = DP_STATUS_CONNECTION(dp->data.status);
+	int ret = 0;
+
+	if (configured && (dp->data.status & DP_STATUS_SWITCH_TO_USB)) {
+		dp->data.conf = 0;
+		dp->state = DP_STATE_CONFIGURE;
+	} else if (dp->data.status & DP_STATUS_EXIT_DP_MODE) {
+		dp->state = DP_STATE_EXIT;
+	} else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
+		ret = dp_altmode_configure(dp, con);
+		if (!ret)
+			dp->state = DP_STATE_CONFIGURE;
+	}
+
+	return ret;
+}
+
+static int dp_altmode_configured(struct dp_altmode *dp)
+{
+	int ret;
+
+	sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
+
+	if (!dp->data.conf)
+		return typec_altmode_notify(dp->alt, TYPEC_STATE_USB,
+					    &dp->data);
+
+	ret = dp_altmode_notify(dp);
+	if (ret)
+		return ret;
+
+	sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+
+	return 0;
+}
+
+static int dp_altmode_configure_vdm(struct dp_altmode *dp, u32 conf)
+{
+	u32 header = DP_HEADER(DP_CMD_CONFIGURE);
+	int ret;
+
+	ret = typec_altmode_notify(dp->alt, TYPEC_STATE_SAFE, &dp->data);
+	if (ret) {
+		dev_err(&dp->alt->dev,
+			"unable to put to connector to safe mode\n");
+		return ret;
+	}
+
+	ret = typec_altmode_vdm(dp->alt, header, &conf, 2);
+	if (ret) {
+		if (DP_CONF_GET_PIN_ASSIGN(dp->data.conf))
+			dp_altmode_notify(dp);
+		else
+			typec_altmode_notify(dp->alt, TYPEC_STATE_USB,
+					     &dp->data);
+	}
+
+	return ret;
+}
+
+static void dp_altmode_work(struct work_struct *work)
+{
+	struct dp_altmode *dp = container_of(work, struct dp_altmode, work);
+	u32 header;
+	u32 vdo;
+	int ret;
+
+	mutex_lock(&dp->lock);
+
+	switch (dp->state) {
+	case DP_STATE_ENTER:
+		ret = typec_altmode_enter(dp->alt);
+		if (ret)
+			dev_err(&dp->alt->dev, "failed to enter mode\n");
+		break;
+	case DP_STATE_UPDATE:
+		header = DP_HEADER(DP_CMD_STATUS_UPDATE);
+		vdo = 1;
+		ret = typec_altmode_vdm(dp->alt, header, &vdo, 2);
+		if (ret)
+			dev_err(&dp->alt->dev,
+				"unable to send Status Update command (%d)\n",
+				ret);
+		break;
+	case DP_STATE_CONFIGURE:
+		ret = dp_altmode_configure_vdm(dp, dp->data.conf);
+		if (ret)
+			dev_err(&dp->alt->dev,
+				"unable to send Configure command (%d)\n", ret);
+		break;
+	case DP_STATE_EXIT:
+		if (typec_altmode_exit(dp->alt))
+			dev_err(&dp->alt->dev, "Exit Mode Failed!\n");
+		break;
+	default:
+		break;
+	}
+
+	dp->state = DP_STATE_IDLE;
+
+	mutex_unlock(&dp->lock);
+}
+
+static void dp_altmode_attention(struct typec_altmode *alt, const u32 vdo)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+	u8 old_state;
+
+	mutex_lock(&dp->lock);
+
+	old_state = dp->state;
+	dp->data.status = vdo;
+
+	if (old_state != DP_STATE_IDLE)
+		dev_warn(&alt->dev, "ATTENTION while processing state %d\n",
+			 old_state);
+
+	if (dp_altmode_status_update(dp))
+		dev_warn(&alt->dev, "%s: status update failed\n", __func__);
+
+	if (dp_altmode_notify(dp))
+		dev_err(&alt->dev, "%s: notification failed\n", __func__);
+
+	if (old_state == DP_STATE_IDLE && dp->state != DP_STATE_IDLE)
+		schedule_work(&dp->work);
+
+	mutex_unlock(&dp->lock);
+}
+
+static int dp_altmode_vdm(struct typec_altmode *alt,
+			  const u32 hdr, const u32 *vdo, int count)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+	int cmd_type = PD_VDO_CMDT(hdr);
+	int cmd = PD_VDO_CMD(hdr);
+	int ret = 0;
+
+	mutex_lock(&dp->lock);
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto err_unlock;
+	}
+
+	switch (cmd_type) {
+	case CMDT_RSP_ACK:
+		switch (cmd) {
+		case CMD_ENTER_MODE:
+			dp->state = DP_STATE_UPDATE;
+			break;
+		case CMD_EXIT_MODE:
+			dp->data.status = 0;
+			dp->data.conf = 0;
+			break;
+		case DP_CMD_STATUS_UPDATE:
+			dp->data.status = *vdo;
+			ret = dp_altmode_status_update(dp);
+			break;
+		case DP_CMD_CONFIGURE:
+			ret = dp_altmode_configured(dp);
+			break;
+		default:
+			break;
+		}
+		break;
+	case CMDT_RSP_NAK:
+		switch (cmd) {
+		case DP_CMD_CONFIGURE:
+			dp->data.conf = 0;
+			ret = dp_altmode_configured(dp);
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (dp->state != DP_STATE_IDLE)
+		schedule_work(&dp->work);
+
+err_unlock:
+	mutex_unlock(&dp->lock);
+	return ret;
+}
+
+static int dp_altmode_activate(struct typec_altmode *alt, int activate)
+{
+	return activate ? typec_altmode_enter(alt) : typec_altmode_exit(alt);
+}
+
+static const struct typec_altmode_ops dp_altmode_ops = {
+	.attention = dp_altmode_attention,
+	.vdm = dp_altmode_vdm,
+	.activate = dp_altmode_activate,
+};
+
+static const char * const configurations[] = {
+	[DP_CONF_USB]	= "USB",
+	[DP_CONF_DFP_D]	= "source",
+	[DP_CONF_UFP_D]	= "sink",
+};
+
+static ssize_t
+configuration_store(struct device *dev, struct device_attribute *attr,
+		    const char *buf, size_t size)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u32 conf;
+	u32 cap;
+	int con;
+	int ret;
+
+	con = sysfs_match_string(configurations, buf);
+	if (con < 0)
+		return con;
+
+	mutex_lock(&dp->lock);
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto err_unlock;
+	}
+
+	cap = DP_CAP_CAPABILITY(dp->alt->vdo);
+
+	if ((con == DP_CONF_DFP_D && !(cap & DP_CAP_DFP_D)) ||
+	    (con == DP_CONF_UFP_D && !(cap & DP_CAP_UFP_D)))
+		return -EINVAL;
+
+	conf = dp->data.conf & ~DP_CONF_DUAL_D;
+	conf |= con;
+
+	if (dp->alt->active) {
+		ret = dp_altmode_configure_vdm(dp, conf);
+		if (ret)
+			goto err_unlock;
+	}
+
+	dp->data.conf = conf;
+
+err_unlock:
+	mutex_unlock(&dp->lock);
+
+	return ret ? ret : size;
+}
+
+static ssize_t configuration_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	int len;
+	u8 cap;
+	u8 cur;
+	int i;
+
+	mutex_lock(&dp->lock);
+
+	cap = DP_CAP_CAPABILITY(dp->alt->vdo);
+	cur = DP_CONF_CURRENTLY(dp->data.conf);
+
+	len = sprintf(buf, "%s ", cur ? "USB" : "[USB]");
+
+	for (i = 1; i < ARRAY_SIZE(configurations); i++) {
+		if (i == cur)
+			len += sprintf(buf + len, "[%s] ", configurations[i]);
+		else if ((i == DP_CONF_DFP_D && cap & DP_CAP_DFP_D) ||
+			 (i == DP_CONF_UFP_D && cap & DP_CAP_UFP_D))
+			len += sprintf(buf + len, "%s ", configurations[i]);
+	}
+
+	mutex_unlock(&dp->lock);
+
+	buf[len - 1] = '\n';
+	return len;
+}
+static DEVICE_ATTR_RW(configuration);
+
+static const char * const pin_assignments[] = {
+	[DP_PIN_ASSIGN_A] = "A",
+	[DP_PIN_ASSIGN_B] = "B",
+	[DP_PIN_ASSIGN_C] = "C",
+	[DP_PIN_ASSIGN_D] = "D",
+	[DP_PIN_ASSIGN_E] = "E",
+	[DP_PIN_ASSIGN_F] = "F",
+};
+
+static ssize_t
+pin_assignment_store(struct device *dev, struct device_attribute *attr,
+		     const char *buf, size_t size)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u8 assignments;
+	u32 conf;
+	int ret;
+
+	ret = sysfs_match_string(pin_assignments, buf);
+	if (ret < 0)
+		return ret;
+
+	conf = DP_CONF_SET_PIN_ASSIGN(BIT(ret));
+	ret = 0;
+
+	mutex_lock(&dp->lock);
+
+	if (conf & dp->data.conf)
+		goto out_unlock;
+
+	if (dp->state != DP_STATE_IDLE) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+		assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+	else
+		assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+
+	if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	conf |= dp->data.conf & ~DP_CONF_PIN_ASSIGNEMENT_MASK;
+
+	/* Only send Configure command if a configuration has been set */
+	if (dp->alt->active && DP_CONF_CURRENTLY(dp->data.conf)) {
+		ret = dp_altmode_configure_vdm(dp, conf);
+		if (ret)
+			goto out_unlock;
+	}
+
+	dp->data.conf = conf;
+
+out_unlock:
+	mutex_unlock(&dp->lock);
+
+	return ret ? ret : size;
+}
+
+static ssize_t pin_assignment_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dp_altmode *dp = dev_get_drvdata(dev);
+	u8 assignments;
+	int len = 0;
+	u8 cur;
+	int i;
+
+	mutex_lock(&dp->lock);
+
+	cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf));
+
+	if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D)
+		assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo);
+	else
+		assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo);
+
+	for (i = 0; assignments; assignments >>= 1, i++) {
+		if (assignments & 1) {
+			if (i == cur)
+				len += sprintf(buf + len, "[%s] ",
+					       pin_assignments[i]);
+			else
+				len += sprintf(buf + len, "%s ",
+					       pin_assignments[i]);
+		}
+	}
+
+	mutex_unlock(&dp->lock);
+
+	buf[len - 1] = '\n';
+	return len;
+}
+static DEVICE_ATTR_RW(pin_assignment);
+
+static struct attribute *dp_altmode_attrs[] = {
+	&dev_attr_configuration.attr,
+	&dev_attr_pin_assignment.attr,
+	NULL
+};
+
+static const struct attribute_group dp_altmode_group = {
+	.name = "displayport",
+	.attrs = dp_altmode_attrs,
+};
+
+static int dp_altmode_probe(struct typec_altmode *alt)
+{
+	const struct typec_altmode *port = typec_altmode_get_partner(alt);
+	struct dp_altmode *dp;
+	int ret;
+
+	/* FIXME: Port can only be DFP_U. */
+
+	/* Make sure we have compatiple pin configurations */
+	if (!(DP_CAP_DFP_D_PIN_ASSIGN(port->vdo) &
+	      DP_CAP_UFP_D_PIN_ASSIGN(alt->vdo)) &&
+	    !(DP_CAP_UFP_D_PIN_ASSIGN(port->vdo) &
+	      DP_CAP_DFP_D_PIN_ASSIGN(alt->vdo)))
+		return -ENODEV;
+
+	ret = sysfs_create_group(&alt->dev.kobj, &dp_altmode_group);
+	if (ret)
+		return ret;
+
+	dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL);
+	if (!dp)
+		return -ENOMEM;
+
+	INIT_WORK(&dp->work, dp_altmode_work);
+	mutex_init(&dp->lock);
+	dp->port = port;
+	dp->alt = alt;
+
+	alt->desc = "DisplayPort";
+	alt->ops = &dp_altmode_ops;
+
+	typec_altmode_set_drvdata(alt, dp);
+
+	dp->state = DP_STATE_ENTER;
+	schedule_work(&dp->work);
+
+	return 0;
+}
+
+static void dp_altmode_remove(struct typec_altmode *alt)
+{
+	struct dp_altmode *dp = typec_altmode_get_drvdata(alt);
+
+	sysfs_remove_group(&alt->dev.kobj, &dp_altmode_group);
+	cancel_work_sync(&dp->work);
+}
+
+static const struct typec_device_id dp_typec_id[] = {
+	{ USB_TYPEC_DP_SID, USB_TYPEC_DP_MODE },
+	{ },
+};
+MODULE_DEVICE_TABLE(typec, dp_typec_id);
+
+static struct typec_altmode_driver dp_altmode_driver = {
+	.id_table = dp_typec_id,
+	.probe = dp_altmode_probe,
+	.remove = dp_altmode_remove,
+	.driver = {
+		.name = "typec_displayport",
+		.owner = THIS_MODULE,
+	},
+};
+module_typec_altmode_driver(dp_altmode_driver);
+
+MODULE_AUTHOR("Heikki Krogerus <heikki.krogerus@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DisplayPort Alternate Mode");
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
new file mode 100644
index 000000000000..55ae781d60a9
--- /dev/null
+++ b/include/linux/usb/typec_dp.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __USB_TYPEC_DP_H
+#define __USB_TYPEC_DP_H
+
+#include <linux/usb/typec_altmode.h>
+
+#define USB_TYPEC_DP_SID	0xff01
+#define USB_TYPEC_DP_MODE	1
+
+/*
+ * Connector states matching the pin assignments in DisplayPort Alt Mode
+ * Specification.
+ *
+ * These values are meant primarily to be used by the mux drivers, but they are
+ * also used as the "value" part in the alternate mode notification chain, so
+ * receivers of those notifications will always see them.
+ *
+ * Note. DisplayPort USB Type-C Alt Mode Specification version 1.0b deprecated
+ * pin assignments A, B and F, but they are still defined here for legacy
+ * purposes.
+ */
+enum {
+	TYPEC_DP_STATE_A = TYPEC_STATE_MODAL,	/* Not supported after v1.0b */
+	TYPEC_DP_STATE_B,			/* Not supported after v1.0b */
+	TYPEC_DP_STATE_C,
+	TYPEC_DP_STATE_D,
+	TYPEC_DP_STATE_E,
+	TYPEC_DP_STATE_F,			/* Not supported after v1.0b */
+};
+
+/*
+ * struct typec_displayport_data - DisplayPort Alt Mode specific data
+ * @status: Status Update command VDO content
+ * @conf: Configure command VDO content
+ *
+ * This structure is delivered as the data part with the notifications. It
+ * contains the VDOs from the two DisplayPort Type-C alternate mode specific
+ * commands: Status Update and Configure.
+ *
+ * @status will show for example the status of the HPD signal.
+ */
+struct typec_displayport_data {
+	u32 status;
+	u32 conf;
+};
+
+enum {
+	DP_PIN_ASSIGN_A, /* Not supported after v1.0b */
+	DP_PIN_ASSIGN_B, /* Not supported after v1.0b */
+	DP_PIN_ASSIGN_C,
+	DP_PIN_ASSIGN_D,
+	DP_PIN_ASSIGN_E,
+	DP_PIN_ASSIGN_F, /* Not supported after v1.0b */
+};
+
+/* DisplayPort alt mode specific commands */
+#define DP_CMD_STATUS_UPDATE		VDO_CMD_VENDOR(0)
+#define DP_CMD_CONFIGURE		VDO_CMD_VENDOR(1)
+
+/* DisplayPort Capabilities VDO bits (returned with Discover Modes) */
+#define DP_CAP_CAPABILITY(_cap_)	((_cap_) & 3)
+#define   DP_CAP_UFP_D			1
+#define   DP_CAP_DFP_D			2
+#define   DP_CAP_DFP_D_AND_UFP_D	3
+#define DP_CAP_DP_SIGNALING		BIT(2) /* Always set */
+#define DP_CAP_GEN2			BIT(3) /* Reserved after v1.0b */
+#define DP_CAP_RECEPTACLE		BIT(6)
+#define DP_CAP_USB			BIT(7)
+#define DP_CAP_DFP_D_PIN_ASSIGN(_cap_)	(((_cap_) & GENMASK(15, 8)) >> 8)
+#define DP_CAP_UFP_D_PIN_ASSIGN(_cap_)	(((_cap_) & GENMASK(23, 16)) >> 16)
+
+/* DisplayPort Status Update VDO bits */
+#define DP_STATUS_CONNECTION(_status_)	((_status_) & 3)
+#define   DP_STATUS_CON_DISABLED	0
+#define   DP_STATUS_CON_DFP_D		1
+#define   DP_STATUS_CON_UFP_D		2
+#define   DP_STATUS_CON_BOTH		3
+#define DP_STATUS_POWER_LOW		BIT(2)
+#define DP_STATUS_ENABLED		BIT(3)
+#define DP_STATUS_PREFER_MULTI_FUNC	BIT(4)
+#define DP_STATUS_SWITCH_TO_USB		BIT(5)
+#define DP_STATUS_EXIT_DP_MODE		BIT(6)
+#define DP_STATUS_HPD_STATE		BIT(7) /* 0 = HPD_Low, 1 = HPD_High */
+#define DP_STATUS_IRQ_HPD		BIT(8)
+
+/* DisplayPort Configurations VDO bits */
+#define DP_CONF_CURRENTLY(_conf_)	((_conf_) & 3)
+#define DP_CONF_UFP_U_AS_DFP_D		BIT(0)
+#define DP_CONF_UFP_U_AS_UFP_D		BIT(1)
+#define DP_CONF_SIGNALING_DP		BIT(2)
+#define DP_CONF_SIGNALING_GEN_2		BIT(3) /* Reserved after v1.0b */
+#define DP_CONF_PIN_ASSIGNEMENT_SHIFT	8
+#define DP_CONF_PIN_ASSIGNEMENT_MASK	GENMASK(15, 8)
+
+#endif /* __USB_TYPEC_DP_H */
-- 
cgit v1.2.3


From e9576fe8e605c4413beb91b290b8a473985710de Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 27 Jun 2018 18:19:53 +0300
Subject: usb: typec: tcpm: Support for Alternate Modes

This adds more complete handling of VDMs and registration of
partner alternate modes, and introduces callbacks for
alternate mode operations.

Only DFP role is supported for now.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm.c | 165 ++++++++++++++++++++++++++++++++++++++---------
 include/linux/usb/tcpm.h |   9 ---
 2 files changed, 135 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index 6b57e7132e64..c732fd703961 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -26,7 +26,7 @@
 #include <linux/usb/pd_vdo.h>
 #include <linux/usb/role.h>
 #include <linux/usb/tcpm.h>
-#include <linux/usb/typec.h>
+#include <linux/usb/typec_altmode.h>
 #include <linux/workqueue.h>
 
 #define FOREACH_STATE(S)			\
@@ -169,13 +169,14 @@ enum pd_msg_request {
 /* Alternate mode support */
 
 #define SVID_DISCOVERY_MAX	16
+#define ALTMODE_DISCOVERY_MAX	(SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX)
 
 struct pd_mode_data {
 	int svid_index;		/* current SVID index		*/
 	int nsvids;
 	u16 svids[SVID_DISCOVERY_MAX];
 	int altmodes;		/* number of alternate modes	*/
-	struct typec_altmode_desc altmode_desc[SVID_DISCOVERY_MAX];
+	struct typec_altmode_desc altmode_desc[ALTMODE_DISCOVERY_MAX];
 };
 
 struct pd_pps_data {
@@ -310,8 +311,8 @@ struct tcpm_port {
 
 	/* Alternate mode data */
 	struct pd_mode_data mode_data;
-	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX * 6];
-	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX * 6];
+	struct typec_altmode *partner_altmode[ALTMODE_DISCOVERY_MAX];
+	struct typec_altmode *port_altmode[ALTMODE_DISCOVERY_MAX];
 
 	/* Deadline in jiffies to exit src_try_wait state */
 	unsigned long max_wait;
@@ -641,14 +642,14 @@ void tcpm_pd_transmit_complete(struct tcpm_port *port,
 }
 EXPORT_SYMBOL_GPL(tcpm_pd_transmit_complete);
 
-static int tcpm_mux_set(struct tcpm_port *port, enum tcpc_mux_mode mode,
+static int tcpm_mux_set(struct tcpm_port *port, int state,
 			enum usb_role usb_role,
 			enum typec_orientation orientation)
 {
 	int ret;
 
-	tcpm_log(port, "Requesting mux mode %d, usb-role %d, orientation %d",
-		 mode, usb_role, orientation);
+	tcpm_log(port, "Requesting mux state %d, usb-role %d, orientation %d",
+		 state, usb_role, orientation);
 
 	ret = typec_set_orientation(port->typec_port, orientation);
 	if (ret)
@@ -660,7 +661,7 @@ static int tcpm_mux_set(struct tcpm_port *port, enum tcpc_mux_mode mode,
 			return ret;
 	}
 
-	return typec_set_mode(port->typec_port, mode);
+	return typec_set_mode(port->typec_port, state);
 }
 
 static int tcpm_set_polarity(struct tcpm_port *port,
@@ -787,7 +788,7 @@ static int tcpm_set_roles(struct tcpm_port *port, bool attached,
 	else
 		usb_role = USB_ROLE_DEVICE;
 
-	ret = tcpm_mux_set(port, TYPEC_MUX_USB, usb_role, orientation);
+	ret = tcpm_mux_set(port, TYPEC_STATE_USB, usb_role, orientation);
 	if (ret < 0)
 		return ret;
 
@@ -1014,36 +1015,57 @@ static void svdm_consume_modes(struct tcpm_port *port, const __le32 *payload,
 			 pmdata->altmodes, paltmode->svid,
 			 paltmode->mode, paltmode->vdo);
 
-		port->partner_altmode[pmdata->altmodes] =
-			typec_partner_register_altmode(port->partner, paltmode);
-		if (!port->partner_altmode[pmdata->altmodes]) {
-			tcpm_log(port,
-				 "Failed to register modes for SVID 0x%04x",
-				 paltmode->svid);
-			return;
-		}
 		pmdata->altmodes++;
 	}
 }
 
+static void tcpm_register_partner_altmodes(struct tcpm_port *port)
+{
+	struct pd_mode_data *modep = &port->mode_data;
+	struct typec_altmode *altmode;
+	int i;
+
+	for (i = 0; i < modep->altmodes; i++) {
+		altmode = typec_partner_register_altmode(port->partner,
+						&modep->altmode_desc[i]);
+		if (!altmode)
+			tcpm_log(port, "Failed to register partner SVID 0x%04x",
+				 modep->altmode_desc[i].svid);
+		port->partner_altmode[i] = altmode;
+	}
+}
+
 #define supports_modal(port)	PD_IDH_MODAL_SUPP((port)->partner_ident.id_header)
 
 static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 			u32 *response)
 {
-	u32 p0 = le32_to_cpu(payload[0]);
-	int cmd_type = PD_VDO_CMDT(p0);
-	int cmd = PD_VDO_CMD(p0);
+	struct typec_altmode *adev;
+	struct typec_altmode *pdev;
 	struct pd_mode_data *modep;
+	u32 p[PD_MAX_PAYLOAD];
 	int rlen = 0;
-	u16 svid;
+	int cmd_type;
+	int cmd;
 	int i;
 
+	for (i = 0; i < cnt; i++)
+		p[i] = le32_to_cpu(payload[i]);
+
+	cmd_type = PD_VDO_CMDT(p[0]);
+	cmd = PD_VDO_CMD(p[0]);
+
 	tcpm_log(port, "Rx VDM cmd 0x%x type %d cmd %d len %d",
-		 p0, cmd_type, cmd, cnt);
+		 p[0], cmd_type, cmd, cnt);
 
 	modep = &port->mode_data;
 
+	adev = typec_match_altmode(port->port_altmode, ALTMODE_DISCOVERY_MAX,
+				   PD_VDO_VID(p[0]), PD_VDO_OPOS(p[0]));
+
+	pdev = typec_match_altmode(port->partner_altmode, ALTMODE_DISCOVERY_MAX,
+				   PD_VDO_VID(p[0]), PD_VDO_OPOS(p[0]));
+
 	switch (cmd_type) {
 	case CMDT_INIT:
 		switch (cmd) {
@@ -1065,17 +1087,19 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 		case CMD_EXIT_MODE:
 			break;
 		case CMD_ATTENTION:
-			break;
+			/* Attention command does not have response */
+			typec_altmode_attention(adev, p[1]);
+			return 0;
 		default:
 			break;
 		}
 		if (rlen >= 1) {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_ACK);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_ACK);
 		} else if (rlen == 0) {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_NAK);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_NAK);
 			rlen = 1;
 		} else {
-			response[0] = p0 | VDO_CMDT(CMDT_RSP_BUSY);
+			response[0] = p[0] | VDO_CMDT(CMDT_RSP_BUSY);
 			rlen = 1;
 		}
 		break;
@@ -1108,14 +1132,39 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 			svdm_consume_modes(port, payload, cnt);
 			modep->svid_index++;
 			if (modep->svid_index < modep->nsvids) {
-				svid = modep->svids[modep->svid_index];
+				u16 svid = modep->svids[modep->svid_index];
 				response[0] = VDO(svid, 1, CMD_DISCOVER_MODES);
 				rlen = 1;
 			} else {
-				/* enter alternate mode if/when implemented */
+				tcpm_register_partner_altmodes(port);
 			}
 			break;
 		case CMD_ENTER_MODE:
+			typec_altmode_update_active(pdev, true);
+
+			if (typec_altmode_vdm(adev, p[0], &p[1], cnt)) {
+				response[0] = VDO(adev->svid, 1, CMD_EXIT_MODE);
+				response[0] |= VDO_OPOS(adev->mode);
+				return 1;
+			}
+			return 0;
+		case CMD_EXIT_MODE:
+			typec_altmode_update_active(pdev, false);
+
+			/* Back to USB Operation */
+			WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB,
+						     NULL));
+			break;
+		default:
+			break;
+		}
+		break;
+	case CMDT_RSP_NAK:
+		switch (cmd) {
+		case CMD_ENTER_MODE:
+			/* Back to USB Operation */
+			WARN_ON(typec_altmode_notify(adev, TYPEC_STATE_USB,
+						     NULL));
 			break;
 		default:
 			break;
@@ -1125,6 +1174,9 @@ static int tcpm_pd_svdm(struct tcpm_port *port, const __le32 *payload, int cnt,
 		break;
 	}
 
+	/* Informing the alternate mode drivers about everything */
+	typec_altmode_vdm(adev, p[0], &p[1], cnt);
+
 	return rlen;
 }
 
@@ -1408,6 +1460,57 @@ static int tcpm_validate_caps(struct tcpm_port *port, const u32 *pdo,
 	return 0;
 }
 
+static int tcpm_altmode_enter(struct typec_altmode *altmode)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+	u32 header;
+
+	mutex_lock(&port->lock);
+	header = VDO(altmode->svid, 1, CMD_ENTER_MODE);
+	header |= VDO_OPOS(altmode->mode);
+
+	tcpm_queue_vdm(port, header, NULL, 0);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static int tcpm_altmode_exit(struct typec_altmode *altmode)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+	u32 header;
+
+	mutex_lock(&port->lock);
+	header = VDO(altmode->svid, 1, CMD_EXIT_MODE);
+	header |= VDO_OPOS(altmode->mode);
+
+	tcpm_queue_vdm(port, header, NULL, 0);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static int tcpm_altmode_vdm(struct typec_altmode *altmode,
+			    u32 header, const u32 *data, int count)
+{
+	struct tcpm_port *port = typec_altmode_get_drvdata(altmode);
+
+	mutex_lock(&port->lock);
+	tcpm_queue_vdm(port, header, data, count - 1);
+	mod_delayed_work(port->wq, &port->vdm_state_machine, 0);
+	mutex_unlock(&port->lock);
+
+	return 0;
+}
+
+static const struct typec_altmode_ops tcpm_altmode_ops = {
+	.enter = tcpm_altmode_enter,
+	.exit = tcpm_altmode_exit,
+	.vdm = tcpm_altmode_vdm,
+};
+
 /*
  * PD (data, control) command handling functions
  */
@@ -2539,7 +2642,7 @@ out_disable_vconn:
 out_disable_pd:
 	port->tcpc->set_pd_rx(port->tcpc, false);
 out_disable_mux:
-	tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE,
+	tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE,
 		     TYPEC_ORIENTATION_NONE);
 	return ret;
 }
@@ -2585,7 +2688,7 @@ static void tcpm_reset_port(struct tcpm_port *port)
 	tcpm_init_vconn(port);
 	tcpm_set_current_limit(port, 0, 0);
 	tcpm_set_polarity(port, TYPEC_POLARITY_CC1);
-	tcpm_mux_set(port, TYPEC_MUX_NONE, USB_ROLE_NONE,
+	tcpm_mux_set(port, TYPEC_STATE_SAFE, USB_ROLE_NONE,
 		     TYPEC_ORIENTATION_NONE);
 	tcpm_set_attached_state(port, false);
 	port->try_src_count = 0;
@@ -4706,6 +4809,8 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 					 dev_name(dev), paltmode->svid);
 				break;
 			}
+			typec_altmode_set_drvdata(alt, port);
+			alt->ops = &tcpm_altmode_ops;
 			port->port_altmode[i] = alt;
 			i++;
 			paltmode++;
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index 193920a2e05f..7e7fbfb84e8e 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -98,15 +98,6 @@ struct tcpc_config {
 #define TCPC_MUX_DP_ENABLED		BIT(1)	/* DP enabled */
 #define TCPC_MUX_POLARITY_INVERTED	BIT(2)	/* Polarity inverted */
 
-/* Mux modes, decoded to attributes */
-enum tcpc_mux_mode {
-	TYPEC_MUX_NONE	= 0,				/* Open switch */
-	TYPEC_MUX_USB	= TCPC_MUX_USB_ENABLED,		/* USB only */
-	TYPEC_MUX_DP	= TCPC_MUX_DP_ENABLED,		/* DP only */
-	TYPEC_MUX_DOCK	= TCPC_MUX_USB_ENABLED |	/* Both USB and DP */
-			  TCPC_MUX_DP_ENABLED,
-};
-
 /**
  * struct tcpc_dev - Port configuration and callback functions
  * @config:	Pointer to port configuration
-- 
cgit v1.2.3


From 60a866685006bae0c3db20e4bae31887eb452ff1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 26 Jun 2018 06:49:08 -0300
Subject: gpio.h: fix location of gpio legacy documentation

The location of this doc file was moved. Change its reference
accordingly.

Fixes: 7ee2c13080c9 ("Documentation: gpio: Move legacy documentation to driver-api")
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 91ed23468530..39745b8bdd65 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -14,7 +14,7 @@
 
 #include <linux/errno.h>
 
-/* see Documentation/gpio/gpio-legacy.txt */
+/* see Documentation/driver-api/gpio/legacy.rst */
 
 /* make these flag values available regardless of GPIO kconfig options */
 #define GPIOF_DIR_OUT	(0 << 0)
-- 
cgit v1.2.3


From 317f89712d7aa4acc2d93df27b753d4489826cc5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:52:51 -0700
Subject: scsi: target: Rename transport_init_session() into
 transport_alloc_session()

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/iscsi/iscsi_target_login.c |  2 +-
 drivers/target/target_core_transport.c    | 26 ++++++++++++++++++++++----
 include/target/target_core_fabric.h       |  2 +-
 3 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 99501785cdc1..923b1a9fc3dc 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -369,7 +369,7 @@ static int iscsi_login_zero_tsih_s1(
 		return -ENOMEM;
 	}
 
-	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
+	sess->se_sess = transport_alloc_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(sess->se_sess)) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 5e8329c5ddd0..880e07f40099 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -224,7 +224,11 @@ void transport_subsystem_check_init(void)
 	sub_api_initialized = 1;
 }
 
-struct se_session *transport_init_session(enum target_prot_op sup_prot_ops)
+/**
+ * transport_alloc_session - allocate a session object and initialize it
+ * @sup_prot_ops: bitmask that defines which T10-PI modes are supported.
+ */
+struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops)
 {
 	struct se_session *se_sess;
 
@@ -243,8 +247,15 @@ struct se_session *transport_init_session(enum target_prot_op sup_prot_ops)
 
 	return se_sess;
 }
-EXPORT_SYMBOL(transport_init_session);
+EXPORT_SYMBOL(transport_alloc_session);
 
+/**
+ * transport_alloc_session_tags - allocate target driver private data
+ * @se_sess:  Session pointer.
+ * @tag_num:  Maximum number of in-flight commands between initiator and target.
+ * @tag_size: Size in bytes of the private data a target driver associates with
+ *	      each command.
+ */
 int transport_alloc_session_tags(struct se_session *se_sess,
 			         unsigned int tag_num, unsigned int tag_size)
 {
@@ -274,6 +285,13 @@ int transport_alloc_session_tags(struct se_session *se_sess,
 }
 EXPORT_SYMBOL(transport_alloc_session_tags);
 
+/**
+ * transport_init_session_tags - allocate a session and target driver private data
+ * @tag_num:  Maximum number of in-flight commands between initiator and target.
+ * @tag_size: Size in bytes of the private data a target driver associates with
+ *	      each command.
+ * @sup_prot_ops: bitmask that defines which T10-PI modes are supported.
+ */
 struct se_session *transport_init_session_tags(unsigned int tag_num,
 					       unsigned int tag_size,
 					       enum target_prot_op sup_prot_ops)
@@ -292,7 +310,7 @@ struct se_session *transport_init_session_tags(unsigned int tag_num,
 		return ERR_PTR(-EINVAL);
 	}
 
-	se_sess = transport_init_session(sup_prot_ops);
+	se_sess = transport_alloc_session(sup_prot_ops);
 	if (IS_ERR(se_sess))
 		return se_sess;
 
@@ -402,7 +420,7 @@ target_alloc_session(struct se_portal_group *tpg,
 	if (tag_num != 0)
 		sess = transport_init_session_tags(tag_num, tag_size, prot_op);
 	else
-		sess = transport_init_session(prot_op);
+		sess = transport_alloc_session(prot_op);
 
 	if (IS_ERR(sess))
 		return sess;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index b297aa0d9651..010df9d5b977 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -115,7 +115,7 @@ struct se_session *target_alloc_session(struct se_portal_group *,
 		int (*callback)(struct se_portal_group *,
 				struct se_session *, void *));
 
-struct se_session *transport_init_session(enum target_prot_op);
+struct se_session *transport_alloc_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
 		unsigned int);
 struct se_session *transport_init_session_tags(unsigned int, unsigned int,
-- 
cgit v1.2.3


From d1bff07f387c05cfaba1ea505fc2ee5eac6b2c21 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:52:52 -0700
Subject: scsi: target: Introduce transport_init_session()

Other than initializing xcopy_pt_sess.sess_wait_list, this patch does not
change any functionality.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 22 +++++++++++++++++-----
 drivers/target/target_core_xcopy.c     |  5 +----
 include/target/target_core_fabric.h    |  1 +
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 880e07f40099..595b2f12ca6a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -224,6 +224,22 @@ void transport_subsystem_check_init(void)
 	sub_api_initialized = 1;
 }
 
+/**
+ * transport_init_session - initialize a session object
+ * @se_sess: Session object pointer.
+ *
+ * The caller must have zero-initialized @se_sess before calling this function.
+ */
+void transport_init_session(struct se_session *se_sess)
+{
+	INIT_LIST_HEAD(&se_sess->sess_list);
+	INIT_LIST_HEAD(&se_sess->sess_acl_list);
+	INIT_LIST_HEAD(&se_sess->sess_cmd_list);
+	INIT_LIST_HEAD(&se_sess->sess_wait_list);
+	spin_lock_init(&se_sess->sess_cmd_lock);
+}
+EXPORT_SYMBOL(transport_init_session);
+
 /**
  * transport_alloc_session - allocate a session object and initialize it
  * @sup_prot_ops: bitmask that defines which T10-PI modes are supported.
@@ -238,11 +254,7 @@ struct se_session *transport_alloc_session(enum target_prot_op sup_prot_ops)
 				" se_sess_cache\n");
 		return ERR_PTR(-ENOMEM);
 	}
-	INIT_LIST_HEAD(&se_sess->sess_list);
-	INIT_LIST_HEAD(&se_sess->sess_acl_list);
-	INIT_LIST_HEAD(&se_sess->sess_cmd_list);
-	INIT_LIST_HEAD(&se_sess->sess_wait_list);
-	spin_lock_init(&se_sess->sess_cmd_lock);
+	transport_init_session(se_sess);
 	se_sess->sup_prot_ops = sup_prot_ops;
 
 	return se_sess;
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 9ee89e00cd77..2718a933c0c6 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -497,10 +497,7 @@ int target_xcopy_setup_pt(void)
 	INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list);
 	INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list);
 	memset(&xcopy_pt_sess, 0, sizeof(struct se_session));
-	INIT_LIST_HEAD(&xcopy_pt_sess.sess_list);
-	INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list);
-	INIT_LIST_HEAD(&xcopy_pt_sess.sess_cmd_list);
-	spin_lock_init(&xcopy_pt_sess.sess_cmd_lock);
+	transport_init_session(&xcopy_pt_sess);
 
 	xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg;
 	xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 010df9d5b977..84a681b6e5ca 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -115,6 +115,7 @@ struct se_session *target_alloc_session(struct se_portal_group *,
 		int (*callback)(struct se_portal_group *,
 				struct se_session *, void *));
 
+void transport_init_session(struct se_session *);
 struct se_session *transport_alloc_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
 		unsigned int);
-- 
cgit v1.2.3


From 00d909a10710a3416272d4179adf3837b9a9f3c2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:52:53 -0700
Subject: scsi: target: Make the session shutdown code also wait for commands
 that are being aborted

Target drivers must call target_sess_cmd_list_set_waiting() and
target_wait_for_sess_cmds() before freeing a session. Since freeing a
session is only safe after all commands that are associated with a session
have finished, make target_wait_for_sess_cmds() also wait for commands that
are being aborted. Instead of setting a flag in each pending command from
target_sess_cmd_list_set_waiting() and waiting in
target_wait_for_sess_cmds() on a per-command completion, only set a
per-session flag in the former function and wait on a per-session
completion in the latter function. This change is safe because once a SCSI
initiator system has submitted a command a target system is always allowed
to execute it to completion. See also commit 0f4a943168f3 ("target: Fix
remote-port TMR ABORT + se_cmd fabric stop").

This patch is based on the following two patches:

* Bart Van Assche, target: Simplify session shutdown code, February 19, 2015
  (https://github.com/bvanassche/linux/commit/8df5463d7d7619f2f1b70cfe5172eaef0aa52815).

* Christoph Hellwig, target: Rework session shutdown code, December 7, 2015
  (http://thread.gmane.org/gmane.linux.scsi.target.devel/10695).

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_tmr.c       |  5 +--
 drivers/target/target_core_transport.c | 77 ++++++++++------------------------
 include/target/target_core_base.h      |  3 +-
 3 files changed, 24 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 9c7bc1ca341a..da8125dd3a4c 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -142,7 +142,7 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
 			return false;
 		}
 	}
-	if (sess->sess_tearing_down || se_cmd->cmd_wait_set) {
+	if (sess->sess_tearing_down) {
 		pr_debug("Attempted to abort io tag: %llu already shutdown,"
 			" skipping\n", se_cmd->tag);
 		spin_unlock(&se_cmd->t_state_lock);
@@ -187,7 +187,6 @@ void core_tmr_abort_task(
 		if (!__target_check_io_state(se_cmd, se_sess, 0))
 			continue;
 
-		list_del_init(&se_cmd->se_cmd_list);
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 
 		cancel_work_sync(&se_cmd->work);
@@ -259,7 +258,7 @@ static void core_tmr_drain_tmr_list(
 			spin_unlock(&sess->sess_cmd_lock);
 			continue;
 		}
-		if (sess->sess_tearing_down || cmd->cmd_wait_set) {
+		if (sess->sess_tearing_down) {
 			spin_unlock(&cmd->t_state_lock);
 			spin_unlock(&sess->sess_cmd_lock);
 			continue;
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 595b2f12ca6a..087dddfc5c35 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -235,8 +235,8 @@ void transport_init_session(struct se_session *se_sess)
 	INIT_LIST_HEAD(&se_sess->sess_list);
 	INIT_LIST_HEAD(&se_sess->sess_acl_list);
 	INIT_LIST_HEAD(&se_sess->sess_cmd_list);
-	INIT_LIST_HEAD(&se_sess->sess_wait_list);
 	spin_lock_init(&se_sess->sess_cmd_lock);
+	init_waitqueue_head(&se_sess->cmd_list_wq);
 }
 EXPORT_SYMBOL(transport_init_session);
 
@@ -2728,13 +2728,15 @@ static void target_release_cmd_kref(struct kref *kref)
 	if (se_sess) {
 		spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 		list_del_init(&se_cmd->se_cmd_list);
+		if (list_empty(&se_sess->sess_cmd_list))
+			wake_up(&se_sess->cmd_list_wq);
 
 		spin_lock(&se_cmd->t_state_lock);
 		fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) &&
 			      (se_cmd->transport_state & CMD_T_ABORTED);
 		spin_unlock(&se_cmd->t_state_lock);
 
-		if (se_cmd->cmd_wait_set || fabric_stop) {
+		if (fabric_stop) {
 			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 			target_free_cmd_mem(se_cmd);
 			complete(&se_cmd->cmd_wait_comp);
@@ -2863,78 +2865,41 @@ void target_show_cmd(const char *pfx, struct se_cmd *cmd)
 EXPORT_SYMBOL(target_show_cmd);
 
 /**
- * target_sess_cmd_list_set_waiting - Flag all commands in
- *         sess_cmd_list to complete cmd_wait_comp.  Set
- *         sess_tearing_down so no more commands are queued.
+ * target_sess_cmd_list_set_waiting - Set sess_tearing_down so no new commands are queued.
  * @se_sess:	session to flag
  */
 void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
 {
-	struct se_cmd *se_cmd, *tmp_cmd;
 	unsigned long flags;
-	int rc;
 
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
-	if (se_sess->sess_tearing_down) {
-		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-		return;
-	}
 	se_sess->sess_tearing_down = 1;
-	list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list);
-
-	list_for_each_entry_safe(se_cmd, tmp_cmd,
-				 &se_sess->sess_wait_list, se_cmd_list) {
-		rc = kref_get_unless_zero(&se_cmd->cmd_kref);
-		if (rc) {
-			se_cmd->cmd_wait_set = 1;
-			spin_lock(&se_cmd->t_state_lock);
-			se_cmd->transport_state |= CMD_T_FABRIC_STOP;
-			spin_unlock(&se_cmd->t_state_lock);
-		} else
-			list_del_init(&se_cmd->se_cmd_list);
-	}
-
 	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 }
 EXPORT_SYMBOL(target_sess_cmd_list_set_waiting);
 
 /**
- * target_wait_for_sess_cmds - Wait for outstanding descriptors
+ * target_wait_for_sess_cmds - Wait for outstanding commands
  * @se_sess:    session to wait for active I/O
  */
 void target_wait_for_sess_cmds(struct se_session *se_sess)
 {
-	struct se_cmd *se_cmd, *tmp_cmd;
-	unsigned long flags;
-	bool tas;
-
-	list_for_each_entry_safe(se_cmd, tmp_cmd,
-				&se_sess->sess_wait_list, se_cmd_list) {
-		pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:"
-			" %d\n", se_cmd, se_cmd->t_state,
-			se_cmd->se_tfo->get_cmd_state(se_cmd));
-
-		spin_lock_irqsave(&se_cmd->t_state_lock, flags);
-		tas = (se_cmd->transport_state & CMD_T_TAS);
-		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
-
-		if (!target_put_sess_cmd(se_cmd)) {
-			if (tas)
-				target_put_sess_cmd(se_cmd);
-		}
-
-		wait_for_completion(&se_cmd->cmd_wait_comp);
-		pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d"
-			" fabric state: %d\n", se_cmd, se_cmd->t_state,
-			se_cmd->se_tfo->get_cmd_state(se_cmd));
-
-		se_cmd->se_tfo->release_cmd(se_cmd);
-	}
-
-	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
-	WARN_ON(!list_empty(&se_sess->sess_cmd_list));
-	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+	struct se_cmd *cmd;
+	int ret;
 
+	WARN_ON_ONCE(!se_sess->sess_tearing_down);
+
+	spin_lock_irq(&se_sess->sess_cmd_lock);
+	do {
+		ret = wait_event_interruptible_lock_irq_timeout(
+				se_sess->cmd_list_wq,
+				list_empty(&se_sess->sess_cmd_list),
+				se_sess->sess_cmd_lock, 180 * HZ);
+		list_for_each_entry(cmd, &se_sess->sess_cmd_list, se_cmd_list)
+			target_show_cmd("session shutdown: still waiting for ",
+					cmd);
+	} while (ret <= 0);
+	spin_unlock_irq(&se_sess->sess_cmd_lock);
 }
 EXPORT_SYMBOL(target_wait_for_sess_cmds);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 448f291125c2..906c1fc485e4 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -443,7 +443,6 @@ struct se_cmd {
 	u8			scsi_asc;
 	u8			scsi_ascq;
 	u16			scsi_sense_length;
-	unsigned		cmd_wait_set:1;
 	unsigned		unknown_data_length:1;
 	bool			state_active:1;
 	u64			tag; /* SAM command identifier aka task tag */
@@ -606,8 +605,8 @@ struct se_session {
 	struct list_head	sess_list;
 	struct list_head	sess_acl_list;
 	struct list_head	sess_cmd_list;
-	struct list_head	sess_wait_list;
 	spinlock_t		sess_cmd_lock;
+	wait_queue_head_t	cmd_list_wq;
 	void			*sess_cmd_map;
 	struct sbitmap_queue	sess_tag_pool;
 };
-- 
cgit v1.2.3


From 7b2cc7dc0dbf5da9cf16ffcf1ca8e904ab574ff5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:52:59 -0700
Subject: scsi: target: Simplify the code for waiting for command completion

Instead of embedding the completion that is used for waiting for command
completion in struct se_cmd, let the context that waits for command
completion allocate it. This makes it possible to have a single code path
for non-aborted and aborted commands in target_release_cmd_kref() and
avoids that transport_generic_free_cmd() has to call
cmd->se_tfo->release_cmd() directly. This patch does not change any
functionality. Note: transport_generic_free_cmd() only waits until the
se_cmd reference count has reached zero after it has set both
CMD_T_FABRIC_STOP and CMD_T_ABORTED.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 24 ++++++++----------------
 include/target/target_core_base.h      |  2 +-
 2 files changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ebd54fc1f13a..cb48dbc2c9ba 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1309,7 +1309,7 @@ void transport_init_se_cmd(
 	INIT_LIST_HEAD(&cmd->se_cmd_list);
 	INIT_LIST_HEAD(&cmd->state_list);
 	init_completion(&cmd->t_transport_stop_comp);
-	init_completion(&cmd->cmd_wait_comp);
+	cmd->compl = NULL;
 	spin_lock_init(&cmd->t_state_lock);
 	INIT_WORK(&cmd->work, NULL);
 	kref_init(&cmd->cmd_kref);
@@ -2658,6 +2658,7 @@ static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas)
  */
 int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
 {
+	DECLARE_COMPLETION_ONSTACK(compl);
 	int ret = 0;
 	bool aborted = false, tas = false;
 
@@ -2676,12 +2677,13 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
 		if (cmd->se_lun)
 			transport_lun_remove_cmd(cmd);
 	}
+	if (aborted)
+		cmd->compl = &compl;
 	if (!aborted || tas)
 		ret = target_put_sess_cmd(cmd);
 	if (aborted) {
 		pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag);
-		wait_for_completion(&cmd->cmd_wait_comp);
-		cmd->se_tfo->release_cmd(cmd);
+		wait_for_completion(&compl);
 		ret = 1;
 	}
 	return ret;
@@ -2742,31 +2744,21 @@ static void target_release_cmd_kref(struct kref *kref)
 {
 	struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
 	struct se_session *se_sess = se_cmd->se_sess;
+	struct completion *compl = se_cmd->compl;
 	unsigned long flags;
-	bool fabric_stop;
 
 	if (se_sess) {
 		spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 		list_del_init(&se_cmd->se_cmd_list);
 		if (list_empty(&se_sess->sess_cmd_list))
 			wake_up(&se_sess->cmd_list_wq);
-
-		spin_lock(&se_cmd->t_state_lock);
-		fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP) &&
-			      (se_cmd->transport_state & CMD_T_ABORTED);
-		spin_unlock(&se_cmd->t_state_lock);
-
-		if (fabric_stop) {
-			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-			target_free_cmd_mem(se_cmd);
-			complete(&se_cmd->cmd_wait_comp);
-			return;
-		}
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
 	}
 
 	target_free_cmd_mem(se_cmd);
 	se_cmd->se_tfo->release_cmd(se_cmd);
+	if (compl)
+		complete(compl);
 }
 
 /**
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 906c1fc485e4..ca59e065c1fd 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -475,7 +475,7 @@ struct se_cmd {
 	struct se_session	*se_sess;
 	struct se_tmr_req	*se_tmr_req;
 	struct list_head	se_cmd_list;
-	struct completion	cmd_wait_comp;
+	struct completion	*compl;
 	const struct target_core_fabric_ops *se_tfo;
 	sense_reason_t		(*execute_cmd)(struct se_cmd *);
 	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *);
-- 
cgit v1.2.3


From aa090eabcb341b0c023e802884235c29598df1e5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:53:02 -0700
Subject: scsi: target: Remove second argument from fabric_make_tpg()

Since most target drivers do not use the second fabric_make_tpg() argument
("group") and since it is trivial to derive the group pointer from the wwn
pointer, do not pass the group pointer to fabric_make_tpg().

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Felipe Balbi <felipe.balbi@linux.intel.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c        |  2 --
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c     |  1 -
 drivers/scsi/qla2xxx/tcm_qla2xxx.c           | 12 ++++--------
 drivers/target/iscsi/iscsi_target_configfs.c |  6 ++----
 drivers/target/loopback/tcm_loop.c           |  6 ++----
 drivers/target/sbp/sbp_target.c              |  6 ++----
 drivers/target/target_core_fabric_configfs.c |  2 +-
 drivers/target/tcm_fc/tfc_conf.c             |  5 +----
 drivers/usb/gadget/function/f_tcm.c          |  8 +++-----
 drivers/vhost/scsi.c                         |  4 +---
 drivers/xen/xen-scsiback.c                   |  4 +---
 include/target/target_core_fabric.h          |  2 +-
 12 files changed, 18 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 3081c629a7f7..07b3e1c583bd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -3597,11 +3597,9 @@ static struct configfs_attribute *srpt_tpg_attrs[] = {
 /**
  * srpt_make_tpg - configfs callback invoked for mkdir /sys/kernel/config/target/$driver/$port/$tpg
  * @wwn: Corresponds to $driver/$port.
- * @group: Not used.
  * @name: $tpg.
  */
 static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
-					     struct config_group *group,
 					     const char *name)
 {
 	struct srpt_port *sport = wwn->priv;
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index c3a76af9f5fa..fdda04e5cf94 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3928,7 +3928,6 @@ static void ibmvscsis_drop_tport(struct se_wwn *wwn)
 }
 
 static struct se_portal_group *ibmvscsis_make_tpg(struct se_wwn *wwn,
-						  struct config_group *group,
 						  const char *name)
 {
 	struct ibmvscsis_tport *tport =
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 7732e9336d43..cfb5d6067f9f 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1049,10 +1049,8 @@ static struct configfs_attribute *tcm_qla2xxx_tpg_attrs[] = {
 	NULL,
 };
 
-static struct se_portal_group *tcm_qla2xxx_make_tpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *tcm_qla2xxx_make_tpg(struct se_wwn *wwn,
+						    const char *name)
 {
 	struct tcm_qla2xxx_lport *lport = container_of(wwn,
 			struct tcm_qla2xxx_lport, lport_wwn);
@@ -1171,10 +1169,8 @@ static struct configfs_attribute *tcm_qla2xxx_npiv_tpg_attrs[] = {
         NULL,
 };
 
-static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg(struct se_wwn *wwn,
+							 const char *name)
 {
 	struct tcm_qla2xxx_lport *lport = container_of(wwn,
 			struct tcm_qla2xxx_lport, lport_wwn);
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 0bc346b2c27c..1fcd9bc7189d 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1090,10 +1090,8 @@ static struct configfs_attribute *lio_target_tpg_attrs[] = {
 
 /* Start items for lio_target_tiqn_cit */
 
-static struct se_portal_group *lio_target_tiqn_addtpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *lio_target_tiqn_addtpg(struct se_wwn *wwn,
+						      const char *name)
 {
 	struct iscsi_portal_group *tpg;
 	struct iscsi_tiqn *tiqn;
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 60d5b918c4ac..4c8a97711291 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -983,10 +983,8 @@ static struct configfs_attribute *tcm_loop_tpg_attrs[] = {
 
 /* Start items for tcm_loop_naa_cit */
 
-static struct se_portal_group *tcm_loop_make_naa_tpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *tcm_loop_make_naa_tpg(struct se_wwn *wwn,
+						     const char *name)
 {
 	struct tcm_loop_hba *tl_hba = container_of(wwn,
 			struct tcm_loop_hba, tl_hba_wwn);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 42b21f2ac8b0..b61b79ac98ff 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -2006,10 +2006,8 @@ static void sbp_pre_unlink_lun(
 		pr_err("unlink LUN: failed to update unit directory\n");
 }
 
-static struct se_portal_group *sbp_make_tpg(
-		struct se_wwn *wwn,
-		struct config_group *group,
-		const char *name)
+static struct se_portal_group *sbp_make_tpg(struct se_wwn *wwn,
+					    const char *name)
 {
 	struct sbp_tport *tport =
 		container_of(wwn, struct sbp_tport, tport_wwn);
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index e1416b007aa4..1fa436e865f9 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -841,7 +841,7 @@ static struct config_group *target_fabric_make_tpg(
 		return ERR_PTR(-ENOSYS);
 	}
 
-	se_tpg = tf->tf_ops->fabric_make_tpg(wwn, group, name);
+	se_tpg = tf->tf_ops->fabric_make_tpg(wwn, name);
 	if (!se_tpg || IS_ERR(se_tpg))
 		return ERR_PTR(-EINVAL);
 
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index 42ee91123dca..e55c4d537592 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -223,10 +223,7 @@ static int ft_init_nodeacl(struct se_node_acl *nacl, const char *name)
 /*
  * local_port port_group (tpg) ops.
  */
-static struct se_portal_group *ft_add_tpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *ft_add_tpg(struct se_wwn *wwn, const char *name)
 {
 	struct ft_lport_wwn *ft_wwn;
 	struct ft_tpg *tpg;
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 5003e857dce7..4f183176b0b4 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1344,10 +1344,8 @@ static int usbg_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
 	return 0;
 }
 
-static struct se_portal_group *usbg_make_tpg(
-	struct se_wwn *wwn,
-	struct config_group *group,
-	const char *name)
+static struct se_portal_group *usbg_make_tpg(struct se_wwn *wwn,
+					     const char *name)
 {
 	struct usbg_tport *tport = container_of(wwn, struct usbg_tport,
 			tport_wwn);
@@ -1380,7 +1378,7 @@ static struct se_portal_group *usbg_make_tpg(
 			goto unlock_dep;
 	} else {
 		ret = configfs_depend_item_unlocked(
-			group->cg_subsys,
+			wwn->wwn_group.cg_subsys,
 			&opts->func_inst.group.cg_item);
 		if (ret)
 			goto unlock_dep;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index ebaf831285ea..c84a6edd4c25 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1912,9 +1912,7 @@ static struct configfs_attribute *vhost_scsi_tpg_attrs[] = {
 };
 
 static struct se_portal_group *
-vhost_scsi_make_tpg(struct se_wwn *wwn,
-		   struct config_group *group,
-		   const char *name)
+vhost_scsi_make_tpg(struct se_wwn *wwn, const char *name)
 {
 	struct vhost_scsi_tport *tport = container_of(wwn,
 			struct vhost_scsi_tport, tport_wwn);
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 764dd9aa0131..fd77ccfc7d6e 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1732,9 +1732,7 @@ static void scsiback_port_unlink(struct se_portal_group *se_tpg,
 }
 
 static struct se_portal_group *
-scsiback_make_tpg(struct se_wwn *wwn,
-		   struct config_group *group,
-		   const char *name)
+scsiback_make_tpg(struct se_wwn *wwn, const char *name)
 {
 	struct scsiback_tport *tport = container_of(wwn,
 			struct scsiback_tport, tport_wwn);
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 84a681b6e5ca..f61aa716cfe1 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -79,7 +79,7 @@ struct target_core_fabric_ops {
 	void (*fabric_drop_wwn)(struct se_wwn *);
 	void (*add_wwn_groups)(struct se_wwn *);
 	struct se_portal_group *(*fabric_make_tpg)(struct se_wwn *,
-				struct config_group *, const char *);
+						   const char *);
 	void (*fabric_drop_tpg)(struct se_portal_group *);
 	int (*fabric_post_link)(struct se_portal_group *,
 				struct se_lun *);
-- 
cgit v1.2.3


From e936a38ac92dd40867ac3b52cfd8f3f70fe717a5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 14:53:07 -0700
Subject: scsi: target: Remove se_dev_entry.ua_count

se_dev_entry.ua_count is only used to check whether or not
se_dev_entry.ua_list is empty. Use list_empty_careful() instead.  Checking
whether or not ua_list is empty without holding the lock that protects that
list is fine because the code that dequeues from that list will check again
whether or not that list is empty.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_device.c |  1 -
 drivers/target/target_core_ua.c     | 12 ++----------
 include/target/target_core_base.h   |  1 -
 3 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index e5c90afb40e6..73675eec740d 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -336,7 +336,6 @@ int core_enable_device_list_for_node(
 		return -ENOMEM;
 	}
 
-	atomic_set(&new->ua_count, 0);
 	spin_lock_init(&new->ua_lock);
 	INIT_LIST_HEAD(&new->ua_list);
 	INIT_LIST_HEAD(&new->lun_link);
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index 9399b38339f9..c8ac242ce888 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -55,7 +55,7 @@ target_scsi3_ua_check(struct se_cmd *cmd)
 		rcu_read_unlock();
 		return 0;
 	}
-	if (!atomic_read(&deve->ua_count)) {
+	if (list_empty_careful(&deve->ua_list)) {
 		rcu_read_unlock();
 		return 0;
 	}
@@ -154,7 +154,6 @@ int core_scsi3_ua_allocate(
 				&deve->ua_list);
 		spin_unlock(&deve->ua_lock);
 
-		atomic_inc_mb(&deve->ua_count);
 		return 0;
 	}
 	list_add_tail(&ua->ua_nacl_list, &deve->ua_list);
@@ -164,7 +163,6 @@ int core_scsi3_ua_allocate(
 		" 0x%02x, ASCQ: 0x%02x\n", deve->mapped_lun,
 		asc, ascq);
 
-	atomic_inc_mb(&deve->ua_count);
 	return 0;
 }
 
@@ -196,8 +194,6 @@ void core_scsi3_ua_release_all(
 	list_for_each_entry_safe(ua, ua_p, &deve->ua_list, ua_nacl_list) {
 		list_del(&ua->ua_nacl_list);
 		kmem_cache_free(se_ua_cache, ua);
-
-		atomic_dec_mb(&deve->ua_count);
 	}
 	spin_unlock(&deve->ua_lock);
 }
@@ -263,8 +259,6 @@ bool core_scsi3_ua_for_check_condition(struct se_cmd *cmd, u8 *key, u8 *asc,
 		}
 		list_del(&ua->ua_nacl_list);
 		kmem_cache_free(se_ua_cache, ua);
-
-		atomic_dec_mb(&deve->ua_count);
 	}
 	spin_unlock(&deve->ua_lock);
 	rcu_read_unlock();
@@ -304,7 +298,7 @@ int core_scsi3_ua_clear_for_request_sense(
 		rcu_read_unlock();
 		return -EINVAL;
 	}
-	if (!atomic_read(&deve->ua_count)) {
+	if (list_empty_careful(&deve->ua_list)) {
 		rcu_read_unlock();
 		return -EPERM;
 	}
@@ -327,8 +321,6 @@ int core_scsi3_ua_clear_for_request_sense(
 		}
 		list_del(&ua->ua_nacl_list);
 		kmem_cache_free(se_ua_cache, ua);
-
-		atomic_dec_mb(&deve->ua_count);
 	}
 	spin_unlock(&deve->ua_lock);
 	rcu_read_unlock();
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index ca59e065c1fd..7a4ee7852ca4 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -638,7 +638,6 @@ struct se_dev_entry {
 	atomic_long_t		total_cmds;
 	atomic_long_t		read_bytes;
 	atomic_long_t		write_bytes;
-	atomic_t		ua_count;
 	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
 	struct kref		pr_kref;
 	struct completion	pr_comp;
-- 
cgit v1.2.3


From a9744f7ca200c756e6f8c65b633770a2da711651 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 29 Jun 2018 09:48:20 +0200
Subject: xsk: fix potential race in SKB TX completion code

There is a potential race in the TX completion code for the SKB
case. One process enters the sendmsg code of an AF_XDP socket in order
to send a frame. The execution eventually trickles down to the driver
that is told to send the packet. However, it decides to drop the
packet due to some error condition (e.g., rings full) and frees the
SKB. This will trigger the SKB destructor and a completion will be
sent to the AF_XDP user space through its
single-producer/single-consumer queues.

At the same time a TX interrupt has fired on another core and it
dispatches the TX completion code in the driver. It does its HW
specific things and ends up freeing the SKB associated with the
transmitted packet. This will trigger the SKB destructor and a
completion will be sent to the AF_XDP user space through its
single-producer/single-consumer queues. With a pseudo call stack, it
would look like this:

Core 1:
sendmsg() being called in the application
  netdev_start_xmit()
    Driver entered through ndo_start_xmit
      Driver decides to free the SKB for some reason (e.g., rings full)
        Destructor of SKB called
          xskq_produce_addr() is called to signal completion to user space

Core 2:
TX completion irq
  NAPI loop
    Driver irq handler for TX completions
      Frees the SKB
        Destructor of SKB called
          xskq_produce_addr() is called to signal completion to user space

We now have a violation of the single-producer/single-consumer
principle for our queues as there are two threads trying to produce at
the same time on the same queue.

Fixed by introducing a spin_lock in the destructor. In regards to the
performance, I get around 1.74 Mpps for txonly before and after the
introduction of the spinlock. There is of course some impact due to
the spin lock but it is in the less significant digits that are too
noisy for me to measure. But let us say that the version without the
spin lock got 1.745 Mpps in the best case and the version with 1.735
Mpps in the worst case, then that would mean a maximum drop in
performance of 0.5%.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp_sock.h | 4 ++++
 net/xdp/xsk.c          | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 9fe472f2ac95..7161856bcf9c 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -60,6 +60,10 @@ struct xdp_sock {
 	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
+	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
+	 * in the SKB destructor callback.
+	 */
+	spinlock_t tx_completion_lock;
 	u64 rx_dropped;
 };
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 15aca73805fc..7d220cbd09b6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 {
 	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
 	struct xdp_sock *xs = xdp_sk(skb->sk);
+	unsigned long flags;
 
+	spin_lock_irqsave(&xs->tx_completion_lock, flags);
 	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
 
 	sock_wfree(skb);
 }
@@ -755,6 +758,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 
 	xs = xdp_sk(sk);
 	mutex_init(&xs->mutex);
+	spin_lock_init(&xs->tx_completion_lock);
 
 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
-- 
cgit v1.2.3


From dc8fbeb0ffde1f2395449006019e2c89c177df50 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:26 +0200
Subject: ARM: OMAP1: Get rid of <mach/ams-delta-fiq.h>

Split the header file into two parts and move them to directories where
they belong.

Information on internal structure of FIQ buffer is moved to
<linux/platform_data/ams-delta-fiq.h> for ams-delta-serio driver use.

Other information used by ams-delta board init file and FIQ code is
made local to mach-omap1 root directory.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 MAINTAINERS                                      |  1 +
 arch/arm/mach-omap1/ams-delta-fiq-handler.S      |  5 +-
 arch/arm/mach-omap1/ams-delta-fiq.c              |  7 ++-
 arch/arm/mach-omap1/ams-delta-fiq.h              | 41 ++++++++++++
 arch/arm/mach-omap1/board-ams-delta.c            |  2 +-
 arch/arm/mach-omap1/include/mach/ams-delta-fiq.h | 79 ------------------------
 drivers/input/serio/ams_delta_serio.c            |  3 +-
 include/linux/platform_data/ams-delta-fiq.h      | 62 +++++++++++++++++++
 8 files changed, 113 insertions(+), 87 deletions(-)
 create mode 100644 arch/arm/mach-omap1/ams-delta-fiq.h
 delete mode 100644 arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
 create mode 100644 include/linux/platform_data/ams-delta-fiq.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d5eeff51b5f..c78b7dc42a81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10391,6 +10391,7 @@ F:	arch/arm/plat-omap/
 F:	arch/arm/configs/omap1_defconfig
 F:	drivers/i2c/busses/i2c-omap.c
 F:	include/linux/platform_data/i2c-omap.h
+F:	include/linux/platform_data/ams-delta-fiq.h
 
 OMAP2+ SUPPORT
 M:	Tony Lindgren <tony@atomide.com>
diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
index bf608441b357..ddc27638ba2a 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S
+++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
@@ -14,11 +14,12 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/assembler.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 
+#include <asm/assembler.h>
 #include <mach/board-ams-delta.h>
-#include <mach/ams-delta-fiq.h>
 
+#include "ams-delta-fiq.h"
 #include "iomap.h"
 #include "soc.h"
 
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 5a6c59ac9b5f..e72935034d42 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -19,12 +19,13 @@
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 
 #include <mach/board-ams-delta.h>
 
 #include <asm/fiq.h>
 
-#include <mach/ams-delta-fiq.h>
+#include "ams-delta-fiq.h"
 
 static struct fiq_handler fh = {
 	.name	= "ams-delta-fiq"
@@ -35,8 +36,8 @@ static struct fiq_handler fh = {
  * The FIQ and IRQ isrs can both read and write it.
  * It is structured as a header section several 32bit slots,
  * followed by the circular buffer where the FIQ isr stores
- * keystrokes received from the qwerty keyboard.
- * See ams-delta-fiq.h for details of offsets.
+ * keystrokes received from the qwerty keyboard.  See
+ * <linux/platform_data/ams-delta-fiq.h> for details of offsets.
  */
 unsigned int fiq_buffer[1024];
 EXPORT_SYMBOL(fiq_buffer);
diff --git a/arch/arm/mach-omap1/ams-delta-fiq.h b/arch/arm/mach-omap1/ams-delta-fiq.h
new file mode 100644
index 000000000000..3f691d68aa62
--- /dev/null
+++ b/arch/arm/mach-omap1/ams-delta-fiq.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * arch/arm/mach-omap1/ams-delta-fiq.h
+ *
+ * Taken from the original Amstrad modifications to fiq.h
+ *
+ * Copyright (c) 2004 Amstrad Plc
+ * Copyright (c) 2006 Matt Callow
+ * Copyright (c) 2010 Janusz Krzysztofik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __AMS_DELTA_FIQ_H
+#define __AMS_DELTA_FIQ_H
+
+#include <mach/irqs.h>
+
+/*
+ * Interrupt number used for passing control from FIQ to IRQ.
+ * IRQ12, described as reserved, has been selected.
+ */
+#define INT_DEFERRED_FIQ	INT_1510_RES12
+/*
+ * Base address of an interrupt handler that the INT_DEFERRED_FIQ belongs to.
+ */
+#if (INT_DEFERRED_FIQ < IH2_BASE)
+#define DEFERRED_FIQ_IH_BASE	OMAP_IH1_BASE
+#else
+#define DEFERRED_FIQ_IH_BASE	OMAP_IH2_BASE
+#endif
+
+#ifndef __ASSEMBLER__
+extern unsigned char qwerty_fiqin_start, qwerty_fiqin_end;
+
+extern void __init ams_delta_init_fiq(struct gpio_chip *chip);
+#endif
+
+#endif
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index e937372e704a..902c24cc8508 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -41,10 +41,10 @@
 #include <mach/mux.h>
 
 #include <mach/hardware.h>
-#include <mach/ams-delta-fiq.h>
 #include "camera.h"
 #include <mach/usb.h>
 
+#include "ams-delta-fiq.h"
 #include "iomap.h"
 #include "common.h"
 
diff --git a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h b/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
deleted file mode 100644
index a9769ff396bc..000000000000
--- a/arch/arm/mach-omap1/include/mach/ams-delta-fiq.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * arch/arm/mach-omap1/include/ams-delta-fiq.h
- *
- * Taken from the original Amstrad modifications to fiq.h
- *
- * Copyright (c) 2004 Amstrad Plc
- * Copyright (c) 2006 Matt Callow
- * Copyright (c) 2010 Janusz Krzysztofik
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __AMS_DELTA_FIQ_H
-#define __AMS_DELTA_FIQ_H
-
-#include <mach/irqs.h>
-
-/*
- * Interrupt number used for passing control from FIQ to IRQ.
- * IRQ12, described as reserved, has been selected.
- */
-#define INT_DEFERRED_FIQ	INT_1510_RES12
-/*
- * Base address of an interrupt handler that the INT_DEFERRED_FIQ belongs to.
- */
-#if (INT_DEFERRED_FIQ < IH2_BASE)
-#define DEFERRED_FIQ_IH_BASE	OMAP_IH1_BASE
-#else
-#define DEFERRED_FIQ_IH_BASE	OMAP_IH2_BASE
-#endif
-
-/*
- * These are the offsets from the beginning of the fiq_buffer. They are put here
- * since the buffer and header need to be accessed by drivers servicing devices
- * which generate GPIO interrupts - e.g. keyboard, modem, hook switch.
- */
-#define FIQ_MASK		 0
-#define FIQ_STATE		 1
-#define FIQ_KEYS_CNT		 2
-#define FIQ_TAIL_OFFSET		 3
-#define FIQ_HEAD_OFFSET		 4
-#define FIQ_BUF_LEN		 5
-#define FIQ_KEY			 6
-#define FIQ_MISSED_KEYS		 7
-#define FIQ_BUFFER_START	 8
-#define FIQ_GPIO_INT_MASK	 9
-#define FIQ_KEYS_HICNT		10
-#define FIQ_IRQ_PEND		11
-#define FIQ_SIR_CODE_L1		12
-#define IRQ_SIR_CODE_L2		13
-
-#define FIQ_CNT_INT_00		14
-#define FIQ_CNT_INT_KEY		15
-#define FIQ_CNT_INT_MDM		16
-#define FIQ_CNT_INT_03		17
-#define FIQ_CNT_INT_HSW		18
-#define FIQ_CNT_INT_05		19
-#define FIQ_CNT_INT_06		20
-#define FIQ_CNT_INT_07		21
-#define FIQ_CNT_INT_08		22
-#define FIQ_CNT_INT_09		23
-#define FIQ_CNT_INT_10		24
-#define FIQ_CNT_INT_11		25
-#define FIQ_CNT_INT_12		26
-#define FIQ_CNT_INT_13		27
-#define FIQ_CNT_INT_14		28
-#define FIQ_CNT_INT_15		29
-
-#define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
-
-#ifndef __ASSEMBLER__
-extern unsigned int fiq_buffer[];
-extern unsigned char qwerty_fiqin_start, qwerty_fiqin_end;
-
-extern void __init ams_delta_init_fiq(struct gpio_chip *chip);
-#endif
-
-#endif
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index 7952a29f9540..2602f7cff5ae 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -22,6 +22,7 @@
  */
 #include <linux/gpio.h>
 #include <linux/irq.h>
+#include <linux/platform_data/ams-delta-fiq.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/serio.h>
@@ -30,8 +31,6 @@
 
 #include <mach/board-ams-delta.h>
 
-#include <mach/ams-delta-fiq.h>
-
 #define DRIVER_NAME	"ams-delta-serio"
 
 MODULE_AUTHOR("Matt Callow");
diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
new file mode 100644
index 000000000000..dc0f835ea918
--- /dev/null
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * include/linux/platform_data/ams-delta-fiq.h
+ *
+ * Taken from the original Amstrad modifications to fiq.h
+ *
+ * Copyright (c) 2004 Amstrad Plc
+ * Copyright (c) 2006 Matt Callow
+ * Copyright (c) 2010 Janusz Krzysztofik
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+#define __LINUX_PLATFORM_DATA_AMS_DELTA_FIQ_H
+
+/*
+ * These are the offsets from the beginning of the fiq_buffer. They are put here
+ * since the buffer and header need to be accessed by drivers servicing devices
+ * which generate GPIO interrupts - e.g. keyboard, modem, hook switch.
+ */
+#define FIQ_MASK		 0
+#define FIQ_STATE		 1
+#define FIQ_KEYS_CNT		 2
+#define FIQ_TAIL_OFFSET		 3
+#define FIQ_HEAD_OFFSET		 4
+#define FIQ_BUF_LEN		 5
+#define FIQ_KEY			 6
+#define FIQ_MISSED_KEYS		 7
+#define FIQ_BUFFER_START	 8
+#define FIQ_GPIO_INT_MASK	 9
+#define FIQ_KEYS_HICNT		10
+#define FIQ_IRQ_PEND		11
+#define FIQ_SIR_CODE_L1		12
+#define IRQ_SIR_CODE_L2		13
+
+#define FIQ_CNT_INT_00		14
+#define FIQ_CNT_INT_KEY		15
+#define FIQ_CNT_INT_MDM		16
+#define FIQ_CNT_INT_03		17
+#define FIQ_CNT_INT_HSW		18
+#define FIQ_CNT_INT_05		19
+#define FIQ_CNT_INT_06		20
+#define FIQ_CNT_INT_07		21
+#define FIQ_CNT_INT_08		22
+#define FIQ_CNT_INT_09		23
+#define FIQ_CNT_INT_10		24
+#define FIQ_CNT_INT_11		25
+#define FIQ_CNT_INT_12		26
+#define FIQ_CNT_INT_13		27
+#define FIQ_CNT_INT_14		28
+#define FIQ_CNT_INT_15		29
+
+#define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
+
+#ifndef __ASSEMBLER__
+extern unsigned int fiq_buffer[];
+#endif
+
+#endif
-- 
cgit v1.2.3


From 5f73861fae087df19f7337620da65c99e4260c72 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Fri, 22 Jun 2018 00:41:28 +0200
Subject: Input: ams_delta_serio: Get FIQ buffer from platform_data

Instead of exporting the FIQ buffer symbol to be used in
ams-delta-serio driver, pass it to the driver as platform_data.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/ams-delta-fiq.c         |  6 +++---
 arch/arm/mach-omap1/board-ams-delta.c       |  8 ++++++++
 drivers/input/serio/ams_delta_serio.c       | 20 +++++++++++++-------
 include/linux/platform_data/ams-delta-fiq.h |  4 ----
 4 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c
index 82ca4246a5e4..b0dc7ddf5877 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq.c
+++ b/arch/arm/mach-omap1/ams-delta-fiq.c
@@ -40,8 +40,7 @@ static struct fiq_handler fh = {
  * keystrokes received from the qwerty keyboard.  See
  * <linux/platform_data/ams-delta-fiq.h> for details of offsets.
  */
-unsigned int fiq_buffer[1024];
-EXPORT_SYMBOL(fiq_buffer);
+static unsigned int fiq_buffer[1024];
 
 static struct irq_chip *irq_chip;
 static struct irq_data *irq_data[16];
@@ -203,9 +202,10 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip,
 	val = omap_readl(OMAP_IH1_BASE + offset) | 1;
 	omap_writel(val, OMAP_IH1_BASE + offset);
 
-	/* Initialize serio device IRQ resource */
+	/* Initialize serio device IRQ resource and platform_data */
 	serio->resource[0].start = gpiod_to_irq(clk);
 	serio->resource[0].end = serio->resource[0].start;
+	serio->dev.platform_data = fiq_buffer;
 
 	/*
 	 * Since FIQ handler performs handling of GPIO registers for
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index f453b08ed7ef..22f9be297c2a 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -537,6 +537,14 @@ static struct resource ams_delta_serio_resources[] = {
 static struct platform_device ams_delta_serio_device = {
 	.name		= "ams-delta-serio",
 	.id		= PLATFORM_DEVID_NONE,
+	.dev		= {
+		/*
+		 * Initialize .platform_data explicitly with NULL to
+		 * indicate it is going to be used.  It will be replaced
+		 * with FIQ buffer address as soon as FIQ is initialized.
+		 */
+		.platform_data = NULL,
+	},
 	.num_resources	= ARRAY_SIZE(ams_delta_serio_resources),
 	.resource	= ams_delta_serio_resources,
 };
diff --git a/drivers/input/serio/ams_delta_serio.c b/drivers/input/serio/ams_delta_serio.c
index c1f8226f172e..f8663d7891f2 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -37,6 +37,7 @@ MODULE_LICENSE("GPL");
 struct ams_delta_serio {
 	struct serio *serio;
 	struct regulator *vcc;
+	unsigned int *fiq_buffer;
 };
 
 static int check_data(struct serio *serio, int data)
@@ -66,22 +67,23 @@ static int check_data(struct serio *serio, int data)
 static irqreturn_t ams_delta_serio_interrupt(int irq, void *dev_id)
 {
 	struct ams_delta_serio *priv = dev_id;
-	int *circ_buff = &fiq_buffer[FIQ_CIRC_BUFF];
+	int *circ_buff = &priv->fiq_buffer[FIQ_CIRC_BUFF];
 	int data, dfl;
 	u8 scancode;
 
-	fiq_buffer[FIQ_IRQ_PEND] = 0;
+	priv->fiq_buffer[FIQ_IRQ_PEND] = 0;
 
 	/*
 	 * Read data from the circular buffer, check it
 	 * and then pass it on the serio
 	 */
-	while (fiq_buffer[FIQ_KEYS_CNT] > 0) {
+	while (priv->fiq_buffer[FIQ_KEYS_CNT] > 0) {
 
-		data = circ_buff[fiq_buffer[FIQ_HEAD_OFFSET]++];
-		fiq_buffer[FIQ_KEYS_CNT]--;
-		if (fiq_buffer[FIQ_HEAD_OFFSET] == fiq_buffer[FIQ_BUF_LEN])
-			fiq_buffer[FIQ_HEAD_OFFSET] = 0;
+		data = circ_buff[priv->fiq_buffer[FIQ_HEAD_OFFSET]++];
+		priv->fiq_buffer[FIQ_KEYS_CNT]--;
+		if (priv->fiq_buffer[FIQ_HEAD_OFFSET] ==
+		    priv->fiq_buffer[FIQ_BUF_LEN])
+			priv->fiq_buffer[FIQ_HEAD_OFFSET] = 0;
 
 		dfl = check_data(priv->serio, data);
 		scancode = (u8) (data >> 1) & 0xFF;
@@ -116,6 +118,10 @@ static int ams_delta_serio_init(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	priv->fiq_buffer = pdev->dev.platform_data;
+	if (!priv->fiq_buffer)
+		return -EINVAL;
+
 	priv->vcc = devm_regulator_get(&pdev->dev, "vcc");
 	if (IS_ERR(priv->vcc)) {
 		err = PTR_ERR(priv->vcc);
diff --git a/include/linux/platform_data/ams-delta-fiq.h b/include/linux/platform_data/ams-delta-fiq.h
index dc0f835ea918..cf4589ccb720 100644
--- a/include/linux/platform_data/ams-delta-fiq.h
+++ b/include/linux/platform_data/ams-delta-fiq.h
@@ -55,8 +55,4 @@
 
 #define FIQ_CIRC_BUFF		30      /*Start of circular buffer */
 
-#ifndef __ASSEMBLER__
-extern unsigned int fiq_buffer[];
-#endif
-
 #endif
-- 
cgit v1.2.3


From d082852f40de5cf55a7a689bf582fced39f5443e Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Fri, 22 Jun 2018 13:32:48 +0800
Subject: ARM: imx: enable bus auto clock gating function for i.mx6sll

i.MX6SLL has HW bus auto clock gating function, enable
it by default to save VDD_SOC_IN power, about 5% ~ 20%
saved depends on different use cases.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/pm-imx6.c                 | 11 +++++++++--
 include/linux/mfd/syscon/imx6q-iomuxc-gpr.h |  3 +++
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
index 791e1fda248e..b08e407d8d96 100644
--- a/arch/arm/mach-imx/pm-imx6.c
+++ b/arch/arm/mach-imx/pm-imx6.c
@@ -648,10 +648,17 @@ void __init imx6dl_pm_init(void)
 
 void __init imx6sl_pm_init(void)
 {
-	if (cpu_is_imx6sl())
+	struct regmap *gpr;
+
+	if (cpu_is_imx6sl()) {
 		imx6_pm_common_init(&imx6sl_pm_data);
-	else
+	} else {
 		imx6_pm_common_init(&imx6sll_pm_data);
+		gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+		if (!IS_ERR(gpr))
+			regmap_update_bits(gpr, IOMUXC_GPR5,
+				IMX6SLL_GPR5_AFCG_X_BYPASS_MASK, 0);
+	}
 }
 
 void __init imx6sx_pm_init(void)
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index e06f5f79eaef..6c1ad160ed87 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -457,4 +457,7 @@
 #define MCLK_DIR(x) (x == 1 ? IMX6UL_GPR1_SAI1_MCLK_DIR : x == 2 ? \
 		     IMX6UL_GPR1_SAI2_MCLK_DIR : IMX6UL_GPR1_SAI3_MCLK_DIR)
 
+/* For imx6sll iomux gpr register field define */
+#define IMX6SLL_GPR5_AFCG_X_BYPASS_MASK		(0x1f << 11)
+
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
-- 
cgit v1.2.3


From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 11:45:49 +0200
Subject: kthread, sched/core: Fix kthread_parkme() (again...)

Gaurav reports that commit:

  85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")

isn't working for him. Because of the following race:

> controller Thread                               CPUHP Thread
> takedown_cpu
> kthread_park
> kthread_parkme
> Set KTHREAD_SHOULD_PARK
>                                                 smpboot_thread_fn
>                                                 set Task interruptible
>
>
> wake_up_process
>  if (!(p->state & state))
>                 goto out;
>
>                                                 Kthread_parkme
>                                                 SET TASK_PARKED
>                                                 schedule
>                                                 raw_spin_lock(&rq->lock)
> ttwu_remote
> waiting for __task_rq_lock
>                                                 context_switch
>
>                                                 finish_lock_switch
>
>
>
>                                                 Case TASK_PARKED
>                                                 kthread_park_complete
>
>
> SET Running

Furthermore, Oleg noticed that the whole scheduler TASK_PARKED
handling is buggered because the TASK_DEAD thing is done with
preemption disabled, the current code can still complete early on
preemption :/

So basically revert that earlier fix and go with a variant of the
alternative mentioned in the commit. Promote TASK_PARKED to special
state to avoid the store-store issue on task->state leading to the
WARN in kthread_unpark() -> __kthread_bind().

But in addition, add wait_task_inactive() to kthread_park() to ensure
the task really is PARKED when we return from kthread_park(). This
avoids the whole kthread still gets migrated nonsense -- although it
would be really good to get this done differently.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h |  1 -
 include/linux/sched.h   |  2 +-
 kernel/kthread.c        | 30 ++++++++++++++++++++++++------
 kernel/sched/core.c     | 31 +++++++++++--------------------
 4 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2803264c512f..c1961761311d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9256118bd40c..43731fe51c97 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,7 +118,7 @@ struct task_group;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)				\
-	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
 
 #define __set_current_state(state_value)			\
 	do {							\
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..750cb8082694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
 static void __kthread_parkme(struct kthread *self)
 {
 	for (;;) {
-		set_current_state(TASK_PARKED);
+		/*
+		 * TASK_PARKED is a special state; we must serialize against
+		 * possible pending wakeups to avoid store-store collisions on
+		 * task->state.
+		 *
+		 * Such a collision might possibly result in the task state
+		 * changin from TASK_PARKED and us failing the
+		 * wait_task_inactive() in kthread_park().
+		 */
+		set_special_state(TASK_PARKED);
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
+
+		complete_all(&self->parked);
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-void kthread_park_complete(struct task_struct *k)
-{
-	complete_all(&to_kthread(k)->parked);
-}
-
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
 
 	reinit_completion(&kthread->parked);
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
+	 */
 	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	if (k != current) {
 		wake_up_process(k);
+		/*
+		 * Wait for __kthread_parkme() to complete(), this means we
+		 * _will_ have TASK_PARKED and are about to call schedule().
+		 */
 		wait_for_completion(&kthread->parked);
+		/*
+		 * Now wait for that schedule() to complete and the task to
+		 * get scheduled out.
+		 */
+		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
 	}
 
 	return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 22fce36426c0..fe365c9a08e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
  */
 #include "sched.h"
 
-#include <linux/kthread.h>
 #include <linux/nospec.h>
 
 #include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
-		switch (prev_state) {
-		case TASK_DEAD:
-			if (prev->sched_class->task_dead)
-				prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state == TASK_DEAD)) {
+		if (prev->sched_class->task_dead)
+			prev->sched_class->task_dead(prev);
 
-			/*
-			 * Remove function-return probe instances associated with this
-			 * task and put them back on the free list.
-			 */
-			kprobe_flush_task(prev);
-
-			/* Task is done with its stack. */
-			put_task_stack(prev);
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 
-			put_task_struct(prev);
-			break;
+		/* Task is done with its stack. */
+		put_task_stack(prev);
 
-		case TASK_PARKED:
-			kthread_park_complete(prev);
-			break;
-		}
+		put_task_struct(prev);
 	}
 
 	tick_nohz_task_switch();
-- 
cgit v1.2.3


From 9cf57731b63e37ed995b46690adc604891a9a28f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 10:52:03 +0200
Subject: watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work

Oleg suggested to replace the "watchdog/%u" threads with
cpu_stop_work. That removes one thread per CPU while at the same time
fixes softlockup vs SCHED_DEADLINE.

But more importantly, it does away with the single
smpboot_update_cpumask_percpu_thread() user, which allows
cleanups/shrinkage of the smpboot interface.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuhotplug.h |   1 +
 include/linux/nmi.h        |   5 ++
 kernel/cpu.c               |   5 ++
 kernel/watchdog.c          | 137 ++++++++++++++++++++-------------------------
 4 files changed, 71 insertions(+), 77 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..4cf06a64bc02 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -164,6 +164,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b8d868d23e79..80664bbeca43 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -33,10 +33,15 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
+#define lockup_detector_online_cpu	NULL
+#define lockup_detector_offline_cpu	NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..191097c45fb1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1344,6 +1344,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= perf_event_init_cpu,
 		.teardown.single	= perf_event_exit_cpu,
 	},
+	[CPUHP_AP_WATCHDOG_ONLINE] = {
+		.name			= "lockup_detector:online",
+		.startup.single		= lockup_detector_online_cpu,
+		.teardown.single	= lockup_detector_offline_cpu,
+	},
 	[CPUHP_AP_WORKQUEUE_ONLINE] = {
 		.name			= "workqueue:online",
 		.startup.single		= workqueue_online_cpu,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 576d18045811..b81f777838d5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -18,18 +18,14 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
-#include <linux/smpboot.h>
-#include <linux/sched/rt.h>
-#include <uapi/linux/sched/types.h>
 #include <linux/tick.h>
-#include <linux/workqueue.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/isolation.h>
+#include <linux/stop_machine.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
-#include <linux/kthread.h>
 
 static DEFINE_MUTEX(watchdog_mutex);
 
@@ -169,11 +165,10 @@ static void lockup_detector_update_enable(void)
 unsigned int __read_mostly softlockup_panic =
 			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
 
-static bool softlockup_threads_initialized __read_mostly;
+static bool softlockup_initialized __read_mostly;
 static u64 __read_mostly sample_period;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
@@ -335,6 +330,25 @@ static void watchdog_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every sample_period seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static int softlockup_fn(void *data)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -350,7 +364,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	wake_up_process(__this_cpu_read(softlockup_watchdog));
+	stop_one_cpu_nowait(smp_processor_id(),
+			softlockup_fn, NULL,
+			this_cpu_ptr(&softlockup_stop_work));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -448,17 +464,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
-static void watchdog_set_prio(unsigned int policy, unsigned int prio)
-{
-	struct sched_param param = { .sched_priority = prio };
-
-	sched_setscheduler(current, policy, &param);
-}
-
 static void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
@@ -473,15 +484,14 @@ static void watchdog_enable(unsigned int cpu)
 	/* Enable the perf event */
 	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
 		watchdog_nmi_enable(cpu);
-
-	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }
 
 static void watchdog_disable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
 
-	watchdog_set_prio(SCHED_NORMAL, 0);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+
 	/*
 	 * Disable the perf event first. That prevents that a large delay
 	 * between disabling the timer and disabling the perf event causes
@@ -491,77 +501,63 @@ static void watchdog_disable(unsigned int cpu)
 	hrtimer_cancel(hrtimer);
 }
 
-static void watchdog_cleanup(unsigned int cpu, bool online)
+static int softlockup_stop_fn(void *data)
 {
-	watchdog_disable(cpu);
+	watchdog_disable(smp_processor_id());
+	return 0;
 }
 
-static int watchdog_should_run(unsigned int cpu)
+static void softlockup_stop_all(void)
 {
-	return __this_cpu_read(hrtimer_interrupts) !=
-		__this_cpu_read(soft_lockup_hrtimer_cnt);
+	int cpu;
+
+	if (!softlockup_initialized)
+		return;
+
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);
+
+	cpumask_clear(&watchdog_allowed_mask);
 }
 
-/*
- * The watchdog thread function - touches the timestamp.
- *
- * It only runs once every sample_period seconds (4 seconds by
- * default) to reset the softlockup timestamp. If this gets delayed
- * for more than 2*watchdog_thresh seconds then the debug-printout
- * triggers in watchdog_timer_fn().
- */
-static void watchdog(unsigned int cpu)
+static int softlockup_start_fn(void *data)
 {
-	__this_cpu_write(soft_lockup_hrtimer_cnt,
-			 __this_cpu_read(hrtimer_interrupts));
-	__touch_watchdog();
+	watchdog_enable(smp_processor_id());
+	return 0;
 }
 
-static struct smp_hotplug_thread watchdog_threads = {
-	.store			= &softlockup_watchdog,
-	.thread_should_run	= watchdog_should_run,
-	.thread_fn		= watchdog,
-	.thread_comm		= "watchdog/%u",
-	.setup			= watchdog_enable,
-	.cleanup		= watchdog_cleanup,
-	.park			= watchdog_disable,
-	.unpark			= watchdog_enable,
-};
-
-static void softlockup_update_smpboot_threads(void)
+static void softlockup_start_all(void)
 {
-	lockdep_assert_held(&watchdog_mutex);
-
-	if (!softlockup_threads_initialized)
-		return;
+	int cpu;
 
-	smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-					     &watchdog_allowed_mask);
+	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+	for_each_cpu(cpu, &watchdog_allowed_mask)
+		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
 }
 
-/* Temporarily park all watchdog threads */
-static void softlockup_park_all_threads(void)
+int lockup_detector_online_cpu(unsigned int cpu)
 {
-	cpumask_clear(&watchdog_allowed_mask);
-	softlockup_update_smpboot_threads();
+	watchdog_enable(cpu);
+	return 0;
 }
 
-/* Unpark enabled threads */
-static void softlockup_unpark_threads(void)
+int lockup_detector_offline_cpu(unsigned int cpu)
 {
-	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
-	softlockup_update_smpboot_threads();
+	watchdog_disable(cpu);
+	return 0;
 }
 
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
 	watchdog_nmi_stop();
-	softlockup_park_all_threads();
+
+	softlockup_stop_all();
 	set_sample_period();
 	lockup_detector_update_enable();
 	if (watchdog_enabled && watchdog_thresh)
-		softlockup_unpark_threads();
+		softlockup_start_all();
+
 	watchdog_nmi_start();
 	cpus_read_unlock();
 	/*
@@ -580,8 +576,6 @@ static void lockup_detector_reconfigure(void)
  */
 static __init void lockup_detector_setup(void)
 {
-	int ret;
-
 	/*
 	 * If sysctl is off and watchdog got disabled on the command line,
 	 * nothing to do here.
@@ -592,24 +586,13 @@ static __init void lockup_detector_setup(void)
 	    !(watchdog_enabled && watchdog_thresh))
 		return;
 
-	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-						     &watchdog_allowed_mask);
-	if (ret) {
-		pr_err("Failed to initialize soft lockup detector threads\n");
-		return;
-	}
-
 	mutex_lock(&watchdog_mutex);
-	softlockup_threads_initialized = true;
 	lockup_detector_reconfigure();
+	softlockup_initialized = true;
 	mutex_unlock(&watchdog_mutex);
 }
 
 #else /* CONFIG_SOFTLOCKUP_DETECTOR */
-static inline int watchdog_park_threads(void) { return 0; }
-static inline void watchdog_unpark_threads(void) { }
-static inline int watchdog_enable_all_cpus(void) { return 0; }
-static inline void watchdog_disable_all_cpus(void) { }
 static void lockup_detector_reconfigure(void)
 {
 	cpus_read_lock();
-- 
cgit v1.2.3


From 167a88677b05d6a810f23b871cfb2b5db1808e60 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 10:53:01 +0200
Subject: smpboot: Remove cpumask from the API

Now that the sole use of the whole smpboot_*cpumask() API is gone,
remove it.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/smpboot.h | 15 +-------------
 kernel/smpboot.c        | 54 +++++--------------------------------------------
 2 files changed, 6 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index c174844cf663..d0884b525001 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -25,8 +25,6 @@ struct smpboot_thread_data;
  *			parked (cpu offline)
  * @unpark:		Optional unpark function, called when the thread is
  *			unparked (cpu online)
- * @cpumask:		Internal state.  To update which threads are unparked,
- *			call smpboot_update_cpumask_percpu_thread().
  * @selfparking:	Thread is not parked by the park function.
  * @thread_comm:	The base name of the thread
  */
@@ -40,23 +38,12 @@ struct smp_hotplug_thread {
 	void				(*cleanup)(unsigned int cpu, bool online);
 	void				(*park)(unsigned int cpu);
 	void				(*unpark)(unsigned int cpu);
-	cpumask_var_t			cpumask;
 	bool				selfparking;
 	const char			*thread_comm;
 };
 
-int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
-					   const struct cpumask *cpumask);
-
-static inline int
-smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
-{
-	return smpboot_register_percpu_thread_cpumask(plug_thread,
-						      cpu_possible_mask);
-}
+int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread);
 
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					  const struct cpumask *);
 
 #endif
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 5043e7433f4b..c230c2dd48e1 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -238,8 +238,7 @@ int smpboot_unpark_threads(unsigned int cpu)
 
 	mutex_lock(&smpboot_threads_lock);
 	list_for_each_entry(cur, &hotplug_threads, list)
-		if (cpumask_test_cpu(cpu, cur->cpumask))
-			smpboot_unpark_thread(cur, cpu);
+		smpboot_unpark_thread(cur, cpu);
 	mutex_unlock(&smpboot_threads_lock);
 	return 0;
 }
@@ -280,34 +279,26 @@ static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
 }
 
 /**
- * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related
+ * smpboot_register_percpu_thread - Register a per_cpu thread related
  * 					    to hotplug
  * @plug_thread:	Hotplug thread descriptor
- * @cpumask:		The cpumask where threads run
  *
  * Creates and starts the threads on all online cpus.
  */
-int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
-					   const struct cpumask *cpumask)
+int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 {
 	unsigned int cpu;
 	int ret = 0;
 
-	if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
-		return -ENOMEM;
-	cpumask_copy(plug_thread->cpumask, cpumask);
-
 	get_online_cpus();
 	mutex_lock(&smpboot_threads_lock);
 	for_each_online_cpu(cpu) {
 		ret = __smpboot_create_thread(plug_thread, cpu);
 		if (ret) {
 			smpboot_destroy_threads(plug_thread);
-			free_cpumask_var(plug_thread->cpumask);
 			goto out;
 		}
-		if (cpumask_test_cpu(cpu, cpumask))
-			smpboot_unpark_thread(plug_thread, cpu);
+		smpboot_unpark_thread(plug_thread, cpu);
 	}
 	list_add(&plug_thread->list, &hotplug_threads);
 out:
@@ -315,7 +306,7 @@ out:
 	put_online_cpus();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask);
+EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
 
 /**
  * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
@@ -331,44 +322,9 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
 	smpboot_destroy_threads(plug_thread);
 	mutex_unlock(&smpboot_threads_lock);
 	put_online_cpus();
-	free_cpumask_var(plug_thread->cpumask);
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
 
-/**
- * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked
- * @plug_thread:	Hotplug thread descriptor
- * @new:		Revised mask to use
- *
- * The cpumask field in the smp_hotplug_thread must not be updated directly
- * by the client, but only by calling this function.
- * This function can only be called on a registered smp_hotplug_thread.
- */
-void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					  const struct cpumask *new)
-{
-	struct cpumask *old = plug_thread->cpumask;
-	static struct cpumask tmp;
-	unsigned int cpu;
-
-	lockdep_assert_cpus_held();
-	mutex_lock(&smpboot_threads_lock);
-
-	/* Park threads that were exclusively enabled on the old mask. */
-	cpumask_andnot(&tmp, old, new);
-	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
-		smpboot_park_thread(plug_thread, cpu);
-
-	/* Unpark threads that are exclusively enabled on the new mask. */
-	cpumask_andnot(&tmp, new, old);
-	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
-		smpboot_unpark_thread(plug_thread, cpu);
-
-	cpumask_copy(old, new);
-
-	mutex_unlock(&smpboot_threads_lock);
-}
-
 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
 
 /*
-- 
cgit v1.2.3


From 6c0ad1df179b8d26ecd5e9954c36146fa87c1b0c Mon Sep 17 00:00:00 2001
From: Yixun Lan <yixun.lan@amlogic.com>
Date: Mon, 2 Jul 2018 21:31:17 +0000
Subject: clk: meson-axg: add pcie and mipi clock bindings

Add the pcie and mipi clock dt-bindings for the pcie driver.

Since the mipi clock isalso used by the pcie driver,
we add it together in this patch.

Tested-by: Jianxin Qin <jianxin.qin@amlogic.com>
Signed-off-by: Yixun Lan <yixun.lan@amlogic.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/axg-clkc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/axg-clkc.h b/include/dt-bindings/clock/axg-clkc.h
index 555937a25504..70371228b7e0 100644
--- a/include/dt-bindings/clock/axg-clkc.h
+++ b/include/dt-bindings/clock/axg-clkc.h
@@ -68,5 +68,8 @@
 #define CLKID_SD_EMMC_B_CLK0			59
 #define CLKID_SD_EMMC_C_CLK0			60
 #define CLKID_HIFI_PLL				69
+#define CLKID_PCIE_CML_EN0			79
+#define CLKID_PCIE_CML_EN1			80
+#define CLKID_MIPI_ENABLE			81
 
 #endif /* __AXG_CLKC_H */
-- 
cgit v1.2.3


From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline
 declarations

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index fd282c7d3e5e..573f5a7d42d4 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,6 +65,18 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
@@ -72,19 +84,22 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 
-- 
cgit v1.2.3


From 71b38245acb05a38d2d861792bdd99cd9f6a0f78 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Tue, 5 Jun 2018 13:37:51 -0700
Subject: Drivers: hv: vmbus: Add comments on ring buffer signaling

Add comments describing intricacies of Hyper-V ring buffer
signaling code.  This information is not in Hyper-V public
documents, so include here to capture the knowledge for
future coders.

There are no code changes in this commit.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/ring_buffer.c | 65 ++++++++++++++++++++++++++++++++++++++++--------
 include/linux/hyperv.h   | 31 +++++++++++++++++------
 2 files changed, 77 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index be3c8b10b84a..3e90eb91db45 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -431,7 +431,24 @@ static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
 }
 
 /*
- * Update host ring buffer after iterating over packets.
+ * Update host ring buffer after iterating over packets. If the host has
+ * stopped queuing new entries because it found the ring buffer full, and
+ * sufficient space is being freed up, signal the host. But be careful to
+ * only signal the host when necessary, both for performance reasons and
+ * because Hyper-V protects itself by throttling guests that signal
+ * inappropriately.
+ *
+ * Determining when to signal is tricky. There are three key data inputs
+ * that must be handled in this order to avoid race conditions:
+ *
+ * 1. Update the read_index
+ * 2. Read the pending_send_sz
+ * 3. Read the current write_index
+ *
+ * The interrupt_mask is not used to determine when to signal. The
+ * interrupt_mask is used only on the guest->host ring buffer when
+ * sending requests to the host. The host does not use it on the host->
+ * guest ring buffer to indicate whether it should be signaled.
  */
 void hv_pkt_iter_close(struct vmbus_channel *channel)
 {
@@ -447,22 +464,30 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	start_read_index = rbi->ring_buffer->read_index;
 	rbi->ring_buffer->read_index = rbi->priv_read_index;
 
+	/*
+	 * Older versions of Hyper-V (before WS2102 and Win8) do not
+	 * implement pending_send_sz and simply poll if the host->guest
+	 * ring buffer is full.  No signaling is needed or expected.
+	 */
 	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
 		return;
 
 	/*
 	 * Issue a full memory barrier before making the signaling decision.
-	 * Here is the reason for having this barrier:
-	 * If the reading of the pend_sz (in this function)
-	 * were to be reordered and read before we commit the new read
-	 * index (in the calling function)  we could
-	 * have a problem. If the host were to set the pending_sz after we
-	 * have sampled pending_sz and go to sleep before we commit the
+	 * If reading pending_send_sz were to be reordered and happen
+	 * before we commit the new read_index, a race could occur.  If the
+	 * host were to set the pending_send_sz after we have sampled
+	 * pending_send_sz, and the ring buffer blocks before we commit the
 	 * read index, we could miss sending the interrupt. Issue a full
 	 * memory barrier to address this.
 	 */
 	virt_mb();
 
+	/*
+	 * If the pending_send_sz is zero, then the ring buffer is not
+	 * blocked and there is no need to signal.  This is far by the
+	 * most common case, so exit quickly for best performance.
+	 */
 	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
 	if (!pending_sz)
 		return;
@@ -476,14 +501,32 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);
 
 	/*
-	 * If there was space before we began iteration,
-	 * then host was not blocked.
+	 * We want to signal the host only if we're transitioning
+	 * from a "not enough free space" state to a "enough free
+	 * space" state.  For example, it's possible that this function
+	 * could run and free up enough space to signal the host, and then
+	 * run again and free up additional space before the host has a
+	 * chance to clear the pending_send_sz.  The 2nd invocation would
+	 * be a null transition from "enough free space" to "enough free
+	 * space", which doesn't warrant a signal.
+	 *
+	 * Exactly filling the ring buffer is treated as "not enough
+	 * space". The ring buffer always must have at least one byte
+	 * empty so the empty and full conditions are distinguishable.
+	 * hv_get_bytes_to_write() doesn't fully tell the truth in
+	 * this regard.
+	 *
+	 * So first check if we were in the "enough free space" state
+	 * before we began the iteration. If so, the host was not
+	 * blocked, and there's no need to signal.
 	 */
-
 	if (curr_write_sz - bytes_read > pending_sz)
 		return;
 
-	/* If pending write will not fit, don't give false hope. */
+	/*
+	 * Similarly, if the new state is "not enough space", then
+	 * there's no need to signal.
+	 */
 	if (curr_write_sz <= pending_sz)
 		return;
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3a3012f57be4..2330f08062c7 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -89,18 +89,33 @@ struct hv_ring_buffer {
 	u32 interrupt_mask;
 
 	/*
-	 * Win8 uses some of the reserved bits to implement
-	 * interrupt driven flow management. On the send side
-	 * we can request that the receiver interrupt the sender
-	 * when the ring transitions from being full to being able
-	 * to handle a message of size "pending_send_sz".
+	 * WS2012/Win8 and later versions of Hyper-V implement interrupt
+	 * driven flow management. The feature bit feat_pending_send_sz
+	 * is set by the host on the host->guest ring buffer, and by the
+	 * guest on the guest->host ring buffer.
 	 *
-	 * Add necessary state for this enhancement.
+	 * The meaning of the feature bit is a bit complex in that it has
+	 * semantics that apply to both ring buffers.  If the guest sets
+	 * the feature bit in the guest->host ring buffer, the guest is
+	 * telling the host that:
+	 * 1) It will set the pending_send_sz field in the guest->host ring
+	 *    buffer when it is waiting for space to become available, and
+	 * 2) It will read the pending_send_sz field in the host->guest
+	 *    ring buffer and interrupt the host when it frees enough space
+	 *
+	 * Similarly, if the host sets the feature bit in the host->guest
+	 * ring buffer, the host is telling the guest that:
+	 * 1) It will set the pending_send_sz field in the host->guest ring
+	 *    buffer when it is waiting for space to become available, and
+	 * 2) It will read the pending_send_sz field in the guest->host
+	 *    ring buffer and interrupt the guest when it frees enough space
+	 *
+	 * If either the guest or host does not set the feature bit that it
+	 * owns, that guest or host must do polling if it encounters a full
+	 * ring buffer, and not signal the other end with an interrupt.
 	 */
 	u32 pending_send_sz;
-
 	u32 reserved1[12];
-
 	union {
 		struct {
 			u32 feat_pending_send_sz:1;
-- 
cgit v1.2.3


From a655de808cbde6c58b3298e704d786b53f59fb5d Mon Sep 17 00:00:00 2001
From: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Date: Mon, 2 Jul 2018 16:59:54 +0100
Subject: ASoC: core: Allow topology to override machine driver FE DAI link
 config.

Machine drivers statically define a number of DAI links that currently
cannot be changed or removed by topology. This means PCMs and platform
components cannot be changed by topology at runtime AND machine drivers
are tightly coupled to topology.

This patch allows topology to override the machine driver DAI link config
in order to reuse machine drivers with different topologies and platform
components. The patch supports :-

1) create new FE PCMs with a topology defined PCM ID.
2) destroy existing static FE PCMs
3) change the platform component driver.
4) assign any new HW params fixups.
5) assign a new card name prefix to differentiate this topology to userspace.

The patch requires no changes to the machine drivers, but does add some
platform component flags that the platform component driver can assign
before loading topologies.

Signed-off-by: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  13 +++++++
 sound/soc/soc-core.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++--
 sound/soc/soc-pcm.c  |  12 ++++++
 3 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 16f0bf10cc42..870ba6b64817 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -803,6 +803,14 @@ struct snd_soc_component_driver {
 	unsigned int use_pmdown_time:1; /* care pmdown_time at stop */
 	unsigned int endianness:1;
 	unsigned int non_legacy_dai_naming:1;
+
+	/* this component uses topology and ignore machine driver FEs */
+	const char *ignore_machine;
+	const char *topology_name_prefix;
+	int (*be_hw_params_fixup)(struct snd_soc_pcm_runtime *rtd,
+				  struct snd_pcm_hw_params *params);
+	bool use_dai_pcm_id;	/* use the DAI link PCM ID as PCM device number */
+	int be_pcm_base;	/* base device ID for all BE PCMs */
 };
 
 struct snd_soc_component {
@@ -960,6 +968,9 @@ struct snd_soc_dai_link {
 	/* pmdown_time is ignored at stop */
 	unsigned int ignore_pmdown_time:1;
 
+	/* Do not create a PCM for this DAI link (Backend link) */
+	unsigned int ignore:1;
+
 	struct list_head list; /* DAI link list of the soc card */
 	struct snd_soc_dobj dobj; /* For topology */
 };
@@ -999,6 +1010,7 @@ struct snd_soc_card {
 	const char *long_name;
 	const char *driver_name;
 	char dmi_longname[80];
+	char topology_shortname[32];
 
 	struct device *dev;
 	struct snd_card *snd_card;
@@ -1008,6 +1020,7 @@ struct snd_soc_card {
 	struct mutex dapm_mutex;
 
 	bool instantiated;
+	bool topology_shortname_created;
 
 	int (*probe)(struct snd_soc_card *card);
 	int (*late_probe)(struct snd_soc_card *card);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 68b08781c832..00bd58d167dd 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -847,6 +847,9 @@ static int soc_bind_dai_link(struct snd_soc_card *card,
 	const char *platform_name;
 	int i;
 
+	if (dai_link->ignore)
+		return 0;
+
 	dev_dbg(card->dev, "ASoC: binding %s\n", dai_link->name);
 
 	if (soc_is_dai_link_bound(card, dai_link)) {
@@ -1456,7 +1459,9 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 {
 	struct snd_soc_dai_link *dai_link = rtd->dai_link;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
-	int i, ret;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+	int i, ret, num;
 
 	dev_dbg(card->dev, "ASoC: probe %s dai link %d late %d\n",
 			card->name, rtd->num, order);
@@ -1502,9 +1507,28 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 		soc_dpcm_debugfs_add(rtd);
 #endif
 
+	num = rtd->num;
+
+	/*
+	 * most drivers will register their PCMs using DAI link ordering but
+	 * topology based drivers can use the DAI link id field to set PCM
+	 * device number and then use rtd + a base offset of the BEs.
+	 */
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		if (!component->driver->use_dai_pcm_id)
+			continue;
+
+		if (rtd->dai_link->no_pcm)
+			num += component->driver->be_pcm_base;
+		else
+			num = rtd->dai_link->id;
+	}
+
 	if (cpu_dai->driver->compress_new) {
 		/*create compress_device"*/
-		ret = cpu_dai->driver->compress_new(rtd, rtd->num);
+		ret = cpu_dai->driver->compress_new(rtd, num);
 		if (ret < 0) {
 			dev_err(card->dev, "ASoC: can't create compress %s\n",
 					 dai_link->stream_name);
@@ -1514,7 +1538,7 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 
 		if (!dai_link->params) {
 			/* create the pcm */
-			ret = soc_new_pcm(rtd, rtd->num);
+			ret = soc_new_pcm(rtd, num);
 			if (ret < 0) {
 				dev_err(card->dev, "ASoC: can't create pcm %s :%d\n",
 				       dai_link->stream_name, ret);
@@ -1841,6 +1865,74 @@ int snd_soc_set_dmi_name(struct snd_soc_card *card, const char *flavour)
 EXPORT_SYMBOL_GPL(snd_soc_set_dmi_name);
 #endif /* CONFIG_DMI */
 
+static void soc_check_tplg_fes(struct snd_soc_card *card)
+{
+	struct snd_soc_component *component;
+	const struct snd_soc_component_driver *comp_drv;
+	struct snd_soc_dai_link *dai_link;
+	int i;
+
+	list_for_each_entry(component, &component_list, list) {
+
+		/* does this component override FEs ? */
+		if (!component->driver->ignore_machine)
+			continue;
+
+		/* for this machine ? */
+		if (strcmp(component->driver->ignore_machine,
+			   card->dev->driver->name))
+			continue;
+
+		/* machine matches, so override the rtd data */
+		for (i = 0; i < card->num_links; i++) {
+
+			dai_link = &card->dai_link[i];
+
+			/* ignore this FE */
+			if (dai_link->dynamic) {
+				dai_link->ignore = true;
+				continue;
+			}
+
+			dev_info(card->dev, "info: override FE DAI link %s\n",
+				 card->dai_link[i].name);
+
+			/* override platform component */
+			dai_link->platform_name = component->name;
+
+			/* convert non BE into BE */
+			dai_link->no_pcm = 1;
+
+			/* override any BE fixups */
+			dai_link->be_hw_params_fixup =
+				component->driver->be_hw_params_fixup;
+
+			/* most BE links don't set stream name, so set it to
+			 * dai link name if it's NULL to help bind widgets.
+			 */
+			if (!dai_link->stream_name)
+				dai_link->stream_name = dai_link->name;
+		}
+
+		/* Inform userspace we are using alternate topology */
+		if (component->driver->topology_name_prefix) {
+
+			/* topology shortname created ? */
+			if (!card->topology_shortname_created) {
+				comp_drv = component->driver;
+
+				snprintf(card->topology_shortname, 32, "%s-%s",
+					 comp_drv->topology_name_prefix,
+					 card->name);
+				card->topology_shortname_created = true;
+			}
+
+			/* use topology shortname */
+			card->name = card->topology_shortname;
+		}
+	}
+}
+
 static int snd_soc_instantiate_card(struct snd_soc_card *card)
 {
 	struct snd_soc_pcm_runtime *rtd;
@@ -1850,6 +1942,9 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	mutex_lock(&client_mutex);
 	mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT);
 
+	/* check whether any platform is ignore machine FE and using topology */
+	soc_check_tplg_fes(card);
+
 	/* bind DAIs */
 	for (i = 0; i < card->num_links; i++) {
 		ret = soc_bind_dai_link(card, &card->dai_link[i]);
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index c2a31b51da4f..b7e67b871c0c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -859,8 +859,20 @@ int soc_dai_hw_params(struct snd_pcm_substream *substream,
 		      struct snd_pcm_hw_params *params,
 		      struct snd_soc_dai *dai)
 {
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	int ret;
 
+	/* perform any topology hw_params fixups before DAI  */
+	if (rtd->dai_link->be_hw_params_fixup) {
+		ret = rtd->dai_link->be_hw_params_fixup(rtd, params);
+		if (ret < 0) {
+			dev_err(rtd->dev,
+				"ASoC: hw_params topology fixup failed %d\n",
+				ret);
+			return ret;
+		}
+	}
+
 	if (dai->driver->ops->hw_params) {
 		ret = dai->driver->ops->hw_params(substream, params, dai);
 		if (ret < 0) {
-- 
cgit v1.2.3


From cfb8282e18e292a8efc4d13ea1e4547a47dfb2d0 Mon Sep 17 00:00:00 2001
From: Amit Nischal <anischal@codeaurora.org>
Date: Mon, 11 Jun 2018 12:08:15 +0530
Subject: clk: qcom: Enable clocks which needs to be always on for SDM845

There are certain clocks which needs to be always enabled for system
operation. Add support for the same by adding 'CLK_IS_CRITICAL' flag
for such clocks.

Signed-off-by: Amit Nischal <anischal@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/gcc-sdm845.c               | 43 ++++++++++++++++++++++++++---
 include/dt-bindings/clock/qcom,gcc-sdm845.h |  2 ++
 2 files changed, 41 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/qcom/gcc-sdm845.c b/drivers/clk/qcom/gcc-sdm845.c
index e78e6f5b99fc..0f694ed4238a 100644
--- a/drivers/clk/qcom/gcc-sdm845.c
+++ b/drivers/clk/qcom/gcc-sdm845.c
@@ -1103,6 +1103,7 @@ static struct clk_branch gcc_camera_ahb_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_camera_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -1129,6 +1130,7 @@ static struct clk_branch gcc_camera_xo_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_camera_xo_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -1270,6 +1272,7 @@ static struct clk_branch gcc_disp_ahb_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_disp_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -1328,6 +1331,7 @@ static struct clk_branch gcc_disp_xo_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_disp_xo_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -1397,6 +1401,7 @@ static struct clk_branch gcc_gpu_cfg_ahb_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_gpu_cfg_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2985,6 +2990,7 @@ static struct clk_branch gcc_video_ahb_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_video_ahb_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -3011,6 +3017,7 @@ static struct clk_branch gcc_video_xo_clk = {
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_video_xo_clk",
+			.flags = CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -3049,6 +3056,36 @@ static struct clk_branch gcc_vs_ctrl_clk = {
 	},
 };
 
+static struct clk_branch gcc_cpuss_dvm_bus_clk = {
+	.halt_reg = 0x48190,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x48190,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_dvm_bus_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_cpuss_gnoc_clk = {
+	.halt_reg = 0x48004,
+	.halt_check = BRANCH_HALT_VOTED,
+	.hwcg_reg = 0x48004,
+	.hwcg_bit = 1,
+	.clkr = {
+		.enable_reg = 0x52004,
+		.enable_mask = BIT(22),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_cpuss_gnoc_clk",
+			.flags = CLK_IS_CRITICAL,
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
 static struct gdsc pcie_0_gdsc = {
 	.gdscr = 0x6b004,
 	.pd = {
@@ -3344,6 +3381,8 @@ static struct clk_regmap *gcc_sdm845_clocks[] = {
 	[GPLL0] = &gpll0.clkr,
 	[GPLL0_OUT_EVEN] = &gpll0_out_even.clkr,
 	[GPLL4] = &gpll4.clkr,
+	[GCC_CPUSS_DVM_BUS_CLK] = &gcc_cpuss_dvm_bus_clk.clkr,
+	[GCC_CPUSS_GNOC_CLK] = &gcc_cpuss_gnoc_clk.clkr,
 };
 
 static const struct qcom_reset_map gcc_sdm845_resets[] = {
@@ -3433,10 +3472,6 @@ static int gcc_sdm845_probe(struct platform_device *pdev)
 	regmap_update_bits(regmap, 0x09ffc, 0x3, 0x3);
 	regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
 
-	/* Enable CPUSS clocks */
-	regmap_update_bits(regmap, 0x48190, BIT(0), 0x1);
-	regmap_update_bits(regmap, 0x52004, BIT(22), 0x1);
-
 	return qcom_cc_really_probe(pdev, &gcc_sdm845_desc, regmap);
 }
 
diff --git a/include/dt-bindings/clock/qcom,gcc-sdm845.h b/include/dt-bindings/clock/qcom,gcc-sdm845.h
index aca61264f12c..f96fc2dbf60e 100644
--- a/include/dt-bindings/clock/qcom,gcc-sdm845.h
+++ b/include/dt-bindings/clock/qcom,gcc-sdm845.h
@@ -192,6 +192,8 @@
 #define GCC_VS_CTRL_CLK_SRC					182
 #define GCC_VSENSOR_CLK_SRC					183
 #define GPLL4							184
+#define GCC_CPUSS_DVM_BUS_CLK					185
+#define GCC_CPUSS_GNOC_CLK					186
 
 /* GCC Resets */
 #define GCC_MMSS_BCR						0
-- 
cgit v1.2.3


From f1d34bfd70b1b4543a139ea28bad4c001c5f413d Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Tue, 19 Jun 2018 15:02:16 +0200
Subject: drm/vmwgfx: Replace vmw_dma_buffer with vmw_buffer_object

Initially vmware buffer objects were only used as DMA buffers, so the name
DMA buffer was a natural one. However, currently they are used also as
dumb buffers and MOBs backing guest backed objects so renaming them to
buffer objects is logical. Particularly since there is a dmabuf subsystem
in the kernel where a dma buffer means something completely different.

This also renames user-space api structures and IOCTL names
correspondingly, but the old names remain defined for now and the ABI
hasn't changed.

There are a couple of minor style changes to make checkpatch happy.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Deepak Rawat <drawat@vmware.com>
---
 drivers/gpu/drm/vmwgfx/Makefile            |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c         | 376 ++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c     | 887 -----------------------------
 drivers/gpu/drm/vmwgfx/vmwgfx_context.c    |  10 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c    |  10 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c     | 376 ------------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c        |  18 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h        | 129 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c    |  86 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c         |  16 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c      |   4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c        | 203 ++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h        |  58 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c        |  10 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c    |  24 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c   | 280 ++++-----
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c       | 100 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_shader.c     |  22 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c       |  64 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c    |  37 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 887 +++++++++++++++++++++++++++++
 include/uapi/drm/vmwgfx_drm.h              |  61 +-
 22 files changed, 1822 insertions(+), 1840 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
 delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 794cc9d5c9b0..09b2aa08363e 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
-	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
+	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
 	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
-	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o vmwgfx_context.o \
+	    vmwgfx_fence.o vmwgfx_bo.o vmwgfx_scrn.o vmwgfx_context.o \
 	    vmwgfx_surface.o vmwgfx_prime.o vmwgfx_mob.o vmwgfx_shader.o \
 	    vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
 	    vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
new file mode 100644
index 000000000000..f26f658cccdb
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -0,0 +1,376 @@
+/**************************************************************************
+ *
+ * Copyright © 2011-2015 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <drm/ttm/ttm_placement.h>
+
+#include <drm/drmP.h>
+#include "vmwgfx_drv.h"
+
+
+/**
+ * vmw_bo_pin_in_placement - Validate a buffer to placement.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @placement:  The placement to pin it.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ *  -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_bo_pin_in_placement(struct vmw_private *dev_priv,
+			    struct vmw_buffer_object *buf,
+			    struct ttm_placement *placement,
+			    bool interruptible)
+{
+	struct ttm_operation_ctx ctx = {interruptible, false };
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+	uint32_t new_flags;
+
+	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
+	if (unlikely(ret != 0))
+		goto err;
+
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, placement, &ctx);
+
+	if (!ret)
+		vmw_bo_pin_reserved(buf, true);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_write_unlock(&dev_priv->reservation_sem);
+	return ret;
+}
+
+/**
+ * vmw_bo_pin_in_vram_or_gmr - Move a buffer to vram or gmr.
+ *
+ * This function takes the reservation_sem in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
+			      struct vmw_buffer_object *buf,
+			      bool interruptible)
+{
+	struct ttm_operation_ctx ctx = {interruptible, false };
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+	uint32_t new_flags;
+
+	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
+	if (unlikely(ret != 0))
+		goto err;
+
+	if (buf->pin_count > 0) {
+		ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+		goto out_unreserve;
+	}
+
+	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
+	if (likely(ret == 0) || ret == -ERESTARTSYS)
+		goto out_unreserve;
+
+	ret = ttm_bo_validate(bo, &vmw_vram_placement, &ctx);
+
+out_unreserve:
+	if (!ret)
+		vmw_bo_pin_reserved(buf, true);
+
+	ttm_bo_unreserve(bo);
+err:
+	ttm_write_unlock(&dev_priv->reservation_sem);
+	return ret;
+}
+
+/**
+ * vmw_bo_pin_in_vram - Move a buffer to vram.
+ *
+ * This function takes the reservation_sem in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_bo_pin_in_vram(struct vmw_private *dev_priv,
+		       struct vmw_buffer_object *buf,
+		       bool interruptible)
+{
+	return vmw_bo_pin_in_placement(dev_priv, buf, &vmw_vram_placement,
+				       interruptible);
+}
+
+/**
+ * vmw_bo_pin_in_start_of_vram - Move a buffer to start of vram.
+ *
+ * This function takes the reservation_sem in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to pin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_bo_pin_in_start_of_vram(struct vmw_private *dev_priv,
+				struct vmw_buffer_object *buf,
+				bool interruptible)
+{
+	struct ttm_operation_ctx ctx = {interruptible, false };
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement placement;
+	struct ttm_place place;
+	int ret = 0;
+	uint32_t new_flags;
+
+	place = vmw_vram_placement.placement[0];
+	place.lpfn = bo->num_pages;
+	placement.num_placement = 1;
+	placement.placement = &place;
+	placement.num_busy_placement = 1;
+	placement.busy_placement = &place;
+
+	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv);
+	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
+	if (unlikely(ret != 0))
+		goto err_unlock;
+
+	/*
+	 * Is this buffer already in vram but not at the start of it?
+	 * In that case, evict it first because TTM isn't good at handling
+	 * that situation.
+	 */
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start < bo->num_pages &&
+	    bo->mem.start > 0 &&
+	    buf->pin_count == 0) {
+		ctx.interruptible = false;
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
+	}
+
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(&placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, &placement, &ctx);
+
+	/* For some reason we didn't end up at the start of vram */
+	WARN_ON(ret == 0 && bo->offset != 0);
+	if (!ret)
+		vmw_bo_pin_reserved(buf, true);
+
+	ttm_bo_unreserve(bo);
+err_unlock:
+	ttm_write_unlock(&dev_priv->reservation_sem);
+
+	return ret;
+}
+
+/**
+ * vmw_bo_unpin - Unpin the buffer given buffer, does not move the buffer.
+ *
+ * This function takes the reservation_sem in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to unpin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_bo_unpin(struct vmw_private *dev_priv,
+		 struct vmw_buffer_object *buf,
+		 bool interruptible)
+{
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+
+	ret = ttm_read_lock(&dev_priv->reservation_sem, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
+	if (unlikely(ret != 0))
+		goto err;
+
+	vmw_bo_pin_reserved(buf, false);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_read_unlock(&dev_priv->reservation_sem);
+	return ret;
+}
+
+/**
+ * vmw_bo_get_guest_ptr - Get the guest ptr representing the current placement
+ * of a buffer.
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. Must be pinned or reserved.
+ * @ptr: SVGAGuestPtr returning the result.
+ */
+void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
+			  SVGAGuestPtr *ptr)
+{
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
+		ptr->offset = bo->offset;
+	} else {
+		ptr->gmrId = bo->mem.start;
+		ptr->offset = 0;
+	}
+}
+
+
+/**
+ * vmw_bo_pin_reserved - Pin or unpin a buffer object without moving it.
+ *
+ * @vbo: The buffer object. Must be reserved.
+ * @pin: Whether to pin or unpin.
+ *
+ */
+void vmw_bo_pin_reserved(struct vmw_buffer_object *vbo, bool pin)
+{
+	struct ttm_operation_ctx ctx = { false, true };
+	struct ttm_place pl;
+	struct ttm_placement placement;
+	struct ttm_buffer_object *bo = &vbo->base;
+	uint32_t old_mem_type = bo->mem.mem_type;
+	int ret;
+
+	lockdep_assert_held(&bo->resv->lock.base);
+
+	if (pin) {
+		if (vbo->pin_count++ > 0)
+			return;
+	} else {
+		WARN_ON(vbo->pin_count <= 0);
+		if (--vbo->pin_count > 0)
+			return;
+	}
+
+	pl.fpfn = 0;
+	pl.lpfn = 0;
+	pl.flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB
+		| TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED;
+	if (pin)
+		pl.flags |= TTM_PL_FLAG_NO_EVICT;
+
+	memset(&placement, 0, sizeof(placement));
+	placement.num_placement = 1;
+	placement.placement = &pl;
+
+	ret = ttm_bo_validate(bo, &placement, &ctx);
+
+	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
+}
+
+
+/*
+ * vmw_buffer_object_unmap - Tear down a cached buffer object map.
+ *
+ * @vbo: The buffer object whose map we are tearing down.
+ *
+ * This function tears down a cached map set up using
+ * vmw_buffer_object_map_and_cache().
+ */
+void vmw_buffer_object_unmap(struct vmw_buffer_object *vbo)
+{
+	if (vbo->map.bo == NULL)
+		return;
+
+	ttm_bo_kunmap(&vbo->map);
+}
+
+
+/*
+ * vmw_buffer_object_map_and_cache - Map a buffer object and cache the map
+ *
+ * @vbo: The buffer object to map
+ * Return: A kernel virtual address or NULL if mapping failed.
+ *
+ * This function maps a buffer object into the kernel address space, or
+ * returns the virtual kernel address of an already existing map. The virtual
+ * address remains valid as long as the buffer object is pinned or reserved.
+ * The cached map is torn down on either
+ * 1) Buffer object move
+ * 2) Buffer object swapout
+ * 3) Buffer object destruction
+ *
+ */
+void *vmw_buffer_object_map_and_cache(struct vmw_buffer_object *vbo)
+{
+	struct ttm_buffer_object *bo = &vbo->base;
+	bool not_used;
+	void *virtual;
+	int ret;
+
+	virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used);
+	if (virtual)
+		return virtual;
+
+	ret = ttm_bo_kmap(bo, 0, bo->num_pages, &vbo->map);
+	if (ret)
+		DRM_ERROR("Buffer object map failed: %d.\n", ret);
+
+	return ttm_kmap_obj_virtual(&vbo->map, &not_used);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
deleted file mode 100644
index 21111fd091f9..000000000000
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ /dev/null
@@ -1,887 +0,0 @@
-/**************************************************************************
- *
- * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "vmwgfx_drv.h"
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_page_alloc.h>
-
-static const struct ttm_place vram_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
-};
-
-static const struct ttm_place vram_ne_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
-};
-
-static const struct ttm_place sys_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
-};
-
-static const struct ttm_place sys_ne_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
-};
-
-static const struct ttm_place gmr_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
-};
-
-static const struct ttm_place gmr_ne_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
-};
-
-static const struct ttm_place mob_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
-};
-
-static const struct ttm_place mob_ne_placement_flags = {
-	.fpfn = 0,
-	.lpfn = 0,
-	.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
-};
-
-struct ttm_placement vmw_vram_placement = {
-	.num_placement = 1,
-	.placement = &vram_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &vram_placement_flags
-};
-
-static const struct ttm_place vram_gmr_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
-	}
-};
-
-static const struct ttm_place gmr_vram_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
-	}
-};
-
-struct ttm_placement vmw_vram_gmr_placement = {
-	.num_placement = 2,
-	.placement = vram_gmr_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &gmr_placement_flags
-};
-
-static const struct ttm_place vram_gmr_ne_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED |
-			 TTM_PL_FLAG_NO_EVICT
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED |
-			 TTM_PL_FLAG_NO_EVICT
-	}
-};
-
-struct ttm_placement vmw_vram_gmr_ne_placement = {
-	.num_placement = 2,
-	.placement = vram_gmr_ne_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &gmr_ne_placement_flags
-};
-
-struct ttm_placement vmw_vram_sys_placement = {
-	.num_placement = 1,
-	.placement = &vram_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags
-};
-
-struct ttm_placement vmw_vram_ne_placement = {
-	.num_placement = 1,
-	.placement = &vram_ne_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &vram_ne_placement_flags
-};
-
-struct ttm_placement vmw_sys_placement = {
-	.num_placement = 1,
-	.placement = &sys_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags
-};
-
-struct ttm_placement vmw_sys_ne_placement = {
-	.num_placement = 1,
-	.placement = &sys_ne_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_ne_placement_flags
-};
-
-static const struct ttm_place evictable_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
-	}
-};
-
-static const struct ttm_place nonfixed_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
-	}
-};
-
-struct ttm_placement vmw_evictable_placement = {
-	.num_placement = 4,
-	.placement = evictable_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags
-};
-
-struct ttm_placement vmw_srf_placement = {
-	.num_placement = 1,
-	.num_busy_placement = 2,
-	.placement = &gmr_placement_flags,
-	.busy_placement = gmr_vram_placement_flags
-};
-
-struct ttm_placement vmw_mob_placement = {
-	.num_placement = 1,
-	.num_busy_placement = 1,
-	.placement = &mob_placement_flags,
-	.busy_placement = &mob_placement_flags
-};
-
-struct ttm_placement vmw_mob_ne_placement = {
-	.num_placement = 1,
-	.num_busy_placement = 1,
-	.placement = &mob_ne_placement_flags,
-	.busy_placement = &mob_ne_placement_flags
-};
-
-struct ttm_placement vmw_nonfixed_placement = {
-	.num_placement = 3,
-	.placement = nonfixed_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags
-};
-
-struct vmw_ttm_tt {
-	struct ttm_dma_tt dma_ttm;
-	struct vmw_private *dev_priv;
-	int gmr_id;
-	struct vmw_mob *mob;
-	int mem_type;
-	struct sg_table sgt;
-	struct vmw_sg_table vsgt;
-	uint64_t sg_alloc_size;
-	bool mapped;
-};
-
-const size_t vmw_tt_size = sizeof(struct vmw_ttm_tt);
-
-/**
- * Helper functions to advance a struct vmw_piter iterator.
- *
- * @viter: Pointer to the iterator.
- *
- * These functions return false if past the end of the list,
- * true otherwise. Functions are selected depending on the current
- * DMA mapping mode.
- */
-static bool __vmw_piter_non_sg_next(struct vmw_piter *viter)
-{
-	return ++(viter->i) < viter->num_pages;
-}
-
-static bool __vmw_piter_sg_next(struct vmw_piter *viter)
-{
-	return __sg_page_iter_next(&viter->iter);
-}
-
-
-/**
- * Helper functions to return a pointer to the current page.
- *
- * @viter: Pointer to the iterator
- *
- * These functions return a pointer to the page currently
- * pointed to by @viter. Functions are selected depending on the
- * current mapping mode.
- */
-static struct page *__vmw_piter_non_sg_page(struct vmw_piter *viter)
-{
-	return viter->pages[viter->i];
-}
-
-static struct page *__vmw_piter_sg_page(struct vmw_piter *viter)
-{
-	return sg_page_iter_page(&viter->iter);
-}
-
-
-/**
- * Helper functions to return the DMA address of the current page.
- *
- * @viter: Pointer to the iterator
- *
- * These functions return the DMA address of the page currently
- * pointed to by @viter. Functions are selected depending on the
- * current mapping mode.
- */
-static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter)
-{
-	return page_to_phys(viter->pages[viter->i]);
-}
-
-static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)
-{
-	return viter->addrs[viter->i];
-}
-
-static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter)
-{
-	return sg_page_iter_dma_address(&viter->iter);
-}
-
-
-/**
- * vmw_piter_start - Initialize a struct vmw_piter.
- *
- * @viter: Pointer to the iterator to initialize
- * @vsgt: Pointer to a struct vmw_sg_table to initialize from
- *
- * Note that we're following the convention of __sg_page_iter_start, so that
- * the iterator doesn't point to a valid page after initialization; it has
- * to be advanced one step first.
- */
-void vmw_piter_start(struct vmw_piter *viter, const struct vmw_sg_table *vsgt,
-		     unsigned long p_offset)
-{
-	viter->i = p_offset - 1;
-	viter->num_pages = vsgt->num_pages;
-	switch (vsgt->mode) {
-	case vmw_dma_phys:
-		viter->next = &__vmw_piter_non_sg_next;
-		viter->dma_address = &__vmw_piter_phys_addr;
-		viter->page = &__vmw_piter_non_sg_page;
-		viter->pages = vsgt->pages;
-		break;
-	case vmw_dma_alloc_coherent:
-		viter->next = &__vmw_piter_non_sg_next;
-		viter->dma_address = &__vmw_piter_dma_addr;
-		viter->page = &__vmw_piter_non_sg_page;
-		viter->addrs = vsgt->addrs;
-		viter->pages = vsgt->pages;
-		break;
-	case vmw_dma_map_populate:
-	case vmw_dma_map_bind:
-		viter->next = &__vmw_piter_sg_next;
-		viter->dma_address = &__vmw_piter_sg_addr;
-		viter->page = &__vmw_piter_sg_page;
-		__sg_page_iter_start(&viter->iter, vsgt->sgt->sgl,
-				     vsgt->sgt->orig_nents, p_offset);
-		break;
-	default:
-		BUG();
-	}
-}
-
-/**
- * vmw_ttm_unmap_from_dma - unmap  device addresses previsouly mapped for
- * TTM pages
- *
- * @vmw_tt: Pointer to a struct vmw_ttm_backend
- *
- * Used to free dma mappings previously mapped by vmw_ttm_map_for_dma.
- */
-static void vmw_ttm_unmap_from_dma(struct vmw_ttm_tt *vmw_tt)
-{
-	struct device *dev = vmw_tt->dev_priv->dev->dev;
-
-	dma_unmap_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.nents,
-		DMA_BIDIRECTIONAL);
-	vmw_tt->sgt.nents = vmw_tt->sgt.orig_nents;
-}
-
-/**
- * vmw_ttm_map_for_dma - map TTM pages to get device addresses
- *
- * @vmw_tt: Pointer to a struct vmw_ttm_backend
- *
- * This function is used to get device addresses from the kernel DMA layer.
- * However, it's violating the DMA API in that when this operation has been
- * performed, it's illegal for the CPU to write to the pages without first
- * unmapping the DMA mappings, or calling dma_sync_sg_for_cpu(). It is
- * therefore only legal to call this function if we know that the function
- * dma_sync_sg_for_cpu() is a NOP, and dma_sync_sg_for_device() is at most
- * a CPU write buffer flush.
- */
-static int vmw_ttm_map_for_dma(struct vmw_ttm_tt *vmw_tt)
-{
-	struct device *dev = vmw_tt->dev_priv->dev->dev;
-	int ret;
-
-	ret = dma_map_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.orig_nents,
-			 DMA_BIDIRECTIONAL);
-	if (unlikely(ret == 0))
-		return -ENOMEM;
-
-	vmw_tt->sgt.nents = ret;
-
-	return 0;
-}
-
-/**
- * vmw_ttm_map_dma - Make sure TTM pages are visible to the device
- *
- * @vmw_tt: Pointer to a struct vmw_ttm_tt
- *
- * Select the correct function for and make sure the TTM pages are
- * visible to the device. Allocate storage for the device mappings.
- * If a mapping has already been performed, indicated by the storage
- * pointer being non NULL, the function returns success.
- */
-static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
-{
-	struct vmw_private *dev_priv = vmw_tt->dev_priv;
-	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
-	struct vmw_sg_table *vsgt = &vmw_tt->vsgt;
-	struct ttm_operation_ctx ctx = {
-		.interruptible = true,
-		.no_wait_gpu = false
-	};
-	struct vmw_piter iter;
-	dma_addr_t old;
-	int ret = 0;
-	static size_t sgl_size;
-	static size_t sgt_size;
-
-	if (vmw_tt->mapped)
-		return 0;
-
-	vsgt->mode = dev_priv->map_mode;
-	vsgt->pages = vmw_tt->dma_ttm.ttm.pages;
-	vsgt->num_pages = vmw_tt->dma_ttm.ttm.num_pages;
-	vsgt->addrs = vmw_tt->dma_ttm.dma_address;
-	vsgt->sgt = &vmw_tt->sgt;
-
-	switch (dev_priv->map_mode) {
-	case vmw_dma_map_bind:
-	case vmw_dma_map_populate:
-		if (unlikely(!sgl_size)) {
-			sgl_size = ttm_round_pot(sizeof(struct scatterlist));
-			sgt_size = ttm_round_pot(sizeof(struct sg_table));
-		}
-		vmw_tt->sg_alloc_size = sgt_size + sgl_size * vsgt->num_pages;
-		ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, &ctx);
-		if (unlikely(ret != 0))
-			return ret;
-
-		ret = sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
-						vsgt->num_pages, 0,
-						(unsigned long)
-						vsgt->num_pages << PAGE_SHIFT,
-						GFP_KERNEL);
-		if (unlikely(ret != 0))
-			goto out_sg_alloc_fail;
-
-		if (vsgt->num_pages > vmw_tt->sgt.nents) {
-			uint64_t over_alloc =
-				sgl_size * (vsgt->num_pages -
-					    vmw_tt->sgt.nents);
-
-			ttm_mem_global_free(glob, over_alloc);
-			vmw_tt->sg_alloc_size -= over_alloc;
-		}
-
-		ret = vmw_ttm_map_for_dma(vmw_tt);
-		if (unlikely(ret != 0))
-			goto out_map_fail;
-
-		break;
-	default:
-		break;
-	}
-
-	old = ~((dma_addr_t) 0);
-	vmw_tt->vsgt.num_regions = 0;
-	for (vmw_piter_start(&iter, vsgt, 0); vmw_piter_next(&iter);) {
-		dma_addr_t cur = vmw_piter_dma_addr(&iter);
-
-		if (cur != old + PAGE_SIZE)
-			vmw_tt->vsgt.num_regions++;
-		old = cur;
-	}
-
-	vmw_tt->mapped = true;
-	return 0;
-
-out_map_fail:
-	sg_free_table(vmw_tt->vsgt.sgt);
-	vmw_tt->vsgt.sgt = NULL;
-out_sg_alloc_fail:
-	ttm_mem_global_free(glob, vmw_tt->sg_alloc_size);
-	return ret;
-}
-
-/**
- * vmw_ttm_unmap_dma - Tear down any TTM page device mappings
- *
- * @vmw_tt: Pointer to a struct vmw_ttm_tt
- *
- * Tear down any previously set up device DMA mappings and free
- * any storage space allocated for them. If there are no mappings set up,
- * this function is a NOP.
- */
-static void vmw_ttm_unmap_dma(struct vmw_ttm_tt *vmw_tt)
-{
-	struct vmw_private *dev_priv = vmw_tt->dev_priv;
-
-	if (!vmw_tt->vsgt.sgt)
-		return;
-
-	switch (dev_priv->map_mode) {
-	case vmw_dma_map_bind:
-	case vmw_dma_map_populate:
-		vmw_ttm_unmap_from_dma(vmw_tt);
-		sg_free_table(vmw_tt->vsgt.sgt);
-		vmw_tt->vsgt.sgt = NULL;
-		ttm_mem_global_free(vmw_mem_glob(dev_priv),
-				    vmw_tt->sg_alloc_size);
-		break;
-	default:
-		break;
-	}
-	vmw_tt->mapped = false;
-}
-
-
-/**
- * vmw_bo_map_dma - Make sure buffer object pages are visible to the device
- *
- * @bo: Pointer to a struct ttm_buffer_object
- *
- * Wrapper around vmw_ttm_map_dma, that takes a TTM buffer object pointer
- * instead of a pointer to a struct vmw_ttm_backend as argument.
- * Note that the buffer object must be either pinned or reserved before
- * calling this function.
- */
-int vmw_bo_map_dma(struct ttm_buffer_object *bo)
-{
-	struct vmw_ttm_tt *vmw_tt =
-		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-
-	return vmw_ttm_map_dma(vmw_tt);
-}
-
-
-/**
- * vmw_bo_unmap_dma - Make sure buffer object pages are visible to the device
- *
- * @bo: Pointer to a struct ttm_buffer_object
- *
- * Wrapper around vmw_ttm_unmap_dma, that takes a TTM buffer object pointer
- * instead of a pointer to a struct vmw_ttm_backend as argument.
- */
-void vmw_bo_unmap_dma(struct ttm_buffer_object *bo)
-{
-	struct vmw_ttm_tt *vmw_tt =
-		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-
-	vmw_ttm_unmap_dma(vmw_tt);
-}
-
-
-/**
- * vmw_bo_sg_table - Return a struct vmw_sg_table object for a
- * TTM buffer object
- *
- * @bo: Pointer to a struct ttm_buffer_object
- *
- * Returns a pointer to a struct vmw_sg_table object. The object should
- * not be freed after use.
- * Note that for the device addresses to be valid, the buffer object must
- * either be reserved or pinned.
- */
-const struct vmw_sg_table *vmw_bo_sg_table(struct ttm_buffer_object *bo)
-{
-	struct vmw_ttm_tt *vmw_tt =
-		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-
-	return &vmw_tt->vsgt;
-}
-
-
-static int vmw_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
-{
-	struct vmw_ttm_tt *vmw_be =
-		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-	int ret;
-
-	ret = vmw_ttm_map_dma(vmw_be);
-	if (unlikely(ret != 0))
-		return ret;
-
-	vmw_be->gmr_id = bo_mem->start;
-	vmw_be->mem_type = bo_mem->mem_type;
-
-	switch (bo_mem->mem_type) {
-	case VMW_PL_GMR:
-		return vmw_gmr_bind(vmw_be->dev_priv, &vmw_be->vsgt,
-				    ttm->num_pages, vmw_be->gmr_id);
-	case VMW_PL_MOB:
-		if (unlikely(vmw_be->mob == NULL)) {
-			vmw_be->mob =
-				vmw_mob_create(ttm->num_pages);
-			if (unlikely(vmw_be->mob == NULL))
-				return -ENOMEM;
-		}
-
-		return vmw_mob_bind(vmw_be->dev_priv, vmw_be->mob,
-				    &vmw_be->vsgt, ttm->num_pages,
-				    vmw_be->gmr_id);
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-static int vmw_ttm_unbind(struct ttm_tt *ttm)
-{
-	struct vmw_ttm_tt *vmw_be =
-		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-
-	switch (vmw_be->mem_type) {
-	case VMW_PL_GMR:
-		vmw_gmr_unbind(vmw_be->dev_priv, vmw_be->gmr_id);
-		break;
-	case VMW_PL_MOB:
-		vmw_mob_unbind(vmw_be->dev_priv, vmw_be->mob);
-		break;
-	default:
-		BUG();
-	}
-
-	if (vmw_be->dev_priv->map_mode == vmw_dma_map_bind)
-		vmw_ttm_unmap_dma(vmw_be);
-
-	return 0;
-}
-
-
-static void vmw_ttm_destroy(struct ttm_tt *ttm)
-{
-	struct vmw_ttm_tt *vmw_be =
-		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-
-	vmw_ttm_unmap_dma(vmw_be);
-	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
-		ttm_dma_tt_fini(&vmw_be->dma_ttm);
-	else
-		ttm_tt_fini(ttm);
-
-	if (vmw_be->mob)
-		vmw_mob_destroy(vmw_be->mob);
-
-	kfree(vmw_be);
-}
-
-
-static int vmw_ttm_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
-{
-	struct vmw_ttm_tt *vmw_tt =
-		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
-	struct vmw_private *dev_priv = vmw_tt->dev_priv;
-	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
-	int ret;
-
-	if (ttm->state != tt_unpopulated)
-		return 0;
-
-	if (dev_priv->map_mode == vmw_dma_alloc_coherent) {
-		size_t size =
-			ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t));
-		ret = ttm_mem_global_alloc(glob, size, ctx);
-		if (unlikely(ret != 0))
-			return ret;
-
-		ret = ttm_dma_populate(&vmw_tt->dma_ttm, dev_priv->dev->dev,
-					ctx);
-		if (unlikely(ret != 0))
-			ttm_mem_global_free(glob, size);
-	} else
-		ret = ttm_pool_populate(ttm, ctx);
-
-	return ret;
-}
-
-static void vmw_ttm_unpopulate(struct ttm_tt *ttm)
-{
-	struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
-						 dma_ttm.ttm);
-	struct vmw_private *dev_priv = vmw_tt->dev_priv;
-	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
-
-
-	if (vmw_tt->mob) {
-		vmw_mob_destroy(vmw_tt->mob);
-		vmw_tt->mob = NULL;
-	}
-
-	vmw_ttm_unmap_dma(vmw_tt);
-	if (dev_priv->map_mode == vmw_dma_alloc_coherent) {
-		size_t size =
-			ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t));
-
-		ttm_dma_unpopulate(&vmw_tt->dma_ttm, dev_priv->dev->dev);
-		ttm_mem_global_free(glob, size);
-	} else
-		ttm_pool_unpopulate(ttm);
-}
-
-static struct ttm_backend_func vmw_ttm_func = {
-	.bind = vmw_ttm_bind,
-	.unbind = vmw_ttm_unbind,
-	.destroy = vmw_ttm_destroy,
-};
-
-static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
-					uint32_t page_flags)
-{
-	struct vmw_ttm_tt *vmw_be;
-	int ret;
-
-	vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
-	if (!vmw_be)
-		return NULL;
-
-	vmw_be->dma_ttm.ttm.func = &vmw_ttm_func;
-	vmw_be->dev_priv = container_of(bo->bdev, struct vmw_private, bdev);
-	vmw_be->mob = NULL;
-
-	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
-		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
-	else
-		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
-	if (unlikely(ret != 0))
-		goto out_no_init;
-
-	return &vmw_be->dma_ttm.ttm;
-out_no_init:
-	kfree(vmw_be);
-	return NULL;
-}
-
-static int vmw_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-	return 0;
-}
-
-static int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-		      struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		/* System memory */
-
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_CACHED;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		/* "On-card" video ram */
-		man->func = &ttm_bo_manager_func;
-		man->gpu_offset = 0;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_CACHED;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case VMW_PL_GMR:
-	case VMW_PL_MOB:
-		/*
-		 * "Guest Memory Regions" is an aperture like feature with
-		 *  one slot per bo. There is an upper limit of the number of
-		 *  slots as well as the bo size.
-		 */
-		man->func = &vmw_gmrid_manager_func;
-		man->gpu_offset = 0;
-		man->flags = TTM_MEMTYPE_FLAG_CMA | TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_CACHED;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void vmw_evict_flags(struct ttm_buffer_object *bo,
-		     struct ttm_placement *placement)
-{
-	*placement = vmw_sys_placement;
-}
-
-static int vmw_verify_access(struct ttm_buffer_object *bo, struct file *filp)
-{
-	struct ttm_object_file *tfile =
-		vmw_fpriv((struct drm_file *)filp->private_data)->tfile;
-
-	return vmw_user_dmabuf_verify_access(bo, tfile);
-}
-
-static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.is_iomem = false;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-	case VMW_PL_GMR:
-	case VMW_PL_MOB:
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = dev_priv->vram_start;
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
-static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
-{
-	return 0;
-}
-
-/**
- * vmw_move_notify - TTM move_notify_callback
- *
- * @bo: The TTM buffer object about to move.
- * @mem: The struct ttm_mem_reg indicating to what memory
- *       region the move is taking place.
- *
- * Calls move_notify for all subsystems needing it.
- * (currently only resources).
- */
-static void vmw_move_notify(struct ttm_buffer_object *bo,
-			    bool evict,
-			    struct ttm_mem_reg *mem)
-{
-	vmw_resource_move_notify(bo, mem);
-	vmw_query_move_notify(bo, mem);
-}
-
-
-/**
- * vmw_swap_notify - TTM move_notify_callback
- *
- * @bo: The TTM buffer object about to be swapped out.
- */
-static void vmw_swap_notify(struct ttm_buffer_object *bo)
-{
-	vmw_resource_swap_notify(bo);
-	(void) ttm_bo_wait(bo, false, false);
-}
-
-
-struct ttm_bo_driver vmw_bo_driver = {
-	.ttm_tt_create = &vmw_ttm_tt_create,
-	.ttm_tt_populate = &vmw_ttm_populate,
-	.ttm_tt_unpopulate = &vmw_ttm_unpopulate,
-	.invalidate_caches = vmw_invalidate_caches,
-	.init_mem_type = vmw_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = vmw_evict_flags,
-	.move = NULL,
-	.verify_access = vmw_verify_access,
-	.move_notify = vmw_move_notify,
-	.swap_notify = vmw_swap_notify,
-	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
-	.io_mem_reserve = &vmw_ttm_io_mem_reserve,
-	.io_mem_free = &vmw_ttm_io_mem_free,
-};
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 3767ac335aca..ff8acc74786c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -38,7 +38,7 @@ struct vmw_user_context {
 	struct vmw_cmdbuf_res_manager *man;
 	struct vmw_resource *cotables[SVGA_COTABLE_DX10_MAX];
 	spinlock_t cotable_lock;
-	struct vmw_dma_buffer *dx_query_mob;
+	struct vmw_buffer_object *dx_query_mob;
 };
 
 static void vmw_user_context_free(struct vmw_resource *res);
@@ -900,7 +900,7 @@ vmw_context_binding_state(struct vmw_resource *ctx)
  * specified in the parameter.  0 otherwise.
  */
 int vmw_context_bind_dx_query(struct vmw_resource *ctx_res,
-			      struct vmw_dma_buffer *mob)
+			      struct vmw_buffer_object *mob)
 {
 	struct vmw_user_context *uctx =
 		container_of(ctx_res, struct vmw_user_context, res);
@@ -908,7 +908,7 @@ int vmw_context_bind_dx_query(struct vmw_resource *ctx_res,
 	if (mob == NULL) {
 		if (uctx->dx_query_mob) {
 			uctx->dx_query_mob->dx_query_ctx = NULL;
-			vmw_dmabuf_unreference(&uctx->dx_query_mob);
+			vmw_bo_unreference(&uctx->dx_query_mob);
 			uctx->dx_query_mob = NULL;
 		}
 
@@ -922,7 +922,7 @@ int vmw_context_bind_dx_query(struct vmw_resource *ctx_res,
 	mob->dx_query_ctx  = ctx_res;
 
 	if (!uctx->dx_query_mob)
-		uctx->dx_query_mob = vmw_dmabuf_reference(mob);
+		uctx->dx_query_mob = vmw_bo_reference(mob);
 
 	return 0;
 }
@@ -932,7 +932,7 @@ int vmw_context_bind_dx_query(struct vmw_resource *ctx_res,
  *
  * @ctx_res: The context resource
  */
-struct vmw_dma_buffer *
+struct vmw_buffer_object *
 vmw_context_get_dx_query_mob(struct vmw_resource *ctx_res)
 {
 	struct vmw_user_context *uctx =
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index cbf54ea7b4c0..1052cd3cb700 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -390,7 +390,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
 	struct ttm_operation_ctx ctx = { false, false };
 	struct vmw_private *dev_priv = res->dev_priv;
 	struct vmw_cotable *vcotbl = vmw_cotable(res);
-	struct vmw_dma_buffer *buf, *old_buf = res->backup;
+	struct vmw_buffer_object *buf, *old_buf = res->backup;
 	struct ttm_buffer_object *bo, *old_bo = &res->backup->base;
 	size_t old_size = res->backup_size;
 	size_t old_size_read_back = vcotbl->size_read_back;
@@ -415,8 +415,8 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
 	if (!buf)
 		return -ENOMEM;
 
-	ret = vmw_dmabuf_init(dev_priv, buf, new_size, &vmw_mob_ne_placement,
-			      true, vmw_dmabuf_bo_free);
+	ret = vmw_bo_init(dev_priv, buf, new_size, &vmw_mob_ne_placement,
+			  true, vmw_bo_bo_free);
 	if (ret) {
 		DRM_ERROR("Failed initializing new cotable MOB.\n");
 		return ret;
@@ -482,7 +482,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
 	/* Let go of the old mob. */
 	list_del(&res->mob_head);
 	list_add_tail(&res->mob_head, &buf->res_list);
-	vmw_dmabuf_unreference(&old_buf);
+	vmw_bo_unreference(&old_buf);
 	res->id = vcotbl->type;
 
 	return 0;
@@ -491,7 +491,7 @@ out_map_new:
 	ttm_bo_kunmap(&old_map);
 out_wait:
 	ttm_bo_unreserve(bo);
-	vmw_dmabuf_unreference(&buf);
+	vmw_bo_unreference(&buf);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
deleted file mode 100644
index d59d9dd16ebc..000000000000
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/**************************************************************************
- *
- * Copyright © 2011-2015 VMware, Inc., Palo Alto, CA., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <drm/ttm/ttm_placement.h>
-
-#include <drm/drmP.h>
-#include "vmwgfx_drv.h"
-
-
-/**
- * vmw_dmabuf_pin_in_placement - Validate a buffer to placement.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to move.
- * @placement:  The placement to pin it.
- * @interruptible:  Use interruptible wait.
- *
- * Returns
- *  -ERESTARTSYS if interrupted by a signal.
- */
-int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
-				struct vmw_dma_buffer *buf,
-				struct ttm_placement *placement,
-				bool interruptible)
-{
-	struct ttm_operation_ctx ctx = {interruptible, false };
-	struct ttm_buffer_object *bo = &buf->base;
-	int ret;
-	uint32_t new_flags;
-
-	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	vmw_execbuf_release_pinned_bo(dev_priv);
-
-	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (buf->pin_count > 0)
-		ret = ttm_bo_mem_compat(placement, &bo->mem,
-					&new_flags) == true ? 0 : -EINVAL;
-	else
-		ret = ttm_bo_validate(bo, placement, &ctx);
-
-	if (!ret)
-		vmw_bo_pin_reserved(buf, true);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_write_unlock(&dev_priv->reservation_sem);
-	return ret;
-}
-
-/**
- * vmw_dmabuf_pin_in_vram_or_gmr - Move a buffer to vram or gmr.
- *
- * This function takes the reservation_sem in write mode.
- * Flushes and unpins the query bo to avoid failures.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to move.
- * @pin:  Pin buffer if true.
- * @interruptible:  Use interruptible wait.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool interruptible)
-{
-	struct ttm_operation_ctx ctx = {interruptible, false };
-	struct ttm_buffer_object *bo = &buf->base;
-	int ret;
-	uint32_t new_flags;
-
-	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	vmw_execbuf_release_pinned_bo(dev_priv);
-
-	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (buf->pin_count > 0) {
-		ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
-					&new_flags) == true ? 0 : -EINVAL;
-		goto out_unreserve;
-	}
-
-	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, &ctx);
-	if (likely(ret == 0) || ret == -ERESTARTSYS)
-		goto out_unreserve;
-
-	ret = ttm_bo_validate(bo, &vmw_vram_placement, &ctx);
-
-out_unreserve:
-	if (!ret)
-		vmw_bo_pin_reserved(buf, true);
-
-	ttm_bo_unreserve(bo);
-err:
-	ttm_write_unlock(&dev_priv->reservation_sem);
-	return ret;
-}
-
-/**
- * vmw_dmabuf_pin_in_vram - Move a buffer to vram.
- *
- * This function takes the reservation_sem in write mode.
- * Flushes and unpins the query bo to avoid failures.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to move.
- * @interruptible:  Use interruptible wait.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-			   struct vmw_dma_buffer *buf,
-			   bool interruptible)
-{
-	return vmw_dmabuf_pin_in_placement(dev_priv, buf, &vmw_vram_placement,
-					   interruptible);
-}
-
-/**
- * vmw_dmabuf_pin_in_start_of_vram - Move a buffer to start of vram.
- *
- * This function takes the reservation_sem in write mode.
- * Flushes and unpins the query bo to avoid failures.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to pin.
- * @interruptible:  Use interruptible wait.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
-				    struct vmw_dma_buffer *buf,
-				    bool interruptible)
-{
-	struct ttm_operation_ctx ctx = {interruptible, false };
-	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_placement placement;
-	struct ttm_place place;
-	int ret = 0;
-	uint32_t new_flags;
-
-	place = vmw_vram_placement.placement[0];
-	place.lpfn = bo->num_pages;
-	placement.num_placement = 1;
-	placement.placement = &place;
-	placement.num_busy_placement = 1;
-	placement.busy_placement = &place;
-
-	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	vmw_execbuf_release_pinned_bo(dev_priv);
-	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
-	if (unlikely(ret != 0))
-		goto err_unlock;
-
-	/*
-	 * Is this buffer already in vram but not at the start of it?
-	 * In that case, evict it first because TTM isn't good at handling
-	 * that situation.
-	 */
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0 &&
-	    buf->pin_count == 0) {
-		ctx.interruptible = false;
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, &ctx);
-	}
-
-	if (buf->pin_count > 0)
-		ret = ttm_bo_mem_compat(&placement, &bo->mem,
-					&new_flags) == true ? 0 : -EINVAL;
-	else
-		ret = ttm_bo_validate(bo, &placement, &ctx);
-
-	/* For some reason we didn't end up at the start of vram */
-	WARN_ON(ret == 0 && bo->offset != 0);
-	if (!ret)
-		vmw_bo_pin_reserved(buf, true);
-
-	ttm_bo_unreserve(bo);
-err_unlock:
-	ttm_write_unlock(&dev_priv->reservation_sem);
-
-	return ret;
-}
-
-/**
- * vmw_dmabuf_unpin - Unpin the buffer given buffer, does not move the buffer.
- *
- * This function takes the reservation_sem in write mode.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to unpin.
- * @interruptible:  Use interruptible wait.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-int vmw_dmabuf_unpin(struct vmw_private *dev_priv,
-		     struct vmw_dma_buffer *buf,
-		     bool interruptible)
-{
-	struct ttm_buffer_object *bo = &buf->base;
-	int ret;
-
-	ret = ttm_read_lock(&dev_priv->reservation_sem, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, interruptible, false, NULL);
-	if (unlikely(ret != 0))
-		goto err;
-
-	vmw_bo_pin_reserved(buf, false);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_read_unlock(&dev_priv->reservation_sem);
-	return ret;
-}
-
-/**
- * vmw_bo_get_guest_ptr - Get the guest ptr representing the current placement
- * of a buffer.
- *
- * @bo: Pointer to a struct ttm_buffer_object. Must be pinned or reserved.
- * @ptr: SVGAGuestPtr returning the result.
- */
-void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
-			  SVGAGuestPtr *ptr)
-{
-	if (bo->mem.mem_type == TTM_PL_VRAM) {
-		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
-		ptr->offset = bo->offset;
-	} else {
-		ptr->gmrId = bo->mem.start;
-		ptr->offset = 0;
-	}
-}
-
-
-/**
- * vmw_bo_pin_reserved - Pin or unpin a buffer object without moving it.
- *
- * @vbo: The buffer object. Must be reserved.
- * @pin: Whether to pin or unpin.
- *
- */
-void vmw_bo_pin_reserved(struct vmw_dma_buffer *vbo, bool pin)
-{
-	struct ttm_operation_ctx ctx = { false, true };
-	struct ttm_place pl;
-	struct ttm_placement placement;
-	struct ttm_buffer_object *bo = &vbo->base;
-	uint32_t old_mem_type = bo->mem.mem_type;
-	int ret;
-
-	lockdep_assert_held(&bo->resv->lock.base);
-
-	if (pin) {
-		if (vbo->pin_count++ > 0)
-			return;
-	} else {
-		WARN_ON(vbo->pin_count <= 0);
-		if (--vbo->pin_count > 0)
-			return;
-	}
-
-	pl.fpfn = 0;
-	pl.lpfn = 0;
-	pl.flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB
-		| TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED;
-	if (pin)
-		pl.flags |= TTM_PL_FLAG_NO_EVICT;
-
-	memset(&placement, 0, sizeof(placement));
-	placement.num_placement = 1;
-	placement.placement = &pl;
-
-	ret = ttm_bo_validate(bo, &placement, &ctx);
-
-	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
-}
-
-
-/*
- * vmw_dma_buffer_unmap - Tear down a cached buffer object map.
- *
- * @vbo: The buffer object whose map we are tearing down.
- *
- * This function tears down a cached map set up using
- * vmw_dma_buffer_map_and_cache().
- */
-void vmw_dma_buffer_unmap(struct vmw_dma_buffer *vbo)
-{
-	if (vbo->map.bo == NULL)
-		return;
-
-	ttm_bo_kunmap(&vbo->map);
-}
-
-
-/*
- * vmw_dma_buffer_map_and_cache - Map a buffer object and cache the map
- *
- * @vbo: The buffer object to map
- * Return: A kernel virtual address or NULL if mapping failed.
- *
- * This function maps a buffer object into the kernel address space, or
- * returns the virtual kernel address of an already existing map. The virtual
- * address remains valid as long as the buffer object is pinned or reserved.
- * The cached map is torn down on either
- * 1) Buffer object move
- * 2) Buffer object swapout
- * 3) Buffer object destruction
- *
- */
-void *vmw_dma_buffer_map_and_cache(struct vmw_dma_buffer *vbo)
-{
-	struct ttm_buffer_object *bo = &vbo->base;
-	bool not_used;
-	void *virtual;
-	int ret;
-
-	virtual = ttm_kmap_obj_virtual(&vbo->map, &not_used);
-	if (virtual)
-		return virtual;
-
-	ret = ttm_bo_kmap(bo, 0, bo->num_pages, &vbo->map);
-	if (ret)
-		DRM_ERROR("Buffer object map failed: %d.\n", ret);
-
-	return ttm_kmap_obj_virtual(&vbo->map, &not_used);
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 09cc721160c4..4f18304226bc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -153,9 +153,9 @@
 static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
-	VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl,
+	VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_bo_alloc_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
-	VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_dmabuf_unref_ioctl,
+	VMW_IOCTL_DEF(VMW_UNREF_DMABUF, vmw_bo_unref_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
 		      vmw_kms_cursor_bypass_ioctl,
@@ -219,7 +219,7 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 		      vmw_gb_surface_reference_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_SYNCCPU,
-		      vmw_user_dmabuf_synccpu_ioctl,
+		      vmw_user_bo_synccpu_ioctl,
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_EXTENDED_CONTEXT,
 		      vmw_extended_context_define_ioctl,
@@ -321,7 +321,7 @@ static void vmw_print_capabilities(uint32_t capabilities)
 static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
 {
 	int ret;
-	struct vmw_dma_buffer *vbo;
+	struct vmw_buffer_object *vbo;
 	struct ttm_bo_kmap_obj map;
 	volatile SVGA3dQueryResult *result;
 	bool dummy;
@@ -335,9 +335,9 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
 	if (!vbo)
 		return -ENOMEM;
 
-	ret = vmw_dmabuf_init(dev_priv, vbo, PAGE_SIZE,
-			      &vmw_sys_ne_placement, false,
-			      &vmw_dmabuf_bo_free);
+	ret = vmw_bo_init(dev_priv, vbo, PAGE_SIZE,
+			  &vmw_sys_ne_placement, false,
+			  &vmw_bo_bo_free);
 	if (unlikely(ret != 0))
 		return ret;
 
@@ -358,7 +358,7 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
 
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Dummy query buffer map failed.\n");
-		vmw_dmabuf_unreference(&vbo);
+		vmw_bo_unreference(&vbo);
 	} else
 		dev_priv->dummy_query_bo = vbo;
 
@@ -460,7 +460,7 @@ static void vmw_release_device_early(struct vmw_private *dev_priv)
 
 	BUG_ON(dev_priv->pinned_bo != NULL);
 
-	vmw_dmabuf_unreference(&dev_priv->dummy_query_bo);
+	vmw_bo_unreference(&dev_priv->dummy_query_bo);
 	if (dev_priv->cman)
 		vmw_cmdbuf_remove_pool(dev_priv->cman);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 5fcbe1620d50..25c2f668ad6c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -86,7 +86,7 @@ struct vmw_fpriv {
 	bool gb_aware;
 };
 
-struct vmw_dma_buffer {
+struct vmw_buffer_object {
 	struct ttm_buffer_object base;
 	struct list_head res_list;
 	s32 pin_count;
@@ -120,7 +120,7 @@ struct vmw_resource {
 	unsigned long backup_size;
 	bool res_dirty; /* Protected by backup buffer reserved */
 	bool backup_dirty; /* Protected by backup buffer reserved */
-	struct vmw_dma_buffer *backup;
+	struct vmw_buffer_object *backup;
 	unsigned long backup_offset;
 	unsigned long pin_count; /* Protected by resource reserved */
 	const struct vmw_res_func *func;
@@ -304,7 +304,7 @@ struct vmw_sw_context{
 	uint32_t cmd_bounce_size;
 	struct list_head resource_list;
 	struct list_head ctx_resource_list; /* For contexts and cotables */
-	struct vmw_dma_buffer *cur_query_bo;
+	struct vmw_buffer_object *cur_query_bo;
 	struct list_head res_relocations;
 	uint32_t *buf_start;
 	struct vmw_res_cache_entry res_cache[vmw_res_max];
@@ -315,7 +315,7 @@ struct vmw_sw_context{
 	bool staged_bindings_inuse;
 	struct list_head staged_cmd_res;
 	struct vmw_resource_val_node *dx_ctx_node;
-	struct vmw_dma_buffer *dx_query_mob;
+	struct vmw_buffer_object *dx_query_mob;
 	struct vmw_resource *dx_query_ctx;
 	struct vmw_cmdbuf_res_manager *man;
 };
@@ -513,8 +513,8 @@ struct vmw_private {
 	 * are protected by the cmdbuf mutex.
 	 */
 
-	struct vmw_dma_buffer *dummy_query_bo;
-	struct vmw_dma_buffer *pinned_bo;
+	struct vmw_buffer_object *dummy_query_bo;
+	struct vmw_buffer_object *pinned_bo;
 	uint32_t query_cid;
 	uint32_t query_cid_valid;
 	bool dummy_query_bo_pinned;
@@ -623,43 +623,43 @@ extern int vmw_user_lookup_handle(struct vmw_private *dev_priv,
 				  struct ttm_object_file *tfile,
 				  uint32_t handle,
 				  struct vmw_surface **out_surf,
-				  struct vmw_dma_buffer **out_buf);
+				  struct vmw_buffer_object **out_buf);
 extern int vmw_user_resource_lookup_handle(
 	struct vmw_private *dev_priv,
 	struct ttm_object_file *tfile,
 	uint32_t handle,
 	const struct vmw_user_resource_conv *converter,
 	struct vmw_resource **p_res);
-extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo);
-extern int vmw_dmabuf_init(struct vmw_private *dev_priv,
-			   struct vmw_dma_buffer *vmw_bo,
-			   size_t size, struct ttm_placement *placement,
-			   bool interuptable,
-			   void (*bo_free) (struct ttm_buffer_object *bo));
-extern int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
-				  struct ttm_object_file *tfile);
-extern int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv,
-				 struct ttm_object_file *tfile,
-				 uint32_t size,
-				 bool shareable,
-				 uint32_t *handle,
-				 struct vmw_dma_buffer **p_dma_buf,
-				 struct ttm_base_object **p_base);
-extern int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
-				     struct vmw_dma_buffer *dma_buf,
-				     uint32_t *handle);
-extern int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file_priv);
-extern int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file_priv);
-extern int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data,
-					 struct drm_file *file_priv);
-extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
-					 uint32_t cur_validate_node);
-extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
-extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
-				  uint32_t id, struct vmw_dma_buffer **out,
-				  struct ttm_base_object **base);
+extern void vmw_bo_bo_free(struct ttm_buffer_object *bo);
+extern int vmw_bo_init(struct vmw_private *dev_priv,
+		       struct vmw_buffer_object *vmw_bo,
+		       size_t size, struct ttm_placement *placement,
+		       bool interuptable,
+		       void (*bo_free)(struct ttm_buffer_object *bo));
+extern int vmw_user_bo_verify_access(struct ttm_buffer_object *bo,
+				     struct ttm_object_file *tfile);
+extern int vmw_user_bo_alloc(struct vmw_private *dev_priv,
+			     struct ttm_object_file *tfile,
+			     uint32_t size,
+			     bool shareable,
+			     uint32_t *handle,
+			     struct vmw_buffer_object **p_dma_buf,
+			     struct ttm_base_object **p_base);
+extern int vmw_user_bo_reference(struct ttm_object_file *tfile,
+				 struct vmw_buffer_object *dma_buf,
+				 uint32_t *handle);
+extern int vmw_bo_alloc_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+extern int vmw_bo_unref_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+extern int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv);
+extern uint32_t vmw_bo_validate_node(struct ttm_buffer_object *bo,
+				     uint32_t cur_validate_node);
+extern void vmw_bo_validate_clear(struct ttm_buffer_object *bo);
+extern int vmw_user_bo_lookup(struct ttm_object_file *tfile,
+			      uint32_t id, struct vmw_buffer_object **out,
+			      struct ttm_base_object **base);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -670,43 +670,43 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  struct vmw_resource **out);
 extern void vmw_resource_unreserve(struct vmw_resource *res,
 				   bool switch_backup,
-				   struct vmw_dma_buffer *new_backup,
+				   struct vmw_buffer_object *new_backup,
 				   unsigned long new_backup_offset);
 extern void vmw_resource_move_notify(struct ttm_buffer_object *bo,
 				     struct ttm_mem_reg *mem);
 extern void vmw_query_move_notify(struct ttm_buffer_object *bo,
 				  struct ttm_mem_reg *mem);
 extern void vmw_resource_swap_notify(struct ttm_buffer_object *bo);
-extern int vmw_query_readback_all(struct vmw_dma_buffer *dx_query_mob);
+extern int vmw_query_readback_all(struct vmw_buffer_object *dx_query_mob);
 extern void vmw_fence_single_bo(struct ttm_buffer_object *bo,
 				struct vmw_fence_obj *fence);
 extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
 
 
 /**
- * DMA buffer helper routines - vmwgfx_dmabuf.c
+ * Buffer object helper functions - vmwgfx_bo.c
  */
-extern int vmw_dmabuf_pin_in_placement(struct vmw_private *vmw_priv,
-				       struct vmw_dma_buffer *bo,
-				       struct ttm_placement *placement,
+extern int vmw_bo_pin_in_placement(struct vmw_private *vmw_priv,
+				   struct vmw_buffer_object *bo,
+				   struct ttm_placement *placement,
+				   bool interruptible);
+extern int vmw_bo_pin_in_vram(struct vmw_private *dev_priv,
+			      struct vmw_buffer_object *buf,
+			      bool interruptible);
+extern int vmw_bo_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
+				     struct vmw_buffer_object *buf,
+				     bool interruptible);
+extern int vmw_bo_pin_in_start_of_vram(struct vmw_private *vmw_priv,
+				       struct vmw_buffer_object *bo,
 				       bool interruptible);
-extern int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool interruptible);
-extern int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
-					 struct vmw_dma_buffer *buf,
-					 bool interruptible);
-extern int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *vmw_priv,
-					   struct vmw_dma_buffer *bo,
-					   bool interruptible);
-extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
-			    struct vmw_dma_buffer *bo,
-			    bool interruptible);
+extern int vmw_bo_unpin(struct vmw_private *vmw_priv,
+			struct vmw_buffer_object *bo,
+			bool interruptible);
 extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
 				 SVGAGuestPtr *ptr);
-extern void vmw_bo_pin_reserved(struct vmw_dma_buffer *bo, bool pin);
-extern void *vmw_dma_buffer_map_and_cache(struct vmw_dma_buffer *vbo);
-extern void vmw_dma_buffer_unmap(struct vmw_dma_buffer *vbo);
+extern void vmw_bo_pin_reserved(struct vmw_buffer_object *bo, bool pin);
+extern void *vmw_buffer_object_map_and_cache(struct vmw_buffer_object *vbo);
+extern void vmw_buffer_object_unmap(struct vmw_buffer_object *vbo);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -758,7 +758,7 @@ extern void vmw_ttm_global_release(struct vmw_private *dev_priv);
 extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
 
 /**
- * TTM buffer object driver - vmwgfx_buffer.c
+ * TTM buffer object driver - vmwgfx_ttm_buffer.c
  */
 
 extern const size_t vmw_tt_size;
@@ -1041,8 +1041,8 @@ vmw_context_binding_state(struct vmw_resource *ctx);
 extern void vmw_dx_context_scrub_cotables(struct vmw_resource *ctx,
 					  bool readback);
 extern int vmw_context_bind_dx_query(struct vmw_resource *ctx_res,
-				     struct vmw_dma_buffer *mob);
-extern struct vmw_dma_buffer *
+				     struct vmw_buffer_object *mob);
+extern struct vmw_buffer_object *
 vmw_context_get_dx_query_mob(struct vmw_resource *ctx_res);
 
 
@@ -1243,9 +1243,9 @@ static inline struct vmw_surface *vmw_surface_reference(struct vmw_surface *srf)
 	return srf;
 }
 
-static inline void vmw_dmabuf_unreference(struct vmw_dma_buffer **buf)
+static inline void vmw_bo_unreference(struct vmw_buffer_object **buf)
 {
-	struct vmw_dma_buffer *tmp_buf = *buf;
+	struct vmw_buffer_object *tmp_buf = *buf;
 
 	*buf = NULL;
 	if (tmp_buf != NULL) {
@@ -1255,7 +1255,8 @@ static inline void vmw_dmabuf_unreference(struct vmw_dma_buffer **buf)
 	}
 }
 
-static inline struct vmw_dma_buffer *vmw_dmabuf_reference(struct vmw_dma_buffer *buf)
+static inline struct vmw_buffer_object *
+vmw_bo_reference(struct vmw_buffer_object *buf)
 {
 	if (ttm_bo_reference(&buf->base))
 		return buf;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index c9d5cc237124..a8b194655c40 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -92,7 +92,7 @@ struct vmw_resource_val_node {
 	struct list_head head;
 	struct drm_hash_item hash;
 	struct vmw_resource *res;
-	struct vmw_dma_buffer *new_backup;
+	struct vmw_buffer_object *new_backup;
 	struct vmw_ctx_binding_state *staged_bindings;
 	unsigned long new_backup_offset;
 	u32 first_usage : 1;
@@ -126,9 +126,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
 static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 				 struct vmw_sw_context *sw_context,
 				 SVGAMobId *id,
-				 struct vmw_dma_buffer **vmw_bo_p);
+				 struct vmw_buffer_object **vmw_bo_p);
 static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
-				   struct vmw_dma_buffer *vbo,
+				   struct vmw_buffer_object *vbo,
 				   bool validate_as_mob,
 				   uint32_t *p_val_node);
 /**
@@ -185,7 +185,7 @@ static void vmw_resources_unreserve(struct vmw_sw_context *sw_context,
 		}
 		vmw_resource_unreserve(res, switch_backup, val->new_backup,
 				       val->new_backup_offset);
-		vmw_dmabuf_unreference(&val->new_backup);
+		vmw_bo_unreference(&val->new_backup);
 	}
 }
 
@@ -423,7 +423,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
 	}
 
 	if (dev_priv->has_dx && vmw_res_type(ctx) == vmw_res_dx_context) {
-		struct vmw_dma_buffer *dx_query_mob;
+		struct vmw_buffer_object *dx_query_mob;
 
 		dx_query_mob = vmw_context_get_dx_query_mob(ctx);
 		if (dx_query_mob)
@@ -544,7 +544,7 @@ static int vmw_cmd_ok(struct vmw_private *dev_priv,
  * submission is reached.
  */
 static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
-				   struct vmw_dma_buffer *vbo,
+				   struct vmw_buffer_object *vbo,
 				   bool validate_as_mob,
 				   uint32_t *p_val_node)
 {
@@ -616,7 +616,7 @@ static int vmw_resources_reserve(struct vmw_sw_context *sw_context)
 			return ret;
 
 		if (res->backup) {
-			struct vmw_dma_buffer *vbo = res->backup;
+			struct vmw_buffer_object *vbo = res->backup;
 
 			ret = vmw_bo_to_validate_list
 				(sw_context, vbo,
@@ -628,7 +628,7 @@ static int vmw_resources_reserve(struct vmw_sw_context *sw_context)
 	}
 
 	if (sw_context->dx_query_mob) {
-		struct vmw_dma_buffer *expected_dx_query_mob;
+		struct vmw_buffer_object *expected_dx_query_mob;
 
 		expected_dx_query_mob =
 			vmw_context_get_dx_query_mob(sw_context->dx_query_ctx);
@@ -657,7 +657,7 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context)
 
 	list_for_each_entry(val, &sw_context->resource_list, head) {
 		struct vmw_resource *res = val->res;
-		struct vmw_dma_buffer *backup = res->backup;
+		struct vmw_buffer_object *backup = res->backup;
 
 		ret = vmw_resource_validate(res);
 		if (unlikely(ret != 0)) {
@@ -668,7 +668,7 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context)
 
 		/* Check if the resource switched backup buffer */
 		if (backup && res->backup && (backup != res->backup)) {
-			struct vmw_dma_buffer *vbo = res->backup;
+			struct vmw_buffer_object *vbo = res->backup;
 
 			ret = vmw_bo_to_validate_list
 				(sw_context, vbo,
@@ -821,7 +821,7 @@ out_no_reloc:
 static int vmw_rebind_all_dx_query(struct vmw_resource *ctx_res)
 {
 	struct vmw_private *dev_priv = ctx_res->dev_priv;
-	struct vmw_dma_buffer *dx_query_mob;
+	struct vmw_buffer_object *dx_query_mob;
 	struct {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdDXBindAllQuery body;
@@ -1152,7 +1152,7 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
  * command batch.
  */
 static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
-				       struct vmw_dma_buffer *new_query_bo,
+				       struct vmw_buffer_object *new_query_bo,
 				       struct vmw_sw_context *sw_context)
 {
 	struct vmw_res_cache_entry *ctx_entry =
@@ -1234,7 +1234,7 @@ static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
 	if (dev_priv->pinned_bo != sw_context->cur_query_bo) {
 		if (dev_priv->pinned_bo) {
 			vmw_bo_pin_reserved(dev_priv->pinned_bo, false);
-			vmw_dmabuf_unreference(&dev_priv->pinned_bo);
+			vmw_bo_unreference(&dev_priv->pinned_bo);
 		}
 
 		if (!sw_context->needs_post_query_barrier) {
@@ -1256,7 +1256,7 @@ static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
 			dev_priv->query_cid = sw_context->last_query_ctx->id;
 			dev_priv->query_cid_valid = true;
 			dev_priv->pinned_bo =
-				vmw_dmabuf_reference(sw_context->cur_query_bo);
+				vmw_bo_reference(sw_context->cur_query_bo);
 		}
 	}
 }
@@ -1282,15 +1282,14 @@ static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
 static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 				 struct vmw_sw_context *sw_context,
 				 SVGAMobId *id,
-				 struct vmw_dma_buffer **vmw_bo_p)
+				 struct vmw_buffer_object **vmw_bo_p)
 {
-	struct vmw_dma_buffer *vmw_bo = NULL;
+	struct vmw_buffer_object *vmw_bo = NULL;
 	uint32_t handle = *id;
 	struct vmw_relocation *reloc;
 	int ret;
 
-	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo,
-				     NULL);
+	ret = vmw_user_bo_lookup(sw_context->fp->tfile, handle, &vmw_bo, NULL);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use MOB buffer.\n");
 		ret = -EINVAL;
@@ -1316,7 +1315,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 	return 0;
 
 out_no_reloc:
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	*vmw_bo_p = NULL;
 	return ret;
 }
@@ -1343,15 +1342,14 @@ out_no_reloc:
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 				   struct vmw_sw_context *sw_context,
 				   SVGAGuestPtr *ptr,
-				   struct vmw_dma_buffer **vmw_bo_p)
+				   struct vmw_buffer_object **vmw_bo_p)
 {
-	struct vmw_dma_buffer *vmw_bo = NULL;
+	struct vmw_buffer_object *vmw_bo = NULL;
 	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
 	int ret;
 
-	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo,
-				     NULL);
+	ret = vmw_user_bo_lookup(sw_context->fp->tfile, handle, &vmw_bo, NULL);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use GMR region.\n");
 		ret = -EINVAL;
@@ -1376,7 +1374,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	return 0;
 
 out_no_reloc:
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	*vmw_bo_p = NULL;
 	return ret;
 }
@@ -1447,7 +1445,7 @@ static int vmw_cmd_dx_bind_query(struct vmw_private *dev_priv,
 		SVGA3dCmdDXBindQuery q;
 	} *cmd;
 
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	int    ret;
 
 
@@ -1466,7 +1464,7 @@ static int vmw_cmd_dx_bind_query(struct vmw_private *dev_priv,
 	sw_context->dx_query_mob = vmw_bo;
 	sw_context->dx_query_ctx = sw_context->dx_ctx_node->res;
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 
 	return ret;
 }
@@ -1549,7 +1547,7 @@ static int vmw_cmd_end_gb_query(struct vmw_private *dev_priv,
 				struct vmw_sw_context *sw_context,
 				SVGA3dCmdHeader *header)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	struct vmw_query_cmd {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdEndGBQuery q;
@@ -1569,7 +1567,7 @@ static int vmw_cmd_end_gb_query(struct vmw_private *dev_priv,
 
 	ret = vmw_query_bo_switch_prepare(dev_priv, vmw_bo, sw_context);
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	return ret;
 }
 
@@ -1584,7 +1582,7 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	struct vmw_query_cmd {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdEndQuery q;
@@ -1623,7 +1621,7 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 
 	ret = vmw_query_bo_switch_prepare(dev_priv, vmw_bo, sw_context);
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	return ret;
 }
 
@@ -1638,7 +1636,7 @@ static int vmw_cmd_wait_gb_query(struct vmw_private *dev_priv,
 				 struct vmw_sw_context *sw_context,
 				 SVGA3dCmdHeader *header)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	struct vmw_query_cmd {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdWaitForGBQuery q;
@@ -1656,7 +1654,7 @@ static int vmw_cmd_wait_gb_query(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	return 0;
 }
 
@@ -1671,7 +1669,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 			      struct vmw_sw_context *sw_context,
 			      SVGA3dCmdHeader *header)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	struct vmw_query_cmd {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdWaitForQuery q;
@@ -1708,7 +1706,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	return 0;
 }
 
@@ -1716,7 +1714,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		       struct vmw_sw_context *sw_context,
 		       SVGA3dCmdHeader *header)
 {
-	struct vmw_dma_buffer *vmw_bo = NULL;
+	struct vmw_buffer_object *vmw_bo = NULL;
 	struct vmw_surface *srf = NULL;
 	struct vmw_dma_cmd {
 		SVGA3dCmdHeader header;
@@ -1768,7 +1766,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 			     header);
 
 out_no_surface:
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 	return ret;
 }
 
@@ -1887,7 +1885,7 @@ static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
 				      struct vmw_sw_context *sw_context,
 				      void *buf)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	int ret;
 
 	struct {
@@ -1901,7 +1899,7 @@ static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	vmw_dmabuf_unreference(&vmw_bo);
+	vmw_bo_unreference(&vmw_bo);
 
 	return ret;
 }
@@ -1928,7 +1926,7 @@ static int vmw_cmd_res_switch_backup(struct vmw_private *dev_priv,
 				     uint32_t *buf_id,
 				     unsigned long backup_offset)
 {
-	struct vmw_dma_buffer *dma_buf;
+	struct vmw_buffer_object *dma_buf;
 	int ret;
 
 	ret = vmw_translate_mob_ptr(dev_priv, sw_context, buf_id, &dma_buf);
@@ -1939,7 +1937,7 @@ static int vmw_cmd_res_switch_backup(struct vmw_private *dev_priv,
 	if (val_node->first_usage)
 		val_node->no_buffer_needed = true;
 
-	vmw_dmabuf_unreference(&val_node->new_backup);
+	vmw_bo_unreference(&val_node->new_backup);
 	val_node->new_backup = dma_buf;
 	val_node->new_backup_offset = backup_offset;
 
@@ -3701,8 +3699,8 @@ int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 			       bool interruptible,
 			       bool validate_as_mob)
 {
-	struct vmw_dma_buffer *vbo = container_of(bo, struct vmw_dma_buffer,
-						  base);
+	struct vmw_buffer_object *vbo =
+		container_of(bo, struct vmw_buffer_object, base);
 	struct ttm_operation_ctx ctx = { interruptible, true };
 	int ret;
 
@@ -4423,7 +4421,7 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
 
 	ttm_bo_unref(&query_val.bo);
 	ttm_bo_unref(&pinned_val.bo);
-	vmw_dmabuf_unreference(&dev_priv->pinned_bo);
+	vmw_bo_unreference(&dev_priv->pinned_bo);
 out_unlock:
 	return;
 
@@ -4432,7 +4430,7 @@ out_no_emit:
 out_no_reserve:
 	ttm_bo_unref(&query_val.bo);
 	ttm_bo_unref(&pinned_val.bo);
-	vmw_dmabuf_unreference(&dev_priv->pinned_bo);
+	vmw_bo_unreference(&dev_priv->pinned_bo);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 9b7e0aca5f84..dcde4985c574 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -42,7 +42,7 @@ struct vmw_fb_par {
 	void *vmalloc;
 
 	struct mutex bo_mutex;
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	unsigned bo_size;
 	struct drm_framebuffer *set_fb;
 	struct drm_display_mode *set_mode;
@@ -184,7 +184,7 @@ static void vmw_fb_dirty_flush(struct work_struct *work)
 	struct drm_clip_rect clip;
 	struct drm_framebuffer *cur_fb;
 	u8 *src_ptr, *dst_ptr;
-	struct vmw_dma_buffer *vbo = par->vmw_bo;
+	struct vmw_buffer_object *vbo = par->vmw_bo;
 	void *virtual;
 
 	if (!READ_ONCE(par->dirty.active))
@@ -197,7 +197,7 @@ static void vmw_fb_dirty_flush(struct work_struct *work)
 
 	(void) ttm_read_lock(&vmw_priv->reservation_sem, false);
 	(void) ttm_bo_reserve(&vbo->base, false, false, NULL);
-	virtual = vmw_dma_buffer_map_and_cache(vbo);
+	virtual = vmw_buffer_object_map_and_cache(vbo);
 	if (!virtual)
 		goto out_unreserve;
 
@@ -391,9 +391,9 @@ static void vmw_fb_imageblit(struct fb_info *info, const struct fb_image *image)
  */
 
 static int vmw_fb_create_bo(struct vmw_private *vmw_priv,
-			    size_t size, struct vmw_dma_buffer **out)
+			    size_t size, struct vmw_buffer_object **out)
 {
-	struct vmw_dma_buffer *vmw_bo;
+	struct vmw_buffer_object *vmw_bo;
 	int ret;
 
 	(void) ttm_write_lock(&vmw_priv->reservation_sem, false);
@@ -404,10 +404,10 @@ static int vmw_fb_create_bo(struct vmw_private *vmw_priv,
 		goto err_unlock;
 	}
 
-	ret = vmw_dmabuf_init(vmw_priv, vmw_bo, size,
+	ret = vmw_bo_init(vmw_priv, vmw_bo, size,
 			      &vmw_sys_placement,
 			      false,
-			      &vmw_dmabuf_bo_free);
+			      &vmw_bo_bo_free);
 	if (unlikely(ret != 0))
 		goto err_unlock; /* init frees the buffer on failure */
 
@@ -491,7 +491,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
 	}
 
 	if (par->vmw_bo && detach_bo && unref_bo)
-		vmw_dmabuf_unreference(&par->vmw_bo);
+		vmw_bo_unreference(&par->vmw_bo);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index c5e8eae0dbe2..5e0c8f775c92 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -377,8 +377,8 @@ int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
 	}
 
 	vfb = vmw_framebuffer_to_vfb(fb);
-	if (!vfb->dmabuf) {
-		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+	if (!vfb->bo) {
+		DRM_ERROR("Framebuffer not buffer backed.\n");
 		ret = -EINVAL;
 		goto out_no_ttm_lock;
 	}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index ef96ba7432ad..7a32be0cef14 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -85,10 +85,10 @@ static int vmw_cursor_update_image(struct vmw_private *dev_priv,
 	return 0;
 }
 
-static int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
-				    struct vmw_dma_buffer *dmabuf,
-				    u32 width, u32 height,
-				    u32 hotspotX, u32 hotspotY)
+static int vmw_cursor_update_bo(struct vmw_private *dev_priv,
+				struct vmw_buffer_object *bo,
+				u32 width, u32 height,
+				u32 hotspotX, u32 hotspotY)
 {
 	struct ttm_bo_kmap_obj map;
 	unsigned long kmap_offset;
@@ -100,13 +100,13 @@ static int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
 	kmap_offset = 0;
 	kmap_num = (width*height*4 + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
-	ret = ttm_bo_reserve(&dmabuf->base, true, false, NULL);
+	ret = ttm_bo_reserve(&bo->base, true, false, NULL);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("reserve failed\n");
 		return -EINVAL;
 	}
 
-	ret = ttm_bo_kmap(&dmabuf->base, kmap_offset, kmap_num, &map);
+	ret = ttm_bo_kmap(&bo->base, kmap_offset, kmap_num, &map);
 	if (unlikely(ret != 0))
 		goto err_unreserve;
 
@@ -116,7 +116,7 @@ static int vmw_cursor_update_dmabuf(struct vmw_private *dev_priv,
 
 	ttm_bo_kunmap(&map);
 err_unreserve:
-	ttm_bo_unreserve(&dmabuf->base);
+	ttm_bo_unreserve(&bo->base);
 
 	return ret;
 }
@@ -352,13 +352,13 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane,
 	if (vps->surf)
 		vmw_surface_unreference(&vps->surf);
 
-	if (vps->dmabuf)
-		vmw_dmabuf_unreference(&vps->dmabuf);
+	if (vps->bo)
+		vmw_bo_unreference(&vps->bo);
 
 	if (fb) {
-		if (vmw_framebuffer_to_vfb(fb)->dmabuf) {
-			vps->dmabuf = vmw_framebuffer_to_vfbd(fb)->buffer;
-			vmw_dmabuf_reference(vps->dmabuf);
+		if (vmw_framebuffer_to_vfb(fb)->bo) {
+			vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer;
+			vmw_bo_reference(vps->bo);
 		} else {
 			vps->surf = vmw_framebuffer_to_vfbs(fb)->surface;
 			vmw_surface_reference(vps->surf);
@@ -390,7 +390,7 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 	}
 
 	du->cursor_surface = vps->surf;
-	du->cursor_dmabuf = vps->dmabuf;
+	du->cursor_bo = vps->bo;
 
 	if (vps->surf) {
 		du->cursor_age = du->cursor_surface->snooper.age;
@@ -399,11 +399,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane,
 					      vps->surf->snooper.image,
 					      64, 64, hotspot_x,
 					      hotspot_y);
-	} else if (vps->dmabuf) {
-		ret = vmw_cursor_update_dmabuf(dev_priv, vps->dmabuf,
-					       plane->state->crtc_w,
-					       plane->state->crtc_h,
-					       hotspot_x, hotspot_y);
+	} else if (vps->bo) {
+		ret = vmw_cursor_update_bo(dev_priv, vps->bo,
+					   plane->state->crtc_w,
+					   plane->state->crtc_h,
+					   hotspot_x, hotspot_y);
 	} else {
 		vmw_cursor_update_position(dev_priv, false, 0, 0);
 		return;
@@ -519,7 +519,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
 		ret = -EINVAL;
 	}
 
-	if (!vmw_framebuffer_to_vfb(fb)->dmabuf)
+	if (!vmw_framebuffer_to_vfb(fb)->bo)
 		surface = vmw_framebuffer_to_vfbs(fb)->surface;
 
 	if (surface && !surface->snooper.image) {
@@ -687,8 +687,8 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
 	if (vps->surf)
 		(void) vmw_surface_reference(vps->surf);
 
-	if (vps->dmabuf)
-		(void) vmw_dmabuf_reference(vps->dmabuf);
+	if (vps->bo)
+		(void) vmw_bo_reference(vps->bo);
 
 	state = &vps->base;
 
@@ -745,8 +745,8 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 	if (vps->surf)
 		vmw_surface_unreference(&vps->surf);
 
-	if (vps->dmabuf)
-		vmw_dmabuf_unreference(&vps->dmabuf);
+	if (vps->bo)
+		vmw_bo_unreference(&vps->bo);
 
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
@@ -902,12 +902,12 @@ static int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 
 /**
  * vmw_kms_readback - Perform a readback from the screen system to
- * a dma-buffer backed framebuffer.
+ * a buffer-object backed framebuffer.
  *
  * @dev_priv: Pointer to the device private structure.
  * @file_priv: Pointer to a struct drm_file identifying the caller.
  * Must be set to NULL if @user_fence_rep is NULL.
- * @vfb: Pointer to the dma-buffer backed framebuffer.
+ * @vfb: Pointer to the buffer-object backed framebuffer.
  * @user_fence_rep: User-space provided structure for fence information.
  * Must be set to non-NULL if @file_priv is non-NULL.
  * @vclips: Array of clip rects.
@@ -951,7 +951,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 					   struct vmw_framebuffer **out,
 					   const struct drm_mode_fb_cmd2
 					   *mode_cmd,
-					   bool is_dmabuf_proxy)
+					   bool is_bo_proxy)
 
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -1019,7 +1019,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd);
 	vfbs->surface = vmw_surface_reference(surface);
 	vfbs->base.user_handle = mode_cmd->handles[0];
-	vfbs->is_dmabuf_proxy = is_dmabuf_proxy;
+	vfbs->is_bo_proxy = is_bo_proxy;
 
 	*out = &vfbs->base;
 
@@ -1038,30 +1038,30 @@ out_err1:
 }
 
 /*
- * Dmabuf framebuffer code
+ * Buffer-object framebuffer code
  */
 
-static void vmw_framebuffer_dmabuf_destroy(struct drm_framebuffer *framebuffer)
+static void vmw_framebuffer_bo_destroy(struct drm_framebuffer *framebuffer)
 {
-	struct vmw_framebuffer_dmabuf *vfbd =
+	struct vmw_framebuffer_bo *vfbd =
 		vmw_framebuffer_to_vfbd(framebuffer);
 
 	drm_framebuffer_cleanup(framebuffer);
-	vmw_dmabuf_unreference(&vfbd->buffer);
+	vmw_bo_unreference(&vfbd->buffer);
 	if (vfbd->base.user_obj)
 		ttm_base_object_unref(&vfbd->base.user_obj);
 
 	kfree(vfbd);
 }
 
-static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
-				 struct drm_file *file_priv,
-				 unsigned flags, unsigned color,
-				 struct drm_clip_rect *clips,
-				 unsigned num_clips)
+static int vmw_framebuffer_bo_dirty(struct drm_framebuffer *framebuffer,
+				    struct drm_file *file_priv,
+				    unsigned int flags, unsigned int color,
+				    struct drm_clip_rect *clips,
+				    unsigned int num_clips)
 {
 	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
-	struct vmw_framebuffer_dmabuf *vfbd =
+	struct vmw_framebuffer_bo *vfbd =
 		vmw_framebuffer_to_vfbd(framebuffer);
 	struct drm_clip_rect norect;
 	int ret, increment = 1;
@@ -1092,13 +1092,13 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 				       true, true, NULL);
 		break;
 	case vmw_du_screen_object:
-		ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, &vfbd->base,
-						  clips, NULL, num_clips,
-						  increment, true, NULL, NULL);
+		ret = vmw_kms_sou_do_bo_dirty(dev_priv, &vfbd->base,
+					      clips, NULL, num_clips,
+					      increment, true, NULL, NULL);
 		break;
 	case vmw_du_legacy:
-		ret = vmw_kms_ldu_do_dmabuf_dirty(dev_priv, &vfbd->base, 0, 0,
-						  clips, num_clips, increment);
+		ret = vmw_kms_ldu_do_bo_dirty(dev_priv, &vfbd->base, 0, 0,
+					      clips, num_clips, increment);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1114,23 +1114,23 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 	return ret;
 }
 
-static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
-	.destroy = vmw_framebuffer_dmabuf_destroy,
-	.dirty = vmw_framebuffer_dmabuf_dirty,
+static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = {
+	.destroy = vmw_framebuffer_bo_destroy,
+	.dirty = vmw_framebuffer_bo_dirty,
 };
 
 /**
- * Pin the dmabuffer in a location suitable for access by the
+ * Pin the bofer in a location suitable for access by the
  * display system.
  */
 static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 	struct ttm_placement *placement;
 	int ret;
 
-	buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
+	buf = vfb->bo ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
 		vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.backup;
 
 	if (!buf)
@@ -1139,12 +1139,12 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 	switch (dev_priv->active_display_unit) {
 	case vmw_du_legacy:
 		vmw_overlay_pause_all(dev_priv);
-		ret = vmw_dmabuf_pin_in_start_of_vram(dev_priv, buf, false);
+		ret = vmw_bo_pin_in_start_of_vram(dev_priv, buf, false);
 		vmw_overlay_resume_all(dev_priv);
 		break;
 	case vmw_du_screen_object:
 	case vmw_du_screen_target:
-		if (vfb->dmabuf) {
+		if (vfb->bo) {
 			if (dev_priv->capabilities & SVGA_CAP_3D) {
 				/*
 				 * Use surface DMA to get content to
@@ -1160,8 +1160,7 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 			placement = &vmw_mob_placement;
 		}
 
-		return vmw_dmabuf_pin_in_placement(dev_priv, buf, placement,
-						   false);
+		return vmw_bo_pin_in_placement(dev_priv, buf, placement, false);
 	default:
 		return -EINVAL;
 	}
@@ -1172,36 +1171,36 @@ static int vmw_framebuffer_pin(struct vmw_framebuffer *vfb)
 static int vmw_framebuffer_unpin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 
-	buf = vfb->dmabuf ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
+	buf = vfb->bo ?  vmw_framebuffer_to_vfbd(&vfb->base)->buffer :
 		vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.backup;
 
 	if (WARN_ON(!buf))
 		return 0;
 
-	return vmw_dmabuf_unpin(dev_priv, buf, false);
+	return vmw_bo_unpin(dev_priv, buf, false);
 }
 
 /**
- * vmw_create_dmabuf_proxy - create a proxy surface for the DMA buf
+ * vmw_create_bo_proxy - create a proxy surface for the buffer object
  *
  * @dev: DRM device
  * @mode_cmd: parameters for the new surface
- * @dmabuf_mob: MOB backing the DMA buf
+ * @bo_mob: MOB backing the buffer object
  * @srf_out: newly created surface
  *
- * When the content FB is a DMA buf, we create a surface as a proxy to the
+ * When the content FB is a buffer object, we create a surface as a proxy to the
  * same buffer.  This way we can do a surface copy rather than a surface DMA.
  * This is a more efficient approach
  *
  * RETURNS:
  * 0 on success, error code otherwise
  */
-static int vmw_create_dmabuf_proxy(struct drm_device *dev,
-				   const struct drm_mode_fb_cmd2 *mode_cmd,
-				   struct vmw_dma_buffer *dmabuf_mob,
-				   struct vmw_surface **srf_out)
+static int vmw_create_bo_proxy(struct drm_device *dev,
+			       const struct drm_mode_fb_cmd2 *mode_cmd,
+			       struct vmw_buffer_object *bo_mob,
+			       struct vmw_surface **srf_out)
 {
 	uint32_t format;
 	struct drm_vmw_size content_base_size = {0};
@@ -1258,8 +1257,8 @@ static int vmw_create_dmabuf_proxy(struct drm_device *dev,
 	/* Reserve and switch the backing mob. */
 	mutex_lock(&res->dev_priv->cmdbuf_mutex);
 	(void) vmw_resource_reserve(res, false, true);
-	vmw_dmabuf_unreference(&res->backup);
-	res->backup = vmw_dmabuf_reference(dmabuf_mob);
+	vmw_bo_unreference(&res->backup);
+	res->backup = vmw_bo_reference(bo_mob);
 	res->backup_offset = 0;
 	vmw_resource_unreserve(res, false, NULL, 0);
 	mutex_unlock(&res->dev_priv->cmdbuf_mutex);
@@ -1269,21 +1268,21 @@ static int vmw_create_dmabuf_proxy(struct drm_device *dev,
 
 
-static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
-					  struct vmw_dma_buffer *dmabuf,
-					  struct vmw_framebuffer **out,
-					  const struct drm_mode_fb_cmd2
-					  *mode_cmd)
+static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv,
+				      struct vmw_buffer_object *bo,
+				      struct vmw_framebuffer **out,
+				      const struct drm_mode_fb_cmd2
+				      *mode_cmd)
 
 {
 	struct drm_device *dev = dev_priv->dev;
-	struct vmw_framebuffer_dmabuf *vfbd;
+	struct vmw_framebuffer_bo *vfbd;
 	unsigned int requested_size;
 	struct drm_format_name_buf format_name;
 	int ret;
 
 	requested_size = mode_cmd->height * mode_cmd->pitches[0];
-	if (unlikely(requested_size > dmabuf->base.num_pages * PAGE_SIZE)) {
+	if (unlikely(requested_size > bo->base.num_pages * PAGE_SIZE)) {
 		DRM_ERROR("Screen buffer object size is too small "
 			  "for requested mode.\n");
 		return -EINVAL;
@@ -1312,20 +1311,20 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 	}
 
 	drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, mode_cmd);
-	vfbd->base.dmabuf = true;
-	vfbd->buffer = vmw_dmabuf_reference(dmabuf);
+	vfbd->base.bo = true;
+	vfbd->buffer = vmw_bo_reference(bo);
 	vfbd->base.user_handle = mode_cmd->handles[0];
 	*out = &vfbd->base;
 
 	ret = drm_framebuffer_init(dev, &vfbd->base.base,
-				   &vmw_framebuffer_dmabuf_funcs);
+				   &vmw_framebuffer_bo_funcs);
 	if (ret)
 		goto out_err2;
 
 	return 0;
 
 out_err2:
-	vmw_dmabuf_unreference(&dmabuf);
+	vmw_bo_unreference(&bo);
 	kfree(vfbd);
 out_err1:
 	return ret;
@@ -1354,57 +1353,57 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height)
  * vmw_kms_new_framebuffer - Create a new framebuffer.
  *
  * @dev_priv: Pointer to device private struct.
- * @dmabuf: Pointer to dma buffer to wrap the kms framebuffer around.
- * Either @dmabuf or @surface must be NULL.
+ * @bo: Pointer to buffer object to wrap the kms framebuffer around.
+ * Either @bo or @surface must be NULL.
  * @surface: Pointer to a surface to wrap the kms framebuffer around.
- * Either @dmabuf or @surface must be NULL.
- * @only_2d: No presents will occur to this dma buffer based framebuffer. This
- * Helps the code to do some important optimizations.
+ * Either @bo or @surface must be NULL.
+ * @only_2d: No presents will occur to this buffer object based framebuffer.
+ * This helps the code to do some important optimizations.
  * @mode_cmd: Frame-buffer metadata.
  */
 struct vmw_framebuffer *
 vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
-			struct vmw_dma_buffer *dmabuf,
+			struct vmw_buffer_object *bo,
 			struct vmw_surface *surface,
 			bool only_2d,
 			const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct vmw_framebuffer *vfb = NULL;
-	bool is_dmabuf_proxy = false;
+	bool is_bo_proxy = false;
 	int ret;
 
 	/*
 	 * We cannot use the SurfaceDMA command in an non-accelerated VM,
-	 * therefore, wrap the DMA buf in a surface so we can use the
+	 * therefore, wrap the buffer object in a surface so we can use the
 	 * SurfaceCopy command.
 	 */
 	if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)  &&
-	    dmabuf && only_2d &&
+	    bo && only_2d &&
 	    mode_cmd->width > 64 &&  /* Don't create a proxy for cursor */
 	    dev_priv->active_display_unit == vmw_du_screen_target) {
-		ret = vmw_create_dmabuf_proxy(dev_priv->dev, mode_cmd,
-					      dmabuf, &surface);
+		ret = vmw_create_bo_proxy(dev_priv->dev, mode_cmd,
+					  bo, &surface);
 		if (ret)
 			return ERR_PTR(ret);
 
-		is_dmabuf_proxy = true;
+		is_bo_proxy = true;
 	}
 
 	/* Create the new framebuffer depending one what we have */
 	if (surface) {
 		ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb,
 						      mode_cmd,
-						      is_dmabuf_proxy);
+						      is_bo_proxy);
 
 		/*
-		 * vmw_create_dmabuf_proxy() adds a reference that is no longer
+		 * vmw_create_bo_proxy() adds a reference that is no longer
 		 * needed
 		 */
-		if (is_dmabuf_proxy)
+		if (is_bo_proxy)
 			vmw_surface_unreference(&surface);
-	} else if (dmabuf) {
-		ret = vmw_kms_new_framebuffer_dmabuf(dev_priv, dmabuf, &vfb,
-						     mode_cmd);
+	} else if (bo) {
+		ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb,
+						 mode_cmd);
 	} else {
 		BUG();
 	}
@@ -1430,7 +1429,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct vmw_framebuffer *vfb = NULL;
 	struct vmw_surface *surface = NULL;
-	struct vmw_dma_buffer *bo = NULL;
+	struct vmw_buffer_object *bo = NULL;
 	struct ttm_base_object *user_obj;
 	int ret;
 
@@ -1466,7 +1465,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	 * End conditioned code.
 	 */
 
-	/* returns either a dmabuf or surface */
+	/* returns either a bo or surface */
 	ret = vmw_user_lookup_handle(dev_priv, tfile,
 				     mode_cmd->handles[0],
 				     &surface, &bo);
@@ -1494,7 +1493,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 err_out:
 	/* vmw_user_lookup_handle takes one ref so does new_fb */
 	if (bo)
-		vmw_dmabuf_unreference(&bo);
+		vmw_bo_unreference(&bo);
 	if (surface)
 		vmw_surface_unreference(&surface);
 
@@ -2427,7 +2426,7 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
  * interrupted by a signal.
  */
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
+				  struct vmw_buffer_object *buf,
 				  bool interruptible,
 				  bool validate_as_mob,
 				  bool for_cpu_blit)
@@ -2459,7 +2458,7 @@ int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
  * Helper to be used if an error forces the caller to undo the actions of
  * vmw_kms_helper_buffer_prepare.
  */
-void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf)
+void vmw_kms_helper_buffer_revert(struct vmw_buffer_object *buf)
 {
 	if (buf)
 		ttm_bo_unreserve(&buf->base);
@@ -2482,7 +2481,7 @@ void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf)
  */
 void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
 				  struct drm_file *file_priv,
-				  struct vmw_dma_buffer *buf,
+				  struct vmw_buffer_object *buf,
 				  struct vmw_fence_obj **out_fence,
 				  struct drm_vmw_fence_rep __user *
 				  user_fence_rep)
@@ -2522,7 +2521,7 @@ void vmw_kms_helper_resource_revert(struct vmw_validation_ctx *ctx)
 	struct vmw_resource *res = ctx->res;
 
 	vmw_kms_helper_buffer_revert(ctx->buf);
-	vmw_dmabuf_unreference(&ctx->buf);
+	vmw_bo_unreference(&ctx->buf);
 	vmw_resource_unreserve(res, false, NULL, 0);
 	mutex_unlock(&res->dev_priv->cmdbuf_mutex);
 }
@@ -2567,7 +2566,7 @@ int vmw_kms_helper_resource_prepare(struct vmw_resource *res,
 		if (ret)
 			goto out_unreserve;
 
-		ctx->buf = vmw_dmabuf_reference(res->backup);
+		ctx->buf = vmw_bo_reference(res->backup);
 	}
 	ret = vmw_resource_validate(res);
 	if (ret)
@@ -2600,7 +2599,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
 		vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
 					     out_fence, NULL);
 
-	vmw_dmabuf_unreference(&ctx->buf);
+	vmw_bo_unreference(&ctx->buf);
 	vmw_resource_unreserve(res, false, NULL, 0);
 	mutex_unlock(&res->dev_priv->cmdbuf_mutex);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 6b7c012719f1..ff1caed38f94 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -90,7 +90,7 @@ struct vmw_kms_dirty {
 #define vmw_framebuffer_to_vfbs(x) \
 	container_of(x, struct vmw_framebuffer_surface, base.base)
 #define vmw_framebuffer_to_vfbd(x) \
-	container_of(x, struct vmw_framebuffer_dmabuf, base.base)
+	container_of(x, struct vmw_framebuffer_bo, base.base)
 
 /**
  * Base class for framebuffers
@@ -102,7 +102,7 @@ struct vmw_framebuffer {
 	struct drm_framebuffer base;
 	int (*pin)(struct vmw_framebuffer *fb);
 	int (*unpin)(struct vmw_framebuffer *fb);
-	bool dmabuf;
+	bool bo;
 	struct ttm_base_object *user_obj;
 	uint32_t user_handle;
 };
@@ -117,15 +117,15 @@ struct vmw_clip_rect {
 struct vmw_framebuffer_surface {
 	struct vmw_framebuffer base;
 	struct vmw_surface *surface;
-	struct vmw_dma_buffer *buffer;
+	struct vmw_buffer_object *buffer;
 	struct list_head head;
-	bool is_dmabuf_proxy;  /* true if this is proxy surface for DMA buf */
+	bool is_bo_proxy;  /* true if this is proxy surface for DMA buf */
 };
 
 
-struct vmw_framebuffer_dmabuf {
+struct vmw_framebuffer_bo {
 	struct vmw_framebuffer base;
-	struct vmw_dma_buffer *buffer;
+	struct vmw_buffer_object *buffer;
 };
 
 
@@ -161,18 +161,18 @@ struct vmw_crtc_state {
  *
  * @base DRM plane object
  * @surf Display surface for STDU
- * @dmabuf display dmabuf for SOU
+ * @bo display bo for SOU
  * @content_fb_type Used by STDU.
- * @dmabuf_size Size of the dmabuf, used by Screen Object Display Unit
+ * @bo_size Size of the bo, used by Screen Object Display Unit
  * @pinned pin count for STDU display surface
  */
 struct vmw_plane_state {
 	struct drm_plane_state base;
 	struct vmw_surface *surf;
-	struct vmw_dma_buffer *dmabuf;
+	struct vmw_buffer_object *bo;
 
 	int content_fb_type;
-	unsigned long dmabuf_size;
+	unsigned long bo_size;
 
 	int pinned;
 
@@ -209,7 +209,7 @@ struct vmw_display_unit {
 	struct drm_plane cursor;
 
 	struct vmw_surface *cursor_surface;
-	struct vmw_dma_buffer *cursor_dmabuf;
+	struct vmw_buffer_object *cursor_bo;
 	size_t cursor_age;
 
 	int cursor_x;
@@ -243,7 +243,7 @@ struct vmw_display_unit {
 
 struct vmw_validation_ctx {
 	struct vmw_resource *res;
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 };
 
 #define vmw_crtc_to_du(x) \
@@ -291,14 +291,14 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv,
 			 struct vmw_kms_dirty *dirty);
 
 int vmw_kms_helper_buffer_prepare(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
+				  struct vmw_buffer_object *buf,
 				  bool interruptible,
 				  bool validate_as_mob,
 				  bool for_cpu_blit);
-void vmw_kms_helper_buffer_revert(struct vmw_dma_buffer *buf);
+void vmw_kms_helper_buffer_revert(struct vmw_buffer_object *buf);
 void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv,
 				  struct drm_file *file_priv,
-				  struct vmw_dma_buffer *buf,
+				  struct vmw_buffer_object *buf,
 				  struct vmw_fence_obj **out_fence,
 				  struct drm_vmw_fence_rep __user *
 				  user_fence_rep);
@@ -316,7 +316,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv,
 		     uint32_t num_clips);
 struct vmw_framebuffer *
 vmw_kms_new_framebuffer(struct vmw_private *dev_priv,
-			struct vmw_dma_buffer *dmabuf,
+			struct vmw_buffer_object *bo,
 			struct vmw_surface *surface,
 			bool only_2d,
 			const struct drm_mode_fb_cmd2 *mode_cmd);
@@ -384,11 +384,11 @@ void vmw_du_connector_destroy_state(struct drm_connector *connector,
  */
 int vmw_kms_ldu_init_display(struct vmw_private *dev_priv);
 int vmw_kms_ldu_close_display(struct vmw_private *dev_priv);
-int vmw_kms_ldu_do_dmabuf_dirty(struct vmw_private *dev_priv,
-				struct vmw_framebuffer *framebuffer,
-				unsigned flags, unsigned color,
-				struct drm_clip_rect *clips,
-				unsigned num_clips, int increment);
+int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv,
+			    struct vmw_framebuffer *framebuffer,
+			    unsigned int flags, unsigned int color,
+			    struct drm_clip_rect *clips,
+			    unsigned int num_clips, int increment);
 int vmw_kms_update_proxy(struct vmw_resource *res,
 			 const struct drm_clip_rect *clips,
 			 unsigned num_clips,
@@ -408,14 +408,14 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
 				 unsigned num_clips, int inc,
 				 struct vmw_fence_obj **out_fence,
 				 struct drm_crtc *crtc);
-int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
-				struct vmw_framebuffer *framebuffer,
-				struct drm_clip_rect *clips,
-				struct drm_vmw_rect *vclips,
-				unsigned num_clips, int increment,
-				bool interruptible,
-				struct vmw_fence_obj **out_fence,
-				struct drm_crtc *crtc);
+int vmw_kms_sou_do_bo_dirty(struct vmw_private *dev_priv,
+			    struct vmw_framebuffer *framebuffer,
+			    struct drm_clip_rect *clips,
+			    struct drm_vmw_rect *vclips,
+			    unsigned int num_clips, int increment,
+			    bool interruptible,
+			    struct vmw_fence_obj **out_fence,
+			    struct drm_crtc *crtc);
 int vmw_kms_sou_readback(struct vmw_private *dev_priv,
 			 struct drm_file *file_priv,
 			 struct vmw_framebuffer *vfb,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 4a5907e3f560..a2dd9a829219 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -547,11 +547,11 @@ int vmw_kms_ldu_close_display(struct vmw_private *dev_priv)
 }
 
 
-int vmw_kms_ldu_do_dmabuf_dirty(struct vmw_private *dev_priv,
-				struct vmw_framebuffer *framebuffer,
-				unsigned flags, unsigned color,
-				struct drm_clip_rect *clips,
-				unsigned num_clips, int increment)
+int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv,
+			    struct vmw_framebuffer *framebuffer,
+			    unsigned int flags, unsigned int color,
+			    struct drm_clip_rect *clips,
+			    unsigned int num_clips, int increment)
 {
 	size_t fifo_size;
 	int i;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 222c9c2123a1..09420ef19ecb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -38,7 +38,7 @@
 #define VMW_OVERLAY_CAP_MASK (SVGA_FIFO_CAP_VIDEO | SVGA_FIFO_CAP_ESCAPE)
 
 struct vmw_stream {
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 	bool claimed;
 	bool paused;
 	struct drm_vmw_control_stream_arg saved;
@@ -94,7 +94,7 @@ static inline void fill_flush(struct vmw_escape_video_flush *cmd,
  * -ERESTARTSYS if interrupted by a signal.
  */
 static int vmw_overlay_send_put(struct vmw_private *dev_priv,
-				struct vmw_dma_buffer *buf,
+				struct vmw_buffer_object *buf,
 				struct drm_vmw_control_stream_arg *arg,
 				bool interruptible)
 {
@@ -225,16 +225,16 @@ static int vmw_overlay_send_stop(struct vmw_private *dev_priv,
  * used with GMRs instead of being locked to vram.
  */
 static int vmw_overlay_move_buffer(struct vmw_private *dev_priv,
-				   struct vmw_dma_buffer *buf,
+				   struct vmw_buffer_object *buf,
 				   bool pin, bool inter)
 {
 	if (!pin)
-		return vmw_dmabuf_unpin(dev_priv, buf, inter);
+		return vmw_bo_unpin(dev_priv, buf, inter);
 
 	if (dev_priv->active_display_unit == vmw_du_legacy)
-		return vmw_dmabuf_pin_in_vram(dev_priv, buf, inter);
+		return vmw_bo_pin_in_vram(dev_priv, buf, inter);
 
-	return vmw_dmabuf_pin_in_vram_or_gmr(dev_priv, buf, inter);
+	return vmw_bo_pin_in_vram_or_gmr(dev_priv, buf, inter);
 }
 
 /**
@@ -278,7 +278,7 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
 	}
 
 	if (!pause) {
-		vmw_dmabuf_unreference(&stream->buf);
+		vmw_bo_unreference(&stream->buf);
 		stream->paused = false;
 	} else {
 		stream->paused = true;
@@ -297,7 +297,7 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
  * -ERESTARTSYS if interrupted.
  */
 static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
-				     struct vmw_dma_buffer *buf,
+				     struct vmw_buffer_object *buf,
 				     struct drm_vmw_control_stream_arg *arg,
 				     bool interruptible)
 {
@@ -347,7 +347,7 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 	}
 
 	if (stream->buf != buf)
-		stream->buf = vmw_dmabuf_reference(buf);
+		stream->buf = vmw_bo_reference(buf);
 	stream->saved = *arg;
 	/* stream is no longer stopped/paused */
 	stream->paused = false;
@@ -466,7 +466,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 	struct vmw_overlay *overlay = dev_priv->overlay_priv;
 	struct drm_vmw_control_stream_arg *arg =
 	    (struct drm_vmw_control_stream_arg *)data;
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 	struct vmw_resource *res;
 	int ret;
 
@@ -484,13 +484,13 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 		goto out_unlock;
 	}
 
-	ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &buf, NULL);
+	ret = vmw_user_bo_lookup(tfile, arg->handle, &buf, NULL);
 	if (ret)
 		goto out_unlock;
 
 	ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
 
-	vmw_dmabuf_unreference(&buf);
+	vmw_bo_unreference(&buf);
 
 out_unlock:
 	mutex_unlock(&overlay->mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6b3a942b18df..5aaf9ac65cba 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -35,9 +35,9 @@
 
 #define VMW_RES_EVICT_ERR_COUNT 10
 
-struct vmw_user_dma_buffer {
+struct vmw_user_buffer_object {
 	struct ttm_prime_object prime;
-	struct vmw_dma_buffer dma;
+	struct vmw_buffer_object vbo;
 };
 
 struct vmw_bo_user_rep {
@@ -45,17 +45,18 @@ struct vmw_bo_user_rep {
 	uint64_t map_handle;
 };
 
-static inline struct vmw_dma_buffer *
-vmw_dma_buffer(struct ttm_buffer_object *bo)
+static inline struct vmw_buffer_object *
+vmw_buffer_object(struct ttm_buffer_object *bo)
 {
-	return container_of(bo, struct vmw_dma_buffer, base);
+	return container_of(bo, struct vmw_buffer_object, base);
 }
 
-static inline struct vmw_user_dma_buffer *
-vmw_user_dma_buffer(struct ttm_buffer_object *bo)
+static inline struct vmw_user_buffer_object *
+vmw_user_buffer_object(struct ttm_buffer_object *bo)
 {
-	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
-	return container_of(vmw_bo, struct vmw_user_dma_buffer, dma);
+	struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
+
+	return container_of(vmw_bo, struct vmw_user_buffer_object, vbo);
 }
 
 struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
@@ -116,7 +117,7 @@ static void vmw_resource_release(struct kref *kref)
 		res->backup_dirty = false;
 		list_del_init(&res->mob_head);
 		ttm_bo_unreserve(bo);
-		vmw_dmabuf_unreference(&res->backup);
+		vmw_bo_unreference(&res->backup);
 	}
 
 	if (likely(res->hw_destroy != NULL)) {
@@ -287,7 +288,7 @@ out_bad_resource:
 }
 
 /**
- * Helper function that looks either a surface or dmabuf.
+ * Helper function that looks either a surface or bo.
  *
  * The pointer this pointed at by out_surf and out_buf needs to be null.
  */
@@ -295,7 +296,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv,
 			   struct ttm_object_file *tfile,
 			   uint32_t handle,
 			   struct vmw_surface **out_surf,
-			   struct vmw_dma_buffer **out_buf)
+			   struct vmw_buffer_object **out_buf)
 {
 	struct vmw_resource *res;
 	int ret;
@@ -311,7 +312,7 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv,
 	}
 
 	*out_surf = NULL;
-	ret = vmw_user_dmabuf_lookup(tfile, handle, out_buf, NULL);
+	ret = vmw_user_bo_lookup(tfile, handle, out_buf, NULL);
 	return ret;
 }
 
@@ -320,14 +321,14 @@ int vmw_user_lookup_handle(struct vmw_private *dev_priv,
  */
 
 /**
- * vmw_dmabuf_acc_size - Calculate the pinned memory usage of buffers
+ * vmw_bo_acc_size - Calculate the pinned memory usage of buffers
  *
  * @dev_priv: Pointer to a struct vmw_private identifying the device.
  * @size: The requested buffer size.
  * @user: Whether this is an ordinary dma buffer or a user dma buffer.
  */
-static size_t vmw_dmabuf_acc_size(struct vmw_private *dev_priv, size_t size,
-				  bool user)
+static size_t vmw_bo_acc_size(struct vmw_private *dev_priv, size_t size,
+			      bool user)
 {
 	static size_t struct_size, user_struct_size;
 	size_t num_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -337,9 +338,9 @@ static size_t vmw_dmabuf_acc_size(struct vmw_private *dev_priv, size_t size,
 		size_t backend_size = ttm_round_pot(vmw_tt_size);
 
 		struct_size = backend_size +
-			ttm_round_pot(sizeof(struct vmw_dma_buffer));
+			ttm_round_pot(sizeof(struct vmw_buffer_object));
 		user_struct_size = backend_size +
-			ttm_round_pot(sizeof(struct vmw_user_dma_buffer));
+			ttm_round_pot(sizeof(struct vmw_user_buffer_object));
 	}
 
 	if (dev_priv->map_mode == vmw_dma_alloc_coherent)
@@ -350,36 +351,36 @@ static size_t vmw_dmabuf_acc_size(struct vmw_private *dev_priv, size_t size,
 		page_array_size;
 }
 
-void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo)
+void vmw_bo_bo_free(struct ttm_buffer_object *bo)
 {
-	struct vmw_dma_buffer *vmw_bo = vmw_dma_buffer(bo);
+	struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
-	vmw_dma_buffer_unmap(vmw_bo);
+	vmw_buffer_object_unmap(vmw_bo);
 	kfree(vmw_bo);
 }
 
-static void vmw_user_dmabuf_destroy(struct ttm_buffer_object *bo)
+static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
 {
-	struct vmw_user_dma_buffer *vmw_user_bo = vmw_user_dma_buffer(bo);
+	struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
 
-	vmw_dma_buffer_unmap(&vmw_user_bo->dma);
+	vmw_buffer_object_unmap(&vmw_user_bo->vbo);
 	ttm_prime_object_kfree(vmw_user_bo, prime);
 }
 
-int vmw_dmabuf_init(struct vmw_private *dev_priv,
-		    struct vmw_dma_buffer *vmw_bo,
-		    size_t size, struct ttm_placement *placement,
-		    bool interruptible,
-		    void (*bo_free) (struct ttm_buffer_object *bo))
+int vmw_bo_init(struct vmw_private *dev_priv,
+		struct vmw_buffer_object *vmw_bo,
+		size_t size, struct ttm_placement *placement,
+		bool interruptible,
+		void (*bo_free)(struct ttm_buffer_object *bo))
 {
 	struct ttm_bo_device *bdev = &dev_priv->bdev;
 	size_t acc_size;
 	int ret;
-	bool user = (bo_free == &vmw_user_dmabuf_destroy);
+	bool user = (bo_free == &vmw_user_bo_destroy);
 
-	BUG_ON(!bo_free && (!user && (bo_free != vmw_dmabuf_bo_free)));
+	WARN_ON_ONCE(!bo_free && (!user && (bo_free != vmw_bo_bo_free)));
 
-	acc_size = vmw_dmabuf_acc_size(dev_priv, size, user);
+	acc_size = vmw_bo_acc_size(dev_priv, size, user);
 	memset(vmw_bo, 0, sizeof(*vmw_bo));
 
 	INIT_LIST_HEAD(&vmw_bo->res_list);
@@ -391,9 +392,9 @@ int vmw_dmabuf_init(struct vmw_private *dev_priv,
 	return ret;
 }
 
-static void vmw_user_dmabuf_release(struct ttm_base_object **p_base)
+static void vmw_user_bo_release(struct ttm_base_object **p_base)
 {
-	struct vmw_user_dma_buffer *vmw_user_bo;
+	struct vmw_user_buffer_object *vmw_user_bo;
 	struct ttm_base_object *base = *p_base;
 	struct ttm_buffer_object *bo;
 
@@ -402,21 +403,22 @@ static void vmw_user_dmabuf_release(struct ttm_base_object **p_base)
 	if (unlikely(base == NULL))
 		return;
 
-	vmw_user_bo = container_of(base, struct vmw_user_dma_buffer,
+	vmw_user_bo = container_of(base, struct vmw_user_buffer_object,
 				   prime.base);
-	bo = &vmw_user_bo->dma.base;
+	bo = &vmw_user_bo->vbo.base;
 	ttm_bo_unref(&bo);
 }
 
-static void vmw_user_dmabuf_ref_obj_release(struct ttm_base_object *base,
-					    enum ttm_ref_type ref_type)
+static void vmw_user_bo_ref_obj_release(struct ttm_base_object *base,
+					enum ttm_ref_type ref_type)
 {
-	struct vmw_user_dma_buffer *user_bo;
-	user_bo = container_of(base, struct vmw_user_dma_buffer, prime.base);
+	struct vmw_user_buffer_object *user_bo;
+
+	user_bo = container_of(base, struct vmw_user_buffer_object, prime.base);
 
 	switch (ref_type) {
 	case TTM_REF_SYNCCPU_WRITE:
-		ttm_bo_synccpu_write_release(&user_bo->dma.base);
+		ttm_bo_synccpu_write_release(&user_bo->vbo.base);
 		break;
 	default:
 		BUG();
@@ -424,7 +426,7 @@ static void vmw_user_dmabuf_ref_obj_release(struct ttm_base_object *base,
 }
 
 /**
- * vmw_user_dmabuf_alloc - Allocate a user dma buffer
+ * vmw_user_bo_alloc - Allocate a user dma buffer
  *
  * @dev_priv: Pointer to a struct device private.
  * @tfile: Pointer to a struct ttm_object_file on which to register the user
@@ -432,18 +434,18 @@ static void vmw_user_dmabuf_ref_obj_release(struct ttm_base_object *base,
  * @size: Size of the dma buffer.
  * @shareable: Boolean whether the buffer is shareable with other open files.
  * @handle: Pointer to where the handle value should be assigned.
- * @p_dma_buf: Pointer to where the refcounted struct vmw_dma_buffer pointer
+ * @p_vbo: Pointer to where the refcounted struct vmw_buffer_object pointer
  * should be assigned.
  */
-int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv,
-			  struct ttm_object_file *tfile,
-			  uint32_t size,
-			  bool shareable,
-			  uint32_t *handle,
-			  struct vmw_dma_buffer **p_dma_buf,
-			  struct ttm_base_object **p_base)
+int vmw_user_bo_alloc(struct vmw_private *dev_priv,
+		      struct ttm_object_file *tfile,
+		      uint32_t size,
+		      bool shareable,
+		      uint32_t *handle,
+		      struct vmw_buffer_object **p_vbo,
+		      struct ttm_base_object **p_base)
 {
-	struct vmw_user_dma_buffer *user_bo;
+	struct vmw_user_buffer_object *user_bo;
 	struct ttm_buffer_object *tmp;
 	int ret;
 
@@ -453,28 +455,28 @@ int vmw_user_dmabuf_alloc(struct vmw_private *dev_priv,
 		return -ENOMEM;
 	}
 
-	ret = vmw_dmabuf_init(dev_priv, &user_bo->dma, size,
-			      (dev_priv->has_mob) ?
-			      &vmw_sys_placement :
-			      &vmw_vram_sys_placement, true,
-			      &vmw_user_dmabuf_destroy);
+	ret = vmw_bo_init(dev_priv, &user_bo->vbo, size,
+			  (dev_priv->has_mob) ?
+			  &vmw_sys_placement :
+			  &vmw_vram_sys_placement, true,
+			  &vmw_user_bo_destroy);
 	if (unlikely(ret != 0))
 		return ret;
 
-	tmp = ttm_bo_reference(&user_bo->dma.base);
+	tmp = ttm_bo_reference(&user_bo->vbo.base);
 	ret = ttm_prime_object_init(tfile,
 				    size,
 				    &user_bo->prime,
 				    shareable,
 				    ttm_buffer_type,
-				    &vmw_user_dmabuf_release,
-				    &vmw_user_dmabuf_ref_obj_release);
+				    &vmw_user_bo_release,
+				    &vmw_user_bo_ref_obj_release);
 	if (unlikely(ret != 0)) {
 		ttm_bo_unref(&tmp);
 		goto out_no_base_object;
 	}
 
-	*p_dma_buf = &user_bo->dma;
+	*p_vbo = &user_bo->vbo;
 	if (p_base) {
 		*p_base = &user_bo->prime.base;
 		kref_get(&(*p_base)->refcount);
@@ -486,21 +488,21 @@ out_no_base_object:
 }
 
 /**
- * vmw_user_dmabuf_verify_access - verify access permissions on this
+ * vmw_user_bo_verify_access - verify access permissions on this
  * buffer object.
  *
  * @bo: Pointer to the buffer object being accessed
  * @tfile: Identifying the caller.
  */
-int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
+int vmw_user_bo_verify_access(struct ttm_buffer_object *bo,
 				  struct ttm_object_file *tfile)
 {
-	struct vmw_user_dma_buffer *vmw_user_bo;
+	struct vmw_user_buffer_object *vmw_user_bo;
 
-	if (unlikely(bo->destroy != vmw_user_dmabuf_destroy))
+	if (unlikely(bo->destroy != vmw_user_bo_destroy))
 		return -EPERM;
 
-	vmw_user_bo = vmw_user_dma_buffer(bo);
+	vmw_user_bo = vmw_user_buffer_object(bo);
 
 	/* Check that the caller has opened the object. */
 	if (likely(ttm_ref_object_exists(tfile, &vmw_user_bo->prime.base)))
@@ -511,7 +513,7 @@ int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
 }
 
 /**
- * vmw_user_dmabuf_synccpu_grab - Grab a struct vmw_user_dma_buffer for cpu
+ * vmw_user_bo_synccpu_grab - Grab a struct vmw_user_buffer_object for cpu
  * access, idling previous GPU operations on the buffer and optionally
  * blocking it for further command submissions.
  *
@@ -521,11 +523,11 @@ int vmw_user_dmabuf_verify_access(struct ttm_buffer_object *bo,
  *
  * A blocking grab will be automatically released when @tfile is closed.
  */
-static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
+static int vmw_user_bo_synccpu_grab(struct vmw_user_buffer_object *user_bo,
 					struct ttm_object_file *tfile,
 					uint32_t flags)
 {
-	struct ttm_buffer_object *bo = &user_bo->dma.base;
+	struct ttm_buffer_object *bo = &user_bo->vbo.base;
 	bool existed;
 	int ret;
 
@@ -550,20 +552,20 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
 	ret = ttm_ref_object_add(tfile, &user_bo->prime.base,
 				 TTM_REF_SYNCCPU_WRITE, &existed, false);
 	if (ret != 0 || existed)
-		ttm_bo_synccpu_write_release(&user_bo->dma.base);
+		ttm_bo_synccpu_write_release(&user_bo->vbo.base);
 
 	return ret;
 }
 
 /**
- * vmw_user_dmabuf_synccpu_release - Release a previous grab for CPU access,
+ * vmw_user_bo_synccpu_release - Release a previous grab for CPU access,
  * and unblock command submission on the buffer if blocked.
  *
  * @handle: Handle identifying the buffer object.
  * @tfile: Identifying the caller.
  * @flags: Flags indicating the type of release.
  */
-static int vmw_user_dmabuf_synccpu_release(uint32_t handle,
+static int vmw_user_bo_synccpu_release(uint32_t handle,
 					   struct ttm_object_file *tfile,
 					   uint32_t flags)
 {
@@ -575,7 +577,7 @@ static int vmw_user_dmabuf_synccpu_release(uint32_t handle,
 }
 
 /**
- * vmw_user_dmabuf_synccpu_release - ioctl function implementing the synccpu
+ * vmw_user_bo_synccpu_release - ioctl function implementing the synccpu
  * functionality.
  *
  * @dev: Identifies the drm device.
@@ -585,13 +587,13 @@ static int vmw_user_dmabuf_synccpu_release(uint32_t handle,
  * This function checks the ioctl arguments for validity and calls the
  * relevant synccpu functions.
  */
-int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data,
+int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv)
 {
 	struct drm_vmw_synccpu_arg *arg =
 		(struct drm_vmw_synccpu_arg *) data;
-	struct vmw_dma_buffer *dma_buf;
-	struct vmw_user_dma_buffer *user_bo;
+	struct vmw_buffer_object *vbo;
+	struct vmw_user_buffer_object *user_bo;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct ttm_base_object *buffer_base;
 	int ret;
@@ -606,15 +608,15 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data,
 
 	switch (arg->op) {
 	case drm_vmw_synccpu_grab:
-		ret = vmw_user_dmabuf_lookup(tfile, arg->handle, &dma_buf,
+		ret = vmw_user_bo_lookup(tfile, arg->handle, &vbo,
 					     &buffer_base);
 		if (unlikely(ret != 0))
 			return ret;
 
-		user_bo = container_of(dma_buf, struct vmw_user_dma_buffer,
-				       dma);
-		ret = vmw_user_dmabuf_synccpu_grab(user_bo, tfile, arg->flags);
-		vmw_dmabuf_unreference(&dma_buf);
+		user_bo = container_of(vbo, struct vmw_user_buffer_object,
+				       vbo);
+		ret = vmw_user_bo_synccpu_grab(user_bo, tfile, arg->flags);
+		vmw_bo_unreference(&vbo);
 		ttm_base_object_unref(&buffer_base);
 		if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
 			     ret != -EBUSY)) {
@@ -624,8 +626,8 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data,
 		}
 		break;
 	case drm_vmw_synccpu_release:
-		ret = vmw_user_dmabuf_synccpu_release(arg->handle, tfile,
-						      arg->flags);
+		ret = vmw_user_bo_synccpu_release(arg->handle, tfile,
+						  arg->flags);
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Failed synccpu release on handle 0x%08x.\n",
 				  (unsigned int) arg->handle);
@@ -640,15 +642,15 @@ int vmw_user_dmabuf_synccpu_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
+int vmw_bo_alloc_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	union drm_vmw_alloc_dmabuf_arg *arg =
 	    (union drm_vmw_alloc_dmabuf_arg *)data;
 	struct drm_vmw_alloc_dmabuf_req *req = &arg->req;
 	struct drm_vmw_dmabuf_rep *rep = &arg->rep;
-	struct vmw_dma_buffer *dma_buf;
+	struct vmw_buffer_object *vbo;
 	uint32_t handle;
 	int ret;
 
@@ -656,27 +658,27 @@ int vmw_dmabuf_alloc_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0))
 		return ret;
 
-	ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
-				    req->size, false, &handle, &dma_buf,
-				    NULL);
+	ret = vmw_user_bo_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
+				req->size, false, &handle, &vbo,
+				NULL);
 	if (unlikely(ret != 0))
-		goto out_no_dmabuf;
+		goto out_no_bo;
 
 	rep->handle = handle;
-	rep->map_handle = drm_vma_node_offset_addr(&dma_buf->base.vma_node);
+	rep->map_handle = drm_vma_node_offset_addr(&vbo->base.vma_node);
 	rep->cur_gmr_id = handle;
 	rep->cur_gmr_offset = 0;
 
-	vmw_dmabuf_unreference(&dma_buf);
+	vmw_bo_unreference(&vbo);
 
-out_no_dmabuf:
+out_no_bo:
 	ttm_read_unlock(&dev_priv->reservation_sem);
 
 	return ret;
 }
 
-int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv)
+int vmw_bo_unref_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
 {
 	struct drm_vmw_unref_dmabuf_arg *arg =
 	    (struct drm_vmw_unref_dmabuf_arg *)data;
@@ -686,11 +688,11 @@ int vmw_dmabuf_unref_ioctl(struct drm_device *dev, void *data,
 					 TTM_REF_USAGE);
 }
 
-int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
-			   uint32_t handle, struct vmw_dma_buffer **out,
+int vmw_user_bo_lookup(struct ttm_object_file *tfile,
+			   uint32_t handle, struct vmw_buffer_object **out,
 			   struct ttm_base_object **p_base)
 {
-	struct vmw_user_dma_buffer *vmw_user_bo;
+	struct vmw_user_buffer_object *vmw_user_bo;
 	struct ttm_base_object *base;
 
 	base = ttm_base_object_lookup(tfile, handle);
@@ -707,28 +709,28 @@ int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 		return -EINVAL;
 	}
 
-	vmw_user_bo = container_of(base, struct vmw_user_dma_buffer,
+	vmw_user_bo = container_of(base, struct vmw_user_buffer_object,
 				   prime.base);
-	(void)ttm_bo_reference(&vmw_user_bo->dma.base);
+	(void)ttm_bo_reference(&vmw_user_bo->vbo.base);
 	if (p_base)
 		*p_base = base;
 	else
 		ttm_base_object_unref(&base);
-	*out = &vmw_user_bo->dma;
+	*out = &vmw_user_bo->vbo;
 
 	return 0;
 }
 
-int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
-			      struct vmw_dma_buffer *dma_buf,
+int vmw_user_bo_reference(struct ttm_object_file *tfile,
+			      struct vmw_buffer_object *vbo,
 			      uint32_t *handle)
 {
-	struct vmw_user_dma_buffer *user_bo;
+	struct vmw_user_buffer_object *user_bo;
 
-	if (dma_buf->base.destroy != vmw_user_dmabuf_destroy)
+	if (vbo->base.destroy != vmw_user_bo_destroy)
 		return -EINVAL;
 
-	user_bo = container_of(dma_buf, struct vmw_user_dma_buffer, dma);
+	user_bo = container_of(vbo, struct vmw_user_buffer_object, vbo);
 
 	*handle = user_bo->prime.base.hash.key;
 	return ttm_ref_object_add(tfile, &user_bo->prime.base,
@@ -743,7 +745,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
  * @args: Pointer to a struct drm_mode_create_dumb structure
  *
  * This is a driver callback for the core drm create_dumb functionality.
- * Note that this is very similar to the vmw_dmabuf_alloc ioctl, except
+ * Note that this is very similar to the vmw_bo_alloc ioctl, except
  * that the arguments have a different format.
  */
 int vmw_dumb_create(struct drm_file *file_priv,
@@ -751,7 +753,7 @@ int vmw_dumb_create(struct drm_file *file_priv,
 		    struct drm_mode_create_dumb *args)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_dma_buffer *dma_buf;
+	struct vmw_buffer_object *vbo;
 	int ret;
 
 	args->pitch = args->width * ((args->bpp + 7) / 8);
@@ -761,14 +763,14 @@ int vmw_dumb_create(struct drm_file *file_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
-	ret = vmw_user_dmabuf_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
+	ret = vmw_user_bo_alloc(dev_priv, vmw_fpriv(file_priv)->tfile,
 				    args->size, false, &args->handle,
-				    &dma_buf, NULL);
+				    &vbo, NULL);
 	if (unlikely(ret != 0))
-		goto out_no_dmabuf;
+		goto out_no_bo;
 
-	vmw_dmabuf_unreference(&dma_buf);
-out_no_dmabuf:
+	vmw_bo_unreference(&vbo);
+out_no_bo:
 	ttm_read_unlock(&dev_priv->reservation_sem);
 	return ret;
 }
@@ -788,15 +790,15 @@ int vmw_dumb_map_offset(struct drm_file *file_priv,
 			uint64_t *offset)
 {
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct vmw_dma_buffer *out_buf;
+	struct vmw_buffer_object *out_buf;
 	int ret;
 
-	ret = vmw_user_dmabuf_lookup(tfile, handle, &out_buf, NULL);
+	ret = vmw_user_bo_lookup(tfile, handle, &out_buf, NULL);
 	if (ret != 0)
 		return -EINVAL;
 
 	*offset = drm_vma_node_offset_addr(&out_buf->base.vma_node);
-	vmw_dmabuf_unreference(&out_buf);
+	vmw_bo_unreference(&out_buf);
 	return 0;
 }
 
@@ -829,7 +831,7 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res,
 {
 	unsigned long size =
 		(res->backup_size + PAGE_SIZE - 1) & PAGE_MASK;
-	struct vmw_dma_buffer *backup;
+	struct vmw_buffer_object *backup;
 	int ret;
 
 	if (likely(res->backup)) {
@@ -841,16 +843,16 @@ static int vmw_resource_buf_alloc(struct vmw_resource *res,
 	if (unlikely(!backup))
 		return -ENOMEM;
 
-	ret = vmw_dmabuf_init(res->dev_priv, backup, res->backup_size,
+	ret = vmw_bo_init(res->dev_priv, backup, res->backup_size,
 			      res->func->backup_placement,
 			      interruptible,
-			      &vmw_dmabuf_bo_free);
+			      &vmw_bo_bo_free);
 	if (unlikely(ret != 0))
-		goto out_no_dmabuf;
+		goto out_no_bo;
 
 	res->backup = backup;
 
-out_no_dmabuf:
+out_no_bo:
 	return ret;
 }
 
@@ -919,7 +921,7 @@ out_bind_failed:
  */
 void vmw_resource_unreserve(struct vmw_resource *res,
 			    bool switch_backup,
-			    struct vmw_dma_buffer *new_backup,
+			    struct vmw_buffer_object *new_backup,
 			    unsigned long new_backup_offset)
 {
 	struct vmw_private *dev_priv = res->dev_priv;
@@ -931,11 +933,11 @@ void vmw_resource_unreserve(struct vmw_resource *res,
 		if (res->backup) {
 			lockdep_assert_held(&res->backup->base.resv->lock.base);
 			list_del_init(&res->mob_head);
-			vmw_dmabuf_unreference(&res->backup);
+			vmw_bo_unreference(&res->backup);
 		}
 
 		if (new_backup) {
-			res->backup = vmw_dmabuf_reference(new_backup);
+			res->backup = vmw_bo_reference(new_backup);
 			lockdep_assert_held(&new_backup->base.resv->lock.base);
 			list_add_tail(&res->mob_head, &new_backup->res_list);
 		} else {
@@ -1007,7 +1009,7 @@ out_no_validate:
 out_no_reserve:
 	ttm_bo_unref(&val_buf->bo);
 	if (backup_dirty)
-		vmw_dmabuf_unreference(&res->backup);
+		vmw_bo_unreference(&res->backup);
 
 	return ret;
 }
@@ -1171,7 +1173,7 @@ int vmw_resource_validate(struct vmw_resource *res)
 		goto out_no_validate;
 	else if (!res->func->needs_backup && res->backup) {
 		list_del_init(&res->mob_head);
-		vmw_dmabuf_unreference(&res->backup);
+		vmw_bo_unreference(&res->backup);
 	}
 
 	return 0;
@@ -1230,22 +1232,22 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo,
 void vmw_resource_move_notify(struct ttm_buffer_object *bo,
 			      struct ttm_mem_reg *mem)
 {
-	struct vmw_dma_buffer *dma_buf;
+	struct vmw_buffer_object *vbo;
 
 	if (mem == NULL)
 		return;
 
-	if (bo->destroy != vmw_dmabuf_bo_free &&
-	    bo->destroy != vmw_user_dmabuf_destroy)
+	if (bo->destroy != vmw_bo_bo_free &&
+	    bo->destroy != vmw_user_bo_destroy)
 		return;
 
-	dma_buf = container_of(bo, struct vmw_dma_buffer, base);
+	vbo = container_of(bo, struct vmw_buffer_object, base);
 
 	/*
 	 * Kill any cached kernel maps before move. An optimization could
 	 * be to do this iff source or destination memory type is VRAM.
 	 */
-	vmw_dma_buffer_unmap(dma_buf);
+	vmw_buffer_object_unmap(vbo);
 
 	if (mem->mem_type != VMW_PL_MOB) {
 		struct vmw_resource *res, *n;
@@ -1254,7 +1256,7 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
 		val_buf.bo = bo;
 		val_buf.shared = false;
 
-		list_for_each_entry_safe(res, n, &dma_buf->res_list, mob_head) {
+		list_for_each_entry_safe(res, n, &vbo->res_list, mob_head) {
 
 			if (unlikely(res->func->unbind == NULL))
 				continue;
@@ -1277,12 +1279,12 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
  */
 void vmw_resource_swap_notify(struct ttm_buffer_object *bo)
 {
-	if (bo->destroy != vmw_dmabuf_bo_free &&
-	    bo->destroy != vmw_user_dmabuf_destroy)
+	if (bo->destroy != vmw_bo_bo_free &&
+	    bo->destroy != vmw_user_bo_destroy)
 		return;
 
 	/* Kill any cached kernel maps before swapout */
-	vmw_dma_buffer_unmap(vmw_dma_buffer(bo));
+	vmw_buffer_object_unmap(vmw_buffer_object(bo));
 }
 
 
@@ -1294,7 +1296,7 @@ void vmw_resource_swap_notify(struct ttm_buffer_object *bo)
  * Read back cached states from the device if they exist.  This function
  * assumings binding_mutex is held.
  */
-int vmw_query_readback_all(struct vmw_dma_buffer *dx_query_mob)
+int vmw_query_readback_all(struct vmw_buffer_object *dx_query_mob)
 {
 	struct vmw_resource *dx_query_ctx;
 	struct vmw_private *dev_priv;
@@ -1344,7 +1346,7 @@ int vmw_query_readback_all(struct vmw_dma_buffer *dx_query_mob)
 void vmw_query_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *mem)
 {
-	struct vmw_dma_buffer *dx_query_mob;
+	struct vmw_buffer_object *dx_query_mob;
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct vmw_private *dev_priv;
 
@@ -1353,7 +1355,7 @@ void vmw_query_move_notify(struct ttm_buffer_object *bo,
 
 	mutex_lock(&dev_priv->binding_mutex);
 
-	dx_query_mob = container_of(bo, struct vmw_dma_buffer, base);
+	dx_query_mob = container_of(bo, struct vmw_buffer_object, base);
 	if (mem == NULL || !dx_query_mob || !dx_query_mob->dx_query_ctx) {
 		mutex_unlock(&dev_priv->binding_mutex);
 		return;
@@ -1481,7 +1483,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
 		goto out_no_reserve;
 
 	if (res->pin_count == 0) {
-		struct vmw_dma_buffer *vbo = NULL;
+		struct vmw_buffer_object *vbo = NULL;
 
 		if (res->backup) {
 			vbo = res->backup;
@@ -1539,7 +1541,7 @@ void vmw_resource_unpin(struct vmw_resource *res)
 
 	WARN_ON(res->pin_count == 0);
 	if (--res->pin_count == 0 && res->backup) {
-		struct vmw_dma_buffer *vbo = res->backup;
+		struct vmw_buffer_object *vbo = res->backup;
 
 		(void) ttm_bo_reserve(&vbo->base, false, false, NULL);
 		vmw_bo_pin_reserved(vbo, false);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 9798640cbfcd..74dfd4621b7e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -66,7 +66,7 @@ struct vmw_kms_sou_readback_blit {
 	SVGAFifoCmdBlitScreenToGMRFB body;
 };
 
-struct vmw_kms_sou_dmabuf_blit {
+struct vmw_kms_sou_bo_blit {
 	uint32 header;
 	SVGAFifoCmdBlitGMRFBToScreen body;
 };
@@ -83,7 +83,7 @@ struct vmw_screen_object_unit {
 	struct vmw_display_unit base;
 
 	unsigned long buffer_size; /**< Size of allocated buffer */
-	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
+	struct vmw_buffer_object *buffer; /**< Backing store buffer */
 
 	bool defined;
 };
@@ -240,8 +240,8 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	}
 
 	if (vfb) {
-		sou->buffer = vps->dmabuf;
-		sou->buffer_size = vps->dmabuf_size;
+		sou->buffer = vps->bo;
+		sou->buffer_size = vps->bo_size;
 
 		ret = vmw_sou_fifo_create(dev_priv, sou, crtc->x, crtc->y,
 					  &crtc->mode);
@@ -408,10 +408,10 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane,
 	struct drm_crtc *crtc = plane->state->crtc ?
 		plane->state->crtc : old_state->crtc;
 
-	if (vps->dmabuf)
-		vmw_dmabuf_unpin(vmw_priv(crtc->dev), vps->dmabuf, false);
-	vmw_dmabuf_unreference(&vps->dmabuf);
-	vps->dmabuf_size = 0;
+	if (vps->bo)
+		vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false);
+	vmw_bo_unreference(&vps->bo);
+	vps->bo_size = 0;
 
 	vmw_du_plane_cleanup_fb(plane, old_state);
 }
@@ -440,8 +440,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
 
 
 	if (!new_fb) {
-		vmw_dmabuf_unreference(&vps->dmabuf);
-		vps->dmabuf_size = 0;
+		vmw_bo_unreference(&vps->bo);
+		vps->bo_size = 0;
 
 		return 0;
 	}
@@ -449,22 +449,22 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
 	size = new_state->crtc_w * new_state->crtc_h * 4;
 	dev_priv = vmw_priv(crtc->dev);
 
-	if (vps->dmabuf) {
-		if (vps->dmabuf_size == size) {
+	if (vps->bo) {
+		if (vps->bo_size == size) {
 			/*
 			 * Note that this might temporarily up the pin-count
 			 * to 2, until cleanup_fb() is called.
 			 */
-			return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf,
+			return vmw_bo_pin_in_vram(dev_priv, vps->bo,
 						      true);
 		}
 
-		vmw_dmabuf_unreference(&vps->dmabuf);
-		vps->dmabuf_size = 0;
+		vmw_bo_unreference(&vps->bo);
+		vps->bo_size = 0;
 	}
 
-	vps->dmabuf = kzalloc(sizeof(*vps->dmabuf), GFP_KERNEL);
-	if (!vps->dmabuf)
+	vps->bo = kzalloc(sizeof(*vps->bo), GFP_KERNEL);
+	if (!vps->bo)
 		return -ENOMEM;
 
 	vmw_svga_enable(dev_priv);
@@ -473,22 +473,22 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane,
 	 * resume the overlays, this is preferred to failing to alloc.
 	 */
 	vmw_overlay_pause_all(dev_priv);
-	ret = vmw_dmabuf_init(dev_priv, vps->dmabuf, size,
+	ret = vmw_bo_init(dev_priv, vps->bo, size,
 			      &vmw_vram_ne_placement,
-			      false, &vmw_dmabuf_bo_free);
+			      false, &vmw_bo_bo_free);
 	vmw_overlay_resume_all(dev_priv);
 	if (ret) {
-		vps->dmabuf = NULL; /* vmw_dmabuf_init frees on error */
+		vps->bo = NULL; /* vmw_bo_init frees on error */
 		return ret;
 	}
 
-	vps->dmabuf_size = size;
+	vps->bo_size = size;
 
 	/*
 	 * TTM already thinks the buffer is pinned, but make sure the
 	 * pin_count is upped.
 	 */
-	return vmw_dmabuf_pin_in_vram(dev_priv, vps->dmabuf, true);
+	return vmw_bo_pin_in_vram(dev_priv, vps->bo, true);
 }
 
 
@@ -512,10 +512,10 @@ vmw_sou_primary_plane_atomic_update(struct drm_plane *plane,
 		vclips.w = crtc->mode.hdisplay;
 		vclips.h = crtc->mode.vdisplay;
 
-		if (vfb->dmabuf)
-			ret = vmw_kms_sou_do_dmabuf_dirty(dev_priv, vfb, NULL,
-							  &vclips, 1, 1, true,
-							  &fence, crtc);
+		if (vfb->bo)
+			ret = vmw_kms_sou_do_bo_dirty(dev_priv, vfb, NULL,
+						      &vclips, 1, 1, true,
+						      &fence, crtc);
 		else
 			ret = vmw_kms_sou_do_surface_dirty(dev_priv, vfb, NULL,
 							   &vclips, NULL, 0, 0,
@@ -775,11 +775,11 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	return 0;
 }
 
-static int do_dmabuf_define_gmrfb(struct vmw_private *dev_priv,
+static int do_bo_define_gmrfb(struct vmw_private *dev_priv,
 				  struct vmw_framebuffer *framebuffer)
 {
-	struct vmw_dma_buffer *buf =
-		container_of(framebuffer, struct vmw_framebuffer_dmabuf,
+	struct vmw_buffer_object *buf =
+		container_of(framebuffer, struct vmw_framebuffer_bo,
 			     base)->buffer;
 	int depth = framebuffer->base.format->depth;
 	struct {
@@ -970,13 +970,13 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv,
 }
 
 /**
- * vmw_sou_dmabuf_fifo_commit - Callback to submit a set of readback clips.
+ * vmw_sou_bo_fifo_commit - Callback to submit a set of readback clips.
  *
  * @dirty: The closure structure.
  *
  * Commits a previously built command buffer of readback clips.
  */
-static void vmw_sou_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
+static void vmw_sou_bo_fifo_commit(struct vmw_kms_dirty *dirty)
 {
 	if (!dirty->num_hits) {
 		vmw_fifo_commit(dirty->dev_priv, 0);
@@ -984,20 +984,20 @@ static void vmw_sou_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
 	}
 
 	vmw_fifo_commit(dirty->dev_priv,
-			sizeof(struct vmw_kms_sou_dmabuf_blit) *
+			sizeof(struct vmw_kms_sou_bo_blit) *
 			dirty->num_hits);
 }
 
 /**
- * vmw_sou_dmabuf_clip - Callback to encode a readback cliprect.
+ * vmw_sou_bo_clip - Callback to encode a readback cliprect.
  *
  * @dirty: The closure structure
  *
  * Encodes a BLIT_GMRFB_TO_SCREEN cliprect.
  */
-static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
+static void vmw_sou_bo_clip(struct vmw_kms_dirty *dirty)
 {
-	struct vmw_kms_sou_dmabuf_blit *blit = dirty->cmd;
+	struct vmw_kms_sou_bo_blit *blit = dirty->cmd;
 
 	blit += dirty->num_hits;
 	blit->header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
@@ -1012,10 +1012,10 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
 }
 
 /**
- * vmw_kms_do_dmabuf_dirty - Dirty part of a dma-buffer backed framebuffer
+ * vmw_kms_do_bo_dirty - Dirty part of a buffer-object backed framebuffer
  *
  * @dev_priv: Pointer to the device private structure.
- * @framebuffer: Pointer to the dma-buffer backed framebuffer.
+ * @framebuffer: Pointer to the buffer-object backed framebuffer.
  * @clips: Array of clip rects.
  * @vclips: Alternate array of clip rects. Either @clips or @vclips must
  * be NULL.
@@ -1025,12 +1025,12 @@ static void vmw_sou_dmabuf_clip(struct vmw_kms_dirty *dirty)
  * @out_fence: If non-NULL, will return a ref-counted pointer to a
  * struct vmw_fence_obj. The returned fence pointer may be NULL in which
  * case the device has already synchronized.
- * @crtc: If crtc is passed, perform dmabuf dirty on that crtc only.
+ * @crtc: If crtc is passed, perform bo dirty on that crtc only.
  *
  * Returns 0 on success, negative error code on failure. -ERESTARTSYS if
  * interrupted.
  */
-int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
+int vmw_kms_sou_do_bo_dirty(struct vmw_private *dev_priv,
 				struct vmw_framebuffer *framebuffer,
 				struct drm_clip_rect *clips,
 				struct drm_vmw_rect *vclips,
@@ -1039,8 +1039,8 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 				struct vmw_fence_obj **out_fence,
 				struct drm_crtc *crtc)
 {
-	struct vmw_dma_buffer *buf =
-		container_of(framebuffer, struct vmw_framebuffer_dmabuf,
+	struct vmw_buffer_object *buf =
+		container_of(framebuffer, struct vmw_framebuffer_bo,
 			     base)->buffer;
 	struct vmw_kms_dirty dirty;
 	int ret;
@@ -1050,14 +1050,14 @@ int vmw_kms_sou_do_dmabuf_dirty(struct vmw_private *dev_priv,
 	if (ret)
 		return ret;
 
-	ret = do_dmabuf_define_gmrfb(dev_priv, framebuffer);
+	ret = do_bo_define_gmrfb(dev_priv, framebuffer);
 	if (unlikely(ret != 0))
 		goto out_revert;
 
 	dirty.crtc = crtc;
-	dirty.fifo_commit = vmw_sou_dmabuf_fifo_commit;
-	dirty.clip = vmw_sou_dmabuf_clip;
-	dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_dmabuf_blit) *
+	dirty.fifo_commit = vmw_sou_bo_fifo_commit;
+	dirty.clip = vmw_sou_bo_clip;
+	dirty.fifo_reserve_size = sizeof(struct vmw_kms_sou_bo_blit) *
 		num_clips;
 	ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips,
 				   0, 0, num_clips, increment, &dirty);
@@ -1116,12 +1116,12 @@ static void vmw_sou_readback_clip(struct vmw_kms_dirty *dirty)
 
 /**
  * vmw_kms_sou_readback - Perform a readback from the screen object system to
- * a dma-buffer backed framebuffer.
+ * a buffer-object backed framebuffer.
  *
  * @dev_priv: Pointer to the device private structure.
  * @file_priv: Pointer to a struct drm_file identifying the caller.
  * Must be set to NULL if @user_fence_rep is NULL.
- * @vfb: Pointer to the dma-buffer backed framebuffer.
+ * @vfb: Pointer to the buffer-object backed framebuffer.
  * @user_fence_rep: User-space provided structure for fence information.
  * Must be set to non-NULL if @file_priv is non-NULL.
  * @vclips: Array of clip rects.
@@ -1139,8 +1139,8 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
 			 uint32_t num_clips,
 			 struct drm_crtc *crtc)
 {
-	struct vmw_dma_buffer *buf =
-		container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
+	struct vmw_buffer_object *buf =
+		container_of(vfb, struct vmw_framebuffer_bo, base)->buffer;
 	struct vmw_kms_dirty dirty;
 	int ret;
 
@@ -1149,7 +1149,7 @@ int vmw_kms_sou_readback(struct vmw_private *dev_priv,
 	if (ret)
 		return ret;
 
-	ret = do_dmabuf_define_gmrfb(dev_priv, vfb);
+	ret = do_bo_define_gmrfb(dev_priv, vfb);
 	if (unlikely(ret != 0))
 		goto out_revert;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index 73b8e9a16368..f6c939f3ff5e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -159,7 +159,7 @@ static int vmw_gb_shader_init(struct vmw_private *dev_priv,
 			      SVGA3dShaderType type,
 			      uint8_t num_input_sig,
 			      uint8_t num_output_sig,
-			      struct vmw_dma_buffer *byte_code,
+			      struct vmw_buffer_object *byte_code,
 			      void (*res_free) (struct vmw_resource *res))
 {
 	struct vmw_shader *shader = vmw_res_to_shader(res);
@@ -178,7 +178,7 @@ static int vmw_gb_shader_init(struct vmw_private *dev_priv,
 
 	res->backup_size = size;
 	if (byte_code) {
-		res->backup = vmw_dmabuf_reference(byte_code);
+		res->backup = vmw_bo_reference(byte_code);
 		res->backup_offset = offset;
 	}
 	shader->size = size;
@@ -723,7 +723,7 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data,
 }
 
 static int vmw_user_shader_alloc(struct vmw_private *dev_priv,
-				 struct vmw_dma_buffer *buffer,
+				 struct vmw_buffer_object *buffer,
 				 size_t shader_size,
 				 size_t offset,
 				 SVGA3dShaderType shader_type,
@@ -801,7 +801,7 @@ out:
 
 
 static struct vmw_resource *vmw_shader_alloc(struct vmw_private *dev_priv,
-					     struct vmw_dma_buffer *buffer,
+					     struct vmw_buffer_object *buffer,
 					     size_t shader_size,
 					     size_t offset,
 					     SVGA3dShaderType shader_type)
@@ -862,12 +862,12 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct vmw_dma_buffer *buffer = NULL;
+	struct vmw_buffer_object *buffer = NULL;
 	SVGA3dShaderType shader_type;
 	int ret;
 
 	if (buffer_handle != SVGA3D_INVALID_ID) {
-		ret = vmw_user_dmabuf_lookup(tfile, buffer_handle,
+		ret = vmw_user_bo_lookup(tfile, buffer_handle,
 					     &buffer, NULL);
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Could not find buffer for shader "
@@ -906,7 +906,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
 
 	ttm_read_unlock(&dev_priv->reservation_sem);
 out_bad_arg:
-	vmw_dmabuf_unreference(&buffer);
+	vmw_bo_unreference(&buffer);
 	return ret;
 }
 
@@ -983,7 +983,7 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
 			  struct list_head *list)
 {
 	struct ttm_operation_ctx ctx = { false, true };
-	struct vmw_dma_buffer *buf;
+	struct vmw_buffer_object *buf;
 	struct ttm_bo_kmap_obj map;
 	bool is_iomem;
 	int ret;
@@ -997,8 +997,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
 	if (unlikely(!buf))
 		return -ENOMEM;
 
-	ret = vmw_dmabuf_init(dev_priv, buf, size, &vmw_sys_ne_placement,
-			      true, vmw_dmabuf_bo_free);
+	ret = vmw_bo_init(dev_priv, buf, size, &vmw_sys_ne_placement,
+			      true, vmw_bo_bo_free);
 	if (unlikely(ret != 0))
 		goto out;
 
@@ -1031,7 +1031,7 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv,
 				 res, list);
 	vmw_resource_unreference(&res);
 no_reserve:
-	vmw_dmabuf_unreference(&buf);
+	vmw_bo_unreference(&buf);
 out:
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 152e96cb1c01..537df9034008 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -44,7 +44,7 @@
 enum stdu_content_type {
 	SAME_AS_DISPLAY = 0,
 	SEPARATE_SURFACE,
-	SEPARATE_DMA
+	SEPARATE_BO
 };
 
 /**
@@ -58,7 +58,7 @@ enum stdu_content_type {
  * @bottom: Bottom side of bounding box.
  * @fb_left: Left side of the framebuffer/content bounding box
  * @fb_top: Top of the framebuffer/content bounding box
- * @buf: DMA buffer when DMA-ing between buffer and screen targets.
+ * @buf: buffer object when DMA-ing between buffer and screen targets.
  * @sid: Surface ID when copying between surface and screen targets.
  */
 struct vmw_stdu_dirty {
@@ -68,7 +68,7 @@ struct vmw_stdu_dirty {
 	s32 fb_left, fb_top;
 	u32 pitch;
 	union {
-		struct vmw_dma_buffer *buf;
+		struct vmw_buffer_object *buf;
 		u32 sid;
 	};
 };
@@ -508,14 +508,14 @@ static int vmw_stdu_crtc_page_flip(struct drm_crtc *crtc,
 
 
 /**
- * vmw_stdu_dmabuf_clip - Callback to encode a suface DMA command cliprect
+ * vmw_stdu_bo_clip - Callback to encode a suface DMA command cliprect
  *
  * @dirty: The closure structure.
  *
  * Encodes a surface DMA command cliprect and updates the bounding box
  * for the DMA.
  */
-static void vmw_stdu_dmabuf_clip(struct vmw_kms_dirty *dirty)
+static void vmw_stdu_bo_clip(struct vmw_kms_dirty *dirty)
 {
 	struct vmw_stdu_dirty *ddirty =
 		container_of(dirty, struct vmw_stdu_dirty, base);
@@ -543,14 +543,14 @@ static void vmw_stdu_dmabuf_clip(struct vmw_kms_dirty *dirty)
 }
 
 /**
- * vmw_stdu_dmabuf_fifo_commit - Callback to fill in and submit a DMA command.
+ * vmw_stdu_bo_fifo_commit - Callback to fill in and submit a DMA command.
  *
  * @dirty: The closure structure.
  *
  * Fills in the missing fields in a DMA command, and optionally encodes
  * a screen target update command, depending on transfer direction.
  */
-static void vmw_stdu_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
+static void vmw_stdu_bo_fifo_commit(struct vmw_kms_dirty *dirty)
 {
 	struct vmw_stdu_dirty *ddirty =
 		container_of(dirty, struct vmw_stdu_dirty, base);
@@ -594,13 +594,13 @@ static void vmw_stdu_dmabuf_fifo_commit(struct vmw_kms_dirty *dirty)
 
 
 /**
- * vmw_stdu_dmabuf_cpu_clip - Callback to encode a CPU blit
+ * vmw_stdu_bo_cpu_clip - Callback to encode a CPU blit
  *
  * @dirty: The closure structure.
  *
  * This function calculates the bounding box for all the incoming clips.
  */
-static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
+static void vmw_stdu_bo_cpu_clip(struct vmw_kms_dirty *dirty)
 {
 	struct vmw_stdu_dirty *ddirty =
 		container_of(dirty, struct vmw_stdu_dirty, base);
@@ -624,14 +624,14 @@ static void vmw_stdu_dmabuf_cpu_clip(struct vmw_kms_dirty *dirty)
 
 
 /**
- * vmw_stdu_dmabuf_cpu_commit - Callback to do a CPU blit from DMAbuf
+ * vmw_stdu_bo_cpu_commit - Callback to do a CPU blit from buffer object
  *
  * @dirty: The closure structure.
  *
  * For the special case when we cannot create a proxy surface in a
  * 2D VM, we have to do a CPU blit ourselves.
  */
-static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
+static void vmw_stdu_bo_cpu_commit(struct vmw_kms_dirty *dirty)
 {
 	struct vmw_stdu_dirty *ddirty =
 		container_of(dirty, struct vmw_stdu_dirty, base);
@@ -652,7 +652,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
 	if (width == 0 || height == 0)
 		return;
 
-	/* Assume we are blitting from Guest (dmabuf) to Host (display_srf) */
+	/* Assume we are blitting from Guest (bo) to Host (display_srf) */
 	dst_pitch = stdu->display_srf->base_size.width * stdu->cpp;
 	dst_bo = &stdu->display_srf->res.backup->base;
 	dst_offset = ddirty->top * dst_pitch + ddirty->left * stdu->cpp;
@@ -712,13 +712,13 @@ out_cleanup:
 }
 
 /**
- * vmw_kms_stdu_dma - Perform a DMA transfer between a dma-buffer backed
+ * vmw_kms_stdu_dma - Perform a DMA transfer between a buffer-object backed
  * framebuffer and the screen target system.
  *
  * @dev_priv: Pointer to the device private structure.
  * @file_priv: Pointer to a struct drm-file identifying the caller. May be
  * set to NULL, but then @user_fence_rep must also be set to NULL.
- * @vfb: Pointer to the dma-buffer backed framebuffer.
+ * @vfb: Pointer to the buffer-object backed framebuffer.
  * @clips: Array of clip rects. Either @clips or @vclips must be NULL.
  * @vclips: Alternate array of clip rects. Either @clips or @vclips must
  * be NULL.
@@ -747,8 +747,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 		     bool interruptible,
 		     struct drm_crtc *crtc)
 {
-	struct vmw_dma_buffer *buf =
-		container_of(vfb, struct vmw_framebuffer_dmabuf, base)->buffer;
+	struct vmw_buffer_object *buf =
+		container_of(vfb, struct vmw_framebuffer_bo, base)->buffer;
 	struct vmw_stdu_dirty ddirty;
 	int ret;
 	bool cpu_blit = !(dev_priv->capabilities & SVGA_CAP_3D);
@@ -770,8 +770,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 	ddirty.fb_left = ddirty.fb_top = S32_MAX;
 	ddirty.pitch = vfb->base.pitches[0];
 	ddirty.buf = buf;
-	ddirty.base.fifo_commit = vmw_stdu_dmabuf_fifo_commit;
-	ddirty.base.clip = vmw_stdu_dmabuf_clip;
+	ddirty.base.fifo_commit = vmw_stdu_bo_fifo_commit;
+	ddirty.base.clip = vmw_stdu_bo_clip;
 	ddirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_dma) +
 		num_clips * sizeof(SVGA3dCopyBox) +
 		sizeof(SVGA3dCmdSurfaceDMASuffix);
@@ -780,8 +780,8 @@ int vmw_kms_stdu_dma(struct vmw_private *dev_priv,
 
 
 	if (cpu_blit) {
-		ddirty.base.fifo_commit = vmw_stdu_dmabuf_cpu_commit;
-		ddirty.base.clip = vmw_stdu_dmabuf_cpu_clip;
+		ddirty.base.fifo_commit = vmw_stdu_bo_cpu_commit;
+		ddirty.base.clip = vmw_stdu_bo_cpu_clip;
 		ddirty.base.fifo_reserve_size = 0;
 	}
 
@@ -927,7 +927,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv,
 	if (ret)
 		return ret;
 
-	if (vfbs->is_dmabuf_proxy) {
+	if (vfbs->is_bo_proxy) {
 		ret = vmw_kms_update_proxy(srf, clips, num_clips, inc);
 		if (ret)
 			goto out_finish;
@@ -1075,7 +1075,7 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
  * @new_state: info on the new plane state, including the FB
  *
  * This function allocates a new display surface if the content is
- * backed by a DMA.  The display surface is pinned here, and it'll
+ * backed by a buffer object.  The display surface is pinned here, and it'll
  * be unpinned in .cleanup_fb()
  *
  * Returns 0 on success
@@ -1105,13 +1105,13 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	}
 
 	vfb = vmw_framebuffer_to_vfb(new_fb);
-	new_vfbs = (vfb->dmabuf) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
+	new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
 
 	if (new_vfbs && new_vfbs->surface->base_size.width == hdisplay &&
 	    new_vfbs->surface->base_size.height == vdisplay)
 		new_content_type = SAME_AS_DISPLAY;
-	else if (vfb->dmabuf)
-		new_content_type = SEPARATE_DMA;
+	else if (vfb->bo)
+		new_content_type = SEPARATE_BO;
 	else
 		new_content_type = SEPARATE_SURFACE;
 
@@ -1124,10 +1124,10 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 		display_base_size.depth  = 1;
 
 		/*
-		 * If content buffer is a DMA buf, then we have to construct
-		 * surface info
+		 * If content buffer is a buffer object, then we have to
+		 * construct surface info
 		 */
-		if (new_content_type == SEPARATE_DMA) {
+		if (new_content_type == SEPARATE_BO) {
 
 			switch (new_fb->format->cpp[0]*8) {
 			case 32:
@@ -1212,12 +1212,12 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	vps->content_fb_type = new_content_type;
 
 	/*
-	 * This should only happen if the DMA buf is too large to create a
+	 * This should only happen if the buffer object is too large to create a
 	 * proxy surface for.
-	 * If we are a 2D VM with a DMA buffer then we have to use CPU blit
+	 * If we are a 2D VM with a buffer object then we have to use CPU blit
 	 * so cache these mappings
 	 */
-	if (vps->content_fb_type == SEPARATE_DMA &&
+	if (vps->content_fb_type == SEPARATE_BO &&
 	    !(dev_priv->capabilities & SVGA_CAP_3D))
 		vps->cpp = new_fb->pitches[0] / new_fb->width;
 
@@ -1276,7 +1276,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 		if (ret)
 			DRM_ERROR("Failed to bind surface to STDU.\n");
 
-		if (vfb->dmabuf)
+		if (vfb->bo)
 			ret = vmw_kms_stdu_dma(dev_priv, NULL, vfb, NULL, NULL,
 					       &vclips, 1, 1, true, false,
 					       crtc);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index b236c48bf265..2b2e8aa7114a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -842,12 +842,12 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (dev_priv->has_mob && req->shareable) {
 		uint32_t backup_handle;
 
-		ret = vmw_user_dmabuf_alloc(dev_priv, tfile,
-					    res->backup_size,
-					    true,
-					    &backup_handle,
-					    &res->backup,
-					    &user_srf->backup_base);
+		ret = vmw_user_bo_alloc(dev_priv, tfile,
+					res->backup_size,
+					true,
+					&backup_handle,
+					&res->backup,
+					&user_srf->backup_base);
 		if (unlikely(ret != 0)) {
 			vmw_resource_unreference(&res);
 			goto out_unlock;
@@ -1317,14 +1317,14 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 
 
 	if (req->buffer_handle != SVGA3D_INVALID_ID) {
-		ret = vmw_user_dmabuf_lookup(tfile, req->buffer_handle,
-					     &res->backup,
-					     &user_srf->backup_base);
+		ret = vmw_user_bo_lookup(tfile, req->buffer_handle,
+					 &res->backup,
+					 &user_srf->backup_base);
 		if (ret == 0) {
 			if (res->backup->base.num_pages * PAGE_SIZE <
 			    res->backup_size) {
 				DRM_ERROR("Surface backup buffer is too small.\n");
-				vmw_dmabuf_unreference(&res->backup);
+				vmw_bo_unreference(&res->backup);
 				ret = -EINVAL;
 				goto out_unlock;
 			} else {
@@ -1332,13 +1332,13 @@ int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 			}
 		}
 	} else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer)
-		ret = vmw_user_dmabuf_alloc(dev_priv, tfile,
-					    res->backup_size,
-					    req->drm_surface_flags &
-					    drm_vmw_surface_flag_shareable,
-					    &backup_handle,
-					    &res->backup,
-					    &user_srf->backup_base);
+		ret = vmw_user_bo_alloc(dev_priv, tfile,
+					res->backup_size,
+					req->drm_surface_flags &
+					drm_vmw_surface_flag_shareable,
+					&backup_handle,
+					&res->backup,
+					&user_srf->backup_base);
 
 	if (unlikely(ret != 0)) {
 		vmw_resource_unreference(&res);
@@ -1414,8 +1414,7 @@ int vmw_gb_surface_reference_ioctl(struct drm_device *dev, void *data,
 	}
 
 	mutex_lock(&dev_priv->cmdbuf_mutex); /* Protect res->backup */
-	ret = vmw_user_dmabuf_reference(tfile, srf->res.backup,
-					&backup_handle);
+	ret = vmw_user_bo_reference(tfile, srf->res.backup, &backup_handle);
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 
 	if (unlikely(ret != 0)) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
new file mode 100644
index 000000000000..0931f43913b1
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -0,0 +1,887 @@
+/**************************************************************************
+ *
+ * Copyright © 2009-2015 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_drv.h"
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_page_alloc.h>
+
+static const struct ttm_place vram_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
+
+static const struct ttm_place vram_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+static const struct ttm_place sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+};
+
+static const struct ttm_place sys_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+static const struct ttm_place gmr_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
+
+static const struct ttm_place gmr_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+static const struct ttm_place mob_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+};
+
+static const struct ttm_place mob_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+struct ttm_placement vmw_vram_placement = {
+	.num_placement = 1,
+	.placement = &vram_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &vram_placement_flags
+};
+
+static const struct ttm_place vram_gmr_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}
+};
+
+static const struct ttm_place gmr_vram_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}
+};
+
+struct ttm_placement vmw_vram_gmr_placement = {
+	.num_placement = 2,
+	.placement = vram_gmr_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_placement_flags
+};
+
+static const struct ttm_place vram_gmr_ne_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED |
+			 TTM_PL_FLAG_NO_EVICT
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED |
+			 TTM_PL_FLAG_NO_EVICT
+	}
+};
+
+struct ttm_placement vmw_vram_gmr_ne_placement = {
+	.num_placement = 2,
+	.placement = vram_gmr_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_ne_placement_flags
+};
+
+struct ttm_placement vmw_vram_sys_placement = {
+	.num_placement = 1,
+	.placement = &vram_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_vram_ne_placement = {
+	.num_placement = 1,
+	.placement = &vram_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &vram_ne_placement_flags
+};
+
+struct ttm_placement vmw_sys_placement = {
+	.num_placement = 1,
+	.placement = &sys_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_sys_ne_placement = {
+	.num_placement = 1,
+	.placement = &sys_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_ne_placement_flags
+};
+
+static const struct ttm_place evictable_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+	}
+};
+
+static const struct ttm_place nonfixed_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+	}
+};
+
+struct ttm_placement vmw_evictable_placement = {
+	.num_placement = 4,
+	.placement = evictable_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_srf_placement = {
+	.num_placement = 1,
+	.num_busy_placement = 2,
+	.placement = &gmr_placement_flags,
+	.busy_placement = gmr_vram_placement_flags
+};
+
+struct ttm_placement vmw_mob_placement = {
+	.num_placement = 1,
+	.num_busy_placement = 1,
+	.placement = &mob_placement_flags,
+	.busy_placement = &mob_placement_flags
+};
+
+struct ttm_placement vmw_mob_ne_placement = {
+	.num_placement = 1,
+	.num_busy_placement = 1,
+	.placement = &mob_ne_placement_flags,
+	.busy_placement = &mob_ne_placement_flags
+};
+
+struct ttm_placement vmw_nonfixed_placement = {
+	.num_placement = 3,
+	.placement = nonfixed_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct vmw_ttm_tt {
+	struct ttm_dma_tt dma_ttm;
+	struct vmw_private *dev_priv;
+	int gmr_id;
+	struct vmw_mob *mob;
+	int mem_type;
+	struct sg_table sgt;
+	struct vmw_sg_table vsgt;
+	uint64_t sg_alloc_size;
+	bool mapped;
+};
+
+const size_t vmw_tt_size = sizeof(struct vmw_ttm_tt);
+
+/**
+ * Helper functions to advance a struct vmw_piter iterator.
+ *
+ * @viter: Pointer to the iterator.
+ *
+ * These functions return false if past the end of the list,
+ * true otherwise. Functions are selected depending on the current
+ * DMA mapping mode.
+ */
+static bool __vmw_piter_non_sg_next(struct vmw_piter *viter)
+{
+	return ++(viter->i) < viter->num_pages;
+}
+
+static bool __vmw_piter_sg_next(struct vmw_piter *viter)
+{
+	return __sg_page_iter_next(&viter->iter);
+}
+
+
+/**
+ * Helper functions to return a pointer to the current page.
+ *
+ * @viter: Pointer to the iterator
+ *
+ * These functions return a pointer to the page currently
+ * pointed to by @viter. Functions are selected depending on the
+ * current mapping mode.
+ */
+static struct page *__vmw_piter_non_sg_page(struct vmw_piter *viter)
+{
+	return viter->pages[viter->i];
+}
+
+static struct page *__vmw_piter_sg_page(struct vmw_piter *viter)
+{
+	return sg_page_iter_page(&viter->iter);
+}
+
+
+/**
+ * Helper functions to return the DMA address of the current page.
+ *
+ * @viter: Pointer to the iterator
+ *
+ * These functions return the DMA address of the page currently
+ * pointed to by @viter. Functions are selected depending on the
+ * current mapping mode.
+ */
+static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter)
+{
+	return page_to_phys(viter->pages[viter->i]);
+}
+
+static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)
+{
+	return viter->addrs[viter->i];
+}
+
+static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter)
+{
+	return sg_page_iter_dma_address(&viter->iter);
+}
+
+
+/**
+ * vmw_piter_start - Initialize a struct vmw_piter.
+ *
+ * @viter: Pointer to the iterator to initialize
+ * @vsgt: Pointer to a struct vmw_sg_table to initialize from
+ *
+ * Note that we're following the convention of __sg_page_iter_start, so that
+ * the iterator doesn't point to a valid page after initialization; it has
+ * to be advanced one step first.
+ */
+void vmw_piter_start(struct vmw_piter *viter, const struct vmw_sg_table *vsgt,
+		     unsigned long p_offset)
+{
+	viter->i = p_offset - 1;
+	viter->num_pages = vsgt->num_pages;
+	switch (vsgt->mode) {
+	case vmw_dma_phys:
+		viter->next = &__vmw_piter_non_sg_next;
+		viter->dma_address = &__vmw_piter_phys_addr;
+		viter->page = &__vmw_piter_non_sg_page;
+		viter->pages = vsgt->pages;
+		break;
+	case vmw_dma_alloc_coherent:
+		viter->next = &__vmw_piter_non_sg_next;
+		viter->dma_address = &__vmw_piter_dma_addr;
+		viter->page = &__vmw_piter_non_sg_page;
+		viter->addrs = vsgt->addrs;
+		viter->pages = vsgt->pages;
+		break;
+	case vmw_dma_map_populate:
+	case vmw_dma_map_bind:
+		viter->next = &__vmw_piter_sg_next;
+		viter->dma_address = &__vmw_piter_sg_addr;
+		viter->page = &__vmw_piter_sg_page;
+		__sg_page_iter_start(&viter->iter, vsgt->sgt->sgl,
+				     vsgt->sgt->orig_nents, p_offset);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * vmw_ttm_unmap_from_dma - unmap  device addresses previsouly mapped for
+ * TTM pages
+ *
+ * @vmw_tt: Pointer to a struct vmw_ttm_backend
+ *
+ * Used to free dma mappings previously mapped by vmw_ttm_map_for_dma.
+ */
+static void vmw_ttm_unmap_from_dma(struct vmw_ttm_tt *vmw_tt)
+{
+	struct device *dev = vmw_tt->dev_priv->dev->dev;
+
+	dma_unmap_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.nents,
+		DMA_BIDIRECTIONAL);
+	vmw_tt->sgt.nents = vmw_tt->sgt.orig_nents;
+}
+
+/**
+ * vmw_ttm_map_for_dma - map TTM pages to get device addresses
+ *
+ * @vmw_tt: Pointer to a struct vmw_ttm_backend
+ *
+ * This function is used to get device addresses from the kernel DMA layer.
+ * However, it's violating the DMA API in that when this operation has been
+ * performed, it's illegal for the CPU to write to the pages without first
+ * unmapping the DMA mappings, or calling dma_sync_sg_for_cpu(). It is
+ * therefore only legal to call this function if we know that the function
+ * dma_sync_sg_for_cpu() is a NOP, and dma_sync_sg_for_device() is at most
+ * a CPU write buffer flush.
+ */
+static int vmw_ttm_map_for_dma(struct vmw_ttm_tt *vmw_tt)
+{
+	struct device *dev = vmw_tt->dev_priv->dev->dev;
+	int ret;
+
+	ret = dma_map_sg(dev, vmw_tt->sgt.sgl, vmw_tt->sgt.orig_nents,
+			 DMA_BIDIRECTIONAL);
+	if (unlikely(ret == 0))
+		return -ENOMEM;
+
+	vmw_tt->sgt.nents = ret;
+
+	return 0;
+}
+
+/**
+ * vmw_ttm_map_dma - Make sure TTM pages are visible to the device
+ *
+ * @vmw_tt: Pointer to a struct vmw_ttm_tt
+ *
+ * Select the correct function for and make sure the TTM pages are
+ * visible to the device. Allocate storage for the device mappings.
+ * If a mapping has already been performed, indicated by the storage
+ * pointer being non NULL, the function returns success.
+ */
+static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
+{
+	struct vmw_private *dev_priv = vmw_tt->dev_priv;
+	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
+	struct vmw_sg_table *vsgt = &vmw_tt->vsgt;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = true,
+		.no_wait_gpu = false
+	};
+	struct vmw_piter iter;
+	dma_addr_t old;
+	int ret = 0;
+	static size_t sgl_size;
+	static size_t sgt_size;
+
+	if (vmw_tt->mapped)
+		return 0;
+
+	vsgt->mode = dev_priv->map_mode;
+	vsgt->pages = vmw_tt->dma_ttm.ttm.pages;
+	vsgt->num_pages = vmw_tt->dma_ttm.ttm.num_pages;
+	vsgt->addrs = vmw_tt->dma_ttm.dma_address;
+	vsgt->sgt = &vmw_tt->sgt;
+
+	switch (dev_priv->map_mode) {
+	case vmw_dma_map_bind:
+	case vmw_dma_map_populate:
+		if (unlikely(!sgl_size)) {
+			sgl_size = ttm_round_pot(sizeof(struct scatterlist));
+			sgt_size = ttm_round_pot(sizeof(struct sg_table));
+		}
+		vmw_tt->sg_alloc_size = sgt_size + sgl_size * vsgt->num_pages;
+		ret = ttm_mem_global_alloc(glob, vmw_tt->sg_alloc_size, &ctx);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+						vsgt->num_pages, 0,
+						(unsigned long)
+						vsgt->num_pages << PAGE_SHIFT,
+						GFP_KERNEL);
+		if (unlikely(ret != 0))
+			goto out_sg_alloc_fail;
+
+		if (vsgt->num_pages > vmw_tt->sgt.nents) {
+			uint64_t over_alloc =
+				sgl_size * (vsgt->num_pages -
+					    vmw_tt->sgt.nents);
+
+			ttm_mem_global_free(glob, over_alloc);
+			vmw_tt->sg_alloc_size -= over_alloc;
+		}
+
+		ret = vmw_ttm_map_for_dma(vmw_tt);
+		if (unlikely(ret != 0))
+			goto out_map_fail;
+
+		break;
+	default:
+		break;
+	}
+
+	old = ~((dma_addr_t) 0);
+	vmw_tt->vsgt.num_regions = 0;
+	for (vmw_piter_start(&iter, vsgt, 0); vmw_piter_next(&iter);) {
+		dma_addr_t cur = vmw_piter_dma_addr(&iter);
+
+		if (cur != old + PAGE_SIZE)
+			vmw_tt->vsgt.num_regions++;
+		old = cur;
+	}
+
+	vmw_tt->mapped = true;
+	return 0;
+
+out_map_fail:
+	sg_free_table(vmw_tt->vsgt.sgt);
+	vmw_tt->vsgt.sgt = NULL;
+out_sg_alloc_fail:
+	ttm_mem_global_free(glob, vmw_tt->sg_alloc_size);
+	return ret;
+}
+
+/**
+ * vmw_ttm_unmap_dma - Tear down any TTM page device mappings
+ *
+ * @vmw_tt: Pointer to a struct vmw_ttm_tt
+ *
+ * Tear down any previously set up device DMA mappings and free
+ * any storage space allocated for them. If there are no mappings set up,
+ * this function is a NOP.
+ */
+static void vmw_ttm_unmap_dma(struct vmw_ttm_tt *vmw_tt)
+{
+	struct vmw_private *dev_priv = vmw_tt->dev_priv;
+
+	if (!vmw_tt->vsgt.sgt)
+		return;
+
+	switch (dev_priv->map_mode) {
+	case vmw_dma_map_bind:
+	case vmw_dma_map_populate:
+		vmw_ttm_unmap_from_dma(vmw_tt);
+		sg_free_table(vmw_tt->vsgt.sgt);
+		vmw_tt->vsgt.sgt = NULL;
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_tt->sg_alloc_size);
+		break;
+	default:
+		break;
+	}
+	vmw_tt->mapped = false;
+}
+
+
+/**
+ * vmw_bo_map_dma - Make sure buffer object pages are visible to the device
+ *
+ * @bo: Pointer to a struct ttm_buffer_object
+ *
+ * Wrapper around vmw_ttm_map_dma, that takes a TTM buffer object pointer
+ * instead of a pointer to a struct vmw_ttm_backend as argument.
+ * Note that the buffer object must be either pinned or reserved before
+ * calling this function.
+ */
+int vmw_bo_map_dma(struct ttm_buffer_object *bo)
+{
+	struct vmw_ttm_tt *vmw_tt =
+		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+
+	return vmw_ttm_map_dma(vmw_tt);
+}
+
+
+/**
+ * vmw_bo_unmap_dma - Make sure buffer object pages are visible to the device
+ *
+ * @bo: Pointer to a struct ttm_buffer_object
+ *
+ * Wrapper around vmw_ttm_unmap_dma, that takes a TTM buffer object pointer
+ * instead of a pointer to a struct vmw_ttm_backend as argument.
+ */
+void vmw_bo_unmap_dma(struct ttm_buffer_object *bo)
+{
+	struct vmw_ttm_tt *vmw_tt =
+		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+
+	vmw_ttm_unmap_dma(vmw_tt);
+}
+
+
+/**
+ * vmw_bo_sg_table - Return a struct vmw_sg_table object for a
+ * TTM buffer object
+ *
+ * @bo: Pointer to a struct ttm_buffer_object
+ *
+ * Returns a pointer to a struct vmw_sg_table object. The object should
+ * not be freed after use.
+ * Note that for the device addresses to be valid, the buffer object must
+ * either be reserved or pinned.
+ */
+const struct vmw_sg_table *vmw_bo_sg_table(struct ttm_buffer_object *bo)
+{
+	struct vmw_ttm_tt *vmw_tt =
+		container_of(bo->ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+
+	return &vmw_tt->vsgt;
+}
+
+
+static int vmw_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
+{
+	struct vmw_ttm_tt *vmw_be =
+		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+	int ret;
+
+	ret = vmw_ttm_map_dma(vmw_be);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_be->gmr_id = bo_mem->start;
+	vmw_be->mem_type = bo_mem->mem_type;
+
+	switch (bo_mem->mem_type) {
+	case VMW_PL_GMR:
+		return vmw_gmr_bind(vmw_be->dev_priv, &vmw_be->vsgt,
+				    ttm->num_pages, vmw_be->gmr_id);
+	case VMW_PL_MOB:
+		if (unlikely(vmw_be->mob == NULL)) {
+			vmw_be->mob =
+				vmw_mob_create(ttm->num_pages);
+			if (unlikely(vmw_be->mob == NULL))
+				return -ENOMEM;
+		}
+
+		return vmw_mob_bind(vmw_be->dev_priv, vmw_be->mob,
+				    &vmw_be->vsgt, ttm->num_pages,
+				    vmw_be->gmr_id);
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static int vmw_ttm_unbind(struct ttm_tt *ttm)
+{
+	struct vmw_ttm_tt *vmw_be =
+		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+
+	switch (vmw_be->mem_type) {
+	case VMW_PL_GMR:
+		vmw_gmr_unbind(vmw_be->dev_priv, vmw_be->gmr_id);
+		break;
+	case VMW_PL_MOB:
+		vmw_mob_unbind(vmw_be->dev_priv, vmw_be->mob);
+		break;
+	default:
+		BUG();
+	}
+
+	if (vmw_be->dev_priv->map_mode == vmw_dma_map_bind)
+		vmw_ttm_unmap_dma(vmw_be);
+
+	return 0;
+}
+
+
+static void vmw_ttm_destroy(struct ttm_tt *ttm)
+{
+	struct vmw_ttm_tt *vmw_be =
+		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+
+	vmw_ttm_unmap_dma(vmw_be);
+	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+		ttm_dma_tt_fini(&vmw_be->dma_ttm);
+	else
+		ttm_tt_fini(ttm);
+
+	if (vmw_be->mob)
+		vmw_mob_destroy(vmw_be->mob);
+
+	kfree(vmw_be);
+}
+
+
+static int vmw_ttm_populate(struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
+{
+	struct vmw_ttm_tt *vmw_tt =
+		container_of(ttm, struct vmw_ttm_tt, dma_ttm.ttm);
+	struct vmw_private *dev_priv = vmw_tt->dev_priv;
+	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
+	int ret;
+
+	if (ttm->state != tt_unpopulated)
+		return 0;
+
+	if (dev_priv->map_mode == vmw_dma_alloc_coherent) {
+		size_t size =
+			ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t));
+		ret = ttm_mem_global_alloc(glob, size, ctx);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = ttm_dma_populate(&vmw_tt->dma_ttm, dev_priv->dev->dev,
+					ctx);
+		if (unlikely(ret != 0))
+			ttm_mem_global_free(glob, size);
+	} else
+		ret = ttm_pool_populate(ttm, ctx);
+
+	return ret;
+}
+
+static void vmw_ttm_unpopulate(struct ttm_tt *ttm)
+{
+	struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
+						 dma_ttm.ttm);
+	struct vmw_private *dev_priv = vmw_tt->dev_priv;
+	struct ttm_mem_global *glob = vmw_mem_glob(dev_priv);
+
+
+	if (vmw_tt->mob) {
+		vmw_mob_destroy(vmw_tt->mob);
+		vmw_tt->mob = NULL;
+	}
+
+	vmw_ttm_unmap_dma(vmw_tt);
+	if (dev_priv->map_mode == vmw_dma_alloc_coherent) {
+		size_t size =
+			ttm_round_pot(ttm->num_pages * sizeof(dma_addr_t));
+
+		ttm_dma_unpopulate(&vmw_tt->dma_ttm, dev_priv->dev->dev);
+		ttm_mem_global_free(glob, size);
+	} else
+		ttm_pool_unpopulate(ttm);
+}
+
+static struct ttm_backend_func vmw_ttm_func = {
+	.bind = vmw_ttm_bind,
+	.unbind = vmw_ttm_unbind,
+	.destroy = vmw_ttm_destroy,
+};
+
+static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
+					uint32_t page_flags)
+{
+	struct vmw_ttm_tt *vmw_be;
+	int ret;
+
+	vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
+	if (!vmw_be)
+		return NULL;
+
+	vmw_be->dma_ttm.ttm.func = &vmw_ttm_func;
+	vmw_be->dev_priv = container_of(bo->bdev, struct vmw_private, bdev);
+	vmw_be->mob = NULL;
+
+	if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+		ret = ttm_dma_tt_init(&vmw_be->dma_ttm, bo, page_flags);
+	else
+		ret = ttm_tt_init(&vmw_be->dma_ttm.ttm, bo, page_flags);
+	if (unlikely(ret != 0))
+		goto out_no_init;
+
+	return &vmw_be->dma_ttm.ttm;
+out_no_init:
+	kfree(vmw_be);
+	return NULL;
+}
+
+static int vmw_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
+{
+	return 0;
+}
+
+static int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+		      struct ttm_mem_type_manager *man)
+{
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		/* System memory */
+
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_CACHED;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_VRAM:
+		/* "On-card" video ram */
+		man->func = &ttm_bo_manager_func;
+		man->gpu_offset = 0;
+		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_CACHED;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case VMW_PL_GMR:
+	case VMW_PL_MOB:
+		/*
+		 * "Guest Memory Regions" is an aperture like feature with
+		 *  one slot per bo. There is an upper limit of the number of
+		 *  slots as well as the bo size.
+		 */
+		man->func = &vmw_gmrid_manager_func;
+		man->gpu_offset = 0;
+		man->flags = TTM_MEMTYPE_FLAG_CMA | TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_CACHED;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	default:
+		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void vmw_evict_flags(struct ttm_buffer_object *bo,
+		     struct ttm_placement *placement)
+{
+	*placement = vmw_sys_placement;
+}
+
+static int vmw_verify_access(struct ttm_buffer_object *bo, struct file *filp)
+{
+	struct ttm_object_file *tfile =
+		vmw_fpriv((struct drm_file *)filp->private_data)->tfile;
+
+	return vmw_user_bo_verify_access(bo, tfile);
+}
+
+static int vmw_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	struct vmw_private *dev_priv = container_of(bdev, struct vmw_private, bdev);
+
+	mem->bus.addr = NULL;
+	mem->bus.is_iomem = false;
+	mem->bus.offset = 0;
+	mem->bus.size = mem->num_pages << PAGE_SHIFT;
+	mem->bus.base = 0;
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+	switch (mem->mem_type) {
+	case TTM_PL_SYSTEM:
+	case VMW_PL_GMR:
+	case VMW_PL_MOB:
+		return 0;
+	case TTM_PL_VRAM:
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+		mem->bus.base = dev_priv->vram_start;
+		mem->bus.is_iomem = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void vmw_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+{
+}
+
+static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+{
+	return 0;
+}
+
+/**
+ * vmw_move_notify - TTM move_notify_callback
+ *
+ * @bo: The TTM buffer object about to move.
+ * @mem: The struct ttm_mem_reg indicating to what memory
+ *       region the move is taking place.
+ *
+ * Calls move_notify for all subsystems needing it.
+ * (currently only resources).
+ */
+static void vmw_move_notify(struct ttm_buffer_object *bo,
+			    bool evict,
+			    struct ttm_mem_reg *mem)
+{
+	vmw_resource_move_notify(bo, mem);
+	vmw_query_move_notify(bo, mem);
+}
+
+
+/**
+ * vmw_swap_notify - TTM move_notify_callback
+ *
+ * @bo: The TTM buffer object about to be swapped out.
+ */
+static void vmw_swap_notify(struct ttm_buffer_object *bo)
+{
+	vmw_resource_swap_notify(bo);
+	(void) ttm_bo_wait(bo, false, false);
+}
+
+
+struct ttm_bo_driver vmw_bo_driver = {
+	.ttm_tt_create = &vmw_ttm_tt_create,
+	.ttm_tt_populate = &vmw_ttm_populate,
+	.ttm_tt_unpopulate = &vmw_ttm_unpopulate,
+	.invalidate_caches = vmw_invalidate_caches,
+	.init_mem_type = vmw_init_mem_type,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.evict_flags = vmw_evict_flags,
+	.move = NULL,
+	.verify_access = vmw_verify_access,
+	.move_notify = vmw_move_notify,
+	.swap_notify = vmw_swap_notify,
+	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
+	.io_mem_reserve = &vmw_ttm_io_mem_reserve,
+	.io_mem_free = &vmw_ttm_io_mem_free,
+};
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 0bc784f5e0db..57115a5fe61a 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -40,6 +40,7 @@ extern "C" {
 
 #define DRM_VMW_GET_PARAM            0
 #define DRM_VMW_ALLOC_DMABUF         1
+#define DRM_VMW_ALLOC_BO             1
 #define DRM_VMW_UNREF_DMABUF         2
 #define DRM_VMW_HANDLE_CLOSE         2
 #define DRM_VMW_CURSOR_BYPASS        3
@@ -356,9 +357,9 @@ struct drm_vmw_fence_rep {
 
 /*************************************************************************/
 /**
- * DRM_VMW_ALLOC_DMABUF
+ * DRM_VMW_ALLOC_BO
  *
- * Allocate a DMA buffer that is visible also to the host.
+ * Allocate a buffer object that is visible also to the host.
  * NOTE: The buffer is
  * identified by a handle and an offset, which are private to the guest, but
  * useable in the command stream. The guest kernel may translate these
@@ -366,27 +367,28 @@ struct drm_vmw_fence_rep {
  * be zero at all times, or it may disappear from the interface before it is
  * fixed.
  *
- * The DMA buffer may stay user-space mapped in the guest at all times,
+ * The buffer object may stay user-space mapped in the guest at all times,
  * and is thus suitable for sub-allocation.
  *
- * DMA buffers are mapped using the mmap() syscall on the drm device.
+ * Buffer objects are mapped using the mmap() syscall on the drm device.
  */
 
 /**
- * struct drm_vmw_alloc_dmabuf_req
+ * struct drm_vmw_alloc_bo_req
  *
  * @size: Required minimum size of the buffer.
  *
- * Input data to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Input data to the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-struct drm_vmw_alloc_dmabuf_req {
+struct drm_vmw_alloc_bo_req {
 	__u32 size;
 	__u32 pad64;
 };
+#define drm_vmw_alloc_dmabuf_req drm_vmw_alloc_bo_req
 
 /**
- * struct drm_vmw_dmabuf_rep
+ * struct drm_vmw_bo_rep
  *
  * @map_handle: Offset to use in the mmap() call used to map the buffer.
  * @handle: Handle unique to this buffer. Used for unreferencing.
@@ -395,50 +397,32 @@ struct drm_vmw_alloc_dmabuf_req {
  * @cur_gmr_offset: Offset to use in the command stream when this buffer is
  * referenced. See note above.
  *
- * Output data from the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Output data from the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-struct drm_vmw_dmabuf_rep {
+struct drm_vmw_bo_rep {
 	__u64 map_handle;
 	__u32 handle;
 	__u32 cur_gmr_id;
 	__u32 cur_gmr_offset;
 	__u32 pad64;
 };
+#define drm_vmw_dmabuf_rep drm_vmw_bo_rep
 
 /**
- * union drm_vmw_dmabuf_arg
+ * union drm_vmw_alloc_bo_arg
  *
  * @req: Input data as described above.
  * @rep: Output data as described above.
  *
- * Argument to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Argument to the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-union drm_vmw_alloc_dmabuf_arg {
-	struct drm_vmw_alloc_dmabuf_req req;
-	struct drm_vmw_dmabuf_rep rep;
-};
-
-/*************************************************************************/
-/**
- * DRM_VMW_UNREF_DMABUF - Free a DMA buffer.
- *
- */
-
-/**
- * struct drm_vmw_unref_dmabuf_arg
- *
- * @handle: Handle indicating what buffer to free. Obtained from the
- * DRM_VMW_ALLOC_DMABUF Ioctl.
- *
- * Argument to the DRM_VMW_UNREF_DMABUF Ioctl.
- */
-
-struct drm_vmw_unref_dmabuf_arg {
-	__u32 handle;
-	__u32 pad64;
+union drm_vmw_alloc_bo_arg {
+	struct drm_vmw_alloc_bo_req req;
+	struct drm_vmw_bo_rep rep;
 };
+#define drm_vmw_alloc_dmabuf_arg drm_vmw_alloc_bo_arg
 
 /*************************************************************************/
 /**
@@ -1103,9 +1087,8 @@ union drm_vmw_extended_context_arg {
  * DRM_VMW_HANDLE_CLOSE - Close a user-space handle and release its
  * underlying resource.
  *
- * Note that this ioctl is overlaid on the DRM_VMW_UNREF_DMABUF Ioctl.
- * The ioctl arguments therefore need to be identical in layout.
- *
+ * Note that this ioctl is overlaid on the deprecated DRM_VMW_UNREF_DMABUF
+ * Ioctl.
  */
 
 /**
@@ -1119,7 +1102,7 @@ struct drm_vmw_handle_close_arg {
 	__u32 handle;
 	__u32 pad64;
 };
-
+#define drm_vmw_unref_dmabuf_arg drm_vmw_handle_close_arg
 
 #if defined(__cplusplus)
 }
-- 
cgit v1.2.3


From 63b8add24e1a99d7218c904f258623e7c07ce99e Mon Sep 17 00:00:00 2001
From: Elaine Zhang <zhangqing@rock-chips.com>
Date: Fri, 15 Jun 2018 10:16:49 +0800
Subject: clk: rockchip: add dt-binding header for px30

Add the dt-bindings header for the px30, that gets shared between
the clock controller and the clock references in the dts.
Add softreset ID for px30.

Signed-off-by: Elaine Zhang <zhangqing@rock-chips.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/px30-cru.h | 389 +++++++++++++++++++++++++++++++++++
 1 file changed, 389 insertions(+)
 create mode 100644 include/dt-bindings/clock/px30-cru.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/px30-cru.h b/include/dt-bindings/clock/px30-cru.h
new file mode 100644
index 000000000000..00101479f7c4
--- /dev/null
+++ b/include/dt-bindings/clock/px30-cru.h
@@ -0,0 +1,389 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_PX30_H
+
+/* core clocks */
+#define PLL_APLL		1
+#define PLL_DPLL		2
+#define PLL_CPLL		3
+#define PLL_NPLL		4
+#define APLL_BOOST_H		5
+#define APLL_BOOST_L		6
+#define ARMCLK			7
+
+/* sclk gates (special clocks) */
+#define USB480M			14
+#define SCLK_PDM		15
+#define SCLK_I2S0_TX		16
+#define SCLK_I2S0_TX_OUT	17
+#define SCLK_I2S0_RX		18
+#define SCLK_I2S0_RX_OUT	19
+#define SCLK_I2S1		20
+#define SCLK_I2S1_OUT		21
+#define SCLK_I2S2		22
+#define SCLK_I2S2_OUT		23
+#define SCLK_UART1		24
+#define SCLK_UART2		25
+#define SCLK_UART3		26
+#define SCLK_UART4		27
+#define SCLK_UART5		28
+#define SCLK_I2C0		29
+#define SCLK_I2C1		30
+#define SCLK_I2C2		31
+#define SCLK_I2C3		32
+#define SCLK_I2C4		33
+#define SCLK_PWM0		34
+#define SCLK_PWM1		35
+#define SCLK_SPI0		36
+#define SCLK_SPI1		37
+#define SCLK_TIMER0		38
+#define SCLK_TIMER1		39
+#define SCLK_TIMER2		40
+#define SCLK_TIMER3		41
+#define SCLK_TIMER4		42
+#define SCLK_TIMER5		43
+#define SCLK_TSADC		44
+#define SCLK_SARADC		45
+#define SCLK_OTP		46
+#define SCLK_OTP_USR		47
+#define SCLK_CRYPTO		48
+#define SCLK_CRYPTO_APK		49
+#define SCLK_DDRC		50
+#define SCLK_ISP		51
+#define SCLK_CIF_OUT		52
+#define SCLK_RGA_CORE		53
+#define SCLK_VOPB_PWM		54
+#define SCLK_NANDC		55
+#define SCLK_SDIO		56
+#define SCLK_EMMC		57
+#define SCLK_SFC		58
+#define SCLK_SDMMC		59
+#define SCLK_OTG_ADP		60
+#define SCLK_GMAC_SRC		61
+#define SCLK_GMAC		62
+#define SCLK_GMAC_RX_TX		63
+#define SCLK_MAC_REF		64
+#define SCLK_MAC_REFOUT		65
+#define SCLK_MAC_OUT		66
+#define SCLK_SDMMC_DRV		67
+#define SCLK_SDMMC_SAMPLE	68
+#define SCLK_SDIO_DRV		69
+#define SCLK_SDIO_SAMPLE	70
+#define SCLK_EMMC_DRV		71
+#define SCLK_EMMC_SAMPLE	72
+#define SCLK_GPU		73
+#define SCLK_PVTM		74
+#define SCLK_CORE_VPU		75
+#define SCLK_GMAC_RMII		76
+#define SCLK_UART2_SRC		77
+#define SCLK_NANDC_DIV		78
+#define SCLK_NANDC_DIV50	79
+#define SCLK_SDIO_DIV		80
+#define SCLK_SDIO_DIV50		81
+#define SCLK_EMMC_DIV		82
+#define SCLK_EMMC_DIV50		83
+#define SCLK_DDRCLK		84
+#define SCLK_UART1_SRC		85
+
+/* dclk gates */
+#define DCLK_VOPB		150
+#define DCLK_VOPL		151
+
+/* aclk gates */
+#define ACLK_GPU		170
+#define ACLK_BUS_PRE		171
+#define ACLK_CRYPTO		172
+#define ACLK_VI_PRE		173
+#define ACLK_VO_PRE		174
+#define ACLK_VPU		175
+#define ACLK_PERI_PRE		176
+#define ACLK_GMAC		178
+#define ACLK_CIF		179
+#define ACLK_ISP		180
+#define ACLK_VOPB		181
+#define ACLK_VOPL		182
+#define ACLK_RGA		183
+#define ACLK_GIC		184
+#define ACLK_DCF		186
+#define ACLK_DMAC		187
+#define ACLK_BUS_SRC		188
+#define ACLK_PERI_SRC		189
+
+/* hclk gates */
+#define HCLK_BUS_PRE		240
+#define HCLK_CRYPTO		241
+#define HCLK_VI_PRE		242
+#define HCLK_VO_PRE		243
+#define HCLK_VPU		244
+#define HCLK_PERI_PRE		245
+#define HCLK_MMC_NAND		246
+#define HCLK_SDMMC		247
+#define HCLK_USB		248
+#define HCLK_CIF		249
+#define HCLK_ISP		250
+#define HCLK_VOPB		251
+#define HCLK_VOPL		252
+#define HCLK_RGA		253
+#define HCLK_NANDC		254
+#define HCLK_SDIO		255
+#define HCLK_EMMC		256
+#define HCLK_SFC		257
+#define HCLK_OTG		258
+#define HCLK_HOST		259
+#define HCLK_HOST_ARB		260
+#define HCLK_PDM		261
+#define HCLK_I2S0		262
+#define HCLK_I2S1		263
+#define HCLK_I2S2		264
+
+/* pclk gates */
+#define PCLK_BUS_PRE		320
+#define PCLK_DDR		321
+#define PCLK_VO_PRE		322
+#define PCLK_GMAC		323
+#define PCLK_MIPI_DSI		324
+#define PCLK_MIPIDSIPHY		325
+#define PCLK_MIPICSIPHY		326
+#define PCLK_USB_GRF		327
+#define PCLK_DCF		328
+#define PCLK_UART1		329
+#define PCLK_UART2		330
+#define PCLK_UART3		331
+#define PCLK_UART4		332
+#define PCLK_UART5		333
+#define PCLK_I2C0		334
+#define PCLK_I2C1		335
+#define PCLK_I2C2		336
+#define PCLK_I2C3		337
+#define PCLK_I2C4		338
+#define PCLK_PWM0		339
+#define PCLK_PWM1		340
+#define PCLK_SPI0		341
+#define PCLK_SPI1		342
+#define PCLK_SARADC		343
+#define PCLK_TSADC		344
+#define PCLK_TIMER		345
+#define PCLK_OTP_NS		346
+#define PCLK_WDT_NS		347
+#define PCLK_GPIO1		348
+#define PCLK_GPIO2		349
+#define PCLK_GPIO3		350
+#define PCLK_ISP		351
+#define PCLK_CIF		352
+#define PCLK_OTP_PHY		353
+
+#define CLK_NR_CLKS		(PCLK_OTP_PHY + 1)
+
+/* pmu-clocks indices */
+
+#define PLL_GPLL		1
+
+#define SCLK_RTC32K_PMU		4
+#define SCLK_WIFI_PMU		5
+#define SCLK_UART0_PMU		6
+#define SCLK_PVTM_PMU		7
+#define PCLK_PMU_PRE		8
+#define SCLK_REF24M_PMU		9
+#define SCLK_USBPHY_REF		10
+#define SCLK_MIPIDSIPHY_REF	11
+
+#define XIN24M_DIV		12
+
+#define PCLK_GPIO0_PMU		20
+#define PCLK_UART0_PMU		21
+
+#define CLKPMU_NR_CLKS		(PCLK_UART0_PMU + 1)
+
+/* soft-reset indices */
+#define SRST_CORE0_PO		0
+#define SRST_CORE1_PO		1
+#define SRST_CORE2_PO		2
+#define SRST_CORE3_PO		3
+#define SRST_CORE0		4
+#define SRST_CORE1		5
+#define SRST_CORE2		6
+#define SRST_CORE3		7
+#define SRST_CORE0_DBG		8
+#define SRST_CORE1_DBG		9
+#define SRST_CORE2_DBG		10
+#define SRST_CORE3_DBG		11
+#define SRST_TOPDBG		12
+#define SRST_CORE_NOC		13
+#define SRST_STRC_A		14
+#define SRST_L2C		15
+
+#define SRST_DAP		16
+#define SRST_CORE_PVTM		17
+#define SRST_GPU		18
+#define SRST_GPU_NIU		19
+#define SRST_UPCTL2		20
+#define SRST_UPCTL2_A		21
+#define SRST_UPCTL2_P		22
+#define SRST_MSCH		23
+#define SRST_MSCH_P		24
+#define SRST_DDRMON_P		25
+#define SRST_DDRSTDBY_P		26
+#define SRST_DDRSTDBY		27
+#define SRST_DDRGRF_p		28
+#define SRST_AXI_SPLIT_A	29
+#define SRST_AXI_CMD_A		30
+#define SRST_AXI_CMD_P		31
+
+#define SRST_DDRPHY		32
+#define SRST_DDRPHYDIV		33
+#define SRST_DDRPHY_P		34
+#define SRST_VPU_A		36
+#define SRST_VPU_NIU_A		37
+#define SRST_VPU_H		38
+#define SRST_VPU_NIU_H		39
+#define SRST_VI_NIU_A		40
+#define SRST_VI_NIU_H		41
+#define SRST_ISP_H		42
+#define SRST_ISP		43
+#define SRST_CIF_A		44
+#define SRST_CIF_H		45
+#define SRST_CIF_PCLKIN		46
+#define SRST_MIPICSIPHY_P	47
+
+#define SRST_VO_NIU_A		48
+#define SRST_VO_NIU_H		49
+#define SRST_VO_NIU_P		50
+#define SRST_VOPB_A		51
+#define SRST_VOPB_H		52
+#define SRST_VOPB		53
+#define SRST_PWM_VOPB		54
+#define SRST_VOPL_A		55
+#define SRST_VOPL_H		56
+#define SRST_VOPL		57
+#define SRST_RGA_A		58
+#define SRST_RGA_H		59
+#define SRST_RGA		60
+#define SRST_MIPIDSI_HOST_P	61
+#define SRST_MIPIDSIPHY_P	62
+#define SRST_VPU_CORE		63
+
+#define SRST_PERI_NIU_A		64
+#define SRST_USB_NIU_H		65
+#define SRST_USB2OTG_H		66
+#define SRST_USB2OTG		67
+#define SRST_USB2OTG_ADP	68
+#define SRST_USB2HOST_H		69
+#define SRST_USB2HOST_ARB_H	70
+#define SRST_USB2HOST_AUX_H	71
+#define SRST_USB2HOST_EHCI	72
+#define SRST_USB2HOST		73
+#define SRST_USBPHYPOR		74
+#define SRST_USBPHY_OTG_PORT	75
+#define SRST_USBPHY_HOST_PORT	76
+#define SRST_USBPHY_GRF		77
+#define SRST_CPU_BOOST_P	78
+#define SRST_CPU_BOOST		79
+
+#define SRST_MMC_NAND_NIU_H	80
+#define SRST_SDIO_H		81
+#define SRST_EMMC_H		82
+#define SRST_SFC_H		83
+#define SRST_SFC		84
+#define SRST_SDCARD_NIU_H	85
+#define SRST_SDMMC_H		86
+#define SRST_NANDC_H		89
+#define SRST_NANDC		90
+#define SRST_GMAC_NIU_A		92
+#define SRST_GMAC_NIU_P		93
+#define SRST_GMAC_A		94
+
+#define SRST_PMU_NIU_P		96
+#define SRST_PMU_SGRF_P		97
+#define SRST_PMU_GRF_P		98
+#define SRST_PMU		99
+#define SRST_PMU_MEM_P		100
+#define SRST_PMU_GPIO0_P	101
+#define SRST_PMU_UART0_P	102
+#define SRST_PMU_CRU_P		103
+#define SRST_PMU_PVTM		104
+#define SRST_PMU_UART		105
+#define SRST_PMU_NIU_H		106
+#define SRST_PMU_DDR_FAIL_SAVE	107
+#define SRST_PMU_CORE_PERF_A	108
+#define SRST_PMU_CORE_GRF_P	109
+#define SRST_PMU_GPU_PERF_A	110
+#define SRST_PMU_GPU_GRF_P	111
+
+#define SRST_CRYPTO_NIU_A	112
+#define SRST_CRYPTO_NIU_H	113
+#define SRST_CRYPTO_A		114
+#define SRST_CRYPTO_H		115
+#define SRST_CRYPTO		116
+#define SRST_CRYPTO_APK		117
+#define SRST_BUS_NIU_H		120
+#define SRST_USB_NIU_P		121
+#define SRST_BUS_TOP_NIU_P	122
+#define SRST_INTMEM_A		123
+#define SRST_GIC_A		124
+#define SRST_ROM_H		126
+#define SRST_DCF_A		127
+
+#define SRST_DCF_P		128
+#define SRST_PDM_H		129
+#define SRST_PDM		130
+#define SRST_I2S0_H		131
+#define SRST_I2S0_TX		132
+#define SRST_I2S1_H		133
+#define SRST_I2S1		134
+#define SRST_I2S2_H		135
+#define SRST_I2S2		136
+#define SRST_UART1_P		137
+#define SRST_UART1		138
+#define SRST_UART2_P		139
+#define SRST_UART2		140
+#define SRST_UART3_P		141
+#define SRST_UART3		142
+#define SRST_UART4_P		143
+
+#define SRST_UART4		144
+#define SRST_UART5_P		145
+#define SRST_UART5		146
+#define SRST_I2C0_P		147
+#define SRST_I2C0		148
+#define SRST_I2C1_P		149
+#define SRST_I2C1		150
+#define SRST_I2C2_P		151
+#define SRST_I2C2		152
+#define SRST_I2C3_P		153
+#define SRST_I2C3		154
+#define SRST_PWM0_P		157
+#define SRST_PWM0		158
+#define SRST_PWM1_P		159
+
+#define SRST_PWM1		160
+#define SRST_SPI0_P		161
+#define SRST_SPI0		162
+#define SRST_SPI1_P		163
+#define SRST_SPI1		164
+#define SRST_SARADC_P		165
+#define SRST_SARADC		166
+#define SRST_TSADC_P		167
+#define SRST_TSADC		168
+#define SRST_TIMER_P		169
+#define SRST_TIMER0		170
+#define SRST_TIMER1		171
+#define SRST_TIMER2		172
+#define SRST_TIMER3		173
+#define SRST_TIMER4		174
+#define SRST_TIMER5		175
+
+#define SRST_OTP_NS_P		176
+#define SRST_OTP_NS_SBPI	177
+#define SRST_OTP_NS_USR		178
+#define SRST_OTP_PHY_P		179
+#define SRST_OTP_PHY		180
+#define SRST_WDT_NS_P		181
+#define SRST_GPIO1_P		182
+#define SRST_GPIO2_P		183
+#define SRST_GPIO3_P		184
+#define SRST_SGRF_P		185
+#define SRST_GRF_P		186
+#define SRST_I2S0_RX		191
+
+#endif
-- 
cgit v1.2.3


From 4eefd62c17a9a5e7576207e84f3d2b4f73aba750 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Mon, 2 Jul 2018 10:06:51 -0700
Subject: include/rdma/opa_addr.h: Fix an endianness issue

IB_MULTICAST_LID_BASE is defined as follows:

  #define IB_MULTICAST_LID_BASE   cpu_to_be16(0xC000)

Hence use be16_to_cpu() to convert it to CPU endianness. Compile-tested
only.

Fixes: af808ece5ce9 ("IB/SA: Check dlid before SA agent queries for ClassPortInfo")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/opa_addr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index 2bbb7a67e643..66d4393d339c 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
 	if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
 		if (!rdma_ah_get_dlid(attr) ||
 		    rdma_ah_get_dlid(attr) >=
-		    be32_to_cpu(IB_MULTICAST_LID_BASE))
+		    be16_to_cpu(IB_MULTICAST_LID_BASE))
 			return false;
 	} else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
 		if (!rdma_ah_get_dlid(attr) ||
-- 
cgit v1.2.3


From 63453b59e41173241c4efe9335815f6432fa8586 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 10:51:53 +0200
Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant

Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer,
then adds a new i2c_smbus_xfer function that simply grabs the lock while
calling the unlocked variant.

This is not perfectly equivalent, since i2c_smbus_xfer was callable from
atomic/irq context if you happened to end up emulating SMBus with an I2C
transfer, and that is no longer the case with this patch. It is unknown
(to me) if anything depends on that quirk, but it seems fragile enough to
simply break those cases and require them to call i2c_transfer directly
instead.

While at it, for consistency rename the 2nd to last argument (size) of
the i2c_smbus_xfer declaration to protocol and remove the surplus extern
marker.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-smbus.c | 28 ++++++++++++++++++++--------
 include/linux/i2c.h          | 11 ++++++++---
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 51970bae3c4a..9cd66cabb84f 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -463,7 +463,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 			msg[num-1].len++;
 	}
 
-	status = i2c_transfer(adapter, msg, num);
+	status = __i2c_transfer(adapter, msg, num);
 	if (status < 0)
 		goto cleanup;
 	if (status != num) {
@@ -524,9 +524,24 @@ cleanup:
  * This executes an SMBus protocol operation, and returns a negative
  * errno code else zero on success.
  */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write,
+		   u8 command, int protocol, union i2c_smbus_data *data)
+{
+	s32 res;
+
+	i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+	res = __i2c_smbus_xfer(adapter, addr, flags, read_write,
+			       command, protocol, data);
+	i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write,
+		     u8 command, int protocol, union i2c_smbus_data *data)
 {
 	unsigned long orig_jiffies;
 	int try;
@@ -543,8 +558,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
 
 	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
 		for (res = 0, try = 0; try <= adapter->retries; try++) {
@@ -557,7 +570,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
 			goto trace;
@@ -579,7 +591,7 @@ trace:
 
 	return res;
 }
-EXPORT_SYMBOL(i2c_smbus_xfer);
+EXPORT_SYMBOL(__i2c_smbus_xfer);
 
 /**
  * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..465afb092fa7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
    and probably just as fast.
    Note that we use i2c_adapter here, because you do not need a specific
    smbus adapter to call this function. */
-extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			  unsigned short flags, char read_write, u8 command,
-			  int size, union i2c_smbus_data *data);
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write, u8 command,
+		   int protocol, union i2c_smbus_data *data);
+
+/* Unlocked flavor */
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write, u8 command,
+		     int protocol, union i2c_smbus_data *data);
 
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
-- 
cgit v1.2.3


From 5ccba64a560fa6ca06008d4001f5d46ebeb34b41 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 2 Feb 2018 10:14:49 +0800
Subject: ftrace: Nuke clear_ftrace_function

clear_ftrace_function is not used outside of ftrace.c and is not help to
use a function, so nuke it per Steve's suggestion.

Link: http://lkml.kernel.org/r/1517537689-34947-1-git-send-email-xieyisheng1@huawei.com

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 --
 kernel/trace/ftrace.c  | 13 +------------
 2 files changed, 1 insertion(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8154f4920fcb..ebb77674be90 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -223,7 +223,6 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type;
  */
 int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
-void clear_ftrace_function(void);
 
 extern void ftrace_stub(unsigned long a0, unsigned long a1,
 			struct ftrace_ops *op, struct pt_regs *regs);
@@ -239,7 +238,6 @@ static inline int ftrace_nr_registered_ops(void)
 {
 	return 0;
 }
-static inline void clear_ftrace_function(void) { }
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efed9c1cfb7e..caf9cbf35816 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -192,17 +192,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 	op->saved_func(ip, parent_ip, op, regs);
 }
 
-/**
- * clear_ftrace_function - reset the ftrace function
- *
- * This NULLs the ftrace function and in essence stops
- * tracing.  There may be lag
- */
-void clear_ftrace_function(void)
-{
-	ftrace_trace_function = ftrace_stub;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -6689,7 +6678,7 @@ void ftrace_kill(void)
 {
 	ftrace_disabled = 1;
 	ftrace_enabled = 0;
-	clear_ftrace_function();
+	ftrace_trace_function = ftrace_stub;
 }
 
 /**
-- 
cgit v1.2.3


From 077772468ec141b22e1e7c0c58bc09e2f9dc8762 Mon Sep 17 00:00:00 2001
From: Wang Dongsheng <dongsheng.wang@hxt-semitech.com>
Date: Sun, 1 Jul 2018 23:15:46 -0700
Subject: net: phy: marvell: change default m88e1510 LED configuration

The m88e1121 LED default configuration does not apply m88e151x.
So add a function to relpace m88e1121 LED configuration.

Signed-off-by: Wang Dongsheng <dongsheng.wang@hxt-semitech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c   | 54 ++++++++++++++++++++++++++++++---------------
 include/linux/marvell_phy.h |  2 ++
 2 files changed, 38 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b8f57e9b9379..1cd439bdf608 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -130,8 +130,9 @@
 #define MII_88E1318S_PHY_WOL_CTRL_CLEAR_WOL_STATUS		BIT(12)
 #define MII_88E1318S_PHY_WOL_CTRL_MAGIC_PACKET_MATCH_ENABLE	BIT(14)
 
-#define MII_88E1121_PHY_LED_CTRL	16
+#define MII_PHY_LED_CTRL	        16
 #define MII_88E1121_PHY_LED_DEF		0x0030
+#define MII_88E1510_PHY_LED_DEF		0x1177
 
 #define MII_M1011_PHY_STATUS		0x11
 #define MII_M1011_PHY_STATUS_1000	0x8000
@@ -632,8 +633,40 @@ error:
 	return err;
 }
 
+static void marvell_config_led(struct phy_device *phydev)
+{
+	u16 def_config;
+	int err;
+
+	switch (MARVELL_PHY_FAMILY_ID(phydev->phy_id)) {
+	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1121R):
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1318S):
+		def_config = MII_88E1121_PHY_LED_DEF;
+		break;
+	/* Default PHY LED config:
+	 * LED[0] .. 1000Mbps Link
+	 * LED[1] .. 100Mbps Link
+	 * LED[2] .. Blink, Activity
+	 */
+	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1510):
+		def_config = MII_88E1510_PHY_LED_DEF;
+		break;
+	default:
+		return;
+	}
+
+	err = phy_write_paged(phydev, MII_MARVELL_LED_PAGE, MII_PHY_LED_CTRL,
+			      def_config);
+	if (err < 0)
+		pr_warn("Fail to config marvell phy LED.\n");
+}
+
 static int marvell_config_init(struct phy_device *phydev)
 {
+	/* Set defalut LED */
+	marvell_config_led(phydev);
+
 	/* Set registers from marvell,reg-init DT property */
 	return marvell_of_reg_init(phydev);
 }
@@ -813,21 +846,6 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
-static int m88e1121_config_init(struct phy_device *phydev)
-{
-	int err;
-
-	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
-	err = phy_write_paged(phydev, MII_MARVELL_LED_PAGE,
-			      MII_88E1121_PHY_LED_CTRL,
-			      MII_88E1121_PHY_LED_DEF);
-	if (err < 0)
-		return err;
-
-	/* Set marvell,reg-init configuration from device tree */
-	return marvell_config_init(phydev);
-}
-
 static int m88e1318_config_init(struct phy_device *phydev)
 {
 	if (phy_interrupt_is_valid(phydev)) {
@@ -841,7 +859,7 @@ static int m88e1318_config_init(struct phy_device *phydev)
 			return err;
 	}
 
-	return m88e1121_config_init(phydev);
+	return marvell_config_init(phydev);
 }
 
 static int m88e1510_config_init(struct phy_device *phydev)
@@ -2087,7 +2105,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1121_probe,
-		.config_init = &m88e1121_config_init,
+		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 4f5f8c21e283..1eb6f244588d 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -27,6 +27,8 @@
  */
 #define MARVELL_PHY_ID_88E6390		0x01410f90
 
+#define MARVELL_PHY_FAMILY_ID(id)	((id) >> 4)
+
 /* struct phy_device dev_flags definitions */
 #define MARVELL_PHY_M1145_FLAGS_RESISTANCE	0x00000001
 #define MARVELL_PHY_M1118_DNS323_LEDS		0x00000002
-- 
cgit v1.2.3


From 69b9e1e07d98b57b972df3c44647ca8795284d39 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:11 +0800
Subject: ipv4: add __ip_queue_xmit() that supports tos param

This patch introduces __ip_queue_xmit(), through which the callers
can pass tos param into it without having to set inet->tos. For
ipv6, ip6_xmit() already allows passing tclass parameter.

It's needed when some transport protocol doesn't use inet->tos,
like sctp's per transport dscp, which will be added in next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h     | 9 ++++++++-
 net/ipv4/ip_output.c | 9 +++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 0d2281b4b27a..09da79d8ceea 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -148,7 +148,8 @@ void ip_send_check(struct iphdr *ip);
 int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+		    __u8 tos);
 void ip_init(void);
 int ip_append_data(struct sock *sk, struct flowi4 *fl4,
 		   int getfrag(void *from, char *to, int offset, int len,
@@ -174,6 +175,12 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4,
 			    struct ipcm_cookie *ipc, struct rtable **rtp,
 			    struct inet_cork *cork, unsigned int flags);
 
+static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
+				struct flowi *fl)
+{
+	return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+}
+
 static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4)
 {
 	return __ip_make_skb(sk, fl4, &sk->sk_write_queue, &inet_sk(sk)->cork.base);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b3308e9d9762..188cc586e7ff 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -423,7 +423,8 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
 }
 
 /* Note: skb->sk can be different from sk, in case of tunnels */
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+		    __u8 tos)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct net *net = sock_net(sk);
@@ -462,7 +463,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 					   inet->inet_dport,
 					   inet->inet_sport,
 					   sk->sk_protocol,
-					   RT_CONN_FLAGS(sk),
+					   RT_CONN_FLAGS_TOS(sk, tos),
 					   sk->sk_bound_dev_if);
 		if (IS_ERR(rt))
 			goto no_route;
@@ -478,7 +479,7 @@ packet_routed:
 	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
-	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff));
 	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 		iph->frag_off = htons(IP_DF);
 	else
@@ -511,7 +512,7 @@ no_route:
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
-EXPORT_SYMBOL(ip_queue_xmit);
+EXPORT_SYMBOL(__ip_queue_xmit);
 
 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 {
-- 
cgit v1.2.3


From 8a9c58d28d0f66569737a3295116710ed24573cd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:12 +0800
Subject: sctp: add support for dscp and flowlabel per transport

Like some other per transport params, flowlabel and dscp are added
in transport, asoc and sctp_sock. By default, transport sets its
value from asoc's, and asoc does it from sctp_sock. flowlabel
only works for ipv6 transport.

Other than that they need to be passed down in sctp_xmit, flow4/6
also needs to set them before looking up route in get_dst.

Note that it uses '& 0x100000' to check if flowlabel is set and
'& 0x1' (tos 1st bit is unused) to check if dscp is set by users,
so that they could be set to 0 by sockopt in next patch.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h       |  7 +++++++
 include/net/sctp/structs.h |  9 +++++++++
 net/sctp/associola.c       |  7 +++++++
 net/sctp/ipv6.c            | 11 +++++++++--
 net/sctp/protocol.c        | 16 ++++++++++++----
 5 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index b36c76635f18..83d94341e003 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -801,4 +801,11 @@ struct sctp_strreset_resptsn {
 	__be32 receivers_next_tsn;
 };
 
+enum {
+	SCTP_DSCP_SET_MASK = 0x1,
+	SCTP_DSCP_VAL_MASK = 0xfc,
+	SCTP_FLOWLABEL_SET_MASK = 0x100000,
+	SCTP_FLOWLABEL_VAL_MASK = 0xfffff
+};
+
 #endif /* __LINUX_SCTP_H__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 701a51736fa5..ab869e0d8326 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -193,6 +193,9 @@ struct sctp_sock {
 	/* This is the max_retrans value for new associations. */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* The initial Path MTU to use for new associations. */
 	__u32 pathmtu;
 
@@ -895,6 +898,9 @@ struct sctp_transport {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* This is the partially failed retrans value for the transport
 	 * and will be initialized from the assocs value.  This can be changed
 	 * using the SCTP_PEER_ADDR_THLDS socket option
@@ -1772,6 +1778,9 @@ struct sctp_association {
 	 */
 	__u16 pathmaxrxt;
 
+	__u32 flowlabel;
+	__u8  dscp;
+
 	/* Flag that path mtu update is pending */
 	__u8   pmtu_pending;
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5d5a16204d50..16ecfbc95614 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -115,6 +115,9 @@ static struct sctp_association *sctp_association_init(
 	/* Initialize path max retrans value. */
 	asoc->pathmaxrxt = sp->pathmaxrxt;
 
+	asoc->flowlabel = sp->flowlabel;
+	asoc->dscp = sp->dscp;
+
 	/* Initialize default path MTU. */
 	asoc->pathmtu = sp->pathmtu;
 
@@ -647,6 +650,10 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	peer->sackdelay = asoc->sackdelay;
 	peer->sackfreq = asoc->sackfreq;
 
+	if (addr->sa.sa_family == AF_INET6)
+		peer->flowlabel = asoc->flowlabel;
+	peer->dscp = asoc->dscp;
+
 	/* Enable/disable heartbeat, SACK delay, and path MTU discovery
 	 * based on association setting.
 	 */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0cd2e764f47f..38102bf7f13e 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,12 +209,17 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi6 *fl6 = &transport->fl.u.ip6;
+	__u8 tclass = np->tclass;
 	int res;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
 		 skb->len, &fl6->saddr, &fl6->daddr);
 
-	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		tclass = transport->dscp & SCTP_DSCP_VAL_MASK;
+
+	if (INET_ECN_is_capable(tclass))
+		IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->ignore_df = 1;
@@ -223,7 +228,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	rcu_read_lock();
 	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       np->tclass);
+		       tclass);
 	rcu_read_unlock();
 	return res;
 }
@@ -254,6 +259,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl6->flowi6_oif = daddr->v6.sin6_scope_id;
 	else if (asoc)
 		fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if;
+	if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
+		fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
 
 	pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 67f73d3a1356..e948db29ab53 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -426,13 +426,16 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	struct dst_entry *dst = NULL;
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
+	__u8 tos = inet_sk(sk)->tos;
 
+	if (t->dscp & SCTP_DSCP_SET_MASK)
+		tos = t->dscp & SCTP_DSCP_VAL_MASK;
 	memset(fl4, 0x0, sizeof(struct flowi4));
 	fl4->daddr  = daddr->v4.sin_addr.s_addr;
 	fl4->fl4_dport = daddr->v4.sin_port;
 	fl4->flowi4_proto = IPPROTO_SCTP;
 	if (asoc) {
-		fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
 		fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
 		fl4->fl4_sport = htons(asoc->base.bind_addr.port);
 	}
@@ -495,7 +498,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl4->fl4_sport = laddr->a.v4.sin_port;
 		flowi4_update_output(fl4,
 				     asoc->base.sk->sk_bound_dev_if,
-				     RT_CONN_FLAGS(asoc->base.sk),
+				     RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
 				     daddr->v4.sin_addr.s_addr,
 				     laddr->a.v4.sin_addr.s_addr);
 
@@ -971,16 +974,21 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 			       struct sctp_transport *transport)
 {
 	struct inet_sock *inet = inet_sk(skb->sk);
+	__u8 dscp = inet->tos;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
-		 skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
+		 skb->len, &transport->fl.u.ip4.saddr,
+		 &transport->fl.u.ip4.daddr);
+
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		dscp = transport->dscp & SCTP_DSCP_VAL_MASK;
 
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
 	SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
 
-	return ip_queue_xmit(&inet->sk, skb, &transport->fl);
+	return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp);
 }
 
 static struct sctp_af sctp_af_inet;
-- 
cgit v1.2.3


From 0b0dce7a36fb9f1a9dd8245ea82d3a268c6943fe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 2 Jul 2018 18:21:13 +0800
Subject: sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams

spp_ipv6_flowlabel and spp_dscp are added in sctp_paddrparams in
this patch so that users could set sctp_sock/asoc/transport dscp
and flowlabel with spp_flags SPP_IPV6_FLOWLABEL or SPP_DSCP by
SCTP_PEER_ADDR_PARAMS , as described section 8.1.12 in RFC6458.

As said in last patch, it uses '| 0x100000' or '|0x1' to mark
flowlabel or dscp is set,  so that their values could be set
to 0.

Note that to guarantee that an old app built with old kernel
headers could work on the newer kernel, the param's check in
sctp_g/setsockopt_peer_addr_params() is also improved, which
follows the way that sctp_g/setsockopt_delayed_ack() or some
other sockopts' process that accept two types of params does.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |   4 ++
 net/sctp/socket.c         | 177 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 175 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index c02986a284db..b479db5c71d9 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -763,6 +763,8 @@ enum  sctp_spp_flags {
 	SPP_SACKDELAY_DISABLE = 1<<6,	/*Disable SACK*/
 	SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
 	SPP_HB_TIME_IS_ZERO = 1<<7,	/* Set HB delay to 0 */
+	SPP_IPV6_FLOWLABEL = 1<<8,
+	SPP_DSCP = 1<<9,
 };
 
 struct sctp_paddrparams {
@@ -773,6 +775,8 @@ struct sctp_paddrparams {
 	__u32			spp_pathmtu;
 	__u32			spp_sackdelay;
 	__u32			spp_flags;
+	__u32			spp_ipv6_flowlabel;
+	__u8			spp_dscp;
 } __attribute__((packed, aligned(4)));
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0e4c8332771a..50b7ef975b42 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2393,6 +2393,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *     uint32_t                spp_pathmtu;
  *     uint32_t                spp_sackdelay;
  *     uint32_t                spp_flags;
+ *     uint32_t                spp_ipv6_flowlabel;
+ *     uint8_t                 spp_dscp;
  * };
  *
  *   spp_assoc_id    - (one-to-many style socket) This is filled in the
@@ -2472,6 +2474,45 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 				       struct sctp_transport   *trans,
@@ -2611,6 +2652,51 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
+	if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
+		if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
+			trans->flowlabel = params->spp_ipv6_flowlabel &
+					   SCTP_FLOWLABEL_VAL_MASK;
+			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		} else if (asoc) {
+			list_for_each_entry(trans,
+					    &asoc->peer.transport_addr_list,
+					    transports) {
+				if (trans->ipaddr.sa.sa_family != AF_INET6)
+					continue;
+				trans->flowlabel = params->spp_ipv6_flowlabel &
+						   SCTP_FLOWLABEL_VAL_MASK;
+				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+			}
+			asoc->flowlabel = params->spp_ipv6_flowlabel &
+					  SCTP_FLOWLABEL_VAL_MASK;
+			asoc->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		} else if (sctp_opt2sk(sp)->sk_family == AF_INET6) {
+			sp->flowlabel = params->spp_ipv6_flowlabel &
+					SCTP_FLOWLABEL_VAL_MASK;
+			sp->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		}
+	}
+
+	if (params->spp_flags & SPP_DSCP) {
+		if (trans) {
+			trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			trans->dscp |= SCTP_DSCP_SET_MASK;
+		} else if (asoc) {
+			list_for_each_entry(trans,
+					    &asoc->peer.transport_addr_list,
+					    transports) {
+				trans->dscp = params->spp_dscp &
+					      SCTP_DSCP_VAL_MASK;
+				trans->dscp |= SCTP_DSCP_SET_MASK;
+			}
+			asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			asoc->dscp |= SCTP_DSCP_SET_MASK;
+		} else {
+			sp->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			sp->dscp |= SCTP_DSCP_SET_MASK;
+		}
+	}
+
 	return 0;
 }
 
@@ -2625,11 +2711,18 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	int error;
 	int hb_change, pmtud_change, sackdelay_change;
 
-	if (optlen != sizeof(struct sctp_paddrparams))
+	if (optlen == sizeof(params)) {
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
+	} else if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
+					    spp_ipv6_flowlabel), 4)) {
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
+		if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
+			return -EINVAL;
+	} else {
 		return -EINVAL;
-
-	if (copy_from_user(&params, optval, optlen))
-		return -EFAULT;
+	}
 
 	/* Validate flags and value parameters. */
 	hb_change        = params.spp_flags & SPP_HB;
@@ -5453,6 +5546,45 @@ out:
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 					    char __user *optval, int __user *optlen)
@@ -5462,9 +5594,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	struct sctp_association *asoc = NULL;
 	struct sctp_sock        *sp = sctp_sk(sk);
 
-	if (len < sizeof(struct sctp_paddrparams))
+	if (len >= sizeof(params))
+		len = sizeof(params);
+	else if (len >= ALIGN(offsetof(struct sctp_paddrparams,
+				       spp_ipv6_flowlabel), 4))
+		len = ALIGN(offsetof(struct sctp_paddrparams,
+				     spp_ipv6_flowlabel), 4);
+	else
 		return -EINVAL;
-	len = sizeof(struct sctp_paddrparams);
+
 	if (copy_from_user(&params, optval, len))
 		return -EFAULT;
 
@@ -5499,6 +5637,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = trans->param_flags;
+		if (trans->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = trans->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (trans->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= trans->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	} else if (asoc) {
 		/* Fetch association values. */
 		params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
@@ -5508,6 +5655,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = asoc->param_flags;
+		if (asoc->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = asoc->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (asoc->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= asoc->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	} else {
 		/* Fetch socket values. */
 		params.spp_hbinterval = sp->hbinterval;
@@ -5517,6 +5673,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = sp->param_flags;
+		if (sp->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = sp->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (sp->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= sp->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	}
 
 	if (copy_to_user(optval, &params, len))
-- 
cgit v1.2.3


From f6ad8c1bcdf014272d08c55b9469536952a0a771 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:12:45 +0100
Subject: net: core: trivial netif_receive_skb_list() entry point

Just calls netif_receive_skb() in a loop.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 64480a0f2c16..f67258f057ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3477,6 +3477,7 @@ int netif_rx(struct sk_buff *skb);
 int netif_rx_ni(struct sk_buff *skb);
 int netif_receive_skb(struct sk_buff *skb);
 int netif_receive_skb_core(struct sk_buff *skb);
+void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
diff --git a/net/core/dev.c b/net/core/dev.c
index 08d58e0debe5..85c456a4b551 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4906,6 +4906,25 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
+/**
+ *	netif_receive_skb_list - process many receive buffers from network
+ *	@head: list of skbs to process.
+ *
+ *	For now, just calls netif_receive_skb() in a loop, ignoring the
+ *	return value.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ */
+void netif_receive_skb_list(struct list_head *head)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		netif_receive_skb(skb);
+}
+EXPORT_SYMBOL(netif_receive_skb_list);
+
 DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
-- 
cgit v1.2.3


From 920572b73280a29e3a9f58807a8b90051b19ee60 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:13:11 +0100
Subject: net: core: unwrap skb list receive slightly further

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/net.h | 7 +++++++
 net/core/dev.c             | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 9c886739246a..00aa72ce0e7c 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -223,6 +223,13 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_entry,
 	TP_ARGS(skb)
 );
 
+DEFINE_EVENT(net_dev_rx_verbose_template, netif_receive_skb_list_entry,
+
+	TP_PROTO(const struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
 DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_entry,
 
 	TP_PROTO(const struct sk_buff *skb),
diff --git a/net/core/dev.c b/net/core/dev.c
index 85c456a4b551..308acfd48139 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4920,8 +4920,10 @@ void netif_receive_skb_list(struct list_head *head)
 {
 	struct sk_buff *skb, *next;
 
+	list_for_each_entry(skb, head, list)
+		trace_netif_receive_skb_list_entry(skb);
 	list_for_each_entry_safe(skb, next, head, list)
-		netif_receive_skb(skb);
+		netif_receive_skb_internal(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
-- 
cgit v1.2.3


From 4ce0017a373afaaa9ef17614d8fa4f6fde261d18 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:13:40 +0100
Subject: net: core: another layer of lists, around PF_MEMALLOC skb handling

First example of a layer splitting the list (rather than merely taking
 individual packets off it).
Involves new list.h function, list_cut_before(), like list_cut_position()
 but cuts on the other side of the given entry.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/list.h | 30 ++++++++++++++++++++++++++++++
 net/core/dev.c       | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 66 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 4b129df4d46b..de04cc5ed536 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -285,6 +285,36 @@ static inline void list_cut_position(struct list_head *list,
 		__list_cut_position(list, head, entry);
 }
 
+/**
+ * list_cut_before - cut a list into two, before given entry
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ *
+ * This helper moves the initial part of @head, up to but
+ * excluding @entry, from @head to @list.  You should pass
+ * in @entry an element you know is on @head.  @list should
+ * be an empty list or a list you do not care about losing
+ * its data.
+ * If @entry == @head, all entries on @head are moved to
+ * @list.
+ */
+static inline void list_cut_before(struct list_head *list,
+				   struct list_head *head,
+				   struct list_head *entry)
+{
+	if (head->next == entry) {
+		INIT_LIST_HEAD(list);
+		return;
+	}
+	list->next = head->next;
+	list->next->prev = list;
+	list->prev = entry->prev;
+	list->prev->next = list;
+	head->next = entry;
+	entry->prev = head;
+}
+
 static inline void __list_splice(const struct list_head *list,
 				 struct list_head *prev,
 				 struct list_head *next)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e87361df2ab..9aadef976e8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4784,6 +4784,14 @@ int netif_receive_skb_core(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb_core);
 
+static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		__netif_receive_skb_core(skb, pfmemalloc);
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	int ret;
@@ -4809,6 +4817,34 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	return ret;
 }
 
+static void __netif_receive_skb_list(struct list_head *head)
+{
+	unsigned long noreclaim_flag = 0;
+	struct sk_buff *skb, *next;
+	bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
+			struct list_head sublist;
+
+			/* Handle the previous sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			__netif_receive_skb_list_core(&sublist, pfmemalloc);
+			pfmemalloc = !pfmemalloc;
+			/* See comments in __netif_receive_skb */
+			if (pfmemalloc)
+				noreclaim_flag = memalloc_noreclaim_save();
+			else
+				memalloc_noreclaim_restore(noreclaim_flag);
+		}
+	}
+	/* Handle the remaining sublist */
+	__netif_receive_skb_list_core(head, pfmemalloc);
+	/* Restore pflags */
+	if (pfmemalloc)
+		memalloc_noreclaim_restore(noreclaim_flag);
+}
+
 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4843,14 +4879,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	return ret;
 }
 
-static void __netif_receive_skb_list(struct list_head *head)
-{
-	struct sk_buff *skb, *next;
-
-	list_for_each_entry_safe(skb, next, head, list)
-		__netif_receive_skb(skb);
-}
-
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
 	int ret;
-- 
cgit v1.2.3


From 17266ee939849cb095ed7dd9edbec4162172226b Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 2 Jul 2018 16:14:12 +0100
Subject: net: ipv4: listified version of ip_rcv

Also involved adding a way to run a netfilter hook over a list of packets.
 Rather than attempting to make netfilter know about lists (which would be
 a major project in itself) we just let it call the regular okfn (in this
 case ip_rcv_finish()) for any packets it steals, and have it give us back
 a list of packets it's synchronously accepted (which normally NF_HOOK
 would automatically call okfn() on, but we want to be able to potentially
 pass the list to a listified version of okfn().)
The netfilter hooks themselves are indirect calls that still happen per-
 packet (see nf_hook_entry_hookfn()), but again, changing that can be left
 for future work.

There is potential for out-of-order receives if the netfilter hook ends up
 synchronously stealing packets, as they will be processed before any
 accepts earlier in the list.  However, it was already possible for an
 asynchronous accept to cause out-of-order receives, so presumably this is
 considered OK.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  3 +++
 include/linux/netfilter.h | 22 +++++++++++++++
 include/net/ip.h          |  2 ++
 net/core/dev.c            |  8 +++---
 net/ipv4/af_inet.c        |  1 +
 net/ipv4/ip_input.c       | 68 ++++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 94 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f67258f057ca..c1ef749b6f9f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2297,6 +2297,9 @@ struct packet_type {
 					 struct net_device *,
 					 struct packet_type *,
 					 struct net_device *);
+	void			(*list_func) (struct list_head *,
+					      struct packet_type *,
+					      struct net_device *);
 	bool			(*id_match)(struct packet_type *ptype,
 					    struct sock *sk);
 	void			*af_packet_priv;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index dd2052f0efb7..5a5e0a2ab2a3 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -288,6 +288,20 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
 	return ret;
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+	     struct list_head *head, struct net_device *in, struct net_device *out,
+	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+		if (ret != 1)
+			list_del(&skb->list);
+	}
+}
+
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt,
 		  unsigned int len);
@@ -369,6 +383,14 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	return okfn(net, sk, skb);
 }
 
+static inline void
+NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
+	     struct list_head *head, struct net_device *in, struct net_device *out,
+	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+{
+	/* nothing to do */
+}
+
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 			  struct sock *sk, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
diff --git a/include/net/ip.h b/include/net/ip.h
index 09da79d8ceea..99d1b835d2aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -138,6 +138,8 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
 			  struct ip_options_rcu *opt);
 int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	   struct net_device *orig_dev);
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev);
 int ip_local_deliver(struct sk_buff *skb);
 int ip_mr_input(struct sk_buff *skb);
 int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1bc485bb0678..5e22719ce71d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4806,9 +4806,11 @@ static inline void __netif_receive_skb_list_ptype(struct list_head *head,
 		return;
 	if (list_empty(head))
 		return;
-
-	list_for_each_entry_safe(skb, next, head, list)
-		pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	if (pt_prev->list_func != NULL)
+		pt_prev->list_func(head, pt_prev, orig_dev);
+	else
+		list_for_each_entry_safe(skb, next, head, list)
+			pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
 static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9263a2c114e0..c716be13d58c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1882,6 +1882,7 @@ fs_initcall(ipv4_offload_init);
 static struct packet_type ip_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.func = ip_rcv,
+	.list_func = ip_list_rcv,
 };
 
 static int __init inet_init(void)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7582713dd18f..914240830bdf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -408,10 +408,9 @@ drop_error:
 /*
  * 	Main IP Receive routine.
  */
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 {
 	const struct iphdr *iph;
-	struct net *net;
 	u32 len;
 
 	/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +420,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 
 
-	net = dev_net(dev);
 	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +487,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
-		       net, NULL, skb, dev, NULL,
-		       ip_rcv_finish);
+	return skb;
 
 csum_error:
 	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +496,63 @@ inhdr_error:
 drop:
 	kfree_skb(skb);
 out:
-	return NET_RX_DROP;
+	return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+	   struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+
+	skb = ip_rcv_core(skb, net);
+	if (skb == NULL)
+		return NET_RX_DROP;
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
+		       ip_rcv_finish);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+			   struct net *net)
+{
+	struct sk_buff *skb, *next;
+
+	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+		     head, dev, NULL, ip_rcv_finish);
+	list_for_each_entry_safe(skb, next, head, list)
+		ip_rcv_finish(net, NULL, skb);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		skb = ip_rcv_core(skb, net);
+		if (skb == NULL)
+			continue;
+
+		if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			if (!list_empty(&sublist))
+				ip_sublist_rcv(&sublist, dev, net);
+			/* start new sublist */
+			curr_dev = dev;
+			curr_net = net;
+		}
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv(head, curr_dev, curr_net);
 }
-- 
cgit v1.2.3


From 06635894a371be69373fcb5484f559284c8d1ac3 Mon Sep 17 00:00:00 2001
From: Sameer Nanda <snanda@chromium.org>
Date: Wed, 2 May 2018 17:44:16 +0200
Subject: mfd: cros_ec: Add USBPD charger commands and struct definitions.

The USBPD charger driver gets information from the ChromeOS EC, this
patch adds the USBPD charger definitions needed by this driver.

Signed-off-by: Sameer Nanda <snanda@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 132 +++++++++++++++++++++++++++++++++--
 1 file changed, 128 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index f2edd9969b40..0d926492ac3a 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2593,14 +2593,18 @@ struct ec_params_current_limit {
 } __packed;
 
 /*
- * Set maximum external power current.
+ * Set maximum external voltage / current.
  */
-#define EC_CMD_EXT_POWER_CURRENT_LIMIT 0xa2
+#define EC_CMD_EXTERNAL_POWER_LIMIT 0x00A2
 
-struct ec_params_ext_power_current_limit {
-	uint32_t limit; /* in mA */
+/* Command v0 is used only on Spring and is obsolete + unsupported */
+struct ec_params_external_power_limit_v1 {
+	uint16_t current_lim; /* in mA, or EC_POWER_LIMIT_NONE to clear limit */
+	uint16_t voltage_lim; /* in mV, or EC_POWER_LIMIT_NONE to clear limit */
 } __packed;
 
+#define EC_POWER_LIMIT_NONE 0xffff
+
 /* Inform the EC when entering a sleep state */
 #define EC_CMD_HOST_SLEEP_EVENT 0xa9
 
@@ -2974,6 +2978,12 @@ enum usb_chg_type {
 	USB_CHG_TYPE_VBUS,
 	USB_CHG_TYPE_UNKNOWN,
 };
+enum usb_power_roles {
+	USB_PD_PORT_POWER_DISCONNECTED,
+	USB_PD_PORT_POWER_SOURCE,
+	USB_PD_PORT_POWER_SINK,
+	USB_PD_PORT_POWER_SINK_NOT_CHARGING,
+};
 
 struct usb_chg_measures {
 	uint16_t voltage_max;
@@ -2991,6 +3001,120 @@ struct ec_response_usb_pd_power_info {
 	uint32_t max_power;
 } __packed;
 
+struct ec_params_usb_pd_info_request {
+	uint8_t port;
+} __packed;
+
+/* Read USB-PD Device discovery info */
+#define EC_CMD_USB_PD_DISCOVERY 0x0113
+struct ec_params_usb_pd_discovery_entry {
+	uint16_t vid;  /* USB-IF VID */
+	uint16_t pid;  /* USB-IF PID */
+	uint8_t ptype; /* product type (hub,periph,cable,ama) */
+} __packed;
+
+/* Override default charge behavior */
+#define EC_CMD_PD_CHARGE_PORT_OVERRIDE 0x0114
+
+/* Negative port parameters have special meaning */
+enum usb_pd_override_ports {
+	OVERRIDE_DONT_CHARGE = -2,
+	OVERRIDE_OFF = -1,
+	/* [0, CONFIG_USB_PD_PORT_COUNT): Port# */
+};
+
+struct ec_params_charge_port_override {
+	int16_t override_port; /* Override port# */
+} __packed;
+
+/* Read (and delete) one entry of PD event log */
+#define EC_CMD_PD_GET_LOG_ENTRY 0x0115
+
+struct ec_response_pd_log {
+	uint32_t timestamp; /* relative timestamp in milliseconds */
+	uint8_t type;       /* event type : see PD_EVENT_xx below */
+	uint8_t size_port;  /* [7:5] port number [4:0] payload size in bytes */
+	uint16_t data;      /* type-defined data payload */
+	uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
+} __packed;
+
+/* The timestamp is the microsecond counter shifted to get about a ms. */
+#define PD_LOG_TIMESTAMP_SHIFT 10 /* 1 LSB = 1024us */
+
+#define PD_LOG_SIZE_MASK  0x1f
+#define PD_LOG_PORT_MASK  0xe0
+#define PD_LOG_PORT_SHIFT    5
+#define PD_LOG_PORT_SIZE(port, size) (((port) << PD_LOG_PORT_SHIFT) | \
+				      ((size) & PD_LOG_SIZE_MASK))
+#define PD_LOG_PORT(size_port) ((size_port) >> PD_LOG_PORT_SHIFT)
+#define PD_LOG_SIZE(size_port) ((size_port) & PD_LOG_SIZE_MASK)
+
+/* PD event log : entry types */
+/* PD MCU events */
+#define PD_EVENT_MCU_BASE       0x00
+#define PD_EVENT_MCU_CHARGE             (PD_EVENT_MCU_BASE+0)
+#define PD_EVENT_MCU_CONNECT            (PD_EVENT_MCU_BASE+1)
+/* Reserved for custom board event */
+#define PD_EVENT_MCU_BOARD_CUSTOM       (PD_EVENT_MCU_BASE+2)
+/* PD generic accessory events */
+#define PD_EVENT_ACC_BASE       0x20
+#define PD_EVENT_ACC_RW_FAIL   (PD_EVENT_ACC_BASE+0)
+#define PD_EVENT_ACC_RW_ERASE  (PD_EVENT_ACC_BASE+1)
+/* PD power supply events */
+#define PD_EVENT_PS_BASE        0x40
+#define PD_EVENT_PS_FAULT      (PD_EVENT_PS_BASE+0)
+/* PD video dongles events */
+#define PD_EVENT_VIDEO_BASE     0x60
+#define PD_EVENT_VIDEO_DP_MODE (PD_EVENT_VIDEO_BASE+0)
+#define PD_EVENT_VIDEO_CODEC   (PD_EVENT_VIDEO_BASE+1)
+/* Returned in the "type" field, when there is no entry available */
+#define PD_EVENT_NO_ENTRY       0xff
+
+/*
+ * PD_EVENT_MCU_CHARGE event definition :
+ * the payload is "struct usb_chg_measures"
+ * the data field contains the port state flags as defined below :
+ */
+/* Port partner is a dual role device */
+#define CHARGE_FLAGS_DUAL_ROLE         BIT(15)
+/* Port is the pending override port */
+#define CHARGE_FLAGS_DELAYED_OVERRIDE  BIT(14)
+/* Port is the override port */
+#define CHARGE_FLAGS_OVERRIDE          BIT(13)
+/* Charger type */
+#define CHARGE_FLAGS_TYPE_SHIFT               3
+#define CHARGE_FLAGS_TYPE_MASK       (0xf << CHARGE_FLAGS_TYPE_SHIFT)
+/* Power delivery role */
+#define CHARGE_FLAGS_ROLE_MASK         (7 <<  0)
+
+/*
+ * PD_EVENT_PS_FAULT data field flags definition :
+ */
+#define PS_FAULT_OCP                          1
+#define PS_FAULT_FAST_OCP                     2
+#define PS_FAULT_OVP                          3
+#define PS_FAULT_DISCH                        4
+
+/*
+ * PD_EVENT_VIDEO_CODEC payload is "struct mcdp_info".
+ */
+struct mcdp_version {
+	uint8_t major;
+	uint8_t minor;
+	uint16_t build;
+} __packed;
+
+struct mcdp_info {
+	uint8_t family[2];
+	uint8_t chipid[2];
+	struct mcdp_version irom;
+	struct mcdp_version fw;
+} __packed;
+
+/* struct mcdp_info field decoding */
+#define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1])
+#define MCDP_FAMILY(family) ((family[0] << 8) | family[1])
+
 /* Get info about USB-C SS muxes */
 #define EC_CMD_USB_PD_MUX_INFO 0x11a
 
-- 
cgit v1.2.3


From 7494de0454af50215bc46c93c83b88a32ca39fab Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:38 +0200
Subject: mfd: da9063: Replace regmap_add_irq_chip with devm counterpart

Use devm_regmap_add_irq_chip() instead of plain regmap_add_irq_chip(),
which removes the need for da9063_irq_exit() altogether and also
fixes a bug in da9063_device_init() where the da9063_irq_exit() was
not called in a failpath.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c       | 1 -
 drivers/mfd/da9063-irq.c        | 8 ++------
 include/linux/mfd/da9063/core.h | 1 -
 3 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index 6c2870d4e754..2647bb371d86 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -238,7 +238,6 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 void da9063_device_exit(struct da9063 *da9063)
 {
 	mfd_remove_devices(da9063->dev);
-	da9063_irq_exit(da9063);
 }
 
 MODULE_DESCRIPTION("PMIC driver for Dialog DA9063");
diff --git a/drivers/mfd/da9063-irq.c b/drivers/mfd/da9063-irq.c
index 207bbfe55449..da6ceb41f0d1 100644
--- a/drivers/mfd/da9063-irq.c
+++ b/drivers/mfd/da9063-irq.c
@@ -170,7 +170,8 @@ int da9063_irq_init(struct da9063 *da9063)
 		return -EINVAL;
 	}
 
-	ret = regmap_add_irq_chip(da9063->regmap, da9063->chip_irq,
+	ret = devm_regmap_add_irq_chip(da9063->dev, da9063->regmap,
+			da9063->chip_irq,
 			IRQF_TRIGGER_LOW | IRQF_ONESHOT | IRQF_SHARED,
 			da9063->irq_base, &da9063_irq_chip,
 			&da9063->regmap_irq);
@@ -182,8 +183,3 @@ int da9063_irq_init(struct da9063 *da9063)
 
 	return 0;
 }
-
-void da9063_irq_exit(struct da9063 *da9063)
-{
-	regmap_del_irq_chip(da9063->chip_irq, da9063->regmap_irq);
-}
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index f3ae65db4c86..9ab7049977aa 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -95,6 +95,5 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq);
 int da9063_irq_init(struct da9063 *da9063);
 
 void da9063_device_exit(struct da9063 *da9063);
-void da9063_irq_exit(struct da9063 *da9063);
 
 #endif /* __MFD_DA9063_CORE_H__ */
-- 
cgit v1.2.3


From af8df945876c027969fafefb9ec07b79cfadb16f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:39 +0200
Subject: mfd: da9063: Replace mfd_add_devices with devm counterpart

Use devm_mfd_add_devices() instead of plain mfd_add_devices(), which
removes the need for da9063_device_exit() altogether and also for the
.remove callback in da9063-i2c.c .

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c       | 11 +++--------
 drivers/mfd/da9063-i2c.c        | 10 ----------
 include/linux/mfd/da9063/core.h |  2 --
 3 files changed, 3 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index 2647bb371d86..76258e5709f8 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -226,20 +226,15 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 
 	da9063->irq_base = regmap_irq_chip_get_base(da9063->regmap_irq);
 
-	ret = mfd_add_devices(da9063->dev, -1, da9063_devs,
-			      ARRAY_SIZE(da9063_devs), NULL, da9063->irq_base,
-			      NULL);
+	ret = devm_mfd_add_devices(da9063->dev, -1, da9063_devs,
+				   ARRAY_SIZE(da9063_devs), NULL,
+				   da9063->irq_base, NULL);
 	if (ret)
 		dev_err(da9063->dev, "Cannot add MFD cells\n");
 
 	return ret;
 }
 
-void da9063_device_exit(struct da9063 *da9063)
-{
-	mfd_remove_devices(da9063->dev);
-}
-
 MODULE_DESCRIPTION("PMIC driver for Dialog DA9063");
 MODULE_AUTHOR("Krystian Garbaciak");
 MODULE_AUTHOR("Michal Hajduk");
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index 981805a2c521..29456e807ed4 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -270,15 +270,6 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 	return da9063_device_init(da9063, i2c->irq);
 }
 
-static int da9063_i2c_remove(struct i2c_client *i2c)
-{
-	struct da9063 *da9063 = i2c_get_clientdata(i2c);
-
-	da9063_device_exit(da9063);
-
-	return 0;
-}
-
 static const struct i2c_device_id da9063_i2c_id[] = {
 	{"da9063", PMIC_DA9063},
 	{},
@@ -291,7 +282,6 @@ static struct i2c_driver da9063_i2c_driver = {
 		.of_match_table = of_match_ptr(da9063_dt_ids),
 	},
 	.probe    = da9063_i2c_probe,
-	.remove   = da9063_i2c_remove,
 	.id_table = da9063_i2c_id,
 };
 
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 9ab7049977aa..8e6684d884e0 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -94,6 +94,4 @@ struct da9063 {
 int da9063_device_init(struct da9063 *da9063, unsigned int irq);
 int da9063_irq_init(struct da9063 *da9063);
 
-void da9063_device_exit(struct da9063 *da9063);
-
 #endif /* __MFD_DA9063_CORE_H__ */
-- 
cgit v1.2.3


From c727eea92c9232169c0d375309718fb398aaec67 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:43 +0200
Subject: mfd: da9063: Replace DA9063_NUM_IRQ with ARRAY_SIZE

Replace DA9063_NUM_IRQ macro which is not used anywhere with
plain ARRAY_SIZE().

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-irq.c        | 2 +-
 include/linux/mfd/da9063/core.h | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/da9063-irq.c b/drivers/mfd/da9063-irq.c
index 044bd867f540..579947f83486 100644
--- a/drivers/mfd/da9063-irq.c
+++ b/drivers/mfd/da9063-irq.c
@@ -94,7 +94,7 @@ static const struct regmap_irq da9063_irqs[] = {
 static const struct regmap_irq_chip da9063_irq_chip = {
 	.name = "da9063-irq",
 	.irqs = da9063_irqs,
-	.num_irqs = DA9063_NUM_IRQ,
+	.num_irqs = ARRAY_SIZE(da9063_irqs),
 	.num_regs = 4,
 	.status_base = DA9063_REG_EVENT_A,
 	.mask_base = DA9063_REG_IRQ_MASK_A,
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 8e6684d884e0..260cd5834861 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -72,9 +72,6 @@ enum da9063_irqs {
 	DA9063_IRQ_GPI15,
 };
 
-#define DA9063_IRQ_BASE_OFFSET	0
-#define DA9063_NUM_IRQ		(DA9063_IRQ_GPI15 + 1 - DA9063_IRQ_BASE_OFFSET)
-
 struct da9063 {
 	/* Device */
 	struct device	*dev;
-- 
cgit v1.2.3


From df7878f9dc77a9f630893811f0401bc021df4fbf Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:44 +0200
Subject: mfd: da9063: Rename PMIC_DA9063 to PMIC_CHIP_ID_DA9063

The PMIC_DA9063 is a complete misnomer, it denotes the value of the
DA9063 chip ID register, so rename it as such. It is also the value
of chip ID register of DA9063L though, so drop the enum as all the
DA9063 "models" share the same chip ID and thus the distinction will
have to be made using DT or otherwise.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c            | 2 +-
 drivers/mfd/da9063-i2c.c             | 2 +-
 drivers/regulator/da9063-regulator.c | 2 +-
 include/linux/mfd/da9063/core.h      | 4 +---
 4 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index f57558590283..c54777cc2f12 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -192,7 +192,7 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 		dev_err(da9063->dev, "Cannot read chip model id.\n");
 		return -EIO;
 	}
-	if (model != PMIC_DA9063) {
+	if (model != PMIC_CHIP_ID_DA9063) {
 		dev_err(da9063->dev, "Invalid chip model id: 0x%02x\n", model);
 		return -ENODEV;
 	}
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index e9797153bc19..d1fe88777e3f 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -181,7 +181,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 }
 
 static const struct i2c_device_id da9063_i2c_id[] = {
-	{"da9063", PMIC_DA9063},
+	{ "da9063", PMIC_CHIP_ID_DA9063 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, da9063_i2c_id);
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index 2df26f36c687..7f4ac0413182 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -585,7 +585,7 @@ static struct da9063_dev_model regulators_models[] = {
 	{
 		.regulator_info = da9063_regulator_info,
 		.n_regulators = ARRAY_SIZE(da9063_regulator_info),
-		.dev_model = PMIC_DA9063,
+		.dev_model = PMIC_CHIP_ID_DA9063,
 	},
 	{ }
 };
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 260cd5834861..e015b065db7e 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -29,9 +29,7 @@
 #define DA9063_DRVNAME_RTC		"da9063-rtc"
 #define DA9063_DRVNAME_VIBRATION	"da9063-vibration"
 
-enum da9063_models {
-	PMIC_DA9063 = 0x61,
-};
+#define PMIC_CHIP_ID_DA9063		0x61
 
 enum da9063_variant_codes {
 	PMIC_DA9063_AD = 0x3,
-- 
cgit v1.2.3


From 492510dd7d39794e809d8218a2839e857c101ce5 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:45 +0200
Subject: mfd: da9063: Replace model with type

The model number stored in the struct da9063 is the same for all
variants of the da9063 since it is the chip ID, which is always
the same. Replace that with a separate identifier instead, which
allows us to discern the DA9063 variants by setting the type
based on either DT match or otherwise.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-core.c            | 1 -
 drivers/mfd/da9063-i2c.c             | 5 +++--
 drivers/regulator/da9063-regulator.c | 8 ++++----
 include/linux/mfd/da9063/core.h      | 6 +++++-
 4 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/da9063-core.c b/drivers/mfd/da9063-core.c
index c54777cc2f12..ded59990f18c 100644
--- a/drivers/mfd/da9063-core.c
+++ b/drivers/mfd/da9063-core.c
@@ -215,7 +215,6 @@ int da9063_device_init(struct da9063 *da9063, unsigned int irq)
 		return -ENODEV;
 	}
 
-	da9063->model = model;
 	da9063->variant_code = variant_code;
 
 	ret = da9063_irq_init(da9063);
diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index d1fe88777e3f..6fe9c3464b41 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -146,7 +146,7 @@ static const struct of_device_id da9063_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, da9063_dt_ids);
 static int da9063_i2c_probe(struct i2c_client *i2c,
-	const struct i2c_device_id *id)
+			    const struct i2c_device_id *id)
 {
 	struct da9063 *da9063;
 	int ret;
@@ -158,6 +158,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 	i2c_set_clientdata(i2c, da9063);
 	da9063->dev = &i2c->dev;
 	da9063->chip_irq = i2c->irq;
+	da9063->type = id->driver_data;
 
 	if (da9063->variant_code == PMIC_DA9063_AD) {
 		da9063_regmap_config.rd_table = &da9063_ad_readable_table;
@@ -181,7 +182,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 }
 
 static const struct i2c_device_id da9063_i2c_id[] = {
-	{ "da9063", PMIC_CHIP_ID_DA9063 },
+	{ "da9063", PMIC_TYPE_DA9063 },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, da9063_i2c_id);
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index 7f4ac0413182..27ba2a1d14a3 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -98,7 +98,7 @@ struct da9063_regulator_info {
 struct da9063_dev_model {
 	const struct da9063_regulator_info	*regulator_info;
 	unsigned				n_regulators;
-	unsigned				dev_model;
+	enum da9063_type			type;
 };
 
 /* Single regulator settings */
@@ -585,7 +585,7 @@ static struct da9063_dev_model regulators_models[] = {
 	{
 		.regulator_info = da9063_regulator_info,
 		.n_regulators = ARRAY_SIZE(da9063_regulator_info),
-		.dev_model = PMIC_CHIP_ID_DA9063,
+		.type = PMIC_TYPE_DA9063,
 	},
 	{ }
 };
@@ -741,12 +741,12 @@ static int da9063_regulator_probe(struct platform_device *pdev)
 
 	/* Find regulators set for particular device model */
 	for (model = regulators_models; model->regulator_info; model++) {
-		if (model->dev_model == da9063->model)
+		if (model->type == da9063->type)
 			break;
 	}
 	if (!model->regulator_info) {
 		dev_err(&pdev->dev, "Chip model not recognised (%u)\n",
-			da9063->model);
+			da9063->type);
 		return -ENODEV;
 	}
 
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index e015b065db7e..9e36097adc42 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -31,6 +31,10 @@
 
 #define PMIC_CHIP_ID_DA9063		0x61
 
+enum da9063_type {
+	PMIC_TYPE_DA9063 = 0,
+};
+
 enum da9063_variant_codes {
 	PMIC_DA9063_AD = 0x3,
 	PMIC_DA9063_BB = 0x5,
@@ -73,7 +77,7 @@ enum da9063_irqs {
 struct da9063 {
 	/* Device */
 	struct device	*dev;
-	unsigned short	model;
+	enum da9063_type type;
 	unsigned char	variant_code;
 	unsigned int	flags;
 
-- 
cgit v1.2.3


From 8ae81814cca96a4b7f66dcf1aeb904a0a077f7f4 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Mon, 11 Jun 2018 13:58:46 +0200
Subject: mfd: da9063: Add DA9063L type

Add type for DA9063L, which is a reduced variant of the DA9063
without RTC block and with less regulators.

Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Steve Twiss <stwiss.opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/da9063/core.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index 9e36097adc42..71b09154e2db 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -33,6 +33,7 @@
 
 enum da9063_type {
 	PMIC_TYPE_DA9063 = 0,
+	PMIC_TYPE_DA9063L,
 };
 
 enum da9063_variant_codes {
-- 
cgit v1.2.3


From 33bd5ac54dc47e002da4a395aaf9bf158dd17709 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 3 Jul 2018 14:36:21 -0700
Subject: net/ipv6: Revert attempt to simplify route replace and append

NetworkManager likes to manage linklocal prefix routes and does so with
the NLM_F_APPEND flag, breaking attempts to simplify the IPv6 route
code and by extension enable multipath routes with device only nexthops.

Revert f34436a43092 and these followup patches:
6eba08c3626b ("ipv6: Only emit append events for appended routes").
ce45bded6435 ("mlxsw: spectrum_router: Align with new route replace logic")
53b562df8c20 ("mlxsw: spectrum_router: Allow appending to dev-only routes")

Update the fib_tests cases to reflect the old behavior.

Fixes: f34436a43092 ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: David Ahern <dsahern@gmail.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |  48 +++----
 include/net/ip6_route.h                            |   6 +
 net/ipv6/ip6_fib.c                                 | 156 ++++++++++++---------
 net/ipv6/route.c                                   |   3 +-
 tools/testing/selftests/net/fib_tests.sh           |  41 ------
 5 files changed, 117 insertions(+), 137 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 6aaaf3d9ba31..77b2adb29341 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -4756,6 +4756,12 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
 	kfree(mlxsw_sp_rt6);
 }
 
+static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
+{
+	/* RTF_CACHE routes are ignored */
+	return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
+}
+
 static struct fib6_info *
 mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 {
@@ -4765,11 +4771,11 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
 
 static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
-				 const struct fib6_info *nrt, bool append)
+				 const struct fib6_info *nrt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 
-	if (!append)
+	if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
 		return NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
@@ -4784,7 +4790,8 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 			break;
 		if (rt->fib6_metric < nrt->fib6_metric)
 			continue;
-		if (rt->fib6_metric == nrt->fib6_metric)
+		if (rt->fib6_metric == nrt->fib6_metric &&
+		    mlxsw_sp_fib6_rt_can_mp(rt))
 			return fib6_entry;
 		if (rt->fib6_metric > nrt->fib6_metric)
 			break;
@@ -5163,7 +5170,7 @@ static struct mlxsw_sp_fib6_entry *
 mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 			      const struct fib6_info *nrt, bool replace)
 {
-	struct mlxsw_sp_fib6_entry *fib6_entry;
+	struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
 
 	list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
 		struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
@@ -5172,13 +5179,18 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
 			continue;
 		if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
 			break;
-		if (replace && rt->fib6_metric == nrt->fib6_metric)
-			return fib6_entry;
+		if (replace && rt->fib6_metric == nrt->fib6_metric) {
+			if (mlxsw_sp_fib6_rt_can_mp(rt) ==
+			    mlxsw_sp_fib6_rt_can_mp(nrt))
+				return fib6_entry;
+			if (mlxsw_sp_fib6_rt_can_mp(nrt))
+				fallback = fallback ?: fib6_entry;
+		}
 		if (rt->fib6_metric > nrt->fib6_metric)
-			return fib6_entry;
+			return fallback ?: fib6_entry;
 	}
 
-	return NULL;
+	return fallback;
 }
 
 static int
@@ -5304,8 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
-				    struct fib6_info *rt, bool replace,
-				    bool append)
+				    struct fib6_info *rt, bool replace)
 {
 	struct mlxsw_sp_fib6_entry *fib6_entry;
 	struct mlxsw_sp_fib_node *fib_node;
@@ -5331,7 +5342,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 	/* Before creating a new entry, try to append route to an existing
 	 * multipath entry.
 	 */
-	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, append);
+	fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
 	if (fib6_entry) {
 		err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
 		if (err)
@@ -5339,14 +5350,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 		return 0;
 	}
 
-	/* We received an append event, yet did not find any route to
-	 * append to.
-	 */
-	if (WARN_ON(append)) {
-		err = -EINVAL;
-		goto err_fib6_entry_append;
-	}
-
 	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
 	if (IS_ERR(fib6_entry)) {
 		err = PTR_ERR(fib6_entry);
@@ -5364,7 +5367,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
 err_fib6_node_entry_link:
 	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
 err_fib6_entry_create:
-err_fib6_entry_append:
 err_fib6_entry_nexthop_add:
 	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
 	return err;
@@ -5715,7 +5717,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 	struct mlxsw_sp_fib_event_work *fib_work =
 		container_of(work, struct mlxsw_sp_fib_event_work, work);
 	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
-	bool replace, append;
+	bool replace;
 	int err;
 
 	rtnl_lock();
@@ -5726,10 +5728,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
 	case FIB_EVENT_ENTRY_APPEND: /* fall through */
 	case FIB_EVENT_ENTRY_ADD:
 		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
-		append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
 		err = mlxsw_sp_router_fib6_add(mlxsw_sp,
-					       fib_work->fen6_info.rt, replace,
-					       append);
+					       fib_work->fen6_info.rt, replace);
 		if (err)
 			mlxsw_sp_router_fib_abort(mlxsw_sp);
 		mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 59656fc580df..7b9c82de11cc 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
+{
+	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	       RTF_GATEWAY;
+}
+
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1fb2f3118d60..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 {
 	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
-	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
-	struct fib6_info *iter = NULL, *match = NULL;
+	struct fib6_info *iter = NULL;
 	struct fib6_info __rcu **ins;
+	struct fib6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
-	int append = (info->nlh &&
-		       (info->nlh->nlmsg_flags & NLM_F_APPEND));
 	int add = (!info->nlh ||
 		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
 	int found = 0;
+	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
 	u16 nlflags = NLM_F_EXCL;
 	int err;
 
-	if (append)
+	if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
 		nlflags |= NLM_F_APPEND;
 
 	ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
 			nlflags &= ~NLM_F_EXCL;
 			if (replace) {
-				found++;
-				break;
+				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+					found++;
+					break;
+				}
+				if (rt_can_ecmp)
+					fallback_ins = fallback_ins ?: ins;
+				goto next_iter;
 			}
 
 			if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
 				return -EEXIST;
 			}
-
-			/* first route that matches */
-			if (!match)
-				match = iter;
+			/* If we have the same destination and the same metric,
+			 * but not the same gateway, then the route we try to
+			 * add is sibling to this route, increment our counter
+			 * of siblings, and later we will add our route to the
+			 * list.
+			 * Only static routes (which don't have flag
+			 * RTF_EXPIRES) are used for ECMPv6.
+			 *
+			 * To avoid long list, we only had siblings if the
+			 * route have a gateway.
+			 */
+			if (rt_can_ecmp &&
+			    rt6_qualify_for_ecmp(iter))
+				rt->fib6_nsiblings++;
 		}
 
 		if (iter->fib6_metric > rt->fib6_metric)
 			break;
 
+next_iter:
 		ins = &iter->fib6_next;
 	}
 
+	if (fallback_ins && !found) {
+		/* No ECMP-able route found, replace first non-ECMP one */
+		ins = fallback_ins;
+		iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+		found++;
+	}
+
 	/* Reset round-robin state, if necessary */
 	if (ins == &fn->leaf)
 		fn->rr_ptr = NULL;
 
 	/* Link this route to others same route. */
-	if (append && match) {
+	if (rt->fib6_nsiblings) {
+		unsigned int fib6_nsiblings;
 		struct fib6_info *sibling, *temp_sibling;
 
-		if (rt->fib6_flags & RTF_REJECT) {
-			NL_SET_ERR_MSG(extack,
-				       "Can not append a REJECT route");
-			return -EINVAL;
-		} else if (match->fib6_flags & RTF_REJECT) {
-			NL_SET_ERR_MSG(extack,
-				       "Can not append to a REJECT route");
-			return -EINVAL;
+		/* Find the first route that have the same metric */
+		sibling = leaf;
+		while (sibling) {
+			if (sibling->fib6_metric == rt->fib6_metric &&
+			    rt6_qualify_for_ecmp(sibling)) {
+				list_add_tail(&rt->fib6_siblings,
+					      &sibling->fib6_siblings);
+				break;
+			}
+			sibling = rcu_dereference_protected(sibling->fib6_next,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 		}
-		event = FIB_EVENT_ENTRY_APPEND;
-		rt->fib6_nsiblings = match->fib6_nsiblings;
-		list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
-		match->fib6_nsiblings++;
-
 		/* For each sibling in the list, increment the counter of
 		 * siblings. BUG() if counters does not match, list of siblings
 		 * is broken!
 		 */
+		fib6_nsiblings = 0;
 		list_for_each_entry_safe(sibling, temp_sibling,
-					 &match->fib6_siblings, fib6_siblings) {
+					 &rt->fib6_siblings, fib6_siblings) {
 			sibling->fib6_nsiblings++;
-			BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+			fib6_nsiblings++;
 		}
-
-		rt6_multipath_rebalance(match);
+		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+		rt6_multipath_rebalance(temp_sibling);
 	}
 
 	/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 add:
 		nlflags |= NLM_F_CREATE;
 
-		err = call_fib6_entry_notifiers(info->nl_net, event, rt,
-						extack);
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_ADD,
+						rt, extack);
 		if (err)
 			return err;
 
@@ -1062,7 +1087,7 @@ add:
 		}
 
 	} else {
-		struct fib6_info *tmp;
+		int nsiblings;
 
 		if (!found) {
 			if (add)
@@ -1077,57 +1102,48 @@ add:
 		if (err)
 			return err;
 
-		/* if route being replaced has siblings, set tmp to
-		 * last one, otherwise tmp is current route. this is
-		 * used to set fib6_next for new route
-		 */
-		if (iter->fib6_nsiblings)
-			tmp = list_last_entry(&iter->fib6_siblings,
-					      struct fib6_info,
-					      fib6_siblings);
-		else
-			tmp = iter;
-
-		/* insert new route */
 		atomic_inc(&rt->fib6_ref);
 		rcu_assign_pointer(rt->fib6_node, fn);
-		rt->fib6_next = tmp->fib6_next;
+		rt->fib6_next = iter->fib6_next;
 		rcu_assign_pointer(*ins, rt);
-
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
 			fn->fn_flags |= RTN_RTINFO;
 		}
+		nsiblings = iter->fib6_nsiblings;
+		iter->fib6_node = NULL;
+		fib6_purge_rt(iter, fn, info->nl_net);
+		if (rcu_access_pointer(fn->rr_ptr) == iter)
+			fn->rr_ptr = NULL;
+		fib6_info_release(iter);
 
-		/* delete old route */
-		rt = iter;
-
-		if (rt->fib6_nsiblings) {
-			struct fib6_info *tmp;
-
+		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
-			list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
-						 fib6_siblings) {
-				iter->fib6_node = NULL;
-				fib6_purge_rt(iter, fn, info->nl_net);
-				if (rcu_access_pointer(fn->rr_ptr) == iter)
-					fn->rr_ptr = NULL;
-				fib6_info_release(iter);
-
-				rt->fib6_nsiblings--;
-				info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+			ins = &rt->fib6_next;
+			iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+			while (iter) {
+				if (iter->fib6_metric > rt->fib6_metric)
+					break;
+				if (rt6_qualify_for_ecmp(iter)) {
+					*ins = iter->fib6_next;
+					iter->fib6_node = NULL;
+					fib6_purge_rt(iter, fn, info->nl_net);
+					if (rcu_access_pointer(fn->rr_ptr) == iter)
+						fn->rr_ptr = NULL;
+					fib6_info_release(iter);
+					nsiblings--;
+					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+				} else {
+					ins = &iter->fib6_next;
+				}
+				iter = rcu_dereference_protected(*ins,
+					lockdep_is_held(&rt->fib6_table->tb6_lock));
 			}
+			WARN_ON(nsiblings != 0);
 		}
-
-		WARN_ON(rt->fib6_nsiblings != 0);
-
-		rt->fib6_node = NULL;
-		fib6_purge_rt(rt, fn, info->nl_net);
-		if (rcu_access_pointer(fn->rr_ptr) == rt)
-			fn->rr_ptr = NULL;
-		fib6_info_release(rt);
 	}
 
 	return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86a0e4333d42..63f99411f0de 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3842,7 +3842,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 			lockdep_is_held(&rt->fib6_table->tb6_lock));
 	while (iter) {
 		if (iter->fib6_metric == rt->fib6_metric &&
-		    iter->fib6_nsiblings)
+		    rt6_qualify_for_ecmp(iter))
 			return iter;
 		iter = rcu_dereference_protected(iter->fib6_next,
 				lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4439,7 +4439,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		 */
 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
 						     NLM_F_REPLACE);
-		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
 		nhn++;
 	}
 
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 78245d60d8bc..0f45633bd634 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -740,13 +740,6 @@ ipv6_rt_add()
 	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
 	log_test $? 2 "Attempt to add duplicate route - reject route"
 
-	# iproute2 prepend only sets NLM_F_CREATE
-	# - adds a new route; does NOT convert existing route to ECMP
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
-	check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
-	log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"
-
 	# route append with same prefix adds a new route
 	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
 	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
@@ -754,27 +747,6 @@ ipv6_rt_add()
 	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
 	log_test $? 0 "Append nexthop to existing route - gw"
 
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
-	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
-	log_test $? 0 "Append nexthop to existing route - dev only"
-
-	# multipath route can not have a nexthop that is a reject route
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
-	log_test $? 2 "Append nexthop to existing route - reject route"
-
-	# reject route can not be converted to multipath route
-	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
-	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
-	log_test $? 2 "Append nexthop to existing reject route - gw"
-
-	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
-	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
-	log_test $? 2 "Append nexthop to existing reject route - dev only"
-
 	# insert mpath directly
 	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
 	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
@@ -819,13 +791,6 @@ ipv6_rt_replace_single()
 	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
 	log_test $? 0 "Single path with multipath"
 
-	# single path with reject
-	#
-	add_initial_route6 "nexthop via 2001:db8:101::2"
-	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
-	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
-	log_test $? 0 "Single path with reject route"
-
 	# single path with single path using MULTIPATH attribute
 	#
 	add_initial_route6 "via 2001:db8:101::2"
@@ -873,12 +838,6 @@ ipv6_rt_replace_mpath()
 	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
 	log_test $? 0 "Multipath with single path via multipath attribute"
 
-	# multipath with reject
-	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
-	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
-	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
-	log_test $? 0 "Multipath with reject route"
-
 	# route replace fails - invalid nexthop 1
 	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
 	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
-- 
cgit v1.2.3


From 30000d80b506819ae5dae926f636412a34594ed6 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 3 May 2018 16:15:17 +0200
Subject: backlight: Remove obsolete comment for ->state

Jani spotted this when reviewing my earlier patch to remove the driver
internal usage of this field in:

  Commit 3cf91adaa594 ("backlight: Nuke BL_CORE_DRIVER1")

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/backlight.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 7fbf0539e14a..0b5897446dca 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -79,7 +79,6 @@ struct backlight_properties {
 	/* Backlight type */
 	enum backlight_type type;
 	/* Flags used to signal drivers of state changes */
-	/* Upper 4 bits are reserved for driver internal use */
 	unsigned int state;
 
 #define BL_CORE_SUSPENDED	(1 << 0)	/* backlight is suspended */
-- 
cgit v1.2.3


From 421860b9d47053badce4b247576fa48df9ab4c48 Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Wed, 27 Jun 2018 14:39:43 -0400
Subject: media: v4l2-ctrls: Fix CID base conflict between MAX217X and IMX

When the imx-media driver was initially merged, there was a conflict
with 8d67ae25 ("media: v4l2-ctrls: Reserve controls for MAX217X") which
was not fixed up correctly, resulting in V4L2_CID_USER_MAX217X_BASE and
V4L2_CID_USER_IMX_BASE taking on the same value. Fix by assigning imx
CID base the next available range at 0x10b0.

Signed-off-by: Steve Longerbeam <steve_longerbeam@mentor.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 8d473c979b61..8a75ad7899f3 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -188,7 +188,7 @@ enum v4l2_colorfx {
 
 /* The base for the imx driver controls.
  * We reserve 16 controls for this driver. */
-#define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x1090)
+#define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x10b0)
 
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
-- 
cgit v1.2.3


From e7e3728bd776d1d1450212ad266832f1003f833f Mon Sep 17 00:00:00 2001
From: Qiaobin Fu <qiaobinf@bu.edu>
Date: Sun, 1 Jul 2018 15:16:27 -0400
Subject: net:sched: add action inheritdsfield to skbedit

The new action inheritdsfield copies the field DS of
IPv4 and IPv6 packets into skb->priority. This enables
later classification of packets based on the DS field.

v5:
*Update the drop counter for TC_ACT_SHOT

v4:
*Not allow setting flags other than the expected ones.

*Allow dumping the pure flags.

v3:
*Use optional flags, so that it won't break old versions of tc.

*Allow users to set both SKBEDIT_F_PRIORITY and SKBEDIT_F_INHERITDSFIELD flags.

v2:
*Fix the style issue

*Move the code from skbmod to skbedit

Original idea by Jamal Hadi Salim <jhs@mojatatu.com>

Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_skbedit.h |  2 ++
 net/sched/act_skbedit.c                | 41 ++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index fbcfe27a4e6c..6de6071ebed6 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -30,6 +30,7 @@
 #define SKBEDIT_F_MARK			0x4
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
+#define SKBEDIT_F_INHERITDSFIELD	0x20
 
 struct tc_skbedit {
 	tc_gen;
@@ -45,6 +46,7 @@ enum {
 	TCA_SKBEDIT_PAD,
 	TCA_SKBEDIT_PTYPE,
 	TCA_SKBEDIT_MASK,
+	TCA_SKBEDIT_FLAGS,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6138d1d71900..dfaf5d8028dd 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,9 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
 
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -41,6 +44,25 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 
 	if (d->flags & SKBEDIT_F_PRIORITY)
 		skb->priority = d->priority;
+	if (d->flags & SKBEDIT_F_INHERITDSFIELD) {
+		int wlen = skb_network_offset(skb);
+
+		switch (tc_skb_protocol(skb)) {
+		case htons(ETH_P_IP):
+			wlen += sizeof(struct iphdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+			break;
+
+		case htons(ETH_P_IPV6):
+			wlen += sizeof(struct ipv6hdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+			break;
+		}
+	}
 	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
 	    skb->dev->real_num_tx_queues > d->queue_mapping)
 		skb_set_queue_mapping(skb, d->queue_mapping);
@@ -53,6 +75,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
+
+err:
+	d->tcf_qstats.drops++;
+	spin_unlock(&d->tcf_lock);
+	return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -62,6 +89,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MASK]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_FLAGS]		= { .len = sizeof(u64) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -114,6 +142,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		mask = nla_data(tb[TCA_SKBEDIT_MASK]);
 	}
 
+	if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
+		u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
+
+		if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
+			flags |= SKBEDIT_F_INHERITDSFIELD;
+	}
+
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
 	exists = tcf_idr_check(tn, parm->index, a, bind);
@@ -178,6 +213,7 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
+	u64 pure_flags = 0;
 
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
@@ -196,6 +232,11 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	if ((d->flags & SKBEDIT_F_MASK) &&
 	    nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
 		goto nla_put_failure;
+	if (d->flags & SKBEDIT_F_INHERITDSFIELD)
+		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+	if (pure_flags != 0 &&
+	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
+		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &d->tcf_tm);
 	if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
-- 
cgit v1.2.3


From be2fff656322e82f215730839063c2c2ca73d14b Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Mon, 2 Jul 2018 11:36:05 -0400
Subject: media: add helpers for memory-to-memory media controller

A memory-to-memory pipeline device consists in three
entities: two DMA engine and one video processing entities.
The DMA engine entities are linked to a V4L interface.

This commit add a new v4l2_m2m_{un}register_media_controller
API to register this topology.

For instance, a typical mem2mem device topology would
look like this:

Device topology
- entity 1: source (1 pad, 1 link)
            type Node subtype V4L flags 0
	pad0: Source
		-> "proc":1 [ENABLED,IMMUTABLE]

- entity 3: proc (2 pads, 2 links)
            type Node subtype Unknown flags 0
	pad0: Source
		-> "sink":0 [ENABLED,IMMUTABLE]
	pad1: Sink
		<- "source":0 [ENABLED,IMMUTABLE]

- entity 6: sink (1 pad, 1 link)
            type Node subtype V4L flags 0
	pad0: Sink
		<- "proc":0 [ENABLED,IMMUTABLE]

[hans.verkuil@cisco.com: mark interface links as IMMUTABLE]

Suggested-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-dev.c     |  13 ++-
 drivers/media/v4l2-core/v4l2-mem2mem.c | 190 +++++++++++++++++++++++++++++++++
 include/media/v4l2-mem2mem.h           |  19 ++++
 3 files changed, 217 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 5f43f63fa700..69e775930fc4 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -202,7 +202,7 @@ static void v4l2_device_release(struct device *cd)
 	mutex_unlock(&videodev_lock);
 
 #if defined(CONFIG_MEDIA_CONTROLLER)
-	if (v4l2_dev->mdev) {
+	if (v4l2_dev->mdev && vdev->vfl_dir != VFL_DIR_M2M) {
 		/* Remove interfaces and interface links */
 		media_devnode_remove(vdev->intf_devnode);
 		if (vdev->entity.function != MEDIA_ENT_F_UNKNOWN)
@@ -733,19 +733,22 @@ static void determine_valid_ioctls(struct video_device *vdev)
 			BASE_VIDIOC_PRIVATE);
 }
 
-static int video_register_media_controller(struct video_device *vdev, int type)
+static int video_register_media_controller(struct video_device *vdev)
 {
 #if defined(CONFIG_MEDIA_CONTROLLER)
 	u32 intf_type;
 	int ret;
 
-	if (!vdev->v4l2_dev->mdev)
+	/* Memory-to-memory devices are more complex and use
+	 * their own function to register its mc entities.
+	 */
+	if (!vdev->v4l2_dev->mdev || vdev->vfl_dir == VFL_DIR_M2M)
 		return 0;
 
 	vdev->entity.obj_type = MEDIA_ENTITY_TYPE_VIDEO_DEVICE;
 	vdev->entity.function = MEDIA_ENT_F_UNKNOWN;
 
-	switch (type) {
+	switch (vdev->vfl_type) {
 	case VFL_TYPE_GRABBER:
 		intf_type = MEDIA_INTF_T_V4L_VIDEO;
 		vdev->entity.function = MEDIA_ENT_F_IO_V4L;
@@ -994,7 +997,7 @@ int __video_register_device(struct video_device *vdev,
 	v4l2_device_get(vdev->v4l2_dev);
 
 	/* Part 5: Register the entity. */
-	ret = video_register_media_controller(vdev, type);
+	ret = video_register_media_controller(vdev);
 
 	/* Part 6: Activate this minor. The char device can now be used. */
 	set_bit(V4L2_FL_REGISTERED, &vdev->flags);
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 5f9cd5b74cda..725da74d15d8 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -17,9 +17,11 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 
+#include <media/media-device.h>
 #include <media/videobuf2-v4l2.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/v4l2-dev.h>
+#include <media/v4l2-device.h>
 #include <media/v4l2-fh.h>
 #include <media/v4l2-event.h>
 
@@ -50,6 +52,17 @@ module_param(debug, bool, 0644);
  * offsets but for different queues */
 #define DST_QUEUE_OFF_BASE	(1 << 30)
 
+enum v4l2_m2m_entity_type {
+	MEM2MEM_ENT_TYPE_SOURCE,
+	MEM2MEM_ENT_TYPE_SINK,
+	MEM2MEM_ENT_TYPE_PROC
+};
+
+static const char * const m2m_entity_name[] = {
+	"source",
+	"sink",
+	"proc"
+};
 
 /**
  * struct v4l2_m2m_dev - per-device context
@@ -60,6 +73,15 @@ module_param(debug, bool, 0644);
  */
 struct v4l2_m2m_dev {
 	struct v4l2_m2m_ctx	*curr_ctx;
+#ifdef CONFIG_MEDIA_CONTROLLER
+	struct media_entity	*source;
+	struct media_pad	source_pad;
+	struct media_entity	sink;
+	struct media_pad	sink_pad;
+	struct media_entity	proc;
+	struct media_pad	proc_pads[2];
+	struct media_intf_devnode *intf_devnode;
+#endif
 
 	struct list_head	job_queue;
 	spinlock_t		job_spinlock;
@@ -594,6 +616,174 @@ int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL(v4l2_m2m_mmap);
 
+#if defined(CONFIG_MEDIA_CONTROLLER)
+void v4l2_m2m_unregister_media_controller(struct v4l2_m2m_dev *m2m_dev)
+{
+	media_remove_intf_links(&m2m_dev->intf_devnode->intf);
+	media_devnode_remove(m2m_dev->intf_devnode);
+
+	media_entity_remove_links(m2m_dev->source);
+	media_entity_remove_links(&m2m_dev->sink);
+	media_entity_remove_links(&m2m_dev->proc);
+	media_device_unregister_entity(m2m_dev->source);
+	media_device_unregister_entity(&m2m_dev->sink);
+	media_device_unregister_entity(&m2m_dev->proc);
+	kfree(m2m_dev->source->name);
+	kfree(m2m_dev->sink.name);
+	kfree(m2m_dev->proc.name);
+}
+EXPORT_SYMBOL_GPL(v4l2_m2m_unregister_media_controller);
+
+static int v4l2_m2m_register_entity(struct media_device *mdev,
+	struct v4l2_m2m_dev *m2m_dev, enum v4l2_m2m_entity_type type,
+	struct video_device *vdev, int function)
+{
+	struct media_entity *entity;
+	struct media_pad *pads;
+	char *name;
+	unsigned int len;
+	int num_pads;
+	int ret;
+
+	switch (type) {
+	case MEM2MEM_ENT_TYPE_SOURCE:
+		entity = m2m_dev->source;
+		pads = &m2m_dev->source_pad;
+		pads[0].flags = MEDIA_PAD_FL_SOURCE;
+		num_pads = 1;
+		break;
+	case MEM2MEM_ENT_TYPE_SINK:
+		entity = &m2m_dev->sink;
+		pads = &m2m_dev->sink_pad;
+		pads[0].flags = MEDIA_PAD_FL_SINK;
+		num_pads = 1;
+		break;
+	case MEM2MEM_ENT_TYPE_PROC:
+		entity = &m2m_dev->proc;
+		pads = m2m_dev->proc_pads;
+		pads[0].flags = MEDIA_PAD_FL_SINK;
+		pads[1].flags = MEDIA_PAD_FL_SOURCE;
+		num_pads = 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	entity->obj_type = MEDIA_ENTITY_TYPE_BASE;
+	if (type != MEM2MEM_ENT_TYPE_PROC) {
+		entity->info.dev.major = VIDEO_MAJOR;
+		entity->info.dev.minor = vdev->minor;
+	}
+	len = strlen(vdev->name) + 2 + strlen(m2m_entity_name[type]);
+	name = kmalloc(len, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+	snprintf(name, len, "%s-%s", vdev->name, m2m_entity_name[type]);
+	entity->name = name;
+	entity->function = function;
+
+	ret = media_entity_pads_init(entity, num_pads, pads);
+	if (ret)
+		return ret;
+	ret = media_device_register_entity(mdev, entity);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev,
+		struct video_device *vdev, int function)
+{
+	struct media_device *mdev = vdev->v4l2_dev->mdev;
+	struct media_link *link;
+	int ret;
+
+	if (!mdev)
+		return 0;
+
+	/* A memory-to-memory device consists in two
+	 * DMA engine and one video processing entities.
+	 * The DMA engine entities are linked to a V4L interface
+	 */
+
+	/* Create the three entities with their pads */
+	m2m_dev->source = &vdev->entity;
+	ret = v4l2_m2m_register_entity(mdev, m2m_dev,
+			MEM2MEM_ENT_TYPE_SOURCE, vdev, MEDIA_ENT_F_IO_V4L);
+	if (ret)
+		return ret;
+	ret = v4l2_m2m_register_entity(mdev, m2m_dev,
+			MEM2MEM_ENT_TYPE_PROC, vdev, function);
+	if (ret)
+		goto err_rel_entity0;
+	ret = v4l2_m2m_register_entity(mdev, m2m_dev,
+			MEM2MEM_ENT_TYPE_SINK, vdev, MEDIA_ENT_F_IO_V4L);
+	if (ret)
+		goto err_rel_entity1;
+
+	/* Connect the three entities */
+	ret = media_create_pad_link(m2m_dev->source, 0, &m2m_dev->proc, 1,
+			MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED);
+	if (ret)
+		goto err_rel_entity2;
+
+	ret = media_create_pad_link(&m2m_dev->proc, 0, &m2m_dev->sink, 0,
+			MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED);
+	if (ret)
+		goto err_rm_links0;
+
+	/* Create video interface */
+	m2m_dev->intf_devnode = media_devnode_create(mdev,
+			MEDIA_INTF_T_V4L_VIDEO, 0,
+			VIDEO_MAJOR, vdev->minor);
+	if (!m2m_dev->intf_devnode) {
+		ret = -ENOMEM;
+		goto err_rm_links1;
+	}
+
+	/* Connect the two DMA engines to the interface */
+	link = media_create_intf_link(m2m_dev->source,
+			&m2m_dev->intf_devnode->intf,
+			MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED);
+	if (!link) {
+		ret = -ENOMEM;
+		goto err_rm_devnode;
+	}
+
+	link = media_create_intf_link(&m2m_dev->sink,
+			&m2m_dev->intf_devnode->intf,
+			MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED);
+	if (!link) {
+		ret = -ENOMEM;
+		goto err_rm_intf_link;
+	}
+	return 0;
+
+err_rm_intf_link:
+	media_remove_intf_links(&m2m_dev->intf_devnode->intf);
+err_rm_devnode:
+	media_devnode_remove(m2m_dev->intf_devnode);
+err_rm_links1:
+	media_entity_remove_links(&m2m_dev->sink);
+err_rm_links0:
+	media_entity_remove_links(&m2m_dev->proc);
+	media_entity_remove_links(m2m_dev->source);
+err_rel_entity2:
+	media_device_unregister_entity(&m2m_dev->proc);
+	kfree(m2m_dev->proc.name);
+err_rel_entity1:
+	media_device_unregister_entity(&m2m_dev->sink);
+	kfree(m2m_dev->sink.name);
+err_rel_entity0:
+	media_device_unregister_entity(m2m_dev->source);
+	kfree(m2m_dev->source->name);
+	return ret;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(v4l2_m2m_register_media_controller);
+#endif
+
 struct v4l2_m2m_dev *v4l2_m2m_init(const struct v4l2_m2m_ops *m2m_ops)
 {
 	struct v4l2_m2m_dev *m2m_dev;
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 8f4b208cfee7..af48b1eca025 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -47,6 +47,7 @@ struct v4l2_m2m_ops {
 	void (*job_abort)(void *priv);
 };
 
+struct video_device;
 struct v4l2_m2m_dev;
 
 /**
@@ -322,6 +323,24 @@ int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx,
  */
 struct v4l2_m2m_dev *v4l2_m2m_init(const struct v4l2_m2m_ops *m2m_ops);
 
+#if defined(CONFIG_MEDIA_CONTROLLER)
+void v4l2_m2m_unregister_media_controller(struct v4l2_m2m_dev *m2m_dev);
+int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev,
+			struct video_device *vdev, int function);
+#else
+static inline void
+v4l2_m2m_unregister_media_controller(struct v4l2_m2m_dev *m2m_dev)
+{
+}
+
+static inline int
+v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev,
+		struct video_device *vdev, int function)
+{
+	return 0;
+}
+#endif
+
 /**
  * v4l2_m2m_release() - cleans up and frees a m2m_dev structure
  *
-- 
cgit v1.2.3


From 5520b9467a39d5ec9ce9cd8a9ed01f826b817524 Mon Sep 17 00:00:00 2001
From: Keiichi Watanabe <keiichiw@chromium.org>
Date: Mon, 18 Jun 2018 03:58:52 -0400
Subject: media: v4l2-ctrl: Change control for VP8 profile to menu control

Add a menu control V4L2_CID_MPEG_VIDEO_VP8_PROFILE for VP8 profile and make
V4L2_CID_MPEG_VIDEO_VPX_PROFILE an alias of it. This new control is used to
select the desired profile for VP8 encoder and query for supported profiles by
VP8 encoder/decoder.

Though we have originally a control V4L2_CID_MPEG_VIDEO_VPX_PROFILE and its name
contains 'VPX', it works only for VP8 because supported profiles usually differ
between VP8 and VP9. In addition, this control cannot be used for querying since
it is not a menu control but an integer control, which cannot return an
arbitrary set of supported profiles.

The new control V4L2_CID_MPEG_VIDEO_VP8_PROFILE is a menu control as with
controls for other codec profiles. (e.g. H264)

In addition, this patch also fixes the use of V4L2_CID_MPEG_VIDEO_VPX_PROFILE in
drivers of Qualcomm's venus and Samsung's s5p-mfc.

Signed-off-by: Keiichi Watanabe <keiichiw@chromium.org>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 25 +++++++++++++++++++---
 drivers/media/platform/qcom/venus/vdec_ctrls.c     | 10 +++++----
 drivers/media/platform/qcom/venus/venc.c           |  4 ++--
 drivers/media/platform/qcom/venus/venc_ctrls.c     | 10 +++++----
 drivers/media/platform/s5p-mfc/s5p_mfc_enc.c       | 15 ++++++-------
 drivers/media/v4l2-core/v4l2-ctrls.c               | 12 ++++++++++-
 include/uapi/linux/v4l2-controls.h                 | 11 +++++++++-
 7 files changed, 64 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 03931f9b1285..01ef31a934b4 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1955,9 +1955,28 @@ enum v4l2_vp8_golden_frame_sel -
 ``V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP (integer)``
     Quantization parameter for a P frame for VP8.
 
-``V4L2_CID_MPEG_VIDEO_VPX_PROFILE (integer)``
-    Select the desired profile for VPx encoder. Acceptable values are 0,
-    1, 2 and 3 corresponding to encoder profiles 0, 1, 2 and 3.
+.. _v4l2-mpeg-video-vp8-profile:
+
+``V4L2_CID_MPEG_VIDEO_VP8_PROFILE``
+    (enum)
+
+enum v4l2_mpeg_video_vp8_profile -
+    This control allows selecting the profile for VP8 encoder.
+    This is also used to enumerate supported profiles by VP8 encoder or decoder.
+    Possible values are:
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_VP8_PROFILE_0``
+      - Profile 0
+    * - ``V4L2_MPEG_VIDEO_VP8_PROFILE_1``
+      - Profile 1
+    * - ``V4L2_MPEG_VIDEO_VP8_PROFILE_2``
+      - Profile 2
+    * - ``V4L2_MPEG_VIDEO_VP8_PROFILE_3``
+      - Profile 3
 
 
 High Efficiency Video Coding (HEVC/H.265) Control Reference
diff --git a/drivers/media/platform/qcom/venus/vdec_ctrls.c b/drivers/media/platform/qcom/venus/vdec_ctrls.c
index 032839bbc967..f4604b0cd57e 100644
--- a/drivers/media/platform/qcom/venus/vdec_ctrls.c
+++ b/drivers/media/platform/qcom/venus/vdec_ctrls.c
@@ -29,7 +29,7 @@ static int vdec_op_s_ctrl(struct v4l2_ctrl *ctrl)
 		break;
 	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		ctr->profile = ctrl->val;
 		break;
 	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
@@ -54,7 +54,7 @@ static int vdec_op_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
 	switch (ctrl->id) {
 	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		ret = hfi_session_get_property(inst, ptype, &hprop);
 		if (!ret)
 			ctr->profile = hprop.profile_level.profile;
@@ -130,8 +130,10 @@ int vdec_ctrl_init(struct venus_inst *inst)
 	if (ctrl)
 		ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
 
-	ctrl = v4l2_ctrl_new_std(&inst->ctrl_handler, &vdec_ctrl_ops,
-				 V4L2_CID_MPEG_VIDEO_VPX_PROFILE, 0, 3, 1, 0);
+	ctrl = v4l2_ctrl_new_std_menu(&inst->ctrl_handler, &vdec_ctrl_ops,
+				      V4L2_CID_MPEG_VIDEO_VP8_PROFILE,
+				      V4L2_MPEG_VIDEO_VP8_PROFILE_3,
+				      0, V4L2_MPEG_VIDEO_VP8_PROFILE_0);
 	if (ctrl)
 		ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
 
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 11dafc7848c5..f7a87a3dbb46 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -223,7 +223,7 @@ static int venc_v4l2_to_hfi(int id, int value)
 		case V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC:
 			return HFI_H264_ENTROPY_CABAC;
 		}
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		switch (value) {
 		case 0:
 		default:
@@ -754,7 +754,7 @@ static int venc_set_properties(struct venus_inst *inst)
 		level = venc_v4l2_to_hfi(V4L2_CID_MPEG_VIDEO_H264_LEVEL,
 					 ctr->level.h264);
 	} else if (inst->fmt_cap->pixfmt == V4L2_PIX_FMT_VP8) {
-		profile = venc_v4l2_to_hfi(V4L2_CID_MPEG_VIDEO_VPX_PROFILE,
+		profile = venc_v4l2_to_hfi(V4L2_CID_MPEG_VIDEO_VP8_PROFILE,
 					   ctr->profile.vpx);
 		level = 0;
 	} else if (inst->fmt_cap->pixfmt == V4L2_PIX_FMT_MPEG4) {
diff --git a/drivers/media/platform/qcom/venus/venc_ctrls.c b/drivers/media/platform/qcom/venus/venc_ctrls.c
index 21e938a28662..459101728d26 100644
--- a/drivers/media/platform/qcom/venus/venc_ctrls.c
+++ b/drivers/media/platform/qcom/venus/venc_ctrls.c
@@ -101,7 +101,7 @@ static int venc_op_s_ctrl(struct v4l2_ctrl *ctrl)
 	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
 		ctr->profile.h264 = ctrl->val;
 		break;
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		ctr->profile.vpx = ctrl->val;
 		break;
 	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
@@ -248,6 +248,11 @@ int venc_ctrl_init(struct venus_inst *inst)
 		V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES,
 		0, V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE);
 
+	v4l2_ctrl_new_std_menu(&inst->ctrl_handler, &venc_ctrl_ops,
+		V4L2_CID_MPEG_VIDEO_VP8_PROFILE,
+		V4L2_MPEG_VIDEO_VP8_PROFILE_3,
+		0, V4L2_MPEG_VIDEO_VP8_PROFILE_0);
+
 	v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
 		V4L2_CID_MPEG_VIDEO_BITRATE, BITRATE_MIN, BITRATE_MAX,
 		BITRATE_STEP, BITRATE_DEFAULT);
@@ -256,9 +261,6 @@ int venc_ctrl_init(struct venus_inst *inst)
 		V4L2_CID_MPEG_VIDEO_BITRATE_PEAK, BITRATE_MIN, BITRATE_MAX,
 		BITRATE_STEP, BITRATE_DEFAULT_PEAK);
 
-	v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
-		V4L2_CID_MPEG_VIDEO_VPX_PROFILE, 0, 3, 1, 0);
-
 	v4l2_ctrl_new_std(&inst->ctrl_handler, &venc_ctrl_ops,
 		V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP, 1, 51, 1, 26);
 
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
index 570f391f2cfd..3ad4f5073002 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
@@ -692,12 +692,12 @@ static struct mfc_control controls[] = {
 		.default_value = 10,
 	},
 	{
-		.id = V4L2_CID_MPEG_VIDEO_VPX_PROFILE,
-		.type = V4L2_CTRL_TYPE_INTEGER,
-		.minimum = 0,
-		.maximum = 3,
-		.step = 1,
-		.default_value = 0,
+		.id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE,
+		.type = V4L2_CTRL_TYPE_MENU,
+		.minimum = V4L2_MPEG_VIDEO_VP8_PROFILE_0,
+		.maximum = V4L2_MPEG_VIDEO_VP8_PROFILE_3,
+		.default_value = V4L2_MPEG_VIDEO_VP8_PROFILE_0,
+		.menu_skip_mask = 0,
 	},
 	{
 		.id = V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP,
@@ -2057,7 +2057,7 @@ static int s5p_mfc_enc_s_ctrl(struct v4l2_ctrl *ctrl)
 	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:
 		p->codec.vp8.rc_p_frame_qp = ctrl->val;
 		break;
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		p->codec.vp8.profile = ctrl->val;
 		break;
 	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:
@@ -2711,4 +2711,3 @@ void s5p_mfc_enc_init(struct s5p_mfc_ctx *ctx)
 	f.fmt.pix_mp.pixelformat = DEF_DST_FMT_ENC;
 	ctx->dst_fmt = find_format(&f, MFC_FMT_ENC);
 }
-
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index d1087573da34..4a182a70de36 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -431,6 +431,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"Use Previous Specific Frame",
 		NULL,
 	};
+	static const char * const vp8_profile[] = {
+		"0",
+		"1",
+		"2",
+		"3",
+		NULL,
+	};
 
 	static const char * const flash_led_mode[] = {
 		"Off",
@@ -614,6 +621,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return mpeg4_profile;
 	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
 		return vpx_golden_frame_sel;
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
+		return vp8_profile;
 	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
 		return jpeg_chroma_subsampling;
 	case V4L2_CID_DV_TX_MODE:
@@ -839,7 +848,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_VPX_MAX_QP:			return "VPX Maximum QP Value";
 	case V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP:		return "VPX I-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_VPX_PROFILE:			return "VPX Profile";
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
 
 	/* HEVC controls */
 	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:		return "HEVC I-Frame QP Value";
@@ -1180,6 +1189,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DEINTERLACING_MODE:
 	case V4L2_CID_TUNE_DEEMPHASIS:
 	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 	case V4L2_CID_DETECT_MD_MODE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 8a75ad7899f3..ab96795b2829 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -587,7 +587,16 @@ enum v4l2_vp8_golden_frame_sel {
 #define V4L2_CID_MPEG_VIDEO_VPX_MAX_QP			(V4L2_CID_MPEG_BASE+508)
 #define V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP		(V4L2_CID_MPEG_BASE+509)
 #define V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP		(V4L2_CID_MPEG_BASE+510)
-#define V4L2_CID_MPEG_VIDEO_VPX_PROFILE			(V4L2_CID_MPEG_BASE+511)
+
+#define V4L2_CID_MPEG_VIDEO_VP8_PROFILE			(V4L2_CID_MPEG_BASE+511)
+enum v4l2_mpeg_video_vp8_profile {
+	V4L2_MPEG_VIDEO_VP8_PROFILE_0				= 0,
+	V4L2_MPEG_VIDEO_VP8_PROFILE_1				= 1,
+	V4L2_MPEG_VIDEO_VP8_PROFILE_2				= 2,
+	V4L2_MPEG_VIDEO_VP8_PROFILE_3				= 3,
+};
+/* Deprecated alias for compatibility reasons. */
+#define V4L2_CID_MPEG_VIDEO_VPX_PROFILE	V4L2_CID_MPEG_VIDEO_VP8_PROFILE
 
 /* CIDs for HEVC encoding. */
 
-- 
cgit v1.2.3


From 80b14dee2bea128928537d61c333f24cb8cbb62f Mon Sep 17 00:00:00 2001
From: Richard Cochran <rcochran@linutronix.de>
Date: Tue, 3 Jul 2018 15:42:48 -0700
Subject: net: Add a new socket option for a future transmit time.

This patch introduces SO_TXTIME. User space enables this option in
order to pass a desired future transmit time in a CMSG when calling
sendmsg(2). The argument to this socket option is a 8-bytes long struct
provided by the uapi header net_tstamp.h defined as:

struct sock_txtime {
	clockid_t 	clockid;
	u32		flags;
};

Note that new fields were added to struct sock by filling a 2-bytes
hole found in the struct. For that reason, neither the struct size or
number of cachelines were altered.

Signed-off-by: Richard Cochran <rcochran@linutronix.de>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  |  3 +++
 arch/ia64/include/uapi/asm/socket.h   |  3 +++
 arch/mips/include/uapi/asm/socket.h   |  3 +++
 arch/parisc/include/uapi/asm/socket.h |  3 +++
 arch/s390/include/uapi/asm/socket.h   |  3 +++
 arch/sparc/include/uapi/asm/socket.h  |  3 +++
 arch/xtensa/include/uapi/asm/socket.h |  3 +++
 include/net/sock.h                    | 10 ++++++++++
 include/uapi/asm-generic/socket.h     |  3 +++
 include/uapi/linux/net_tstamp.h       | 15 +++++++++++++++
 net/core/sock.c                       | 35 +++++++++++++++++++++++++++++++++++
 11 files changed, 84 insertions(+)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index be14f16149d5..065fb372e355 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -112,4 +112,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 3efba40adc54..c872c4e6bafb 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -114,4 +114,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 49c3d4795963..71370fb3ceef 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -123,4 +123,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 1d0fdc3b5d22..061b9cf2a779 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -104,4 +104,7 @@
 
 #define SO_ZEROCOPY		0x4035
 
+#define SO_TXTIME		0x4036
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 3510c0fd06f4..39d901476ee5 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -111,4 +111,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index d58520c2e6ff..7ea35e5601b6 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -101,6 +101,9 @@
 
 #define SO_ZEROCOPY		0x003e
 
+#define SO_TXTIME		0x003f
+#define SCM_TXTIME		SO_TXTIME
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 75a07b8119a9..1de07a7f7680 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -116,4 +116,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 2ed99bfa4595..68347b9821c6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -319,6 +319,9 @@ struct sock_common {
   *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
   *	@sk_reuseport_cb: reuseport group container
   *	@sk_rcu: used during RCU grace period
+  *	@sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
+  *	@sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+  *	@sk_txtime_unused: unused txtime flags
   */
 struct sock {
 	/*
@@ -475,6 +478,11 @@ struct sock {
 	u8			sk_shutdown;
 	u32			sk_tskey;
 	atomic_t		sk_zckey;
+
+	u8			sk_clockid;
+	u8			sk_txtime_deadline_mode : 1,
+				sk_txtime_unused : 7;
+
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 #ifdef CONFIG_SECURITY
@@ -790,6 +798,7 @@ enum sock_flags {
 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
+	SOCK_TXTIME,
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
@@ -1585,6 +1594,7 @@ void sock_kzfree_s(struct sock *sk, void *mem, int size);
 void sk_send_sigurg(struct sock *sk);
 
 struct sockcm_cookie {
+	u64 transmit_time;
 	u32 mark;
 	u16 tsflags;
 };
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 0ae758c90e54..a12692e5f7a8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -107,4 +107,7 @@
 
 #define SO_ZEROCOPY		60
 
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 4fe104b2411f..c9a77c353b98 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -141,4 +141,19 @@ struct scm_ts_pktinfo {
 	__u32 reserved[2];
 };
 
+/*
+ * SO_TXTIME gets a struct sock_txtime with flags being an integer bit
+ * field comprised of these values.
+ */
+enum txtime_flags {
+	SOF_TXTIME_DEADLINE_MODE = (1 << 0),
+
+	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_DEADLINE_MODE)
+};
+
+struct sock_txtime {
+	clockid_t       clockid;        /* reference clockid */
+	u32             flags;          /* flags defined by enum txtime_flags */
+};
+
 #endif /* _NET_TIMESTAMPING_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 6429982eb976..fe64b839f1b2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -91,6 +91,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <asm/unaligned.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -697,6 +698,7 @@ EXPORT_SYMBOL(sk_mc_loop);
 int sock_setsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, unsigned int optlen)
 {
+	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
 	int val;
 	int valbool;
@@ -1070,6 +1072,24 @@ set_rcvbuf:
 		}
 		break;
 
+	case SO_TXTIME:
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+		} else if (optlen != sizeof(struct sock_txtime)) {
+			ret = -EINVAL;
+		} else if (copy_from_user(&sk_txtime, optval,
+			   sizeof(struct sock_txtime))) {
+			ret = -EFAULT;
+		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
+			ret = -EINVAL;
+		} else {
+			sock_valbool_flag(sk, SOCK_TXTIME, true);
+			sk->sk_clockid = sk_txtime.clockid;
+			sk->sk_txtime_deadline_mode =
+				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		}
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1115,6 +1135,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		u64 val64;
 		struct linger ling;
 		struct timeval tm;
+		struct sock_txtime txtime;
 	} v;
 
 	int lv = sizeof(int);
@@ -1403,6 +1424,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
 		break;
 
+	case SO_TXTIME:
+		lv = sizeof(v.txtime);
+		v.txtime.clockid = sk->sk_clockid;
+		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
+				  SOF_TXTIME_DEADLINE_MODE : 0;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -2137,6 +2165,13 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
 		sockc->tsflags |= tsflags;
 		break;
+	case SCM_TXTIME:
+		if (!sock_flag(sk, SOCK_TXTIME))
+			return -EINVAL;
+		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
+			return -EINVAL;
+		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
+		break;
 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
 	case SCM_RIGHTS:
 	case SCM_CREDENTIALS:
-- 
cgit v1.2.3


From bc969a977880511057053642a81371196303ca01 Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:42:49 -0700
Subject: net: ipv4: Hook into time based transmission

Add a transmit_time field to struct inet_cork, then copy the
timestamp from the CMSG cookie at ip_setup_cork() so we can
safely copy it into the skb later during __ip_make_skb().

For the raw fast path, just perform the copy at raw_send_hdrinc().

Signed-off-by: Richard Cochran <rcochran@linutronix.de>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 net/ipv4/icmp.c         | 2 ++
 net/ipv4/ip_output.c    | 3 +++
 net/ipv4/ping.c         | 1 +
 net/ipv4/raw.c          | 2 ++
 net/ipv4/udp.c          | 1 +
 6 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 83d5b3c2ac42..314be484c696 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -148,6 +148,7 @@ struct inet_cork {
 	__s16			tos;
 	char			priority;
 	__u16			gso_size;
+	u64			transmit_time;
 };
 
 struct inet_cork_full {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..937239afd68d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -437,6 +437,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
+	ipc.sockc.transmit_time = 0;
 
 	if (icmp_param->replyopts.opt.opt.optlen) {
 		ipc.opt = &icmp_param->replyopts.opt;
@@ -715,6 +716,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
+	ipc.sockc.transmit_time = 0;
 
 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, &icmp_param);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 188cc586e7ff..570e3ebc3974 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1154,6 +1154,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->tos = ipc->tos;
 	cork->priority = ipc->priority;
 	cork->tx_flags = ipc->tx_flags;
+	cork->transmit_time = ipc->sockc.transmit_time;
 
 	return 0;
 }
@@ -1414,6 +1415,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
 	skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = cork->transmit_time;
 	/*
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
 	 * on dst refcount
@@ -1551,6 +1553,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
+	ipc.sockc.transmit_time = 0;
 
 	if (replyopts.opt.opt.optlen) {
 		ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2ed64bca54e3..b47492205507 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -746,6 +746,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
+	ipc.sockc.transmit_time = 0;
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index abb3c9490c55..446af7be2b55 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -381,6 +381,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = sockc->transmit_time;
 	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
@@ -562,6 +563,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 
 	ipc.sockc.tsflags = sk->sk_tsflags;
+	ipc.sockc.transmit_time = 0;
 	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
 	ipc.tx_flags = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24e116ddae79..5c76ba0666ec 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -930,6 +930,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	ipc.tx_flags = 0;
 	ipc.ttl = 0;
 	ipc.tos = -1;
+	ipc.sockc.transmit_time = 0;
 
 	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
 
-- 
cgit v1.2.3


From 860b642b9c33ea4a6ae2f416607b0b98a9d11bb0 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Tue, 3 Jul 2018 15:42:52 -0700
Subject: net/sched: Allow creating a Qdisc watchdog with other clocks

This adds 'qdisc_watchdog_init_clockid()' that allows a clockid to be
passed, this allows other time references to be used when scheduling
the Qdisc to run.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  2 ++
 net/sched/sch_api.c     | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 815b92a23936..2466ea143d01 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -72,6 +72,8 @@ struct qdisc_watchdog {
 	struct Qdisc	*qdisc;
 };
 
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+				 clockid_t clockid);
 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires);
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 54eca685420f..98541c6399db 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -596,12 +596,19 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+				 clockid_t clockid)
 {
-	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
 	wd->timer.function = qdisc_watchdog;
 	wd->qdisc = qdisc;
 }
+EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
-- 
cgit v1.2.3


From 25db26a91364db00f5a30da2fea8e9afe14a163c Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Tue, 3 Jul 2018 15:42:53 -0700
Subject: net/sched: Introduce the ETF Qdisc

The ETF (Earliest TxTime First) qdisc uses the information added
earlier in this series (the socket option SO_TXTIME and the new
role of sk_buff->tstamp) to schedule packets transmission based
on absolute time.

For some workloads, just bandwidth enforcement is not enough, and
precise control of the transmission of packets is necessary.

Example:

$ tc qdisc replace dev enp2s0 parent root handle 100 mqprio num_tc 3 \
           map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0

$ tc qdisc add dev enp2s0 parent 100:1 etf delta 100000 \
           clockid CLOCK_TAI

In this example, the Qdisc will provide SW best-effort for the control
of the transmission time to the network adapter, the time stamp in the
socket will be in reference to the clockid CLOCK_TAI and packets
will leave the qdisc "delta" (100000) nanoseconds before its transmission
time.

The ETF qdisc will buffer packets sorted by their txtime. It will drop
packets on enqueue() if their skbuff clockid does not match the clock
reference of the Qdisc. Moreover, on dequeue(), a packet will be dropped
if it expires while being enqueued.

The qdisc also supports the SO_TXTIME deadline mode. For this mode, it
will dequeue a packet as soon as possible and change the skb timestamp
to 'now' during etf_dequeue().

Note that both the qdisc's and the SO_TXTIME ABIs allow for a clockid
to be configured, but it's been decided that usage of CLOCK_TAI should
be enforced until we decide to allow for other clockids to be used.
The rationale here is that PTP times are usually in the TAI scale, thus
no other clocks should be necessary. For now, the qdisc will return
EINVAL if any clocks other than CLOCK_TAI are used.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |   1 +
 include/uapi/linux/pkt_sched.h |  17 ++
 net/sched/Kconfig              |  11 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_etf.c            | 384 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 414 insertions(+)
 create mode 100644 net/sched/sch_etf.c

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1ef749b6f9f..f06ee8f91e74 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -798,6 +798,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_RED,
 	TC_SETUP_QDISC_PRIO,
 	TC_SETUP_QDISC_MQ,
+	TC_SETUP_QDISC_ETF,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index bad3c03bcf43..d5e933ce1447 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -937,4 +937,21 @@ enum {
 
 #define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
 
+
+/* ETF */
+struct tc_etf_qopt {
+	__s32 delta;
+	__s32 clockid;
+	__u32 flags;
+#define TC_ETF_DEADLINE_MODE_ON	BIT(0)
+};
+
+enum {
+	TCA_ETF_UNSPEC,
+	TCA_ETF_PARMS,
+	__TCA_ETF_MAX,
+};
+
+#define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169fb5325..fcc89706745b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -183,6 +183,17 @@ config NET_SCH_CBS
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_cbs.
 
+config NET_SCH_ETF
+	tristate "Earliest TxTime First (ETF)"
+	help
+	  Say Y here if you want to use the Earliest TxTime First (ETF) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_etf.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_etf.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d3804878..9a5a7077d217 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
 obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
+obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
new file mode 100644
index 000000000000..4b7f4903ac17
--- /dev/null
+++ b/net/sched/sch_etf.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
+ *
+ * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
+ *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/rbtree.h>
+#include <linux/skbuff.h>
+#include <linux/posix-timers.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
+
+struct etf_sched_data {
+	bool deadline_mode;
+	int clockid;
+	int queue;
+	s32 delta; /* in ns */
+	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
+	struct rb_root head;
+	struct qdisc_watchdog watchdog;
+	ktime_t (*get_time)(void);
+};
+
+static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
+	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
+};
+
+static inline int validate_input_params(struct tc_etf_qopt *qopt,
+					struct netlink_ext_ack *extack)
+{
+	/* Check if params comply to the following rules:
+	 *	* Clockid and delta must be valid.
+	 *
+	 *	* Dynamic clockids are not supported.
+	 *
+	 *	* Delta must be a positive integer.
+	 */
+	if (qopt->clockid < 0) {
+		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
+		return -ENOTSUPP;
+	}
+
+	if (qopt->clockid != CLOCK_TAI) {
+		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
+		return -EINVAL;
+	}
+
+	if (qopt->delta < 0) {
+		NL_SET_ERR_MSG(extack, "Delta must be positive");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	ktime_t txtime = nskb->tstamp;
+	struct sock *sk = nskb->sk;
+	ktime_t now;
+
+	if (!sk)
+		return false;
+
+	if (!sock_flag(sk, SOCK_TXTIME))
+		return false;
+
+	/* We don't perform crosstimestamping.
+	 * Drop if packet's clockid differs from qdisc's.
+	 */
+	if (sk->sk_clockid != q->clockid)
+		return false;
+
+	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
+		return false;
+
+	now = q->get_time();
+	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
+		return false;
+
+	return true;
+}
+
+static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p;
+
+	p = rb_first(&q->head);
+	if (!p)
+		return NULL;
+
+	return rb_to_skb(p);
+}
+
+static void reset_watchdog(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = etf_peek_timesortedlist(sch);
+	ktime_t next;
+
+	if (!skb)
+		return;
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
+}
+
+static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
+				      struct sk_buff **to_free)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node **p = &q->head.rb_node, *parent = NULL;
+	ktime_t txtime = nskb->tstamp;
+
+	if (!is_packet_valid(sch, nskb))
+		return qdisc_drop(nskb, sch, to_free);
+
+	while (*p) {
+		struct sk_buff *skb;
+
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (ktime_after(txtime, skb->tstamp))
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&nskb->rbnode, parent, p);
+	rb_insert_color(&nskb->rbnode, &q->head);
+
+	qdisc_qstats_backlog_inc(sch, nskb);
+	sch->q.qlen++;
+
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return NET_XMIT_SUCCESS;
+}
+
+static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
+				 bool drop)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	rb_erase(&skb->rbnode, &q->head);
+
+	/* The rbnode field in the skb re-uses these fields, now that
+	 * we are done with the rbnode, reset them.
+	 */
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->dev = qdisc_dev(sch);
+
+	qdisc_qstats_backlog_dec(sch, skb);
+
+	if (drop) {
+		struct sk_buff *to_free = NULL;
+
+		qdisc_drop(skb, sch, &to_free);
+		kfree_skb_list(to_free);
+		qdisc_qstats_overlimit(sch);
+	} else {
+		qdisc_bstats_update(sch, skb);
+
+		q->last = skb->tstamp;
+	}
+
+	sch->q.qlen--;
+}
+
+static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	ktime_t now, next;
+
+	skb = etf_peek_timesortedlist(sch);
+	if (!skb)
+		return NULL;
+
+	now = q->get_time();
+
+	/* Drop if packet has expired while in queue. */
+	/* FIXME: Must return error on the socket's error queue */
+	if (ktime_before(skb->tstamp, now)) {
+		timesortedlist_erase(sch, skb, true);
+		skb = NULL;
+		goto out;
+	}
+
+	/* When in deadline mode, dequeue as soon as possible and change the
+	 * txtime from deadline to (now + delta).
+	 */
+	if (q->deadline_mode) {
+		timesortedlist_erase(sch, skb, false);
+		skb->tstamp = now;
+		goto out;
+	}
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+
+	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
+	if (ktime_after(now, next))
+		timesortedlist_erase(sch, skb, false);
+	else
+		skb = NULL;
+
+out:
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return skb;
+}
+
+static int etf_init(struct Qdisc *sch, struct nlattr *opt,
+		    struct netlink_ext_ack *extack)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *tb[TCA_ETF_MAX + 1];
+	struct tc_etf_qopt *qopt;
+	int err;
+
+	if (!opt) {
+		NL_SET_ERR_MSG(extack,
+			       "Missing ETF qdisc options which are mandatory");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_ETF_PARMS]) {
+		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
+		return -EINVAL;
+	}
+
+	qopt = nla_data(tb[TCA_ETF_PARMS]);
+
+	pr_debug("delta %d clockid %d deadline %s\n",
+		 qopt->delta, qopt->clockid,
+		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
+
+	err = validate_input_params(qopt, extack);
+	if (err < 0)
+		return err;
+
+	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+	/* Everything went OK, save the parameters used. */
+	q->delta = qopt->delta;
+	q->clockid = qopt->clockid;
+	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+
+	switch (q->clockid) {
+	case CLOCK_REALTIME:
+		q->get_time = ktime_get_real;
+		break;
+	case CLOCK_MONOTONIC:
+		q->get_time = ktime_get;
+		break;
+	case CLOCK_BOOTTIME:
+		q->get_time = ktime_get_boottime;
+		break;
+	case CLOCK_TAI:
+		q->get_time = ktime_get_clocktai;
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Clockid is not supported");
+		return -ENOTSUPP;
+	}
+
+	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
+
+	return 0;
+}
+
+static void timesortedlist_clear(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p = rb_first(&q->head);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+
+		rb_erase(&skb->rbnode, &q->head);
+		rtnl_kfree_skbs(skb, skb);
+		sch->q.qlen--;
+	}
+}
+
+static void etf_reset(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+
+	/* No matter which mode we are on, it's safe to clear both lists. */
+	timesortedlist_clear(sch);
+	__qdisc_reset_queue(&sch->q);
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	q->last = 0;
+}
+
+static void etf_destroy(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+}
+
+static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct tc_etf_qopt opt = { };
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	opt.delta = q->delta;
+	opt.clockid = q->clockid;
+	if (q->deadline_mode)
+		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+
+	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
+	.id		=	"etf",
+	.priv_size	=	sizeof(struct etf_sched_data),
+	.enqueue	=	etf_enqueue_timesortedlist,
+	.dequeue	=	etf_dequeue_timesortedlist,
+	.peek		=	etf_peek_timesortedlist,
+	.init		=	etf_init,
+	.reset		=	etf_reset,
+	.destroy	=	etf_destroy,
+	.dump		=	etf_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init etf_module_init(void)
+{
+	return register_qdisc(&etf_qdisc_ops);
+}
+
+static void __exit etf_module_exit(void)
+{
+	unregister_qdisc(&etf_qdisc_ops);
+}
+module_init(etf_module_init)
+module_exit(etf_module_exit)
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 88cab77162e86e0f6a2b7e4f859c1435c4e24feb Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:42:54 -0700
Subject: net/sched: Add HW offloading capability to ETF

Add infra so etf qdisc supports HW offload of time-based transmission.

For hw offload, the time sorted list is still used, so packets are
dequeued always in order of txtime.

Example:

$ tc qdisc replace dev enp2s0 parent root handle 100 mqprio num_tc 3 \
           map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 0

$ tc qdisc add dev enp2s0 parent 100:1 etf offload delta 100000 \
	   clockid CLOCK_REALTIME

In this example, the Qdisc will use HW offload for the control of the
transmission time through the network adapter. The hrtimer used for
packets scheduling inside the qdisc will use the clockid CLOCK_REALTIME
as reference and packets leave the Qdisc "delta" (100000) nanoseconds
before their transmission time. Because this will be using HW offload and
since dynamic clocks are not supported by the hrtimer, the system clock
and the PHC clock must be synchronized for this mode to behave as
expected.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h        |  5 +++
 include/uapi/linux/pkt_sched.h |  1 +
 net/sched/sch_etf.c            | 71 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 76 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 2466ea143d01..7dc769e5452b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -155,4 +155,9 @@ struct tc_cbs_qopt_offload {
 	s32 sendslope;
 };
 
+struct tc_etf_qopt_offload {
+	u8 enable;
+	s32 queue;
+};
+
 #endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d5e933ce1447..949118461009 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -944,6 +944,7 @@ struct tc_etf_qopt {
 	__s32 clockid;
 	__u32 flags;
 #define TC_ETF_DEADLINE_MODE_ON	BIT(0)
+#define TC_ETF_OFFLOAD_ON	BIT(1)
 };
 
 enum {
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 4b7f4903ac17..932a136db568 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -20,8 +20,10 @@
 #include <net/sock.h>
 
 #define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
+#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
 
 struct etf_sched_data {
+	bool offload;
 	bool deadline_mode;
 	int clockid;
 	int queue;
@@ -45,6 +47,9 @@ static inline int validate_input_params(struct tc_etf_qopt *qopt,
 	 *	* Dynamic clockids are not supported.
 	 *
 	 *	* Delta must be a positive integer.
+	 *
+	 * Also note that for the HW offload case, we must
+	 * expect that system clocks have been synchronized to PHC.
 	 */
 	if (qopt->clockid < 0) {
 		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
@@ -225,6 +230,56 @@ out:
 	return skb;
 }
 
+static void etf_disable_offload(struct net_device *dev,
+				struct etf_sched_data *q)
+{
+	struct tc_etf_qopt_offload etf = { };
+	const struct net_device_ops *ops;
+	int err;
+
+	if (!q->offload)
+		return;
+
+	ops = dev->netdev_ops;
+	if (!ops->ndo_setup_tc)
+		return;
+
+	etf.queue = q->queue;
+	etf.enable = 0;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+	if (err < 0)
+		pr_warn("Couldn't disable ETF offload for queue %d\n",
+			etf.queue);
+}
+
+static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
+			      struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_etf_qopt_offload etf = { };
+	int err;
+
+	if (q->offload)
+		return 0;
+
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
+		return -EOPNOTSUPP;
+	}
+
+	etf.queue = q->queue;
+	etf.enable = 1;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
+		return err;
+	}
+
+	return 0;
+}
+
 static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 		    struct netlink_ext_ack *extack)
 {
@@ -251,8 +306,9 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 
 	qopt = nla_data(tb[TCA_ETF_PARMS]);
 
-	pr_debug("delta %d clockid %d deadline %s\n",
+	pr_debug("delta %d clockid %d offload %s deadline %s\n",
 		 qopt->delta, qopt->clockid,
+		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
 		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
 
 	err = validate_input_params(qopt, extack);
@@ -261,9 +317,16 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt,
 
 	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
 
+	if (OFFLOAD_IS_ON(qopt)) {
+		err = etf_enable_offload(dev, q, extack);
+		if (err < 0)
+			return err;
+	}
+
 	/* Everything went OK, save the parameters used. */
 	q->delta = qopt->delta;
 	q->clockid = qopt->clockid;
+	q->offload = OFFLOAD_IS_ON(qopt);
 	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
 
 	switch (q->clockid) {
@@ -326,10 +389,13 @@ static void etf_reset(struct Qdisc *sch)
 static void etf_destroy(struct Qdisc *sch)
 {
 	struct etf_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
 
 	/* Only cancel watchdog if it's been initialized. */
 	if (q->watchdog.qdisc == sch)
 		qdisc_watchdog_cancel(&q->watchdog);
+
+	etf_disable_offload(dev, q);
 }
 
 static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -344,6 +410,9 @@ static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
 
 	opt.delta = q->delta;
 	opt.clockid = q->clockid;
+	if (q->offload)
+		opt.flags |= TC_ETF_OFFLOAD_ON;
+
 	if (q->deadline_mode)
 		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
 
-- 
cgit v1.2.3


From 4b15c7075352668d4467ced7594b676707d11cae Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Tue, 3 Jul 2018 15:43:00 -0700
Subject: net/sched: Make etf report drops on error_queue

Use the socket error queue for reporting dropped packets if the
socket has enabled that feature through the SO_TXTIME API.

Packets are dropped either on enqueue() if they aren't accepted by the
qdisc or on dequeue() if the system misses their deadline. Those are
reported as different errors so applications can react accordingly.

Userspace can retrieve the errors through the socket error queue and the
corresponding cmsg interfaces. A struct sock_extended_err* is used for
returning the error data, and the packet's timestamp can be retrieved by
adding both ee_data and ee_info fields as e.g.:

    ((__u64) serr->ee_data << 32) + serr->ee_info

This feature is disabled by default and must be explicitly enabled by
applications. Enabling it can bring some overhead for the Tx cycles
of the application.

Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h              |  3 ++-
 include/uapi/linux/errqueue.h   |  4 ++++
 include/uapi/linux/net_tstamp.h |  5 ++++-
 net/core/sock.c                 |  4 ++++
 net/sched/sch_etf.c             | 35 +++++++++++++++++++++++++++++++++--
 5 files changed, 47 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 68347b9821c6..e0eac9ef44b5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -481,7 +481,8 @@ struct sock {
 
 	u8			sk_clockid;
 	u8			sk_txtime_deadline_mode : 1,
-				sk_txtime_unused : 7;
+				sk_txtime_report_errors : 1,
+				sk_txtime_unused : 6;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index dc64cfaf13da..c0151200f7d1 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -20,12 +20,16 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_ICMP6	3
 #define SO_EE_ORIGIN_TXSTATUS	4
 #define SO_EE_ORIGIN_ZEROCOPY	5
+#define SO_EE_ORIGIN_TXTIME	6
 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
 
+#define SO_EE_CODE_TXTIME_INVALID_PARAM	1
+#define SO_EE_CODE_TXTIME_MISSED	2
+
 /**
  *	struct scm_timestamping - timestamps exposed through cmsg
  *
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index c9a77c353b98..f8f4539f1135 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -147,8 +147,11 @@ struct scm_ts_pktinfo {
  */
 enum txtime_flags {
 	SOF_TXTIME_DEADLINE_MODE = (1 << 0),
+	SOF_TXTIME_REPORT_ERRORS = (1 << 1),
 
-	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_DEADLINE_MODE)
+	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_REPORT_ERRORS,
+	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
+				 SOF_TXTIME_FLAGS_LAST
 };
 
 struct sock_txtime {
diff --git a/net/core/sock.c b/net/core/sock.c
index fe64b839f1b2..03fdea5b0f57 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1087,6 +1087,8 @@ set_rcvbuf:
 			sk->sk_clockid = sk_txtime.clockid;
 			sk->sk_txtime_deadline_mode =
 				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+			sk->sk_txtime_report_errors =
+				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
 		}
 		break;
 
@@ -1429,6 +1431,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.txtime.clockid = sk->sk_clockid;
 		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
 				  SOF_TXTIME_DEADLINE_MODE : 0;
+		v.txtime.flags |= sk->sk_txtime_report_errors ?
+				  SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
 	default:
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 932a136db568..1538d6fa8165 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/errqueue.h>
 #include <linux/rbtree.h>
 #include <linux/skbuff.h>
 #include <linux/posix-timers.h>
@@ -123,6 +124,32 @@ static void reset_watchdog(struct Qdisc *sch)
 	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
 }
 
+static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *clone;
+	ktime_t txtime = skb->tstamp;
+
+	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+		return;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (!clone)
+		return;
+
+	serr = SKB_EXT_ERR(clone);
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
+	serr->ee.ee_type = 0;
+	serr->ee.ee_code = code;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
+	serr->ee.ee_info = txtime; /* low part of tstamp */
+
+	if (sock_queue_err_skb(skb->sk, clone))
+		kfree_skb(clone);
+}
+
 static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
 				      struct sk_buff **to_free)
 {
@@ -130,8 +157,11 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
 	struct rb_node **p = &q->head.rb_node, *parent = NULL;
 	ktime_t txtime = nskb->tstamp;
 
-	if (!is_packet_valid(sch, nskb))
+	if (!is_packet_valid(sch, nskb)) {
+		report_sock_error(nskb, EINVAL,
+				  SO_EE_CODE_TXTIME_INVALID_PARAM);
 		return qdisc_drop(nskb, sch, to_free);
+	}
 
 	while (*p) {
 		struct sk_buff *skb;
@@ -174,6 +204,8 @@ static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
 	if (drop) {
 		struct sk_buff *to_free = NULL;
 
+		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
+
 		qdisc_drop(skb, sch, &to_free);
 		kfree_skb_list(to_free);
 		qdisc_qstats_overlimit(sch);
@@ -199,7 +231,6 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
 	now = q->get_time();
 
 	/* Drop if packet has expired while in queue. */
-	/* FIXME: Must return error on the socket's error queue */
 	if (ktime_before(skb->tstamp, now)) {
 		timesortedlist_erase(sch, skb, true);
 		skb = NULL;
-- 
cgit v1.2.3


From 2a75364d09b05f257f4cd1f718e06e0247eb1dd3 Mon Sep 17 00:00:00 2001
From: Keiichi Watanabe <keiichiw@chromium.org>
Date: Mon, 18 Jun 2018 03:58:53 -0400
Subject: media: v4l2-ctrl: Add control for VP9 profile

Add a new control V4L2_CID_MPEG_VIDEO_VP9_PROFILE for VP9 profiles. This control
allows selecting the desired profile for VP9 encoder and querying for supported
profiles by VP9 encoder/decoder.

Though this control is similar to V4L2_CID_MPEG_VIDEO_VP8_PROFILE, we need to
separate this control from it because supported profiles usually differ between
VP8 and VP9.

Signed-off-by: Keiichi Watanabe <keiichiw@chromium.org>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 23 ++++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls.c               | 11 +++++++++++
 include/uapi/linux/v4l2-controls.h                 |  7 +++++++
 3 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 01ef31a934b4..9f7312bf3365 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1978,6 +1978,29 @@ enum v4l2_mpeg_video_vp8_profile -
     * - ``V4L2_MPEG_VIDEO_VP8_PROFILE_3``
       - Profile 3
 
+.. _v4l2-mpeg-video-vp9-profile:
+
+``V4L2_CID_MPEG_VIDEO_VP9_PROFILE``
+    (enum)
+
+enum v4l2_mpeg_video_vp9_profile -
+    This control allows selecting the profile for VP9 encoder.
+    This is also used to enumerate supported profiles by VP9 encoder or decoder.
+    Possible values are:
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_VP9_PROFILE_0``
+      - Profile 0
+    * - ``V4L2_MPEG_VIDEO_VP9_PROFILE_1``
+      - Profile 1
+    * - ``V4L2_MPEG_VIDEO_VP9_PROFILE_2``
+      - Profile 2
+    * - ``V4L2_MPEG_VIDEO_VP9_PROFILE_3``
+      - Profile 3
+
 
 High Efficiency Video Coding (HEVC/H.265) Control Reference
 -----------------------------------------------------------
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 4a182a70de36..599c1cbff3b9 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -438,6 +438,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"3",
 		NULL,
 	};
+	static const char * const vp9_profile[] = {
+		"0",
+		"1",
+		"2",
+		"3",
+		NULL,
+	};
 
 	static const char * const flash_led_mode[] = {
 		"Off",
@@ -623,6 +630,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return vpx_golden_frame_sel;
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
 		return vp8_profile;
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
+		return vp9_profile;
 	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
 		return jpeg_chroma_subsampling;
 	case V4L2_CID_DV_TX_MODE:
@@ -849,6 +858,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP:		return "VPX I-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:			return "VP9 Profile";
 
 	/* HEVC controls */
 	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:		return "HEVC I-Frame QP Value";
@@ -1190,6 +1200,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_TUNE_DEEMPHASIS:
 	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
 	case V4L2_CID_DETECT_MD_MODE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index ab96795b2829..e4ee10ee917d 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -597,6 +597,13 @@ enum v4l2_mpeg_video_vp8_profile {
 };
 /* Deprecated alias for compatibility reasons. */
 #define V4L2_CID_MPEG_VIDEO_VPX_PROFILE	V4L2_CID_MPEG_VIDEO_VP8_PROFILE
+#define V4L2_CID_MPEG_VIDEO_VP9_PROFILE			(V4L2_CID_MPEG_BASE+512)
+enum v4l2_mpeg_video_vp9_profile {
+	V4L2_MPEG_VIDEO_VP9_PROFILE_0				= 0,
+	V4L2_MPEG_VIDEO_VP9_PROFILE_1				= 1,
+	V4L2_MPEG_VIDEO_VP9_PROFILE_2				= 2,
+	V4L2_MPEG_VIDEO_VP9_PROFILE_3				= 3,
+};
 
 /* CIDs for HEVC encoding. */
 
-- 
cgit v1.2.3


From b5cb15d9372abc9adc4e844c0c1bf594ca6a7695 Mon Sep 17 00:00:00 2001
From: Chris von Recklinghausen <crecklin@redhat.com>
Date: Tue, 3 Jul 2018 15:43:08 -0400
Subject: usercopy: Allow boot cmdline disabling of hardening

Enabling HARDENED_USERCOPY may cause measurable regressions in networking
performance: up to 8% under UDP flood.

I ran a small packet UDP flood using pktgen vs. a host b2b connected. On
the receiver side the UDP packets are processed by a simple user space
process that just reads and drops them:

https://github.com/netoptimizer/network-testing/blob/master/src/udp_sink.c

Not very useful from a functional PoV, but it helps to pin-point
bottlenecks in the networking stack.

When running a kernel with CONFIG_HARDENED_USERCOPY=y, I see a 5-8%
regression in the receive tput, compared to the same kernel without this
option enabled.

With CONFIG_HARDENED_USERCOPY=y, perf shows ~6% of CPU time spent
cumulatively in __check_object_size (~4%) and __virt_addr_valid (~2%).

The call-chain is:

__GI___libc_recvfrom
entry_SYSCALL_64_after_hwframe
do_syscall_64
__x64_sys_recvfrom
__sys_recvfrom
inet_recvmsg
udp_recvmsg
__check_object_size

udp_recvmsg() actually calls copy_to_iter() (inlined) and the latters
calls check_copy_size() (again, inlined).

A generic distro may want to enable HARDENED_USERCOPY in their default
kernel config, but at the same time, such distro may want to be able to
avoid the performance penalties in with the default configuration and
disable the stricter check on a per-boot basis.

This change adds a boot parameter that conditionally disables
HARDENED_USERCOPY via "hardened_usercopy=off".

Signed-off-by: Chris von Recklinghausen <crecklin@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++
 include/linux/jump_label.h                      |  6 ++++++
 mm/usercopy.c                                   | 25 +++++++++++++++++++++++++
 3 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..560d4dc66f02 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -816,6 +816,17 @@
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
+	hardened_usercopy=
+                        [KNL] Under CONFIG_HARDENED_USERCOPY, whether
+                        hardening is enabled for this boot. Hardened
+                        usercopy checking is used to protect the kernel
+                        from reading or writing beyond known memory
+                        allocation boundaries as a proactive defense
+                        against bounds-checking flaws in the kernel's
+                        copy_to_user()/copy_from_user() interface.
+                on      Perform hardened usercopy checks (default).
+                off     Disable hardened usercopy checks.
+
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9
 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index b46b541c67c4..1a0b6f17a5d6 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -299,12 +299,18 @@ struct static_key_false {
 #define DEFINE_STATIC_KEY_TRUE(name)	\
 	struct static_key_true name = STATIC_KEY_TRUE_INIT
 
+#define DEFINE_STATIC_KEY_TRUE_RO(name)	\
+	struct static_key_true name __ro_after_init = STATIC_KEY_TRUE_INIT
+
 #define DECLARE_STATIC_KEY_TRUE(name)	\
 	extern struct static_key_true name
 
 #define DEFINE_STATIC_KEY_FALSE(name)	\
 	struct static_key_false name = STATIC_KEY_FALSE_INIT
 
+#define DEFINE_STATIC_KEY_FALSE_RO(name)	\
+	struct static_key_false name __ro_after_init = STATIC_KEY_FALSE_INIT
+
 #define DECLARE_STATIC_KEY_FALSE(name)	\
 	extern struct static_key_false name
 
diff --git a/mm/usercopy.c b/mm/usercopy.c
index e9e9325f7638..852eb4e53f06 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -20,6 +20,8 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
+#include <linux/atomic.h>
+#include <linux/jump_label.h>
 #include <asm/sections.h>
 
 /*
@@ -240,6 +242,8 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
 	}
 }
 
+static DEFINE_STATIC_KEY_FALSE_RO(bypass_usercopy_checks);
+
 /*
  * Validates that the given object is:
  * - not bogus address
@@ -248,6 +252,9 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
  */
 void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 {
+	if (static_branch_unlikely(&bypass_usercopy_checks))
+		return;
+
 	/* Skip all tests if size is zero. */
 	if (!n)
 		return;
@@ -279,3 +286,21 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user)
 	check_kernel_text_object((const unsigned long)ptr, n, to_user);
 }
 EXPORT_SYMBOL(__check_object_size);
+
+static bool enable_checks __initdata = true;
+
+static int __init parse_hardened_usercopy(char *str)
+{
+	return strtobool(str, &enable_checks);
+}
+
+__setup("hardened_usercopy=", parse_hardened_usercopy);
+
+static int __init set_hardened_usercopy(void)
+{
+	if (enable_checks == false)
+		static_branch_enable(&bypass_usercopy_checks);
+	return 1;
+}
+
+late_initcall(set_hardened_usercopy);
-- 
cgit v1.2.3


From 4d4fb5dc988a36307711be292bde6e39b8bdbceb Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 19 Jun 2018 08:47:22 +0300
Subject: net/mlx5: Limit scope of dump_fill_mkey function

mlx5_core_dump_fill_mkey() is going to be used in next
patch in IB and doesn't need to be visible to whole
mlx5_core. Move that command to mlx5_ib.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c             | 15 +++++++++++++++
 drivers/infiniband/hw/mlx5/cmd.h             |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 17 -----------------
 include/linux/mlx5/driver.h                  |  2 --
 4 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 188512bf46e6..ccc0b5d06a7d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -32,6 +32,21 @@
 
 #include "cmd.h"
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
+{
+	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
+	int err;
+
+	MLX5_SET(query_special_contexts_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*mkey = MLX5_GET(query_special_contexts_out, out,
+				 dump_fill_mkey);
+	return err;
+}
+
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
 {
 	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index e7206c8a8011..98ea4648c655 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/mlx5/driver.h>
 
+int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 			       void *out, int out_size);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index f4f02f775c93..0670165afd5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -146,23 +146,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
 
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-			     u32 *mkey)
-{
-	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
-	int err;
-
-	MLX5_SET(query_special_contexts_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (!err)
-		*mkey = MLX5_GET(query_special_contexts_out, out,
-				 dump_fill_mkey);
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
-
 static inline u32 mlx5_get_psv(u32 *out, int psv_index)
 {
 	switch (psv_index) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80cbb7fdce4a..1cb1c0317b77 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1067,8 +1067,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 			 u32 *out, int outlen);
-int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
-			     u32 *mkey);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-- 
cgit v1.2.3


From b183ee27f5fb07c8428e2fe45d5f35dac611c45d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 19 Jun 2018 08:47:23 +0300
Subject: net/mlx5: Add hardware definitions for dump_fill_mkey

MLX5 IB HCA offers the memory key, dump_fill_mkey to boost
performance by forcing local HCA operations to skip the PCI bus
access,

This patch adds needed hardware definitions.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index deb3a459a22e..1853e7fd6924 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -885,7 +885,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_eq_sz[0x8];
 	u8         reserved_at_e8[0x2];
 	u8         log_max_mkey[0x6];
-	u8         reserved_at_f0[0xc];
+	u8         reserved_at_f0[0x8];
+	u8         dump_fill_mkey[0x1];
+	u8         reserved_at_f9[0x3];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
-- 
cgit v1.2.3


From 25bb36e75d7d62dc14ae2306dca38d672e0c3fa0 Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 19 Jun 2018 08:47:24 +0300
Subject: IB/mlx5: Expose dump and fill memory key

MLX5 IB HCA offers the memory key, dump_fill_mkey to boost
performance, when used in a send or receive operations.

It is used to force local HCA operations to skip the PCI bus access,
while keeping track of the processed length in the ibv_sge handling.

Meaning, instead of a PCI write access the HCA leaves the target
memory untouched, and skips filling that packet section. Similar
behavior is done upon send, the HCA skips data in memory relevant
to this key and saves PCI bus access.

This functionality saves PCI read/write operations.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 16 ++++++++++++++++
 include/uapi/rdma/mlx5-abi.h      |  3 ++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e52dd21519b4..bd402691c2c0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1660,6 +1660,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	int err;
 	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
 				     max_cqe_version);
+	u32 dump_fill_mkey;
 	bool lib_uar_4k;
 
 	if (!dev->ib_active)
@@ -1761,6 +1762,12 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 			goto out_uars;
 	}
 
+	if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+		err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
+		if (err)
+			goto out_td;
+	}
+
 	INIT_LIST_HEAD(&context->vma_private_list);
 	mutex_init(&context->vma_private_list_mutex);
 	INIT_LIST_HEAD(&context->db_page_list);
@@ -1819,6 +1826,15 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 		resp.response_length += sizeof(resp.num_dyn_bfregs);
 	}
 
+	if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
+		if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+			resp.dump_fill_mkey = dump_fill_mkey;
+			resp.comp_mask |=
+				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+		}
+		resp.response_length += sizeof(resp.dump_fill_mkey);
+	}
+
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
 		goto out_td;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8daec1fa49cf..1f7b7b6bddf0 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -90,6 +90,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
 
 enum mlx5_ib_alloc_ucontext_resp_mask {
 	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
+	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY    = 1UL << 1,
 };
 
 enum mlx5_user_cmds_supp_uhw {
@@ -138,7 +139,7 @@ struct mlx5_ib_alloc_ucontext_resp {
 	__u32	log_uar_size;
 	__u32	num_uars_per_page;
 	__u32	num_dyn_bfregs;
-	__u32	reserved3;
+	__u32	dump_fill_mkey;
 };
 
 struct mlx5_ib_alloc_pd_resp {
-- 
cgit v1.2.3


From 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc Mon Sep 17 00:00:00 2001
From: Chintan Pandya <cpandya@codeaurora.org>
Date: Wed, 27 Jun 2018 08:13:47 -0600
Subject: ioremap: Update pgtable free interfaces with addr

The following kernel panic was observed on ARM64 platform due to a stale
TLB entry.

 1. ioremap with 4K size, a valid pte page table is set.
 2. iounmap it, its pte entry is set to 0.
 3. ioremap the same address with 2M size, update its pmd entry with
    a new value.
 4. CPU may hit an exception because the old pmd entry is still in TLB,
    which leads to a kernel panic.

Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page
table") has addressed this panic by falling to pte mappings in the above
case on ARM64.

To support pmd mappings in all cases, TLB purge needs to be performed
in this case on ARM64.

Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page()
so that TLB purge can be added later in seprate patches.

[toshi.kani@hpe.com: merge changes, rewrite patch description]
Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces")
Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: mhocko@suse.com
Cc: akpm@linux-foundation.org
Cc: hpa@zytor.com
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: stable@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com
---
 arch/arm64/mm/mmu.c           |  4 ++--
 arch/x86/mm/pgtable.c         | 12 +++++++-----
 include/asm-generic/pgtable.h |  8 ++++----
 lib/ioremap.c                 |  4 ++--
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493ff75670ff..8ae5d7ae4af3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp)
 	return 1;
 }
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
 
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1aeb7a5dbce5..fbd14e506758 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -723,11 +723,12 @@ int pmd_clear_huge(pmd_t *pmd)
 /**
  * pud_free_pmd_page - Clear pud entry and free pmd page.
  * @pud: Pointer to a PUD.
+ * @addr: Virtual address associated with pud.
  *
  * Context: The pud range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	pmd_t *pmd;
 	int i;
@@ -738,7 +739,7 @@ int pud_free_pmd_page(pud_t *pud)
 	pmd = (pmd_t *)pud_page_vaddr(*pud);
 
 	for (i = 0; i < PTRS_PER_PMD; i++)
-		if (!pmd_free_pte_page(&pmd[i]))
+		if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE)))
 			return 0;
 
 	pud_clear(pud);
@@ -750,11 +751,12 @@ int pud_free_pmd_page(pud_t *pud)
 /**
  * pmd_free_pte_page - Clear pmd entry and free pte page.
  * @pmd: Pointer to a PMD.
+ * @addr: Virtual address associated with pmd.
  *
  * Context: The pmd range has been unmaped and TLB purged.
  * Return: 1 if clearing the entry succeeded. 0 otherwise.
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 
@@ -770,7 +772,7 @@ int pmd_free_pte_page(pmd_t *pmd)
 
 #else /* !CONFIG_X86_64 */
 
-int pud_free_pmd_page(pud_t *pud)
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return pud_none(*pud);
 }
@@ -779,7 +781,7 @@ int pud_free_pmd_page(pud_t *pud)
  * Disable free page handling on x86-PAE. This assures that ioremap()
  * does not update sync'd pmd entries. See vmalloc_sync_one().
  */
-int pmd_free_pte_page(pmd_t *pmd)
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return pmd_none(*pmd);
 }
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f59639afaa39..b081794ba135 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
-int pud_free_pmd_page(pud_t *pud);
-int pmd_free_pte_page(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud, unsigned long addr);
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr);
 #else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
 {
@@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
 	return 0;
 }
-static inline int pud_free_pmd_page(pud_t *pud)
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
 {
 	return 0;
 }
-static inline int pmd_free_pte_page(pmd_t *pmd)
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
 {
 	return 0;
 }
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 54e5bbaa3200..517f5853ffed 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
-		    pmd_free_pte_page(pmd)) {
+		    pmd_free_pte_page(pmd, addr)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
 		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
-		    pud_free_pmd_page(pud)) {
+		    pud_free_pmd_page(pud, addr)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}
-- 
cgit v1.2.3


From 87fc2a620a398e970872064841b0db7cc6d0149f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:23 +0300
Subject: RDMA/uverbs: Store the specs_root in the struct ib_uverbs_device

The specs are required to operate the uverbs file, so they belong inside
the ib_uverbs_device, not inside the ib_device. The spec passed in the
ib_device is just a communication from the driver and should not be used
during runtime.

This also changes the lifetime of the spec memory to match the
ib_uverbs_device, however at this time the spec_root can still contain
driver pointers after disassociation, so it cannot be used if ib_dev is
NULL. This is preparation for another series.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c    |  4 ++--
 drivers/infiniband/core/rdma_core.h    |  2 +-
 drivers/infiniband/core/uverbs_ioctl.c | 26 ++++++++++++--------------
 drivers/infiniband/core/uverbs_main.c  | 16 +++++++++-------
 drivers/infiniband/hw/mlx5/main.c      |  6 +++---
 include/rdma/ib_verbs.h                |  2 +-
 6 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 2ddf1c716ba8..c67bcdda5760 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -52,10 +52,10 @@ int uverbs_ns_idx(u16 *id, unsigned int ns_count)
 	return ret;
 }
 
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
 						   uint16_t object)
 {
-	const struct uverbs_root_spec *object_hash = ibdev->specs_root;
+	const struct uverbs_root_spec *object_hash = ufile->device->specs_root;
 	const struct uverbs_object_spec_hash *objects;
 	int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
 
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index a243cc2a59f7..8cede4546b25 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -44,7 +44,7 @@
 #include <linux/mutex.h>
 
 int uverbs_ns_idx(u16 *id, unsigned int ns_count);
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
 						   uint16_t object);
 const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
 						   uint16_t method);
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 03065bad8dae..785975a4e3dd 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -46,8 +46,7 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 			   0, uattr->len - len);
 }
 
-static int uverbs_process_attr(struct ib_device *ibdev,
-			       struct ib_ucontext *ucontext,
+static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 			       const struct ib_uverbs_attr *uattr,
 			       u16 attr_id,
 			       const struct uverbs_attr_spec_hash *attr_spec_bucket,
@@ -145,17 +144,18 @@ static int uverbs_process_attr(struct ib_device *ibdev,
 		if (uattr->attr_data.reserved)
 			return -EINVAL;
 
-		if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
+		if (uattr->len != 0 || !ufile->ucontext ||
+		    uattr->data > INT_MAX)
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
-		object = uverbs_get_object(ibdev, spec->obj.obj_type);
+		object = uverbs_get_object(ufile, spec->obj.obj_type);
 		if (!object)
 			return -EINVAL;
 
 		o_attr->uobject = uverbs_get_uobject_from_context(
 					object->type_attrs,
-					ucontext,
+					ufile->ucontext,
 					spec->obj.access,
 					(int)uattr->data);
 
@@ -230,8 +230,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
 	return ret;
 }
 
-static int uverbs_uattrs_process(struct ib_device *ibdev,
-				 struct ib_ucontext *ucontext,
+static int uverbs_uattrs_process(struct ib_uverbs_file *ufile,
 				 const struct ib_uverbs_attr *uattrs,
 				 size_t num_uattrs,
 				 const struct uverbs_method_spec *method,
@@ -267,9 +266,9 @@ static int uverbs_uattrs_process(struct ib_device *ibdev,
 			num_given_buckets = ret + 1;
 
 		attr_spec_bucket = method->attr_buckets[ret];
-		ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
-					  attr_spec_bucket, &attr_bundle->hash[ret],
-					  uattr_ptr++);
+		ret = uverbs_process_attr(ufile, uattr, attr_id,
+					  attr_spec_bucket,
+					  &attr_bundle->hash[ret], uattr_ptr++);
 		if (ret) {
 			uverbs_finalize_attrs(attr_bundle,
 					      method->attr_buckets,
@@ -324,9 +323,8 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
 	int finalize_ret;
 	int num_given_buckets;
 
-	num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs,
-						  num_uattrs, method_spec,
-						  attr_bundle, uattr_ptr);
+	num_given_buckets = uverbs_uattrs_process(
+		ufile, uattrs, num_uattrs, method_spec, attr_bundle, uattr_ptr);
 	if (num_given_buckets <= 0)
 		return -EINVAL;
 
@@ -367,7 +365,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 	if (hdr->driver_id != ib_dev->driver_id)
 		return -EINVAL;
 
-	object_spec = uverbs_get_object(ib_dev, hdr->object_id);
+	object_spec = uverbs_get_object(file, hdr->object_id);
 	if (!object_spec)
 		return -EPROTONOSUPPORT;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index f5f4bfb59705..c05ce5ae5415 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -161,6 +161,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
 		container_of(kobj, struct ib_uverbs_device, kobj);
 
 	cleanup_srcu_struct(&dev->disassociate_srcu);
+	uverbs_free_spec_tree(dev->specs_root);
 	kfree(dev);
 }
 
@@ -1067,7 +1068,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
 		goto err_class;
 
-	if (!device->specs_root) {
+	if (!device->driver_specs_root) {
 		const struct uverbs_object_tree_def *default_root[] = {
 			uverbs_default_get_objects()};
 
@@ -1075,8 +1076,13 @@ static void ib_uverbs_add_one(struct ib_device *device)
 								default_root);
 		if (IS_ERR(uverbs_dev->specs_root))
 			goto err_class;
-
-		device->specs_root = uverbs_dev->specs_root;
+	} else {
+		uverbs_dev->specs_root = device->driver_specs_root;
+		/*
+		 * Take responsibility to free the specs allocated by the
+		 * driver.
+		 */
+		device->driver_specs_root = NULL;
 	}
 
 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
@@ -1241,10 +1247,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 		ib_uverbs_comp_dev(uverbs_dev);
 	if (wait_clients)
 		wait_for_completion(&uverbs_dev->comp);
-	if (uverbs_dev->specs_root) {
-		uverbs_free_spec_tree(uverbs_dev->specs_root);
-		device->specs_root = NULL;
-	}
 
 	kobject_put(&uverbs_dev->kobj);
 }
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d3867286606c..0fb80777aade 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5350,15 +5350,15 @@ static int populate_specs_root(struct mlx5_ib_dev *dev)
 	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
 		default_root[num_trees++] = mlx5_ib_get_devx_tree();
 
-	dev->ib_dev.specs_root =
+	dev->ib_dev.driver_specs_root =
 		uverbs_alloc_spec_tree(num_trees, default_root);
 
-	return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
+	return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root);
 }
 
 static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 {
-	uverbs_free_spec_tree(dev->ib_dev.specs_root);
+	uverbs_free_spec_tree(dev->ib_dev.driver_specs_root);
 }
 
 static int mlx5_ib_read_counters(struct ib_counters *counters,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e1130c6c1377..8784d5bfc252 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2602,7 +2602,7 @@ struct ib_device {
 	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
 						     int comp_vector);
 
-	struct uverbs_root_spec		*specs_root;
+	struct uverbs_root_spec		*driver_specs_root;
 	enum rdma_driver_id		driver_id;
 };
 
-- 
cgit v1.2.3


From ad544cfe54cbd1b0b68c620a371ebcde6a3264eb Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:24 +0300
Subject: RDMA/uverbs: Split UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE

Two methods are sharing the same attribute constant, but the attribute
definitions are not the same. This should not have been done, instead
split them into two attributes with the same number.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_std_types_flow_action.c | 12 ++++++++----
 include/uapi/rdma/ib_user_ioctl_cmds.h                 |  7 ++++++-
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index c1875657bc99..afbb19000503 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -321,7 +321,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 		return ret;
 
 	/* No need to check as this attribute is marked as MANDATORY */
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
+	uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
 	action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
 	if (IS_ERR(action))
 		return PTR_ERR(action);
@@ -351,7 +352,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
 	if (ret)
 		return ret;
 
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
+	uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
 	action = uobj->object;
 
 	if (action->type != IB_FLOW_ACTION_ESP)
@@ -390,7 +392,8 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 };
 
 static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+			 UVERBS_OBJECT_FLOW_ACTION,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
@@ -407,7 +410,8 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
 
 static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+	&UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+			 UVERBS_OBJECT_FLOW_ACTION,
 			 UVERBS_ACCESS_WRITE,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 888ac5975a6c..2c881aaf05c2 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids {
 };
 
 enum uverbs_attrs_create_flow_action_esp {
-	UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE,
+	UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
 	UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 	UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
 	UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
@@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp {
 	UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 };
 
+enum uverbs_attrs_modify_flow_action_esp {
+	UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE =
+		UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+};
+
 enum uverbs_attrs_destroy_flow_action_esp {
 	UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
 };
-- 
cgit v1.2.3


From d108dac08085b6fe3947df9625c76fc9f66c1bbb Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:25 +0300
Subject: RDMA/uverbs: Simplify UVERBS_ATTR family of macros

Instead of using a complex cascade of macros, just directly provide the
initializer list each of the declarations is trying to create.

Now that the macros are simplified this also reworks the uverbs_attr_spec
to be friendly to older compilers by eliminating any unnamed
structures/unions inside, and removing the duplication of some fields. The
structure size remains at 16 bytes which was the original motivation for
some of this oddness.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c             |  20 +--
 drivers/infiniband/core/uverbs_ioctl_merge.c       |   4 +-
 drivers/infiniband/core/uverbs_std_types.c         |   8 +-
 .../infiniband/core/uverbs_std_types_flow_action.c |  24 ++-
 drivers/infiniband/hw/mlx5/devx.c                  |  72 ++++-----
 include/rdma/uverbs_ioctl.h                        | 172 +++++++++------------
 6 files changed, 135 insertions(+), 165 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 785975a4e3dd..62f7382e8513 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -77,13 +77,13 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 
 	switch (spec->type) {
 	case UVERBS_ATTR_TYPE_ENUM_IN:
-		if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+		if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
 			return -EOPNOTSUPP;
 
 		if (uattr->attr_data.enum_data.reserved)
 			return -EINVAL;
 
-		val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+		val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
 
 		/* Currently we only support PTR_IN based enums */
 		if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
@@ -97,16 +97,16 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		 * longer struct will fail here if used with an old kernel and
 		 * non-zero content, making ABI compat/discovery simpler.
 		 */
-		if (uattr->len > val_spec->ptr.len &&
+		if (uattr->len > val_spec->u.ptr.len &&
 		    val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
-		    !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+		    !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
 			return -EOPNOTSUPP;
 
 	/* fall through */
 	case UVERBS_ATTR_TYPE_PTR_OUT:
-		if (uattr->len < val_spec->ptr.min_len ||
+		if (uattr->len < val_spec->u.ptr.min_len ||
 		    (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
-		     uattr->len > val_spec->ptr.len))
+		     uattr->len > val_spec->u.ptr.len))
 			return -EINVAL;
 
 		if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
@@ -149,20 +149,20 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
-		object = uverbs_get_object(ufile, spec->obj.obj_type);
+		object = uverbs_get_object(ufile, spec->u.obj.obj_type);
 		if (!object)
 			return -EINVAL;
 
 		o_attr->uobject = uverbs_get_uobject_from_context(
 					object->type_attrs,
 					ufile->ucontext,
-					spec->obj.access,
+					spec->u.obj.access,
 					(int)uattr->data);
 
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
 
-		if (spec->obj.access == UVERBS_ACCESS_NEW) {
+		if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
 			u64 id = o_attr->uobject->id;
 
 			/* Copy the allocated id to the user-space */
@@ -216,7 +216,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
 
 				current_ret = uverbs_finalize_object(
 					attr->obj_attr.uobject,
-					spec->obj.access, commit);
+					spec->u.obj.access, commit);
 				if (!ret)
 					ret = current_ret;
 			} else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN &&
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 6ceb672c4d46..cdada526623e 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -367,8 +367,8 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
 			memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
 
 			attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
-				   (attr->obj.access == UVERBS_ACCESS_NEW ||
-				    attr->obj.access == UVERBS_ACCESS_DESTROY);
+				   (attr->u.obj.access == UVERBS_ACCESS_NEW ||
+				    attr->u.obj.access == UVERBS_ACCESS_DESTROY);
 			num_of_singularities +=  !!attr_obj_with_special_access;
 			if (WARN(num_of_singularities > 1,
 				 "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c7f93b205c70..ed63eed7250c 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -222,11 +222,11 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev,
  * spec.
  */
 const struct uverbs_attr_def uverbs_uhw_compat_in =
-	UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0),
-			      UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
 const struct uverbs_attr_def uverbs_uhw_compat_out =
-	UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 {
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index afbb19000503..1ba55d4ef590 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -366,28 +366,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
-			.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-		} },
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO),
 	},
 };
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			/* No need to specify any data */
-			.len = 0,
-		} }
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		/* No need to specify any data */
+		UVERBS_ATTR_SIZE(0, 0),
 	},
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
-			.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-		} }
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO),
 	},
 };
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 30f6b612547f..2f75edc010ab 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1017,15 +1017,15 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
-	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
-			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
-				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
 );
 
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
@@ -1033,15 +1033,15 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
-			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
-				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
@@ -1054,30 +1054,30 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_WRITE,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
-			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
-				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
 static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_READ,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
-			       UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-					UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT_SZ(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
-				UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-				UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-					 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
 static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 90a4947ff548..a7246e9cb148 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -73,46 +73,42 @@ enum {
 
 /* Specification of a single attribute inside the ioctl message */
 struct uverbs_attr_spec {
+	u8 type;
+	u8 flags;
+
 	union {
-		/* Header shared by all following union members - to reduce space. */
-		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
-		};
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
 			/* Current known size to kernel */
-			u16				len;
+			u16 len;
 			/* User isn't allowed to provide something < min_len */
-			u16				min_len;
+			u16 min_len;
 		} ptr;
+
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
 			/*
 			 * higher bits mean the namespace and lower bits mean
 			 * the type id within the namespace.
 			 */
-			u16			obj_type;
-			u8			access;
+			u16 obj_type;
+			u8 access;
 		} obj;
+
+		struct {
+			u8 num_elems;
+		} enum_def;
+	} u;
+
+	/* This weird split of the enum lets us remove some padding */
+	union {
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
-			u8				num_elems;
 			/*
 			 * The enum attribute can select one of the attributes
 			 * contained in the ids array. Currently only PTR_IN
 			 * attributes are supported in the ids array.
 			 */
-			const struct uverbs_attr_spec	*ids;
+			const struct uverbs_attr_spec *ids;
 		} enum_def;
-	};
+	} u2;
 };
 
 struct uverbs_attr_spec_hash {
@@ -196,92 +192,72 @@ struct uverbs_object_tree_def {
 	const struct uverbs_object_def * const (*objects)[];
 };
 
-#define UA_FLAGS(_flags)  .flags = _flags
-#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...)              \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } })
-#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...)      \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} })
-#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2)    \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} })
-#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...)	\
-	__UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2)
+/*
+ * =======================================
+ *	Attribute Specifications
+ * =======================================
+ */
 
+/* Use in the _type parameter for attribute specifications */
 #define UVERBS_ATTR_TYPE(_type)					\
-	.min_len = sizeof(_type), .len = sizeof(_type)
+	.u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
 #define UVERBS_ATTR_STRUCT(_type, _last)			\
-	.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
+	.u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type)
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
-	.min_len = _min_len, .len = _len
+	.u.ptr.min_len = _min_len, .u.ptr.len = _len
 #define UVERBS_ATTR_MIN_SIZE(_min_len)				\
 	UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
 
-/*
- * In new compiler, UVERBS_ATTR could be simplified by declaring it as
- * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__}
- * But since we support older compilers too, we need the more complex code.
- */
-#define UVERBS_ATTR(_id, _type, _fld, _attr, ...)			\
-	__UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0)
-#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...)				\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__)
-/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */
-#define UVERBS_ATTR_PTR_IN(_id, _type, ...)				\
-	UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...)				\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT(_id, _type, ...)				\
-	UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...)			\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def,		\
-		    .ids = (_enum_arr),					\
-		    .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__)
+/* Must be used in the '...' of any UVERBS_ATTR */
+#define UA_FLAGS(_flags) .flags = _flags
+
+#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
+	((const struct uverbs_attr_def){                                       \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDR,                        \
+			  .u.obj.obj_type = _idr_type,                         \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...)                       \
+	((const struct uverbs_attr_def){                                       \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW &&      \
+					(_access) != UVERBS_ACCESS_READ),      \
+		.attr = { .type = UVERBS_ATTR_TYPE_FD,                         \
+			  .u.obj.obj_type = _fd_type,                          \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...)                               \
+	((const struct uverbs_attr_def){                                       \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_IN,                     \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...)                              \
+	((const struct uverbs_attr_def){                                       \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT,                    \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+/* _enum_arry should be a 'static const union uverbs_attr_spec[]' */
+#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...)                          \
+	((const struct uverbs_attr_def){                                       \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN,                    \
+			  .u2.enum_def.ids = _enum_arr,                        \
+			  .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr),       \
+			  __VA_ARGS__ },                                       \
+	})
 
 /*
- * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring
- * it as
- * {.id = _id,								\
- *  .attr {.type = __obj_class,						\
- *         .obj = {.obj_type = _idr_type,				\
- *                       .access = _access                              \
- *                }, ##__VA_ARGS__ } }
- * But since we support older compilers too, we need the more complex code.
+ * =======================================
+ *	Declaration helpers
+ * =======================================
  */
-#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\
-	((const struct uverbs_attr_def)					\
-	{.id = _id,							\
-	 .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type,	\
-			    .access = _access, .flags = 0 } }, } })
-#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\
-	((const struct uverbs_attr_def)					\
-	{.id = _id,							\
-	.attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type,	\
-			   .access = _access, _flags} }, } })
-#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \
-			   _n, ...)					\
-	___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags)
-#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...)	\
-	___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access,		\
-			   ##__VA_ARGS__, 1, 0)
-#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...)			 \
-	__UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\
-			  ##__VA_ARGS__)
-#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...)			\
-	__UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type,		\
-			  (_access) + BUILD_BUG_ON_ZERO(		\
-				(_access) != UVERBS_ACCESS_NEW &&	\
-				(_access) != UVERBS_ACCESS_READ),	\
-			  ##__VA_ARGS__)
-#define DECLARE_UVERBS_ATTR_SPEC(_name, ...)				\
-	const struct uverbs_attr_def _name = __VA_ARGS__
-
-#define DECLARE_UVERBS_ENUM(_name, ...)					\
-	const struct uverbs_enum_spec _name = {				\
-		.len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\
-		.ids = {__VA_ARGS__},					\
-	}
 #define _UVERBS_METHOD_ATTRS_SZ(...)					\
 	(sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
 	 sizeof(const struct uverbs_attr_def *))
-- 
cgit v1.2.3


From 595c7736d48037d67e7926f5d3ebf484b95b1d13 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:26 +0300
Subject: RDMA/uverbs: Simplify method definition macros

Instead of the large set of indirecting macros, define the few needed
macros to directly instantiate the struct uverbs_method_def and associated
attributes list.

This is small amount of code duplication but the readability is far
better.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../infiniband/core/uverbs_std_types_counters.c    |  7 +--
 drivers/infiniband/core/uverbs_std_types_cq.c      |  4 +-
 drivers/infiniband/core/uverbs_std_types_dm.c      |  5 +-
 .../infiniband/core/uverbs_std_types_flow_action.c |  7 +--
 drivers/infiniband/core/uverbs_std_types_mr.c      |  2 +-
 drivers/infiniband/hw/mlx5/devx.c                  | 18 +++---
 include/rdma/uverbs_ioctl.h                        | 18 ------
 include/rdma/uverbs_named_ioctl.h                  | 73 ++++++++++++----------
 8 files changed, 60 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 6d0b1ce9fc1f..61fd65bc9496 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -124,21 +124,20 @@ err_read:
 	return ret;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
 			 UVERBS_OBJECT_COUNTERS,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
-	uverbs_destroy_def_handler,
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_COUNTERS_DESTROY,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
 			 UVERBS_OBJECT_COUNTERS,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
 			 UVERBS_OBJECT_COUNTERS,
 			 UVERBS_ACCESS_READ,
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index f67b0895b48c..ca0f6a7435d0 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -147,7 +147,7 @@ err_event_file:
 	return ret;
 };
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
@@ -196,7 +196,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
 			      sizeof(resp));
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index d294660a2e06..75ac20bed5e4 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -85,7 +85,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 	return 0;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
@@ -96,8 +96,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
-	uverbs_destroy_def_handler,
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_DM_FREE,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
 			 UVERBS_OBJECT_DM,
 			 UVERBS_ACCESS_DESTROY,
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 1ba55d4ef590..1a572bdeff6a 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -385,7 +385,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	},
 };
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
 			 UVERBS_OBJECT_FLOW_ACTION,
 			 UVERBS_ACCESS_NEW,
@@ -403,7 +403,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
 			 UVERBS_OBJECT_FLOW_ACTION,
 			 UVERBS_ACCESS_WRITE,
@@ -419,8 +419,7 @@ static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
 	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
-	uverbs_destroy_def_handler,
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_FLOW_ACTION_DESTROY,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
 			 UVERBS_OBJECT_FLOW_ACTION,
 			 UVERBS_ACCESS_DESTROY,
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index d7f7ba3802af..0366814b81c2 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -115,7 +115,7 @@ err_dereg:
 	return ret;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
 	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
 			 UVERBS_ACCESS_NEW,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 2f75edc010ab..ecd0900681fa 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -984,7 +984,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
 	return 0;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_UMEM,
 			 UVERBS_ACCESS_NEW,
@@ -998,25 +998,25 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
 	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32),
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_UMEM,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32),
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32),
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
@@ -1028,7 +1028,7 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
 				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
 );
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_NEW,
@@ -1043,13 +1043,13 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
 				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_DESTROY,
 			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_WRITE,
@@ -1064,7 +1064,7 @@ static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
 				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
-static DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
 			 MLX5_IB_OBJECT_DEVX_OBJ,
 			 UVERBS_ACCESS_READ,
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index a7246e9cb148..12fa0eef0ab0 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -258,24 +258,6 @@ struct uverbs_object_tree_def {
  *	Declaration helpers
  * =======================================
  */
-#define _UVERBS_METHOD_ATTRS_SZ(...)					\
-	(sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
-	 sizeof(const struct uverbs_attr_def *))
-#define _UVERBS_METHOD(_id, _handler, _flags, ...)			\
-	((const struct uverbs_method_def) {				\
-	 .id = _id,							\
-	 .flags = _flags,						\
-	 .handler = _handler,						\
-	 .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__),		\
-	 .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...)		\
-	const struct uverbs_method_def _name =				\
-		_UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__)
-#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...)	\
-	const struct uverbs_method_def _name =				\
-		_UVERBS_METHOD(_id, _handler,				\
-			       UVERBS_ACTION_FLAG_CREATE_ROOT,		\
-			       ##__VA_ARGS__)
 #define _UVERBS_OBJECT_METHODS_SZ(...)					\
 	(sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \
 	 sizeof(const struct uverbs_method_def *))
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index 228421f2a427..06eac48ec4f2 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -45,14 +45,32 @@
 #define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
 #define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
 
-#define DECLARE_UVERBS_NAMED_METHOD(id, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__)
+#define UVERBS_METHOD_ATTRS(id)                                                \
+	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_attrs_##id)
 
-#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__)
+#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...)                           \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = UVERBS_HANDLER(_method_id),                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
 
-#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__)
+/* Create a standard destroy method using the default handler. The handle_attr
+ * argument must be the attribute specifying the handle to destroy, the
+ * default handler does not support any other attributes.
+ */
+#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr)          \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { _handle_attr };                              \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = uverbs_destroy_def_handler,                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
 
 #define DECLARE_UVERBS_NAMED_OBJECT(id, ...)	\
 	DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
@@ -62,33 +80,22 @@
 
 #define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
 
-#define UVERBS_NO_OVERRIDE	NULL
-
-/* This declares a parsing tree with one object and one method. This is usually
- * used for merging driver attributes to the common attributes. The driver has
- * a chance to override the handler and type attrs of the original object.
- * The __VA_ARGS__ just contains a list of attributes.
- */
-#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \
-static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _method_, _name),	     \
-			     _method, _handler, ##__VA_ARGS__);		     \
-									     \
-static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _object_, _name),	     \
-			     _object, _type_attrs,			     \
-			     &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _method_, _name));	     \
-									     \
-static DECLARE_UVERBS_OBJECT_TREE(_name,				     \
-				  &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,     \
-						     _object_, _name))
-
-/* A very common use case is that the driver doesn't override the handler and
- * type_attrs. Therefore, we provide a simplified macro for this common case.
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver specific attributes.
  */
-#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...)	     \
-	ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE,   \
-			      UVERBS_NO_OVERRIDE, ##__VA_ARGS__)
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...)       \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	};                                                                     \
+	static DECLARE_UVERBS_OBJECT(                                          \
+		_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name),      \
+		_object_id, NULL, &UVERBS_METHOD(_method_id));                 \
+	static DECLARE_UVERBS_OBJECT_TREE(                                     \
+		_name,                                                         \
+		&_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name))
 
 #endif
-- 
cgit v1.2.3


From 6c61d2a55c4e5980e231fac9bb54e6ff1a5e811b Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:27 +0300
Subject: RDMA/uverbs: Simplify UVERBS_OBJECT and _TREE family of macros

Instead of the large set of indirecting macros, define the few needed
macros to directly instantiate the struct uverbs_oject_tree_def and
associated objects list.

This is small amount of code duplication but the readability is far
better.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs.h              |  2 --
 drivers/infiniband/core/uverbs_std_types.c    | 46 ++++++++++-----------------
 drivers/infiniband/core/uverbs_std_types_cq.c |  2 +-
 drivers/infiniband/hw/mlx5/devx.c             |  8 ++---
 include/rdma/uverbs_ioctl.h                   | 44 ++++++++++++-------------
 include/rdma/uverbs_named_ioctl.h             | 38 +++++++++++++++-------
 6 files changed, 70 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index f9f0bcf76812..a663e2cdc3d0 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -238,8 +238,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 			     struct ib_uqp_object *uobj);
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
-extern const struct uverbs_attr_def uverbs_uhw_compat_in;
-extern const struct uverbs_attr_def uverbs_uhw_compat_out;
 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 int uverbs_destroy_def_handler(struct ib_device *ib_dev,
 			       struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index ed63eed7250c..8d037f722c4d 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -216,18 +216,6 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev,
 	return 0;
 }
 
-/*
- * This spec is used in order to pass information to the hardware driver in a
- * legacy way. Every verb that could get driver specific data should get this
- * spec.
- */
-const struct uverbs_attr_def uverbs_uhw_compat_in =
-	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_MIN_SIZE(0),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-const struct uverbs_attr_def uverbs_uhw_compat_out =
-	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_MIN_SIZE(0),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 {
 	/*
@@ -300,23 +288,23 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
 
-static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
-				  &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_PD),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_MR),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_QP),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_AH),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_MW),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_DM),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+			   &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
 
 const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
 {
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index ca0f6a7435d0..f1f73ac0d2bd 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -165,7 +165,7 @@ DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
 	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
 	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+	UVERBS_ATTR_UHW());
 
 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
 						    struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ecd0900681fa..4156e03b1bbc 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1079,24 +1079,24 @@ DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
 				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
-static DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
 
-static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 	&UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
 		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
-static DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
 	&UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
 
-static DECLARE_UVERBS_OBJECT_TREE(devx_objects,
+DECLARE_UVERBS_OBJECT_TREE(devx_objects,
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
 	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 12fa0eef0ab0..392936ad25ba 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -253,33 +253,33 @@ struct uverbs_object_tree_def {
 			  __VA_ARGS__ },                                       \
 	})
 
+/*
+ * This spec is used in order to pass information to the hardware driver in a
+ * legacy way. Every verb that could get driver specific data should get this
+ * spec.
+ */
+#define UVERBS_ATTR_UHW()                                                      \
+	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                \
+			    UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),      \
+	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                              \
+			     UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
+			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
+
 /*
  * =======================================
  *	Declaration helpers
  * =======================================
  */
-#define _UVERBS_OBJECT_METHODS_SZ(...)					\
-	(sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \
-	 sizeof(const struct uverbs_method_def *))
-#define _UVERBS_OBJECT(_id, _type_attrs, ...)				\
-	((const struct uverbs_object_def) {				\
-	 .id = _id,							\
-	 .type_attrs = _type_attrs,					\
-	 .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__),		\
-	 .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...)		\
-	const struct uverbs_object_def _name =				\
-		_UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__)
-#define _UVERBS_TREE_OBJECTS_SZ(...)					\
-	(sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \
-	 sizeof(const struct uverbs_object_def *))
-#define _UVERBS_OBJECT_TREE(...)					\
-	((const struct uverbs_object_tree_def) {			\
-	 .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__),		\
-	 .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)				\
-	const struct uverbs_object_tree_def _name =			\
-		_UVERBS_OBJECT_TREE(__VA_ARGS__)
+
+#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)                                 \
+	static const struct uverbs_object_def *const _name##_ptr[] = {         \
+		__VA_ARGS__,                                                   \
+	};                                                                     \
+	static const struct uverbs_object_tree_def _name = {                   \
+		.num_objects = ARRAY_SIZE(_name##_ptr),                        \
+		.objects = &_name##_ptr,                                       \
+	}
 
 /* =================================================
  *              Parsing infrastructure
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index 06eac48ec4f2..3ee045d7da4c 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -45,8 +45,9 @@
 #define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
 #define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
 
-#define UVERBS_METHOD_ATTRS(id)                                                \
-	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_attrs_##id)
+/* These are static so they do not need to be qualified */
+#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
+#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
 
 #define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...)                           \
 	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
@@ -72,14 +73,19 @@
 		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
 	}
 
-#define DECLARE_UVERBS_NAMED_OBJECT(id, ...)	\
-	DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
+#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...)              \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+		.id = _object_id,                                              \
+		.type_attrs = _type_attrs,                                     \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
 
 #define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...)	\
 	DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__)
 
-#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
-
 /* Used by drivers to declare a complete parsing tree for a single method that
  * differs only in having additional driver specific attributes.
  */
@@ -91,11 +97,19 @@
 		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
 		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
 	};                                                                     \
-	static DECLARE_UVERBS_OBJECT(                                          \
-		_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name),      \
-		_object_id, NULL, &UVERBS_METHOD(_method_id));                 \
-	static DECLARE_UVERBS_OBJECT_TREE(                                     \
-		_name,                                                         \
-		&_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, _object_id, _name))
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { &UVERBS_METHOD(_method_id) };                \
+	static const struct uverbs_object_def _name##_struct = {               \
+		.id = _object_id,                                              \
+		.num_methods = 1,                                              \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	};                                                                     \
+	static const struct uverbs_object_def *const _name##_ptrs[] = {        \
+		&_name##_struct,                                               \
+	};                                                                     \
+	static const struct uverbs_object_tree_def _name = {                   \
+		.num_objects = 1,                                              \
+		.objects = &_name##_ptrs,                                      \
+	}
 
 #endif
-- 
cgit v1.2.3


From 9a119cd597769e0dd432110361ed1deec729ac06 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:28 +0300
Subject: RDMA/uverbs: Get rid of the & in method specifications

Hide it inside the macros. The & is confusing and interferes with using
this as a generic DSL in later patches.

Since this also touches almost every line, also run the specs through
clang-format (with 'BinPackParameters: false') to make the maintenance
easier.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_std_types.c         |  57 ++---
 .../infiniband/core/uverbs_std_types_counters.c    |  50 ++---
 drivers/infiniband/core/uverbs_std_types_cq.c      |  68 +++---
 drivers/infiniband/core/uverbs_std_types_dm.c      |  37 ++--
 .../infiniband/core/uverbs_std_types_flow_action.c | 103 +++++----
 drivers/infiniband/core/uverbs_std_types_mr.c      |  59 ++---
 drivers/infiniband/hw/mlx5/devx.c                  | 240 +++++++++++----------
 drivers/infiniband/hw/mlx5/main.c                  |  32 +--
 include/rdma/uverbs_ioctl.h                        |  20 +-
 include/rdma/uverbs_named_ioctl.h                  |  17 +-
 10 files changed, 377 insertions(+), 306 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 8d037f722c4d..2f1a0b6598fe 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -248,45 +248,50 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 	}
 }
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
-			    &UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
-						  uverbs_hot_unplug_completion_event_file,
-						  &uverbs_event_fops,
-						  "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object),
-						      uverbs_free_qp));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_COMP_CHANNEL,
+	UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+			     uverbs_hot_unplug_completion_event_file,
+			     &uverbs_event_fops,
+			     "[infinibandevent]",
+			     O_RDONLY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_QP,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
-						      uverbs_free_srq));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_SRQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+				 uverbs_free_srq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
-						      uverbs_free_flow));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_FLOW,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+				 uverbs_free_flow));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object),
-						      uverbs_free_wq));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_WQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
-						      uverbs_free_xrcd));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_XRCD,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+				 uverbs_free_xrcd));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
 
 DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
 			   &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 61fd65bc9496..21d61e384623 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -124,34 +124,36 @@ err_read:
 	return ret;
 }
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_COUNTERS_DESTROY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_COUNTERS_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_COUNTERS_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
-			     UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
-			    UVERBS_ATTR_TYPE(__u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_COUNTERS_READ,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_READ,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+			    UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+			   UVERBS_ATTR_TYPE(__u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
-
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index f1f73ac0d2bd..0aa16868149f 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -147,24 +147,28 @@ err_event_file:
 	return ret;
 };
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_CQ_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+			UVERBS_OBJECT_CQ,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+		       UVERBS_OBJECT_COMP_CHANNEL,
+		       UVERBS_ACCESS_READ),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
-			UVERBS_OBJECT_COMP_CHANNEL,
-			UVERBS_ACCESS_READ),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
 	UVERBS_ATTR_UHW());
 
 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
@@ -196,20 +200,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
 			      sizeof(resp));
 }
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
-			     UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object),
-						      uverbs_free_cq),
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_CQ_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+			UVERBS_OBJECT_CQ,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+			    UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_CQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+
 #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
-			    &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
-			    &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+	&UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+	&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
 #endif
-			   );
-
+);
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 75ac20bed5e4..16e3e7c86a4b 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -85,24 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 	return 0;
 }
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
-			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_DM_FREE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
-			 UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_DM_ALLOC,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_DM_FREE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 1a572bdeff6a..ec3e669071f7 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -385,49 +385,60 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	},
 };
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
-			 UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
-			     uverbs_flow_action_esp_keymat,
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			     uverbs_flow_action_esp_replay),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
-			 UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_WRITE,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
-			     uverbs_flow_action_esp_keymat),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			     uverbs_flow_action_esp_replay),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(UVERBS_METHOD_FLOW_ACTION_DESTROY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
-			 UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
-
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+					      hard_limit_pkts),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+				    UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+			   UVERBS_ATTR_TYPE(__u32)),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+			    uverbs_flow_action_esp_keymat,
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+			    uverbs_flow_action_esp_replay),
+	UVERBS_ATTR_PTR_IN(
+		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
+				   type)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_WRITE,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+					      hard_limit_pkts),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+			   UVERBS_ATTR_TYPE(__u32)),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+			    uverbs_flow_action_esp_keymat),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+			    uverbs_flow_action_esp_replay),
+	UVERBS_ATTR_PTR_IN(
+		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
+				   type)));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_FLOW_ACTION_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_FLOW_ACTION,
+	UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 0366814b81c2..779d6d4950eb 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -115,32 +115,37 @@ err_dereg:
 	return ret;
 }
 
-DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_DM_MR_REG,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+			UVERBS_OBJECT_MR,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+			UVERBS_OBJECT_PD,
+			UVERBS_ACCESS_READ,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_READ,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
-			     UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
-			     UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
-			    &UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
-			    &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_MR,
+	UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+	&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 4156e03b1bbc..3ac3da4c3e23 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -984,122 +984,146 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
 	return 0;
 }
 
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_UMEM,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR, UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_UMEM,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN,
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC, UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN, UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR,
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX, UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX, UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OTHER,
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
-			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
-			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
-);
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_OBJ,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
-			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
-			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_OBJ,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_OBJ,
-			 UVERBS_ACCESS_WRITE,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
-			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
-			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
-
-DECLARE_UVERBS_NAMED_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY,
-	&UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
-			 MLX5_IB_OBJECT_DEVX_OBJ,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
-			    UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-				     UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
-	&UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
-			     UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				      UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_UMEM_REG,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+			MLX5_IB_OBJECT_DEVX_UMEM,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+			MLX5_IB_OBJECT_DEVX_UMEM,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_QUERY_EQN,
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OTHER,
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_NEW,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_DESTROY,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_WRITE,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_READ,
+			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
+			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
 
 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
 
 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
-	&UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
-		&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
+			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
 
 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
-	&UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
-	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
 
 DECLARE_UVERBS_OBJECT_TREE(devx_objects,
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
-	&UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
 
 const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
 {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0fb80777aade..a0f4361981ab 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5315,20 +5315,24 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
-			     UVERBS_METHOD_DM_ALLOC,
-			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
-						  UVERBS_ATTR_TYPE(u64),
-						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
-						  UVERBS_ATTR_TYPE(u16),
-						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
-			     UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-			     &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
-						 UVERBS_ATTR_TYPE(u64),
-						 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+	mlx5_ib_dm,
+	UVERBS_OBJECT_DM,
+	UVERBS_METHOD_DM_ALLOC,
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+			    UVERBS_ATTR_TYPE(u64),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+			    UVERBS_ATTR_TYPE(u16),
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+	mlx5_ib_flow_action,
+	UVERBS_OBJECT_FLOW_ACTION,
+	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
 
 #define NUM_TREES	3
 static int populate_specs_root(struct mlx5_ib_dev *dev)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 392936ad25ba..d21c29a0f8ec 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -212,7 +212,7 @@ struct uverbs_object_tree_def {
 #define UA_FLAGS(_flags) .flags = _flags
 
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
-	((const struct uverbs_attr_def){                                       \
+	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
 		.attr = { .type = UVERBS_ATTR_TYPE_IDR,                        \
 			  .u.obj.obj_type = _idr_type,                         \
@@ -220,7 +220,7 @@ struct uverbs_object_tree_def {
 			  __VA_ARGS__ } })
 
 #define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...)                       \
-	((const struct uverbs_attr_def){                                       \
+	(&(const struct uverbs_attr_def){                                      \
 		.id = (_attr_id) +                                             \
 		      BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW &&      \
 					(_access) != UVERBS_ACCESS_READ),      \
@@ -230,14 +230,14 @@ struct uverbs_object_tree_def {
 			  __VA_ARGS__ } })
 
 #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...)                               \
-	((const struct uverbs_attr_def){                                       \
+	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
 		.attr = { .type = UVERBS_ATTR_TYPE_PTR_IN,                     \
 			  _type,                                               \
 			  __VA_ARGS__ } })
 
 #define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...)                              \
-	((const struct uverbs_attr_def){                                       \
+	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
 		.attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT,                    \
 			  _type,                                               \
@@ -245,7 +245,7 @@ struct uverbs_object_tree_def {
 
 /* _enum_arry should be a 'static const union uverbs_attr_spec[]' */
 #define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...)                          \
-	((const struct uverbs_attr_def){                                       \
+	(&(const struct uverbs_attr_def){                                      \
 		.id = _attr_id,                                                \
 		.attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN,                    \
 			  .u2.enum_def.ids = _enum_arr,                        \
@@ -259,12 +259,12 @@ struct uverbs_object_tree_def {
  * spec.
  */
 #define UVERBS_ATTR_UHW()                                                      \
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                \
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
+			   UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
+			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),       \
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
 			    UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),      \
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                              \
-			     UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
+			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
 
 /*
  * =======================================
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index 3ee045d7da4c..2eb1767042af 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -78,13 +78,24 @@
 		_object_id)[] = { __VA_ARGS__ };                               \
 	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
 		.id = _object_id,                                              \
-		.type_attrs = _type_attrs,                                     \
+		.type_attrs = &_type_attrs,                                    \
 		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
 		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
 	}
 
-#define DECLARE_UVERBS_GLOBAL_METHODS(_name, ...)	\
-	DECLARE_UVERBS_NAMED_OBJECT(_name, NULL, ##__VA_ARGS__)
+/*
+ * Declare global methods. These still have a unique object_id because we
+ * identify all uapi methods with a (object,method) tuple. However, they have
+ * no type pointer.
+ */
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...)	\
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+		.id = _object_id,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
 
 /* Used by drivers to declare a complete parsing tree for a single method that
  * differs only in having additional driver specific attributes.
-- 
cgit v1.2.3


From 83bb4442330f035bd68ec5d2f5b87bfef1c1a4ab Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:29 +0300
Subject: RDMA/uverbs: Remove UA_FLAGS

This bit of boilerplate isn't really necessary, we can use bitfields
instead of a flags enum and the macros can then individually initialize
them through the __VA_ARGS__ like everything else.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c             | 10 ++--
 drivers/infiniband/core/uverbs_ioctl_merge.c       |  6 +-
 .../infiniband/core/uverbs_std_types_counters.c    | 10 ++--
 drivers/infiniband/core/uverbs_std_types_cq.c      | 17 +++---
 drivers/infiniband/core/uverbs_std_types_dm.c      |  8 +--
 .../infiniband/core/uverbs_std_types_flow_action.c | 40 ++++++++-----
 drivers/infiniband/core/uverbs_std_types_mr.c      | 16 ++---
 drivers/infiniband/hw/mlx5/devx.c                  | 68 +++++++++++-----------
 drivers/infiniband/hw/mlx5/main.c                  |  6 +-
 include/rdma/uverbs_ioctl.h                        | 35 ++++++-----
 10 files changed, 115 insertions(+), 101 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 62f7382e8513..cb6109036129 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -98,14 +98,14 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		 * non-zero content, making ABI compat/discovery simpler.
 		 */
 		if (uattr->len > val_spec->u.ptr.len &&
-		    val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
+		    val_spec->min_sz_or_zero &&
 		    !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
 			return -EOPNOTSUPP;
 
 	/* fall through */
 	case UVERBS_ATTR_TYPE_PTR_OUT:
 		if (uattr->len < val_spec->u.ptr.min_len ||
-		    (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
+		    (!val_spec->min_sz_or_zero &&
 		     uattr->len > val_spec->u.ptr.len))
 			return -EINVAL;
 
@@ -116,8 +116,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		e->ptr_attr.len = uattr->len;
 		e->ptr_attr.flags = uattr->flags;
 
-		if (val_spec->flags & UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY &&
-		    !uverbs_attr_ptr_is_inline(e)) {
+		if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
 			void *p;
 
 			p = kvmalloc(uattr->len, GFP_KERNEL);
@@ -220,8 +219,7 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
 				if (!ret)
 					ret = current_ret;
 			} else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN &&
-				   spec->flags &
-					   UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY &&
+				   spec->alloc_and_copy &&
 				   !uverbs_attr_ptr_is_inline(attr)) {
 				kvfree(attr->ptr_attr.ptr);
 			}
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index cdada526623e..ece5c9463dbe 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -374,18 +374,18 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
 				 "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
 				 min_id) ||
 			    WARN(attr_obj_with_special_access &&
-				 !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
+				 !attr->mandatory,
 				 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
 				 min_id) ||
 			    WARN(IS_ATTR_OBJECT(attr) &&
-				 attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+				 attr->min_sz_or_zero,
 				 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
 				 min_id)) {
 				res = -EINVAL;
 				goto free;
 			}
 
-			if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
+			if (attr->mandatory)
 				set_bit(min_id, hash->mandatory_attrs_bitmask);
 			min_id++;
 
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 21d61e384623..202e3782e740 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -129,14 +129,14 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
 			UVERBS_OBJECT_COUNTERS,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	UVERBS_METHOD_COUNTERS_DESTROY,
 	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
 			UVERBS_OBJECT_COUNTERS,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 #define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
 DECLARE_UVERBS_NAMED_METHOD(
@@ -144,13 +144,13 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
 			UVERBS_OBJECT_COUNTERS,
 			UVERBS_ACCESS_READ,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
 			    UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			    UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
 			   UVERBS_ATTR_TYPE(__u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			   UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
 			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 0aa16868149f..1a14c245b511 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -152,23 +152,24 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
 			UVERBS_OBJECT_CQ,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
 		       UVERBS_OBJECT_COMP_CHANNEL,
-		       UVERBS_ACCESS_READ),
+		       UVERBS_ACCESS_READ,
+		       UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			    UA_MANDATORY),
 	UVERBS_ATTR_UHW());
 
 static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
@@ -205,10 +206,10 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
 			UVERBS_OBJECT_CQ,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
 			    UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(
 	UVERBS_OBJECT_CQ,
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 16e3e7c86a4b..8a2548173a90 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -90,20 +90,20 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
 			UVERBS_OBJECT_DM,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			   UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	UVERBS_METHOD_DM_FREE,
 	UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
 			UVERBS_OBJECT_DM,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
 			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index ec3e669071f7..143dbfdfda6f 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -368,7 +368,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
 		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO),
+		UA_MIN_SZ_OR_ZERO
 	},
 };
 
@@ -381,7 +381,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
 		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO),
+		UA_MIN_SZ_OR_ZERO
 	},
 };
 
@@ -390,51 +390,59 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
 			UVERBS_OBJECT_FLOW_ACTION,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
 					      hard_limit_pkts),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				    UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+			   UA_MANDATORY,
+			   UA_MIN_SZ_OR_ZERO),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
-			   UVERBS_ATTR_TYPE(__u32)),
+			   UVERBS_ATTR_TYPE(__u32),
+			   UA_OPTIONAL),
 	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
 			    uverbs_flow_action_esp_keymat,
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			    UA_MANDATORY),
 	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			    uverbs_flow_action_esp_replay),
+			    uverbs_flow_action_esp_replay,
+			    UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(
 		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
-				   type)));
+				   type),
+		UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
 	UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
 			UVERBS_OBJECT_FLOW_ACTION,
 			UVERBS_ACCESS_WRITE,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
 					      hard_limit_pkts),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+			   UA_OPTIONAL,
+			   UA_MIN_SZ_OR_ZERO),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
-			   UVERBS_ATTR_TYPE(__u32)),
+			   UVERBS_ATTR_TYPE(__u32),
+			   UA_OPTIONAL),
 	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
-			    uverbs_flow_action_esp_keymat),
+			    uverbs_flow_action_esp_keymat,
+			    UA_OPTIONAL),
 	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			    uverbs_flow_action_esp_replay),
+			    uverbs_flow_action_esp_replay,
+			    UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(
 		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
-				   type)));
+				   type),
+		UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	UVERBS_METHOD_FLOW_ACTION_DESTROY,
 	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
 			UVERBS_OBJECT_FLOW_ACTION,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(
 	UVERBS_OBJECT_FLOW_ACTION,
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 779d6d4950eb..c1b9124d611e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -120,30 +120,30 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
 			UVERBS_OBJECT_MR,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
 			UVERBS_OBJECT_PD,
 			UVERBS_ACCESS_READ,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
 			UVERBS_OBJECT_DM,
 			UVERBS_ACCESS_READ,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			    UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(
 	UVERBS_OBJECT_MR,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 3ac3da4c3e23..be44e7e837eb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -989,119 +989,119 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
 			MLX5_IB_OBJECT_DEVX_UMEM,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
 			MLX5_IB_OBJECT_DEVX_UMEM,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_QUERY_EQN,
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_QUERY_UAR,
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
 			   UVERBS_ATTR_TYPE(u32),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OTHER,
 	UVERBS_ATTR_PTR_IN(
 		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO,
+		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
 			MLX5_IB_OBJECT_DEVX_OBJ,
 			UVERBS_ACCESS_NEW,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(
 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO,
+		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
 			MLX5_IB_OBJECT_DEVX_OBJ,
 			UVERBS_ACCESS_DESTROY,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
 			MLX5_IB_OBJECT_DEVX_OBJ,
 			UVERBS_ACCESS_WRITE,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(
 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO,
+		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
 			MLX5_IB_OBJECT_DEVX_OBJ,
 			UVERBS_ACCESS_READ,
-			UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(
 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO |
-			 UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY)),
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO,
+		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-			 UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)));
+		UA_MANDATORY,
+		UA_MIN_SZ_OR_ZERO));
 
 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a0f4361981ab..54d45e13de34 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5321,10 +5321,10 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	UVERBS_METHOD_DM_ALLOC,
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+			    UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
 			    UVERBS_ATTR_TYPE(u16),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY));
 
 ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	mlx5_ib_flow_action,
@@ -5332,7 +5332,7 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 			   UVERBS_ATTR_TYPE(u64),
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			   UA_MANDATORY));
 
 #define NUM_TREES	3
 static int populate_specs_root(struct mlx5_ib_dev *dev)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index d21c29a0f8ec..6073fd9d9c49 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -61,20 +61,22 @@ enum uverbs_obj_access {
 	UVERBS_ACCESS_DESTROY
 };
 
-enum {
-	UVERBS_ATTR_SPEC_F_MANDATORY	= 1U << 0,
-	/* Support extending attributes by length, validate all unknown size == zero  */
-	UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
-	/*
-	 * Valid only for PTR_IN. Allocate and copy the data inside the parser
-	 */
-	UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY = 1U << 2,
-};
-
 /* Specification of a single attribute inside the ioctl message */
+/* good size 16 */
 struct uverbs_attr_spec {
 	u8 type;
-	u8 flags;
+
+	/*
+	 * Support extending attributes by length, validate all
+	 * unknown size == zero
+	 */
+	u8 min_sz_or_zero:1;
+	/*
+	 * Valid only for PTR_IN. Allocate and copy the data inside
+	 * the parser
+	 */
+	u8 alloc_and_copy:1;
+	u8 mandatory:1;
 
 	union {
 		struct {
@@ -209,7 +211,10 @@ struct uverbs_object_tree_def {
 	UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
 
 /* Must be used in the '...' of any UVERBS_ATTR */
-#define UA_FLAGS(_flags) .flags = _flags
+#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
+#define UA_MANDATORY .mandatory = 1
+#define UA_MIN_SZ_OR_ZERO .min_sz_or_zero = 1
+#define UA_OPTIONAL .mandatory = 0
 
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
 	(&(const struct uverbs_attr_def){                                      \
@@ -261,10 +266,12 @@ struct uverbs_object_tree_def {
 #define UVERBS_ATTR_UHW()                                                      \
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
 			   UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			   UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),       \
+			   UA_OPTIONAL,					       \
+			   UA_MIN_SZ_OR_ZERO),				       \
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
 			    UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO))
+			    UA_OPTIONAL,				       \
+			    UA_MIN_SZ_OR_ZERO),				       \
 
 /*
  * =======================================
-- 
cgit v1.2.3


From 540cd69209682a351ab76b83b85ea856b8192720 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:30 +0300
Subject: RDMA/uverbs: Use UVERBS_ATTR_MIN_SIZE correctly and uniformly

This newer macro allows specifying a lower bound on the accepted size, and
has an 'unlimited' upper bound. Due to this it never checks for trailing
zeroing so it doesn't make any sense to combine it with MIN_SZ_OR_ZERO, so
drop MIN_SZ_OR_ZERO when they are used together

There were a couple of places that open coded this pattern, switch them to
use the clearer UVERBS_ATTR_MIN_SIZE for clarity.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_std_types_counters.c |  3 +--
 drivers/infiniband/hw/mlx5/devx.c                   | 16 ++++------------
 include/rdma/uverbs_ioctl.h                         | 17 +++++++++--------
 3 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 202e3782e740..dfe59ad721f6 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -138,7 +138,6 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 			UVERBS_ACCESS_DESTROY,
 			UA_MANDATORY));
 
-#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
 DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_METHOD_COUNTERS_READ,
 	UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
@@ -146,7 +145,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 			UVERBS_ACCESS_READ,
 			UA_MANDATORY),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
-			    UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
+			    UVERBS_ATTR_MIN_SIZE(0),
 			    UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
 			   UVERBS_ATTR_TYPE(__u32),
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index be44e7e837eb..192844bf6016 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1034,13 +1034,11 @@ DECLARE_UVERBS_NAMED_METHOD(
 		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
 		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO,
 		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO));
+		UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
@@ -1052,13 +1050,11 @@ DECLARE_UVERBS_NAMED_METHOD(
 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
 		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO,
 		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO));
+		UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
@@ -1077,13 +1073,11 @@ DECLARE_UVERBS_NAMED_METHOD(
 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
 		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO,
 		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO));
+		UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
@@ -1095,13 +1089,11 @@ DECLARE_UVERBS_NAMED_METHOD(
 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
 		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO,
 		UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_PTR_OUT(
 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
-		UA_MANDATORY,
-		UA_MIN_SZ_OR_ZERO));
+		UA_MANDATORY));
 
 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 6073fd9d9c49..0b46ef8f0b4c 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -207,8 +207,11 @@ struct uverbs_object_tree_def {
 	.u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type)
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
 	.u.ptr.min_len = _min_len, .u.ptr.len = _len
-#define UVERBS_ATTR_MIN_SIZE(_min_len)				\
-	UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
+/*
+ * Specifies at least min_len bytes must be passed in, but the amount can be
+ * larger, up to the protocol maximum size. No check for zeroing is done.
+ */
+#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
 
 /* Must be used in the '...' of any UVERBS_ATTR */
 #define UA_ALLOC_AND_COPY .alloc_and_copy = 1
@@ -265,13 +268,11 @@ struct uverbs_object_tree_def {
  */
 #define UVERBS_ATTR_UHW()                                                      \
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
-			   UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			   UA_OPTIONAL,					       \
-			   UA_MIN_SZ_OR_ZERO),				       \
+			   UVERBS_ATTR_MIN_SIZE(0),			       \
+			   UA_OPTIONAL),				       \
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
-			    UVERBS_ATTR_SIZE(0, USHRT_MAX),		       \
-			    UA_OPTIONAL,				       \
-			    UA_MIN_SZ_OR_ZERO),				       \
+			    UVERBS_ATTR_MIN_SIZE(0),			       \
+			    UA_OPTIONAL)
 
 /*
  * =======================================
-- 
cgit v1.2.3


From 422e3d37ed7ea8b421208a44913c420055334976 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 08:50:31 +0300
Subject: RDMA/uverbs: Combine MIN_SZ_OR_ZERO with UVERBS_ATTR_STRUCT

After all the rework is done it is now possible to include single flags in
the type macros. Any user of UVERBS_ATTR_STRUCT needs to zero check data
past the end of the known struct to be correct, so make this mandatory,
and get rid of MIN_SZ_OR_ZERO as a user flag.

This changes UVERBS_ATTR_TYPE to refer to a struct of exact size with not
possibility of extension, convert the few users of UVERBS_ATTR_TYPE and
MIN_SZ_OR_ZERO to use UVERBS_ATTR_STRUCT.

The one user of UVERBS_ATTR_STRUCT without MIN_SZ_OR_ZERO is just
confused. There is some padding at the end of that struct, but userspace
always provides it with the padding. The construction doesn't test if the
padding is zero, so it is pointless. Just use UVERBS_ATTR_TYPE.

Finally, rename min_sz_or_zero to zero_trailing to better reflect what it
does and hopefully avoid such mis-uses in the future.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c             |  4 +--
 drivers/infiniband/core/uverbs_ioctl_merge.c       |  2 +-
 .../infiniband/core/uverbs_std_types_flow_action.c | 21 +++++++--------
 include/rdma/uverbs_ioctl.h                        | 31 +++++++++++++++-------
 4 files changed, 34 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index cb6109036129..5b59c6f0feed 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -98,14 +98,14 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		 * non-zero content, making ABI compat/discovery simpler.
 		 */
 		if (uattr->len > val_spec->u.ptr.len &&
-		    val_spec->min_sz_or_zero &&
+		    val_spec->zero_trailing &&
 		    !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
 			return -EOPNOTSUPP;
 
 	/* fall through */
 	case UVERBS_ATTR_TYPE_PTR_OUT:
 		if (uattr->len < val_spec->u.ptr.min_len ||
-		    (!val_spec->min_sz_or_zero &&
+		    (!val_spec->zero_trailing &&
 		     uattr->len > val_spec->u.ptr.len))
 			return -EINVAL;
 
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index ece5c9463dbe..f81aa888ce5c 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -378,7 +378,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
 				 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
 				 min_id) ||
 			    WARN(IS_ATTR_OBJECT(attr) &&
-				 attr->min_sz_or_zero,
+				 attr->zero_trailing,
 				 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
 				 min_id)) {
 				res = -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 143dbfdfda6f..c753a34cd984 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -367,8 +367,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
 static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
-		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
-		UA_MIN_SZ_OR_ZERO
+		UVERBS_ATTR_STRUCT(
+			struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
+			aes_key),
 	},
 };
 
@@ -380,8 +381,8 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	},
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
-		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
-		UA_MIN_SZ_OR_ZERO
+		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
+				   size),
 	},
 };
 
@@ -394,8 +395,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
 					      hard_limit_pkts),
-			   UA_MANDATORY,
-			   UA_MIN_SZ_OR_ZERO),
+			   UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
 			   UVERBS_ATTR_TYPE(__u32),
 			   UA_OPTIONAL),
@@ -407,8 +407,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 			    UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(
 		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
-				   type),
+		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
 		UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD(
@@ -420,8 +419,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
 					      hard_limit_pkts),
-			   UA_OPTIONAL,
-			   UA_MIN_SZ_OR_ZERO),
+			   UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
 			   UVERBS_ATTR_TYPE(__u32),
 			   UA_OPTIONAL),
@@ -433,8 +431,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 			    UA_OPTIONAL),
 	UVERBS_ATTR_PTR_IN(
 		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap,
-				   type),
+		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
 		UA_OPTIONAL));
 
 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 0b46ef8f0b4c..017ccf75890c 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -67,10 +67,11 @@ struct uverbs_attr_spec {
 	u8 type;
 
 	/*
-	 * Support extending attributes by length, validate all
-	 * unknown size == zero
+	 * Support extending attributes by length. Allow the user to provide
+	 * more bytes than ptr.len, but check that everything after is zero'd
+	 * by the user.
 	 */
-	u8 min_sz_or_zero:1;
+	u8 zero_trailing:1;
 	/*
 	 * Valid only for PTR_IN. Allocate and copy the data inside
 	 * the parser
@@ -200,13 +201,26 @@ struct uverbs_object_tree_def {
  * =======================================
  */
 
-/* Use in the _type parameter for attribute specifications */
-#define UVERBS_ATTR_TYPE(_type)					\
-	.u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
-#define UVERBS_ATTR_STRUCT(_type, _last)			\
-	.u.ptr.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .u.ptr.len = sizeof(_type)
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
 	.u.ptr.min_len = _min_len, .u.ptr.len = _len
+
+/*
+ * Specifies a uapi structure that cannot be extended. The user must always
+ * supply the whole structure and nothing more. The structure must be declared
+ * in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_TYPE(_type)					\
+	.u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
+/*
+ * Specifies a uapi structure where the user must provide at least up to
+ * member 'last'.  Anything after last and up until the end of the structure
+ * can be non-zero, anything longer than the end of the structure must be
+ * zero. The structure must be declared in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_STRUCT(_type, _last)                                       \
+	.zero_trailing = 1,                                                    \
+	UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)),              \
+			 sizeof(_type))
 /*
  * Specifies at least min_len bytes must be passed in, but the amount can be
  * larger, up to the protocol maximum size. No check for zeroing is done.
@@ -216,7 +230,6 @@ struct uverbs_object_tree_def {
 /* Must be used in the '...' of any UVERBS_ATTR */
 #define UA_ALLOC_AND_COPY .alloc_and_copy = 1
 #define UA_MANDATORY .mandatory = 1
-#define UA_MIN_SZ_OR_ZERO .min_sz_or_zero = 1
 #define UA_OPTIONAL .mandatory = 0
 
 #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
-- 
cgit v1.2.3


From 7a872b6fb7fdc4213e9bb4e1c83a65e6b8af7ebd Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add rtc_only with ddr in self refresh mode
 support

Adds rtc_only support. This needs resume function to shutdown and
reboot the m3.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 drivers/soc/ti/wkup_m3_ipc.c | 41 +++++++++++++++++++++++++++++++++++++++++
 include/linux/wkup_m3_ipc.h  |  3 +++
 2 files changed, 44 insertions(+)

(limited to 'include')

diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index 369aef5e7228..b732c39e2754 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -329,12 +329,24 @@ static int wkup_m3_finish_low_power(struct wkup_m3_ipc *m3_ipc)
 	return 0;
 }
 
+/**
+ * wkup_m3_set_rtc_only - Set the rtc_only flag
+ * @wkup_m3_wakeup: struct wkup_m3_wakeup_src * gets assigned the
+ *                  wakeup src value
+ */
+static void wkup_m3_set_rtc_only(struct wkup_m3_ipc *m3_ipc)
+{
+	if (m3_ipc_state)
+		m3_ipc_state->is_rtc_only = true;
+}
+
 static struct wkup_m3_ipc_ops ipc_ops = {
 	.set_mem_type = wkup_m3_set_mem_type,
 	.set_resume_address = wkup_m3_set_resume_address,
 	.prepare_low_power = wkup_m3_prepare_low_power,
 	.finish_low_power = wkup_m3_finish_low_power,
 	.request_pm_status = wkup_m3_request_pm_status,
+	.set_rtc_only = wkup_m3_set_rtc_only,
 };
 
 /**
@@ -484,6 +496,32 @@ static int wkup_m3_ipc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int wkup_m3_ipc_suspend(struct device *dev)
+{
+	/*
+	 * Nothing needs to be done on suspend even with rtc_only flag set
+	 */
+	return 0;
+}
+
+static int wkup_m3_ipc_resume(struct device *dev)
+{
+	if (m3_ipc_state->is_rtc_only) {
+		rproc_shutdown(m3_ipc_state->rproc);
+		rproc_boot(m3_ipc_state->rproc);
+	}
+
+	m3_ipc_state->is_rtc_only = false;
+
+	return 0;
+}
+
+static const struct dev_pm_ops wkup_m3_ipc_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(wkup_m3_ipc_suspend, wkup_m3_ipc_resume)
+};
+#endif
+
 static const struct of_device_id wkup_m3_ipc_of_match[] = {
 	{ .compatible = "ti,am3352-wkup-m3-ipc", },
 	{ .compatible = "ti,am4372-wkup-m3-ipc", },
@@ -497,6 +535,9 @@ static struct platform_driver wkup_m3_ipc_driver = {
 	.driver = {
 		.name = "wkup_m3_ipc",
 		.of_match_table = wkup_m3_ipc_of_match,
+#ifdef CONFIG_PM
+		.pm = &wkup_m3_ipc_pm_ops,
+#endif
 	},
 };
 
diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d6ba7d39a62f..d639df15e8ba 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -40,6 +40,7 @@ struct wkup_m3_ipc {
 	struct mbox_chan *mbox;
 
 	struct wkup_m3_ipc_ops *ops;
+	int is_rtc_only;
 };
 
 struct wkup_m3_ipc_ops {
@@ -48,8 +49,10 @@ struct wkup_m3_ipc_ops {
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
 struct wkup_m3_ipc *wkup_m3_ipc_get(void);
 void wkup_m3_ipc_put(struct wkup_m3_ipc *m3_ipc);
+void wkup_m3_set_rtc_only_mode(void);
 #endif /* _LINUX_WKUP_M3_IPC_H */
-- 
cgit v1.2.3


From ec93b62fec9c7138d2b75334d192ecc12376f885 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Wed, 4 Jul 2018 20:19:06 -0700
Subject: soc: ti: wkup_m3_ipc: Add wkup_m3_request_wake_src

Add wkup_m3_request_wake_src to allow users to get the name of
the wakeup source after a DeepSleep or Standby transition.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 drivers/soc/ti/wkup_m3_ipc.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/wkup_m3_ipc.h  |  6 ++++++
 2 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c
index b732c39e2754..6840688236b9 100644
--- a/drivers/soc/ti/wkup_m3_ipc.c
+++ b/drivers/soc/ti/wkup_m3_ipc.c
@@ -46,6 +46,7 @@
 #define M3_BASELINE_VERSION		0x191
 #define M3_STATUS_RESP_MASK		(0xffff << 16)
 #define M3_FW_VERSION_MASK		0xffff
+#define M3_WAKE_SRC_MASK		0xff
 
 #define M3_STATE_UNKNOWN		0
 #define M3_STATE_RESET			1
@@ -55,6 +56,23 @@
 
 static struct wkup_m3_ipc *m3_ipc_state;
 
+static const struct wkup_m3_wakeup_src wakeups[] = {
+	{.irq_nr = 35,	.src = "USB0_PHY"},
+	{.irq_nr = 36,	.src = "USB1_PHY"},
+	{.irq_nr = 40,	.src = "I2C0"},
+	{.irq_nr = 41,	.src = "RTC Timer"},
+	{.irq_nr = 42,	.src = "RTC Alarm"},
+	{.irq_nr = 43,	.src = "Timer0"},
+	{.irq_nr = 44,	.src = "Timer1"},
+	{.irq_nr = 45,	.src = "UART"},
+	{.irq_nr = 46,	.src = "GPIO0"},
+	{.irq_nr = 48,	.src = "MPU_WAKE"},
+	{.irq_nr = 49,	.src = "WDT0"},
+	{.irq_nr = 50,	.src = "WDT1"},
+	{.irq_nr = 51,	.src = "ADC_TSC"},
+	{.irq_nr = 0,	.src = "Unknown"},
+};
+
 static void am33xx_txev_eoi(struct wkup_m3_ipc *m3_ipc)
 {
 	writel(AM33XX_M3_TXEV_ACK,
@@ -329,6 +347,26 @@ static int wkup_m3_finish_low_power(struct wkup_m3_ipc *m3_ipc)
 	return 0;
 }
 
+/**
+ * wkup_m3_request_wake_src - Get the wakeup source info passed from wkup_m3
+ * @m3_ipc: Pointer to wkup_m3_ipc context
+ */
+static const char *wkup_m3_request_wake_src(struct wkup_m3_ipc *m3_ipc)
+{
+	unsigned int wakeup_src_idx;
+	int j, val;
+
+	val = wkup_m3_ctrl_ipc_read(m3_ipc, 6);
+
+	wakeup_src_idx = val & M3_WAKE_SRC_MASK;
+
+	for (j = 0; j < ARRAY_SIZE(wakeups) - 1; j++) {
+		if (wakeups[j].irq_nr == wakeup_src_idx)
+			return wakeups[j].src;
+	}
+	return wakeups[j].src;
+}
+
 /**
  * wkup_m3_set_rtc_only - Set the rtc_only flag
  * @wkup_m3_wakeup: struct wkup_m3_wakeup_src * gets assigned the
@@ -346,6 +384,7 @@ static struct wkup_m3_ipc_ops ipc_ops = {
 	.prepare_low_power = wkup_m3_prepare_low_power,
 	.finish_low_power = wkup_m3_finish_low_power,
 	.request_pm_status = wkup_m3_request_pm_status,
+	.request_wake_src = wkup_m3_request_wake_src,
 	.set_rtc_only = wkup_m3_set_rtc_only,
 };
 
diff --git a/include/linux/wkup_m3_ipc.h b/include/linux/wkup_m3_ipc.h
index d639df15e8ba..e497e621dbb7 100644
--- a/include/linux/wkup_m3_ipc.h
+++ b/include/linux/wkup_m3_ipc.h
@@ -43,12 +43,18 @@ struct wkup_m3_ipc {
 	int is_rtc_only;
 };
 
+struct wkup_m3_wakeup_src {
+	int irq_nr;
+	char src[10];
+};
+
 struct wkup_m3_ipc_ops {
 	void (*set_mem_type)(struct wkup_m3_ipc *m3_ipc, int mem_type);
 	void (*set_resume_address)(struct wkup_m3_ipc *m3_ipc, void *addr);
 	int (*prepare_low_power)(struct wkup_m3_ipc *m3_ipc, int state);
 	int (*finish_low_power)(struct wkup_m3_ipc *m3_ipc);
 	int (*request_pm_status)(struct wkup_m3_ipc *m3_ipc);
+	const char *(*request_wake_src)(struct wkup_m3_ipc *m3_ipc);
 	void (*set_rtc_only)(struct wkup_m3_ipc *m3_ipc);
 };
 
-- 
cgit v1.2.3


From 6312fe77751f57d4fa2b28abeef84c6a95c28136 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Thu, 5 Jul 2018 14:34:32 +0800
Subject: net: limit each hash list length to MAX_GRO_SKBS

After commit 07d78363dcff ("net: Convert NAPI gro list into a small hash
table.")' there is 8 hash buckets, which allows more flows to be held for
merging.  but MAX_GRO_SKBS, the total held skb for merging, is 8 skb still,
limit the hash table performance.

keep MAX_GRO_SKBS as 8 skb, but limit each hash list length to 8 skb, not
the total 8 skb

Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++-
 net/core/dev.c            | 56 +++++++++++++++++++----------------------------
 2 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f06ee8f91e74..b683971e500d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -302,6 +302,11 @@ struct netdev_boot_setup {
 
 int __init netdev_boot_setup(char *str);
 
+struct gro_list {
+	struct list_head	list;
+	int			count;
+};
+
 /*
  * Structure for NAPI scheduling similar to tasklet but with weighting
  */
@@ -323,7 +328,7 @@ struct napi_struct {
 	int			poll_owner;
 #endif
 	struct net_device	*dev;
-	struct list_head	gro_hash[GRO_HASH_BUCKETS];
+	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
 	struct hrtimer		timer;
 	struct list_head	dev_list;
diff --git a/net/core/dev.c b/net/core/dev.c
index 7e6a2f66db5c..89825c1eccdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -149,7 +149,6 @@
 
 #include "net-sysfs.h"
 
-/* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
 /* This should be increased if a protocol with a bigger head is added. */
@@ -5151,9 +5150,10 @@ out:
 	return netif_receive_skb_internal(skb);
 }
 
-static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *head,
+static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 				   bool flush_old)
 {
+	struct list_head *head = &napi->gro_hash[index].list;
 	struct sk_buff *skb, *p;
 
 	list_for_each_entry_safe_reverse(skb, p, head, list) {
@@ -5162,22 +5162,20 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, struct list_head *h
 		list_del_init(&skb->list);
 		napi_gro_complete(skb);
 		napi->gro_count--;
+		napi->gro_hash[index].count--;
 	}
 }
 
-/* napi->gro_hash contains packets ordered by age.
+/* napi->gro_hash[].list contains packets ordered by age.
  * youngest packets at the head of it.
  * Complete skbs in reverse order to reduce latencies.
  */
 void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
-	int i;
-
-	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
-		struct list_head *head = &napi->gro_hash[i];
+	u32 i;
 
-		__napi_gro_flush_chain(napi, head, flush_old);
-	}
+	for (i = 0; i < GRO_HASH_BUCKETS; i++)
+		__napi_gro_flush_chain(napi, i, flush_old);
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5189,7 +5187,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 	struct list_head *head;
 	struct sk_buff *p;
 
-	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)];
+	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
 	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
@@ -5257,27 +5255,13 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	}
 }
 
-static void gro_flush_oldest(struct napi_struct *napi)
+static void gro_flush_oldest(struct list_head *head)
 {
-	struct sk_buff *oldest = NULL;
-	unsigned long age = jiffies;
-	int i;
-
-	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
-		struct list_head *head = &napi->gro_hash[i];
-		struct sk_buff *skb;
-
-		if (list_empty(head))
-			continue;
+	struct sk_buff *oldest;
 
-		skb = list_last_entry(head, struct sk_buff, list);
-		if (!oldest || time_before(NAPI_GRO_CB(skb)->age, age)) {
-			oldest = skb;
-			age = NAPI_GRO_CB(skb)->age;
-		}
-	}
+	oldest = list_last_entry(head, struct sk_buff, list);
 
-	/* We are called with napi->gro_count >= MAX_GRO_SKBS, so this is
+	/* We are called with head length >= MAX_GRO_SKBS, so this is
 	 * impossible.
 	 */
 	if (WARN_ON_ONCE(!oldest))
@@ -5292,6 +5276,7 @@ static void gro_flush_oldest(struct napi_struct *napi)
 
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
@@ -5358,6 +5343,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		list_del_init(&pp->list);
 		napi_gro_complete(pp);
 		napi->gro_count--;
+		napi->gro_hash[hash].count--;
 	}
 
 	if (same_flow)
@@ -5366,10 +5352,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		gro_flush_oldest(napi);
+	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
+		gro_flush_oldest(gro_head);
 	} else {
 		napi->gro_count++;
+		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
@@ -6006,8 +5993,10 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
 	napi->gro_count = 0;
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		INIT_LIST_HEAD(&napi->gro_hash[i]);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		INIT_LIST_HEAD(&napi->gro_hash[i].list);
+		napi->gro_hash[i].count = 0;
+	}
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -6047,8 +6036,9 @@ static void flush_gro_hash(struct napi_struct *napi)
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		struct sk_buff *skb, *n;
 
-		list_for_each_entry_safe(skb, n, &napi->gro_hash[i], list)
+		list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
 			kfree_skb(skb);
+		napi->gro_hash[i].count = 0;
 	}
 }
 
-- 
cgit v1.2.3


From eabaef1896bc06319461a644e3aa139885454def Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:28 +0300
Subject: devlink: Add devlink_param register and unregister

Define configuration parameters data structure.
Add functions to register and unregister the driver supported
configuration parameters table.
For each parameter registered, the driver should fill all the parameter's
fields. In case the only supported configuration mode is "driverinit"
the parameter's get()/set() functions are not required and should be set
to NULL, for any other configuration mode, these functions are required
and should be set by the driver.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  85 +++++++++++++++++++++++++
 include/uapi/linux/devlink.h |  10 +++
 net/core/devlink.c           | 148 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 243 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e336ea9c73df..4a0687a1fb99 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -27,6 +27,7 @@ struct devlink {
 	struct list_head sb_list;
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
+	struct list_head param_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -295,6 +296,68 @@ struct devlink_resource {
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
+#define DEVLINK_PARAM_MAX_STRING_VALUE 32
+enum devlink_param_type {
+	DEVLINK_PARAM_TYPE_U8,
+	DEVLINK_PARAM_TYPE_U16,
+	DEVLINK_PARAM_TYPE_U32,
+	DEVLINK_PARAM_TYPE_STRING,
+	DEVLINK_PARAM_TYPE_BOOL,
+};
+
+union devlink_param_value {
+	u8 vu8;
+	u16 vu16;
+	u32 vu32;
+	const char *vstr;
+	bool vbool;
+};
+
+struct devlink_param_gset_ctx {
+	union devlink_param_value val;
+	enum devlink_param_cmode cmode;
+};
+
+/**
+ * struct devlink_param - devlink configuration parameter data
+ * @name: name of the parameter
+ * @generic: indicates if the parameter is generic or driver specific
+ * @type: parameter type
+ * @supported_cmodes: bitmap of supported configuration modes
+ * @get: get parameter value, used for runtime and permanent
+ *       configuration modes
+ * @set: set parameter value, used for runtime and permanent
+ *       configuration modes
+ *
+ * This struct should be used by the driver to fill the data for
+ * a parameter it registers.
+ */
+struct devlink_param {
+	u32 id;
+	const char *name;
+	bool generic;
+	enum devlink_param_type type;
+	unsigned long supported_cmodes;
+	int (*get)(struct devlink *devlink, u32 id,
+		   struct devlink_param_gset_ctx *ctx);
+	int (*set)(struct devlink *devlink, u32 id,
+		   struct devlink_param_gset_ctx *ctx);
+};
+
+struct devlink_param_item {
+	struct list_head list;
+	const struct devlink_param *param;
+	union devlink_param_value driverinit_value;
+	bool driverinit_value_valid;
+};
+
+enum devlink_param_generic_id {
+
+	/* add new param generic ids above here*/
+	__DEVLINK_PARAM_GENERIC_ID_MAX,
+	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
+};
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -430,6 +493,12 @@ void devlink_resource_occ_get_register(struct devlink *devlink,
 				       void *occ_get_priv);
 void devlink_resource_occ_get_unregister(struct devlink *devlink,
 					 u64 resource_id);
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count);
+void devlink_params_unregister(struct devlink *devlink,
+			       const struct devlink_param *params,
+			       size_t params_count);
 
 #else
 
@@ -622,6 +691,22 @@ devlink_resource_occ_get_unregister(struct devlink *devlink,
 {
 }
 
+static inline int
+devlink_params_register(struct devlink *devlink,
+			const struct devlink_param *params,
+			size_t params_count)
+{
+	return 0;
+}
+
+static inline void
+devlink_params_unregister(struct devlink *devlink,
+			  const struct devlink_param *params,
+			  size_t params_count)
+{
+
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 75cb5450c851..d814fa67c7b9 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -142,6 +142,16 @@ enum devlink_port_flavour {
 				   */
 };
 
+enum devlink_param_cmode {
+	DEVLINK_PARAM_CMODE_RUNTIME,
+	DEVLINK_PARAM_CMODE_DRIVERINIT,
+	DEVLINK_PARAM_CMODE_PERMANENT,
+
+	/* Add new configuration modes above */
+	__DEVLINK_PARAM_CMODE_MAX,
+	DEVLINK_PARAM_CMODE_MAX = __DEVLINK_PARAM_CMODE_MAX - 1
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 22099705cc41..41b1a5d1c992 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2604,6 +2604,82 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	return devlink->ops->reload(devlink, info->extack);
 }
 
+static const struct devlink_param devlink_param_generic[] = {};
+
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+	/* verify it match generic parameter by id and name */
+	if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	if (strcmp(param->name, devlink_param_generic[param->id].name))
+		return -ENOENT;
+
+	WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+	return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
+{
+	int i;
+
+	if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	/* verify no such name in generic params */
+	for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+		if (!strcmp(param->name, devlink_param_generic[i].name))
+			return -EEXIST;
+
+	return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct list_head *param_list,
+			   const char *param_name)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, param_list, list)
+		if (!strcmp(param_item->param->name, param_name))
+			return param_item;
+	return NULL;
+}
+
+static int devlink_param_register_one(struct devlink *devlink,
+				      const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+
+	if (devlink_param_find_by_name(&devlink->param_list,
+				       param->name))
+		return -EEXIST;
+
+	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+		WARN_ON(param->get || param->set);
+	else
+		WARN_ON(!param->get || !param->set);
+
+	param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+	if (!param_item)
+		return -ENOMEM;
+	param_item->param = param;
+
+	list_add_tail(&param_item->list, &devlink->param_list);
+	return 0;
+}
+
+static void devlink_param_unregister_one(struct devlink *devlink,
+					 const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_name(&devlink->param_list,
+						param->name);
+	WARN_ON(!param_item);
+	list_del(&param_item->list);
+	kfree(param_item);
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -2845,6 +2921,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->sb_list);
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
 	INIT_LIST_HEAD(&devlink->resource_list);
+	INIT_LIST_HEAD(&devlink->param_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -3434,6 +3511,77 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
 
+/**
+ *	devlink_params_register - register configuration parameters
+ *
+ *	@devlink: devlink
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the driver.
+ */
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+	int err;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++) {
+		if (!param || !param->name || !param->supported_cmodes) {
+			err = -EINVAL;
+			goto rollback;
+		}
+		if (param->generic) {
+			err = devlink_param_generic_verify(param);
+			if (err)
+				goto rollback;
+		} else {
+			err = devlink_param_driver_verify(param);
+			if (err)
+				goto rollback;
+		}
+		err = devlink_param_register_one(devlink, param);
+		if (err)
+			goto rollback;
+	}
+
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+rollback:
+	if (!i)
+		goto unlock;
+	for (param--; i > 0; i--, param--)
+		devlink_param_unregister_one(devlink, param);
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
+
+/**
+ *	devlink_params_unregister - unregister configuration parameters
+ *	@devlink: devlink
+ *	@params: configuration parameters to unregister
+ *	@params_count: number of parameters provided
+ */
+void devlink_params_unregister(struct devlink *devlink,
+			       const struct devlink_param *params,
+			       size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++)
+		devlink_param_unregister_one(devlink, param);
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From 45f05def5c44c806f094709f1c9b03dcecdd54f0 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:29 +0300
Subject: devlink: Add param get command

Add param get command which gets data per parameter.
Option to dump the parameters data per device.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  11 ++
 net/core/devlink.c           | 250 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 261 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index d814fa67c7b9..2ccfe84176bf 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -78,6 +78,8 @@ enum devlink_command {
 	 */
 	DEVLINK_CMD_RELOAD,
 
+	DEVLINK_CMD_PARAM_GET,		/* can dump */
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -248,6 +250,15 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_NUMBER,		/* u32 */
 	DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER,	/* u32 */
 
+	DEVLINK_ATTR_PARAM,			/* nested */
+	DEVLINK_ATTR_PARAM_NAME,		/* string */
+	DEVLINK_ATTR_PARAM_GENERIC,		/* flag */
+	DEVLINK_ATTR_PARAM_TYPE,		/* u8 */
+	DEVLINK_ATTR_PARAM_VALUES_LIST,		/* nested */
+	DEVLINK_ATTR_PARAM_VALUE,		/* nested */
+	DEVLINK_ATTR_PARAM_VALUE_DATA,		/* dynamic */
+	DEVLINK_ATTR_PARAM_VALUE_CMODE,		/* u8 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 41b1a5d1c992..b22d41275f0b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2645,6 +2645,248 @@ devlink_param_find_by_name(struct list_head *param_list,
 	return NULL;
 }
 
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+				 enum devlink_param_cmode cmode)
+{
+	return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->get)
+		return -EOPNOTSUPP;
+	return param->get(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+	switch (param_type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		return NLA_U8;
+	case DEVLINK_PARAM_TYPE_U16:
+		return NLA_U16;
+	case DEVLINK_PARAM_TYPE_U32:
+		return NLA_U32;
+	case DEVLINK_PARAM_TYPE_STRING:
+		return NLA_STRING;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		return NLA_FLAG;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+				enum devlink_param_type type,
+				enum devlink_param_cmode cmode,
+				union devlink_param_value val)
+{
+	struct nlattr *param_value_attr;
+
+	param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE);
+	if (!param_value_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+		goto value_nest_cancel;
+
+	switch (type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+				   val.vstr))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		if (val.vbool &&
+		    nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+			goto value_nest_cancel;
+		break;
+	}
+
+	nla_nest_end(msg, param_value_attr);
+	return 0;
+
+value_nest_cancel:
+	nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd,
+				 u32 portid, u32 seq, int flags)
+{
+	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	const struct devlink_param *param = param_item->param;
+	struct devlink_param_gset_ctx ctx;
+	struct nlattr *param_values_list;
+	struct nlattr *param_attr;
+	int nla_type;
+	void *hdr;
+	int err;
+	int i;
+
+	/* Get value from driver part to driverinit configuration mode */
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!devlink_param_cmode_is_supported(param, i))
+			continue;
+		if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+			if (!param_item->driverinit_value_valid)
+				return -EOPNOTSUPP;
+			param_value[i] = param_item->driverinit_value;
+		} else {
+			ctx.cmode = i;
+			err = devlink_param_get(devlink, param, &ctx);
+			if (err)
+				return err;
+			param_value[i] = ctx.val;
+		}
+	}
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+	param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM);
+	if (!param_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+		goto param_nest_cancel;
+	if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+		goto param_nest_cancel;
+
+	nla_type = devlink_param_type_to_nla_type(param->type);
+	if (nla_type < 0)
+		goto param_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+		goto param_nest_cancel;
+
+	param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST);
+	if (!param_values_list)
+		goto param_nest_cancel;
+
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!devlink_param_cmode_is_supported(param, i))
+			continue;
+		err = devlink_nl_param_value_fill_one(msg, param->type,
+						      i, param_value[i]);
+		if (err)
+			goto values_list_nest_cancel;
+	}
+
+	nla_nest_end(msg, param_values_list);
+	nla_nest_end(msg, param_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+values_list_nest_cancel:
+	nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+	nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
+					   struct netlink_callback *cb)
+{
+	struct devlink_param_item *param_item;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(param_item, &devlink->param_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_param_fill(msg, devlink, param_item,
+						    DEVLINK_CMD_PARAM_GET,
+						    NETLINK_CB(cb->skb).portid,
+						    cb->nlh->nlmsg_seq,
+						    NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct devlink *devlink,
+			    struct genl_info *info)
+{
+	char *param_name;
+
+	if (!info->attrs[DEVLINK_ATTR_PARAM_NAME])
+		return NULL;
+
+	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+	return devlink_param_find_by_name(&devlink->param_list, param_name);
+}
+
+static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_param_item *param_item;
+	struct sk_buff *msg;
+	int err;
+
+	param_item = devlink_param_get_from_info(devlink, info);
+	if (!param_item)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_param_fill(msg, devlink, param_item,
+				    DEVLINK_CMD_PARAM_GET,
+				    info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
 static int devlink_param_register_one(struct devlink *devlink,
 				      const struct devlink_param *param)
 {
@@ -2883,6 +3125,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
+	{
+		.cmd = DEVLINK_CMD_PARAM_GET,
+		.doit = devlink_nl_cmd_param_get_doit,
+		.dumpit = devlink_nl_cmd_param_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From e3b7ca18ad7b2f47ebd3b6e6ce58a42c6ec24746 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:30 +0300
Subject: devlink: Add param set command

Add param set command to set value for a parameter.
Value can be set to any of the supported configuration modes.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |   4 ++
 include/uapi/linux/devlink.h |   1 +
 net/core/devlink.c           | 134 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 4a0687a1fb99..88062752dcd7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -328,6 +328,7 @@ struct devlink_param_gset_ctx {
  *       configuration modes
  * @set: set parameter value, used for runtime and permanent
  *       configuration modes
+ * @validate: validate input value is applicable (within value range, etc.)
  *
  * This struct should be used by the driver to fill the data for
  * a parameter it registers.
@@ -342,6 +343,9 @@ struct devlink_param {
 		   struct devlink_param_gset_ctx *ctx);
 	int (*set)(struct devlink *devlink, u32 id,
 		   struct devlink_param_gset_ctx *ctx);
+	int (*validate)(struct devlink *devlink, u32 id,
+			union devlink_param_value val,
+			struct netlink_ext_ack *extack);
 };
 
 struct devlink_param_item {
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 2ccfe84176bf..ea0623e568f0 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -79,6 +79,7 @@ enum devlink_command {
 	DEVLINK_CMD_RELOAD,
 
 	DEVLINK_CMD_PARAM_GET,		/* can dump */
+	DEVLINK_CMD_PARAM_SET,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b22d41275f0b..0cd7a42dcec2 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2661,6 +2661,15 @@ static int devlink_param_get(struct devlink *devlink,
 	return param->get(devlink, param->id, ctx);
 }
 
+static int devlink_param_set(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->set)
+		return -EOPNOTSUPP;
+	return param->set(devlink, param->id, ctx);
+}
+
 static int
 devlink_param_type_to_nla_type(enum devlink_param_type param_type)
 {
@@ -2847,6 +2856,69 @@ out:
 	return msg->len;
 }
 
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+				 enum devlink_param_type *param_type)
+{
+	if (!info->attrs[DEVLINK_ATTR_PARAM_TYPE])
+		return -EINVAL;
+
+	switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+	case NLA_U8:
+		*param_type = DEVLINK_PARAM_TYPE_U8;
+		break;
+	case NLA_U16:
+		*param_type = DEVLINK_PARAM_TYPE_U16;
+		break;
+	case NLA_U32:
+		*param_type = DEVLINK_PARAM_TYPE_U32;
+		break;
+	case NLA_STRING:
+		*param_type = DEVLINK_PARAM_TYPE_STRING;
+		break;
+	case NLA_FLAG:
+		*param_type = DEVLINK_PARAM_TYPE_BOOL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+				  struct genl_info *info,
+				  union devlink_param_value *value)
+{
+	if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
+	    !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+		return -EINVAL;
+
+	switch (param->type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) >
+		    DEVLINK_PARAM_MAX_STRING_VALUE)
+			return -EINVAL;
+		value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
+			       true : false;
+		break;
+	}
+	return 0;
+}
+
 static struct devlink_param_item *
 devlink_param_get_from_info(struct devlink *devlink,
 			    struct genl_info *info)
@@ -2887,6 +2959,58 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 }
 
+static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	enum devlink_param_type param_type;
+	struct devlink_param_gset_ctx ctx;
+	enum devlink_param_cmode cmode;
+	struct devlink_param_item *param_item;
+	const struct devlink_param *param;
+	union devlink_param_value value;
+	int err = 0;
+
+	param_item = devlink_param_get_from_info(devlink, info);
+	if (!param_item)
+		return -EINVAL;
+	param = param_item->param;
+	err = devlink_param_type_get_from_info(info, &param_type);
+	if (err)
+		return err;
+	if (param_type != param->type)
+		return -EINVAL;
+	err = devlink_param_value_get_from_info(param, info, &value);
+	if (err)
+		return err;
+	if (param->validate) {
+		err = param->validate(devlink, param->id, value, info->extack);
+		if (err)
+			return err;
+	}
+
+	if (!info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE])
+		return -EINVAL;
+	cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+	if (!devlink_param_cmode_is_supported(param, cmode))
+		return -EOPNOTSUPP;
+
+	if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+		param_item->driverinit_value = value;
+		param_item->driverinit_value_valid = true;
+	} else {
+		if (!param->set)
+			return -EOPNOTSUPP;
+		ctx.val = value;
+		ctx.cmode = cmode;
+		err = devlink_param_set(devlink, param, &ctx);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int devlink_param_register_one(struct devlink *devlink,
 				      const struct devlink_param *param)
 {
@@ -2942,6 +3066,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
 	[DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
+	[DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -3133,6 +3260,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_PARAM_SET,
+		.doit = devlink_nl_cmd_param_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From ec01aeb1803eaaf0d006e7b07b5ddb5e429c38a4 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:31 +0300
Subject: devlink: Add support for get/set driverinit value

"driverinit" configuration mode value is held by devlink to enable
the driver query the value after reload. Two additional functions
added to help the driver get/set the value from/to devlink:
devlink_param_driverinit_value_set() and
devlink_param_driverinit_value_get().

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 18 ++++++++++++
 net/core/devlink.c    | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 88062752dcd7..3302e43b09a4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -503,6 +503,10 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value *init_val);
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value init_val);
 
 #else
 
@@ -711,6 +715,20 @@ devlink_params_unregister(struct devlink *devlink,
 
 }
 
+static inline int
+devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				   union devlink_param_value *init_val)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				   union devlink_param_value init_val)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0cd7a42dcec2..3af08f4562b5 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2645,6 +2645,17 @@ devlink_param_find_by_name(struct list_head *param_list,
 	return NULL;
 }
 
+static struct devlink_param_item *
+devlink_param_find_by_id(struct list_head *param_list, u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, param_list, list)
+		if (param_item->param->id == param_id)
+			return param_item;
+	return NULL;
+}
+
 static bool
 devlink_param_cmode_is_supported(const struct devlink_param *param,
 				 enum devlink_param_cmode cmode)
@@ -3966,6 +3977,72 @@ void devlink_params_unregister(struct devlink *devlink,
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
+/**
+ *	devlink_param_driverinit_value_get - get configuration parameter
+ *					     value for driver initializing
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter in driverinit configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ */
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value *init_val)
+{
+	struct devlink_param_item *param_item;
+
+	if (!devlink->ops || !devlink->ops->reload)
+		return -EOPNOTSUPP;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!param_item->driverinit_value_valid ||
+	    !devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	*init_val = param_item->driverinit_value;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
+
+/**
+ *	devlink_param_driverinit_value_set - set value of configuration
+ *					     parameter for driverinit
+ *					     configuration mode
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter to set for driverinit configuration mode
+ *
+ *	This function should be used by the driver to set driverinit
+ *	configuration mode default value.
+ */
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value init_val)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	param_item->driverinit_value = init_val;
+	param_item->driverinit_value_valid = true;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From ea601e17098856ee059f35c2a75659e57df81f25 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:32 +0300
Subject: devlink: Add devlink notifications support for params

Add devlink_param_notify() function to support devlink param notifications.
Add notification call to devlink param set, register and unregister
functions.
Add devlink_param_value_changed() function to enable the driver notify
devlink on value change. Driver should use this function after value was
changed on any configuration mode part to driverinit.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  7 +++++++
 include/uapi/linux/devlink.h |  2 ++
 net/core/devlink.c           | 50 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3302e43b09a4..792edaa996ba 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -507,6 +507,7 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
 
 #else
 
@@ -729,6 +730,12 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 	return -EOPNOTSUPP;
 }
 
+static inline void
+devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ea0623e568f0..68641fb56654 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -80,6 +80,8 @@ enum devlink_command {
 
 	DEVLINK_CMD_PARAM_GET,		/* can dump */
 	DEVLINK_CMD_PARAM_SET,
+	DEVLINK_CMD_PARAM_NEW,
+	DEVLINK_CMD_PARAM_DEL,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3af08f4562b5..89d948fd4727 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2828,6 +2828,28 @@ genlmsg_cancel:
 	return -EMSGSIZE;
 }
 
+static void devlink_param_notify(struct devlink *devlink,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+	err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
 static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 					   struct netlink_callback *cb)
 {
@@ -3019,6 +3041,7 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 			return err;
 	}
 
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
 	return 0;
 }
 
@@ -3042,6 +3065,7 @@ static int devlink_param_register_one(struct devlink *devlink,
 	param_item->param = param;
 
 	list_add_tail(&param_item->list, &devlink->param_list);
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
 	return 0;
 }
 
@@ -3053,6 +3077,7 @@ static void devlink_param_unregister_one(struct devlink *devlink,
 	param_item = devlink_param_find_by_name(&devlink->param_list,
 						param->name);
 	WARN_ON(!param_item);
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
 	list_del(&param_item->list);
 	kfree(param_item);
 }
@@ -4039,10 +4064,35 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 	param_item->driverinit_value = init_val;
 	param_item->driverinit_value_valid = true;
 
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
 
+/**
+ *	devlink_param_value_changed - notify devlink on a parameter's value
+ *				      change. Should be called by the driver
+ *				      right after the change.
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *
+ *	This function should be used by the driver to notify devlink on value
+ *	change, excluding driverinit configuration mode.
+ *	For driverinit configuration mode driver should use the function
+ *	devlink_param_driverinit_value_set() instead.
+ */
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	WARN_ON(!param_item);
+
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_param_value_changed);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From 036467c3990c75ec8ce97e517a864b52e184a1aa Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 4 Jul 2018 14:30:33 +0300
Subject: devlink: Add generic parameters internal_err_reset and max_macs

Add 2 first generic parameters to devlink configuration parameters set:
internal_err_reset - When set enables reset device on internal errors.
max_macs - max number of MACs per ETH port.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 31 +++++++++++++++++++++++++++++++
 net/core/devlink.c    | 14 +++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 792edaa996ba..a1c230d18911 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -356,12 +356,43 @@ struct devlink_param_item {
 };
 
 enum devlink_param_generic_id {
+	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
 };
 
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
+#define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
+
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_NAME "max_macs"
+#define DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE DEVLINK_PARAM_TYPE_U32
+
+#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
+{									\
+	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
+	.name = DEVLINK_PARAM_GENERIC_##_id##_NAME,			\
+	.type = DEVLINK_PARAM_GENERIC_##_id##_TYPE,			\
+	.generic = true,						\
+	.supported_cmodes = _cmodes,					\
+	.get = _get,							\
+	.set = _set,							\
+	.validate = _validate,						\
+}
+
+#define DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, _get, _set, _validate)	\
+{									\
+	.id = _id,							\
+	.name = _name,							\
+	.type = _type,							\
+	.supported_cmodes = _cmodes,					\
+	.get = _get,							\
+	.set = _set,							\
+	.validate = _validate,						\
+}
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 89d948fd4727..5bbd0aa7571a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2604,7 +2604,19 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	return devlink->ops->reload(devlink, info->extack);
 }
 
-static const struct devlink_param devlink_param_generic[] = {};
+static const struct devlink_param devlink_param_generic[] = {
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+		.name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+		.type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+		.name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
+	},
+
+};
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
 {
-- 
cgit v1.2.3


From f567bcdae2b052bab94be7903863cb9ab47c907c Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Wed, 4 Jul 2018 14:30:36 +0300
Subject: devlink: Add enable_sriov boolean generic parameter

enable_sriov - Enables Single-Root Input/Output Virtualization(SR-IOV)
characteristic of the device.

Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 net/core/devlink.c    | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index a1c230d18911..8ed571385626 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -358,6 +358,7 @@ struct devlink_param_item {
 enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -370,6 +371,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_MAX_MACS_NAME "max_macs"
 #define DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE DEVLINK_PARAM_TYPE_U32
 
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
+#define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5bbd0aa7571a..470f3dbfecfe 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2615,7 +2615,11 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
 		.type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
 	},
-
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
-- 
cgit v1.2.3


From a9ba23d48dbc6ffd08426bb10f05720e0b9f5c14 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 4 Jul 2018 09:58:05 -0400
Subject: ipv6: make ipv6_renew_options() interrupt/kernel safe

At present the ipv6_renew_options_kern() function ends up calling into
access_ok() which is problematic if done from inside an interrupt as
access_ok() calls WARN_ON_IN_IRQ() on some (all?) architectures
(x86-64 is affected).  Example warning/backtrace is shown below:

 WARNING: CPU: 1 PID: 3144 at lib/usercopy.c:11 _copy_from_user+0x85/0x90
 ...
 Call Trace:
  <IRQ>
  ipv6_renew_option+0xb2/0xf0
  ipv6_renew_options+0x26a/0x340
  ipv6_renew_options_kern+0x2c/0x40
  calipso_req_setattr+0x72/0xe0
  netlbl_req_setattr+0x126/0x1b0
  selinux_netlbl_inet_conn_request+0x80/0x100
  selinux_inet_conn_request+0x6d/0xb0
  security_inet_conn_request+0x32/0x50
  tcp_conn_request+0x35f/0xe00
  ? __lock_acquire+0x250/0x16c0
  ? selinux_socket_sock_rcv_skb+0x1ae/0x210
  ? tcp_rcv_state_process+0x289/0x106b
  tcp_rcv_state_process+0x289/0x106b
  ? tcp_v6_do_rcv+0x1a7/0x3c0
  tcp_v6_do_rcv+0x1a7/0x3c0
  tcp_v6_rcv+0xc82/0xcf0
  ip6_input_finish+0x10d/0x690
  ip6_input+0x45/0x1e0
  ? ip6_rcv_finish+0x1d0/0x1d0
  ipv6_rcv+0x32b/0x880
  ? ip6_make_skb+0x1e0/0x1e0
  __netif_receive_skb_core+0x6f2/0xdf0
  ? process_backlog+0x85/0x250
  ? process_backlog+0x85/0x250
  ? process_backlog+0xec/0x250
  process_backlog+0xec/0x250
  net_rx_action+0x153/0x480
  __do_softirq+0xd9/0x4f7
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  ...

While not present in the backtrace, ipv6_renew_option() ends up calling
access_ok() via the following chain:

  access_ok()
  _copy_from_user()
  copy_from_user()
  ipv6_renew_option()

The fix presented in this patch is to perform the userspace copy
earlier in the call chain such that it is only called when the option
data is actually coming from userspace; that place is
do_ipv6_setsockopt().  Not only does this solve the problem seen in
the backtrace above, it also allows us to simplify the code quite a
bit by removing ipv6_renew_options_kern() completely.  We also take
this opportunity to cleanup ipv6_renew_options()/ipv6_renew_option()
a small amount as well.

This patch is heavily based on a rough patch by Al Viro.  I've taken
his original patch, converted a kmemdup() call in do_ipv6_setsockopt()
to a memdup_user() call, made better use of the e_inval jump target in
the same function, and cleaned up the use ipv6_renew_option() by
ipv6_renew_options().

CC: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       |   9 +---
 net/ipv6/calipso.c       |   9 ++--
 net/ipv6/exthdrs.c       | 111 +++++++++++++----------------------------------
 net/ipv6/ipv6_sockglue.c |  27 ++++++++----
 4 files changed, 53 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 16475c269749..d02881e4ad1f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -355,14 +355,7 @@ struct ipv6_txoptions *ipv6_dup_options(struct sock *sk,
 struct ipv6_txoptions *ipv6_renew_options(struct sock *sk,
 					  struct ipv6_txoptions *opt,
 					  int newtype,
-					  struct ipv6_opt_hdr __user *newopt,
-					  int newoptlen);
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk,
-			struct ipv6_txoptions *opt,
-			int newtype,
-			struct ipv6_opt_hdr *newopt,
-			int newoptlen);
+					  struct ipv6_opt_hdr *newopt);
 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 					  struct ipv6_txoptions *opt);
 
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1323b9679cf7..1c0bb9fb76e6 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop)
 {
 	struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts;
 
-	txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS,
-					 hop, hop ? ipv6_optlen(hop) : 0);
+	txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop);
 	txopt_put(old);
 	if (IS_ERR(txopts))
 		return PTR_ERR(txopts);
@@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req,
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
-	txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
-					 new, new ? ipv6_optlen(new) : 0);
+	txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
 
 	kfree(new);
 
@@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req)
 	if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new))
 		return; /* Nothing to do */
 
-	txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
-					 new, new ? ipv6_optlen(new) : 0);
+	txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
 
 	if (!IS_ERR(txopts)) {
 		txopts = xchg(&req_inet->ipv6_opt, txopts);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5bc2bf3733ab..20291c2036fc 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1015,29 +1015,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
 }
 EXPORT_SYMBOL_GPL(ipv6_dup_options);
 
-static int ipv6_renew_option(void *ohdr,
-			     struct ipv6_opt_hdr __user *newopt, int newoptlen,
-			     int inherit,
-			     struct ipv6_opt_hdr **hdr,
-			     char **p)
+static void ipv6_renew_option(int renewtype,
+			      struct ipv6_opt_hdr **dest,
+			      struct ipv6_opt_hdr *old,
+			      struct ipv6_opt_hdr *new,
+			      int newtype, char **p)
 {
-	if (inherit) {
-		if (ohdr) {
-			memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
-			*hdr = (struct ipv6_opt_hdr *)*p;
-			*p += CMSG_ALIGN(ipv6_optlen(*hdr));
-		}
-	} else {
-		if (newopt) {
-			if (copy_from_user(*p, newopt, newoptlen))
-				return -EFAULT;
-			*hdr = (struct ipv6_opt_hdr *)*p;
-			if (ipv6_optlen(*hdr) > newoptlen)
-				return -EINVAL;
-			*p += CMSG_ALIGN(newoptlen);
-		}
-	}
-	return 0;
+	struct ipv6_opt_hdr *src;
+
+	src = (renewtype == newtype ? new : old);
+	if (!src)
+		return;
+
+	memcpy(*p, src, ipv6_optlen(src));
+	*dest = (struct ipv6_opt_hdr *)*p;
+	*p += CMSG_ALIGN(ipv6_optlen(*dest));
 }
 
 /**
@@ -1063,13 +1055,11 @@ static int ipv6_renew_option(void *ohdr,
  */
 struct ipv6_txoptions *
 ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
-		   int newtype,
-		   struct ipv6_opt_hdr __user *newopt, int newoptlen)
+		   int newtype, struct ipv6_opt_hdr *newopt)
 {
 	int tot_len = 0;
 	char *p;
 	struct ipv6_txoptions *opt2;
-	int err;
 
 	if (opt) {
 		if (newtype != IPV6_HOPOPTS && opt->hopopt)
@@ -1082,8 +1072,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 			tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
 	}
 
-	if (newopt && newoptlen)
-		tot_len += CMSG_ALIGN(newoptlen);
+	if (newopt)
+		tot_len += CMSG_ALIGN(ipv6_optlen(newopt));
 
 	if (!tot_len)
 		return NULL;
@@ -1098,29 +1088,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 	opt2->tot_len = tot_len;
 	p = (char *)(opt2 + 1);
 
-	err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
-				newtype != IPV6_HOPOPTS,
-				&opt2->hopopt, &p);
-	if (err)
-		goto out;
-
-	err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
-				newtype != IPV6_RTHDRDSTOPTS,
-				&opt2->dst0opt, &p);
-	if (err)
-		goto out;
-
-	err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
-				newtype != IPV6_RTHDR,
-				(struct ipv6_opt_hdr **)&opt2->srcrt, &p);
-	if (err)
-		goto out;
-
-	err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
-				newtype != IPV6_DSTOPTS,
-				&opt2->dst1opt, &p);
-	if (err)
-		goto out;
+	ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt,
+			  (opt ? opt->hopopt : NULL),
+			  newopt, newtype, &p);
+	ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt,
+			  (opt ? opt->dst0opt : NULL),
+			  newopt, newtype, &p);
+	ipv6_renew_option(IPV6_RTHDR,
+			  (struct ipv6_opt_hdr **)&opt2->srcrt,
+			  (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL),
+			  newopt, newtype, &p);
+	ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt,
+			  (opt ? opt->dst1opt : NULL),
+			  newopt, newtype, &p);
 
 	opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
 			  (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
@@ -1128,37 +1108,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 	opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
 
 	return opt2;
-out:
-	sock_kfree_s(sk, opt2, opt2->tot_len);
-	return ERR_PTR(err);
-}
-
-/**
- * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
- *
- * @sk: sock from which to allocate memory
- * @opt: original options
- * @newtype: option type to replace in @opt
- * @newopt: new option of type @newtype to replace (kernel-mem)
- * @newoptlen: length of @newopt
- *
- * See ipv6_renew_options().  The difference is that @newopt is
- * kernel memory, rather than user memory.
- */
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
-			int newtype, struct ipv6_opt_hdr *newopt,
-			int newoptlen)
-{
-	struct ipv6_txoptions *ret_val;
-	const mm_segment_t old_fs = get_fs();
-
-	set_fs(KERNEL_DS);
-	ret_val = ipv6_renew_options(sk, opt, newtype,
-				     (struct ipv6_opt_hdr __user *)newopt,
-				     newoptlen);
-	set_fs(old_fs);
-	return ret_val;
 }
 
 struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7f0130..c95c3486d904 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -398,6 +398,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	case IPV6_DSTOPTS:
 	{
 		struct ipv6_txoptions *opt;
+		struct ipv6_opt_hdr *new = NULL;
+
+		/* hop-by-hop / destination options are privileged option */
+		retv = -EPERM;
+		if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+			break;
 
 		/* remove any sticky options header with a zero option
 		 * length, per RFC3542.
@@ -409,17 +415,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		else if (optlen < sizeof(struct ipv6_opt_hdr) ||
 			 optlen & 0x7 || optlen > 8 * 255)
 			goto e_inval;
-
-		/* hop-by-hop / destination options are privileged option */
-		retv = -EPERM;
-		if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
-			break;
+		else {
+			new = memdup_user(optval, optlen);
+			if (IS_ERR(new)) {
+				retv = PTR_ERR(new);
+				break;
+			}
+			if (unlikely(ipv6_optlen(new) > optlen)) {
+				kfree(new);
+				goto e_inval;
+			}
+		}
 
 		opt = rcu_dereference_protected(np->opt,
 						lockdep_sock_is_held(sk));
-		opt = ipv6_renew_options(sk, opt, optname,
-					 (struct ipv6_opt_hdr __user *)optval,
-					 optlen);
+		opt = ipv6_renew_options(sk, opt, optname, new);
+		kfree(new);
 		if (IS_ERR(opt)) {
 			retv = PTR_ERR(opt);
 			break;
-- 
cgit v1.2.3


From 0380cf7dbaca75c524e34b30979f0806124fa8e6 Mon Sep 17 00:00:00 2001
From: pascal paillet <p.paillet@st.com>
Date: Thu, 5 Jul 2018 14:25:57 +0000
Subject: regulator: core: Change suspend_late to suspend

Change suspend_late ops to suspend normal ops. The goal is to avoid
requesting all the regulator drivers to be operational in suspend late
phase.

Signed-off-by: pascal paillet <p.paillet@st.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c         | 26 +++++++++++++-------------
 include/linux/regulator/driver.h |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 6ed568b96c0e..da9b0fed8330 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -4441,7 +4441,7 @@ void regulator_unregister(struct regulator_dev *rdev)
 EXPORT_SYMBOL_GPL(regulator_unregister);
 
 #ifdef CONFIG_SUSPEND
-static int _regulator_suspend_late(struct device *dev, void *data)
+static int _regulator_suspend(struct device *dev, void *data)
 {
 	struct regulator_dev *rdev = dev_to_rdev(dev);
 	suspend_state_t *state = data;
@@ -4455,20 +4455,20 @@ static int _regulator_suspend_late(struct device *dev, void *data)
 }
 
 /**
- * regulator_suspend_late - prepare regulators for system wide suspend
+ * regulator_suspend - prepare regulators for system wide suspend
  * @state: system suspend state
  *
  * Configure each regulator with it's suspend operating parameters for state.
  */
-static int regulator_suspend_late(struct device *dev)
+static int regulator_suspend(struct device *dev)
 {
 	suspend_state_t state = pm_suspend_target_state;
 
 	return class_for_each_device(&regulator_class, NULL, &state,
-				     _regulator_suspend_late);
+				     _regulator_suspend);
 }
 
-static int _regulator_resume_early(struct device *dev, void *data)
+static int _regulator_resume(struct device *dev, void *data)
 {
 	int ret = 0;
 	struct regulator_dev *rdev = dev_to_rdev(dev);
@@ -4481,35 +4481,35 @@ static int _regulator_resume_early(struct device *dev, void *data)
 
 	regulator_lock(rdev);
 
-	if (rdev->desc->ops->resume_early &&
+	if (rdev->desc->ops->resume &&
 	    (rstate->enabled == ENABLE_IN_SUSPEND ||
 	     rstate->enabled == DISABLE_IN_SUSPEND))
-		ret = rdev->desc->ops->resume_early(rdev);
+		ret = rdev->desc->ops->resume(rdev);
 
 	regulator_unlock(rdev);
 
 	return ret;
 }
 
-static int regulator_resume_early(struct device *dev)
+static int regulator_resume(struct device *dev)
 {
 	suspend_state_t state = pm_suspend_target_state;
 
 	return class_for_each_device(&regulator_class, NULL, &state,
-				     _regulator_resume_early);
+				     _regulator_resume);
 }
 
 #else /* !CONFIG_SUSPEND */
 
-#define regulator_suspend_late	NULL
-#define regulator_resume_early	NULL
+#define regulator_suspend	NULL
+#define regulator_resume	NULL
 
 #endif /* !CONFIG_SUSPEND */
 
 #ifdef CONFIG_PM
 static const struct dev_pm_ops __maybe_unused regulator_pm_ops = {
-	.suspend_late	= regulator_suspend_late,
-	.resume_early	= regulator_resume_early,
+	.suspend	= regulator_suspend,
+	.resume		= regulator_resume,
 };
 #endif
 
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index dea96ee39fdc..0fd8fbb74763 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -220,7 +220,7 @@ struct regulator_ops {
 	/* set regulator suspend operating mode (defined in consumer.h) */
 	int (*set_suspend_mode) (struct regulator_dev *, unsigned int mode);
 
-	int (*resume_early)(struct regulator_dev *rdev);
+	int (*resume)(struct regulator_dev *rdev);
 
 	int (*set_pull_down) (struct regulator_dev *);
 };
-- 
cgit v1.2.3


From d8842211b6f63d3f069df973d137de0a85964965 Mon Sep 17 00:00:00 2001
From: pascal paillet <p.paillet@st.com>
Date: Thu, 5 Jul 2018 14:25:56 +0000
Subject: driver core: Add device_link_remove function

Device_link_remove uses the same arguments than device_link_add. The Goal
is to avoid storing the link pointer.

Signed-off-by: pascal paillet <p.paillet@st.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/core.c    | 30 ++++++++++++++++++++++++++++++
 include/linux/device.h |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 36622b52e419..3b380b1f2768 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -365,6 +365,36 @@ void device_link_del(struct device_link *link)
 }
 EXPORT_SYMBOL_GPL(device_link_del);
 
+/**
+ * device_link_remove - remove a link between two devices.
+ * @consumer: Consumer end of the link.
+ * @supplier: Supplier end of the link.
+ *
+ * The caller must ensure proper synchronization of this function with runtime
+ * PM.
+ */
+void device_link_remove(void *consumer, struct device *supplier)
+{
+	struct device_link *link;
+
+	if (WARN_ON(consumer == supplier))
+		return;
+
+	device_links_write_lock();
+	device_pm_lock();
+
+	list_for_each_entry(link, &supplier->links.consumers, s_node) {
+		if (link->consumer == consumer) {
+			kref_put(&link->kref, __device_link_del);
+			break;
+		}
+	}
+
+	device_pm_unlock();
+	device_links_write_unlock();
+}
+EXPORT_SYMBOL_GPL(device_link_remove);
+
 static void device_links_missing_supplier(struct device *dev)
 {
 	struct device_link *link;
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..9c1c3b1d5e11 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1316,6 +1316,7 @@ extern const char *dev_driver_string(const struct device *dev);
 struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
+void device_link_remove(void *consumer, struct device *supplier);
 
 #ifdef CONFIG_PRINTK
 
-- 
cgit v1.2.3


From 2282e125a406e09331c5a785e3df29035c99a607 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:21 +0200
Subject: leds: triggers: let struct led_trigger::activate() return an error
 code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given that activating a trigger can fail, let the callback return an
indication. This prevents to have a trigger active according to the
"trigger" sysfs attribute but not functional.

All users are changed accordingly to return 0 for now. There is no intended
change in behaviour.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/led-triggers.c               | 24 +++++++++++++++++++++---
 drivers/leds/trigger/ledtrig-activity.c   |  8 +++++---
 drivers/leds/trigger/ledtrig-backlight.c  |  8 +++++---
 drivers/leds/trigger/ledtrig-default-on.c |  3 ++-
 drivers/leds/trigger/ledtrig-gpio.c       |  8 +++++---
 drivers/leds/trigger/ledtrig-heartbeat.c  |  8 +++++---
 drivers/leds/trigger/ledtrig-netdev.c     |  8 +++++---
 drivers/leds/trigger/ledtrig-oneshot.c    |  8 +++++---
 drivers/leds/trigger/ledtrig-timer.c      |  8 +++++---
 drivers/leds/trigger/ledtrig-transient.c  |  8 +++++---
 drivers/tty/vt/keyboard.c                 |  4 +++-
 drivers/usb/core/ledtrig-usbport.c        |  7 ++++---
 include/linux/leds.h                      | 14 +++++++++-----
 net/bluetooth/leds.c                      |  6 ++++--
 net/mac80211/led.c                        | 20 +++++++++++++++-----
 net/rfkill/core.c                         |  4 +++-
 16 files changed, 101 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 431123b048a2..a8786f4b3453 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -103,15 +103,16 @@ ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr,
 EXPORT_SYMBOL_GPL(led_trigger_show);
 
 /* Caller must ensure led_cdev->trigger_lock held */
-void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
+int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 {
 	unsigned long flags;
 	char *event = NULL;
 	char *envp[2];
 	const char *name;
+	int ret;
 
 	if (!led_cdev->trigger && !trig)
-		return;
+		return 0;
 
 	name = trig ? trig->name : "none";
 	event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name);
@@ -134,8 +135,14 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 		list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
 		write_unlock_irqrestore(&trig->leddev_list_lock, flags);
 		led_cdev->trigger = trig;
+
 		if (trig->activate)
-			trig->activate(led_cdev);
+			ret = trig->activate(led_cdev);
+		else
+			ret = 0;
+
+		if (ret)
+			goto err_activate;
 	}
 
 	if (event) {
@@ -146,6 +153,17 @@ void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 				"%s: Error sending uevent\n", __func__);
 		kfree(event);
 	}
+
+	return 0;
+
+err_activate:
+	led_cdev->trigger = NULL;
+	write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
+	list_del(&led_cdev->trig_list);
+	write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags);
+	led_set_brightness(led_cdev, LED_OFF);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(led_trigger_set);
 
diff --git a/drivers/leds/trigger/ledtrig-activity.c b/drivers/leds/trigger/ledtrig-activity.c
index b39e1bb9a9ec..2fc065fb1754 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -178,20 +178,20 @@ static ssize_t led_invert_store(struct device *dev,
 
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 
-static void activity_activate(struct led_classdev *led_cdev)
+static int activity_activate(struct led_classdev *led_cdev)
 {
 	struct activity_data *activity_data;
 	int rc;
 
 	activity_data = kzalloc(sizeof(*activity_data), GFP_KERNEL);
 	if (!activity_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = activity_data;
 	rc = device_create_file(led_cdev->dev, &dev_attr_invert);
 	if (rc) {
 		kfree(led_cdev->trigger_data);
-		return;
+		return 0;
 	}
 
 	activity_data->led_cdev = led_cdev;
@@ -201,6 +201,8 @@ static void activity_activate(struct led_classdev *led_cdev)
 	led_activity_function(&activity_data->timer);
 	set_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	led_cdev->activated = true;
+
+	return 0;
 }
 
 static void activity_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-backlight.c b/drivers/leds/trigger/ledtrig-backlight.c
index 1ca1f1608f76..84512960d630 100644
--- a/drivers/leds/trigger/ledtrig-backlight.c
+++ b/drivers/leds/trigger/ledtrig-backlight.c
@@ -97,7 +97,7 @@ static ssize_t bl_trig_invert_store(struct device *dev,
 }
 static DEVICE_ATTR(inverted, 0644, bl_trig_invert_show, bl_trig_invert_store);
 
-static void bl_trig_activate(struct led_classdev *led)
+static int bl_trig_activate(struct led_classdev *led)
 {
 	int ret;
 
@@ -107,7 +107,7 @@ static void bl_trig_activate(struct led_classdev *led)
 	led->trigger_data = n;
 	if (!n) {
 		dev_err(led->dev, "unable to allocate backlight trigger\n");
-		return;
+		return 0;
 	}
 
 	ret = device_create_file(led->dev, &dev_attr_inverted);
@@ -124,11 +124,13 @@ static void bl_trig_activate(struct led_classdev *led)
 		dev_err(led->dev, "unable to register backlight trigger\n");
 	led->activated = true;
 
-	return;
+	return 0;
 
 err_invert:
 	led->trigger_data = NULL;
 	kfree(n);
+
+	return 0;
 }
 
 static void bl_trig_deactivate(struct led_classdev *led)
diff --git a/drivers/leds/trigger/ledtrig-default-on.c b/drivers/leds/trigger/ledtrig-default-on.c
index 4ccea04b7a6b..2996fe672198 100644
--- a/drivers/leds/trigger/ledtrig-default-on.c
+++ b/drivers/leds/trigger/ledtrig-default-on.c
@@ -16,9 +16,10 @@
 #include <linux/leds.h>
 #include "../leds.h"
 
-static void defon_trig_activate(struct led_classdev *led_cdev)
+static int defon_trig_activate(struct led_classdev *led_cdev)
 {
 	led_set_brightness_nosleep(led_cdev, led_cdev->max_brightness);
+	return 0;
 }
 
 static struct led_trigger defon_led_trigger = {
diff --git a/drivers/leds/trigger/ledtrig-gpio.c b/drivers/leds/trigger/ledtrig-gpio.c
index 93906a17a4b6..f5358c40d03f 100644
--- a/drivers/leds/trigger/ledtrig-gpio.c
+++ b/drivers/leds/trigger/ledtrig-gpio.c
@@ -162,14 +162,14 @@ static ssize_t gpio_trig_gpio_store(struct device *dev,
 }
 static DEVICE_ATTR(gpio, 0644, gpio_trig_gpio_show, gpio_trig_gpio_store);
 
-static void gpio_trig_activate(struct led_classdev *led)
+static int gpio_trig_activate(struct led_classdev *led)
 {
 	struct gpio_trig_data *gpio_data;
 	int ret;
 
 	gpio_data = kzalloc(sizeof(*gpio_data), GFP_KERNEL);
 	if (!gpio_data)
-		return;
+		return 0;
 
 	ret = device_create_file(led->dev, &dev_attr_gpio);
 	if (ret)
@@ -187,7 +187,7 @@ static void gpio_trig_activate(struct led_classdev *led)
 	led->trigger_data = gpio_data;
 	led->activated = true;
 
-	return;
+	return 0;
 
 err_brightness:
 	device_remove_file(led->dev, &dev_attr_inverted);
@@ -197,6 +197,8 @@ err_inverted:
 
 err_gpio:
 	kfree(gpio_data);
+
+	return 0;
 }
 
 static void gpio_trig_deactivate(struct led_classdev *led)
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index 304b929edb8e..03a8dfce8fb9 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -121,21 +121,21 @@ static ssize_t led_invert_store(struct device *dev,
 
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 
-static void heartbeat_trig_activate(struct led_classdev *led_cdev)
+static int heartbeat_trig_activate(struct led_classdev *led_cdev)
 {
 	struct heartbeat_trig_data *heartbeat_data;
 	int rc;
 
 	heartbeat_data = kzalloc(sizeof(*heartbeat_data), GFP_KERNEL);
 	if (!heartbeat_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = heartbeat_data;
 	heartbeat_data->led_cdev = led_cdev;
 	rc = device_create_file(led_cdev->dev, &dev_attr_invert);
 	if (rc) {
 		kfree(led_cdev->trigger_data);
-		return;
+		return 0;
 	}
 
 	timer_setup(&heartbeat_data->timer, led_heartbeat_function, 0);
@@ -145,6 +145,8 @@ static void heartbeat_trig_activate(struct led_classdev *led_cdev)
 	led_heartbeat_function(&heartbeat_data->timer);
 	set_bit(LED_BLINK_SW, &led_cdev->work_flags);
 	led_cdev->activated = true;
+
+	return 0;
 }
 
 static void heartbeat_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index 6df4781a6308..bb05937255de 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -388,14 +388,14 @@ static void netdev_trig_work(struct work_struct *work)
 			(atomic_read(&trigger_data->interval)*2));
 }
 
-static void netdev_trig_activate(struct led_classdev *led_cdev)
+static int netdev_trig_activate(struct led_classdev *led_cdev)
 {
 	struct led_netdev_data *trigger_data;
 	int rc;
 
 	trigger_data = kzalloc(sizeof(struct led_netdev_data), GFP_KERNEL);
 	if (!trigger_data)
-		return;
+		return 0;
 
 	spin_lock_init(&trigger_data->lock);
 
@@ -432,7 +432,7 @@ static void netdev_trig_activate(struct led_classdev *led_cdev)
 	rc = register_netdevice_notifier(&trigger_data->notifier);
 	if (rc)
 		goto err_out_interval;
-	return;
+	return 0;
 
 err_out_interval:
 	device_remove_file(led_cdev->dev, &dev_attr_interval);
@@ -447,6 +447,8 @@ err_out_device_name:
 err_out:
 	led_cdev->trigger_data = NULL;
 	kfree(trigger_data);
+
+	return 0;
 }
 
 static void netdev_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-oneshot.c b/drivers/leds/trigger/ledtrig-oneshot.c
index b8ea9f0f1e19..7bbf92bd7f80 100644
--- a/drivers/leds/trigger/ledtrig-oneshot.c
+++ b/drivers/leds/trigger/ledtrig-oneshot.c
@@ -122,14 +122,14 @@ static DEVICE_ATTR(delay_off, 0644, led_delay_off_show, led_delay_off_store);
 static DEVICE_ATTR(invert, 0644, led_invert_show, led_invert_store);
 static DEVICE_ATTR(shot, 0200, NULL, led_shot);
 
-static void oneshot_trig_activate(struct led_classdev *led_cdev)
+static int oneshot_trig_activate(struct led_classdev *led_cdev)
 {
 	struct oneshot_trig_data *oneshot_data;
 	int rc;
 
 	oneshot_data = kzalloc(sizeof(*oneshot_data), GFP_KERNEL);
 	if (!oneshot_data)
-		return;
+		return 0;
 
 	led_cdev->trigger_data = oneshot_data;
 
@@ -151,7 +151,7 @@ static void oneshot_trig_activate(struct led_classdev *led_cdev)
 
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_invert:
 	device_remove_file(led_cdev->dev, &dev_attr_invert);
@@ -161,6 +161,8 @@ err_out_delayon:
 	device_remove_file(led_cdev->dev, &dev_attr_delay_on);
 err_out_trig_data:
 	kfree(led_cdev->trigger_data);
+
+	return 0;
 }
 
 static void oneshot_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c
index 10fc0966b0e3..527055d815ad 100644
--- a/drivers/leds/trigger/ledtrig-timer.c
+++ b/drivers/leds/trigger/ledtrig-timer.c
@@ -70,7 +70,7 @@ static ssize_t led_delay_off_store(struct device *dev,
 static DEVICE_ATTR(delay_on, 0644, led_delay_on_show, led_delay_on_store);
 static DEVICE_ATTR(delay_off, 0644, led_delay_off_show, led_delay_off_store);
 
-static void timer_trig_activate(struct led_classdev *led_cdev)
+static int timer_trig_activate(struct led_classdev *led_cdev)
 {
 	int rc;
 
@@ -78,7 +78,7 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
 
 	rc = device_create_file(led_cdev->dev, &dev_attr_delay_on);
 	if (rc)
-		return;
+		return 0;
 	rc = device_create_file(led_cdev->dev, &dev_attr_delay_off);
 	if (rc)
 		goto err_out_delayon;
@@ -87,10 +87,12 @@ static void timer_trig_activate(struct led_classdev *led_cdev)
 		      &led_cdev->blink_delay_off);
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_delayon:
 	device_remove_file(led_cdev->dev, &dev_attr_delay_on);
+
+	return 0;
 }
 
 static void timer_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/leds/trigger/ledtrig-transient.c b/drivers/leds/trigger/ledtrig-transient.c
index 9d1769073562..a55fc58179a9 100644
--- a/drivers/leds/trigger/ledtrig-transient.c
+++ b/drivers/leds/trigger/ledtrig-transient.c
@@ -152,7 +152,7 @@ static DEVICE_ATTR(duration, 0644, transient_duration_show,
 		   transient_duration_store);
 static DEVICE_ATTR(state, 0644, transient_state_show, transient_state_store);
 
-static void transient_trig_activate(struct led_classdev *led_cdev)
+static int transient_trig_activate(struct led_classdev *led_cdev)
 {
 	int rc;
 	struct transient_trig_data *tdata;
@@ -161,7 +161,7 @@ static void transient_trig_activate(struct led_classdev *led_cdev)
 	if (!tdata) {
 		dev_err(led_cdev->dev,
 			"unable to allocate transient trigger\n");
-		return;
+		return 0;
 	}
 	led_cdev->trigger_data = tdata;
 	tdata->led_cdev = led_cdev;
@@ -181,7 +181,7 @@ static void transient_trig_activate(struct led_classdev *led_cdev)
 	timer_setup(&tdata->timer, transient_timer_function, 0);
 	led_cdev->activated = true;
 
-	return;
+	return 0;
 
 err_out_state:
 	device_remove_file(led_cdev->dev, &dev_attr_duration);
@@ -191,6 +191,8 @@ err_out:
 	dev_err(led_cdev->dev, "unable to register transient trigger\n");
 	led_cdev->trigger_data = NULL;
 	kfree(tdata);
+
+	return 0;
 }
 
 static void transient_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d5b4a2b44ab8..de310621b8e7 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -959,7 +959,7 @@ struct kbd_led_trigger {
 	unsigned int mask;
 };
 
-static void kbd_led_trigger_activate(struct led_classdev *cdev)
+static int kbd_led_trigger_activate(struct led_classdev *cdev)
 {
 	struct kbd_led_trigger *trigger =
 		container_of(cdev->trigger, struct kbd_led_trigger, trigger);
@@ -970,6 +970,8 @@ static void kbd_led_trigger_activate(struct led_classdev *cdev)
 				  ledstate & trigger->mask ?
 					LED_FULL : LED_OFF);
 	tasklet_enable(&keyboard_tasklet);
+
+	return 0;
 }
 
 #define KBD_LED_TRIGGER(_led_bit, _name) {			\
diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c
index d775ffea20c3..aa1d51458da7 100644
--- a/drivers/usb/core/ledtrig-usbport.c
+++ b/drivers/usb/core/ledtrig-usbport.c
@@ -298,14 +298,14 @@ static int usbport_trig_notify(struct notifier_block *nb, unsigned long action,
 	return NOTIFY_DONE;
 }
 
-static void usbport_trig_activate(struct led_classdev *led_cdev)
+static int usbport_trig_activate(struct led_classdev *led_cdev)
 {
 	struct usbport_trig_data *usbport_data;
 	int err;
 
 	usbport_data = kzalloc(sizeof(*usbport_data), GFP_KERNEL);
 	if (!usbport_data)
-		return;
+		return 0;
 	usbport_data->led_cdev = led_cdev;
 
 	/* List of ports */
@@ -322,10 +322,11 @@ static void usbport_trig_activate(struct led_classdev *led_cdev)
 	usb_register_notify(&usbport_data->nb);
 
 	led_cdev->activated = true;
-	return;
+	return 0;
 
 err_free:
 	kfree(usbport_data);
+	return 0;
 }
 
 static void usbport_trig_deactivate(struct led_classdev *led_cdev)
diff --git a/include/linux/leds.h b/include/linux/leds.h
index b7e82550e655..ba5baaaa43bf 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -253,7 +253,7 @@ static inline bool led_sysfs_is_disabled(struct led_classdev *led_cdev)
 struct led_trigger {
 	/* Trigger Properties */
 	const char	 *name;
-	void		(*activate)(struct led_classdev *led_cdev);
+	int		(*activate)(struct led_classdev *led_cdev);
 	void		(*deactivate)(struct led_classdev *led_cdev);
 
 	/* LEDs under control by this trigger (for simple triggers) */
@@ -288,8 +288,8 @@ extern void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_off,
 				      int invert);
 extern void led_trigger_set_default(struct led_classdev *led_cdev);
-extern void led_trigger_set(struct led_classdev *led_cdev,
-			struct led_trigger *trigger);
+extern int led_trigger_set(struct led_classdev *led_cdev,
+			   struct led_trigger *trigger);
 extern void led_trigger_remove(struct led_classdev *led_cdev);
 
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
@@ -334,8 +334,12 @@ static inline void led_trigger_blink_oneshot(struct led_trigger *trigger,
 				      unsigned long *delay_off,
 				      int invert) {}
 static inline void led_trigger_set_default(struct led_classdev *led_cdev) {}
-static inline void led_trigger_set(struct led_classdev *led_cdev,
-				struct led_trigger *trigger) {}
+static inline int led_trigger_set(struct led_classdev *led_cdev,
+				  struct led_trigger *trigger)
+{
+	return 0;
+}
+
 static inline void led_trigger_remove(struct led_classdev *led_cdev) {}
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index cb670b5594eb..6d59a5023231 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -43,7 +43,7 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled)
 	led_trigger_event(bt_power_led_trigger, enabled ? LED_FULL : LED_OFF);
 }
 
-static void power_activate(struct led_classdev *led_cdev)
+static int power_activate(struct led_classdev *led_cdev)
 {
 	struct hci_basic_led_trigger *htrig;
 	bool powered;
@@ -52,10 +52,12 @@ static void power_activate(struct led_classdev *led_cdev)
 	powered = test_bit(HCI_UP, &htrig->hdev->flags);
 
 	led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
+
+	return 0;
 }
 
 static struct led_trigger *led_allocate_basic(struct hci_dev *hdev,
-			void (*activate)(struct led_classdev *led_cdev),
+			int (*activate)(struct led_classdev *led_cdev),
 			const char *name)
 {
 	struct hci_basic_led_trigger *htrig;
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index ba0b507ea691..d6c66fc19716 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -52,13 +52,15 @@ void ieee80211_free_led_names(struct ieee80211_local *local)
 	kfree(local->radio_led.name);
 }
 
-static void ieee80211_tx_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_tx_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     tx_led);
 
 	atomic_inc(&local->tx_led_active);
+
+	return 0;
 }
 
 static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
@@ -70,13 +72,15 @@ static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->tx_led_active);
 }
 
-static void ieee80211_rx_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_rx_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     rx_led);
 
 	atomic_inc(&local->rx_led_active);
+
+	return 0;
 }
 
 static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
@@ -88,13 +92,15 @@ static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->rx_led_active);
 }
 
-static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     assoc_led);
 
 	atomic_inc(&local->assoc_led_active);
+
+	return 0;
 }
 
 static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
@@ -106,13 +112,15 @@ static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->assoc_led_active);
 }
 
-static void ieee80211_radio_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_radio_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     radio_led);
 
 	atomic_inc(&local->radio_led_active);
+
+	return 0;
 }
 
 static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
@@ -124,13 +132,15 @@ static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
 	atomic_dec(&local->radio_led_active);
 }
 
-static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
+static int ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
 {
 	struct ieee80211_local *local = container_of(led_cdev->trigger,
 						     struct ieee80211_local,
 						     tpt_led);
 
 	atomic_inc(&local->tpt_led_active);
+
+	return 0;
 }
 
 static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev)
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index a7a4e6ff9be2..1355f5ca8d22 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -141,13 +141,15 @@ static void rfkill_led_trigger_event(struct rfkill *rfkill)
 		led_trigger_event(trigger, LED_FULL);
 }
 
-static void rfkill_led_trigger_activate(struct led_classdev *led)
+static int rfkill_led_trigger_activate(struct led_classdev *led)
 {
 	struct rfkill *rfkill;
 
 	rfkill = container_of(led->trigger, struct rfkill, led_trigger);
 
 	rfkill_led_trigger_event(rfkill);
+
+	return 0;
 }
 
 const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
-- 
cgit v1.2.3


From a7e7a3156300a7e1982b03cc9cb8fb0c86434c49 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:22 +0200
Subject: leds: triggers: add device attribute support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As many triggers use device attributes, add support for these in
led_trigger_set which allows simplifying the drivers accordingly.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/led-triggers.c | 12 ++++++++++++
 include/linux/leds.h        | 11 +++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index a8786f4b3453..3f3e8728d82c 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -127,6 +127,7 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 		led_stop_software_blink(led_cdev);
 		if (led_cdev->trigger->deactivate)
 			led_cdev->trigger->deactivate(led_cdev);
+		device_remove_groups(led_cdev->dev, led_cdev->trigger->groups);
 		led_cdev->trigger = NULL;
 		led_set_brightness(led_cdev, LED_OFF);
 	}
@@ -143,6 +144,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 
 		if (ret)
 			goto err_activate;
+
+		ret = device_add_groups(led_cdev->dev, trig->groups);
+		if (ret) {
+			dev_err(led_cdev->dev, "Failed to add trigger attributes\n");
+			goto err_add_groups;
+		}
 	}
 
 	if (event) {
@@ -156,7 +163,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 
 	return 0;
 
+err_add_groups:
+
+	if (trig->deactivate)
+		trig->deactivate(led_cdev);
 err_activate:
+
 	led_cdev->trigger = NULL;
 	write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
 	list_del(&led_cdev->trig_list);
diff --git a/include/linux/leds.h b/include/linux/leds.h
index ba5baaaa43bf..33484a5c7478 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -262,8 +262,19 @@ struct led_trigger {
 
 	/* Link to next registered trigger */
 	struct list_head  next_trig;
+
+	const struct attribute_group **groups;
 };
 
+/*
+ * Currently the attributes in struct led_trigger::groups are added directly to
+ * the LED device. As this might change in the future, the following
+ * macros abstract getting the LED device and its trigger_data from the dev
+ * parameter passed to the attribute accessor functions.
+ */
+#define led_trigger_get_led(dev)	((struct led_classdev *)dev_get_drvdata((dev)))
+#define led_trigger_get_drvdata(dev)	(led_get_trigger_data(led_trigger_get_led(dev)))
+
 ssize_t led_trigger_store(struct device *dev, struct device_attribute *attr,
 			const char *buf, size_t count);
 ssize_t led_trigger_show(struct device *dev, struct device_attribute *attr,
-- 
cgit v1.2.3


From a0b750768371e410d77b60bcf49c18bd45078d55 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:24 +0200
Subject: leds: triggers: define module_led_trigger helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This helps to simplify modules that provide a simple led_trigger. It's
inspired by module_platform_driver, module_i2c_driver et al.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index 33484a5c7478..a3ee10846a4b 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -326,6 +326,10 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 extern void led_trigger_rename_static(const char *name,
 				      struct led_trigger *trig);
 
+#define module_led_trigger(__led_trigger) \
+	module_driver(__led_trigger, led_trigger_register, \
+		      led_trigger_unregister)
+
 #else
 
 /* Trigger has no members */
-- 
cgit v1.2.3


From 9acc560de2aac73ef99c54f0fdfb86b4684296b5 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 2 Jul 2018 22:05:25 +0200
Subject: leds: triggers: new function led_set_trigger_data()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is the natural counter part to the already existing
led_get_trigger_data().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 include/linux/leds.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index a3ee10846a4b..834683d603f9 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -303,6 +303,12 @@ extern int led_trigger_set(struct led_classdev *led_cdev,
 			   struct led_trigger *trigger);
 extern void led_trigger_remove(struct led_classdev *led_cdev);
 
+static inline void led_set_trigger_data(struct led_classdev *led_cdev,
+					void *trigger_data)
+{
+	led_cdev->trigger_data = trigger_data;
+}
+
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
 	return led_cdev->trigger_data;
@@ -356,6 +362,7 @@ static inline int led_trigger_set(struct led_classdev *led_cdev,
 }
 
 static inline void led_trigger_remove(struct led_classdev *led_cdev) {}
+static inline void led_set_trigger_data(struct led_classdev *led_cdev) {}
 static inline void *led_get_trigger_data(struct led_classdev *led_cdev)
 {
 	return NULL;
-- 
cgit v1.2.3


From 180fc134d712a93a2bbc3d11ed657b5208e6f90f Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Tue, 5 Jun 2018 12:43:23 -0400
Subject: drm/scheduler: Rename cleanup functions v2.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Everything in the flush code path (i.e. waiting for SW queue
to become empty) names with *_flush()
and everything in the release code path names *_fini()

This patch also effect the amdgpu and etnaviv drivers which
use those functions.

v2:
Also pplay the change to vd3.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Acked-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ++--
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c     |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.c     |  4 ++--
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 18 +++++++++---------
 drivers/gpu/drm/v3d/v3d_drv.c             |  2 +-
 include/drm/gpu_scheduler.h               |  6 +++---
 11 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 64b3a1ed04dc..c0f06c02f2de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -104,7 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 failed:
 	for (j = 0; j < i; j++)
-		drm_sched_entity_fini(&adev->rings[j]->sched,
+		drm_sched_entity_destroy(&adev->rings[j]->sched,
 				      &ctx->rings[j].entity);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
@@ -178,7 +178,7 @@ static void amdgpu_ctx_do_release(struct kref *ref)
 		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
 			continue;
 
-		drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
+		drm_sched_entity_destroy(&ctx->adev->rings[i]->sched,
 			&ctx->rings[i].entity);
 	}
 
@@ -466,7 +466,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
 				continue;
 
-			max_wait = drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
+			max_wait = drm_sched_entity_flush(&ctx->adev->rings[i]->sched,
 					  &ctx->rings[i].entity, max_wait);
 		}
 	}
@@ -492,7 +492,7 @@ void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
 				continue;
 
 			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
+				drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
 					&ctx->rings[i].entity);
 			else
 				DRM_ERROR("ctx %p is still alive\n", ctx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c084d3d0865..0246cb87d9e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -162,7 +162,7 @@ error_mem:
 static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
 {
 	if (adev->mman.mem_global_referenced) {
-		drm_sched_entity_fini(adev->mman.entity.sched,
+		drm_sched_entity_destroy(adev->mman.entity.sched,
 				      &adev->mman.entity);
 		mutex_destroy(&adev->mman.gtt_window_lock);
 		drm_global_item_unref(&adev->mman.bo_global_ref.ref);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index cc15d3230402..0b46ea1c6290 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -309,7 +309,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 		kfree(adev->uvd.inst[j].saved_bo);
 
-		drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
+		drm_sched_entity_destroy(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
 
 		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
 				      &adev->uvd.inst[j].gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 23d960ec1cf2..b0dcdfd85f5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -222,7 +222,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	if (adev->vce.vcpu_bo == NULL)
 		return 0;
 
-	drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+	drm_sched_entity_destroy(&adev->vce.ring[0].sched, &adev->vce.entity);
 
 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
 		(void **)&adev->vce.cpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 590db78b8c72..837066076ccf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2643,7 +2643,7 @@ error_free_root:
 	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
-	drm_sched_entity_fini(&ring->sched, &vm->entity);
+	drm_sched_entity_destroy(&ring->sched, &vm->entity);
 
 	return r;
 }
@@ -2780,7 +2780,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
-	drm_sched_entity_fini(vm->entity.sched, &vm->entity);
+	drm_sched_entity_destroy(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index bfddf97dd13e..1df1c6115341 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -470,7 +470,7 @@ static int uvd_v6_0_sw_fini(void *handle)
 		return r;
 
 	if (uvd_v6_0_enc_support(adev)) {
-		drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
+		drm_sched_entity_destroy(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
 			amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 57d32f21b3a6..ba244d3b74db 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -491,7 +491,7 @@ static int uvd_v7_0_sw_fini(void *handle)
 		return r;
 
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
-		drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
+		drm_sched_entity_destroy(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
 			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index e5013a999147..45bfdf4cc107 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -78,8 +78,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 				gpu->lastctx = NULL;
 			mutex_unlock(&gpu->lock);
 
-			drm_sched_entity_fini(&gpu->sched,
-					      &ctx->sched_entity[i]);
+			drm_sched_entity_destroy(&gpu->sched,
+						&ctx->sched_entity[i]);
 		}
 	}
 
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 6a316701da73..7d2560699b84 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -256,7 +256,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 
 
 /**
- * drm_sched_entity_do_release - Destroy a context entity
+ * drm_sched_entity_flush - Flush a context entity
  *
  * @sched: scheduler instance
  * @entity: scheduler entity
@@ -267,7 +267,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
  *
  * Returns the remaining time in jiffies left from the input timeout
  */
-long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+long drm_sched_entity_flush(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity, long timeout)
 {
 	long ret = timeout;
@@ -294,7 +294,7 @@ long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
 
 	return ret;
 }
-EXPORT_SYMBOL(drm_sched_entity_do_release);
+EXPORT_SYMBOL(drm_sched_entity_flush);
 
 /**
  * drm_sched_entity_cleanup - Destroy a context entity
@@ -306,7 +306,7 @@ EXPORT_SYMBOL(drm_sched_entity_do_release);
  * entity and signals all jobs with an error code if the process was killed.
  *
  */
-void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
+void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity)
 {
 
@@ -357,7 +357,7 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
 	dma_fence_put(entity->last_scheduled);
 	entity->last_scheduled = NULL;
 }
-EXPORT_SYMBOL(drm_sched_entity_cleanup);
+EXPORT_SYMBOL(drm_sched_entity_fini);
 
 /**
  * drm_sched_entity_fini - Destroy a context entity
@@ -367,13 +367,13 @@ EXPORT_SYMBOL(drm_sched_entity_cleanup);
  *
  * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup()
  */
-void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
+void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched,
 				struct drm_sched_entity *entity)
 {
-	drm_sched_entity_do_release(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
-	drm_sched_entity_cleanup(sched, entity);
+	drm_sched_entity_flush(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
+	drm_sched_entity_fini(sched, entity);
 }
-EXPORT_SYMBOL(drm_sched_entity_fini);
+EXPORT_SYMBOL(drm_sched_entity_destroy);
 
 static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
 {
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index cdb582043b4f..567f7d46d912 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -151,7 +151,7 @@ v3d_postclose(struct drm_device *dev, struct drm_file *file)
 	enum v3d_queue q;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_entity_fini(&v3d->queue[q].sched,
+		drm_sched_entity_destroy(&v3d->queue[q].sched,
 				      &v3d_priv->sched_entity[q]);
 	}
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 7c2dfd6cc1af..4214ceb71c05 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -284,12 +284,12 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
 			  struct drm_sched_entity *entity,
 			  struct drm_sched_rq *rq,
 			  atomic_t *guilty);
-long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+long drm_sched_entity_flush(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity, long timeout);
-void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity);
 void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity);
+void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity);
 void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
-- 
cgit v1.2.3


From 576c7218a1546e0153480b208b125509cec71470 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 25 Jun 2018 13:17:41 -0500
Subject: PCI: Export pcie_get_speed_cap and pcie_get_width_cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So drivers can use them.  This can be used to replace
duplicate code in the drm subsystem.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/pci/pci.c   | 2 ++
 include/linux/pci.h | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba712e4e..22adaf35b136 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5222,6 +5222,7 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
 
 	return PCI_SPEED_UNKNOWN;
 }
+EXPORT_SYMBOL(pcie_get_speed_cap);
 
 /**
  * pcie_get_width_cap - query for the PCI device's link width capability
@@ -5240,6 +5241,7 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev)
 
 	return PCIE_LNK_WIDTH_UNKNOWN;
 }
+EXPORT_SYMBOL(pcie_get_width_cap);
 
 /**
  * pcie_bandwidth_capable - calculate a PCI device's link bandwidth capability
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..6e0c0803b241 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -261,6 +261,9 @@ enum pci_bus_speed {
 	PCI_SPEED_UNKNOWN		= 0xff,
 };
 
+enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev);
+enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev);
+
 struct pci_cap_saved_data {
 	u16		cap_nr;
 	bool		cap_extended;
-- 
cgit v1.2.3


From 289278cb7d32685ee75baf599857d7049c4b8030 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 25 Jun 2018 15:14:46 -0500
Subject: drm: drop drm_pcie_get_speed_cap_mask and drm_pcie_get_max_link_width
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These functions duplicated functionality which was ultimately added
to the pci core.

All users of these functions have been ported to using the newly
exposed pci functionality.  These functions are no longer used,
so drop them.

Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_pci.c | 58 -----------------------------------------------
 include/drm/drm_pci.h     |  7 ------
 2 files changed, 65 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 4db9c515b74f..896e42a34895 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -326,64 +326,6 @@ int drm_legacy_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 }
 EXPORT_SYMBOL(drm_legacy_pci_init);
 
-int drm_pcie_get_speed_cap_mask(struct drm_device *dev, u32 *mask)
-{
-	struct pci_dev *root;
-	u32 lnkcap, lnkcap2;
-
-	*mask = 0;
-	if (!dev->pdev)
-		return -EINVAL;
-
-	root = dev->pdev->bus->self;
-
-	/* we've been informed via and serverworks don't make the cut */
-	if (root->vendor == PCI_VENDOR_ID_VIA ||
-	    root->vendor == PCI_VENDOR_ID_SERVERWORKS)
-		return -EINVAL;
-
-	pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
-	pcie_capability_read_dword(root, PCI_EXP_LNKCAP2, &lnkcap2);
-
-	if (lnkcap2) {	/* PCIe r3.0-compliant */
-		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
-			*mask |= DRM_PCIE_SPEED_25;
-		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
-			*mask |= DRM_PCIE_SPEED_50;
-		if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
-			*mask |= DRM_PCIE_SPEED_80;
-	} else {	/* pre-r3.0 */
-		if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
-			*mask |= DRM_PCIE_SPEED_25;
-		if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
-			*mask |= (DRM_PCIE_SPEED_25 | DRM_PCIE_SPEED_50);
-	}
-
-	DRM_INFO("probing gen 2 caps for device %x:%x = %x/%x\n", root->vendor, root->device, lnkcap, lnkcap2);
-	return 0;
-}
-EXPORT_SYMBOL(drm_pcie_get_speed_cap_mask);
-
-int drm_pcie_get_max_link_width(struct drm_device *dev, u32 *mlw)
-{
-	struct pci_dev *root;
-	u32 lnkcap;
-
-	*mlw = 0;
-	if (!dev->pdev)
-		return -EINVAL;
-
-	root = dev->pdev->bus->self;
-
-	pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
-
-	*mlw = (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4;
-
-	DRM_INFO("probing mlw for device %x:%x = %x\n", root->vendor, root->device, lnkcap);
-	return 0;
-}
-EXPORT_SYMBOL(drm_pcie_get_max_link_width);
-
 #else
 
 void drm_pci_agp_destroy(struct drm_device *dev) {}
diff --git a/include/drm/drm_pci.h b/include/drm/drm_pci.h
index 674599025d7d..8181e9e7cf1d 100644
--- a/include/drm/drm_pci.h
+++ b/include/drm/drm_pci.h
@@ -58,11 +58,4 @@ static inline int drm_get_pci_dev(struct pci_dev *pdev,
 }
 #endif
 
-#define DRM_PCIE_SPEED_25 1
-#define DRM_PCIE_SPEED_50 2
-#define DRM_PCIE_SPEED_80 4
-
-int drm_pcie_get_speed_cap_mask(struct drm_device *dev, u32 *speed_mask);
-int drm_pcie_get_max_link_width(struct drm_device *dev, u32 *mlw);
-
 #endif /* _DRM_PCI_H_ */
-- 
cgit v1.2.3


From d8269e2cbf908f9d26aa5d3217236227dffd1d89 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Thu, 5 Jul 2018 15:49:42 +0100
Subject: net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()

Essentially the same as the ipv4 equivalents.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h   |   2 +
 net/ipv6/af_inet6.c  |   1 +
 net/ipv6/ip6_input.c | 131 ++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 118 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 16475c269749..b7843e0b16ee 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -922,6 +922,8 @@ static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6)
 
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
 	     struct packet_type *pt, struct net_device *orig_dev);
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+		   struct net_device *orig_dev);
 
 int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9ed0eae91758..c9535354149f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 static struct packet_type ipv6_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.func = ipv6_rcv,
+	.list_func = ipv6_list_rcv,
 };
 
 static int __init ipv6_packet_init(void)
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f08d34491ece..6242682be876 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,17 +47,11 @@
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
+				struct sk_buff *skb)
 {
 	void (*edemux)(struct sk_buff *skb);
 
-	/* if ingress device is enslaved to an L3 master device pass the
-	 * skb to its handler for processing
-	 */
-	skb = l3mdev_ip6_rcv(skb);
-	if (!skb)
-		return NET_RX_SUCCESS;
-
 	if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
 		const struct inet6_protocol *ipprot;
 
@@ -67,20 +61,73 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 	if (!skb_valid_dst(skb))
 		ip6_route_input(skb);
+}
+
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	/* if ingress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip6_rcv(skb);
+	if (!skb)
+		return NET_RX_SUCCESS;
+	ip6_rcv_finish_core(net, sk, skb);
 
 	return dst_input(skb);
 }
 
-int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void ip6_sublist_rcv_finish(struct list_head *head)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		dst_input(skb);
+}
+
+static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
+				struct list_head *head)
+{
+	struct dst_entry *curr_dst = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct dst_entry *dst;
+
+		list_del(&skb->list);
+		/* if ingress device is enslaved to an L3 master device pass the
+		 * skb to its handler for processing
+		 */
+		skb = l3mdev_ip6_rcv(skb);
+		if (!skb)
+			continue;
+		ip6_rcv_finish_core(net, sk, skb);
+		dst = skb_dst(skb);
+		if (curr_dst != dst) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip6_sublist_rcv_finish(&sublist);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dst = dst;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip6_sublist_rcv_finish(&sublist);
+}
+
+static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
+				    struct net *net)
 {
 	const struct ipv6hdr *hdr;
 	u32 pkt_len;
 	struct inet6_dev *idev;
-	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		kfree_skb(skb);
-		return NET_RX_DROP;
+		return NULL;
 	}
 
 	rcu_read_lock();
@@ -196,7 +243,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		if (ipv6_parse_hopopts(skb) < 0) {
 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 			rcu_read_unlock();
-			return NET_RX_DROP;
+			return NULL;
 		}
 	}
 
@@ -205,15 +252,67 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
-		       net, NULL, skb, dev, NULL,
-		       ip6_rcv_finish);
+	return skb;
 err:
 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
-	return NET_RX_DROP;
+	return NULL;
+}
+
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct net *net = dev_net(skb->dev);
+
+	skb = ip6_rcv_core(skb, dev, net);
+	if (skb == NULL)
+		return NET_RX_DROP;
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
+		       ip6_rcv_finish);
+}
+
+static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
+			    struct net *net)
+{
+	NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL,
+		     head, dev, NULL, ip6_rcv_finish);
+	ip6_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IPv6 packets */
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+		   struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		list_del(&skb->list);
+		skb = ip6_rcv_core(skb, dev, net);
+		if (skb == NULL)
+			continue;
+
+		if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dev = dev;
+			curr_net = net;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip6_sublist_rcv(&sublist, curr_dev, curr_net);
 }
 
 /*
-- 
cgit v1.2.3


From cfdb0c2d095ac5d7f09cac1317b7d0a9e8178134 Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:12:50 +0530
Subject: Bluetooth: Store Resolv list size

When the controller supports the Read LE Resolv List size feature, the
maximum list size are read and now stored.

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 17.979791
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 17.980629
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 17.980786
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 17.981627
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #59 [hci0] 17.981786
> HCI Event: Command Complete (0x0e) plen 12               #60 [hci0] 17.982636
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  6 ++++++
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_core.c         |  8 ++++++++
 net/bluetooth/hci_debugfs.c      | 19 +++++++++++++++++++
 net/bluetooth/hci_event.c        | 18 ++++++++++++++++++
 5 files changed, 53 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 1668211297a9..484f24c7a415 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,12 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
+struct hci_rp_le_read_resolv_list_size {
+	__u8	status;
+	__u8	size;
+} __packed;
+
 #define HCI_OP_LE_READ_MAX_DATA_LEN	0x202f
 struct hci_rp_le_read_max_data_len {
 	__u8	status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 893bbbb5d2fa..409f49bd8338 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -221,6 +221,7 @@ struct hci_dev {
 	__u8		features[HCI_MAX_PAGES][8];
 	__u8		le_features[8];
 	__u8		le_white_list_size;
+	__u8		le_resolv_list_size;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -367,6 +368,7 @@ struct hci_dev {
 	struct list_head	identity_resolving_keys;
 	struct list_head	remote_oob_data;
 	struct list_head	le_white_list;
+	struct list_head	le_resolv_list;
 	struct list_head	le_conn_params;
 	struct list_head	pend_le_conns;
 	struct list_head	pend_le_reports;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ee8ef1228263..036e14267d0a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -714,6 +714,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 			hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
 		}
 
+		if (hdev->commands[34] & 0x40) {
+			/* Read LE Resolving List Size */
+			hci_req_add(req, HCI_OP_LE_READ_RESOLV_LIST_SIZE,
+				    0, NULL);
+		}
+
 		if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
 			/* Read LE Maximum Data Length */
 			hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
@@ -3017,6 +3023,7 @@ struct hci_dev *hci_alloc_dev(void)
 	INIT_LIST_HEAD(&hdev->identity_resolving_keys);
 	INIT_LIST_HEAD(&hdev->remote_oob_data);
 	INIT_LIST_HEAD(&hdev->le_white_list);
+	INIT_LIST_HEAD(&hdev->le_resolv_list);
 	INIT_LIST_HEAD(&hdev->le_conn_params);
 	INIT_LIST_HEAD(&hdev->pend_le_conns);
 	INIT_LIST_HEAD(&hdev->pend_le_reports);
@@ -3218,6 +3225,7 @@ void hci_unregister_dev(struct hci_dev *hdev)
 	hci_remote_oob_data_clear(hdev);
 	hci_adv_instances_clear(hdev);
 	hci_bdaddr_list_clear(&hdev->le_white_list);
+	hci_bdaddr_list_clear(&hdev->le_resolv_list);
 	hci_conn_params_clear_all(hdev);
 	hci_discovery_filter_clear(hdev);
 	hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 0d8ab5b3c177..51f5b1efc3a5 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -694,6 +694,21 @@ static int white_list_show(struct seq_file *f, void *ptr)
 
 DEFINE_SHOW_ATTRIBUTE(white_list);
 
+static int resolv_list_show(struct seq_file *f, void *ptr)
+{
+	struct hci_dev *hdev = f->private;
+	struct bdaddr_list *b;
+
+	hci_dev_lock(hdev);
+	list_for_each_entry(b, &hdev->le_resolv_list, list)
+		seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(resolv_list);
+
 static int identity_resolving_keys_show(struct seq_file *f, void *ptr)
 {
 	struct hci_dev *hdev = f->private;
@@ -955,6 +970,10 @@ void hci_debugfs_create_le(struct hci_dev *hdev)
 			  &hdev->le_white_list_size);
 	debugfs_create_file("white_list", 0444, hdev->debugfs, hdev,
 			    &white_list_fops);
+	debugfs_create_u8("resolv_list_size", 0444, hdev->debugfs,
+			  &hdev->le_resolv_list_size);
+	debugfs_create_file("resolv_list", 0444, hdev->debugfs, hdev,
+			    &resolv_list_fops);
 	debugfs_create_file("identity_resolving_keys", 0400, hdev->debugfs,
 			    hdev, &identity_resolving_keys_fops);
 	debugfs_create_file("long_term_keys", 0400, hdev->debugfs, hdev,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 235b5aaab23d..6ee69a79258f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -221,6 +221,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
 	hdev->ssp_debug_mode = 0;
 
 	hci_bdaddr_list_clear(&hdev->le_white_list);
+	hci_bdaddr_list_clear(&hdev->le_resolv_list);
 }
 
 static void hci_cc_read_stored_link_key(struct hci_dev *hdev,
@@ -1306,6 +1307,19 @@ static void hci_cc_le_write_def_data_len(struct hci_dev *hdev,
 	hdev->le_def_tx_time = le16_to_cpu(sent->tx_time);
 }
 
+static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev,
+					   struct sk_buff *skb)
+{
+	struct hci_rp_le_read_resolv_list_size *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size);
+
+	if (rp->status)
+		return;
+
+	hdev->le_resolv_list_size = rp->size;
+}
+
 static void hci_cc_le_read_max_data_len(struct hci_dev *hdev,
 					struct sk_buff *skb)
 {
@@ -3015,6 +3029,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_write_def_data_len(hdev, skb);
 		break;
 
+	case HCI_OP_LE_READ_RESOLV_LIST_SIZE:
+		hci_cc_le_read_resolv_list_size(hdev, skb);
+		break;
+
 	case HCI_OP_LE_READ_MAX_DATA_LEN:
 		hci_cc_le_read_max_data_len(hdev, skb);
 		break;
-- 
cgit v1.2.3


From 545f2596b907f0747170c7cb71edc74cecf68c5c Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 29 Jun 2018 12:13:20 +0530
Subject: Bluetooth: Add HCI command for clear Resolv list

Check for Resolv list supported by controller. So check the supported
commmand first before issuing this command i.e.,HCI_OP_LE_CLEAR_RESOLV_LIST

Before patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 13.338168
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 13.338842
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 13.339029
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 13.339939
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 13.340152
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 13.340952
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #61 [hci0] 13.341180
> HCI Event: Command Complete (0x0e) plen 12               #62 [hci0] 13.341898
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

After patch:
< HCI Command: LE Read White List... (0x08|0x000f) plen 0  #55 [hci0] 28.919131
> HCI Event: Command Complete (0x0e) plen 5                #56 [hci0] 28.920016
      LE Read White List Size (0x08|0x000f) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear White List (0x08|0x0010) plen 0    #57 [hci0] 28.920164
> HCI Event: Command Complete (0x0e) plen 4                #58 [hci0] 28.920873
      LE Clear White List (0x08|0x0010) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Resolving L.. (0x08|0x002a) plen 0  #59 [hci0] 28.921109
> HCI Event: Command Complete (0x0e) plen 5                #60 [hci0] 28.922016
      LE Read Resolving List Size (0x08|0x002a) ncmd 1
        Status: Success (0x00)
        Size: 25
< HCI Command: LE Clear Resolving... (0x08|0x0029) plen 0  #61 [hci0] 28.922166
> HCI Event: Command Complete (0x0e) plen 4                #62 [hci0] 28.922872
      LE Clear Resolving List (0x08|0x0029) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Read Maximum Dat.. (0x08|0x002f) plen 0  #63 [hci0] 28.923117
> HCI Event: Command Complete (0x0e) plen 12               #64 [hci0] 28.924030
      LE Read Maximum Data Length (0x08|0x002f) ncmd 1
        Status: Success (0x00)
        Max TX octets: 251
        Max TX time: 17040
        Max RX octets: 251
        Max RX time: 17040

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  2 ++
 net/bluetooth/hci_core.c    |  5 +++++
 net/bluetooth/hci_event.c   | 17 +++++++++++++++++
 3 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 484f24c7a415..4af1a3a4d9b1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1490,6 +1490,8 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_CLEAR_RESOLV_LIST	0x2029
+
 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
 struct hci_rp_le_read_resolv_list_size {
 	__u8	status;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 036e14267d0a..ce2447d89ce1 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -720,6 +720,11 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 				    0, NULL);
 		}
 
+		if (hdev->commands[34] & 0x20) {
+			/* Clear LE Resolving List */
+			hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL);
+		}
+
 		if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) {
 			/* Read LE Maximum Data Length */
 			hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6ee69a79258f..562e7a854ed6 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1307,6 +1307,19 @@ static void hci_cc_le_write_def_data_len(struct hci_dev *hdev,
 	hdev->le_def_tx_time = le16_to_cpu(sent->tx_time);
 }
 
+static void hci_cc_le_clear_resolv_list(struct hci_dev *hdev,
+				       struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	hci_bdaddr_list_clear(&hdev->le_resolv_list);
+}
+
 static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev,
 					   struct sk_buff *skb)
 {
@@ -3029,6 +3042,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_write_def_data_len(hdev, skb);
 		break;
 
+	case HCI_OP_LE_CLEAR_RESOLV_LIST:
+		hci_cc_le_clear_resolv_list(hdev, skb);
+		break;
+
 	case HCI_OP_LE_READ_RESOLV_LIST_SIZE:
 		hci_cc_le_read_resolv_list_size(hdev, skb);
 		break;
-- 
cgit v1.2.3


From 1b0707a781eeaeeaacb464e18bb3f049b99b496b Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:50:38 +0000
Subject: Bluetooth: remove unused bt-nokia-h4p.h header

Nothing in tree use this header which seems a remains of a staging
driver.
This patch remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/linux/platform_data/bt-nokia-h4p.h | 38 ------------------------------
 1 file changed, 38 deletions(-)
 delete mode 100644 include/linux/platform_data/bt-nokia-h4p.h

(limited to 'include')

diff --git a/include/linux/platform_data/bt-nokia-h4p.h b/include/linux/platform_data/bt-nokia-h4p.h
deleted file mode 100644
index 30d169dfadf3..000000000000
--- a/include/linux/platform_data/bt-nokia-h4p.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * This file is part of Nokia H4P bluetooth driver
- *
- * Copyright (C) 2010 Nokia Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-
-/**
- * struct hci_h4p_platform data - hci_h4p Platform data structure
- */
-struct hci_h4p_platform_data {
-	int chip_type;
-	int bt_sysclk;
-	unsigned int bt_wakeup_gpio;
-	unsigned int host_wakeup_gpio;
-	unsigned int reset_gpio;
-	int reset_gpio_shared;
-	unsigned int uart_irq;
-	phys_addr_t uart_base;
-	const char *uart_iclk;
-	const char *uart_fclk;
-	void (*set_pm_limits)(struct device *dev, bool set);
-};
-- 
cgit v1.2.3


From 0f725561e168485eff7277d683405c05b192f537 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:56:59 -0700
Subject: iommu/vt-d: Add definitions for PFSID

When SRIOV VF device IOTLB is invalidated, we need to provide
the PF source ID such that IOMMU hardware can gauge the depth
of invalidation queue which is shared among VFs. This is needed
when device invalidation throttle (DIT) capability is supported.

This patch adds bit definitions for checking and tracking PFSID.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 1 +
 include/linux/intel-iommu.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 14e4b3722428..7f6194ef48f4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -422,6 +422,7 @@ struct device_domain_info {
 	struct list_head global; /* link to global list */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
+	u16 pfsid;		/* SRIOV physical function source ID */
 	u8 pasid_supported:3;
 	u8 pasid_enabled:1;
 	u8 pri_supported:1;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1df940196ab2..3b1c37155572 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -114,6 +114,7 @@
  * Extended Capability Register
  */
 
+#define ecap_dit(e)		((e >> 41) & 0x1)
 #define ecap_pasid(e)		((e >> 40) & 0x1)
 #define ecap_pss(e)		((e >> 35) & 0x1f)
 #define ecap_eafs(e)		((e >> 34) & 0x1)
@@ -283,6 +284,7 @@ enum {
 #define QI_DEV_IOTLB_SID(sid)	((u64)((sid) & 0xffff) << 32)
 #define QI_DEV_IOTLB_QDEP(qdep)	(((qdep) & 0x1f) << 16)
 #define QI_DEV_IOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_IOTLB_SIZE	1
 #define QI_DEV_IOTLB_MAX_INVS	32
 
@@ -307,6 +309,7 @@ enum {
 #define QI_DEV_EIOTLB_PASID(p)	(((u64)p) << 32)
 #define QI_DEV_EIOTLB_SID(sid)	((u64)((sid) & 0xffff) << 16)
 #define QI_DEV_EIOTLB_QDEP(qd)	((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
 #define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-- 
cgit v1.2.3


From 1c48db44924298ad0cb5a6386b88017539be8822 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Thu, 7 Jun 2018 09:57:00 -0700
Subject: iommu/vt-d: Fix dev iotlb pfsid use

PFSID should be used in the invalidation descriptor for flushing
device IOTLBs on SRIOV VFs.

Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: "Ashok Raj" <ashok.raj@intel.com>
Cc: "Lu Baolu" <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c        |  6 +++---
 drivers/iommu/intel-iommu.c | 17 ++++++++++++++++-
 include/linux/intel-iommu.h |  5 ++---
 3 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 75456b5aa825..d9c748b6f9e4 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1339,8 +1339,8 @@ void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 	qi_submit_sync(&desc, iommu);
 }
 
-void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
-			u64 addr, unsigned mask)
+void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			u16 qdep, u64 addr, unsigned mask)
 {
 	struct qi_desc desc;
 
@@ -1355,7 +1355,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
 		qdep = 0;
 
 	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
-		   QI_DIOTLB_TYPE;
+		   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
 
 	qi_submit_sync(&desc, iommu);
 }
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7f6194ef48f4..497ef94c5a8c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1475,6 +1475,20 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info)
 		return;
 
 	pdev = to_pci_dev(info->dev);
+	/* For IOMMU that supports device IOTLB throttling (DIT), we assign
+	 * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
+	 * queue depth at PF level. If DIT is not set, PFSID will be treated as
+	 * reserved, which should be set to 0.
+	 */
+	if (!ecap_dit(info->iommu->ecap))
+		info->pfsid = 0;
+	else {
+		struct pci_dev *pf_pdev;
+
+		/* pdev will be returned if device is not a vf */
+		pf_pdev = pci_physfn(pdev);
+		info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
+	}
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	/* The PCIe spec, in its wisdom, declares that the behaviour of
@@ -1540,7 +1554,8 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
 
 		sid = info->bus << 8 | info->devfn;
 		qdep = info->ats_qdep;
-		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
+		qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
+				qdep, addr, mask);
 	}
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3b1c37155572..6692b40ca814 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -455,9 +455,8 @@ extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
 			     u8 fm, u64 type);
 extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type);
-extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
-			       u64 addr, unsigned mask);
-
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+			u16 qdep, u64 addr, unsigned mask);
 extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
-- 
cgit v1.2.3


From bad614b24293ae463e74d2465685f0e4e229baca Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 12 Jun 2018 16:41:21 -0500
Subject: iommu: Enable debugfs exposure of IOMMU driver internals

Provide base enablement for using debugfs to expose internal data of an
IOMMU driver. When called, create the /sys/kernel/debug/iommu directory.

Emit a strong warning at boot time to indicate that this feature is
enabled.

This function is called from iommu_init, and creates the initial DebugFS
directory. Drivers may then call iommu_debugfs_new_driver_dir() to
instantiate a device-specific directory to expose internal data.
It will return a pointer to the new dentry structure created in
/sys/kernel/debug/iommu, or NULL in the event of a failure.

Since the IOMMU driver can not be removed from the running system, there
is no need for an "off" function.

Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig         | 10 +++++++
 drivers/iommu/Makefile        |  1 +
 drivers/iommu/iommu-debugfs.c | 66 +++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu.c         |  2 ++
 include/linux/iommu.h         |  7 +++++
 5 files changed, 86 insertions(+)
 create mode 100644 drivers/iommu/iommu-debugfs.c

(limited to 'include')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e055d228bfb9..ab9181d8af3b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -60,6 +60,16 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
 
 endmenu
 
+config IOMMU_DEBUGFS
+	bool "Export IOMMU internals in DebugFS"
+	depends on DEBUG_FS
+	help
+	  Allows exposure of IOMMU device internals. This option enables
+	  the use of debugfs by IOMMU drivers as required. Devices can,
+	  at initialization time, cause the IOMMU code to create a top-level
+	  debug/iommu directory, and then populate a subdirectory with
+	  entries as required.
+
 config IOMMU_IOVA
 	tristate
 
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..74cfbc392862 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c
new file mode 100644
index 000000000000..3b1bf88fd1b0
--- /dev/null
+++ b/drivers/iommu/iommu-debugfs.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU debugfs core infrastructure
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/debugfs.h>
+
+struct dentry *iommu_debugfs_dir;
+
+/**
+ * iommu_debugfs_setup - create the top-level iommu directory in debugfs
+ *
+ * Provide base enablement for using debugfs to expose internal data of an
+ * IOMMU driver. When called, this function creates the
+ * /sys/kernel/debug/iommu directory.
+ *
+ * Emit a strong warning at boot time to indicate that this feature is
+ * enabled.
+ *
+ * This function is called from iommu_init; drivers may then call
+ * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific
+ * directory to be used to expose internal data.
+ */
+void iommu_debugfs_setup(void)
+{
+	if (!iommu_debugfs_dir) {
+		iommu_debugfs_dir = debugfs_create_dir("iommu", NULL);
+		pr_warn("\n");
+		pr_warn("*************************************************************\n");
+		pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE    **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("**  IOMMU DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL  **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("** This means that this kernel is built to expose internal **\n");
+		pr_warn("** IOMMU data structures, which may compromise security on **\n");
+		pr_warn("** your system.                                            **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("** If you see this message and you are not debugging the   **\n");
+		pr_warn("** kernel, report this immediately to your vendor!         **\n");
+		pr_warn("**                                                         **\n");
+		pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE    **\n");
+		pr_warn("*************************************************************\n");
+	}
+}
+
+/**
+ * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu
+ * @vendor: name of the vendor-specific subdirectory to create
+ *
+ * This function is called by an IOMMU driver to create the top-level debugfs
+ * directory for that driver.
+ *
+ * Return: upon success, a pointer to the dentry for the new directory.
+ *         NULL in case of failure.
+ */
+struct dentry *iommu_debugfs_new_driver_dir(const char *vendor)
+{
+	return debugfs_create_dir(vendor, iommu_debugfs_dir);
+}
+EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 63b37563db7e..d227b864a109 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1748,6 +1748,8 @@ static int __init iommu_init(void)
 					       NULL, kernel_kobj);
 	BUG_ON(!iommu_group_kset);
 
+	iommu_debugfs_setup();
+
 	return 0;
 }
 core_initcall(iommu_init);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 19938ee6eb31..7447b0b0579a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -698,4 +698,11 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 
 #endif /* CONFIG_IOMMU_API */
 
+#ifdef CONFIG_IOMMU_DEBUGFS
+extern	struct dentry *iommu_debugfs_dir;
+void iommu_debugfs_setup(void);
+#else
+static inline void iommu_debugfs_setup(void) {}
+#endif
+
 #endif /* __LINUX_IOMMU_H */
-- 
cgit v1.2.3


From 5711b4e89319c2912f20b2a4f371c1525fc9551d Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Thu, 5 Jul 2018 12:01:53 +0200
Subject: netfilter: nf_tproxy: fix possible non-linear access to transport
 header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a silent out-of-bound read possibility that was present
because of the misuse of this function.

Mostly it was called with a struct udphdr *hp which had only the udphdr
part linearized by the skb_header_pointer, however
nf_tproxy_get_sock_v{4,6} uses it as a tcphdr pointer, so some reads for
tcp specific attributes may be invalid.

Fixes: a583636a83ea ("inet: refactor inet[6]_lookup functions to take skb")
Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h   |  4 ++--
 net/ipv4/netfilter/nf_tproxy_ipv4.c | 18 ++++++++++++------
 net/ipv6/netfilter/nf_tproxy_ipv6.c | 18 ++++++++++++------
 net/netfilter/xt_TPROXY.c           |  8 ++++----
 4 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 9754a50ecde9..4cc64c8446eb 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -64,7 +64,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
  * belonging to established connections going through that one.
  */
 struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 		      const u8 protocol,
 		      const __be32 saddr, const __be32 daddr,
 		      const __be16 sport, const __be16 dport,
@@ -103,7 +103,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 			    struct sock *sk);
 
 struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 		      const u8 protocol,
 		      const struct in6_addr *saddr, const struct in6_addr *daddr,
 		      const __be16 sport, const __be16 dport,
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 805e83ec3ad9..164714104965 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -37,7 +37,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
 		 * to a listener socket if there's one */
 		struct sock *sk2;
 
-		sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+		sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
 					    iph->saddr, laddr ? laddr : iph->daddr,
 					    hp->source, lport ? lport : hp->dest,
 					    skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -71,7 +71,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
 
 struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
 		      const u8 protocol,
 		      const __be32 saddr, const __be32 daddr,
 		      const __be16 sport, const __be16 dport,
@@ -79,16 +79,21 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
 		      const enum nf_tproxy_lookup_t lookup_type)
 {
 	struct sock *sk;
-	struct tcphdr *tcph;
 
 	switch (protocol) {
-	case IPPROTO_TCP:
+	case IPPROTO_TCP: {
+		struct tcphdr _hdr, *hp;
+
+		hp = skb_header_pointer(skb, ip_hdrlen(skb),
+					sizeof(struct tcphdr), &_hdr);
+		if (hp == NULL)
+			return NULL;
+
 		switch (lookup_type) {
 		case NF_TPROXY_LOOKUP_LISTENER:
-			tcph = hp;
 			sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
 						    ip_hdrlen(skb) +
-						      __tcp_hdrlen(tcph),
+						      __tcp_hdrlen(hp),
 						    saddr, sport,
 						    daddr, dport,
 						    in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
 			BUG();
 		}
 		break;
+		}
 	case IPPROTO_UDP:
 		sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
 				     in->ifindex);
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index bf1d6c421e3b..5dfd33af6451 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -55,7 +55,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 		 * to a listener socket if there's one */
 		struct sock *sk2;
 
-		sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
+		sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, tproto,
 					    &iph->saddr,
 					    nf_tproxy_laddr6(skb, laddr, &iph->daddr),
 					    hp->source,
@@ -72,7 +72,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
 EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
 
 struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
 		      const u8 protocol,
 		      const struct in6_addr *saddr, const struct in6_addr *daddr,
 		      const __be16 sport, const __be16 dport,
@@ -80,15 +80,20 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 		      const enum nf_tproxy_lookup_t lookup_type)
 {
 	struct sock *sk;
-	struct tcphdr *tcph;
 
 	switch (protocol) {
-	case IPPROTO_TCP:
+	case IPPROTO_TCP: {
+		struct tcphdr _hdr, *hp;
+
+		hp = skb_header_pointer(skb, thoff,
+					sizeof(struct tcphdr), &_hdr);
+		if (hp == NULL)
+			return NULL;
+
 		switch (lookup_type) {
 		case NF_TPROXY_LOOKUP_LISTENER:
-			tcph = hp;
 			sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
-						   thoff + __tcp_hdrlen(tcph),
+						   thoff + __tcp_hdrlen(hp),
 						   saddr, sport,
 						   daddr, ntohs(dport),
 						   in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
 			BUG();
 		}
 		break;
+		}
 	case IPPROTO_UDP:
 		sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
 				     in->ifindex);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 58fce4e749a9..d76550a8b642 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -61,7 +61,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
 	 * addresses, this happens if the redirect already happened
 	 * and the current packet belongs to an already established
 	 * connection */
-	sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+	sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
 				   iph->saddr, iph->daddr,
 				   hp->source, hp->dest,
 				   skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -77,7 +77,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
 	else if (!sk)
 		/* no, there's no established connection, check if
 		 * there's a listener on the redirected addr/port */
-		sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+		sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
 					   iph->saddr, laddr,
 					   hp->source, lport,
 					   skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -150,7 +150,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	 * addresses, this happens if the redirect already happened
 	 * and the current packet belongs to an already established
 	 * connection */
-	sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
+	sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto,
 				   &iph->saddr, &iph->daddr,
 				   hp->source, hp->dest,
 				   xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -171,7 +171,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
 	else if (!sk)
 		/* no there's no established connection, check if
 		 * there's a listener on the redirected addr/port */
-		sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
+		sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff,
 					   tproto, &iph->saddr, laddr,
 					   hp->source, lport,
 					   xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
-- 
cgit v1.2.3


From a948f713842ad5c23f125efc61dee6951893219c Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 3 Jul 2018 15:05:48 -0500
Subject: nl80211/mac80211: allow non-linear skb in rx_control_port

The current implementation of cfg80211_rx_control_port assumed that the
caller could provide a contiguous region of memory for the control port
frame to be sent up to userspace.  Unfortunately, many drivers produce
non-linear skbs, especially for data frames.  This resulted in userspace
getting notified of control port frames with correct metadata (from
address, port, etc) yet garbage / nonsense contents, resulting in bad
handshakes, disconnections, etc.

mac80211 linearizes skbs containing management frames.  But it didn't
seem worthwhile to do this for control port frames.  Thus the signature
of cfg80211_rx_control_port was changed to take the skb directly.
nl80211 then takes care of obtaining control port frame data directly
from the (linear | non-linear) skb.

The caller is still responsible for freeing the skb,
cfg80211_rx_control_port does not take ownership of it.

Fixes: 6a671a50f819 ("nl80211: Add CMD_CONTROL_PORT_FRAME API")
Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[fix some kernel-doc formatting, add fixes tag]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 12 ++++++------
 net/mac80211/rx.c      |  5 +----
 net/wireless/nl80211.c | 24 +++++++++++++++---------
 net/wireless/trace.h   | 18 ++++++++++--------
 4 files changed, 32 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5fbfe61f41c6..1beb3ead0385 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5835,10 +5835,11 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 /**
  * cfg80211_rx_control_port - notification about a received control port frame
  * @dev: The device the frame matched to
- * @buf: control port frame
- * @len: length of the frame data
- * @addr: The peer from which the frame was received
- * @proto: frame protocol, typically PAE or Pre-authentication
+ * @skb: The skbuf with the control port frame.  It is assumed that the skbuf
+ *	is 802.3 formatted (with 802.3 header).  The skb can be non-linear.
+ *	This function does not take ownership of the skb, so the caller is
+ *	responsible for any cleanup.  The caller must also ensure that
+ *	skb->protocol is set appropriately.
  * @unencrypted: Whether the frame was received unencrypted
  *
  * This function is used to inform userspace about a received control port
@@ -5851,8 +5852,7 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
  * Return: %true if the frame was passed to userspace
  */
 bool cfg80211_rx_control_port(struct net_device *dev,
-			      const u8 *buf, size_t len,
-			      const u8 *addr, u16 proto, bool unencrypted);
+			      struct sk_buff *skb, bool unencrypted);
 
 /**
  * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0a38cc1cbebc..932985ca4e66 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2254,11 +2254,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
 		     sdata->control_port_over_nl80211)) {
 		struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 		bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
-		struct ethhdr *ehdr = eth_hdr(skb);
 
-		cfg80211_rx_control_port(dev, skb->data, skb->len,
-					 ehdr->h_source,
-					 be16_to_cpu(skb->protocol), noencrypt);
+		cfg80211_rx_control_port(dev, skb, noencrypt);
 		dev_kfree_skb(skb);
 	} else {
 		/* deliver to local stack */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4eece06be1e7..b6c700572755 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14923,20 +14923,24 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
 
 static int __nl80211_rx_control_port(struct net_device *dev,
-				     const u8 *buf, size_t len,
-				     const u8 *addr, u16 proto,
+				     struct sk_buff *skb,
 				     bool unencrypted, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct ethhdr *ehdr = eth_hdr(skb);
+	const u8 *addr = ehdr->h_source;
+	u16 proto = be16_to_cpu(skb->protocol);
 	struct sk_buff *msg;
 	void *hdr;
+	struct nlattr *frame;
+
 	u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);
 
 	if (!nlportid)
 		return -ENOENT;
 
-	msg = nlmsg_new(100 + len, gfp);
+	msg = nlmsg_new(100 + skb->len, gfp);
 	if (!msg)
 		return -ENOMEM;
 
@@ -14950,13 +14954,17 @@ static int __nl80211_rx_control_port(struct net_device *dev,
 	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
 	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
 			      NL80211_ATTR_PAD) ||
-	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
 	    nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
 	    (unencrypted && nla_put_flag(msg,
 					 NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
 		goto nla_put_failure;
 
+	frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len);
+	if (!frame)
+		goto nla_put_failure;
+
+	skb_copy_bits(skb, 0, nla_data(frame), skb->len);
 	genlmsg_end(msg, hdr);
 
 	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
@@ -14967,14 +14975,12 @@ static int __nl80211_rx_control_port(struct net_device *dev,
 }
 
 bool cfg80211_rx_control_port(struct net_device *dev,
-			      const u8 *buf, size_t len,
-			      const u8 *addr, u16 proto, bool unencrypted)
+			      struct sk_buff *skb, bool unencrypted)
 {
 	int ret;
 
-	trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted);
-	ret = __nl80211_rx_control_port(dev, buf, len, addr, proto,
-					unencrypted, GFP_ATOMIC);
+	trace_cfg80211_rx_control_port(dev, skb, unencrypted);
+	ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC);
 	trace_cfg80211_return_bool(ret == 0);
 	return ret == 0;
 }
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 2b417a2fe63f..7c73510b161f 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2627,23 +2627,25 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
 );
 
 TRACE_EVENT(cfg80211_rx_control_port,
-	TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len,
-		 const u8 *addr, u16 proto, bool unencrypted),
-	TP_ARGS(netdev, buf, len, addr, proto, unencrypted),
+	TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
+		 bool unencrypted),
+	TP_ARGS(netdev, skb, unencrypted),
 	TP_STRUCT__entry(
 		NETDEV_ENTRY
-		MAC_ENTRY(addr)
+		__field(int, len)
+		MAC_ENTRY(from)
 		__field(u16, proto)
 		__field(bool, unencrypted)
 	),
 	TP_fast_assign(
 		NETDEV_ASSIGN;
-		MAC_ASSIGN(addr, addr);
-		__entry->proto = proto;
+		__entry->len = skb->len;
+		MAC_ASSIGN(from, eth_hdr(skb)->h_source);
+		__entry->proto = be16_to_cpu(skb->protocol);
 		__entry->unencrypted = unencrypted;
 	),
-	TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s",
-		  NETDEV_PR_ARG, MAC_PR_ARG(addr),
+	TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s",
+		  NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from),
 		  __entry->proto, BOOL_TO_STR(__entry->unencrypted))
 );
 
-- 
cgit v1.2.3


From 818b7587b4d34e989ea6c042eeb8d50ffa5be13e Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 27 Jun 2018 10:31:20 -0500
Subject: x86: irq_remapping: Move irq remapping mode enum

The enum is currently defined in Intel-specific DMAR header file,
but it is also used by APIC common code. Therefore, move it to
a more appropriate interrupt-remapping common header file.
This will also be used by subsequent patches.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/include/asm/irq_remapping.h | 5 +++++
 include/linux/dmar.h                 | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 023b4a9fc846..5f26962eff42 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -33,6 +33,11 @@ enum irq_remap_cap {
 	IRQ_POSTING_CAP = 0,
 };
 
+enum {
+	IRQ_REMAP_XAPIC_MODE,
+	IRQ_REMAP_X2APIC_MODE,
+};
+
 struct vcpu_data {
 	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
 	u32 vector;		/* Guest vector of the interrupt */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e2433bc50210..843a41ba7e28 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -265,11 +265,6 @@ static inline void dmar_copy_shared_irte(struct irte *dst, struct irte *src)
 #define PDA_LOW_BIT    26
 #define PDA_HIGH_BIT   32
 
-enum {
-	IRQ_REMAP_XAPIC_MODE,
-	IRQ_REMAP_X2APIC_MODE,
-};
-
 /* Can't use the common MSI interrupt functions
  * since DMAR is not a pci device
  */
-- 
cgit v1.2.3


From d64c5cf8e89d124355924c513a42b16f0d7d3a03 Mon Sep 17 00:00:00 2001
From: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Date: Fri, 6 Jul 2018 13:50:36 +0100
Subject: ALSA: pcm: Allow drivers to set R/W wait time.

Currently ALSA core blocks userspace for about 10 seconds for PCM R/W IO.
This needs to be configurable for modern hardware like DSPs where no
pointer update in milliseconds can indicate terminal DSP errors.

Add a substream variable to set the wait time in ms. This allows userspace
and drivers to recover more quickly from terminal DSP errors.

Signed-off-by: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h  |  1 +
 sound/core/pcm_lib.c | 17 ++++++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index e054c583d3b3..fcdf358a25f0 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -462,6 +462,7 @@ struct snd_pcm_substream {
         /* -- timer section -- */
 	struct snd_timer *timer;		/* timer */
 	unsigned timer_running: 1;	/* time is running */
+	long wait_time;	/* time in ms for R/W to wait for avail */
 	/* -- next substream -- */
 	struct snd_pcm_substream *next;
 	/* -- linked substreams -- */
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index c1d2e8e1fc6b..5736860f325b 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1833,12 +1833,19 @@ static int wait_for_avail(struct snd_pcm_substream *substream,
 	if (runtime->no_period_wakeup)
 		wait_time = MAX_SCHEDULE_TIMEOUT;
 	else {
-		wait_time = 10;
-		if (runtime->rate) {
-			long t = runtime->period_size * 2 / runtime->rate;
-			wait_time = max(t, wait_time);
+		/* use wait time from substream if available */
+		if (substream->wait_time) {
+			wait_time = substream->wait_time;
+		} else {
+			wait_time = 10;
+
+			if (runtime->rate) {
+				long t = runtime->period_size * 2 /
+					 runtime->rate;
+				wait_time = max(t, wait_time);
+			}
+			wait_time = msecs_to_jiffies(wait_time * 1000);
 		}
-		wait_time = msecs_to_jiffies(wait_time * 1000);
 	}
 
 	for (;;) {
-- 
cgit v1.2.3


From 3ffa6583e24e1ad1abab836d24bfc9d2308074e5 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 25 Jun 2018 09:51:48 +0200
Subject: power: remove possible deadlock when unregistering power_supply

If a device gets removed right after having registered a power_supply node,
we might enter in a deadlock between the remove call (that has a lock on
the parent device) and the deferred register work.

Allow the deferred register work to exit without taking the lock when
we are in the remove state.

Stack trace on a Ubuntu 16.04:

[16072.109121] INFO: task kworker/u16:2:1180 blocked for more than 120 seconds.
[16072.109127]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109129] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109132] kworker/u16:2   D    0  1180      2 0x80000000
[16072.109142] Workqueue: events_power_efficient power_supply_deferred_register_work
[16072.109144] Call Trace:
[16072.109152]  __schedule+0x3d6/0x8b0
[16072.109155]  schedule+0x36/0x80
[16072.109158]  schedule_preempt_disabled+0xe/0x10
[16072.109161]  __mutex_lock.isra.2+0x2ab/0x4e0
[16072.109166]  __mutex_lock_slowpath+0x13/0x20
[16072.109168]  ? __mutex_lock_slowpath+0x13/0x20
[16072.109171]  mutex_lock+0x2f/0x40
[16072.109174]  power_supply_deferred_register_work+0x2b/0x50
[16072.109179]  process_one_work+0x15b/0x410
[16072.109182]  worker_thread+0x4b/0x460
[16072.109186]  kthread+0x10c/0x140
[16072.109189]  ? process_one_work+0x410/0x410
[16072.109191]  ? kthread_create_on_node+0x70/0x70
[16072.109194]  ret_from_fork+0x35/0x40
[16072.109199] INFO: task test:2257 blocked for more than 120 seconds.
[16072.109202]       Not tainted 4.13.0-41-generic #46~16.04.1-Ubuntu
[16072.109204] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[16072.109206] test            D    0  2257   2256 0x00000004
[16072.109208] Call Trace:
[16072.109211]  __schedule+0x3d6/0x8b0
[16072.109215]  schedule+0x36/0x80
[16072.109218]  schedule_timeout+0x1f3/0x360
[16072.109221]  ? check_preempt_curr+0x5a/0xa0
[16072.109224]  ? ttwu_do_wakeup+0x1e/0x150
[16072.109227]  wait_for_completion+0xb4/0x140
[16072.109230]  ? wait_for_completion+0xb4/0x140
[16072.109233]  ? wake_up_q+0x70/0x70
[16072.109236]  flush_work+0x129/0x1e0
[16072.109240]  ? worker_detach_from_pool+0xb0/0xb0
[16072.109243]  __cancel_work_timer+0x10f/0x190
[16072.109247]  ? device_del+0x264/0x310
[16072.109250]  ? __wake_up+0x44/0x50
[16072.109253]  cancel_delayed_work_sync+0x13/0x20
[16072.109257]  power_supply_unregister+0x37/0xb0
[16072.109260]  devm_power_supply_release+0x11/0x20
[16072.109263]  release_nodes+0x110/0x200
[16072.109266]  devres_release_group+0x7c/0xb0
[16072.109274]  wacom_remove+0xc2/0x110 [wacom]
[16072.109279]  hid_device_remove+0x6e/0xd0 [hid]
[16072.109284]  device_release_driver_internal+0x158/0x210
[16072.109288]  device_release_driver+0x12/0x20
[16072.109291]  bus_remove_device+0xec/0x160
[16072.109293]  device_del+0x1de/0x310
[16072.109298]  hid_destroy_device+0x27/0x60 [hid]
[16072.109303]  usbhid_disconnect+0x51/0x70 [usbhid]
[16072.109308]  usb_unbind_interface+0x77/0x270
[16072.109311]  device_release_driver_internal+0x158/0x210
[16072.109315]  device_release_driver+0x12/0x20
[16072.109318]  usb_driver_release_interface+0x77/0x80
[16072.109321]  proc_ioctl+0x20f/0x250
[16072.109325]  usbdev_do_ioctl+0x57f/0x1140
[16072.109327]  ? __wake_up+0x44/0x50
[16072.109331]  usbdev_ioctl+0xe/0x20
[16072.109336]  do_vfs_ioctl+0xa4/0x600
[16072.109339]  ? vfs_write+0x15a/0x1b0
[16072.109343]  SyS_ioctl+0x79/0x90
[16072.109347]  entry_SYSCALL_64_fastpath+0x24/0xab
[16072.109349] RIP: 0033:0x7f20da807f47
[16072.109351] RSP: 002b:00007ffc422ae398 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[16072.109353] RAX: ffffffffffffffda RBX: 00000000010b8560 RCX: 00007f20da807f47
[16072.109355] RDX: 00007ffc422ae3a0 RSI: 00000000c0105512 RDI: 0000000000000009
[16072.109356] RBP: 0000000000000000 R08: 00007ffc422ae3e0 R09: 0000000000000010
[16072.109357] R10: 00000000000000a6 R11: 0000000000000246 R12: 0000000000000000
[16072.109359] R13: 00000000010b8560 R14: 00007ffc422ae2e0 R15: 0000000000000000

Reported-and-tested-by: Richard Hughes <rhughes@redhat.com>
Tested-by: Aaron Skomra <Aaron.Skomra@wacom.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Fixes: 7f1a57fdd6cb ("power_supply: Fix possible NULL pointer dereference on early uevent")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/power/supply/power_supply_core.c | 11 +++++++++--
 include/linux/power_supply.h             |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index d21f478741c1..e85361878450 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/err.h>
@@ -140,8 +141,13 @@ static void power_supply_deferred_register_work(struct work_struct *work)
 	struct power_supply *psy = container_of(work, struct power_supply,
 						deferred_register_work.work);
 
-	if (psy->dev.parent)
-		mutex_lock(&psy->dev.parent->mutex);
+	if (psy->dev.parent) {
+		while (!mutex_trylock(&psy->dev.parent->mutex)) {
+			if (psy->removing)
+				return;
+			msleep(10);
+		}
+	}
 
 	power_supply_changed(psy);
 
@@ -1082,6 +1088,7 @@ EXPORT_SYMBOL_GPL(devm_power_supply_register_no_ws);
 void power_supply_unregister(struct power_supply *psy)
 {
 	WARN_ON(atomic_dec_return(&psy->use_cnt));
+	psy->removing = true;
 	cancel_work_sync(&psy->changed_work);
 	cancel_delayed_work_sync(&psy->deferred_register_work);
 	sysfs_remove_link(&psy->dev.kobj, "powers");
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index b21c4bd96b84..f80769175c56 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -269,6 +269,7 @@ struct power_supply {
 	spinlock_t changed_lock;
 	bool changed;
 	bool initialized;
+	bool removing;
 	atomic_t use_cnt;
 #ifdef CONFIG_THERMAL
 	struct thermal_zone_device *tzd;
-- 
cgit v1.2.3


From 7efe25a70c37395b41b034335345d6855926c2a6 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 19:44:56 +0000
Subject: iommu/shmobile: Remove unused include/linux/platform_data/sh_ipmmu.h
 header

include/linux/platform_data/sh_ipmmu.h is unused since commit

	ae50dc4874c5 ("iommu/shmobile: Remove unused Renesas IPMMU/IPMMUI")

Let's remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/platform_data/sh_ipmmu.h | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 include/linux/platform_data/sh_ipmmu.h

(limited to 'include')

diff --git a/include/linux/platform_data/sh_ipmmu.h b/include/linux/platform_data/sh_ipmmu.h
deleted file mode 100644
index 39f7405cdac5..000000000000
--- a/include/linux/platform_data/sh_ipmmu.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* sh_ipmmu.h
- *
- * Copyright (C) 2012  Hideki EIRAKU
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- */
-
-#ifndef __SH_IPMMU_H__
-#define __SH_IPMMU_H__
-
-struct shmobile_ipmmu_platform_data {
-	const char * const *dev_names;
-	unsigned int num_dev_names;
-};
-
-#endif /* __SH_IPMMU_H__ */
-- 
cgit v1.2.3


From 663336ee2628096df0ce2b546b148cb74b5249fe Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 9 May 2018 08:15:46 -0700
Subject: device: Add #define dev_fmt similar to #define pr_fmt

Add a prefixing macro to dev_<level> uses similar to the pr_fmt
prefixing macro used in pr_<level> calls.

This can help avoid some string duplication in dev_<level> uses.

The default, like pr_fmt, is an empty #define dev_fmt(fmt) fmt

Rename the existing dev_<level> functions to _dev_<level> and
introduce #define dev_<level> _dev_<level> macros that use the
new #define dev_fmt

Miscellanea:

o Consistently use #defines with fmt, ... and ##__VA_ARGS__
o Remove unnecessary externs

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    |  12 +++---
 include/linux/device.h | 103 ++++++++++++++++++++++++++++---------------------
 2 files changed, 64 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..ceb8ce90aebb 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3002,12 +3002,12 @@ void func(const struct device *dev, const char *fmt, ...)	\
 }								\
 EXPORT_SYMBOL(func);
 
-define_dev_printk_level(dev_emerg, KERN_EMERG);
-define_dev_printk_level(dev_alert, KERN_ALERT);
-define_dev_printk_level(dev_crit, KERN_CRIT);
-define_dev_printk_level(dev_err, KERN_ERR);
-define_dev_printk_level(dev_warn, KERN_WARNING);
-define_dev_printk_level(dev_notice, KERN_NOTICE);
+define_dev_printk_level(_dev_emerg, KERN_EMERG);
+define_dev_printk_level(_dev_alert, KERN_ALERT);
+define_dev_printk_level(_dev_crit, KERN_CRIT);
+define_dev_printk_level(_dev_err, KERN_ERR);
+define_dev_printk_level(_dev_warn, KERN_WARNING);
+define_dev_printk_level(_dev_notice, KERN_NOTICE);
 define_dev_printk_level(_dev_info, KERN_INFO);
 
 #endif
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..2eaa9ea13c09 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1317,30 +1317,34 @@ struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
 
+#ifndef dev_fmt
+#define dev_fmt(fmt) fmt
+#endif
+
 #ifdef CONFIG_PRINTK
 
-extern __printf(3, 0)
+__printf(3, 0)
 int dev_vprintk_emit(int level, const struct device *dev,
 		     const char *fmt, va_list args);
-extern __printf(3, 4)
+__printf(3, 4)
 int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...);
 
-extern __printf(3, 4)
+__printf(3, 4)
 void dev_printk(const char *level, const struct device *dev,
 		const char *fmt, ...);
-extern __printf(2, 3)
-void dev_emerg(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_alert(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_crit(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_err(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_warn(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
-void dev_notice(const struct device *dev, const char *fmt, ...);
-extern __printf(2, 3)
+__printf(2, 3)
+void _dev_emerg(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_alert(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_crit(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_err(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_warn(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
+void _dev_notice(const struct device *dev, const char *fmt, ...);
+__printf(2, 3)
 void _dev_info(const struct device *dev, const char *fmt, ...);
 
 #else
@@ -1358,26 +1362,26 @@ static inline void __dev_printk(const char *level, const struct device *dev,
 {}
 static inline __printf(3, 4)
 void dev_printk(const char *level, const struct device *dev,
-		const char *fmt, ...)
+		 const char *fmt, ...)
 {}
 
 static inline __printf(2, 3)
-void dev_emerg(const struct device *dev, const char *fmt, ...)
+void _dev_emerg(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_crit(const struct device *dev, const char *fmt, ...)
+void _dev_crit(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_alert(const struct device *dev, const char *fmt, ...)
+void _dev_alert(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_err(const struct device *dev, const char *fmt, ...)
+void _dev_err(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_warn(const struct device *dev, const char *fmt, ...)
+void _dev_warn(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
-void dev_notice(const struct device *dev, const char *fmt, ...)
+void _dev_notice(const struct device *dev, const char *fmt, ...)
 {}
 static inline __printf(2, 3)
 void _dev_info(const struct device *dev, const char *fmt, ...)
@@ -1386,27 +1390,36 @@ void _dev_info(const struct device *dev, const char *fmt, ...)
 #endif
 
 /*
- * Stupid hackaround for existing uses of non-printk uses dev_info
- *
- * Note that the definition of dev_info below is actually _dev_info
- * and a macro is used to avoid redefining dev_info
+ * #defines for all the dev_<level> macros to prefix with whatever
+ * possible use of #define dev_fmt(fmt) ...
  */
 
-#define dev_info(dev, fmt, arg...) _dev_info(dev, fmt, ##arg)
+#define dev_emerg(dev, fmt, ...)					\
+	_dev_emerg(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_crit(dev, fmt, ...)						\
+	_dev_crit(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_alert(dev, fmt, ...)					\
+	_dev_alert(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_err(dev, fmt, ...)						\
+	_dev_err(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_warn(dev, fmt, ...)						\
+	_dev_warn(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_notice(dev, fmt, ...)					\
+	_dev_notice(dev, dev_fmt(fmt), ##__VA_ARGS__)
+#define dev_info(dev, fmt, ...)						\
+	_dev_info(dev, dev_fmt(fmt), ##__VA_ARGS__)
 
 #if defined(CONFIG_DYNAMIC_DEBUG)
-#define dev_dbg(dev, format, ...)		     \
-do {						     \
-	dynamic_dev_dbg(dev, format, ##__VA_ARGS__); \
-} while (0)
+#define dev_dbg(dev, fmt, ...)						\
+	dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__)
 #elif defined(DEBUG)
-#define dev_dbg(dev, format, arg...)		\
-	dev_printk(KERN_DEBUG, dev, format, ##arg)
+#define dev_dbg(dev, fmt, ...)						\
+	dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__)
 #else
-#define dev_dbg(dev, format, arg...)				\
-({								\
-	if (0)							\
-		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+#define dev_dbg(dev, fmt, ...)						\
+({									\
+	if (0)								\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 })
 #endif
 
@@ -1478,7 +1491,7 @@ do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
 	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
 	    __ratelimit(&_rs))						\
-		__dynamic_dev_dbg(&descriptor, dev, fmt,		\
+		__dynamic_dev_dbg(&descriptor, dev, dev_fmt(fmt),	\
 				  ##__VA_ARGS__);			\
 } while (0)
 #elif defined(DEBUG)
@@ -1488,23 +1501,23 @@ do {									\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 	if (__ratelimit(&_rs))						\
-		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 } while (0)
 #else
 #define dev_dbg_ratelimited(dev, fmt, ...)				\
 do {									\
 	if (0)								\
-		dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__);	\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 } while (0)
 #endif
 
 #ifdef VERBOSE_DEBUG
 #define dev_vdbg	dev_dbg
 #else
-#define dev_vdbg(dev, format, arg...)				\
-({								\
-	if (0)							\
-		dev_printk(KERN_DEBUG, dev, format, ##arg);	\
+#define dev_vdbg(dev, fmt, ...)						\
+({									\
+	if (0)								\
+		dev_printk(KERN_DEBUG, dev, dev_fmt(fmt), ##__VA_ARGS__); \
 })
 #endif
 
-- 
cgit v1.2.3


From e240cd0df48185a28c153f83a39ba3940e3e9b86 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 6 Jul 2018 19:06:43 +0200
Subject: netfilter: nf_tables: place all set backends in one single module

This patch disallows rbtree with single elements, which is causing
problems with the recent timeout support. Before this patch, you
could opt out individual set representations per module, which is
just adding extra complexity.

Fixes: 8d8540c4f5e0("netfilter: nft_set_rbtree: add timeout support")
Reported-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h |  6 ++++++
 net/netfilter/Kconfig                  | 25 +++++++------------------
 net/netfilter/Makefile                 |  7 ++++---
 net/netfilter/nf_tables_set_core.c     | 28 ++++++++++++++++++++++++++++
 net/netfilter/nft_set_bitmap.c         | 19 +------------------
 net/netfilter/nft_set_hash.c           | 29 +++--------------------------
 net/netfilter/nft_set_rbtree.c         | 19 +------------------
 7 files changed, 50 insertions(+), 83 deletions(-)
 create mode 100644 net/netfilter/nf_tables_set_core.c

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index e0c0c2558ec4..a05134507e7b 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -65,4 +65,10 @@ extern const struct nft_expr_ops nft_payload_fast_ops;
 extern struct static_key_false nft_counters_enabled;
 extern struct static_key_false nft_trace_enabled;
 
+extern struct nft_set_type nft_set_rhash_type;
+extern struct nft_set_type nft_set_hash_type;
+extern struct nft_set_type nft_set_hash_fast_type;
+extern struct nft_set_type nft_set_rbtree_type;
+extern struct nft_set_type nft_set_bitmap_type;
+
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dbd7d1fad277..f0a1c536ef15 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -460,6 +460,13 @@ config NF_TABLES
 
 if NF_TABLES
 
+config NF_TABLES_SET
+	tristate "Netfilter nf_tables set infrastructure"
+	help
+	  This option enables the nf_tables set infrastructure that allows to
+	  look up for elements in a set and to build one-way mappings between
+	  matchings and actions.
+
 config NF_TABLES_INET
 	depends on IPV6
 	select NF_TABLES_IPV4
@@ -493,24 +500,6 @@ config NFT_FLOW_OFFLOAD
 	  This option adds the "flow_offload" expression that you can use to
 	  choose what flows are placed into the hardware.
 
-config NFT_SET_RBTREE
-	tristate "Netfilter nf_tables rbtree set module"
-	help
-	  This option adds the "rbtree" set type (Red Black tree) that is used
-	  to build interval-based sets.
-
-config NFT_SET_HASH
-	tristate "Netfilter nf_tables hash set module"
-	help
-	  This option adds the "hash" set type that is used to build one-way
-	  mappings between matchings and actions.
-
-config NFT_SET_BITMAP
-	tristate "Netfilter nf_tables bitmap set module"
-	help
-	  This option adds the "bitmap" set type that is used to build sets
-	  whose keys are smaller or equal to 16 bits.
-
 config NFT_COUNTER
 	tristate "Netfilter nf_tables counter module"
 	help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 44449389e527..8a76dced974d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,7 +78,11 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
 		  nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
 		  nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
 
+nf_tables_set-objs := nf_tables_set_core.o \
+		      nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
+
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
+obj-$(CONFIG_NF_TABLES_SET)	+= nf_tables_set.o
 obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 obj-$(CONFIG_NFT_CONNLIMIT)	+= nft_connlimit.o
 obj-$(CONFIG_NFT_NUMGEN)	+= nft_numgen.o
@@ -91,9 +95,6 @@ obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
 obj-$(CONFIG_NFT_QUOTA)		+= nft_quota.o
 obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
 obj-$(CONFIG_NFT_REJECT_INET)	+= nft_reject_inet.o
-obj-$(CONFIG_NFT_SET_RBTREE)	+= nft_set_rbtree.o
-obj-$(CONFIG_NFT_SET_HASH)	+= nft_set_hash.o
-obj-$(CONFIG_NFT_SET_BITMAP)	+= nft_set_bitmap.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c
new file mode 100644
index 000000000000..814789644bd3
--- /dev/null
+++ b/net/netfilter/nf_tables_set_core.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netfilter/nf_tables_core.h>
+
+static int __init nf_tables_set_module_init(void)
+{
+	nft_register_set(&nft_set_hash_fast_type);
+	nft_register_set(&nft_set_hash_type);
+	nft_register_set(&nft_set_rhash_type);
+	nft_register_set(&nft_set_bitmap_type);
+	nft_register_set(&nft_set_rbtree_type);
+
+	return 0;
+}
+
+static void __exit nf_tables_set_module_exit(void)
+{
+	nft_unregister_set(&nft_set_rbtree_type);
+	nft_unregister_set(&nft_set_bitmap_type);
+	nft_unregister_set(&nft_set_rhash_type);
+	nft_unregister_set(&nft_set_hash_type);
+	nft_unregister_set(&nft_set_hash_fast_type);
+}
+
+module_init(nf_tables_set_module_init);
+module_exit(nf_tables_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index d6626e01c7ee..128bc16f52dd 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,7 +296,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
-static struct nft_set_type nft_bitmap_type __read_mostly = {
+struct nft_set_type nft_set_bitmap_type __read_mostly = {
 	.owner		= THIS_MODULE,
 	.ops		= {
 		.privsize	= nft_bitmap_privsize,
@@ -314,20 +314,3 @@ static struct nft_set_type nft_bitmap_type __read_mostly = {
 		.get		= nft_bitmap_get,
 	},
 };
-
-static int __init nft_bitmap_module_init(void)
-{
-	return nft_register_set(&nft_bitmap_type);
-}
-
-static void __exit nft_bitmap_module_exit(void)
-{
-	nft_unregister_set(&nft_bitmap_type);
-}
-
-module_init(nft_bitmap_module_init);
-module_exit(nft_bitmap_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6f9a1365a09f..72ef35b51cac 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -654,7 +654,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
 	return true;
 }
 
-static struct nft_set_type nft_rhash_type __read_mostly = {
+struct nft_set_type nft_set_rhash_type __read_mostly = {
 	.owner		= THIS_MODULE,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT |
 			  NFT_SET_TIMEOUT | NFT_SET_EVAL,
@@ -677,7 +677,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
 	},
 };
 
-static struct nft_set_type nft_hash_type __read_mostly = {
+struct nft_set_type nft_set_hash_type __read_mostly = {
 	.owner		= THIS_MODULE,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
 	.ops		= {
@@ -697,7 +697,7 @@ static struct nft_set_type nft_hash_type __read_mostly = {
 	},
 };
 
-static struct nft_set_type nft_hash_fast_type __read_mostly = {
+struct nft_set_type nft_set_hash_fast_type __read_mostly = {
 	.owner		= THIS_MODULE,
 	.features	= NFT_SET_MAP | NFT_SET_OBJECT,
 	.ops		= {
@@ -716,26 +716,3 @@ static struct nft_set_type nft_hash_fast_type __read_mostly = {
 		.get		= nft_hash_get,
 	},
 };
-
-static int __init nft_hash_module_init(void)
-{
-	if (nft_register_set(&nft_hash_fast_type) ||
-	    nft_register_set(&nft_hash_type) ||
-	    nft_register_set(&nft_rhash_type))
-		return 1;
-	return 0;
-}
-
-static void __exit nft_hash_module_exit(void)
-{
-	nft_unregister_set(&nft_rhash_type);
-	nft_unregister_set(&nft_hash_type);
-	nft_unregister_set(&nft_hash_fast_type);
-}
-
-module_init(nft_hash_module_init);
-module_exit(nft_hash_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7f3a9a211034..1f8f257cb518 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -462,7 +462,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
-static struct nft_set_type nft_rbtree_type __read_mostly = {
+struct nft_set_type nft_set_rbtree_type __read_mostly = {
 	.owner		= THIS_MODULE,
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
 	.ops		= {
@@ -481,20 +481,3 @@ static struct nft_set_type nft_rbtree_type __read_mostly = {
 		.get		= nft_rbtree_get,
 	},
 };
-
-static int __init nft_rbtree_module_init(void)
-{
-	return nft_register_set(&nft_rbtree_type);
-}
-
-static void __exit nft_rbtree_module_exit(void)
-{
-	nft_unregister_set(&nft_rbtree_type);
-}
-
-module_init(nft_rbtree_module_init);
-module_exit(nft_rbtree_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
-- 
cgit v1.2.3


From 3b4c2511c004521efa89875512b5039a04d5e2e0 Mon Sep 17 00:00:00 2001
From: Neha Bhende <bhenden@vmware.com>
Date: Mon, 18 Jun 2018 16:44:48 -0700
Subject: drm/vmwgfx: Add CAP2 support in vmwgfx

The device exposes a new capability register. Add support for it.

Signed-off-by: Neha Bhende <bhenden@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   | 17 +++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   |  1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c |  3 +++
 include/uapi/drm/vmwgfx_drm.h         |  1 +
 4 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 1128420de2c0..f2fad88e4c54 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -258,6 +258,15 @@ MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
 module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
 
+static void vmw_print_capabilities2(uint32_t capabilities2)
+{
+	DRM_INFO("Capabilities2:\n");
+	if (capabilities2 & SVGA_CAP2_GROW_OTABLE)
+		DRM_INFO("  Grow oTable.\n");
+	if (capabilities2 & SVGA_CAP2_INTRA_SURFACE_COPY)
+		DRM_INFO("  IntraSurface copy.\n");
+}
+
 static void vmw_print_capabilities(uint32_t capabilities)
 {
 	DRM_INFO("Capabilities:\n");
@@ -684,6 +693,12 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	}
 
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
+
+	if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER) {
+		dev_priv->capabilities2 = vmw_read(dev_priv, SVGA_REG_CAP2);
+	}
+
+
 	ret = vmw_dma_select_mode(dev_priv);
 	if (unlikely(ret != 0)) {
 		DRM_INFO("Restricting capabilities due to IOMMU setup.\n");
@@ -752,6 +767,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	}
 
 	vmw_print_capabilities(dev_priv->capabilities);
+	if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER)
+		vmw_print_capabilities2(dev_priv->capabilities2);
 
 	ret = vmw_dma_masks(dev_priv);
 	if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a3a0826958a1..920365c0e9ab 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -386,6 +386,7 @@ struct vmw_private {
 	uint32_t initial_height;
 	u32 *mmio_virt;
 	uint32_t capabilities;
+	uint32_t capabilities2;
 	uint32_t max_gmr_ids;
 	uint32_t max_gmr_pages;
 	uint32_t max_mob_pages;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 6872c7ee8a08..ac6da0da2824 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -56,6 +56,9 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_HW_CAPS:
 		param->value = dev_priv->capabilities;
 		break;
+	case DRM_VMW_PARAM_HW_CAPS2:
+		param->value = dev_priv->capabilities2;
+		break;
 	case DRM_VMW_PARAM_FIFO_CAPS:
 		param->value = dev_priv->fifo.capabilities;
 		break;
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 57115a5fe61a..84e81b38ca18 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -95,6 +95,7 @@ extern "C" {
 #define DRM_VMW_PARAM_MAX_MOB_SIZE     10
 #define DRM_VMW_PARAM_SCREEN_TARGET    11
 #define DRM_VMW_PARAM_DX               12
+#define DRM_VMW_PARAM_HW_CAPS2         13
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
-- 
cgit v1.2.3


From 14b1c33e84295693c3b1a1d7c6ac82b3f384cd17 Mon Sep 17 00:00:00 2001
From: Deepak Rawat <drawat@vmware.com>
Date: Wed, 20 Jun 2018 14:48:35 -0700
Subject: drm/vmwgfx: Add new ioctl for GB surface create and reference

New ioctls DRM_VMW_GB_SURFACE_CREATE_EXT and DRM_VMW_GB_SURFACE_REF_EXT
are added which support 64-bit wide svga device surface flags, quality
level and multisample pattern.

Signed-off-by: Deepak Rawat <drawat@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     |  12 +
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     |   8 +
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c     |  20 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c    |   2 +
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 468 +++++++++++++++++++++-----------
 include/uapi/drm/vmwgfx_drm.h           | 102 +++++++
 6 files changed, 437 insertions(+), 175 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6cf81e19182f..59229111f303 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -137,6 +137,12 @@
 #define DRM_IOCTL_VMW_CREATE_EXTENDED_CONTEXT			\
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_CREATE_EXTENDED_CONTEXT,	\
 		struct drm_vmw_context_arg)
+#define DRM_IOCTL_VMW_GB_SURFACE_CREATE_EXT				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_GB_SURFACE_CREATE_EXT,	\
+		union drm_vmw_gb_surface_create_ext_arg)
+#define DRM_IOCTL_VMW_GB_SURFACE_REF_EXT				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_GB_SURFACE_REF_EXT,		\
+		union drm_vmw_gb_surface_reference_ext_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -224,6 +230,12 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_CREATE_EXTENDED_CONTEXT,
 		      vmw_extended_context_define_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
+	VMW_IOCTL_DEF(VMW_GB_SURFACE_CREATE_EXT,
+		      vmw_gb_surface_define_ext_ioctl,
+		      DRM_AUTH | DRM_RENDER_ALLOW),
+	VMW_IOCTL_DEF(VMW_GB_SURFACE_REF_EXT,
+		      vmw_gb_surface_reference_ext_ioctl,
+		      DRM_AUTH | DRM_RENDER_ALLOW),
 };
 
 static const struct pci_device_id vmw_pci_id_list[] = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 7e5c93083036..59af14714797 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1087,7 +1087,15 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 			       uint32_t multisample_count,
 			       uint32_t array_size,
 			       struct drm_vmw_size size,
+			       SVGA3dMSPattern multisample_pattern,
+			       SVGA3dMSQualityLevel quality_level,
 			       struct vmw_surface **srf_out);
+extern int vmw_gb_surface_define_ext_ioctl(struct drm_device *dev,
+					   void *data,
+					   struct drm_file *file_priv);
+extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev,
+					      void *data,
+					      struct drm_file *file_priv);
 
 /*
  * Shader management - vmwgfx_shader.c
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 0fb363458ab5..3201b0a51d10 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1238,15 +1238,17 @@ static int vmw_create_bo_proxy(struct drm_device *dev,
 	content_base_size.depth  = 1;
 
 	ret = vmw_surface_gb_priv_define(dev,
-			0, /* kernel visible only */
-			0, /* flags */
-			format,
-			true, /* can be a scanout buffer */
-			1, /* num of mip levels */
-			0,
-			0,
-			content_base_size,
-			srf_out);
+					 0, /* kernel visible only */
+					 0, /* flags */
+					 format,
+					 true, /* can be a scanout buffer */
+					 1, /* num of mip levels */
+					 0,
+					 0,
+					 content_base_size,
+					 SVGA3D_MS_PATTERN_NONE,
+					 SVGA3D_MS_QUALITY_NONE,
+					 srf_out);
 	if (ret) {
 		DRM_ERROR("Failed to allocate proxy content buffer\n");
 		return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 6630abf3a95c..f9872c9e60c4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1188,6 +1188,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 				 content_srf.multisample_count,
 				 0,
 				 display_base_size,
+				 content_srf.multisample_pattern,
+				 content_srf.quality_level,
 				 &vps->surf);
 			if (ret != 0) {
 				DRM_ERROR("Couldn't allocate STDU surface.\n");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index a5f93f62c7fa..1d4c010a0e48 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -33,6 +33,10 @@
 #include "vmwgfx_binding.h"
 #include "device_include/svga3d_surfacedefs.h"
 
+#define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32)
+#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32)
+#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \
+	(svga3d_flags & ((uint64_t)U32_MAX))
 
 /**
  * struct vmw_user_surface - User-space visible surface resource
@@ -81,7 +85,16 @@ static int vmw_gb_surface_unbind(struct vmw_resource *res,
 				 bool readback,
 				 struct ttm_validate_buffer *val_buf);
 static int vmw_gb_surface_destroy(struct vmw_resource *res);
-
+static int
+vmw_gb_surface_define_internal(struct drm_device *dev,
+			       struct drm_vmw_gb_surface_create_ext_req *req,
+			       struct drm_vmw_gb_surface_create_rep *rep,
+			       struct drm_file *file_priv);
+static int
+vmw_gb_surface_reference_internal(struct drm_device *dev,
+				  struct drm_vmw_surface_arg *req,
+				  struct drm_vmw_gb_surface_ref_ext_rep *rep,
+				  struct drm_file *file_priv);
 
 static const struct vmw_user_resource_conv user_surface_conv = {
 	.object_type = VMW_RES_SURFACE,
@@ -1289,193 +1302,55 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res)
 
 /**
  * vmw_gb_surface_define_ioctl - Ioctl function implementing
- *                               the user surface define functionality.
+ * the user surface define functionality.
  *
- * @dev:            Pointer to a struct drm_device.
- * @data:           Pointer to data copied from / to user-space.
- * @file_priv:      Pointer to a drm file private structure.
+ * @dev: Pointer to a struct drm_device.
+ * @data: Pointer to data copied from / to user-space.
+ * @file_priv: Pointer to a drm file private structure.
  */
 int vmw_gb_surface_define_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_surface *user_srf;
-	struct vmw_surface *srf;
-	struct vmw_resource *res;
-	struct vmw_resource *tmp;
 	union drm_vmw_gb_surface_create_arg *arg =
 	    (union drm_vmw_gb_surface_create_arg *)data;
-	struct drm_vmw_gb_surface_create_req *req = &arg->req;
 	struct drm_vmw_gb_surface_create_rep *rep = &arg->rep;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	int ret;
-	uint32_t size;
-	uint32_t backup_handle = 0;
-
-	if (req->multisample_count != 0)
-		return -EINVAL;
-
-	if (req->mip_levels > DRM_VMW_MAX_MIP_LEVELS)
-		return -EINVAL;
-
-	if (unlikely(vmw_user_surface_size == 0))
-		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
-			128;
-
-	size = vmw_user_surface_size + 128;
-
-	/* Define a surface based on the parameters. */
-	ret = vmw_surface_gb_priv_define(dev,
-			size,
-			(SVGA3dSurfaceAllFlags)req->svga3d_flags,
-			req->format,
-			req->drm_surface_flags & drm_vmw_surface_flag_scanout,
-			req->mip_levels,
-			req->multisample_count,
-			req->array_size,
-			req->base_size,
-			&srf);
-	if (unlikely(ret != 0))
-		return ret;
-
-	user_srf = container_of(srf, struct vmw_user_surface, srf);
-	if (drm_is_primary_client(file_priv))
-		user_srf->master = drm_master_get(file_priv->master);
-
-	ret = ttm_read_lock(&dev_priv->reservation_sem, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	res = &user_srf->srf.res;
-
-
-	if (req->buffer_handle != SVGA3D_INVALID_ID) {
-		ret = vmw_user_bo_lookup(tfile, req->buffer_handle,
-					 &res->backup,
-					 &user_srf->backup_base);
-		if (ret == 0) {
-			if (res->backup->base.num_pages * PAGE_SIZE <
-			    res->backup_size) {
-				DRM_ERROR("Surface backup buffer is too small.\n");
-				vmw_bo_unreference(&res->backup);
-				ret = -EINVAL;
-				goto out_unlock;
-			} else {
-				backup_handle = req->buffer_handle;
-			}
-		}
-	} else if (req->drm_surface_flags & drm_vmw_surface_flag_create_buffer)
-		ret = vmw_user_bo_alloc(dev_priv, tfile,
-					res->backup_size,
-					req->drm_surface_flags &
-					drm_vmw_surface_flag_shareable,
-					&backup_handle,
-					&res->backup,
-					&user_srf->backup_base);
-
-	if (unlikely(ret != 0)) {
-		vmw_resource_unreference(&res);
-		goto out_unlock;
-	}
-
-	tmp = vmw_resource_reference(res);
-	ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
-				    req->drm_surface_flags &
-				    drm_vmw_surface_flag_shareable,
-				    VMW_RES_SURFACE,
-				    &vmw_user_surface_base_release, NULL);
-
-	if (unlikely(ret != 0)) {
-		vmw_resource_unreference(&tmp);
-		vmw_resource_unreference(&res);
-		goto out_unlock;
-	}
-
-	rep->handle      = user_srf->prime.base.hash.key;
-	rep->backup_size = res->backup_size;
-	if (res->backup) {
-		rep->buffer_map_handle =
-			drm_vma_node_offset_addr(&res->backup->base.vma_node);
-		rep->buffer_size = res->backup->base.num_pages * PAGE_SIZE;
-		rep->buffer_handle = backup_handle;
-	} else {
-		rep->buffer_map_handle = 0;
-		rep->buffer_size = 0;
-		rep->buffer_handle = SVGA3D_INVALID_ID;
-	}
+	struct drm_vmw_gb_surface_create_ext_req req_ext;
 
-	vmw_resource_unreference(&res);
+	req_ext.base = arg->req;
+	req_ext.version = drm_vmw_gb_surface_v1;
+	req_ext.svga3d_flags_upper_32_bits = 0;
+	req_ext.multisample_pattern = SVGA3D_MS_PATTERN_NONE;
+	req_ext.quality_level = SVGA3D_MS_QUALITY_NONE;
+	req_ext.must_be_zero = 0;
 
-out_unlock:
-	ttm_read_unlock(&dev_priv->reservation_sem);
-	return ret;
+	return vmw_gb_surface_define_internal(dev, &req_ext, rep, file_priv);
 }
 
 /**
  * vmw_gb_surface_reference_ioctl - Ioctl function implementing
- *                                  the user surface reference functionality.
+ * the user surface reference functionality.
  *
- * @dev:            Pointer to a struct drm_device.
- * @data:           Pointer to data copied from / to user-space.
- * @file_priv:      Pointer to a drm file private structure.
+ * @dev: Pointer to a struct drm_device.
+ * @data: Pointer to data copied from / to user-space.
+ * @file_priv: Pointer to a drm file private structure.
  */
 int vmw_gb_surface_reference_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_priv)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
 	union drm_vmw_gb_surface_reference_arg *arg =
 	    (union drm_vmw_gb_surface_reference_arg *)data;
 	struct drm_vmw_surface_arg *req = &arg->req;
 	struct drm_vmw_gb_surface_ref_rep *rep = &arg->rep;
-	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-	struct vmw_surface *srf;
-	struct vmw_user_surface *user_srf;
-	struct ttm_base_object *base;
-	uint32_t backup_handle;
-	int ret = -EINVAL;
+	struct drm_vmw_gb_surface_ref_ext_rep rep_ext;
+	int ret;
+
+	ret = vmw_gb_surface_reference_internal(dev, req, &rep_ext, file_priv);
 
-	ret = vmw_surface_handle_reference(dev_priv, file_priv, req->sid,
-					   req->handle_type, &base);
 	if (unlikely(ret != 0))
 		return ret;
 
-	user_srf = container_of(base, struct vmw_user_surface, prime.base);
-	srf = &user_srf->srf;
-	if (!srf->res.backup) {
-		DRM_ERROR("Shared GB surface is missing a backup buffer.\n");
-		goto out_bad_resource;
-	}
-
-	mutex_lock(&dev_priv->cmdbuf_mutex); /* Protect res->backup */
-	ret = vmw_user_bo_reference(tfile, srf->res.backup, &backup_handle);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
-
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Could not add a reference to a GB surface "
-			  "backup buffer.\n");
-		(void) ttm_ref_object_base_unref(tfile, base->hash.key,
-						 TTM_REF_USAGE);
-		goto out_bad_resource;
-	}
-
-	rep->creq.svga3d_flags = (uint32_t)srf->flags;
-	rep->creq.format = srf->format;
-	rep->creq.mip_levels = srf->mip_levels[0];
-	rep->creq.drm_surface_flags = 0;
-	rep->creq.multisample_count = srf->multisample_count;
-	rep->creq.autogen_filter = srf->autogen_filter;
-	rep->creq.array_size = srf->array_size;
-	rep->creq.buffer_handle = backup_handle;
-	rep->creq.base_size = srf->base_size;
-	rep->crep.handle = user_srf->prime.base.hash.key;
-	rep->crep.backup_size = srf->res.backup_size;
-	rep->crep.buffer_handle = backup_handle;
-	rep->crep.buffer_map_handle =
-		drm_vma_node_offset_addr(&srf->res.backup->base.vma_node);
-	rep->crep.buffer_size = srf->res.backup->base.num_pages * PAGE_SIZE;
-
-out_bad_resource:
-	ttm_base_object_unref(&base);
+	rep->creq = rep_ext.creq.base;
+	rep->crep = rep_ext.crep;
 
 	return ret;
 }
@@ -1493,6 +1368,8 @@ out_bad_resource:
  * @multisample_count:
  * @array_size: Surface array size.
  * @size: width, heigh, depth of the surface requested
+ * @multisample_pattern: Multisampling pattern when msaa is supported
+ * @quality_level: Precision settings
  * @user_srf_out: allocated user_srf.  Set to NULL on failure.
  *
  * GB surfaces allocated by this function will not have a user mode handle, and
@@ -1509,6 +1386,8 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 			       uint32_t multisample_count,
 			       uint32_t array_size,
 			       struct drm_vmw_size size,
+			       SVGA3dMSPattern multisample_pattern,
+			       SVGA3dMSQualityLevel quality_level,
 			       struct vmw_surface **srf_out)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
@@ -1519,7 +1398,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 	};
 	struct vmw_surface *srf;
 	int ret;
-	u32 num_layers;
+	u32 num_layers = 1;
 
 	*srf_out = NULL;
 
@@ -1594,15 +1473,13 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
 	srf->autogen_filter    = SVGA3D_TEX_FILTER_NONE;
 	srf->array_size        = array_size;
 	srf->multisample_count = multisample_count;
-	srf->multisample_pattern = SVGA3D_MS_PATTERN_NONE;
-	srf->quality_level = SVGA3D_MS_QUALITY_NONE;
+	srf->multisample_pattern = multisample_pattern;
+	srf->quality_level = quality_level;
 
 	if (array_size)
 		num_layers = array_size;
 	else if (svga3d_flags & SVGA3D_SURFACE_CUBEMAP)
 		num_layers = SVGA3D_MAX_SURFACE_FACES;
-	else
-		num_layers = 1;
 
 	srf->res.backup_size   =
 		svga3dsurface_get_serialized_size(srf->format,
@@ -1633,3 +1510,262 @@ out_unlock:
 	ttm_read_unlock(&dev_priv->reservation_sem);
 	return ret;
 }
+
+/**
+ * vmw_gb_surface_define_ext_ioctl - Ioctl function implementing
+ * the user surface define functionality.
+ *
+ * @dev: Pointer to a struct drm_device.
+ * @data: Pointer to data copied from / to user-space.
+ * @file_priv: Pointer to a drm file private structure.
+ */
+int vmw_gb_surface_define_ext_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv)
+{
+	union drm_vmw_gb_surface_create_ext_arg *arg =
+	    (union drm_vmw_gb_surface_create_ext_arg *)data;
+	struct drm_vmw_gb_surface_create_ext_req *req = &arg->req;
+	struct drm_vmw_gb_surface_create_rep *rep = &arg->rep;
+
+	return vmw_gb_surface_define_internal(dev, req, rep, file_priv);
+}
+
+/**
+ * vmw_gb_surface_reference_ext_ioctl - Ioctl function implementing
+ * the user surface reference functionality.
+ *
+ * @dev: Pointer to a struct drm_device.
+ * @data: Pointer to data copied from / to user-space.
+ * @file_priv: Pointer to a drm file private structure.
+ */
+int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file_priv)
+{
+	union drm_vmw_gb_surface_reference_ext_arg *arg =
+	    (union drm_vmw_gb_surface_reference_ext_arg *)data;
+	struct drm_vmw_surface_arg *req = &arg->req;
+	struct drm_vmw_gb_surface_ref_ext_rep *rep = &arg->rep;
+
+	return vmw_gb_surface_reference_internal(dev, req, rep, file_priv);
+}
+
+/**
+ * vmw_gb_surface_define_internal - Ioctl function implementing
+ * the user surface define functionality.
+ *
+ * @dev: Pointer to a struct drm_device.
+ * @req: Request argument from user-space.
+ * @rep: Response argument to user-space.
+ * @file_priv: Pointer to a drm file private structure.
+ */
+static int
+vmw_gb_surface_define_internal(struct drm_device *dev,
+			       struct drm_vmw_gb_surface_create_ext_req *req,
+			       struct drm_vmw_gb_surface_create_rep *rep,
+			       struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct vmw_user_surface *user_srf;
+	struct vmw_surface *srf;
+	struct vmw_resource *res;
+	struct vmw_resource *tmp;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	int ret;
+	uint32_t size;
+	uint32_t backup_handle = 0;
+	SVGA3dSurfaceAllFlags svga3d_flags_64 =
+		SVGA3D_FLAGS_64(req->svga3d_flags_upper_32_bits,
+				req->base.svga3d_flags);
+
+	if (!dev_priv->has_sm4_1) {
+		/*
+		 * If SM4_1 is not support then cannot send 64-bit flag to
+		 * device.
+		 */
+		if (req->svga3d_flags_upper_32_bits != 0)
+			return -EINVAL;
+
+		if (req->base.multisample_count != 0)
+			return -EINVAL;
+
+		if (req->multisample_pattern != SVGA3D_MS_PATTERN_NONE)
+			return -EINVAL;
+
+		if (req->quality_level != SVGA3D_MS_QUALITY_NONE)
+			return -EINVAL;
+	}
+
+	if (req->base.mip_levels > DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
+	if (unlikely(vmw_user_surface_size == 0))
+		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
+			128;
+
+	size = vmw_user_surface_size + 128;
+
+	/* Define a surface based on the parameters. */
+	ret = vmw_surface_gb_priv_define(dev,
+					 size,
+					 svga3d_flags_64,
+					 req->base.format,
+					 req->base.drm_surface_flags &
+					 drm_vmw_surface_flag_scanout,
+					 req->base.mip_levels,
+					 req->base.multisample_count,
+					 req->base.array_size,
+					 req->base.base_size,
+					 req->multisample_pattern,
+					 req->quality_level,
+					 &srf);
+	if (unlikely(ret != 0))
+		return ret;
+
+	user_srf = container_of(srf, struct vmw_user_surface, srf);
+	if (drm_is_primary_client(file_priv))
+		user_srf->master = drm_master_get(file_priv->master);
+
+	ret = ttm_read_lock(&dev_priv->reservation_sem, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	res = &user_srf->srf.res;
+
+	if (req->base.buffer_handle != SVGA3D_INVALID_ID) {
+		ret = vmw_user_bo_lookup(tfile, req->base.buffer_handle,
+					 &res->backup,
+					 &user_srf->backup_base);
+		if (ret == 0) {
+			if (res->backup->base.num_pages * PAGE_SIZE <
+			    res->backup_size) {
+				DRM_ERROR("Surface backup buffer too small.\n");
+				vmw_bo_unreference(&res->backup);
+				ret = -EINVAL;
+				goto out_unlock;
+			} else {
+				backup_handle = req->base.buffer_handle;
+			}
+		}
+	} else if (req->base.drm_surface_flags &
+		   drm_vmw_surface_flag_create_buffer)
+		ret = vmw_user_bo_alloc(dev_priv, tfile,
+					res->backup_size,
+					req->base.drm_surface_flags &
+					drm_vmw_surface_flag_shareable,
+					&backup_handle,
+					&res->backup,
+					&user_srf->backup_base);
+
+	if (unlikely(ret != 0)) {
+		vmw_resource_unreference(&res);
+		goto out_unlock;
+	}
+
+	tmp = vmw_resource_reference(res);
+	ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
+				    req->base.drm_surface_flags &
+				    drm_vmw_surface_flag_shareable,
+				    VMW_RES_SURFACE,
+				    &vmw_user_surface_base_release, NULL);
+
+	if (unlikely(ret != 0)) {
+		vmw_resource_unreference(&tmp);
+		vmw_resource_unreference(&res);
+		goto out_unlock;
+	}
+
+	rep->handle      = user_srf->prime.base.hash.key;
+	rep->backup_size = res->backup_size;
+	if (res->backup) {
+		rep->buffer_map_handle =
+			drm_vma_node_offset_addr(&res->backup->base.vma_node);
+		rep->buffer_size = res->backup->base.num_pages * PAGE_SIZE;
+		rep->buffer_handle = backup_handle;
+	} else {
+		rep->buffer_map_handle = 0;
+		rep->buffer_size = 0;
+		rep->buffer_handle = SVGA3D_INVALID_ID;
+	}
+
+	vmw_resource_unreference(&res);
+
+out_unlock:
+	ttm_read_unlock(&dev_priv->reservation_sem);
+	return ret;
+}
+
+/**
+ * vmw_gb_surface_reference_internal - Ioctl function implementing
+ * the user surface reference functionality.
+ *
+ * @dev: Pointer to a struct drm_device.
+ * @req: Pointer to user-space request surface arg.
+ * @rep: Pointer to response to user-space.
+ * @file_priv: Pointer to a drm file private structure.
+ */
+static int
+vmw_gb_surface_reference_internal(struct drm_device *dev,
+				  struct drm_vmw_surface_arg *req,
+				  struct drm_vmw_gb_surface_ref_ext_rep *rep,
+				  struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_surface *srf;
+	struct vmw_user_surface *user_srf;
+	struct ttm_base_object *base;
+	uint32_t backup_handle;
+	int ret = -EINVAL;
+
+	ret = vmw_surface_handle_reference(dev_priv, file_priv, req->sid,
+					   req->handle_type, &base);
+	if (unlikely(ret != 0))
+		return ret;
+
+	user_srf = container_of(base, struct vmw_user_surface, prime.base);
+	srf = &user_srf->srf;
+	if (!srf->res.backup) {
+		DRM_ERROR("Shared GB surface is missing a backup buffer.\n");
+		goto out_bad_resource;
+	}
+
+	mutex_lock(&dev_priv->cmdbuf_mutex); /* Protect res->backup */
+	ret = vmw_user_bo_reference(tfile, srf->res.backup, &backup_handle);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could not add a reference to a GB surface "
+			  "backup buffer.\n");
+		(void) ttm_ref_object_base_unref(tfile, base->hash.key,
+						 TTM_REF_USAGE);
+		goto out_bad_resource;
+	}
+
+	rep->creq.base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(srf->flags);
+	rep->creq.base.format = srf->format;
+	rep->creq.base.mip_levels = srf->mip_levels[0];
+	rep->creq.base.drm_surface_flags = 0;
+	rep->creq.base.multisample_count = srf->multisample_count;
+	rep->creq.base.autogen_filter = srf->autogen_filter;
+	rep->creq.base.array_size = srf->array_size;
+	rep->creq.base.buffer_handle = backup_handle;
+	rep->creq.base.base_size = srf->base_size;
+	rep->crep.handle = user_srf->prime.base.hash.key;
+	rep->crep.backup_size = srf->res.backup_size;
+	rep->crep.buffer_handle = backup_handle;
+	rep->crep.buffer_map_handle =
+		drm_vma_node_offset_addr(&srf->res.backup->base.vma_node);
+	rep->crep.buffer_size = srf->res.backup->base.num_pages * PAGE_SIZE;
+
+	rep->creq.version = drm_vmw_gb_surface_v1;
+	rep->creq.svga3d_flags_upper_32_bits =
+		SVGA3D_FLAGS_UPPER_32(srf->flags);
+	rep->creq.multisample_pattern = srf->multisample_pattern;
+	rep->creq.quality_level = srf->quality_level;
+	rep->creq.must_be_zero = 0;
+
+out_bad_resource:
+	ttm_base_object_unref(&base);
+
+	return ret;
+}
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 84e81b38ca18..68ff37d4c035 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -69,6 +69,8 @@ extern "C" {
 #define DRM_VMW_GB_SURFACE_REF       24
 #define DRM_VMW_SYNCCPU              25
 #define DRM_VMW_CREATE_EXTENDED_CONTEXT 26
+#define DRM_VMW_GB_SURFACE_CREATE_EXT   27
+#define DRM_VMW_GB_SURFACE_REF_EXT      28
 
 /*************************************************************************/
 /**
@@ -1105,6 +1107,106 @@ struct drm_vmw_handle_close_arg {
 };
 #define drm_vmw_unref_dmabuf_arg drm_vmw_handle_close_arg
 
+/*************************************************************************/
+/**
+ * DRM_VMW_GB_SURFACE_CREATE_EXT - Create a host guest-backed surface.
+ *
+ * Allocates a surface handle and queues a create surface command
+ * for the host on the first use of the surface. The surface ID can
+ * be used as the surface ID in commands referencing the surface.
+ *
+ * This new command extends DRM_VMW_GB_SURFACE_CREATE by adding version
+ * parameter and 64 bit svga flag.
+ */
+
+/**
+ * enum drm_vmw_surface_version
+ *
+ * @drm_vmw_surface_gb_v1: Corresponds to current gb surface format with
+ * svga3d surface flags split into 2, upper half and lower half.
+ */
+enum drm_vmw_surface_version {
+	drm_vmw_gb_surface_v1
+};
+
+/**
+ * struct drm_vmw_gb_surface_create_ext_req
+ *
+ * @base: Surface create parameters.
+ * @version: Version of surface create ioctl.
+ * @svga3d_flags_upper_32_bits: Upper 32 bits of svga3d flags.
+ * @multisample_pattern: Multisampling pattern when msaa is supported.
+ * @quality_level: Precision settings for each sample.
+ * @must_be_zero: Reserved for future usage.
+ *
+ * Input argument to the  DRM_VMW_GB_SURFACE_CREATE_EXT Ioctl.
+ * Part of output argument for the DRM_VMW_GB_SURFACE_REF_EXT Ioctl.
+ */
+struct drm_vmw_gb_surface_create_ext_req {
+	struct drm_vmw_gb_surface_create_req base;
+	enum drm_vmw_surface_version version;
+	uint32_t svga3d_flags_upper_32_bits;
+	SVGA3dMSPattern multisample_pattern;
+	SVGA3dMSQualityLevel quality_level;
+	uint64_t must_be_zero;
+};
+
+/**
+ * union drm_vmw_gb_surface_create_ext_arg
+ *
+ * @req: Input argument as described above.
+ * @rep: Output argument as described above.
+ *
+ * Argument to the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ */
+union drm_vmw_gb_surface_create_ext_arg {
+	struct drm_vmw_gb_surface_create_rep rep;
+	struct drm_vmw_gb_surface_create_ext_req req;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_GB_SURFACE_REF_EXT - Reference a host surface.
+ *
+ * Puts a reference on a host surface with a given handle, as previously
+ * returned by the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ * A reference will make sure the surface isn't destroyed while we hold
+ * it and will allow the calling client to use the surface handle in
+ * the command stream.
+ *
+ * On successful return, the Ioctl returns the surface information given
+ * to and returned from the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ */
+
+/**
+ * struct drm_vmw_gb_surface_ref_ext_rep
+ *
+ * @creq: The data used as input when the surface was created, as described
+ *        above at "struct drm_vmw_gb_surface_create_ext_req"
+ * @crep: Additional data output when the surface was created, as described
+ *        above at "struct drm_vmw_gb_surface_create_rep"
+ *
+ * Output Argument to the DRM_VMW_GB_SURFACE_REF_EXT ioctl.
+ */
+struct drm_vmw_gb_surface_ref_ext_rep {
+	struct drm_vmw_gb_surface_create_ext_req creq;
+	struct drm_vmw_gb_surface_create_rep crep;
+};
+
+/**
+ * union drm_vmw_gb_surface_reference_ext_arg
+ *
+ * @req: Input data as described above at "struct drm_vmw_surface_arg"
+ * @rep: Output data as described above at
+ *       "struct drm_vmw_gb_surface_ref_ext_rep"
+ *
+ * Argument to the DRM_VMW_GB_SURFACE_REF Ioctl.
+ */
+union drm_vmw_gb_surface_reference_ext_arg {
+	struct drm_vmw_gb_surface_ref_ext_rep rep;
+	struct drm_vmw_surface_arg req;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
cgit v1.2.3


From 9b07b287aa3ef49695bfad59c1870cd942f579a8 Mon Sep 17 00:00:00 2001
From: Deepak Rawat <drawat@vmware.com>
Date: Wed, 20 Jun 2018 15:09:43 -0700
Subject: drm/vmwgfx: Expose SM4_1 param to user space

A new param DRM_VMW_PARAM_SM4_1, is added for user space to determine
availability of SM4.1.

Minor version bump for SM4.1.

Signed-off-by: Deepak Rawat <drawat@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h   | 6 +++---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 3 +++
 include/uapi/drm/vmwgfx_drm.h         | 4 ++++
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a67b54e4fd50..f1b803d34c59 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -43,10 +43,10 @@
 #include <linux/sync_file.h>
 
 #define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180322"
+#define VMWGFX_DRIVER_DATE "20180704"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 14
-#define VMWGFX_DRIVER_PATCHLEVEL 1
+#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index ac6da0da2824..e825192ca30e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -116,6 +116,9 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_DX:
 		param->value = dev_priv->has_dx;
 		break;
+	case DRM_VMW_PARAM_SM4_1:
+		param->value = dev_priv->has_sm4_1;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 68ff37d4c035..399f58317cff 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -82,6 +82,9 @@ extern "C" {
  *
  * DRM_VMW_PARAM_OVERLAY_IOCTL:
  * Does the driver support the overlay ioctl.
+ *
+ * DRM_VMW_PARAM_SM4_1
+ * SM4_1 support is enabled.
  */
 
 #define DRM_VMW_PARAM_NUM_STREAMS      0
@@ -98,6 +101,7 @@ extern "C" {
 #define DRM_VMW_PARAM_SCREEN_TARGET    11
 #define DRM_VMW_PARAM_DX               12
 #define DRM_VMW_PARAM_HW_CAPS2         13
+#define DRM_VMW_PARAM_SM4_1            14
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
-- 
cgit v1.2.3


From 18df02fb79cb20ba4de1e02e1df41ba83f8b9bec Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Sun, 17 Jun 2018 14:05:34 +0200
Subject: dts: clk: add devicetree bindings for MAX9485

This patch adds the devicetree bindings for MAX9485, a programmable audio
clock generator.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/maxim,max9485.txt    | 59 ++++++++++++++++++++++
 include/dt-bindings/clock/maxim,max9485.h          | 18 +++++++
 2 files changed, 77 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/maxim,max9485.txt
 create mode 100644 include/dt-bindings/clock/maxim,max9485.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/maxim,max9485.txt b/Documentation/devicetree/bindings/clock/maxim,max9485.txt
new file mode 100644
index 000000000000..61bec1100a94
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/maxim,max9485.txt
@@ -0,0 +1,59 @@
+Devicetree bindings for Maxim MAX9485 Programmable Audio Clock Generator
+
+This device exposes 4 clocks in total:
+
+- MAX9485_MCLKOUT: 	A gated, buffered output of the input clock of 27 MHz
+- MAX9485_CLKOUT:	A PLL that can be configured to 16 different discrete
+			frequencies
+- MAX9485_CLKOUT[1,2]:	Two gated outputs for MAX9485_CLKOUT
+
+MAX9485_CLKOUT[1,2] are children of MAX9485_CLKOUT which upchain all rate set
+requests.
+
+Required properties:
+- compatible:	"maxim,max9485"
+- clocks:	Input clock, must provice 27.000 MHz
+- clock-names:	Must be set to "xclk"
+- #clock-cells: From common clock binding; shall be set to 1
+
+Optional properties:
+- reset-gpios:		GPIO descriptor connected to the #RESET input pin
+- vdd-supply:		A regulator node for Vdd
+- clock-output-names:	Name of output clocks, as defined in common clock
+			bindings
+
+If not explicitly set, the output names are "mclkout", "clkout", "clkout1"
+and "clkout2".
+
+Clocks are defined as preprocessor macros in the dt-binding header.
+
+Example:
+
+	#include <dt-bindings/clock/maxim,max9485.h>
+
+	xo-27mhz: xo-27mhz {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <27000000>;
+	};
+
+	&i2c0 {
+		max9485: audio-clock@63 {
+			reg = <0x63>;
+			compatible = "maxim,max9485";
+			clock-names = "xclk";
+			clocks = <&xo-27mhz>;
+			reset-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+			vdd-supply = <&3v3-reg>;
+			#clock-cells = <1>;
+		};
+	};
+
+	// Clock consumer node
+
+	foo@0 {
+		compatible = "bar,foo";
+		/* ... */
+		clock-names = "foo-input-clk";
+		clocks = <&max9485 MAX9485_CLKOUT1>;
+	};
diff --git a/include/dt-bindings/clock/maxim,max9485.h b/include/dt-bindings/clock/maxim,max9485.h
new file mode 100644
index 000000000000..185b09ce1869
--- /dev/null
+++ b/include/dt-bindings/clock/maxim,max9485.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2018 Daniel Mack
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __DT_BINDINGS_MAX9485_CLK_H
+#define __DT_BINDINGS_MAX9485_CLK_H
+
+#define MAX9485_MCLKOUT	0
+#define MAX9485_CLKOUT	1
+#define MAX9485_CLKOUT1	2
+#define MAX9485_CLKOUT2	3
+
+#endif /* __DT_BINDINGS_MAX9485_CLK_H */
-- 
cgit v1.2.3


From a2344b9e3a8c5c2064306b0d99b0e9a6c4813c08 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:28 +0530
Subject: Bluetooth: Use extended scanning if controller supports

This implements Set extended scan param and set extended scan enable
commands and use it for start LE scan based on controller support.

The new features added in these commands are setting of new PHY for
scanning and setting of scan duration. Both features are disabled
for now, meaning only 1M PHY is set and scan duration is set to 0
which means that scanning will be done untill scan disable is called.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 8
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x01
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  24 +++++++++
 include/net/bluetooth/hci_core.h |   4 ++
 net/bluetooth/hci_event.c        |  51 ++++++++++++++++++
 net/bluetooth/hci_request.c      | 110 ++++++++++++++++++++++++++++++---------
 4 files changed, 164 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4af1a3a4d9b1..8c2868f439e7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,30 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
+struct hci_cp_le_set_ext_scan_params {
+	__u8    own_addr_type;
+	__u8    filter_policy;
+	__u8    scanning_phys;
+	__u8    data[0];
+} __packed;
+
+#define LE_SCAN_PHY_1M 0x01
+
+struct hci_cp_le_scan_phy_params {
+	__u8    type;
+	__le16  interval;
+	__le16  window;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_ENABLE   0x2042
+struct hci_cp_le_set_ext_scan_enable {
+	__u8    enable;
+	__u8    filter_dup;
+	__le16  duration;
+	__le16  period;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 409f49bd8338..cc0bde74dd45 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1158,6 +1158,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+/* Use ext scanning if set ext scan param and ext scan enable is supported */
+#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
+			   ((dev)->commands[37] & 0x40))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 9ec07cd4ab13..15afad005d72 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1098,6 +1098,31 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_le_set_ext_scan_param(struct hci_dev *hdev,
+					 struct sk_buff *skb)
+{
+	struct hci_cp_le_set_ext_scan_params *cp;
+	__u8 status = *((__u8 *) skb->data);
+	struct hci_cp_le_scan_phy_params *phy_param;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_SCAN_PARAMS);
+	if (!cp)
+		return;
+
+	phy_param = (void *)cp->data;
+
+	hci_dev_lock(hdev);
+
+	hdev->le_scan_type = phy_param->type;
+
+	hci_dev_unlock(hdev);
+}
+
 static bool has_pending_adv_report(struct hci_dev *hdev)
 {
 	struct discovery_state *d = &hdev->discovery;
@@ -1202,6 +1227,24 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 	le_set_scan_enable_complete(hdev, cp->enable);
 }
 
+static void hci_cc_le_set_ext_scan_enable(struct hci_dev *hdev,
+				      struct sk_buff *skb)
+{
+	struct hci_cp_le_set_ext_scan_enable *cp;
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE);
+	if (!cp)
+		return;
+
+	le_set_scan_enable_complete(hdev, cp->enable);
+}
+
 static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
 					   struct sk_buff *skb)
 {
@@ -3079,6 +3122,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_write_ssp_debug_mode(hdev, skb);
 		break;
 
+	case HCI_OP_LE_SET_EXT_SCAN_PARAMS:
+		hci_cc_le_set_ext_scan_param(hdev, skb);
+		break;
+
+	case HCI_OP_LE_SET_EXT_SCAN_ENABLE:
+		hci_cc_le_set_ext_scan_enable(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 76dcc3f14cea..faf7c711234c 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -647,11 +647,22 @@ void __hci_req_update_eir(struct hci_request *req)
 
 void hci_req_add_le_scan_disable(struct hci_request *req)
 {
-	struct hci_cp_le_set_scan_enable cp;
+	struct hci_dev *hdev = req->hdev;
 
-	memset(&cp, 0, sizeof(cp));
-	cp.enable = LE_SCAN_DISABLE;
-	hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+	if (use_ext_scan(hdev)) {
+		struct hci_cp_le_set_ext_scan_enable cp;
+
+		memset(&cp, 0, sizeof(cp));
+		cp.enable = LE_SCAN_DISABLE;
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp),
+			    &cp);
+	} else {
+		struct hci_cp_le_set_scan_enable cp;
+
+		memset(&cp, 0, sizeof(cp));
+		cp.enable = LE_SCAN_DISABLE;
+		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+	}
 }
 
 static void add_to_white_list(struct hci_request *req,
@@ -770,23 +781,60 @@ static bool scan_use_rpa(struct hci_dev *hdev)
 static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 			       u16 window, u8 own_addr_type, u8 filter_policy)
 {
-	struct hci_cp_le_set_scan_param param_cp;
-	struct hci_cp_le_set_scan_enable enable_cp;
-
-	memset(&param_cp, 0, sizeof(param_cp));
-	param_cp.type = type;
-	param_cp.interval = cpu_to_le16(interval);
-	param_cp.window = cpu_to_le16(window);
-	param_cp.own_address_type = own_addr_type;
-	param_cp.filter_policy = filter_policy;
-	hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
-		    &param_cp);
+	struct hci_dev *hdev = req->hdev;
 
-	memset(&enable_cp, 0, sizeof(enable_cp));
-	enable_cp.enable = LE_SCAN_ENABLE;
-	enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-	hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
-		    &enable_cp);
+	/* Use ext scanning if set ext scan param and ext scan enable is
+	 * supported
+	 */
+	if (use_ext_scan(hdev)) {
+		struct hci_cp_le_set_ext_scan_params *ext_param_cp;
+		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
+		struct hci_cp_le_scan_phy_params *phy_params;
+		/* Ony single PHY (1M) is supported as of now */
+		u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 1];
+
+		ext_param_cp = (void *)data;
+		phy_params = (void *)ext_param_cp->data;
+
+		memset(ext_param_cp, 0, sizeof(*ext_param_cp));
+		ext_param_cp->own_addr_type = own_addr_type;
+		ext_param_cp->filter_policy = filter_policy;
+		ext_param_cp->scanning_phys = LE_SCAN_PHY_1M;
+
+		memset(phy_params, 0, sizeof(*phy_params));
+		phy_params->type = type;
+		phy_params->interval = cpu_to_le16(interval);
+		phy_params->window = cpu_to_le16(window);
+
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
+			    sizeof(*ext_param_cp) + sizeof(*phy_params),
+			    ext_param_cp);
+
+		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
+		ext_enable_cp.enable = LE_SCAN_ENABLE;
+		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
+			    sizeof(ext_enable_cp), &ext_enable_cp);
+	} else {
+		struct hci_cp_le_set_scan_param param_cp;
+		struct hci_cp_le_set_scan_enable enable_cp;
+
+		memset(&param_cp, 0, sizeof(param_cp));
+		param_cp.type = type;
+		param_cp.interval = cpu_to_le16(interval);
+		param_cp.window = cpu_to_le16(window);
+		param_cp.own_address_type = own_addr_type;
+		param_cp.filter_policy = filter_policy;
+		hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp),
+			    &param_cp);
+
+		memset(&enable_cp, 0, sizeof(enable_cp));
+		enable_cp.enable = LE_SCAN_ENABLE;
+		enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
+			    &enable_cp);
+	}
 }
 
 void hci_req_add_le_passive_scan(struct hci_request *req)
@@ -1948,7 +1996,6 @@ discov_stopped:
 static int le_scan_restart(struct hci_request *req, unsigned long opt)
 {
 	struct hci_dev *hdev = req->hdev;
-	struct hci_cp_le_set_scan_enable cp;
 
 	/* If controller is not scanning we are done. */
 	if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
@@ -1956,10 +2003,23 @@ static int le_scan_restart(struct hci_request *req, unsigned long opt)
 
 	hci_req_add_le_scan_disable(req);
 
-	memset(&cp, 0, sizeof(cp));
-	cp.enable = LE_SCAN_ENABLE;
-	cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
-	hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+	if (use_ext_scan(hdev)) {
+		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
+
+		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
+		ext_enable_cp.enable = LE_SCAN_ENABLE;
+		ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
+			    sizeof(ext_enable_cp), &ext_enable_cp);
+	} else {
+		struct hci_cp_le_set_scan_enable cp;
+
+		memset(&cp, 0, sizeof(cp));
+		cp.enable = LE_SCAN_ENABLE;
+		cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+		hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From c215e9397b00b3045a668120ed7dbd89f2866e74 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 17:05:29 +0530
Subject: Bluetooth: Process extended ADV report event

This patch enables Extended ADV report event if extended scanning
is supported in the controller and process the same.

The new features are not handled and for now its as good as
legacy ADV report.

> HCI Event: LE Meta Event (0x3e) plen 53
      LE Extended Advertising Report (0x0d)
        Num reports: 1
        Entry 0
          Event type: 0x0013
            Props: 0x0013
              Connectable
              Scannable
              Use legacy advertising PDUs
            Data status: Complete
          Legacy PDU Type: ADV_IND (0x0013)
          Address type: Random (0x01)
          Address: DB:7E:2E:1A:85:E8 (Static)
          Primary PHY: LE 1M
          Secondary PHY: LE 1M
          SID: 0x00
          TX power: 0 dBm
          RSSI: -90 dBm (0xa6)
          Periodic advertising invteral: 0.00 msec (0x0000)
          Direct address type: Public (0x00)
          Direct address: 00:00:00:00:00:00 (OUI 00-00-00)
          Data length: 0x1b
        0f 09 44 65 73 69 67 6e 65 72 20 4d 6f 75 73 65  ..Designer Mouse
        03 19 c2 03 02 01 05 03 03 12 18                 ...........

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 26 +++++++++++++++++++++++
 net/bluetooth/hci_core.c    |  9 ++++++++
 net/bluetooth/hci_event.c   | 52 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8c2868f439e7..0ec51eb14810 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1925,6 +1925,15 @@ struct hci_ev_le_conn_complete {
 #define LE_ADV_SCAN_IND		0x02
 #define LE_ADV_NONCONN_IND	0x03
 #define LE_ADV_SCAN_RSP		0x04
+#define LE_ADV_INVALID		0x05
+
+/* Legacy event types in extended adv report */
+#define LE_LEGACY_ADV_IND		0x0013
+#define LE_LEGACY_ADV_DIRECT_IND 	0x0015
+#define LE_LEGACY_ADV_SCAN_IND		0x0012
+#define LE_LEGACY_NONCONN_IND		0x0010
+#define LE_LEGACY_SCAN_RSP_ADV		0x001b
+#define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
@@ -1989,6 +1998,23 @@ struct hci_ev_le_direct_adv_info {
 	__s8	 rssi;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_REPORT    0x0d
+struct hci_ev_le_ext_adv_report {
+	__le16 	 evt_type;
+	__u8	 bdaddr_type;
+	bdaddr_t bdaddr;
+	__u8	 primary_phy;
+	__u8	 secondary_phy;
+	__u8	 sid;
+	__u8	 tx_power;
+	__s8	 rssi;
+	__le16 	 interval;
+	__u8  	 direct_addr_type;
+	bdaddr_t direct_addr;
+	__u8  	 length;
+	__u8	 data[0];
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ce2447d89ce1..e3ec2d782762 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -695,6 +695,15 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 		if (hdev->commands[35] & (0x20 | 0x40))
 			events[1] |= 0x08;        /* LE PHY Update Complete */
 
+		/* If the controller supports LE Set Extended Scan Parameters
+		 * and LE Set Extended Scan Enable commands, enable the
+		 * corresponding event.
+		 */
+		if (use_ext_scan(hdev))
+			events[1] |= 0x10;	/* LE Extended Advertising
+						 * Report
+						 */
+
 		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
 			    events);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 15afad005d72..6c6fd4f55f23 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5048,6 +5048,54 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static u8 convert_legacy_evt_type(u16 evt_type)
+{
+	switch (evt_type) {
+	case LE_LEGACY_ADV_IND:
+		return LE_ADV_IND;
+	case LE_LEGACY_ADV_DIRECT_IND:
+		return LE_ADV_DIRECT_IND;
+	case LE_LEGACY_ADV_SCAN_IND:
+		return LE_ADV_SCAN_IND;
+	case LE_LEGACY_NONCONN_IND:
+		return LE_ADV_NONCONN_IND;
+	case LE_LEGACY_SCAN_RSP_ADV:
+	case LE_LEGACY_SCAN_RSP_ADV_SCAN:
+		return LE_ADV_SCAN_RSP;
+	}
+
+	BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
+				   evt_type);
+
+	return LE_ADV_INVALID;
+}
+
+static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	u8 num_reports = skb->data[0];
+	void *ptr = &skb->data[1];
+
+	hci_dev_lock(hdev);
+
+	while (num_reports--) {
+		struct hci_ev_le_ext_adv_report *ev = ptr;
+		u8 legacy_evt_type;
+		u16 evt_type;
+
+		evt_type = __le16_to_cpu(ev->evt_type);
+		legacy_evt_type = convert_legacy_evt_type(evt_type);
+		if (legacy_evt_type != LE_ADV_INVALID) {
+			process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
+					   ev->bdaddr_type, NULL, 0, ev->rssi,
+					   ev->data, ev->length);
+		}
+
+		ptr += sizeof(*ev) + ev->length + 1;
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
 					    struct sk_buff *skb)
 {
@@ -5280,6 +5328,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_le_direct_adv_report_evt(hdev, skb);
 		break;
 
+	case HCI_EV_LE_EXT_ADV_REPORT:
+		hci_le_ext_adv_report_evt(hdev, skb);
+		break;
+
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From fc20654389364a55eeab837244b24f8da75009f6 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Wed, 27 Jun 2018 21:41:23 +0200
Subject: clk: pxa: export 32kHz PLL

This clock is especially used by the RTC driver, so export it so that
devicetree users can use it.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/pxa/clk-pxa25x.c          | 6 ++++--
 drivers/clk/pxa/clk-pxa27x.c          | 7 ++++---
 drivers/clk/pxa/clk-pxa3xx.c          | 7 ++++---
 include/dt-bindings/clock/pxa-clock.h | 3 ++-
 4 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c
index 6416c1f8e632..e88f8e01fe3a 100644
--- a/drivers/clk/pxa/clk-pxa25x.c
+++ b/drivers/clk/pxa/clk-pxa25x.c
@@ -292,8 +292,10 @@ static void __init pxa25x_register_plls(void)
 {
 	clk_register_fixed_rate(NULL, "osc_3_6864mhz", NULL,
 				CLK_GET_RATE_NOCACHE, 3686400);
-	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE, 32768);
+	clkdev_pxa_register(CLK_OSC32k768, "osc_32_768khz", NULL,
+			    clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
+						    CLK_GET_RATE_NOCACHE,
+						    32768));
 	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_95_85mhz", "osc_3_6864mhz",
 				  0, 26, 1);
diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c
index 25a30194d27a..d40b63e7bbce 100644
--- a/drivers/clk/pxa/clk-pxa27x.c
+++ b/drivers/clk/pxa/clk-pxa27x.c
@@ -314,9 +314,10 @@ static void __init pxa27x_register_plls(void)
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
 				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
-	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE,
-				32768 * KHz);
+	clkdev_pxa_register(CLK_OSC32k768, "osc_32_768khz", NULL,
+			    clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
+						    CLK_GET_RATE_NOCACHE,
+						    32768 * KHz));
 	clk_register_fixed_rate(NULL, "clk_dummy", NULL, 0, 0);
 	clk_register_fixed_factor(NULL, "ppll_312mhz", "osc_13mhz", 0, 24, 1);
 }
diff --git a/drivers/clk/pxa/clk-pxa3xx.c b/drivers/clk/pxa/clk-pxa3xx.c
index 2d126df2bccd..7aa120c3bd08 100644
--- a/drivers/clk/pxa/clk-pxa3xx.c
+++ b/drivers/clk/pxa/clk-pxa3xx.c
@@ -286,9 +286,10 @@ static void __init pxa3xx_register_plls(void)
 	clk_register_fixed_rate(NULL, "osc_13mhz", NULL,
 				CLK_GET_RATE_NOCACHE,
 				13 * MHz);
-	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
-				CLK_GET_RATE_NOCACHE,
-				32768);
+	clkdev_pxa_register(CLK_OSC32k768, "osc_32_768khz", NULL,
+			    clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
+						    CLK_GET_RATE_NOCACHE,
+						    32768));
 	clk_register_fixed_rate(NULL, "ring_osc_120mhz", NULL,
 				CLK_GET_RATE_NOCACHE,
 				120 * MHz);
diff --git a/include/dt-bindings/clock/pxa-clock.h b/include/dt-bindings/clock/pxa-clock.h
index e65803b1dc7e..0b0fd2b01538 100644
--- a/include/dt-bindings/clock/pxa-clock.h
+++ b/include/dt-bindings/clock/pxa-clock.h
@@ -72,6 +72,7 @@
 #define CLK_USIM 58
 #define CLK_USIM1 59
 #define CLK_USMI0 60
-#define CLK_MAX 61
+#define CLK_OSC32k768 61
+#define CLK_MAX 62
 
 #endif
-- 
cgit v1.2.3


From 4d94f95d30c8fbfe86068e9abed110974d697cf5 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Fri, 6 Jul 2018 22:50:32 +0200
Subject: Bluetooth: Use extended LE Connection if supported

This implements extended LE craete connection and enhanced
LE conn complete event if the controller supports.

For now it is as good as legacy LE connection and event as
no new features in the extended connection is handled.

< HCI Command: LE Extended Create Connection (0x08|0x0043) plen 26
        Filter policy: White list is not used (0x00)
        Own address type: Public (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Initiating PHYs: 0x01
        Entry 0: LE 1M
          Scan interval: 60.000 msec (0x0060)
          Scan window: 60.000 msec (0x0060)
          Min connection interval: 50.00 msec (0x0028)
          Max connection interval: 70.00 msec (0x0038)
          Connection latency: 0 (0x0000)
          Supervision timeout: 420 msec (0x002a)
          Min connection length: 0.000 msec (0x0000)
          Max connection length: 0.000 msec (0x0000)
> HCI Event: Command Status (0x0f) plen 4
      LE Extended Create Connection (0x08|0x0043) ncmd 2
        Status: Success (0x00)
> HCI Event: LE Meta Event (0x3e) plen 31
      LE Enhanced Connection Complete (0x0a)
        Status: Success (0x00)
        Handle: 3585
        Role: Master (0x00)
        Peer address type: Random (0x01)
        Peer address: DB:7E:2E:1D:85:E8 (Static)
        Local resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Peer resolvable private address: 00:00:00:00:00:00 (Non-Resolvable)
        Connection interval: 67.50 msec (0x0036)
        Connection latency: 0 (0x0000)
        Supervision timeout: 420 msec (0x002a)
        Master clock accuracy: 0x00
@ MGMT Event: Device Connected (0x000b) plen 40
        LE Address: DB:7E:2E:1D:85:E8 (Static)
        Flags: 0x00000000
        Data length: 27
        Name (complete): Designer Mouse
        Appearance: Mouse (0x03c2)
        Flags: 0x05
          LE Limited Discoverable Mode
          BR/EDR Not Supported
        16-bit Service UUIDs (complete): 1 entry
          Human Interface Device (0x1812)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 36 ++++++++++++++++++++
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_conn.c         | 72 ++++++++++++++++++++++++++++++----------
 net/bluetooth/hci_core.c         |  8 +++++
 net/bluetooth/hci_event.c        | 47 ++++++++++++++++++++++++++
 5 files changed, 147 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0ec51eb14810..73e48be5bbb3 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1538,6 +1538,27 @@ struct hci_cp_le_set_ext_scan_enable {
 	__le16  period;
 } __packed;
 
+#define HCI_OP_LE_EXT_CREATE_CONN    0x2043
+struct hci_cp_le_ext_create_conn {
+	__u8      filter_policy;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      phys;
+	__u8      data[0];
+} __packed;
+
+struct hci_cp_le_ext_conn_param {
+	__le16 scan_interval;
+	__le16 scan_window;
+	__le16 conn_interval_min;
+	__le16 conn_interval_max;
+	__le16 conn_latency;
+	__le16 supervision_timeout;
+	__le16 min_ce_len;
+	__le16 max_ce_len;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
@@ -2015,6 +2036,21 @@ struct hci_ev_le_ext_adv_report {
 	__u8	 data[0];
 } __packed;
 
+#define HCI_EV_LE_ENHANCED_CONN_COMPLETE    0x0a
+struct hci_ev_le_enh_conn_complete {
+	__u8      status;
+	__le16    handle;
+	__u8      role;
+	__u8      bdaddr_type;
+	bdaddr_t  bdaddr;
+	bdaddr_t  local_rpa;
+	bdaddr_t  peer_rpa;
+	__le16    interval;
+	__le16    latency;
+	__le16    supervision_timeout;
+	__u8      clk_accurancy;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index cc0bde74dd45..a74453571264 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1161,6 +1161,8 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
+/* Use ext create connection if command is supported */
+#define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 45ff5dc124cc..cc967ca67962 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -752,7 +752,6 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
 				       struct hci_conn *conn,
 				       bdaddr_t *direct_rpa)
 {
-	struct hci_cp_le_create_conn cp;
 	struct hci_dev *hdev = conn->hdev;
 	u8 own_addr_type;
 
@@ -775,25 +774,62 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
 			return;
 	}
 
-	memset(&cp, 0, sizeof(cp));
+	if (use_ext_conn(hdev)) {
+		struct hci_cp_le_ext_create_conn *cp;
+		struct hci_cp_le_ext_conn_param *p;
+		/* As of now only LE 1M is supported */
+		u8 data[sizeof(*cp) + sizeof(*p) * 1];
 
-	/* Set window to be the same value as the interval to enable
-	 * continuous scanning.
-	 */
-	cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
-	cp.scan_window = cp.scan_interval;
+		cp = (void *) data;
+		p = (void *) cp->data;
 
-	bacpy(&cp.peer_addr, &conn->dst);
-	cp.peer_addr_type = conn->dst_type;
-	cp.own_address_type = own_addr_type;
-	cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
-	cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
-	cp.conn_latency = cpu_to_le16(conn->le_conn_latency);
-	cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout);
-	cp.min_ce_len = cpu_to_le16(0x0000);
-	cp.max_ce_len = cpu_to_le16(0x0000);
-
-	hci_req_add(req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
+		memset(cp, 0, sizeof(*cp));
+
+		bacpy(&cp->peer_addr, &conn->dst);
+		cp->peer_addr_type = conn->dst_type;
+		cp->own_addr_type = own_addr_type;
+		cp->phys = LE_SCAN_PHY_1M;
+
+		memset(p, 0, sizeof(*p));
+
+		/* Set window to be the same value as the interval to enable
+		 * continuous scanning.
+		 */
+
+		p->scan_interval = cpu_to_le16(hdev->le_scan_interval);
+		p->scan_window = p->scan_interval;
+		p->conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
+		p->conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
+		p->conn_latency = cpu_to_le16(conn->le_conn_latency);
+		p->supervision_timeout = cpu_to_le16(conn->le_supv_timeout);
+		p->min_ce_len = cpu_to_le16(0x0000);
+		p->max_ce_len = cpu_to_le16(0x0000);
+
+		hci_req_add(req, HCI_OP_LE_EXT_CREATE_CONN, sizeof(data), data);
+
+	} else {
+		struct hci_cp_le_create_conn cp;
+
+		memset(&cp, 0, sizeof(cp));
+
+		/* Set window to be the same value as the interval to enable
+		 * continuous scanning.
+		 */
+		cp.scan_interval = cpu_to_le16(hdev->le_scan_interval);
+		cp.scan_window = cp.scan_interval;
+
+		bacpy(&cp.peer_addr, &conn->dst);
+		cp.peer_addr_type = conn->dst_type;
+		cp.own_address_type = own_addr_type;
+		cp.conn_interval_min = cpu_to_le16(conn->le_conn_min_interval);
+		cp.conn_interval_max = cpu_to_le16(conn->le_conn_max_interval);
+		cp.conn_latency = cpu_to_le16(conn->le_conn_latency);
+		cp.supervision_timeout = cpu_to_le16(conn->le_supv_timeout);
+		cp.min_ce_len = cpu_to_le16(0x0000);
+		cp.max_ce_len = cpu_to_le16(0x0000);
+
+		hci_req_add(req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
+	}
 
 	conn->state = BT_CONNECT;
 	clear_bit(HCI_CONN_SCANNING, &conn->flags);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e3ec2d782762..f5c21004186c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -704,6 +704,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 						 * Report
 						 */
 
+		/* If the controller supports the LE Extended Create Connection
+		 * command, enable the corresponding event.
+		 */
+		if (use_ext_conn(hdev))
+			events[1] |= 0x02;      /* LE Enhanced Connection
+						 * Complete
+						 */
+
 		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
 			    events);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 14e42e157de9..68192152c23b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2031,6 +2031,31 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cs_le_ext_create_conn(struct hci_dev *hdev, u8 status)
+{
+	struct hci_cp_le_ext_create_conn *cp;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	/* All connection failure handling is taken care of by the
+	 * hci_le_conn_failed function which is triggered by the HCI
+	 * request completion callbacks used for connecting.
+	 */
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_EXT_CREATE_CONN);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	cs_le_create_conn(hdev, &cp->peer_addr, cp->peer_addr_type,
+			  cp->own_addr_type, cp->filter_policy);
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cs_le_read_remote_features(struct hci_dev *hdev, u8 status)
 {
 	struct hci_cp_le_read_remote_features *cp;
@@ -3233,6 +3258,10 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cs_le_start_enc(hdev, ev->status);
 		break;
 
+	case HCI_OP_LE_EXT_CREATE_CONN:
+		hci_cs_le_ext_create_conn(hdev, ev->status);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
@@ -4733,6 +4762,20 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 			     le16_to_cpu(ev->supervision_timeout));
 }
 
+static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
+					 struct sk_buff *skb)
+{
+	struct hci_ev_le_enh_conn_complete *ev = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+	le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
+			     ev->role, le16_to_cpu(ev->handle),
+			     le16_to_cpu(ev->interval),
+			     le16_to_cpu(ev->latency),
+			     le16_to_cpu(ev->supervision_timeout));
+}
+
 static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
 					    struct sk_buff *skb)
 {
@@ -5352,6 +5395,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_le_ext_adv_report_evt(hdev, skb);
 		break;
 
+	case HCI_EV_LE_ENHANCED_CONN_COMPLETE:
+		hci_le_enh_conn_complete_evt(hdev, skb);
+		break;
+
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From cd88259a7215e0737f8ef2c2842f41922ae87d8d Mon Sep 17 00:00:00 2001
From: Lei YU <mine260309@gmail.com>
Date: Tue, 26 Jun 2018 09:55:25 +0800
Subject: clk: aspeed: Fix SDCLK name

The SDCLK was named SDCLKCLK, and no one has used this yet.
Fix it.

Signed-off-by: Lei YU <mine260309@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-aspeed.c                 | 2 +-
 include/dt-bindings/clock/aspeed-clock.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index 38b366b00c57..f49c6842c604 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -109,7 +109,7 @@ static const struct aspeed_gate_data aspeed_gates[] = {
 	[ASPEED_CLK_GATE_RSACLK] =	{ 24, -1, "rsaclk-gate",	NULL,	0 }, /* RSA */
 	[ASPEED_CLK_GATE_UART3CLK] =	{ 25, -1, "uart3clk-gate",	"uart",	0 }, /* UART3 */
 	[ASPEED_CLK_GATE_UART4CLK] =	{ 26, -1, "uart4clk-gate",	"uart",	0 }, /* UART4 */
-	[ASPEED_CLK_GATE_SDCLKCLK] =	{ 27, 16, "sdclk-gate",		NULL,	0 }, /* SDIO/SD */
+	[ASPEED_CLK_GATE_SDCLK] =	{ 27, 16, "sdclk-gate",		NULL,	0 }, /* SDIO/SD */
 	[ASPEED_CLK_GATE_LHCCLK] =	{ 28, -1, "lhclk-gate",		"lhclk", 0 }, /* LPC master/LPC+ */
 };
 
diff --git a/include/dt-bindings/clock/aspeed-clock.h b/include/dt-bindings/clock/aspeed-clock.h
index 44761849fcbe..f43738607d77 100644
--- a/include/dt-bindings/clock/aspeed-clock.h
+++ b/include/dt-bindings/clock/aspeed-clock.h
@@ -25,7 +25,7 @@
 #define ASPEED_CLK_GATE_RSACLK		19
 #define ASPEED_CLK_GATE_UART3CLK	20
 #define ASPEED_CLK_GATE_UART4CLK	21
-#define ASPEED_CLK_GATE_SDCLKCLK	22
+#define ASPEED_CLK_GATE_SDCLK		22
 #define ASPEED_CLK_GATE_LHCCLK		23
 #define ASPEED_CLK_HPLL			24
 #define ASPEED_CLK_AHB			25
-- 
cgit v1.2.3


From 9d8108f9f3cb996a9677223fcd1feeb1f3e05566 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Fri, 22 Jun 2018 14:32:33 +0800
Subject: clk: imx6sll: add GPIO LPCGs

According to Reference Manual Rev.0, 06/2017, there are GPIO LPCGs
defined in CCM CCGRs, add them into clock tree.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/imx/clk-imx6sll.c             | 6 ++++++
 include/dt-bindings/clock/imx6sll-clock.h | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx6sll.c b/drivers/clk/imx/clk-imx6sll.c
index 3651c77fbabe..627064597af9 100644
--- a/drivers/clk/imx/clk-imx6sll.c
+++ b/drivers/clk/imx/clk-imx6sll.c
@@ -253,6 +253,7 @@ static void __init imx6sll_clocks_init(struct device_node *ccm_node)
 	clks[IMX6SLL_CLK_DCP]		= imx_clk_gate2("dcp", "ahb", base + 0x68, 10);
 	clks[IMX6SLL_CLK_UART2_IPG]	= imx_clk_gate2("uart2_ipg", "ipg", base + 0x68, 28);
 	clks[IMX6SLL_CLK_UART2_SERIAL]	= imx_clk_gate2("uart2_serial",	"uart_podf", base + 0x68, 28);
+	clks[IMX6SLL_CLK_GPIO2]		= imx_clk_gate2("gpio2", "ipg", base + 0x68, 30);
 
 	/* CCGR1 */
 	clks[IMX6SLL_CLK_ECSPI1]	= imx_clk_gate2("ecspi1",	"ecspi_podf", base + 0x6c, 0);
@@ -267,13 +268,17 @@ static void __init imx6sll_clocks_init(struct device_node *ccm_node)
 	clks[IMX6SLL_CLK_GPT_SERIAL]	= imx_clk_gate2("gpt1_serial",	"perclk", base + 0x6c, 22);
 	clks[IMX6SLL_CLK_UART4_IPG]	= imx_clk_gate2("uart4_ipg",	"ipg", base + 0x6c, 24);
 	clks[IMX6SLL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serail",	"uart_podf", base + 0x6c, 24);
+	clks[IMX6SLL_CLK_GPIO1]		= imx_clk_gate2("gpio1",	"ipg", base + 0x6c, 26);
+	clks[IMX6SLL_CLK_GPIO5]		= imx_clk_gate2("gpio5",	"ipg", base + 0x6c, 30);
 
 	/* CCGR2 */
+	clks[IMX6SLL_CLK_GPIO6]		= imx_clk_gate2("gpio6",	"ipg",    base + 0x70, 0);
 	clks[IMX6SLL_CLK_CSI]		= imx_clk_gate2("csi",		"axi",    base + 0x70,	2);
 	clks[IMX6SLL_CLK_I2C1]		= imx_clk_gate2("i2c1",		"perclk", base + 0x70,	6);
 	clks[IMX6SLL_CLK_I2C2]		= imx_clk_gate2("i2c2",		"perclk", base + 0x70,	8);
 	clks[IMX6SLL_CLK_I2C3]		= imx_clk_gate2("i2c3",		"perclk", base + 0x70,	10);
 	clks[IMX6SLL_CLK_OCOTP]		= imx_clk_gate2("ocotp",	"ipg",    base + 0x70,	12);
+	clks[IMX6SLL_CLK_GPIO3]		= imx_clk_gate2("gpio3",	"ipg",    base + 0x70,	26);
 	clks[IMX6SLL_CLK_LCDIF_APB]	= imx_clk_gate2("lcdif_apb",	"axi",    base + 0x70,	28);
 	clks[IMX6SLL_CLK_PXP]		= imx_clk_gate2("pxp",		"axi",    base + 0x70,	30);
 
@@ -283,6 +288,7 @@ static void __init imx6sll_clocks_init(struct device_node *ccm_node)
 	clks[IMX6SLL_CLK_EPDC_AXI]	= imx_clk_gate2("epdc_aclk",	"axi",		base + 0x74, 4);
 	clks[IMX6SLL_CLK_EPDC_PIX]	= imx_clk_gate2("epdc_pix",	"epdc_podf",	base + 0x74, 4);
 	clks[IMX6SLL_CLK_LCDIF_PIX]	= imx_clk_gate2("lcdif_pix",	"lcdif_podf",	base + 0x74, 10);
+	clks[IMX6SLL_CLK_GPIO4]		= imx_clk_gate2("gpio4",	"ipg",		base + 0x74, 12);
 	clks[IMX6SLL_CLK_WDOG1]		= imx_clk_gate2("wdog1",	"ipg",		base + 0x74, 16);
 	clks[IMX6SLL_CLK_MMDC_P0_FAST]	= imx_clk_gate_flags("mmdc_p0_fast", "mmdc_podf",  base + 0x74,	20, CLK_IS_CRITICAL);
 	clks[IMX6SLL_CLK_MMDC_P0_IPG]	= imx_clk_gate2_flags("mmdc_p0_ipg", "ipg",	   base + 0x74,	24, CLK_IS_CRITICAL);
diff --git a/include/dt-bindings/clock/imx6sll-clock.h b/include/dt-bindings/clock/imx6sll-clock.h
index 151111e68f4f..1036475f997d 100644
--- a/include/dt-bindings/clock/imx6sll-clock.h
+++ b/include/dt-bindings/clock/imx6sll-clock.h
@@ -197,6 +197,13 @@
 #define IMX6SLL_CLK_EXTERN_AUDIO_PODF   171
 #define IMX6SLL_CLK_EXTERN_AUDIO        172
 
-#define IMX6SLL_CLK_END			173
+#define IMX6SLL_CLK_GPIO1               173
+#define IMX6SLL_CLK_GPIO2               174
+#define IMX6SLL_CLK_GPIO3               175
+#define IMX6SLL_CLK_GPIO4               176
+#define IMX6SLL_CLK_GPIO5               177
+#define IMX6SLL_CLK_GPIO6               178
+
+#define IMX6SLL_CLK_END			179
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6SLL_H */
-- 
cgit v1.2.3


From 5740c99e9d30b81fcc478797e7215c61e241f44e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: return int

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/btrfs/ctree.h   |  4 ++--
 fs/btrfs/ioctl.c   | 10 +++-------
 fs/ocfs2/file.c    | 17 ++++++-----------
 fs/read_write.c    |  4 ++--
 fs/xfs/xfs_file.c  | 19 ++-----------------
 include/linux/fs.h |  2 +-
 6 files changed, 16 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 118346aceea9..1c7c13334423 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,8 +3247,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			   struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+			    struct file *dst_file, u64 dst_loff);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c2837a32d689..94dc8e6c44ce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3600,13 +3600,12 @@ out_free:
 	return ret;
 }
 
-ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-				struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+			    struct file *dst_file, u64 dst_loff)
 {
 	struct inode *src = file_inode(src_file);
 	struct inode *dst = file_inode(dst_file);
 	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-	ssize_t res;
 
 	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
 		/*
@@ -3617,10 +3616,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 		return -EINVAL;
 	}
 
-	res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
-	if (res)
-		return res;
-	return olen;
+	return btrfs_extent_same(src, loff, olen, dst, dst_loff);
 }
 
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 255f758af03a..f96f018463f7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
 					 len, false);
 }
 
-static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
-				       u64 loff,
-				       u64 len,
-				       struct file *dst_file,
-				       u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *src_file,
+				   u64 loff,
+				   u64 len,
+				   struct file *dst_file,
+				   u64 dst_loff)
 {
-	int error;
-
-	error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
 					  len, true);
-	if (error)
-		return error;
-	return len;
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/read_write.c b/fs/read_write.c
index f43bb12b4759..fa64e51ef4cf 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1976,7 +1976,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	u16 count = same->dest_count;
 	struct file *dst_file;
 	loff_t dst_off;
-	ssize_t deduped;
+	int deduped;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EINVAL;
@@ -2056,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 			else if (deduped < 0)
 				info->status = deduped;
 			else
-				info->bytes_deduped += deduped;
+				info->bytes_deduped += len;
 		}
 
 next_file:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a3e7767a5715..547ef7e8aec1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -933,7 +933,7 @@ xfs_file_clone_range(
 				     len, false);
 }
 
-STATIC ssize_t
+STATIC int
 xfs_file_dedupe_range(
 	struct file	*src_file,
 	u64		loff,
@@ -941,23 +941,8 @@ xfs_file_dedupe_range(
 	struct file	*dst_file,
 	u64		dst_loff)
 {
-	struct inode	*srci = file_inode(src_file);
-	u64		max_dedupe;
-	int		error;
-
-	/*
-	 * Since we have to read all these pages in to compare them, cut
-	 * it off at MAX_RW_COUNT/2 rounded down to the nearest block.
-	 * That means we won't do more than MAX_RW_COUNT IO per request.
-	 */
-	max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
-	if (len > max_dedupe)
-		len = max_dedupe;
-	error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
 				     len, true);
-	if (error)
-		return error;
-	return len;
 }
 
 STATIC int
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..b81c4b7e339f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 87eb5eb2423213ac0e7315ce5d275f1ff80e0263 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 6 Jul 2018 23:57:03 +0200
Subject: vfs: dedupe: rationalize args

Clean up f_op->dedupe_file_range() interface.

1) Use loff_t for offsets and length instead of u64
2) Order the arguments the same way as {copy|clone}_file_range().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/btrfs/ctree.h   |  5 +++--
 fs/btrfs/ioctl.c   |  7 ++++---
 fs/ocfs2/file.c    | 12 ++++++------
 fs/read_write.c    |  4 ++--
 fs/xfs/xfs_file.c  | 12 ++++++------
 include/linux/fs.h |  2 +-
 6 files changed, 22 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1c7c13334423..d9d924017dfb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,8 +3247,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			    struct file *dst_file, u64 dst_loff);
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+			    struct file *dst_file, loff_t dst_loff,
+			    u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 94dc8e6c44ce..755c9a306321 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3600,8 +3600,9 @@ out_free:
 	return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
-			    struct file *dst_file, u64 dst_loff)
+int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
+			    struct file *dst_file, loff_t dst_loff,
+			    u64 olen)
 {
 	struct inode *src = file_inode(src_file);
 	struct inode *dst = file_inode(dst_file);
@@ -3616,7 +3617,7 @@ int btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 		return -EINVAL;
 	}
 
-	return btrfs_extent_same(src, loff, olen, dst, dst_loff);
+	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
 }
 
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f96f018463f7..9fa35cb6f6e0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2537,13 +2537,13 @@ static int ocfs2_file_clone_range(struct file *file_in,
 					 len, false);
 }
 
-static int ocfs2_file_dedupe_range(struct file *src_file,
-				   u64 loff,
-				   u64 len,
-				   struct file *dst_file,
-				   u64 dst_loff)
+static int ocfs2_file_dedupe_range(struct file *file_in,
+				   loff_t pos_in,
+				   struct file *file_out,
+				   loff_t pos_out,
+				   u64 len)
 {
-	return ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 					  len, true);
 }
 
diff --git a/fs/read_write.c b/fs/read_write.c
index fa64e51ef4cf..c31794f92c2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2049,8 +2049,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 			info->status = -EINVAL;
 		} else {
 			deduped = dst_file->f_op->dedupe_file_range(file, off,
-							len, dst_file,
-							info->dest_offset);
+							dst_file,
+							info->dest_offset, len);
 			if (deduped == -EBADE)
 				info->status = FILE_DEDUPE_RANGE_DIFFERS;
 			else if (deduped < 0)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 547ef7e8aec1..0f40ba54d83f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -935,13 +935,13 @@ xfs_file_clone_range(
 
 STATIC int
 xfs_file_dedupe_range(
-	struct file	*src_file,
-	u64		loff,
-	u64		len,
-	struct file	*dst_file,
-	u64		dst_loff)
+	struct file	*file_in,
+	loff_t		pos_in,
+	struct file	*file_out,
+	loff_t		pos_out,
+	u64		len)
 {
-	return xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
+	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 				     len, true);
 }
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b81c4b7e339f..a8fee2f44981 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1749,7 +1749,7 @@ struct file_operations {
 			loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
-	int (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 6 Jul 2018 15:13:18 -0700
Subject: lib: reciprocal_div: implement the improved algorithm on the paper
 mentioned

The new added "reciprocal_value_adv" implements the advanced version of the
algorithm described in Figure 4.2 of the paper except when
"divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then
requires u128 divide on host. The exception case could be easily handled
before calling "reciprocal_value_adv".

The advanced version requires more complex calculation to get the
reciprocal multiplier and other control variables, but then could reduce
the required emulation operations.

It makes no sense to use this advanced version for host divide emulation,
those extra complexities for calculating multiplier etc could completely
waive our saving on emulation operations.

However, it makes sense to use it for JIT divide code generation (for
example eBPF JIT backends) for which we are willing to trade performance of
JITed code with that of host. As shown by the following pseudo code, the
required emulation operations could go down from 6 (the basic version) to 3
or 4.

To use the result of "reciprocal_value_adv", suppose we want to calculate
n/d, the C-style pseudo code will be the following, it could be easily
changed to real code generation for other JIT targets.

  struct reciprocal_value_adv rvalue;
  u8 pre_shift, exp;

  // handle exception case.
  if (d >= (1U << 31)) {
    result = n >= d;
    return;
  }
  rvalue = reciprocal_value_adv(d, 32)
  exp = rvalue.exp;
  if (rvalue.is_wide_m && !(d & 1)) {
    // floor(log2(d & (2^32 -d)))
    pre_shift = fls(d & -d) - 1;
    rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
  } else {
    pre_shift = 0;
  }

  // code generation starts.
  if (imm == 1U << exp) {
    result = n >> exp;
  } else if (rvalue.is_wide_m) {
    // pre_shift must be zero when reached here.
    t = (n * rvalue.m) >> 32;
    result = n - t;
    result >>= 1;
    result += t;
    result >>= rvalue.sh - 1;
  } else {
    if (pre_shift)
      result = n >> pre_shift;
    result = ((u64)result * rvalue.m) >> 32;
    result >>= rvalue.sh;
  }

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++
 lib/reciprocal_div.c           | 41 +++++++++++++++++++++++++
 2 files changed, 109 insertions(+)

(limited to 'include')

diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index e031e9f2f9d8..585ce89c0f33 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -25,6 +25,9 @@ struct reciprocal_value {
 	u8 sh1, sh2;
 };
 
+/* "reciprocal_value" and "reciprocal_divide" together implement the basic
+ * version of the algorithm described in Figure 4.1 of the paper.
+ */
 struct reciprocal_value reciprocal_value(u32 d);
 
 static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
@@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
 	return (t + ((a - t) >> R.sh1)) >> R.sh2;
 }
 
+struct reciprocal_value_adv {
+	u32 m;
+	u8 sh, exp;
+	bool is_wide_m;
+};
+
+/* "reciprocal_value_adv" implements the advanced version of the algorithm
+ * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
+ * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
+ * exception case could be easily handled before calling "reciprocal_value_adv".
+ *
+ * The advanced version requires more complex calculation to get the reciprocal
+ * multiplier and other control variables, but then could reduce the required
+ * emulation operations.
+ *
+ * It makes no sense to use this advanced version for host divide emulation,
+ * those extra complexities for calculating multiplier etc could completely
+ * waive our saving on emulation operations.
+ *
+ * However, it makes sense to use it for JIT divide code generation for which
+ * we are willing to trade performance of JITed code with that of host. As shown
+ * by the following pseudo code, the required emulation operations could go down
+ * from 6 (the basic version) to 3 or 4.
+ *
+ * To use the result of "reciprocal_value_adv", suppose we want to calculate
+ * n/d, the pseudo C code will be:
+ *
+ *   struct reciprocal_value_adv rvalue;
+ *   u8 pre_shift, exp;
+ *
+ *   // handle exception case.
+ *   if (d >= (1U << 31)) {
+ *     result = n >= d;
+ *     return;
+ *   }
+ *
+ *   rvalue = reciprocal_value_adv(d, 32)
+ *   exp = rvalue.exp;
+ *   if (rvalue.is_wide_m && !(d & 1)) {
+ *     // floor(log2(d & (2^32 -d)))
+ *     pre_shift = fls(d & -d) - 1;
+ *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
+ *   } else {
+ *     pre_shift = 0;
+ *   }
+ *
+ *   // code generation starts.
+ *   if (imm == 1U << exp) {
+ *     result = n >> exp;
+ *   } else if (rvalue.is_wide_m) {
+ *     // pre_shift must be zero when reached here.
+ *     t = (n * rvalue.m) >> 32;
+ *     result = n - t;
+ *     result >>= 1;
+ *     result += t;
+ *     result >>= rvalue.sh - 1;
+ *   } else {
+ *     if (pre_shift)
+ *       result = n >> pre_shift;
+ *     result = ((u64)result * rvalue.m) >> 32;
+ *     result >>= rvalue.sh;
+ *   }
+ */
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec);
+
 #endif /* _LINUX_RECIPROCAL_DIV_H */
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index fcb4ce682c6f..bf043258fa00 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
@@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d)
 	return R;
 }
 EXPORT_SYMBOL(reciprocal_value);
+
+struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec)
+{
+	struct reciprocal_value_adv R;
+	u32 l, post_shift;
+	u64 mhigh, mlow;
+
+	/* ceil(log2(d)) */
+	l = fls(d - 1);
+	/* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to
+	 * be handled before calling "reciprocal_value_adv", please see the
+	 * comment at include/linux/reciprocal_div.h.
+	 */
+	WARN(l == 32,
+	     "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor",
+	     d, __func__);
+	post_shift = l;
+	mlow = 1ULL << (32 + l);
+	do_div(mlow, d);
+	mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec));
+	do_div(mhigh, d);
+
+	for (; post_shift > 0; post_shift--) {
+		u64 lo = mlow >> 1, hi = mhigh >> 1;
+
+		if (lo >= hi)
+			break;
+
+		mlow = lo;
+		mhigh = hi;
+	}
+
+	R.m = (u32)mhigh;
+	R.sh = post_shift;
+	R.exp = l;
+	R.is_wide_m = mhigh > U32_MAX;
+
+	return R;
+}
+EXPORT_SYMBOL(reciprocal_value_adv);
-- 
cgit v1.2.3


From 6c79d12e945e85556674a04cde13657a5d7943da Mon Sep 17 00:00:00 2001
From: Taniya Das <tdas@codeaurora.org>
Date: Sat, 23 Jun 2018 19:49:26 +0530
Subject: dt-bindings: clock: Introduce QCOM Display clock bindings

Add device tree bindings for display clock controller for Qualcomm
Technology Inc's SDM845 SoCs.

Signed-off-by: Taniya Das <tdas@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/qcom,dispcc.txt      | 19 +++++++++
 include/dt-bindings/clock/qcom,dispcc-sdm845.h     | 45 ++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,dispcc.txt
 create mode 100644 include/dt-bindings/clock/qcom,dispcc-sdm845.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc.txt b/Documentation/devicetree/bindings/clock/qcom,dispcc.txt
new file mode 100644
index 000000000000..d639e18d0b85
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc.txt
@@ -0,0 +1,19 @@
+Qualcomm Technologies, Inc. Display Clock Controller Binding
+------------------------------------------------------------
+
+Required properties :
+
+- compatible : shall contain "qcom,sdm845-dispcc"
+- reg : shall contain base register location and length.
+- #clock-cells : from common clock binding, shall contain 1.
+- #reset-cells : from common reset binding, shall contain 1.
+- #power-domain-cells : from generic power domain binding, shall contain 1.
+
+Example:
+	dispcc: clock-controller@af00000 {
+		compatible = "qcom,sdm845-dispcc";
+		reg = <0xaf00000 0x100000>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+		#power-domain-cells = <1>;
+	};
diff --git a/include/dt-bindings/clock/qcom,dispcc-sdm845.h b/include/dt-bindings/clock/qcom,dispcc-sdm845.h
new file mode 100644
index 000000000000..11eed4bc9646
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,dispcc-sdm845.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SDM_DISP_CC_SDM845_H
+#define _DT_BINDINGS_CLK_SDM_DISP_CC_SDM845_H
+
+/* DISP_CC clock registers */
+#define DISP_CC_MDSS_AHB_CLK					0
+#define DISP_CC_MDSS_AXI_CLK					1
+#define DISP_CC_MDSS_BYTE0_CLK					2
+#define DISP_CC_MDSS_BYTE0_CLK_SRC				3
+#define DISP_CC_MDSS_BYTE0_INTF_CLK				4
+#define DISP_CC_MDSS_BYTE1_CLK					5
+#define DISP_CC_MDSS_BYTE1_CLK_SRC				6
+#define DISP_CC_MDSS_BYTE1_INTF_CLK				7
+#define DISP_CC_MDSS_ESC0_CLK					8
+#define DISP_CC_MDSS_ESC0_CLK_SRC				9
+#define DISP_CC_MDSS_ESC1_CLK					10
+#define DISP_CC_MDSS_ESC1_CLK_SRC				11
+#define DISP_CC_MDSS_MDP_CLK					12
+#define DISP_CC_MDSS_MDP_CLK_SRC				13
+#define DISP_CC_MDSS_MDP_LUT_CLK				14
+#define DISP_CC_MDSS_PCLK0_CLK					15
+#define DISP_CC_MDSS_PCLK0_CLK_SRC				16
+#define DISP_CC_MDSS_PCLK1_CLK					17
+#define DISP_CC_MDSS_PCLK1_CLK_SRC				18
+#define DISP_CC_MDSS_ROT_CLK					19
+#define DISP_CC_MDSS_ROT_CLK_SRC				20
+#define DISP_CC_MDSS_RSCC_AHB_CLK				21
+#define DISP_CC_MDSS_RSCC_VSYNC_CLK				22
+#define DISP_CC_MDSS_VSYNC_CLK					23
+#define DISP_CC_MDSS_VSYNC_CLK_SRC				24
+#define DISP_CC_PLL0						25
+#define DISP_CC_MDSS_BYTE0_DIV_CLK_SRC				26
+#define DISP_CC_MDSS_BYTE1_DIV_CLK_SRC				27
+
+/* DISP_CC Reset */
+#define DISP_CC_MDSS_RSCC_BCR					0
+
+/* DISP_CC GDSCR */
+#define MDSS_GDSC						0
+
+#endif
-- 
cgit v1.2.3


From 351782067b6be81879b0af0daf7bd3acbb32d986 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:54 -0400
Subject: ipv4: ipcm_cookie initializers

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h     | 15 +++++++++++++++
 net/ipv4/icmp.c      | 11 ++---------
 net/ipv4/ip_output.c |  6 +-----
 net/ipv4/ping.c      |  9 +--------
 net/ipv4/raw.c       |  9 +--------
 net/ipv4/udp.c       | 10 +---------
 6 files changed, 21 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 99d1b835d2aa..6db23bf1e5eb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -79,6 +79,21 @@ struct ipcm_cookie {
 	__u16			gso_size;
 };
 
+static inline void ipcm_init(struct ipcm_cookie *ipcm)
+{
+	*ipcm = (struct ipcm_cookie) { .tos = -1 };
+}
+
+static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+				const struct inet_sock *inet)
+{
+	ipcm_init(ipcm);
+
+	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+	ipcm->oif = inet->sk.sk_bound_dev_if;
+	ipcm->addr = inet->inet_saddr;
+}
+
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
 #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 937239afd68d..695979b7ef6d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -429,15 +429,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 
 	icmp_param->data.icmph.checksum = 0;
 
+	ipcm_init(&ipc);
 	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	if (icmp_param->replyopts.opt.opt.optlen) {
 		ipc.opt = &icmp_param->replyopts.opt;
@@ -711,12 +707,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
 	sk->sk_mark = mark;
+	ipcm_init(&ipc);
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts.opt;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, &icmp_param);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 570e3ebc3974..81d0e4a77ec5 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1548,12 +1548,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
+	ipcm_init(&ipc);
 	ipc.addr = daddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
 
 	if (replyopts.opt.opt.optlen) {
 		ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b47492205507..6f17fc8ebbdb 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -739,14 +739,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		/* no remote port */
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.oif = sk->sk_bound_dev_if;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 446af7be2b55..cf142909389c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -562,14 +562,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		daddr = inet->inet_daddr;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.sockc.transmit_time = 0;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5c76ba0666ec..87f3a0b77864 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -926,12 +926,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
 		return -EOPNOTSUPP;
 
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.sockc.transmit_time = 0;
-
 	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
 
 	fl4 = &inet->cork.fl.u.ip4;
@@ -978,9 +972,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		connected = 1;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 	ipc.gso_size = up->gso_size;
 
 	if (msg->msg_controllen) {
-- 
cgit v1.2.3


From b515430ac9c25d5192cf498af3c6be6c4f51caad Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:55 -0400
Subject: ipv6: ipcm6_cookie initializer

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h  | 19 +++++++++++++++++++
 net/ipv6/icmp.c     |  7 ++-----
 net/ipv6/ping.c     |  4 +---
 net/ipv6/raw.c      |  5 +----
 net/ipv6/udp.c      |  4 +---
 net/l2tp/l2tp_ip6.c |  4 +---
 6 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b7843e0b16ee..6cb247f54d4c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -301,6 +301,25 @@ struct ipcm6_cookie {
 	__u16 gso_size;
 };
 
+static inline void ipcm6_init(struct ipcm6_cookie *ipc6)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = -1,
+		.dontfrag = -1,
+	};
+}
+
+static inline void ipcm6_init_sk(struct ipcm6_cookie *ipc6,
+				 const struct ipv6_pinfo *np)
+{
+	*ipc6 = (struct ipcm6_cookie) {
+		.hlimit = -1,
+		.tclass = np->tclass,
+		.dontfrag = np->dontfrag,
+	};
+}
+
 static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
 {
 	struct ipv6_txoptions *opt;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf6ab6e..d99fed67cd10 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -545,7 +545,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
@@ -553,8 +553,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		goto out;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	msg.skb = skb;
 	msg.offset = skb_network_offset(skb);
@@ -726,10 +724,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	msg.offset = 0;
 	msg.type = ICMPV6_ECHO_REPLY;
 
+	ipcm6_init_sk(&ipc6, np);
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 			    skb->len + sizeof(struct icmp6hdr),
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 96f56bf49a30..717e7c1fba29 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,7 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
@@ -142,8 +142,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	pfh.family = AF_INET6;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5737c50f16eb..5f40670271ee 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -791,10 +791,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
-	ipc6.opt = NULL;
+	ipcm6_init(&ipc6);
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ac6fc6728903..940115da9843 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1143,9 +1143,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
 	struct sockcm_cookie sockc;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
 	sockc.tsflags = sk->sk_tsflags;
 	sockc.transmit_time = 0;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 957369192ca1..38f80691f4ab 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -525,9 +525,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 
 	if (lsa) {
 		if (addr_len < SIN6_LEN_RFC2133)
-- 
cgit v1.2.3


From 657a0667025e77cc17f8a38b93e60a2bc24d830c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:56 -0400
Subject: sock: sockc cookie initializer

Initialize the cookie in one location to reduce code duplication and
avoid bugs from inconsistent initialization, such as that fixed in
commit 9887cba19978 ("ip: limit use of gso_size to udp").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 6 ++++++
 net/ipv4/tcp.c         | 2 +-
 net/packet/af_packet.c | 9 +++------
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index e0eac9ef44b5..83b747538bd0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1600,6 +1600,12 @@ struct sockcm_cookie {
 	u16 tsflags;
 };
 
+static inline void sockcm_init(struct sockcm_cookie *sockc,
+			       const struct sock *sk)
+{
+	*sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+}
+
 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		     struct sockcm_cookie *sockc);
 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bf461fa77ed6..850dc8f15afc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1241,7 +1241,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		/* 'common' sending to sendq */
 	}
 
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err)) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3428f7739ae9..47931ebfaef3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1951,8 +1951,7 @@ retry:
 		goto out_unlock;
 	}
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err))
@@ -2636,8 +2635,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = po->sk.sk_tsflags;
+	sockcm_init(&sockc, &po->sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(&po->sk, msg, &sockc);
 		if (unlikely(err))
@@ -2833,8 +2831,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
-	sockc.transmit_time = 0;
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	sockc.mark = sk->sk_mark;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
-- 
cgit v1.2.3


From 5fdaa88dfefa87ee1ea92750e99950dca182ea41 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:57 -0400
Subject: ipv6: fold sockcm_cookie into ipcm6_cookie

ipcm_cookie includes sockcm_cookie. Do the same for ipcm6_cookie.

This reduces the number of arguments that need to be passed around,
applies ipcm6_init to all cookie fields at once and reduces code
differentiation between ipv4 and ipv6.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       |  7 +++----
 include/net/transp_v6.h  |  3 +--
 net/ipv6/datagram.c      |  4 ++--
 net/ipv6/icmp.c          |  7 ++-----
 net/ipv6/ip6_flowlabel.c |  3 +--
 net/ipv6/ip6_output.c    | 24 ++++++++++--------------
 net/ipv6/ipv6_sockglue.c |  3 +--
 net/ipv6/ping.c          |  3 +--
 net/ipv6/raw.c           | 10 ++++------
 net/ipv6/udp.c           | 10 ++++------
 net/l2tp/l2tp_ip6.c      |  6 ++----
 11 files changed, 31 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6cb247f54d4c..aa6fd11a887c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -294,6 +294,7 @@ struct ipv6_fl_socklist {
 };
 
 struct ipcm6_cookie {
+	struct sockcm_cookie sockc;
 	__s16 hlimit;
 	__s16 tclass;
 	__s8  dontfrag;
@@ -959,8 +960,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc);
+		    struct rt6_info *rt, unsigned int flags);
 
 int ip6_push_pending_frames(struct sock *sk);
 
@@ -977,8 +977,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc);
+			     struct inet_cork_full *cork);
 
 static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
 {
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index f6a3543e5247..a8f6020f1196 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -42,8 +42,7 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
 				    struct sk_buff *skb);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg,
-			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6,
-			  struct sockcm_cookie *sockc);
+			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6);
 
 void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 			       __u16 srcp, __u16 destp, int rqueue, int bucket);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ee08b6a86a4..201306b9b5ea 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -736,7 +736,7 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			  struct msghdr *msg, struct flowi6 *fl6,
-			  struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
+			  struct ipcm6_cookie *ipc6)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -755,7 +755,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 		}
 
 		if (cmsg->cmsg_level == SOL_SOCKET) {
-			err = __sock_cmsg_send(sk, msg, cmsg, sockc);
+			err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc);
 			if (err)
 				return err;
 			continue;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d99fed67cd10..24611c8b0562 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -430,7 +430,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	struct icmp6hdr tmp_hdr;
 	struct flowi6 fl6;
 	struct icmpv6_msg msg;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int iif = 0;
 	int addr_type = 0;
@@ -573,7 +572,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 			    len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr),
 			    &ipc6, &fl6, (struct rt6_info *)dst,
-			    MSG_DONTWAIT, &sockc_unused)) {
+			    MSG_DONTWAIT)) {
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
@@ -677,7 +676,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct ipcm6_cookie ipc6;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
-	struct sockcm_cookie sockc_unused = {0};
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -731,8 +729,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 			    skb->len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
-			    (struct rt6_info *)dst, MSG_DONTWAIT,
-			    &sockc_unused)) {
+			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3eee7637bdfe..cb54a8a3c273 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -373,7 +373,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 	if (olen > 0) {
 		struct msghdr msg;
 		struct flowi6 flowi6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		err = -ENOMEM;
@@ -392,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		memset(&flowi6, 0, sizeof(flowi6));
 
 		ipc6.opt = fl->opt;
-		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f48af7e62f12..1a3bf6437cb9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1158,8 +1158,7 @@ static void ip6_append_data_mtu(unsigned int *mtu,
 
 static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
-			  struct rt6_info *rt, struct flowi6 *fl6,
-			  const struct sockcm_cookie *sockc)
+			  struct rt6_info *rt, struct flowi6 *fl6)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	unsigned int mtu;
@@ -1227,7 +1226,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 		cork->base.flags |= IPCORK_ALLFRAG;
 	cork->base.length = 0;
 
-	cork->base.transmit_time = sockc->transmit_time;
+	cork->base.transmit_time = ipc6->sockc.transmit_time;
 
 	return 0;
 }
@@ -1241,8 +1240,7 @@ static int __ip6_append_data(struct sock *sk,
 			     int getfrag(void *from, char *to, int offset,
 					 int len, int odd, struct sk_buff *skb),
 			     void *from, int length, int transhdrlen,
-			     unsigned int flags, struct ipcm6_cookie *ipc6,
-			     const struct sockcm_cookie *sockc)
+			     unsigned int flags, struct ipcm6_cookie *ipc6)
 {
 	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
@@ -1321,7 +1319,7 @@ emsgsize:
 		csummode = CHECKSUM_PARTIAL;
 
 	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
-		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
+		sock_tx_timestamp(sk, ipc6->sockc.tsflags, &tx_flags);
 		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
 		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
 			tskey = sk->sk_tskey++;
@@ -1563,8 +1561,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc)
+		    struct rt6_info *rt, unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1578,7 +1575,7 @@ int ip6_append_data(struct sock *sk,
 		 * setup for corking
 		 */
 		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
-				     ipc6, rt, fl6, sockc);
+				     ipc6, rt, fl6);
 		if (err)
 			return err;
 
@@ -1592,7 +1589,7 @@ int ip6_append_data(struct sock *sk,
 
 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
 				 &np->cork, sk_page_frag(sk), getfrag,
-				 from, length, transhdrlen, flags, ipc6, sockc);
+				 from, length, transhdrlen, flags, ipc6);
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
@@ -1752,8 +1749,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc)
+			     struct inet_cork_full *cork)
 {
 	struct inet6_cork v6_cork;
 	struct sk_buff_head queue;
@@ -1770,7 +1766,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork->base.opt = NULL;
 	cork->base.dst = NULL;
 	v6_cork.opt = NULL;
-	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6, sockc);
+	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
 	if (err) {
 		ip6_cork_release(cork, &v6_cork);
 		return ERR_PTR(err);
@@ -1781,7 +1777,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
 				&current->task_frag, getfrag, from,
 				length + exthdrlen, transhdrlen + exthdrlen,
-				flags, ipc6, sockc);
+				flags, ipc6);
 	if (err) {
 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
 		return ERR_PTR(err);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7f0130..fabe3ba1bddc 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -489,7 +489,6 @@ sticky_done:
 		struct ipv6_txoptions *opt = NULL;
 		struct msghdr msg;
 		struct flowi6 fl6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		memset(&fl6, 0, sizeof(fl6));
@@ -522,7 +521,7 @@ sticky_done:
 		msg.msg_control = (void *)(opt+1);
 		ipc6.opt = opt;
 
-		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 717e7c1fba29..4c04bccc7417 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -62,7 +62,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct pingfakehdr pfh;
-	struct sockcm_cookie junk = {0};
 	struct ipcm6_cookie ipc6;
 
 	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -146,7 +145,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
 			      0, &ipc6, &fl6, rt,
-			      MSG_DONTWAIT, &junk);
+			      MSG_DONTWAIT);
 
 	if (err) {
 		ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5f40670271ee..413d98bf24f4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -767,7 +767,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst = NULL;
 	struct raw6_frag_vec rfv;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc;
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	u16 proto;
@@ -792,6 +791,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_uid = sk->sk_uid;
 
 	ipcm6_init(&ipc6);
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -845,15 +845,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (fl6.flowi6_oif == 0)
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 
-	sockc.tsflags = sk->sk_tsflags;
-	sockc.transmit_time = 0;
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -921,13 +919,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 back_from_confirm:
 	if (inet->hdrincl)
 		err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
-					msg->msg_flags, &sockc);
+					msg->msg_flags, &ipc6.sockc);
 	else {
 		ipc6.opt = opt;
 		lock_sock(sk);
 		err = ip6_append_data(sk, raw6_getfrag, &rfv,
 			len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
-			msg->msg_flags, &sockc);
+			msg->msg_flags);
 
 		if (err)
 			ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 940115da9843..f6b96956a8ed 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1141,12 +1141,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
-	struct sockcm_cookie sockc;
 
 	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
-	sockc.tsflags = sk->sk_tsflags;
-	sockc.transmit_time = 0;
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	/* destination address check */
 	if (sin6) {
@@ -1281,7 +1279,7 @@ do_udp_sendmsg:
 		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
 		if (err > 0)
 			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
-						    &ipc6, &sockc);
+						    &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1375,7 +1373,7 @@ back_from_confirm:
 		skb = ip6_make_skb(sk, getfrag, msg, ulen,
 				   sizeof(struct udphdr), &ipc6,
 				   &fl6, (struct rt6_info *)dst,
-				   msg->msg_flags, &cork, &sockc);
+				   msg->msg_flags, &cork);
 		err = PTR_ERR(skb);
 		if (!IS_ERR_OR_NULL(skb))
 			err = udp_v6_send_skb(skb, &fl6, &cork.base);
@@ -1401,7 +1399,7 @@ do_append_data:
 	up->len += ulen;
 	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
 			      &ipc6, &fl6, (struct rt6_info *)dst,
-			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
+			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 38f80691f4ab..672e5b753738 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -500,7 +500,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	int transhdrlen = 4; /* zero session-id */
@@ -573,8 +572,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
-					    &sockc_unused);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -639,7 +637,7 @@ back_from_confirm:
 	err = ip6_append_data(sk, ip_generic_getfrag, msg,
 			      ulen, transhdrlen, &ipc6,
 			      &fl6, (struct rt6_info *)dst,
-			      msg->msg_flags, &sockc_unused);
+			      msg->msg_flags);
 	if (err)
 		ip6_flush_pending_frames(sk);
 	else if (!(msg->msg_flags & MSG_MORE))
-- 
cgit v1.2.3


From 678ca42d688534adfc780b150abefaaac7c86687 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 6 Jul 2018 10:12:58 -0400
Subject: ip: remove tx_flags from ipcm_cookie and use same logic for v4 and v6

skb_shinfo(skb)->tx_flags is derived from sk->sk_tsflags, possibly
after modification by __sock_cmsg_send, by calling sock_tx_timestamp.

The IPv4 and IPv6 paths do this conversion differently. In IPv4, the
individual protocols that support tx timestamps call this function
and store the result in ipc.tx_flags. In IPv6, sock_tx_timestamp is
called in __ip6_append_data.

There is no need to store both tx_flags and ts_flags in the cookie
as one is derived from the other. Convert when setting up the cork
and remove the redundant field. This is similar to IPv6, only have
the conversion happen only once per datagram, in ip(6)_setup_cork.

Also change __ip6_append_data to match __ip_append_data. Only update
tskey if timestamping is enabled with OPT_ID. The SOCK_.. test is
redundant: only valid protocols can have non-zero cork->tx_flags.

After this change the IPv4 and IPv6 logic is the same.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      |  1 -
 net/ipv4/ip_output.c  |  3 ++-
 net/ipv4/ping.c       |  2 --
 net/ipv4/raw.c        |  2 --
 net/ipv4/udp.c        |  2 --
 net/ipv6/ip6_output.c | 18 ++++++++----------
 6 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 6db23bf1e5eb..e44b1a44f67a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -72,7 +72,6 @@ struct ipcm_cookie {
 	__be32			addr;
 	int			oif;
 	struct ip_options_rcu	*opt;
-	__u8			tx_flags;
 	__u8			ttl;
 	__s16			tos;
 	char			priority;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 81d0e4a77ec5..e14c774cc092 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1153,8 +1153,9 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
 	cork->priority = ipc->priority;
-	cork->tx_flags = ipc->tx_flags;
 	cork->transmit_time = ipc->sockc.transmit_time;
+	cork->tx_flags = 0;
+	sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
 
 	return 0;
 }
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 6f17fc8ebbdb..b54c964ad925 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -763,8 +763,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		rcu_read_unlock();
 	}
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cf142909389c..33df4d76db2d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -665,8 +665,6 @@ back_from_confirm:
 				      &rt, msg->msg_flags, &ipc.sockc);
 
 	 else {
-		sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 		if (!ipc.addr)
 			ipc.addr = fl4.daddr;
 		lock_sock(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 87f3a0b77864..060e841dde40 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1020,8 +1020,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	if (ipc.opt && ipc.opt->opt.srr) {
 		if (!daddr) {
 			err = -EINVAL;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1a3bf6437cb9..ff4b28a600ab 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1221,6 +1221,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	cork->base.fragsize = mtu;
 	cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
 			      sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
+	cork->base.tx_flags = 0;
+	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
 		cork->base.flags |= IPCORK_ALLFRAG;
@@ -1250,7 +1252,6 @@ static int __ip6_append_data(struct sock *sk,
 	int copy;
 	int err;
 	int offset = 0;
-	__u8 tx_flags = 0;
 	u32 tskey = 0;
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1269,6 +1270,10 @@ static int __ip6_append_data(struct sock *sk,
 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
 	orig_mtu = mtu;
 
+	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		tskey = sk->sk_tskey++;
+
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1318,13 +1323,6 @@ emsgsize:
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
-	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
-		sock_tx_timestamp(sk, ipc6->sockc.tsflags, &tx_flags);
-		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
-		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
-			tskey = sk->sk_tskey++;
-	}
-
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1443,8 +1441,8 @@ alloc_new_skb:
 				    dst_exthdrlen);
 
 			/* Only the initial fragment is time stamped */
-			skb_shinfo(skb)->tx_flags = tx_flags;
-			tx_flags = 0;
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
 			skb_shinfo(skb)->tskey = tskey;
 			tskey = 0;
 
-- 
cgit v1.2.3


From 000244d3dc1f8114e38fe9ee2d9a0986404d9cbe Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 14:44:02 +0200
Subject: net: bridge: fix br_vlan_get_{pvid,info} return values

These two functions return the regular -EINVAL failure in the normal
code path, but return a nonstandard '-1' error otherwise, which gets
interpreted as -EPERM.

Let's change it to -EINVAL for the dummy functions as well.

Fixes: 4d4fd36126d6 ("net: bridge: Publish bridge accessor functions")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 7843b98e1c6e..c20c7e197d07 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -105,13 +105,13 @@ static inline bool br_vlan_enabled(const struct net_device *dev)
 
 static inline int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
 {
-	return -1;
+	return -EINVAL;
 }
 
 static inline int br_vlan_get_info(const struct net_device *dev, u16 vid,
 				   struct bridge_vlan_info *p_vinfo)
 {
-	return -1;
+	return -EINVAL;
 }
 #endif
 
-- 
cgit v1.2.3


From 22dd149167359981ea6f4afde04026fb78747ddc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 14:58:51 +0200
Subject: devlink: fix incorrect return statement

A newly added dummy helper function tries to return a failure from a "void"
function:

In file included from include/net/dsa.h:24,
                 from arch/arm/plat-orion/common.c:21:
include/net/devlink.h: In function 'devlink_param_value_changed':
include/net/devlink.h:771:9: error: 'return' with a value, in function returning void [-Werror]
  return -EOPNOTSUPP;

This fixes it by removing the bogus statement.

Fixes: ea601e170988 ("devlink: Add devlink notifications support for params")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ed571385626..f67c29cede15 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -768,7 +768,6 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 static inline void
 devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
-	return -EOPNOTSUPP;
 }
 
 #endif
-- 
cgit v1.2.3


From 2064c3d4c02026572d4975177f28a58052f0a8b7 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 6 Jul 2018 05:38:12 +0000
Subject: net/flow_dissector: Save vlan ethertype from headers

Change vlan dissector key to save vlan tpid to support both 802.1Q
and 802.1AD ethertype.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 2 +-
 net/core/flow_dissector.c    | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index adc24df56b90..8f899688a965 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -47,7 +47,7 @@ struct flow_dissector_key_tags {
 struct flow_dissector_key_vlan {
 	u16	vlan_id:12,
 		vlan_priority:3;
-	u16	padding;
+	__be16	vlan_tpid;
 };
 
 struct flow_dissector_key_mpls {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 53f96e4f7bf5..18cb99b50cba 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -751,6 +751,7 @@ proto_again:
 		const struct vlan_hdr *vlan;
 		struct vlan_hdr _vlan;
 		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
+		__be16 saved_vlan_tpid = proto;
 
 		if (vlan_tag_present)
 			proto = skb->protocol;
@@ -789,6 +790,7 @@ proto_again:
 					(ntohs(vlan->h_vlan_TCI) &
 					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 			}
+			key_vlan->vlan_tpid = saved_vlan_tpid;
 		}
 
 		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-- 
cgit v1.2.3


From 24c590e3b0f9eebe603ebe3d516990306d385f46 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 6 Jul 2018 05:38:14 +0000
Subject: net/flow_dissector: Add support for QinQ dissection

Dissect the QinQ packets to get both outer and inner vlan information,
then store to the extended flow keys.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h |  2 ++
 net/core/flow_dissector.c    | 32 +++++++++++++++++---------------
 2 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8f899688a965..c64406717eee 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -206,6 +206,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_MPLS, /* struct flow_dissector_key_mpls */
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 
 	FLOW_DISSECTOR_KEY_MAX,
 };
@@ -237,6 +238,7 @@ struct flow_keys {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_tags tags;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	struct flow_dissector_key_keyid keyid;
 	struct flow_dissector_key_ports ports;
 	struct flow_dissector_key_addrs addrs;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 18cb99b50cba..b555fc229e96 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -589,7 +589,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
-	bool skip_vlan = false;
+	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -748,15 +748,14 @@ proto_again:
 	}
 	case htons(ETH_P_8021AD):
 	case htons(ETH_P_8021Q): {
-		const struct vlan_hdr *vlan;
+		const struct vlan_hdr *vlan = NULL;
 		struct vlan_hdr _vlan;
-		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
 		__be16 saved_vlan_tpid = proto;
 
-		if (vlan_tag_present)
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
+		    skb && skb_vlan_tag_present(skb)) {
 			proto = skb->protocol;
-
-		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
+		} else {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
 			if (!vlan) {
@@ -766,20 +765,23 @@ proto_again:
 
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan) {
-				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-				break;
-			}
 		}
 
-		skip_vlan = true;
-		if (dissector_uses_key(flow_dissector,
-				       FLOW_DISSECTOR_KEY_VLAN)) {
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
+		} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
+		} else {
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
+		}
+
+		if (dissector_uses_key(flow_dissector, dissector_vlan)) {
 			key_vlan = skb_flow_dissector_target(flow_dissector,
-							     FLOW_DISSECTOR_KEY_VLAN,
+							     dissector_vlan,
 							     target_container);
 
-			if (vlan_tag_present) {
+			if (!vlan) {
 				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
 				key_vlan->vlan_priority =
 					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
-- 
cgit v1.2.3


From d64efd0926ba4f32e657e615a4f4a6170d5cc0fa Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Fri, 6 Jul 2018 05:38:16 +0000
Subject: net/sched: flower: Add supprt for matching on QinQ vlan headers

As support dissecting of QinQ inner and outer vlan headers, user can
add rules to match on QinQ vlan headers.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  4 +++
 net/sched/cls_flower.c       | 65 ++++++++++++++++++++++++++++++++++----------
 2 files changed, 55 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 84e4c1d0f874..c4262d911596 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -469,6 +469,10 @@ enum {
 	TCA_FLOWER_KEY_IP_TTL,		/* u8 */
 	TCA_FLOWER_KEY_IP_TTL_MASK,	/* u8 */
 
+	TCA_FLOWER_KEY_CVLAN_ID,	/* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE,	/* be16 */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e93b13d2cb81..487a152a852c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -35,6 +35,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	union {
 		struct flow_dissector_key_ipv4_addrs ipv4;
 		struct flow_dissector_key_ipv6_addrs ipv6;
@@ -449,6 +450,9 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -501,19 +505,20 @@ static int fl_set_key_mpls(struct nlattr **tb,
 
 static void fl_set_key_vlan(struct nlattr **tb,
 			    __be16 ethertype,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *key_val,
 			    struct flow_dissector_key_vlan *key_mask)
 {
 #define VLAN_PRIORITY_MASK	0x7
 
-	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+	if (tb[vlan_id_key]) {
 		key_val->vlan_id =
-			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+			nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
 		key_mask->vlan_id = VLAN_VID_MASK;
 	}
-	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+	if (tb[vlan_prio_key]) {
 		key_val->vlan_priority =
-			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+			nla_get_u8(tb[vlan_prio_key]) &
 			VLAN_PRIORITY_MASK;
 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
 	}
@@ -596,11 +601,25 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
 
 		if (eth_type_vlan(ethertype)) {
-			fl_set_key_vlan(tb, ethertype, &key->vlan, &mask->vlan);
-			fl_set_key_val(tb, &key->basic.n_proto,
-				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-				       sizeof(key->basic.n_proto));
+			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
+					&mask->vlan);
+
+			ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+			if (eth_type_vlan(ethertype)) {
+				fl_set_key_vlan(tb, ethertype,
+						TCA_FLOWER_KEY_CVLAN_ID,
+						TCA_FLOWER_KEY_CVLAN_PRIO,
+						&key->cvlan, &mask->cvlan);
+				fl_set_key_val(tb, &key->basic.n_proto,
+					       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+					       &mask->basic.n_proto,
+					       TCA_FLOWER_UNSPEC,
+					       sizeof(key->basic.n_proto));
+			} else {
+				key->basic.n_proto = ethertype;
+				mask->basic.n_proto = cpu_to_be16(~0);
+			}
 		} else {
 			key->basic.n_proto = ethertype;
 			mask->basic.n_proto = cpu_to_be16(~0);
@@ -825,6 +844,8 @@ static void fl_init_dissector(struct fl_flow_mask *mask)
 			     FLOW_DISSECTOR_KEY_MPLS, mpls);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_CVLAN, cvlan);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
@@ -1201,6 +1222,7 @@ static int fl_dump_key_ip(struct sk_buff *skb,
 }
 
 static int fl_dump_key_vlan(struct sk_buff *skb,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *vlan_key,
 			    struct flow_dissector_key_vlan *vlan_mask)
 {
@@ -1209,13 +1231,13 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
 		return 0;
 	if (vlan_mask->vlan_id) {
-		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+		err = nla_put_u16(skb, vlan_id_key,
 				  vlan_key->vlan_id);
 		if (err)
 			return err;
 	}
 	if (vlan_mask->vlan_priority) {
-		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+		err = nla_put_u8(skb, vlan_prio_key,
 				 vlan_key->vlan_priority);
 		if (err)
 			return err;
@@ -1310,13 +1332,28 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
 		goto nla_put_failure;
 
-	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_VLAN_ID,
+			     TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan))
 		goto nla_put_failure;
 
-	if (mask->vlan.vlan_tpid &&
-	    nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE, key->basic.n_proto))
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_CVLAN_ID,
+			     TCA_FLOWER_KEY_CVLAN_PRIO,
+			     &key->cvlan, &mask->cvlan) ||
+	    (mask->cvlan.vlan_tpid &&
+	     nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+			 key->cvlan.vlan_tpid)))
 		goto nla_put_failure;
 
+	if (mask->cvlan.vlan_tpid) {
+		if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+				 key->basic.n_proto))
+			goto nla_put_failure;
+	} else if (mask->vlan.vlan_tpid) {
+		if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+				 key->basic.n_proto))
+			goto nla_put_failure;
+	}
+
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
-- 
cgit v1.2.3


From 11a245e2f7bf25fc21f47e4c9c8491841b128890 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 6 Jul 2018 21:01:05 +0200
Subject: net/sched: act_csum: fix NULL dereference when 'goto chain' is used

the control action in the common member of struct tcf_csum must be a valid
value, as it can contain the chain index when 'goto chain' is used. Ensure
that the control action can be read as x->tcfa_action, when x is a pointer
to struct tc_action and x->ops->type is TCA_ACT_CSUM, to prevent the
following command:

  # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
  > $tcflags dst_mac $h2mac action csum ip or tcp or udp or sctp goto chain 1

from triggering a NULL pointer dereference when a matching packet is
received.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 800000010416b067 P4D 800000010416b067 PUD 1041be067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 0 PID: 3072 Comm: mausezahn Tainted: G            E     4.18.0-rc2.auguri+ #421
 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffffa020dea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffffa020d7ccef00 RCX: 0000000000000054
 RDX: 0000000000000000 RSI: ffffa020ca5ae000 RDI: ffffa020d7ccef00
 RBP: ffffa020dea03e60 R08: 0000000000000000 R09: ffffa020dea03c9c
 R10: ffffa020dea03c78 R11: 0000000000000008 R12: ffffa020d3fe4f00
 R13: ffffa020d3fe4f08 R14: 0000000000000001 R15: ffffa020d53ca300
 FS:  00007f5a46942740(0000) GS:ffffa020dea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 0000000104218002 CR4: 00000000001606f0
 Call Trace:
  <IRQ>
  fl_classify+0x1ad/0x1c0 [cls_flower]
  ? arp_rcv+0x121/0x1b0
  ? __x2apic_send_IPI_dest+0x40/0x40
  ? smp_reschedule_interrupt+0x1c/0xd0
  ? reschedule_interrupt+0xf/0x20
  ? reschedule_interrupt+0xa/0x20
  ? device_is_rmrr_locked+0xe/0x50
  ? iommu_should_identity_map+0x49/0xd0
  ? __intel_map_single+0x30/0x140
  ? e1000e_update_rdt_wa.isra.52+0x22/0xb0 [e1000e]
  ? e1000_alloc_rx_buffers+0x233/0x250 [e1000e]
  ? kmem_cache_alloc+0x38/0x1c0
  tcf_classify+0x89/0x140
  __netif_receive_skb_core+0x5ea/0xb70
  ? enqueue_task_fair+0xb6/0x7d0
  ? process_backlog+0x97/0x150
  process_backlog+0x97/0x150
  net_rx_action+0x14b/0x3e0
  __do_softirq+0xde/0x2b4
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  do_softirq.part.18+0x49/0x50
  __local_bh_enable_ip+0x49/0x50
  __dev_queue_xmit+0x4ab/0x8a0
  ? wait_woken+0x80/0x80
  ? packet_sendmsg+0x38f/0x810
  ? __dev_queue_xmit+0x8a0/0x8a0
  packet_sendmsg+0x38f/0x810
  sock_sendmsg+0x36/0x40
  __sys_sendto+0x10e/0x140
  ? do_vfs_ioctl+0xa4/0x630
  ? syscall_trace_enter+0x1df/0x2e0
  ? __audit_syscall_exit+0x22a/0x290
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f5a45cbec93
 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24
 RSP: 002b:00007ffd0ee6d748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 0000000001161010 RCX: 00007f5a45cbec93
 RDX: 0000000000000062 RSI: 0000000001161322 RDI: 0000000000000003
 RBP: 00007ffd0ee6d780 R08: 00007ffd0ee6d760 R09: 0000000000000014
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062
 R13: 0000000001161322 R14: 00007ffd0ee6d760 R15: 0000000000000003
 Modules linked in: act_csum act_gact cls_flower sch_ingress vrf veth act_tunnel_key(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel snd_hda_codec_hdmi snd_hda_codec_realtek kvm snd_hda_codec_generic hp_wmi iTCO_wdt sparse_keymap rfkill mei_wdt iTCO_vendor_support wmi_bmof gpio_ich irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel snd_hda_intel crypto_simd cryptd snd_hda_codec glue_helper snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm pcspkr i2c_i801 snd_timer snd sg lpc_ich soundcore wmi mei_me
  mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom sd_mod ahci libahci crc32c_intel i915 ixgbe serio_raw libata video dca i2c_algo_bit sfc drm_kms_helper syscopyarea mtd sysfillrect mdio sysimgblt fb_sys_fops drm e1000e i2c_core
 CR2: 0000000000000000
 ---[ end trace 3c9e9d1a77df4026 ]---
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffffa020dea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffffa020d7ccef00 RCX: 0000000000000054
 RDX: 0000000000000000 RSI: ffffa020ca5ae000 RDI: ffffa020d7ccef00
 RBP: ffffa020dea03e60 R08: 0000000000000000 R09: ffffa020dea03c9c
 R10: ffffa020dea03c78 R11: 0000000000000008 R12: ffffa020d3fe4f00
 R13: ffffa020d3fe4f08 R14: 0000000000000001 R15: ffffa020d53ca300
 FS:  00007f5a46942740(0000) GS:ffffa020dea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 0000000104218002 CR4: 00000000001606f0
 Kernel panic - not syncing: Fatal exception in interrupt
 Kernel Offset: 0x26400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: 9c5f69bbd75a ("net/sched: act_csum: don't use spinlock in the fast path")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_csum.h | 1 -
 net/sched/act_csum.c         | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 9470fd7e4350..32d2454c0479 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -7,7 +7,6 @@
 #include <linux/tc_act/tc_csum.h>
 
 struct tcf_csum_params {
-	int action;
 	u32 update_flags;
 	struct rcu_head rcu;
 };
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 526a8e491626..6e7124e57918 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -91,7 +91,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	}
 	params_old = rtnl_dereference(p->params);
 
-	params_new->action = parm->action;
+	p->tcf_action = parm->action;
 	params_new->update_flags = parm->update_flags;
 	rcu_assign_pointer(p->params, params_new);
 	if (params_old)
@@ -561,7 +561,7 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
 	tcf_lastuse_update(&p->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
 
-	action = params->action;
+	action = READ_ONCE(p->tcf_action);
 	if (unlikely(action == TC_ACT_SHOT))
 		goto drop_stats;
 
@@ -599,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 		.index   = p->tcf_index,
 		.refcnt  = p->tcf_refcnt - ref,
 		.bindcnt = p->tcf_bindcnt - bind,
+		.action  = p->tcf_action,
 	};
 	struct tcf_t t;
 
 	params = rtnl_dereference(p->params);
-	opt.action = params->action;
 	opt.update_flags = params->update_flags;
 
 	if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
-- 
cgit v1.2.3


From 38230a3e0e0933bbcf5df6fa469ba0667f667568 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 6 Jul 2018 21:01:06 +0200
Subject: net/sched: act_tunnel_key: fix NULL dereference when 'goto chain' is
 used

the control action in the common member of struct tcf_tunnel_key must be a
valid value, as it can contain the chain index when 'goto chain' is used.
Ensure that the control action can be read as x->tcfa_action, when x is a
pointer to struct tc_action and x->ops->type is TCA_ACT_TUNNEL_KEY, to
prevent the following command:

 # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
 > $tcflags dst_mac $h2mac action tunnel_key unset goto chain 1

from causing a NULL dereference when a matching packet is received:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 80000001097ac067 P4D 80000001097ac067 PUD 103b0a067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 0 PID: 3491 Comm: mausezahn Tainted: G            E     4.18.0-rc2.auguri+ #421
 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001
 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c
 R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800
 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40
 FS:  00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0
 Call Trace:
  <IRQ>
  fl_classify+0x1ad/0x1c0 [cls_flower]
  ? __update_load_avg_se.isra.47+0x1ca/0x1d0
  ? __update_load_avg_se.isra.47+0x1ca/0x1d0
  ? update_load_avg+0x665/0x690
  ? update_load_avg+0x665/0x690
  ? kmem_cache_alloc+0x38/0x1c0
  tcf_classify+0x89/0x140
  __netif_receive_skb_core+0x5ea/0xb70
  ? enqueue_entity+0xd0/0x270
  ? process_backlog+0x97/0x150
  process_backlog+0x97/0x150
  net_rx_action+0x14b/0x3e0
  __do_softirq+0xde/0x2b4
  do_softirq_own_stack+0x2a/0x40
  </IRQ>
  do_softirq.part.18+0x49/0x50
  __local_bh_enable_ip+0x49/0x50
  __dev_queue_xmit+0x4ab/0x8a0
  ? wait_woken+0x80/0x80
  ? packet_sendmsg+0x38f/0x810
  ? __dev_queue_xmit+0x8a0/0x8a0
  packet_sendmsg+0x38f/0x810
  sock_sendmsg+0x36/0x40
  __sys_sendto+0x10e/0x140
  ? do_vfs_ioctl+0xa4/0x630
  ? syscall_trace_enter+0x1df/0x2e0
  ? __audit_syscall_exit+0x22a/0x290
  __x64_sys_sendto+0x24/0x30
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7fd67e18dc93
 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24
 RSP: 002b:00007ffe0189b748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 00000000020ca010 RCX: 00007fd67e18dc93
 RDX: 0000000000000062 RSI: 00000000020ca322 RDI: 0000000000000003
 RBP: 00007ffe0189b780 R08: 00007ffe0189b760 R09: 0000000000000014
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062
 R13: 00000000020ca322 R14: 00007ffe0189b760 R15: 0000000000000003
 Modules linked in: act_tunnel_key act_gact cls_flower sch_ingress vrf veth act_csum(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic kvm_intel kvm irqbypass snd_hda_intel crct10dif_pclmul crc32_pclmul hp_wmi ghash_clmulni_intel pcbc snd_hda_codec aesni_intel sparse_keymap rfkill snd_hda_core snd_hwdep snd_seq crypto_simd iTCO_wdt gpio_ich iTCO_vendor_support wmi_bmof cryptd mei_wdt glue_helper snd_seq_device snd_pcm pcspkr snd_timer snd i2c_i801 lpc_ich sg soundcore wmi mei_me
  mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod sr_mod cdrom i915 video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ahci crc32c_intel libahci serio_raw sfc libata mtd drm ixgbe mdio i2c_core e1000e dca
 CR2: 0000000000000000
 ---[ end trace 1ab8b5b5d4639dfc ]---
 RIP: 0010:tcf_action_exec+0xb8/0x100
 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246
 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001
 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000
 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c
 R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800
 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40
 FS:  00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0
 Kernel panic - not syncing: Fatal exception in interrupt
 Kernel Offset: 0x11400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
 ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_tunnel_key.h | 1 -
 net/sched/act_tunnel_key.c         | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index efef0b4b1b2b..46b8c7f1c8d5 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -18,7 +18,6 @@
 struct tcf_tunnel_key_params {
 	struct rcu_head		rcu;
 	int			tcft_action;
-	int			action;
 	struct metadata_dst     *tcft_enc_metadata;
 };
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 626dac81a48a..9bc6c2ae98a5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -36,7 +36,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
 
 	tcf_lastuse_update(&t->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
-	action = params->action;
+	action = READ_ONCE(t->tcf_action);
 
 	switch (params->tcft_action) {
 	case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -182,7 +182,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
 	params_old = rtnl_dereference(t->params);
 
-	params_new->action = parm->action;
+	t->tcf_action = parm->action;
 	params_new->tcft_action = parm->t_action;
 	params_new->tcft_enc_metadata = metadata;
 
@@ -254,13 +254,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 		.index    = t->tcf_index,
 		.refcnt   = t->tcf_refcnt - ref,
 		.bindcnt  = t->tcf_bindcnt - bind,
+		.action   = t->tcf_action,
 	};
 	struct tcf_t tm;
 
 	params = rtnl_dereference(t->params);
 
 	opt.t_action = params->tcft_action;
-	opt.action = params->action;
 
 	if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-- 
cgit v1.2.3


From 543af5861f41af0a5d2432f6fb5976af50f9cee5 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Fri, 6 Jul 2018 22:05:38 -0400
Subject: uio: change to use the mutex lock instead of the spin lock

We are hitting a regression with the following commit:

commit a93e7b331568227500186a465fee3c2cb5dffd1f
Author: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Date:   Mon May 14 13:32:23 2018 +1200

    uio: Prevent device destruction while fds are open

The problem is the addition of spin_lock_irqsave in uio_write. This
leads to hitting  uio_write -> copy_from_user -> _copy_from_user ->
might_fault and the logs filling up with sleeping warnings.

I also noticed some uio drivers allocate memory, sleep, grab mutexes
from callouts like open() and release and uio is now doing
spin_lock_irqsave while calling them.

Reported-by: Mike Christie <mchristi@redhat.com>
CC: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Reviewed-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c          | 32 +++++++++++++-------------------
 include/linux/uio_driver.h |  2 +-
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index b4b2ae1e0473..655ade4fb3b1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -433,7 +433,6 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_device *idev;
 	struct uio_listener *listener;
 	int ret = 0;
-	unsigned long flags;
 
 	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
@@ -460,10 +459,10 @@ static int uio_open(struct inode *inode, struct file *filep)
 	listener->event_count = atomic_read(&idev->event);
 	filep->private_data = listener;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->open)
 		ret = idev->info->open(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	if (ret)
 		goto err_infoopen;
 
@@ -495,12 +494,11 @@ static int uio_release(struct inode *inode, struct file *filep)
 	int ret = 0;
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->release)
 		ret = idev->info->release(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	module_put(idev->owner);
 	kfree(listener);
@@ -513,12 +511,11 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait)
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
 	__poll_t ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		ret = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (ret)
 		return ret;
@@ -537,12 +534,11 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
 	DECLARE_WAITQUEUE(wait, current);
 	ssize_t retval = 0;
 	s32 event_count;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		retval = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (retval)
 		return retval;
@@ -592,9 +588,8 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	struct uio_device *idev = listener->dev;
 	ssize_t retval;
 	s32 irq_on;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq) {
 		retval = -EIO;
 		goto out;
@@ -618,7 +613,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	retval = idev->info->irqcontrol(idev->info, irq_on);
 
 out:
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	return retval ? retval : sizeof(s32);
 }
 
@@ -865,7 +860,7 @@ int __uio_register_device(struct module *owner,
 
 	idev->owner = owner;
 	idev->info = info;
-	spin_lock_init(&idev->info_lock);
+	mutex_init(&idev->info_lock);
 	init_waitqueue_head(&idev->wait);
 	atomic_set(&idev->event, 0);
 
@@ -929,7 +924,6 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
 void uio_unregister_device(struct uio_info *info)
 {
 	struct uio_device *idev;
-	unsigned long flags;
 
 	if (!info || !info->uio_dev)
 		return;
@@ -943,9 +937,9 @@ void uio_unregister_device(struct uio_info *info)
 	if (info->irq && info->irq != UIO_IRQ_CUSTOM)
 		free_irq(info->irq, idev);
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	idev->info = NULL;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	device_unregister(&idev->dev);
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6c5f2074e14f..6f8b68cd460f 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -75,7 +75,7 @@ struct uio_device {
         struct fasync_struct    *async_queue;
         wait_queue_head_t       wait;
         struct uio_info         *info;
-	spinlock_t		info_lock;
+	struct mutex		info_lock;
         struct kobject          *map_dir;
         struct kobject          *portio_dir;
 };
-- 
cgit v1.2.3


From e0772de8a48b69d39624cdf99fac8f4a3fcc387b Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 19 Jun 2018 17:12:57 +0100
Subject: slimbus: core: add of_slim_device_get() helper

On SLIMBus controllers like Qcom NGD(non ported device), controller
can request logical address once the remote side is powered, having a
helper function like this to explicitly enumerate the bus is helpful.
Also codec drivers which are taking to interface device would need
such a helper too.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/core.c  | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/slimbus.h |  2 ++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 7ddfc675b131..88248a4ecad9 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -356,6 +356,45 @@ struct slim_device *slim_get_device(struct slim_controller *ctrl,
 }
 EXPORT_SYMBOL_GPL(slim_get_device);
 
+static int of_slim_match_dev(struct device *dev, void *data)
+{
+	struct device_node *np = data;
+	struct slim_device *sbdev = to_slim_device(dev);
+
+	return (sbdev->dev.of_node == np);
+}
+
+static struct slim_device *of_find_slim_device(struct slim_controller *ctrl,
+					       struct device_node *np)
+{
+	struct slim_device *sbdev;
+	struct device *dev;
+
+	dev = device_find_child(ctrl->dev, np, of_slim_match_dev);
+	if (dev) {
+		sbdev = to_slim_device(dev);
+		return sbdev;
+	}
+
+	return NULL;
+}
+
+/**
+ * of_slim_get_device() - get handle to a device using dt node.
+ *
+ * @ctrl: Controller on which this device will be added/queried
+ * @np: node pointer to device
+ *
+ * Return: pointer to a device if it has already reported. Creates a new
+ * device and returns pointer to it if the device has not yet enumerated.
+ */
+struct slim_device *of_slim_get_device(struct slim_controller *ctrl,
+				       struct device_node *np)
+{
+	return of_find_slim_device(ctrl, np);
+}
+EXPORT_SYMBOL_GPL(of_slim_get_device);
+
 static int slim_device_alloc_laddr(struct slim_device *sbdev,
 				   bool report_present)
 {
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index c36cf121d2cd..efa36a852dc3 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -138,6 +138,8 @@ static inline void slim_set_devicedata(struct slim_device *dev, void *data)
 	dev_set_drvdata(&dev->dev, data);
 }
 
+struct slim_device *of_slim_get_device(struct slim_controller *ctrl,
+				       struct device_node *np);
 struct slim_device *slim_get_device(struct slim_controller *ctrl,
 				    struct slim_eaddr *e_addr);
 int slim_get_logical_addr(struct slim_device *sbdev);
-- 
cgit v1.2.3


From db455d66b9900a8a81fe474f11c78c8c8a95eefa Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 19 Jun 2018 17:12:58 +0100
Subject: slimbus: core: rearrange slim_eaddr structure

Rearrange struct slim_eaddr so that the structure is packed correctly
to be able to send in SLIMBus messages.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/slimbus.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index efa36a852dc3..63801bcc5e60 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -14,16 +14,16 @@ extern struct bus_type slimbus_bus;
 
 /**
  * struct slim_eaddr - Enumeration address for a SLIMbus device
- * @manf_id: Manufacturer Id for the device
- * @prod_code: Product code
- * @dev_index: Device index
  * @instance: Instance value
+ * @dev_index: Device index
+ * @prod_code: Product code
+ * @manf_id: Manufacturer Id for the device
  */
 struct slim_eaddr {
-	u16 manf_id;
-	u16 prod_code;
-	u8 dev_index;
 	u8 instance;
+	u8 dev_index;
+	u16 prod_code;
+	u16 manf_id;
 } __packed;
 
 /**
-- 
cgit v1.2.3


From abb9c9b8b51ba53b47ad7685ad2a0a64dbbf7bf5 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 5 Jul 2018 14:54:25 +0100
Subject: slimbus: stream: add stream support

This patch adds support to SLIMbus stream apis for slimbus device.
SLIMbus streaming involves adding support to Data Channel Management and
channel Reconfiguration Messages to slim core plus few stream apis.
>From slim device side the apis are very simple mostly inline with other
stream apis.

Currently it only supports Isochronous and Push/Pull transport protocols,
which are sufficient for audio use cases.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/slimbus.rst |   5 +
 drivers/slimbus/Makefile             |   2 +-
 drivers/slimbus/core.c               |   2 +
 drivers/slimbus/slimbus.h            | 189 ++++++++++++++
 drivers/slimbus/stream.c             | 477 +++++++++++++++++++++++++++++++++++
 include/linux/slimbus.h              |  46 ++++
 6 files changed, 720 insertions(+), 1 deletion(-)
 create mode 100644 drivers/slimbus/stream.c

(limited to 'include')

diff --git a/Documentation/driver-api/slimbus.rst b/Documentation/driver-api/slimbus.rst
index a97449cf603a..410eec79b2a1 100644
--- a/Documentation/driver-api/slimbus.rst
+++ b/Documentation/driver-api/slimbus.rst
@@ -125,3 +125,8 @@ Messaging APIs:
 ~~~~~~~~~~~~~~~
 .. kernel-doc:: drivers/slimbus/messaging.c
    :export:
+
+Streaming APIs:
+~~~~~~~~~~~~~~~
+.. kernel-doc:: drivers/slimbus/stream.c
+   :export:
diff --git a/drivers/slimbus/Makefile b/drivers/slimbus/Makefile
index c78c6e16c2df..d9aa011b6804 100644
--- a/drivers/slimbus/Makefile
+++ b/drivers/slimbus/Makefile
@@ -3,7 +3,7 @@
 # Makefile for kernel SLIMbus framework.
 #
 obj-$(CONFIG_SLIMBUS)			+= slimbus.o
-slimbus-y				:= core.o messaging.o sched.o
+slimbus-y				:= core.o messaging.o sched.o stream.o
 
 #Controllers
 obj-$(CONFIG_SLIM_QCOM_CTRL)		+= slim-qcom-ctrl.o
diff --git a/drivers/slimbus/core.c b/drivers/slimbus/core.c
index 88248a4ecad9..95b00d28ad6e 100644
--- a/drivers/slimbus/core.c
+++ b/drivers/slimbus/core.c
@@ -114,6 +114,8 @@ static int slim_add_device(struct slim_controller *ctrl,
 	sbdev->dev.release = slim_dev_release;
 	sbdev->dev.driver = NULL;
 	sbdev->ctrl = ctrl;
+	INIT_LIST_HEAD(&sbdev->stream_list);
+	spin_lock_init(&sbdev->stream_list_lock);
 
 	if (node)
 		sbdev->dev.of_node = of_node_get(node);
diff --git a/drivers/slimbus/slimbus.h b/drivers/slimbus/slimbus.h
index 63229e8cd050..6dbc2b320704 100644
--- a/drivers/slimbus/slimbus.h
+++ b/drivers/slimbus/slimbus.h
@@ -45,9 +45,20 @@
 #define SLIM_MSG_MC_ASSIGN_LOGICAL_ADDRESS       0x2
 #define SLIM_MSG_MC_REPORT_ABSENT                0xF
 
+/* Data channel management messages */
+#define SLIM_MSG_MC_CONNECT_SOURCE		0x10
+#define SLIM_MSG_MC_CONNECT_SINK		0x11
+#define SLIM_MSG_MC_DISCONNECT_PORT		0x14
+#define SLIM_MSG_MC_CHANGE_CONTENT		0x18
+
 /* Clock pause Reconfiguration messages */
 #define SLIM_MSG_MC_BEGIN_RECONFIGURATION        0x40
 #define SLIM_MSG_MC_NEXT_PAUSE_CLOCK             0x4A
+#define SLIM_MSG_MC_NEXT_DEFINE_CHANNEL          0x50
+#define SLIM_MSG_MC_NEXT_DEFINE_CONTENT          0x51
+#define SLIM_MSG_MC_NEXT_ACTIVATE_CHANNEL        0x54
+#define SLIM_MSG_MC_NEXT_DEACTIVATE_CHANNEL      0x55
+#define SLIM_MSG_MC_NEXT_REMOVE_CHANNEL          0x58
 #define SLIM_MSG_MC_RECONFIGURE_NOW              0x5F
 
 /*
@@ -69,7 +80,15 @@
 /* Standard values per SLIMbus spec needed by controllers and devices */
 #define SLIM_MAX_CLK_GEAR		10
 #define SLIM_MIN_CLK_GEAR		1
+#define SLIM_SLOT_LEN_BITS		4
+
+/* Indicate that the frequency of the flow and the bus frequency are locked */
+#define SLIM_CHANNEL_CONTENT_FL		BIT(7)
 
+/* Standard values per SLIMbus spec needed by controllers and devices */
+#define SLIM_CL_PER_SUPERFRAME		6144
+#define SLIM_SLOTS_PER_SUPERFRAME	(SLIM_CL_PER_SUPERFRAME >> 2)
+#define SLIM_SL_PER_SUPERFRAME		(SLIM_CL_PER_SUPERFRAME >> 2)
 /* Manager's logical address is set to 0xFF per spec */
 #define SLIM_LA_MANAGER 0xFF
 
@@ -167,6 +186,170 @@ struct slim_sched {
 	struct mutex		m_reconf;
 };
 
+/**
+ * enum slim_port_direction: SLIMbus port direction
+ *
+ * @SLIM_PORT_SINK: SLIMbus port is a sink
+ * @SLIM_PORT_SOURCE: SLIMbus port is a source
+ */
+enum slim_port_direction {
+	SLIM_PORT_SINK = 0,
+	SLIM_PORT_SOURCE,
+};
+/**
+ * enum slim_port_state: SLIMbus Port/Endpoint state machine
+ *	according to SLIMbus Spec 2.0
+ * @SLIM_PORT_DISCONNECTED: SLIMbus port is disconnected
+ *	entered from Unconfigure/configured state after
+ *	DISCONNECT_PORT or REMOVE_CHANNEL core command
+ * @SLIM_PORT_UNCONFIGURED: SLIMbus port is in unconfigured state.
+ *	entered from disconnect state after CONNECT_SOURCE/SINK core command
+ * @SLIM_PORT_CONFIGURED: SLIMbus port is in configured state.
+ *	entered from unconfigured state after DEFINE_CHANNEL, DEFINE_CONTENT
+ *	and ACTIVATE_CHANNEL core commands. Ready for data transmission.
+ */
+enum slim_port_state {
+	SLIM_PORT_DISCONNECTED = 0,
+	SLIM_PORT_UNCONFIGURED,
+	SLIM_PORT_CONFIGURED,
+};
+
+/**
+ * enum slim_channel_state: SLIMbus channel state machine used by core.
+ * @SLIM_CH_STATE_DISCONNECTED: SLIMbus channel is disconnected
+ * @SLIM_CH_STATE_ALLOCATED: SLIMbus channel is allocated
+ * @SLIM_CH_STATE_ASSOCIATED: SLIMbus channel is associated with port
+ * @SLIM_CH_STATE_DEFINED: SLIMbus channel parameters are defined
+ * @SLIM_CH_STATE_CONTENT_DEFINED: SLIMbus channel content is defined
+ * @SLIM_CH_STATE_ACTIVE: SLIMbus channel is active and ready for data
+ * @SLIM_CH_STATE_REMOVED: SLIMbus channel is inactive and removed
+ */
+enum slim_channel_state {
+	SLIM_CH_STATE_DISCONNECTED = 0,
+	SLIM_CH_STATE_ALLOCATED,
+	SLIM_CH_STATE_ASSOCIATED,
+	SLIM_CH_STATE_DEFINED,
+	SLIM_CH_STATE_CONTENT_DEFINED,
+	SLIM_CH_STATE_ACTIVE,
+	SLIM_CH_STATE_REMOVED,
+};
+
+/**
+ * enum slim_ch_data_fmt: SLIMbus channel data Type identifiers according to
+ *	Table 60 of SLIMbus Spec 1.01.01
+ * @SLIM_CH_DATA_FMT_NOT_DEFINED: Undefined
+ * @SLIM_CH_DATA_FMT_LPCM_AUDIO: LPCM audio
+ * @SLIM_CH_DATA_FMT_IEC61937_COMP_AUDIO: IEC61937 Compressed audio
+ * @SLIM_CH_DATA_FMT_PACKED_PDM_AUDIO: Packed PDM audio
+ */
+enum slim_ch_data_fmt {
+	SLIM_CH_DATA_FMT_NOT_DEFINED = 0,
+	SLIM_CH_DATA_FMT_LPCM_AUDIO = 1,
+	SLIM_CH_DATA_FMT_IEC61937_COMP_AUDIO = 2,
+	SLIM_CH_DATA_FMT_PACKED_PDM_AUDIO = 3,
+};
+
+/**
+ * enum slim_ch_aux_fmt: SLIMbus channel Aux Field format IDs according to
+ *	Table 63 of SLIMbus Spec 2.0
+ * @SLIM_CH_AUX_FMT_NOT_APPLICABLE: Undefined
+ * @SLIM_CH_AUX_FMT_ZCUV_TUNNEL_IEC60958: ZCUV for tunneling IEC60958
+ * @SLIM_CH_AUX_FMT_USER_DEFINED: User defined
+ */
+enum slim_ch_aux_bit_fmt {
+	SLIM_CH_AUX_FMT_NOT_APPLICABLE = 0,
+	SLIM_CH_AUX_FMT_ZCUV_TUNNEL_IEC60958 = 1,
+	SLIM_CH_AUX_FMT_USER_DEFINED = 0xF,
+};
+
+/**
+ * struct slim_channel  - SLIMbus channel, used for state machine
+ *
+ * @id: ID of channel
+ * @prrate: Presense rate of channel from Table 66 of SLIMbus 2.0 Specs
+ * @seg_dist: segment distribution code from Table 20 of SLIMbus 2.0 Specs
+ * @data_fmt: Data format of channel.
+ * @aux_fmt: Aux format for this channel.
+ * @state: channel state machine
+ */
+struct slim_channel {
+	int id;
+	int prrate;
+	int seg_dist;
+	enum slim_ch_data_fmt data_fmt;
+	enum slim_ch_aux_bit_fmt aux_fmt;
+	enum slim_channel_state state;
+};
+
+/**
+ * struct slim_port  - SLIMbus port
+ *
+ * @id: Port id
+ * @direction: Port direction, Source or Sink.
+ * @state: state machine of port.
+ * @ch: channel associated with this port.
+ */
+struct slim_port {
+	int id;
+	enum slim_port_direction direction;
+	enum slim_port_state state;
+	struct slim_channel ch;
+};
+
+/**
+ * enum slim_transport_protocol: SLIMbus Transport protocol list from
+ *	Table 47 of SLIMbus 2.0 specs.
+ * @SLIM_PROTO_ISO: Isochronous Protocol, no flow control as data rate match
+ *		channel rate flow control embedded in the data.
+ * @SLIM_PROTO_PUSH: Pushed Protocol, includes flow control, Used to carry
+ *		data whose rate	is equal to, or lower than the channel rate.
+ * @SLIM_PROTO_PULL: Pulled Protocol, similar usage as pushed protocol
+ *		but pull is a unicast.
+ * @SLIM_PROTO_LOCKED: Locked Protocol
+ * @SLIM_PROTO_ASYNC_SMPLX: Asynchronous Protocol-Simplex
+ * @SLIM_PROTO_ASYNC_HALF_DUP: Asynchronous Protocol-Half-duplex
+ * @SLIM_PROTO_EXT_SMPLX: Extended Asynchronous Protocol-Simplex
+ * @SLIM_PROTO_EXT_HALF_DUP: Extended Asynchronous Protocol-Half-duplex
+ */
+enum slim_transport_protocol {
+	SLIM_PROTO_ISO = 0,
+	SLIM_PROTO_PUSH,
+	SLIM_PROTO_PULL,
+	SLIM_PROTO_LOCKED,
+	SLIM_PROTO_ASYNC_SMPLX,
+	SLIM_PROTO_ASYNC_HALF_DUP,
+	SLIM_PROTO_EXT_SMPLX,
+	SLIM_PROTO_EXT_HALF_DUP,
+};
+
+/**
+ * struct slim_stream_runtime  - SLIMbus stream runtime instance
+ *
+ * @dev: Name of the stream
+ * @dev: SLIM Device instance associated with this stream
+ * @state: state of stream
+ * @direction: direction of stream
+ * @prot: Transport protocol used in this stream
+ * @rate: Data rate of samples *
+ * @bps: bits per sample
+ * @ratem: rate multipler which is super frame rate/data rate
+ * @num_ports: number of ports
+ * @ports: pointer to instance of ports
+ * @node: list head for stream associated with slim device.
+ */
+struct slim_stream_runtime {
+	const char *name;
+	struct slim_device *dev;
+	int direction;
+	enum slim_transport_protocol prot;
+	unsigned int rate;
+	unsigned int bps;
+	unsigned int ratem;
+	int num_ports;
+	struct slim_port *ports;
+	struct list_head node;
+};
+
 /**
  * struct slim_controller  - Controls every instance of SLIMbus
  *				(similar to 'master' on SPI)
@@ -196,6 +379,10 @@ struct slim_sched {
  * @wakeup: This function pointer implements controller-specific procedure
  *	to wake it up from clock-pause. Framework will call this to bring
  *	the controller out of clock pause.
+ * @enable_stream: This function pointer implements controller-specific procedure
+ *	to enable a stream.
+ * @disable_stream: This function pointer implements controller-specific procedure
+ *	to disable stream.
  *
  *	'Manager device' is responsible for  device management, bandwidth
  *	allocation, channel setup, and port associations per channel.
@@ -237,6 +424,8 @@ struct slim_controller {
 					     struct slim_eaddr *ea, u8 laddr);
 	int			(*get_laddr)(struct slim_controller *ctrl,
 					     struct slim_eaddr *ea, u8 *laddr);
+	int		(*enable_stream)(struct slim_stream_runtime *rt);
+	int		(*disable_stream)(struct slim_stream_runtime *rt);
 	int			(*wakeup)(struct slim_controller *ctrl);
 };
 
diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c
new file mode 100644
index 000000000000..2fa05324ed07
--- /dev/null
+++ b/drivers/slimbus/stream.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/slimbus.h>
+#include <uapi/sound/asound.h>
+#include "slimbus.h"
+
+/**
+ * struct segdist_code - Segment Distributions code from
+ *	Table 20 of SLIMbus Specs Version 2.0
+ *
+ * @ratem: Channel Rate Multipler(Segments per Superframe)
+ * @seg_interval: Number of slots between the first Slot of Segment
+ *		and the first slot of the next  consecutive Segment.
+ * @segdist_code: Segment Distribution Code SD[11:0]
+ * @seg_offset_mask: Segment offset mask in SD[11:0]
+ * @segdist_codes: List of all possible Segmet Distribution codes.
+ */
+static const struct segdist_code {
+	int ratem;
+	int seg_interval;
+	int segdist_code;
+	u32 seg_offset_mask;
+
+} segdist_codes[] = {
+	{1,	1536,	0x200,	 0xdff},
+	{2,	768,	0x100,	 0xcff},
+	{4,	384,	0x080,	 0xc7f},
+	{8,	192,	0x040,	 0xc3f},
+	{16,	96,	0x020,	 0xc1f},
+	{32,	48,	0x010,	 0xc0f},
+	{64,	24,	0x008,	 0xc07},
+	{128,	12,	0x004,	 0xc03},
+	{256,	6,	0x002,	 0xc01},
+	{512,	3,	0x001,	 0xc00},
+	{3,	512,	0xe00,	 0x1ff},
+	{6,	256,	0xd00,	 0x0ff},
+	{12,	128,	0xc80,	 0x07f},
+	{24,	64,	0xc40,	 0x03f},
+	{48,	32,	0xc20,	 0x01f},
+	{96,	16,	0xc10,	 0x00f},
+	{192,	8,	0xc08,	 0x007},
+	{364,	4,	0xc04,	 0x003},
+	{768,	2,	0xc02,	 0x001},
+};
+
+/*
+ * Presence Rate table for all Natural Frequencies
+ * The Presence rate of a constant bitrate stream is mean flow rate of the
+ * stream expressed in occupied Segments of that Data Channel per second.
+ * Table 66 from SLIMbus 2.0 Specs
+ *
+ * Index of the table corresponds to Presence rate code for the respective rate
+ * in the table.
+ */
+static const int slim_presence_rate_table[] = {
+	0, /* Not Indicated */
+	12000,
+	24000,
+	48000,
+	96000,
+	192000,
+	384000,
+	768000,
+	0, /* Reserved */
+	110250,
+	220500,
+	441000,
+	882000,
+	176400,
+	352800,
+	705600,
+	4000,
+	8000,
+	16000,
+	32000,
+	64000,
+	128000,
+	256000,
+	512000,
+};
+
+/*
+ * slim_stream_allocate() - Allocate a new SLIMbus Stream
+ * @dev:Slim device to be associated with
+ * @name: name of the stream
+ *
+ * This is very first call for SLIMbus streaming, this API will allocate
+ * a new SLIMbus stream and return a valid stream runtime pointer for client
+ * to use it in subsequent stream apis. state of stream is set to ALLOCATED
+ *
+ * Return: valid pointer on success and error code on failure.
+ * From ASoC DPCM framework, this state is linked to startup() operation.
+ */
+struct slim_stream_runtime *slim_stream_allocate(struct slim_device *dev,
+						 const char *name)
+{
+	struct slim_stream_runtime *rt;
+
+	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+	if (!rt)
+		return ERR_PTR(-ENOMEM);
+
+	rt->name = kasprintf(GFP_KERNEL, "slim-%s", name);
+	if (!rt->name) {
+		kfree(rt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	rt->dev = dev;
+	spin_lock(&dev->stream_list_lock);
+	list_add_tail(&rt->node, &dev->stream_list);
+	spin_unlock(&dev->stream_list_lock);
+
+	return rt;
+}
+EXPORT_SYMBOL_GPL(slim_stream_allocate);
+
+static int slim_connect_port_channel(struct slim_stream_runtime *stream,
+				     struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[2];
+	struct slim_val_inf msg = {0, 2, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_CONNECT_SOURCE;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 6, stream->dev->laddr, &msg);
+
+	if (port->direction == SLIM_PORT_SINK)
+		txn.mc = SLIM_MSG_MC_CONNECT_SINK;
+
+	wbuf[0] = port->id;
+	wbuf[1] = port->ch.id;
+	port->ch.state = SLIM_CH_STATE_ASSOCIATED;
+	port->state = SLIM_PORT_UNCONFIGURED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_disconnect_port(struct slim_stream_runtime *stream,
+				struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_DISCONNECT_PORT;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+
+	wbuf[0] = port->id;
+	port->ch.state = SLIM_CH_STATE_DISCONNECTED;
+	port->state = SLIM_PORT_DISCONNECTED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_deactivate_remove_channel(struct slim_stream_runtime *stream,
+					  struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEACTIVATE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+	int ret;
+
+	wbuf[0] = port->ch.id;
+	ret = slim_do_transfer(sdev->ctrl, &txn);
+	if (ret)
+		return ret;
+
+	txn.mc = SLIM_MSG_MC_NEXT_REMOVE_CHANNEL;
+	port->ch.state = SLIM_CH_STATE_REMOVED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_get_prate_code(int rate)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(slim_presence_rate_table); i++) {
+		if (rate == slim_presence_rate_table[i])
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * slim_stream_prepare() - Prepare a SLIMbus Stream
+ *
+ * @rt: instance of slim stream runtime to configure
+ * @cfg: new configuration for the stream
+ *
+ * This API will configure SLIMbus stream with config parameters from cfg.
+ * return zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to hw_params() operation.
+ */
+int slim_stream_prepare(struct slim_stream_runtime *rt,
+			struct slim_stream_config *cfg)
+{
+	struct slim_controller *ctrl = rt->dev->ctrl;
+	struct slim_port *port;
+	int num_ports, i, port_id;
+
+	if (rt->ports) {
+		dev_err(&rt->dev->dev, "Stream already Prepared\n");
+		return -EINVAL;
+	}
+
+	num_ports = hweight32(cfg->port_mask);
+	rt->ports = kcalloc(num_ports, sizeof(*port), GFP_KERNEL);
+	if (!rt->ports)
+		return -ENOMEM;
+
+	rt->num_ports = num_ports;
+	rt->rate = cfg->rate;
+	rt->bps = cfg->bps;
+	rt->direction = cfg->direction;
+
+	if (cfg->rate % ctrl->a_framer->superfreq) {
+		/*
+		 * data rate not exactly multiple of super frame,
+		 * use PUSH/PULL protocol
+		 */
+		if (cfg->direction == SNDRV_PCM_STREAM_PLAYBACK)
+			rt->prot = SLIM_PROTO_PUSH;
+		else
+			rt->prot = SLIM_PROTO_PULL;
+	} else {
+		rt->prot = SLIM_PROTO_ISO;
+	}
+
+	rt->ratem = cfg->rate/ctrl->a_framer->superfreq;
+
+	i = 0;
+	for_each_set_bit(port_id, &cfg->port_mask, SLIM_DEVICE_MAX_PORTS) {
+		port = &rt->ports[i];
+		port->state = SLIM_PORT_DISCONNECTED;
+		port->id = port_id;
+		port->ch.prrate = slim_get_prate_code(cfg->rate);
+		port->ch.id = cfg->chs[i];
+		port->ch.data_fmt = SLIM_CH_DATA_FMT_NOT_DEFINED;
+		port->ch.aux_fmt = SLIM_CH_AUX_FMT_NOT_APPLICABLE;
+		port->ch.state = SLIM_CH_STATE_ALLOCATED;
+
+		if (cfg->direction == SNDRV_PCM_STREAM_PLAYBACK)
+			port->direction = SLIM_PORT_SINK;
+		else
+			port->direction = SLIM_PORT_SOURCE;
+
+		slim_connect_port_channel(rt, port);
+		i++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_prepare);
+
+static int slim_define_channel_content(struct slim_stream_runtime *stream,
+				       struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[4];
+	struct slim_val_inf msg = {0, 4, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEFINE_CONTENT;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 8, stream->dev->laddr, &msg);
+
+	wbuf[0] = port->ch.id;
+	wbuf[1] = port->ch.prrate;
+
+	/* Frequency Locked for ISO Protocol */
+	if (stream->prot != SLIM_PROTO_ISO)
+		wbuf[1] |= SLIM_CHANNEL_CONTENT_FL;
+
+	wbuf[2] = port->ch.data_fmt | (port->ch.aux_fmt << 4);
+	wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS;
+	port->ch.state = SLIM_CH_STATE_CONTENT_DEFINED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_get_segdist_code(int ratem)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(segdist_codes); i++) {
+		if (segdist_codes[i].ratem == ratem)
+			return segdist_codes[i].segdist_code;
+	}
+
+	return -EINVAL;
+}
+
+static int slim_define_channel(struct slim_stream_runtime *stream,
+				       struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[4];
+	struct slim_val_inf msg = {0, 4, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_DEFINE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 8, stream->dev->laddr, &msg);
+
+	port->ch.seg_dist = slim_get_segdist_code(stream->ratem);
+
+	wbuf[0] = port->ch.id;
+	wbuf[1] = port->ch.seg_dist & 0xFF;
+	wbuf[2] = (stream->prot << 4) | ((port->ch.seg_dist & 0xF00) >> 8);
+	if (stream->prot == SLIM_PROTO_ISO)
+		wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS;
+	else
+		wbuf[3] = stream->bps/SLIM_SLOT_LEN_BITS + 1;
+
+	port->ch.state = SLIM_CH_STATE_DEFINED;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+static int slim_activate_channel(struct slim_stream_runtime *stream,
+				 struct slim_port *port)
+{
+	struct slim_device *sdev = stream->dev;
+	u8 wbuf[1];
+	struct slim_val_inf msg = {0, 1, NULL, wbuf, NULL};
+	u8 mc = SLIM_MSG_MC_NEXT_ACTIVATE_CHANNEL;
+	DEFINE_SLIM_LDEST_TXN(txn, mc, 5, stream->dev->laddr, &msg);
+
+	txn.msg->num_bytes = 1;
+	txn.msg->wbuf = wbuf;
+	wbuf[0] = port->ch.id;
+	port->ch.state = SLIM_CH_STATE_ACTIVE;
+
+	return slim_do_transfer(sdev->ctrl, &txn);
+}
+
+/*
+ * slim_stream_enable() - Enable a prepared SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to enable
+ *
+ * This API will enable all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() start operation.
+ */
+int slim_stream_enable(struct slim_stream_runtime *stream)
+{
+	DEFINE_SLIM_BCAST_TXN(txn, SLIM_MSG_MC_BEGIN_RECONFIGURATION,
+				3, SLIM_LA_MANAGER, NULL);
+	struct slim_controller *ctrl = stream->dev->ctrl;
+	int ret, i;
+
+	if (ctrl->enable_stream) {
+		ret = ctrl->enable_stream(stream);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < stream->num_ports; i++)
+			stream->ports[i].ch.state = SLIM_CH_STATE_ACTIVE;
+
+		return ret;
+	}
+
+	ret = slim_do_transfer(ctrl, &txn);
+	if (ret)
+		return ret;
+
+	/* define channels first before activating them */
+	for (i = 0; i < stream->num_ports; i++) {
+		struct slim_port *port = &stream->ports[i];
+
+		slim_define_channel(stream, port);
+		slim_define_channel_content(stream, port);
+	}
+
+	for (i = 0; i < stream->num_ports; i++) {
+		struct slim_port *port = &stream->ports[i];
+
+		slim_activate_channel(stream, port);
+		port->state = SLIM_PORT_CONFIGURED;
+	}
+	txn.mc = SLIM_MSG_MC_RECONFIGURE_NOW;
+
+	return slim_do_transfer(ctrl, &txn);
+}
+EXPORT_SYMBOL_GPL(slim_stream_enable);
+
+/*
+ * slim_stream_disable() - Disable a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to disable
+ *
+ * This API will disable all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() pause operation.
+ */
+int slim_stream_disable(struct slim_stream_runtime *stream)
+{
+	DEFINE_SLIM_BCAST_TXN(txn, SLIM_MSG_MC_BEGIN_RECONFIGURATION,
+				3, SLIM_LA_MANAGER, NULL);
+	struct slim_controller *ctrl = stream->dev->ctrl;
+	int ret, i;
+
+	if (ctrl->disable_stream)
+		ctrl->disable_stream(stream);
+
+	ret = slim_do_transfer(ctrl, &txn);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < stream->num_ports; i++)
+		slim_deactivate_remove_channel(stream, &stream->ports[i]);
+
+	txn.mc = SLIM_MSG_MC_RECONFIGURE_NOW;
+
+	return slim_do_transfer(ctrl, &txn);
+}
+EXPORT_SYMBOL_GPL(slim_stream_disable);
+
+/*
+ * slim_stream_unprepare() - Un-prepare a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to unprepare
+ *
+ * This API will un allocate all the ports and channels associated with
+ * SLIMbus stream
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to trigger() stop operation.
+ */
+int slim_stream_unprepare(struct slim_stream_runtime *stream)
+{
+	int i;
+
+	for (i = 0; i < stream->num_ports; i++)
+		slim_disconnect_port(stream, &stream->ports[i]);
+
+	kfree(stream->ports);
+	stream->ports = NULL;
+	stream->num_ports = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_unprepare);
+
+/*
+ * slim_stream_free() - Free a SLIMbus Stream
+ *
+ * @stream: instance of slim stream runtime to free
+ *
+ * This API will un allocate all the memory associated with
+ * slim stream runtime, user is not allowed to make an dereference
+ * to stream after this call.
+ *
+ * Return: zero on success and error code on failure. From ASoC DPCM framework,
+ * this state is linked to shutdown() operation.
+ */
+int slim_stream_free(struct slim_stream_runtime *stream)
+{
+	struct slim_device *sdev = stream->dev;
+
+	spin_lock(&sdev->stream_list_lock);
+	list_del(&stream->node);
+	spin_unlock(&sdev->stream_list_lock);
+
+	kfree(stream->name);
+	kfree(stream);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(slim_stream_free);
diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h
index 63801bcc5e60..12c9719b2a55 100644
--- a/include/linux/slimbus.h
+++ b/include/linux/slimbus.h
@@ -48,6 +48,8 @@ struct slim_controller;
  * @ctrl: slim controller instance.
  * @laddr: 1-byte Logical address of this device.
  * @is_laddr_valid: indicates if the laddr is valid or not
+ * @stream_list: List of streams on this device
+ * @stream_list_lock: lock to protect the stream list
  *
  * This is the client/device handle returned when a SLIMbus
  * device is registered with a controller.
@@ -60,6 +62,8 @@ struct slim_device {
 	enum slim_device_status	status;
 	u8			laddr;
 	bool			is_laddr_valid;
+	struct list_head	stream_list;
+	spinlock_t stream_list_lock;
 };
 
 #define to_slim_device(d) container_of(d, struct slim_device, dev)
@@ -108,6 +112,36 @@ struct slim_val_inf {
 	struct	completion	*comp;
 };
 
+#define SLIM_DEVICE_MAX_CHANNELS	256
+/* A SLIMBus Device may have frmo 0 to 31 Ports (inclusive) */
+#define SLIM_DEVICE_MAX_PORTS		32
+
+/**
+ * struct slim_stream_config - SLIMbus stream configuration
+ *	Configuring a stream is done at hw_params or prepare call
+ *	from audio drivers where they have all the required information
+ *	regarding rate, number of channels and so on.
+ *	There is a 1:1 mapping of channel and ports.
+ *
+ * @rate: data rate
+ * @bps: bits per data sample
+ * @ch_count: number of channels
+ * @chs: pointer to list of channel numbers
+ * @port_mask: port mask of ports to use for this stream
+ * @direction: direction of the stream, SNDRV_PCM_STREAM_PLAYBACK
+ *	or SNDRV_PCM_STREAM_CAPTURE.
+ */
+struct slim_stream_config {
+	unsigned int rate;
+	unsigned int bps;
+	/* MAX 256 channels */
+	unsigned int ch_count;
+	unsigned int *chs;
+	/* Max 32 ports per device */
+	unsigned long port_mask;
+	int direction;
+};
+
 /*
  * use a macro to avoid include chaining to get THIS_MODULE
  */
@@ -163,4 +197,16 @@ int slim_readb(struct slim_device *sdev, u32 addr);
 int slim_writeb(struct slim_device *sdev, u32 addr, u8 value);
 int slim_read(struct slim_device *sdev, u32 addr, size_t count, u8 *val);
 int slim_write(struct slim_device *sdev, u32 addr, size_t count, u8 *val);
+
+/* SLIMbus Stream apis */
+struct slim_stream_runtime;
+struct slim_stream_runtime *slim_stream_allocate(struct slim_device *dev,
+						 const char *sname);
+int slim_stream_prepare(struct slim_stream_runtime *stream,
+			struct slim_stream_config *c);
+int slim_stream_enable(struct slim_stream_runtime *stream);
+int slim_stream_disable(struct slim_stream_runtime *stream);
+int slim_stream_unprepare(struct slim_stream_runtime *stream);
+int slim_stream_free(struct slim_stream_runtime *stream);
+
 #endif /* _LINUX_SLIMBUS_H */
-- 
cgit v1.2.3


From 16777ecd1b54d75136f77b2cc25f2cfa75156852 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Sun, 3 Jun 2018 20:18:58 +0200
Subject: kbd: complete dead keys definitions

This completes dead keys definitions for internationalization
completeness on the console.  The representatives have been chosen
coherently with libx11 compose sequences, which avoid symetry conflicts
(e.g. there is U with caron, but no c with breve).

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/char/keyboard.c  | 30 ++++++++++++++++++++++++++++--
 drivers/tty/vt/keyboard.c     | 30 +++++++++++++++++++++++++++++-
 include/uapi/linux/keyboard.h | 23 ++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 79eb60958015..eda245887fe0 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -39,8 +39,34 @@ static const int kbd_max_vals[] = {
 };
 static const int KBD_NR_TYPES = ARRAY_SIZE(kbd_max_vals);
 
-static unsigned char ret_diacr[NR_DEAD] = {
-	'`', '\'', '^', '~', '"', ','
+static const unsigned char ret_diacr[NR_DEAD] = {
+	'`',	/* dead_grave */
+	'\'',	/* dead_acute */
+	'^',	/* dead_circumflex */
+	'~',	/* dead_tilda */
+	'"',	/* dead_diaeresis */
+	',',	/* dead_cedilla */
+	'_',	/* dead_macron */
+	'U',	/* dead_breve */
+	'.',	/* dead_abovedot */
+	'*',	/* dead_abovering */
+	'=',	/* dead_doubleacute */
+	'c',	/* dead_caron */
+	'k',	/* dead_ogonek */
+	'i',	/* dead_iota */
+	'#',	/* dead_voiced_sound */
+	'o',	/* dead_semivoiced_sound */
+	'!',	/* dead_belowdot */
+	'?',	/* dead_hook */
+	'+',	/* dead_horn */
+	'-',	/* dead_stroke */
+	')',	/* dead_abovecomma */
+	'(',	/* dead_abovereversedcomma */
+	':',	/* dead_doublegrave */
+	'n',	/* dead_invertedbreve */
+	';',	/* dead_belowcomma */
+	'$',	/* dead_currency */
+	'@',	/* dead_greek */
 };
 
 /*
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index d5b4a2b44ab8..c0f5802acd7c 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -690,7 +690,35 @@ static void k_dead2(struct vc_data *vc, unsigned char value, char up_flag)
  */
 static void k_dead(struct vc_data *vc, unsigned char value, char up_flag)
 {
-	static const unsigned char ret_diacr[NR_DEAD] = {'`', '\'', '^', '~', '"', ',' };
+	static const unsigned char ret_diacr[NR_DEAD] = {
+		'`',	/* dead_grave */
+		'\'',	/* dead_acute */
+		'^',	/* dead_circumflex */
+		'~',	/* dead_tilda */
+		'"',	/* dead_diaeresis */
+		',',	/* dead_cedilla */
+		'_',	/* dead_macron */
+		'U',	/* dead_breve */
+		'.',	/* dead_abovedot */
+		'*',	/* dead_abovering */
+		'=',	/* dead_doubleacute */
+		'c',	/* dead_caron */
+		'k',	/* dead_ogonek */
+		'i',	/* dead_iota */
+		'#',	/* dead_voiced_sound */
+		'o',	/* dead_semivoiced_sound */
+		'!',	/* dead_belowdot */
+		'?',	/* dead_hook */
+		'+',	/* dead_horn */
+		'-',	/* dead_stroke */
+		')',	/* dead_abovecomma */
+		'(',	/* dead_abovereversedcomma */
+		':',	/* dead_doublegrave */
+		'n',	/* dead_invertedbreve */
+		';',	/* dead_belowcomma */
+		'$',	/* dead_currency */
+		'@',	/* dead_greek */
+	};
 
 	k_deadunicode(vc, ret_diacr[value], up_flag);
 }
diff --git a/include/uapi/linux/keyboard.h b/include/uapi/linux/keyboard.h
index ab4108c83186..4846716e7c5c 100644
--- a/include/uapi/linux/keyboard.h
+++ b/include/uapi/linux/keyboard.h
@@ -357,8 +357,29 @@
 #define K_DTILDE	K(KT_DEAD,3)
 #define K_DDIERE	K(KT_DEAD,4)
 #define K_DCEDIL	K(KT_DEAD,5)
+#define K_DMACRON	K(KT_DEAD,6)
+#define K_DBREVE	K(KT_DEAD,7)
+#define K_DABDOT	K(KT_DEAD,8)
+#define K_DABRING	K(KT_DEAD,9)
+#define K_DDBACUTE	K(KT_DEAD,10)
+#define K_DCARON	K(KT_DEAD,11)
+#define K_DOGONEK	K(KT_DEAD,12)
+#define K_DIOTA		K(KT_DEAD,13)
+#define K_DVOICED	K(KT_DEAD,14)
+#define K_DSEMVOICED	K(KT_DEAD,15)
+#define K_DBEDOT	K(KT_DEAD,16)
+#define K_DHOOK		K(KT_DEAD,17)
+#define K_DHORN		K(KT_DEAD,18)
+#define K_DSTROKE	K(KT_DEAD,19)
+#define K_DABCOMMA	K(KT_DEAD,20)
+#define K_DABREVCOMMA	K(KT_DEAD,21)
+#define K_DDBGRAVE	K(KT_DEAD,22)
+#define K_DINVBREVE	K(KT_DEAD,23)
+#define K_DBECOMMA	K(KT_DEAD,24)
+#define K_DCURRENCY	K(KT_DEAD,25)
+#define K_DGREEK	K(KT_DEAD,26)
 
-#define NR_DEAD		6
+#define NR_DEAD		27
 
 #define K_DOWN		K(KT_CUR,0)
 #define K_LEFT		K(KT_CUR,1)
-- 
cgit v1.2.3


From ea614629c69c2c0258ef6414b017c998e3cc61a7 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 16 Jun 2018 18:53:57 -0700
Subject: linux/device.h: fix kernel-doc notation warning

Fix kernel-doc build warning (missing " *" at beginning of line):

../include/linux/device.h:93: warning: bad line:                         this bus.

Fixes: 07397df29e57c ("dma-mapping: move dma configuration to bus infrastructure")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Nipun Gupta <nipun.gupta@nxp.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 2eaa9ea13c09..575c5a35ece5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @num_vf:	Called to find out how many virtual functions a device on this
  *		bus supports.
  * @dma_configure:	Called to setup DMA configuration on a device on
-			this bus.
+ *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
-- 
cgit v1.2.3


From ac3167257b9fe16c9426c2087ead1c9f1b0992b1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 19 Jun 2018 22:47:28 -0700
Subject: headers: separate linux/mod_devicetable.h from
 linux/platform_device.h

At over 4000 #includes, <linux/platform_device.h> is the 9th most
#included header file in the Linux kernel.  It does not need
<linux/mod_devicetable.h>, so drop that header and explicitly add
<linux/mod_devicetable.h> to source files that need it.

   4146 #include <linux/platform_device.h>

After this patch, there are 225 files that use <linux/mod_devicetable.h>,
for a reduction of around 3900 times that <linux/mod_devicetable.h>
does not have to be read & parsed.

    225 #include <linux/mod_devicetable.h>

This patch was build-tested on 20 different arch-es.

It also makes these drivers SubmitChecklist#1 compliant.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kbuild test robot <lkp@intel.com> # drivers/media/platform/vimc/
Reported-by: kbuild test robot <lkp@intel.com> # drivers/pinctrl/pinctrl-u300.c
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-mmp/sram.c                                      | 1 +
 drivers/ata/pata_imx.c                                        | 1 +
 drivers/ata/pata_samsung_cf.c                                 | 1 +
 drivers/auxdisplay/hd44780.c                                  | 1 +
 drivers/char/hw_random/atmel-rng.c                            | 1 +
 drivers/char/hw_random/exynos-trng.c                          | 1 +
 drivers/char/hw_random/imx-rngc.c                             | 1 +
 drivers/char/hw_random/powernv-rng.c                          | 1 +
 drivers/crypto/mediatek/mtk-platform.c                        | 1 +
 drivers/crypto/qce/core.c                                     | 1 +
 drivers/crypto/stm32/stm32_crc32.c                            | 1 +
 drivers/crypto/ux500/cryp/cryp_core.c                         | 1 +
 drivers/crypto/ux500/hash/hash_core.c                         | 1 +
 drivers/devfreq/tegra-devfreq.c                               | 1 +
 drivers/dma/ep93xx_dma.c                                      | 1 +
 drivers/dma/s3c24xx-dma.c                                     | 1 +
 drivers/extcon/extcon-intel-cht-wc.c                          | 1 +
 drivers/extcon/extcon-qcom-spmi-misc.c                        | 1 +
 drivers/gpu/drm/mediatek/mtk_cec.c                            | 1 +
 drivers/gpu/drm/sun4i/sun6i_drc.c                             | 1 +
 drivers/hsi/controllers/omap_ssi_port.c                       | 1 +
 drivers/hwmon/max197.c                                        | 1 +
 drivers/hwmon/mc13783-adc.c                                   | 1 +
 drivers/media/platform/coda/imx-vdoa.c                        | 1 +
 drivers/media/platform/rcar-fcp.c                             | 1 +
 drivers/media/platform/vimc/vimc-capture.c                    | 1 +
 drivers/media/platform/vimc/vimc-debayer.c                    | 1 +
 drivers/media/platform/vimc/vimc-scaler.c                     | 1 +
 drivers/media/platform/vimc/vimc-sensor.c                     | 1 +
 drivers/memory/tegra/tegra186.c                               | 1 +
 drivers/mfd/atmel-hlcdc.c                                     | 1 +
 drivers/mfd/cros_ec_dev.c                                     | 1 +
 drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c                  | 1 +
 drivers/net/ethernet/calxeda/xgmac.c                          | 1 +
 drivers/net/ethernet/faraday/ftmac100.c                       | 1 +
 drivers/net/wireless/ath/ath9k/ahb.c                          | 1 +
 drivers/net/wireless/ti/wl12xx/main.c                         | 1 +
 drivers/net/wireless/ti/wl18xx/main.c                         | 1 +
 drivers/nvmem/lpc18xx_eeprom.c                                | 1 +
 drivers/nvmem/mtk-efuse.c                                     | 1 +
 drivers/nvmem/qfprom.c                                        | 1 +
 drivers/nvmem/uniphier-efuse.c                                | 1 +
 drivers/perf/arm-ccn.c                                        | 1 +
 drivers/pinctrl/intel/pinctrl-merrifield.c                    | 1 +
 drivers/pinctrl/pinctrl-u300.c                                | 1 +
 drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c                    | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c               | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c              | 1 +
 drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c              | 1 +
 drivers/platform/goldfish/goldfish_pipe.c                     | 1 +
 drivers/platform/x86/intel_bxtwc_tmu.c                        | 1 +
 drivers/power/avs/smartreflex.c                               | 1 +
 drivers/power/reset/ltc2952-poweroff.c                        | 1 +
 drivers/power/supply/max8998_charger.c                        | 1 +
 drivers/power/supply/olpc_battery.c                           | 1 +
 drivers/ptp/ptp_dte.c                                         | 1 +
 drivers/regulator/tps65912-regulator.c                        | 1 +
 drivers/reset/reset-ath79.c                                   | 1 +
 drivers/reset/reset-axs10x.c                                  | 1 +
 drivers/reset/reset-imx7.c                                    | 1 +
 drivers/rtc/rtc-coh901331.c                                   | 1 +
 drivers/rtc/rtc-cpcap.c                                       | 1 +
 drivers/rtc/rtc-ftrtc010.c                                    | 1 +
 drivers/rtc/rtc-mc13xxx.c                                     | 1 +
 drivers/rtc/rtc-mxc_v2.c                                      | 1 +
 drivers/rtc/rtc-r7301.c                                       | 1 +
 drivers/rtc/rtc-sh.c                                          | 1 +
 drivers/rtc/rtc-tegra.c                                       | 1 +
 drivers/siox/siox-bus-gpio.c                                  | 1 +
 drivers/tty/goldfish.c                                        | 1 +
 drivers/tty/serial/8250/8250_em.c                             | 1 +
 drivers/tty/serial/sccnxp.c                                   | 1 +
 drivers/usb/gadget/udc/fsl_mxc_udc.c                          | 1 +
 drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c | 1 +
 drivers/w1/masters/mxc_w1.c                                   | 1 +
 drivers/watchdog/coh901327_wdt.c                              | 1 +
 drivers/watchdog/davinci_wdt.c                                | 1 +
 drivers/watchdog/imgpdc_wdt.c                                 | 1 +
 drivers/watchdog/max63xx_wdt.c                                | 1 +
 drivers/watchdog/max77620_wdt.c                               | 1 +
 drivers/watchdog/moxart_wdt.c                                 | 1 +
 drivers/watchdog/omap_wdt.c                                   | 1 +
 drivers/watchdog/tangox_wdt.c                                 | 1 +
 include/linux/platform_device.h                               | 2 +-
 90 files changed, 90 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c
index bf5e64906e65..ba91e4fe444d 100644
--- a/arch/arm/mach-mmp/sram.c
+++ b/arch/arm/mach-mmp/sram.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index d4caa23f5a88..6f0534047c6d 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c
@@ -17,6 +17,7 @@
 #include <linux/clk.h>
 #include <linux/libata.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #define DRV_NAME "pata_imx"
diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
index bb96dc35950d..f5bd44b8bd63 100644
--- a/drivers/ata/pata_samsung_cf.c
+++ b/drivers/ata/pata_samsung_cf.c
@@ -17,6 +17,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/clk.h>
 #include <linux/libata.h>
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index 78d8f1986fec..f1a42f0f1ded 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/slab.h>
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 661c82cde0f2..433426242b87 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -8,6 +8,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/clk.h>
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 1947aed7c044..94235761955c 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -19,6 +19,7 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index 250123bc4905..14730be54edf 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/clk.h>
diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c
index 263a5bb8e605..791182aa8e04 100644
--- a/drivers/char/hw_random/powernv-rng.c
+++ b/drivers/char/hw_random/powernv-rng.c
@@ -10,6 +10,7 @@
 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/random.h>
diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c
index b182e941b0cd..ee0404e27a0f 100644
--- a/drivers/crypto/mediatek/mtk-platform.c
+++ b/drivers/crypto/mediatek/mtk-platform.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include "mtk-platform.h"
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index 718b32a3112e..1c3b36b75467 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c
index 8f09b8430893..a4a21fcf1e17 100644
--- a/drivers/crypto/stm32/stm32_crc32.c
+++ b/drivers/crypto/stm32/stm32_crc32.c
@@ -7,6 +7,7 @@
 #include <linux/bitrev.h>
 #include <linux/clk.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <crypto/internal/hash.h>
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index cb31b59c9d53..d2663a4e1f5e 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -20,6 +20,7 @@
 #include <linux/irqreturn.h>
 #include <linux/klist.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/semaphore.h>
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 2d0a677bcc76..b141b74a084e 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -21,6 +21,7 @@
 #include <linux/klist.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/crypto.h>
 
diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index ae712159246f..c59d2eee5d30 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -24,6 +24,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/reset.h>
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index ec240592f5c8..a15592383d4e 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 7056fe7513b4..64744eb88720 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -35,6 +35,7 @@
 #include <linux/interrupt.h>
 #include <linux/clk.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_data/dma-s3c24xx.h>
 
diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c
index b7e9ea377d70..5e1dd2772278 100644
--- a/drivers/extcon/extcon-intel-cht-wc.c
+++ b/drivers/extcon/extcon-intel-cht-wc.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <linux/mfd/intel_soc_pmic.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
diff --git a/drivers/extcon/extcon-qcom-spmi-misc.c b/drivers/extcon/extcon-qcom-spmi-misc.c
index 660bbf163bf5..72bc0f2478e2 100644
--- a/drivers/extcon/extcon-qcom-spmi-misc.c
+++ b/drivers/extcon/extcon-qcom-spmi-misc.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
diff --git a/drivers/gpu/drm/mediatek/mtk_cec.c b/drivers/gpu/drm/mediatek/mtk_cec.c
index 7a3eb8c17ef9..5ce84d0dbf81 100644
--- a/drivers/gpu/drm/mediatek/mtk_cec.c
+++ b/drivers/gpu/drm/mediatek/mtk_cec.c
@@ -15,6 +15,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "mtk_cec.h"
diff --git a/drivers/gpu/drm/sun4i/sun6i_drc.c b/drivers/gpu/drm/sun4i/sun6i_drc.c
index b5e071a49045..88eb268fdf73 100644
--- a/drivers/gpu/drm/sun4i/sun6i_drc.c
+++ b/drivers/gpu/drm/sun4i/sun6i_drc.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
diff --git a/drivers/hsi/controllers/omap_ssi_port.c b/drivers/hsi/controllers/omap_ssi_port.c
index 7765de2f1ef1..2ada82d2ec8c 100644
--- a/drivers/hsi/controllers/omap_ssi_port.c
+++ b/drivers/hsi/controllers/omap_ssi_port.c
@@ -20,6 +20,7 @@
  * 02110-1301 USA
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/hwmon/max197.c b/drivers/hwmon/max197.c
index 638567fb7cd8..3d9e210beedf 100644
--- a/drivers/hwmon/max197.c
+++ b/drivers/hwmon/max197.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/hwmon/mc13783-adc.c b/drivers/hwmon/mc13783-adc.c
index 67860ad2e3d9..78fe8759d2a9 100644
--- a/drivers/hwmon/mc13783-adc.c
+++ b/drivers/hwmon/mc13783-adc.c
@@ -23,6 +23,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/hwmon.h>
 #include <linux/slab.h>
 #include <linux/init.h>
diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c
index 85a66e4e2f9a..96ab4b61669a 100644
--- a/drivers/media/platform/coda/imx-vdoa.c
+++ b/drivers/media/platform/coda/imx-vdoa.c
@@ -18,6 +18,7 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/videodev2.h>
diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c
index 2988031d285d..3ad9d0dc6fa7 100644
--- a/drivers/media/platform/rcar-fcp.c
+++ b/drivers/media/platform/rcar-fcp.c
@@ -14,6 +14,7 @@
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/media/platform/vimc/vimc-capture.c b/drivers/media/platform/vimc/vimc-capture.c
index 88a1e5670c72..ec68feaac378 100644
--- a/drivers/media/platform/vimc/vimc-capture.c
+++ b/drivers/media/platform/vimc/vimc-capture.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-core.h>
diff --git a/drivers/media/platform/vimc/vimc-debayer.c b/drivers/media/platform/vimc/vimc-debayer.c
index 6e10b63ba9ec..77887f66f323 100644
--- a/drivers/media/platform/vimc/vimc-debayer.c
+++ b/drivers/media/platform/vimc/vimc-debayer.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 #include <linux/v4l2-mediabus.h>
diff --git a/drivers/media/platform/vimc/vimc-scaler.c b/drivers/media/platform/vimc/vimc-scaler.c
index e583ec7a91da..b0952ee86296 100644
--- a/drivers/media/platform/vimc/vimc-scaler.c
+++ b/drivers/media/platform/vimc/vimc-scaler.c
@@ -17,6 +17,7 @@
 
 #include <linux/component.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 #include <linux/v4l2-mediabus.h>
diff --git a/drivers/media/platform/vimc/vimc-sensor.c b/drivers/media/platform/vimc/vimc-sensor.c
index 605e2a2d5dd5..b2b89315e7ba 100644
--- a/drivers/media/platform/vimc/vimc-sensor.c
+++ b/drivers/media/platform/vimc/vimc-sensor.c
@@ -19,6 +19,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/v4l2-mediabus.h>
 #include <linux/vmalloc.h>
diff --git a/drivers/memory/tegra/tegra186.c b/drivers/memory/tegra/tegra186.c
index 7254fb596979..ffda903c49bb 100644
--- a/drivers/memory/tegra/tegra186.c
+++ b/drivers/memory/tegra/tegra186.c
@@ -8,6 +8,7 @@
 
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <dt-bindings/memory/tegra186-mc.h>
diff --git a/drivers/mfd/atmel-hlcdc.c b/drivers/mfd/atmel-hlcdc.c
index 4b15b0840f16..e82543bcfdc8 100644
--- a/drivers/mfd/atmel-hlcdc.c
+++ b/drivers/mfd/atmel-hlcdc.c
@@ -22,6 +22,7 @@
 #include <linux/mfd/atmel-hlcdc.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 306e1fd109bd..27af62ed480a 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c b/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
index 5c271077ac87..489af7bc005a 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmstb_nand.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "brcmnand.h"
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 2c63afff1382..13741ee49b9b 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -14,6 +14,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/circ_buf.h>
 #include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index aecc76504b69..a1197d3adbe0 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -29,6 +29,7 @@
 #include <linux/io.h>
 #include <linux/mii.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/netdevice.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/net/wireless/ath/ath9k/ahb.c b/drivers/net/wireless/ath/ath9k/ahb.c
index 2bd982c3a479..63019c3de034 100644
--- a/drivers/net/wireless/ath/ath9k/ahb.c
+++ b/drivers/net/wireless/ath/ath9k/ahb.c
@@ -19,6 +19,7 @@
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include "ath9k.h"
 
 static const struct platform_device_id ath9k_platform_id_table[] = {
diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c
index 22009e14a8fc..4a4f797bb10f 100644
--- a/drivers/net/wireless/ti/wl12xx/main.c
+++ b/drivers/net/wireless/ti/wl12xx/main.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/err.h>
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c
index ca0f936fc119..496b9b63cea1 100644
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -20,6 +20,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/ip.h>
 #include <linux/firmware.h>
diff --git a/drivers/nvmem/lpc18xx_eeprom.c b/drivers/nvmem/lpc18xx_eeprom.c
index b1af966206a6..a9534a6e8636 100644
--- a/drivers/nvmem/lpc18xx_eeprom.c
+++ b/drivers/nvmem/lpc18xx_eeprom.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
diff --git a/drivers/nvmem/mtk-efuse.c b/drivers/nvmem/mtk-efuse.c
index e66adf17a747..58c998b2e3bc 100644
--- a/drivers/nvmem/mtk-efuse.c
+++ b/drivers/nvmem/mtk-efuse.c
@@ -14,6 +14,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/io.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
diff --git a/drivers/nvmem/qfprom.c b/drivers/nvmem/qfprom.c
index 4f650baad983..fbb1f1df6fc7 100644
--- a/drivers/nvmem/qfprom.c
+++ b/drivers/nvmem/qfprom.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/io.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
diff --git a/drivers/nvmem/uniphier-efuse.c b/drivers/nvmem/uniphier-efuse.c
index 271f0b2ff86a..286910336ef6 100644
--- a/drivers/nvmem/uniphier-efuse.c
+++ b/drivers/nvmem/uniphier-efuse.c
@@ -16,6 +16,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/nvmem-provider.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index b416ee18e6bb..860f5bd8dce8 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index d9357054d41d..acea41073dfd 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pinctrl/pinconf.h>
 #include <linux/pinctrl/pinconf-generic.h>
diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c
index 9cc80a500880..2b1a61dba224 100644
--- a/drivers/pinctrl/pinctrl-u300.c
+++ b/drivers/pinctrl/pinctrl-u300.c
@@ -13,6 +13,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/slab.h>
diff --git a/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c b/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
index 3cdad8bc8f93..5702b6704137 100644
--- a/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
+++ b/drivers/pinctrl/sprd/pinctrl-sprd-sc9860.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include "pinctrl-sprd.h"
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
index 58825f68b58b..c93752cd51c5 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
index 9f449b35e300..130ce2484705 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld20.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
index 0b10ebc07eb8..5a20121d2c93 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld4.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
index 8e4d45fea885..76c57b0d5d88 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld6b.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
index 24788a74c254..4a0027373247 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
index d5d5e579cb08..9a55972e9c16 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro5.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
index 032619ad0e73..544f4b465308 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs2.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
index 535bb2e935e4..9a15998b4f73 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pxs3.c
@@ -15,6 +15,7 @@
 
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
index 0f921a653164..0402077d3b3a 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-sld8.c
@@ -15,6 +15,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c
index 4866a8b12e98..2da567540c2d 100644
--- a/drivers/platform/goldfish/goldfish_pipe.c
+++ b/drivers/platform/goldfish/goldfish_pipe.c
@@ -48,6 +48,7 @@
 
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
diff --git a/drivers/platform/x86/intel_bxtwc_tmu.c b/drivers/platform/x86/intel_bxtwc_tmu.c
index ea865d4ca220..227943a20212 100644
--- a/drivers/platform/x86/intel_bxtwc_tmu.c
+++ b/drivers/platform/x86/intel_bxtwc_tmu.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/intel_soc_pmic.h>
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index cb0237143dbe..1360a7fa542c 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
 #include <linux/clk.h>
 #include <linux/io.h>
diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c
index bfcd6fba6363..6b911b6b10a6 100644
--- a/drivers/power/reset/ltc2952-poweroff.c
+++ b/drivers/power/reset/ltc2952-poweroff.c
@@ -62,6 +62,7 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/gpio/consumer.h>
 #include <linux/reboot.h>
 
diff --git a/drivers/power/supply/max8998_charger.c b/drivers/power/supply/max8998_charger.c
index b64cf0f14142..cad7d1a8feec 100644
--- a/drivers/power/supply/max8998_charger.c
+++ b/drivers/power/supply/max8998_charger.c
@@ -21,6 +21,7 @@
 
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
diff --git a/drivers/power/supply/olpc_battery.c b/drivers/power/supply/olpc_battery.c
index 3bc2eea7b3b7..6da79ae14860 100644
--- a/drivers/power/supply/olpc_battery.c
+++ b/drivers/power/supply/olpc_battery.c
@@ -10,6 +10,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/err.h>
 #include <linux/device.h>
diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c
index 6edd3b9c7f01..a7dc43368df4 100644
--- a/drivers/ptp/ptp_dte.c
+++ b/drivers/ptp/ptp_dte.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/types.h>
diff --git a/drivers/regulator/tps65912-regulator.c b/drivers/regulator/tps65912-regulator.c
index a4921a70da55..276faeddc370 100644
--- a/drivers/regulator/tps65912-regulator.c
+++ b/drivers/regulator/tps65912-regulator.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 
diff --git a/drivers/reset/reset-ath79.c b/drivers/reset/reset-ath79.c
index 2674880e5492..a7455916e396 100644
--- a/drivers/reset/reset-ath79.c
+++ b/drivers/reset/reset-ath79.c
@@ -17,6 +17,7 @@
 
 #include <linux/io.h>
 #include <linux/init.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/reboot.h>
diff --git a/drivers/reset/reset-axs10x.c b/drivers/reset/reset-axs10x.c
index afb298e46bd9..a854ef41364d 100644
--- a/drivers/reset/reset-axs10x.c
+++ b/drivers/reset/reset-axs10x.c
@@ -10,6 +10,7 @@
 
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 
diff --git a/drivers/reset/reset-imx7.c b/drivers/reset/reset-imx7.c
index 4db177bc89bc..14bc78d28707 100644
--- a/drivers/reset/reset-imx7.c
+++ b/drivers/reset/reset-imx7.c
@@ -16,6 +16,7 @@
  */
 
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/regmap.h>
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index 2fc517498a5d..fc5cf5c44ae7 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -8,6 +8,7 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rtc.h>
 #include <linux/clk.h>
 #include <linux/interrupt.h>
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index a8856f2b9bc2..6b477174a82f 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -24,6 +24,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
diff --git a/drivers/rtc/rtc-ftrtc010.c b/drivers/rtc/rtc-ftrtc010.c
index 61f798c6101f..8f1dd88fa827 100644
--- a/drivers/rtc/rtc-ftrtc010.c
+++ b/drivers/rtc/rtc-ftrtc010.c
@@ -26,6 +26,7 @@
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/clk.h>
 
 #define DRV_NAME        "rtc-ftrtc010"
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 1f892b238ddb..0fa33708fc49 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index c75f26dc8fcc..007879a5042d 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -8,6 +8,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/rtc/rtc-r7301.c b/drivers/rtc/rtc-r7301.c
index 169704b2ce13..1943c8151152 100644
--- a/drivers/rtc/rtc-r7301.c
+++ b/drivers/rtc/rtc-r7301.c
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/delay.h>
 #include <linux/regmap.h>
 #include <linux/platform_device.h>
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 4f98543d1ea5..776b70a14e03 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -15,6 +15,7 @@
  * for more details.
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/kernel.h>
 #include <linux/bcd.h>
 #include <linux/rtc.h>
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index 66efff60c4d5..8dc48fe7fc35 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -25,6 +25,7 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/rtc.h>
diff --git a/drivers/siox/siox-bus-gpio.c b/drivers/siox/siox-bus-gpio.c
index ea7ef982968b..46b4cda36bac 100644
--- a/drivers/siox/siox-bus-gpio.c
+++ b/drivers/siox/siox-bus-gpio.c
@@ -5,6 +5,7 @@
 
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/delay.h>
diff --git a/drivers/tty/goldfish.c b/drivers/tty/goldfish.c
index 37caba7c3aff..c8c5cdfc5e19 100644
--- a/drivers/tty/goldfish.c
+++ b/drivers/tty/goldfish.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/goldfish.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c
index f6a86f2bc4e5..2a76e22d2ec0 100644
--- a/drivers/tty/serial/8250/8250_em.c
+++ b/drivers/tty/serial/8250/8250_em.c
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/serial_8250.h>
 #include <linux/serial_reg.h>
 #include <linux/platform_device.h>
diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index d6ae3086c2a2..339befdd2f4d 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/device.h>
 #include <linux/console.h>
 #include <linux/serial_core.h>
diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c
index f29cf5c6160c..5a321992decc 100644
--- a/drivers/usb/gadget/udc/fsl_mxc_udc.c
+++ b/drivers/usb/gadget/udc/fsl_mxc_udc.c
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/fsl_devices.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
index 80dc47347e21..3079a3df8c37 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tpd12s015.c
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/gpio/consumer.h>
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index 8851d441e5fd..50b46c4399ea 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -17,6 +17,7 @@
 #include <linux/io.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 
 #include <linux/w1.h>
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index e3a78f927f83..f29d1edc5bad 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -7,6 +7,7 @@
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 #include <linux/interrupt.h>
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 6c6594261cb7..ebb85d60b6d5 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -13,6 +13,7 @@
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c
index 6ed39dee995f..a3134ffa59f8 100644
--- a/drivers/watchdog/imgpdc_wdt.c
+++ b/drivers/watchdog/imgpdc_wdt.c
@@ -44,6 +44,7 @@
 #include <linux/io.h>
 #include <linux/log2.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/max63xx_wdt.c b/drivers/watchdog/max63xx_wdt.c
index ac5840d9689a..bf6a068245ba 100644
--- a/drivers/watchdog/max63xx_wdt.c
+++ b/drivers/watchdog/max63xx_wdt.c
@@ -17,6 +17,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/watchdog.h>
diff --git a/drivers/watchdog/max77620_wdt.c b/drivers/watchdog/max77620_wdt.c
index 2c9f53eaff4f..70c9cd3ba938 100644
--- a/drivers/watchdog/max77620_wdt.c
+++ b/drivers/watchdog/max77620_wdt.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/mfd/max77620.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
diff --git a/drivers/watchdog/moxart_wdt.c b/drivers/watchdog/moxart_wdt.c
index 2c4a73d1e214..430c3ab84c07 100644
--- a/drivers/watchdog/moxart_wdt.c
+++ b/drivers/watchdog/moxart_wdt.c
@@ -13,6 +13,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index ae77112ce97f..cbd752f9ac56 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -29,6 +29,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/drivers/watchdog/tangox_wdt.c b/drivers/watchdog/tangox_wdt.c
index b1de8297fa40..d0b53f3c0d17 100644
--- a/drivers/watchdog/tangox_wdt.c
+++ b/drivers/watchdog/tangox_wdt.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 3097c943fab9..1a9f38f27f65 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -12,13 +12,13 @@
 #define _PLATFORM_DEVICE_H_
 
 #include <linux/device.h>
-#include <linux/mod_devicetable.h>
 
 #define PLATFORM_DEVID_NONE	(-1)
 #define PLATFORM_DEVID_AUTO	(-2)
 
 struct mfd_cell;
 struct property_entry;
+struct platform_device_id;
 
 struct platform_device {
 	const char	*name;
-- 
cgit v1.2.3


From 0ea488ff8d23c93da383fcf424825c298b13b1fb Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 5 Jul 2018 08:50:15 -0700
Subject: bpf: sockmap, convert bpf_compute_data_pointers to bpf_*_sk_skb

In commit

  'bpf: bpf_compute_data uses incorrect cb structure' (8108a7751512)

we added the routine bpf_compute_data_end_sk_skb() to compute the
correct data_end values, but this has since been lost. In kernel
v4.14 this was correct and the above patch was applied in it
entirety. Then when v4.14 was merged into v4.15-rc1 net-next tree
we lost the piece that renamed bpf_compute_data_pointers to the
new function bpf_compute_data_end_sk_skb. This was done here,

e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")

When it conflicted with the following rename patch,

6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")

Finally, after a refactor I thought even the function
bpf_compute_data_end_sk_skb() was no longer needed and it was
erroneously removed.

However, we never reverted the sk_skb_convert_ctx_access() usage of
tcp_skb_cb which had been committed and survived the merge conflict.
Here we fix this by adding back the helper and *_data_end_sk_skb()
usage. Using the bpf_skc_data_end mapping is not correct because it
expects a qdisc_skb_cb object but at the sock layer this is not the
case. Even though it happens to work here because we don't overwrite
any data in-use at the socket layer and the cb structure is cleared
later this has potential to create some subtle issues. But, even
more concretely the filter.c access check uses tcp_skb_cb.

And by some act of chance though,

struct bpf_skb_data_end {
        struct qdisc_skb_cb        qdisc_cb;             /*     0    28 */

        /* XXX 4 bytes hole, try to pack */

        void *                     data_meta;            /*    32     8 */
        void *                     data_end;             /*    40     8 */

        /* size: 48, cachelines: 1, members: 3 */
        /* sum members: 44, holes: 1, sum holes: 4 */
        /* last cacheline: 48 bytes */
};

and then tcp_skb_cb,

struct tcp_skb_cb {
	[...]
                struct {
                        __u32      flags;                /*    24     4 */
                        struct sock * sk_redir;          /*    32     8 */
                        void *     data_end;             /*    40     8 */
                } bpf;                                   /*          24 */
        };

So when we use offset_of() to track down the byte offset we get 40 in
either case and everything continues to work. Fix this mess and use
correct structures its unclear how long this might actually work for
until someone moves the structs around.

Reported-by: Martin KaFai Lau <kafai@fb.com>
Fixes: e1ea2f9856b7 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
Fixes: 6aaae2b6c433 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h    |  4 +++
 kernel/bpf/sockmap.c |  4 +--
 net/core/filter.c    | 98 ++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 97 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 800582b5dd54..af3ec72d5d41 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -828,6 +828,10 @@ struct tcp_skb_cb {
 
 #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
 
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index dfc8a8a07c1f..98fb7938beea 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1236,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
 	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	preempt_enable();
@@ -1491,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
 	 * any socket yet.
 	 */
 	skb->sk = psock->sock;
-	bpf_compute_data_pointers(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	skb->sk = NULL;
 	rcu_read_unlock();
diff --git a/net/core/filter.c b/net/core/filter.c
index 3095f1ba7015..470268024a40 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+					   unsigned int write_len)
+{
+	int err = __bpf_try_make_writable(skb, write_len);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+	.func		= sk_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
 	   u64, from, u64, to, u64, flags)
 {
@@ -2864,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
 	return __skb_trim_rcsum(skb, new_len);
 }
 
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
-	   u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+					u64 flags)
 {
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 min_len = __bpf_skb_min_len(skb);
@@ -2901,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 		if (!ret && skb_is_gso(skb))
 			skb_gso_reset(skb);
 	}
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
 
 	bpf_compute_data_pointers(skb);
 	return ret;
@@ -2915,8 +2953,26 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
 	   u64, flags)
+{
+	int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+	.func		= sk_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+					u64 flags)
 {
 	u32 max_len = __bpf_skb_max_len(skb);
 	u32 new_len = skb->len + head_room;
@@ -2942,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
 		skb_reset_mac_header(skb);
 	}
 
+	return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
 	bpf_compute_data_pointers(skb);
-	return 0;
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2955,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+	   u64, flags)
+{
+	int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+	bpf_compute_data_end_sk_skb(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+	.func		= sk_skb_change_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
 	return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -4618,9 +4699,12 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_skb_store_bytes ||
 	    func == bpf_skb_change_proto ||
 	    func == bpf_skb_change_head ||
+	    func == sk_skb_change_head ||
 	    func == bpf_skb_change_tail ||
+	    func == sk_skb_change_tail ||
 	    func == bpf_skb_adjust_room ||
 	    func == bpf_skb_pull_data ||
+	    func == sk_skb_pull_data ||
 	    func == bpf_clone_redirect ||
 	    func == bpf_l3_csum_replace ||
 	    func == bpf_l4_csum_replace ||
@@ -4872,11 +4956,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
 	case BPF_FUNC_skb_pull_data:
-		return &bpf_skb_pull_data_proto;
+		return &sk_skb_pull_data_proto;
 	case BPF_FUNC_skb_change_tail:
-		return &bpf_skb_change_tail_proto;
+		return &sk_skb_change_tail_proto;
 	case BPF_FUNC_skb_change_head:
-		return &bpf_skb_change_head_proto;
+		return &sk_skb_change_head_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_proto;
 	case BPF_FUNC_get_socket_uid:
-- 
cgit v1.2.3


From d8d7218ad842e18fc6976b87c08ed749e8d56313 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 6 Jul 2018 11:49:00 +0900
Subject: xdp: XDP_REDIRECT should check IFF_UP and MTU

Otherwise we end up with attempting to send packets from down devices
or to send oversized packets, which may cause unexpected driver/device
behaviour. Generic XDP has already done this check, so reuse the logic
in native XDP.

Fixes: 814abfabef3c ("xdp: add bpf_redirect helper function")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 6 +++---
 kernel/bpf/devmap.c    | 7 ++++++-
 net/core/filter.c      | 9 +++++++--
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 300baad62c88..c73dd7396886 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -765,8 +765,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
-static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
-					   struct net_device *fwd)
+static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
+				 unsigned int pktlen)
 {
 	unsigned int len;
 
@@ -774,7 +774,7 @@ static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
 		return -ENETDOWN;
 
 	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-	if (skb->len > len)
+	if (pktlen > len)
 		return -EMSGSIZE;
 
 	return 0;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 642c97f6d1b8..d361fc1e3bf3 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -334,10 +334,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 	struct xdp_frame *xdpf;
+	int err;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit)
 		return -EOPNOTSUPP;
 
+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -350,7 +355,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 {
 	int err;
 
-	err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+	err = xdp_ok_fwd_dev(dst->dev, skb->len);
 	if (unlikely(err))
 		return err;
 	skb->dev = dst->dev;
diff --git a/net/core/filter.c b/net/core/filter.c
index 470268024a40..5fa66a33927f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3128,12 +3128,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
 			u32 index)
 {
 	struct xdp_frame *xdpf;
-	int sent;
+	int err, sent;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
 	}
 
+	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	if (unlikely(err))
+		return err;
+
 	xdpf = convert_to_xdp_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
@@ -3367,7 +3371,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 		goto err;
 	}
 
-	if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+	err = xdp_ok_fwd_dev(fwd, skb->len);
+	if (unlikely(err))
 		goto err;
 
 	skb->dev = fwd;
-- 
cgit v1.2.3


From b233504033dbd65740e59681820ccfd0a2a8ec53 Mon Sep 17 00:00:00 2001
From: Yifeng Sun <pkusunyifeng@gmail.com>
Date: Mon, 2 Jul 2018 08:18:03 -0700
Subject: openvswitch: kernel datapath clone action

Add 'clone' action to kernel datapath by using existing functions.
When actions within clone don't modify the current flow, the flow
key is not cloned before executing clone actions.

This is a follow up patch for this incomplete work:
https://patchwork.ozlabs.org/patch/722096/

v1 -> v2:
Refactor as advised by reviewer.

Signed-off-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: Andy Zhou <azhou@ovn.org>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/openvswitch.h      |  5 +++
 include/uapi/linux/openvswitch.h |  3 ++
 net/openvswitch/actions.c        | 33 ++++++++++++++++++
 net/openvswitch/flow_netlink.c   | 73 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+)

(limited to 'include')

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e6b240b6196c..379affc63e24 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -21,4 +21,9 @@
 
 #include <uapi/linux/openvswitch.h>
 
+#define OVS_CLONE_ATTR_EXEC      0   /* Specify an u32 value. When nonzero,
+				      * actions in clone will not change flow
+				      * keys. False otherwise.
+				      */
+
 #endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 863aabaa5cc9..dbe0cbe4f1b7 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -840,6 +840,8 @@ struct ovs_action_push_eth {
  * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the
  * packet, or modify the packet (e.g., change the DSCP field).
+ * @OVS_ACTION_ATTR_CLONE: make a copy of the packet and execute a list of
+ * actions without affecting the original packet and key.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -873,6 +875,7 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_PUSH_NSH,     /* Nested OVS_NSH_KEY_ATTR_*. */
 	OVS_ACTION_ATTR_POP_NSH,      /* No argument. */
 	OVS_ACTION_ATTR_METER,        /* u32 meter ID. */
+	OVS_ACTION_ATTR_CLONE,        /* Nested OVS_CLONE_ATTR_*.  */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 30a5df27116e..85ae53d8fd09 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1057,6 +1057,28 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 			     clone_flow_key);
 }
 
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *attr,
+		 bool last)
+{
+	struct nlattr *actions;
+	struct nlattr *clone_arg;
+	int rem = nla_len(attr);
+	bool dont_clone_flow_key;
+
+	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+	clone_arg = nla_data(attr);
+	dont_clone_flow_key = nla_get_u32(clone_arg);
+	actions = nla_next(clone_arg, &rem);
+
+	return clone_execute(dp, skb, key, 0, actions, rem, last,
+			     !dont_clone_flow_key);
+}
+
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 			 const struct nlattr *attr)
 {
@@ -1336,6 +1358,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				consume_skb(skb);
 				return 0;
 			}
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = clone(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 391c4073a6dc..a70097ecf33c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2460,6 +2460,40 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_and_copy_clone(struct net *net,
+				   const struct nlattr *attr,
+				   const struct sw_flow_key *key,
+				   struct sw_flow_actions **sfa,
+				   __be16 eth_type, __be16 vlan_tci,
+				   bool log, bool last)
+{
+	int start, err;
+	u32 exec;
+
+	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+		return -EINVAL;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+	if (start < 0)
+		return start;
+
+	exec = last || !actions_may_change_flow(attr);
+
+	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+				 sizeof(exec), log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, attr, key, sfa,
+				     eth_type, vlan_tci, log);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    bool reset_key,
@@ -2849,6 +2883,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
+			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3038,6 +3073,18 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			/* Non-existent meters are simply ignored.  */
 			break;
 
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_clone(net, a, key, sfa,
+						      eth_type, vlan_tci,
+						      log, last);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3116,6 +3163,26 @@ out:
 	return err;
 }
 
+static int clone_action_to_attr(const struct nlattr *attr,
+				struct sk_buff *skb)
+{
+	struct nlattr *start;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+	if (!start)
+		return -EMSGSIZE;
+
+	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+
+	if (err)
+		nla_nest_cancel(skb, start);
+	else
+		nla_nest_end(skb, start);
+
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3204,6 +3271,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CLONE:
+			err = clone_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
-- 
cgit v1.2.3


From eec94fdb04806790c7b7e6ea347820064cc6d467 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:23 +0300
Subject: net: sched: use rcu for action cookie update

Implement functions to atomically update and free action cookie
using rcu mechanism.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  2 +-
 include/net/pkt_cls.h |  1 +
 net/sched/act_api.c   | 44 ++++++++++++++++++++++++++++++--------------
 3 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 5ff11adbe2a6..ffc3ef321776 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -37,7 +37,7 @@ struct tc_action {
 	spinlock_t			tcfa_lock;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
 	struct gnet_stats_queue __percpu *cpu_qstats;
-	struct tc_cookie	*act_cookie;
+	struct tc_cookie	__rcu *act_cookie;
 	struct tcf_chain	*goto_chain;
 };
 #define tcf_index	common.tcfa_index
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6641584b27f1..2081e4219f81 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -781,6 +781,7 @@ struct tc_mqprio_qopt_offload {
 struct tc_cookie {
 	u8  *data;
 	u32 len;
+	struct rcu_head rcu;
 };
 
 struct tc_qopt_offload_stats {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3f4cf930f809..02670c7489e3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -55,6 +55,24 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
+static void tcf_free_cookie_rcu(struct rcu_head *p)
+{
+	struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
+
+	kfree(cookie->data);
+	kfree(cookie);
+}
+
+static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
+				  struct tc_cookie *new_cookie)
+{
+	struct tc_cookie *old;
+
+	old = xchg(old_cookie, new_cookie);
+	if (old)
+		call_rcu(&old->rcu, tcf_free_cookie_rcu);
+}
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -65,10 +83,7 @@ static void free_tcf(struct tc_action *p)
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
-	if (p->act_cookie) {
-		kfree(p->act_cookie->data);
-		kfree(p->act_cookie);
-	}
+	tcf_set_action_cookie(&p->act_cookie, NULL);
 	if (p->goto_chain)
 		tcf_action_goto_chain_fini(p);
 
@@ -567,16 +582,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
+	struct tc_cookie *cookie;
 
 	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
 		goto nla_put_failure;
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto nla_put_failure;
-	if (a->act_cookie) {
-		if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
-			    a->act_cookie->data))
+
+	rcu_read_lock();
+	cookie = rcu_dereference(a->act_cookie);
+	if (cookie) {
+		if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+			rcu_read_unlock();
 			goto nla_put_failure;
+		}
 	}
+	rcu_read_unlock();
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
@@ -719,13 +740,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (err < 0)
 		goto err_mod;
 
-	if (name == NULL && tb[TCA_ACT_COOKIE]) {
-		if (a->act_cookie) {
-			kfree(a->act_cookie->data);
-			kfree(a->act_cookie);
-		}
-		a->act_cookie = cookie;
-	}
+	if (!name && tb[TCA_ACT_COOKIE])
+		tcf_set_action_cookie(&a->act_cookie, cookie);
 
 	/* module count goes up only when brand new policy is created
 	 * if it exists and is only bound to in a_o->init() then
-- 
cgit v1.2.3


From 036bb44327f50273e85ee4a2c9b56eebce1c0838 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:24 +0300
Subject: net: sched: change type of reference and bind counters

Change type of action reference counter to refcount_t.

Change type of action bind counter to atomic_t.
This type is used to allow decrementing bind counter without testing
for 0 result.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  5 +++--
 net/sched/act_api.c        | 32 ++++++++++++++++++++++----------
 net/sched/act_bpf.c        |  4 ++--
 net/sched/act_connmark.c   |  4 ++--
 net/sched/act_csum.c       |  4 ++--
 net/sched/act_gact.c       |  4 ++--
 net/sched/act_ife.c        |  4 ++--
 net/sched/act_ipt.c        |  4 ++--
 net/sched/act_mirred.c     |  4 ++--
 net/sched/act_nat.c        |  4 ++--
 net/sched/act_pedit.c      |  4 ++--
 net/sched/act_police.c     |  4 ++--
 net/sched/act_sample.c     |  4 ++--
 net/sched/act_simple.c     |  4 ++--
 net/sched/act_skbedit.c    |  4 ++--
 net/sched/act_skbmod.c     |  4 ++--
 net/sched/act_tunnel_key.c |  4 ++--
 net/sched/act_vlan.c       |  4 ++--
 18 files changed, 57 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index ffc3ef321776..2759226527a2 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -6,6 +6,7 @@
  * Public action API for classifiers/qdiscs
 */
 
+#include <linux/refcount.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
@@ -26,8 +27,8 @@ struct tc_action {
 	struct tcf_idrinfo		*idrinfo;
 
 	u32				tcfa_index;
-	int				tcfa_refcnt;
-	int				tcfa_bindcnt;
+	refcount_t			tcfa_refcnt;
+	atomic_t			tcfa_bindcnt;
 	u32				tcfa_capab;
 	int				tcfa_action;
 	struct tcf_t			tcfa_tm;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 02670c7489e3..4f064ecab882 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -105,14 +105,26 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 
 	ASSERT_RTNL();
 
+	/* Release with strict==1 and bind==0 is only called through act API
+	 * interface (classifiers always bind). Only case when action with
+	 * positive reference count and zero bind count can exist is when it was
+	 * also created with act API (unbinding last classifier will destroy the
+	 * action if it was created by classifier). So only case when bind count
+	 * can be changed after initial check is when unbound action is
+	 * destroyed by act API while classifier binds to action with same id
+	 * concurrently. This result either creation of new action(same behavior
+	 * as before), or reusing existing action if concurrent process
+	 * increments reference count before action is deleted. Both scenarios
+	 * are acceptable.
+	 */
 	if (p) {
 		if (bind)
-			p->tcfa_bindcnt--;
-		else if (strict && p->tcfa_bindcnt > 0)
+			atomic_dec(&p->tcfa_bindcnt);
+		else if (strict && atomic_read(&p->tcfa_bindcnt) > 0)
 			return -EPERM;
 
-		p->tcfa_refcnt--;
-		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
+		if (atomic_read(&p->tcfa_bindcnt) <= 0 &&
+		    refcount_dec_and_test(&p->tcfa_refcnt)) {
 			if (p->ops->cleanup)
 				p->ops->cleanup(p);
 			tcf_idr_remove(p->idrinfo, p);
@@ -304,8 +316,8 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 
 	if (index && p) {
 		if (bind)
-			p->tcfa_bindcnt++;
-		p->tcfa_refcnt++;
+			atomic_inc(&p->tcfa_bindcnt);
+		refcount_inc(&p->tcfa_refcnt);
 		*a = p;
 		return true;
 	}
@@ -324,9 +336,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 
 	if (unlikely(!p))
 		return -ENOMEM;
-	p->tcfa_refcnt = 1;
+	refcount_set(&p->tcfa_refcnt, 1);
 	if (bind)
-		p->tcfa_bindcnt = 1;
+		atomic_set(&p->tcfa_bindcnt, 1);
 
 	if (cpustats) {
 		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
@@ -782,7 +794,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
 		return;
 
 	list_for_each_entry(a, actions, list)
-		a->tcfa_refcnt--;
+		refcount_dec(&a->tcfa_refcnt);
 }
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
@@ -810,7 +822,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		act->order = i;
 		sz += tcf_action_fill_size(act);
 		if (ovr)
-			act->tcfa_refcnt++;
+			refcount_inc(&act->tcfa_refcnt);
 		list_add_tail(&act->list, actions);
 	}
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 18089c02e557..15a2a53cbde1 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 	struct tcf_bpf *prog = to_bpf(act);
 	struct tc_act_bpf opt = {
 		.index   = prog->tcf_index,
-		.refcnt  = prog->tcf_refcnt - ref,
-		.bindcnt = prog->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
 		.action  = prog->tcf_action,
 	};
 	struct tcf_t tm;
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880fa51fe..188865034f9a 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -154,8 +154,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
 
 	struct tc_connmark opt = {
 		.index   = ci->tcf_index,
-		.refcnt  = ci->tcf_refcnt - ref,
-		.bindcnt = ci->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
 		.action  = ci->tcf_action,
 		.zone   = ci->zone,
 	};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 526a8e491626..da865f7b390a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -597,8 +597,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_csum_params *params;
 	struct tc_csum opt = {
 		.index   = p->tcf_index,
-		.refcnt  = p->tcf_refcnt - ref,
-		.bindcnt = p->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4dc4f153cad8..ca83debd5a70 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -169,8 +169,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_gact *gact = to_gact(a);
 	struct tc_gact opt = {
 		.index   = gact->tcf_index,
-		.refcnt  = gact->tcf_refcnt - ref,
-		.bindcnt = gact->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&gact->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
 		.action  = gact->tcf_action,
 	};
 	struct tcf_t t;
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 20d7d36b2fc9..3536a23f46b5 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -596,8 +596,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_ife_params *p = rtnl_dereference(ife->params);
 	struct tc_ife opt = {
 		.index = ife->tcf_index,
-		.refcnt = ife->tcf_refcnt - ref,
-		.bindcnt = ife->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&ife->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
 		.action = ife->tcf_action,
 		.flags = p->flags,
 	};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 14c312d7908f..7bce88dc11c9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -280,8 +280,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	if (unlikely(!t))
 		goto nla_put_failure;
 
-	c.bindcnt = ipt->tcf_bindcnt - bind;
-	c.refcnt = ipt->tcf_refcnt - ref;
+	c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
+	c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
 	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
 	if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015331ab..82a8bdd67c47 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -250,8 +250,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tc_mirred opt = {
 		.index   = m->tcf_index,
 		.action  = m->tcf_action,
-		.refcnt  = m->tcf_refcnt - ref,
-		.bindcnt = m->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&m->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
 		.eaction = m->tcfm_eaction,
 		.ifindex = dev ? dev->ifindex : 0,
 	};
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b6c252..457c2ae3de46 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -257,8 +257,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 
 		.index    = p->tcf_index,
 		.action   = p->tcf_action,
-		.refcnt   = p->tcf_refcnt - ref,
-		.bindcnt  = p->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index e43aef28fdac..889690e0ec39 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -409,8 +409,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->nkeys = p->tcfp_nkeys;
 	opt->flags = p->tcfp_flags;
 	opt->action = p->tcf_action;
-	opt->refcnt = p->tcf_refcnt - ref;
-	opt->bindcnt = p->tcf_bindcnt - bind;
+	opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
 	if (p->tcfp_keys_ex) {
 		tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 4e72bc2a0dfb..a789b8060968 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -274,8 +274,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
 		.action = police->tcf_action,
 		.mtu = police->tcfp_mtu,
 		.burst = PSCHED_NS2TICKS(police->tcfp_burst),
-		.refcnt = police->tcf_refcnt - ref,
-		.bindcnt = police->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&police->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5db358497c9e..4a46978db092 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -173,8 +173,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tc_sample opt = {
 		.index      = s->tcf_index,
 		.action     = s->tcf_action,
-		.refcnt     = s->tcf_refcnt - ref,
-		.bindcnt    = s->tcf_bindcnt - bind,
+		.refcnt     = refcount_read(&s->tcf_refcnt) - ref,
+		.bindcnt    = atomic_read(&s->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 98c4afe7c15b..c3a761097b01 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -145,8 +145,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_defact *d = to_defact(a);
 	struct tc_defact opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index dfaf5d8028dd..cfd20d3d2ca9 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -208,8 +208,8 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_skbedit *d = to_skbedit(a);
 	struct tc_skbedit opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ad050d7d4b46..ff90d720eda3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -205,8 +205,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
 	struct tc_skbmod opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index ea203e386a92..2354f07eba15 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -474,8 +474,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_tunnel_key_params *params;
 	struct tc_tunnel_key opt = {
 		.index    = t->tcf_index,
-		.refcnt   = t->tcf_refcnt - ref,
-		.bindcnt  = t->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&t->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
 	};
 	struct tcf_t tm;
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 1fb39e1f9d07..799e3deb44ac 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -239,8 +239,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
 	struct tc_vlan opt = {
 		.index    = v->tcf_index,
-		.refcnt   = v->tcf_refcnt - ref,
-		.bindcnt  = v->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&v->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&v->tcf_bindcnt) - bind,
 		.action   = v->tcf_action,
 		.v_action = p->tcfv_action,
 	};
-- 
cgit v1.2.3


From 789871bb2a0381425b106d2a995bde1460d35a34 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:25 +0300
Subject: net: sched: implement unlocked action init API

Add additional 'rtnl_held' argument to act API init functions. It is
required to implement actions that need to release rtnl lock before loading
kernel module and reacquire if afterwards.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  6 ++++--
 net/sched/act_api.c        | 18 +++++++++++-------
 net/sched/act_bpf.c        |  3 ++-
 net/sched/act_connmark.c   |  2 +-
 net/sched/act_csum.c       |  3 ++-
 net/sched/act_gact.c       |  3 ++-
 net/sched/act_ife.c        |  3 ++-
 net/sched/act_ipt.c        |  6 ++++--
 net/sched/act_mirred.c     |  5 +++--
 net/sched/act_nat.c        |  2 +-
 net/sched/act_pedit.c      |  3 ++-
 net/sched/act_police.c     |  2 +-
 net/sched/act_sample.c     |  3 ++-
 net/sched/act_simple.c     |  3 ++-
 net/sched/act_skbedit.c    |  3 ++-
 net/sched/act_skbmod.c     |  3 ++-
 net/sched/act_tunnel_key.c |  3 ++-
 net/sched/act_vlan.c       |  3 ++-
 net/sched/cls_api.c        |  5 +++--
 19 files changed, 50 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 2759226527a2..27823f4e24c4 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -92,7 +92,8 @@ struct tc_action_ops {
 			  struct netlink_ext_ack *extack);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
-			int bind, struct netlink_ext_ack *extack);
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
 			const struct tc_action_ops *,
@@ -168,10 +169,11 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
 		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack);
+		    bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 4f064ecab882..256b0c93916c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -671,6 +671,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
@@ -721,9 +722,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	a_o = tc_lookup_action_n(act_name);
 	if (a_o == NULL) {
 #ifdef CONFIG_MODULES
-		rtnl_unlock();
+		if (rtnl_held)
+			rtnl_unlock();
 		request_module("act_%s", act_name);
-		rtnl_lock();
+		if (rtnl_held)
+			rtnl_lock();
 
 		a_o = tc_lookup_action_n(act_name);
 
@@ -746,9 +749,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-				extack);
+				rtnl_held, extack);
 	else
-		err = a_o->init(net, nla, est, &a, ovr, bind, extack);
+		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
+				extack);
 	if (err < 0)
 		goto err_mod;
 
@@ -800,7 +804,7 @@ static void cleanup_a(struct list_head *actions, int ovr)
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
 		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack)
+		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -814,7 +818,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					extack);
+					rtnl_held, extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
@@ -1173,7 +1177,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	LIST_HEAD(actions);
 
 	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-			      &attr_size, extack);
+			      &attr_size, true, extack);
 	if (ret)
 		return ret;
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 15a2a53cbde1..8ebf40a3506c 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -276,7 +276,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
-			int replace, int bind, struct netlink_ext_ack *extack)
+			int replace, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 188865034f9a..e3787aa0025a 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
-			     int ovr, int bind,
+			     int ovr, int bind, bool rtnl_held,
 			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index da865f7b390a..334261943f9f 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,8 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind, struct netlink_ext_ack *extack)
+			 int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_old, *params_new;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ca83debd5a70..b4dfb2b4addc 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3536a23f46b5..576ffbba61c3 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -448,7 +448,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
-			int ovr, int bind, struct netlink_ext_ack *extack)
+			int ovr, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 7bce88dc11c9..9c21663a86a6 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -196,7 +196,8 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind, struct netlink_ext_ack *extack)
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
 			      bind);
@@ -204,7 +205,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind, struct netlink_ext_ack *extack)
+		       int bind, bool unlocked,
+		       struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
 			      bind);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 82a8bdd67c47..5434f08f2eb7 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -68,8 +68,9 @@ static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
-			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 457c2ae3de46..e6487ad1e4a8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,7 +38,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action **a, int ovr, int bind,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 889690e0ec39..f7965f35585b 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,7 +132,8 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  int ovr, int bind, struct netlink_ext_ack *extack)
+			  int ovr, int bind, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index a789b8060968..0e1c2fb0ebea 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -75,7 +75,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
-			       int ovr, int bind,
+			       int ovr, int bind, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, err;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 4a46978db092..316fc645595d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,8 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c3a761097b01..dc591cc87f4a 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfd20d3d2ca9..c4ae4bd830aa 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -94,7 +94,8 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
-			    int ovr, int bind, struct netlink_ext_ack *extack)
+			    int ovr, int bind, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ff90d720eda3..026d6f58eda1 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,8 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2354f07eba15..15ea5ce0f9ed 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,7 +201,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 799e3deb44ac..c61775250722 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index bbf8dda96b0e..ebc2b9dd783f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1632,7 +1632,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, extack);
+						TCA_ACT_BIND, true, extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1645,7 +1645,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, &attr_size, extack);
+					      &actions, &attr_size, true,
+					      extack);
 			if (err)
 				return err;
 			list_for_each_entry(act, &actions, list)
-- 
cgit v1.2.3


From 2a2ea349704fffade9526d5122299edbbfd122ca Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:27 +0300
Subject: net: sched: implement action API that deletes action by index

Implement new action API function that atomically finds and deletes action
from idr by index. Intended to be used by lockless actions that do not rely
on rtnl lock.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  1 +
 net/sched/act_api.c   | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 27823f4e24c4..a8eaae67c264 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -153,6 +153,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
 static inline int tcf_idr_release(struct tc_action *a, bool bind)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index aa304d36fee0..0f31f09946ab 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -319,6 +319,45 @@ bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 }
 EXPORT_SYMBOL(tcf_idr_check);
 
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
+	int ret = 0;
+
+	spin_lock(&idrinfo->lock);
+	p = idr_find(&idrinfo->action_idr, index);
+	if (!p) {
+		spin_unlock(&idrinfo->lock);
+		return -ENOENT;
+	}
+
+	if (!atomic_read(&p->tcfa_bindcnt)) {
+		if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+			struct module *owner = p->ops->owner;
+
+			WARN_ON(p != idr_remove(&idrinfo->action_idr,
+						p->tcfa_index));
+			spin_unlock(&idrinfo->lock);
+
+			if (p->ops->cleanup)
+				p->ops->cleanup(p);
+
+			gen_kill_estimator(&p->tcfa_rate_est);
+			free_tcf(p);
+			module_put(owner);
+			return 0;
+		}
+		ret = 0;
+	} else {
+		ret = -EPERM;
+	}
+
+	spin_unlock(&idrinfo->lock);
+	return ret;
+}
+EXPORT_SYMBOL(tcf_idr_delete_index);
+
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   struct tc_action **a, const struct tc_action_ops *ops,
 		   int bind, bool cpustats)
-- 
cgit v1.2.3


From b409074e6693bcdaa7abbee2a035f22a9eabda53 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:28 +0300
Subject: net: sched: add 'delete' function to action ops

Extend action ops with 'delete' function. Each action type to implements
its own delete function that doesn't depend on rtnl lock.

Implement delete function that is required to delete actions without
holding rtnl lock. Use action API function that atomically deletes action
only if it is still in action idr. This implementation prevents concurrent
threads from deleting same action twice.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  1 +
 net/sched/act_bpf.c        |  8 ++++++++
 net/sched/act_connmark.c   |  8 ++++++++
 net/sched/act_csum.c       |  8 ++++++++
 net/sched/act_gact.c       |  8 ++++++++
 net/sched/act_ife.c        |  8 ++++++++
 net/sched/act_ipt.c        | 16 ++++++++++++++++
 net/sched/act_mirred.c     |  8 ++++++++
 net/sched/act_nat.c        |  8 ++++++++
 net/sched/act_pedit.c      |  8 ++++++++
 net/sched/act_police.c     |  8 ++++++++
 net/sched/act_sample.c     |  8 ++++++++
 net/sched/act_simple.c     |  8 ++++++++
 net/sched/act_skbedit.c    |  8 ++++++++
 net/sched/act_skbmod.c     |  8 ++++++++
 net/sched/act_tunnel_key.c |  8 ++++++++
 net/sched/act_vlan.c       |  8 ++++++++
 17 files changed, 137 insertions(+)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index a8eaae67c264..b9ed2b8256a5 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	int     (*delete)(struct net *net, u32 index);
 };
 
 struct tc_action_net {
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 8ebf40a3506c..7941dd66ff83 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -388,6 +388,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
 	.type		=	TCA_ACT_BPF,
@@ -398,6 +405,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.init		=	tcf_bpf_init,
 	.walk		=	tcf_bpf_walker,
 	.lookup		=	tcf_bpf_search,
+	.delete		=	tcf_bpf_delete,
 	.size		=	sizeof(struct tcf_bpf),
 };
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e3787aa0025a..143c2d3de723 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -193,6 +193,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
 	.type		=	TCA_ACT_CONNMARK,
@@ -202,6 +209,7 @@ static struct tc_action_ops act_connmark_ops = {
 	.init		=	tcf_connmark_init,
 	.walk		=	tcf_connmark_walker,
 	.lookup		=	tcf_connmark_search,
+	.delete		=	tcf_connmark_delete,
 	.size		=	sizeof(struct tcf_connmark_info),
 };
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 334261943f9f..3768539340e0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -654,6 +654,13 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
 	return nla_total_size(sizeof(struct tc_csum));
 }
 
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
 	.type		= TCA_ACT_CSUM,
@@ -665,6 +672,7 @@ static struct tc_action_ops act_csum_ops = {
 	.walk		= tcf_csum_walker,
 	.lookup		= tcf_csum_search,
 	.get_fill_size  = tcf_csum_get_fill_size,
+	.delete		= tcf_csum_delete,
 	.size		= sizeof(struct tcf_csum),
 };
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b4dfb2b4addc..a431a711f0dd 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -231,6 +231,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 	return sz;
 }
 
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
 	.type		=	TCA_ACT_GACT,
@@ -242,6 +249,7 @@ static struct tc_action_ops act_gact_ops = {
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
 	.get_fill_size	=	tcf_gact_get_fill_size,
+	.delete		=	tcf_gact_delete,
 	.size		=	sizeof(struct tcf_gact),
 };
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 576ffbba61c3..89a761395c94 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -844,6 +844,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
 	.type = TCA_ACT_IFE,
@@ -854,6 +861,7 @@ static struct tc_action_ops act_ife_ops = {
 	.init = tcf_ife_init,
 	.walk = tcf_ife_walker,
 	.lookup = tcf_ife_search,
+	.delete = tcf_ife_delete,
 	.size =	sizeof(struct tcf_ife_info),
 };
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9c21663a86a6..6c234411c771 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -324,6 +324,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
 	.type		=	TCA_ACT_IPT,
@@ -334,6 +341,7 @@ static struct tc_action_ops act_ipt_ops = {
 	.init		=	tcf_ipt_init,
 	.walk		=	tcf_ipt_walker,
 	.lookup		=	tcf_ipt_search,
+	.delete		=	tcf_ipt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
@@ -374,6 +382,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
 	.type		=	TCA_ACT_XT,
@@ -384,6 +399,7 @@ static struct tc_action_ops act_xt_ops = {
 	.init		=	tcf_xt_init,
 	.walk		=	tcf_xt_walker,
 	.lookup		=	tcf_xt_search,
+	.delete		=	tcf_xt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5434f08f2eb7..3d8300bce7e4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -322,6 +322,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 	return rtnl_dereference(m->tcfm_dev);
 }
 
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.type		=	TCA_ACT_MIRRED,
@@ -335,6 +342,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
+	.delete		=	tcf_mirred_delete,
 };
 
 static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index e6487ad1e4a8..9eb27c89dc46 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -294,6 +294,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
 	.type		=	TCA_ACT_NAT,
@@ -303,6 +310,7 @@ static struct tc_action_ops act_nat_ops = {
 	.init		=	tcf_nat_init,
 	.walk		=	tcf_nat_walker,
 	.lookup		=	tcf_nat_search,
+	.delete		=	tcf_nat_delete,
 	.size		=	sizeof(struct tcf_nat),
 };
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index f7965f35585b..45871052840f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -454,6 +454,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
 	.type		=	TCA_ACT_PEDIT,
@@ -464,6 +471,7 @@ static struct tc_action_ops act_pedit_ops = {
 	.init		=	tcf_pedit_init,
 	.walk		=	tcf_pedit_walker,
 	.lookup		=	tcf_pedit_search,
+	.delete		=	tcf_pedit_delete,
 	.size		=	sizeof(struct tcf_pedit),
 };
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0e1c2fb0ebea..c955fb0d4f3f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -314,6 +314,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, police_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -327,6 +334,7 @@ static struct tc_action_ops act_police_ops = {
 	.init		=	tcf_act_police_init,
 	.walk		=	tcf_act_police_walker,
 	.lookup		=	tcf_police_search,
+	.delete		=	tcf_police_delete,
 	.size		=	sizeof(struct tcf_police),
 };
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 316fc645595d..6f79d2afcba2 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -220,6 +220,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
 	.type	  = TCA_ACT_SAMPLE,
@@ -230,6 +237,7 @@ static struct tc_action_ops act_sample_ops = {
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.delete	  = tcf_sample_delete,
 	.size	  = sizeof(struct tcf_sample),
 };
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index dc591cc87f4a..446c750f3d3c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -184,6 +184,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
 	.type		=	TCA_ACT_SIMP,
@@ -194,6 +201,7 @@ static struct tc_action_ops act_simp_ops = {
 	.init		=	tcf_simp_init,
 	.walk		=	tcf_simp_walker,
 	.lookup		=	tcf_simp_search,
+	.delete		=	tcf_simp_delete,
 	.size		=	sizeof(struct tcf_defact),
 };
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index c4ae4bd830aa..b3eaa120c7f4 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -267,6 +267,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
 	.type		=	TCA_ACT_SKBEDIT,
@@ -276,6 +283,7 @@ static struct tc_action_ops act_skbedit_ops = {
 	.init		=	tcf_skbedit_init,
 	.walk		=	tcf_skbedit_walker,
 	.lookup		=	tcf_skbedit_search,
+	.delete		=	tcf_skbedit_delete,
 	.size		=	sizeof(struct tcf_skbedit),
 };
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 026d6f58eda1..30be3f767495 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -253,6 +253,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
 	.type		=	TCA_ACT_SKBMOD,
@@ -263,6 +270,7 @@ static struct tc_action_ops act_skbmod_ops = {
 	.cleanup	=	tcf_skbmod_cleanup,
 	.walk		=	tcf_skbmod_walker,
 	.lookup		=	tcf_skbmod_search,
+	.delete		=	tcf_skbmod_delete,
 	.size		=	sizeof(struct tcf_skbmod),
 };
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 15ea5ce0f9ed..655ed0b3fc67 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -534,6 +534,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
 	.type		=	TCA_ACT_TUNNEL_KEY,
@@ -544,6 +551,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
 	.cleanup	=	tunnel_key_release,
 	.walk		=	tunnel_key_walker,
 	.lookup		=	tunnel_key_search,
+	.delete		=	tunnel_key_delete,
 	.size		=	sizeof(struct tcf_tunnel_key),
 };
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index c61775250722..e334d2751784 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -287,6 +287,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
 	.type		=	TCA_ACT_VLAN,
@@ -297,6 +304,7 @@ static struct tc_action_ops act_vlan_ops = {
 	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
 	.lookup		=	tcf_vlan_search,
+	.delete		=	tcf_vlan_delete,
 	.size		=	sizeof(struct tcf_vlan),
 };
 
-- 
cgit v1.2.3


From 0190c1d452a91c38a3462abdd81752be1b9006a8 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:32 +0300
Subject: net: sched: atomically check-allocate action

Implement function that atomically checks if action exists and either takes
reference to it, or allocates idr slot for action index to prevent
concurrent allocations of actions with same index. Use EBUSY error pointer
to indicate that idr slot is reserved.

Implement cleanup helper function that removes temporary error pointer from
idr. (in case of error between idr allocation and insertion of newly
created action to specified index)

Refactor all action init functions to insert new action to idr using this
API.

Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  3 ++
 net/sched/act_api.c        | 92 ++++++++++++++++++++++++++++++++++++----------
 net/sched/act_bpf.c        | 11 ++++--
 net/sched/act_connmark.c   | 10 +++--
 net/sched/act_csum.c       | 11 ++++--
 net/sched/act_gact.c       | 11 ++++--
 net/sched/act_ife.c        |  6 ++-
 net/sched/act_ipt.c        | 13 ++++++-
 net/sched/act_mirred.c     | 16 ++++++--
 net/sched/act_nat.c        | 11 ++++--
 net/sched/act_pedit.c      | 12 ++++--
 net/sched/act_police.c     |  9 ++++-
 net/sched/act_sample.c     | 11 ++++--
 net/sched/act_simple.c     | 11 +++++-
 net/sched/act_skbedit.c    | 11 +++++-
 net/sched/act_skbmod.c     | 11 +++++-
 net/sched/act_tunnel_key.c |  9 ++++-
 net/sched/act_vlan.c       | 17 ++++++++-
 18 files changed, 216 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index b9ed2b8256a5..8090de2edab7 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -154,6 +154,9 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   int bind, bool cpustats);
 void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
 
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind);
 int tcf_idr_delete_index(struct tc_action_net *tn, u32 index);
 int __tcf_idr_release(struct tc_action *a, bool bind, bool strict);
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index eefe8c2fe667..9511502e1cbb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -303,7 +303,9 @@ static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
 
 	spin_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
-	if (p) {
+	if (IS_ERR(p)) {
+		p = NULL;
+	} else if (p) {
 		refcount_inc(&p->tcfa_refcnt);
 		if (bind)
 			atomic_inc(&p->tcfa_bindcnt);
@@ -371,7 +373,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 {
 	struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
 
 	if (unlikely(!p))
@@ -389,20 +390,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 			goto err2;
 	}
 	spin_lock_init(&p->tcfa_lock);
-	idr_preload(GFP_KERNEL);
-	spin_lock(&idrinfo->lock);
-	/* user doesn't specify an index */
-	if (!index) {
-		index = 1;
-		err = idr_alloc_u32(idr, NULL, &index, UINT_MAX, GFP_ATOMIC);
-	} else {
-		err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC);
-	}
-	spin_unlock(&idrinfo->lock);
-	idr_preload_end();
-	if (err)
-		goto err3;
-
 	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
@@ -412,7 +399,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err)
-			goto err4;
+			goto err3;
 	}
 
 	p->idrinfo = idrinfo;
@@ -420,8 +407,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
-err4:
-	idr_remove(idr, index);
 err3:
 	free_percpu(p->cpu_qstats);
 err2:
@@ -437,11 +422,78 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	spin_lock(&idrinfo->lock);
-	idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
 	spin_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
+/* Cleanup idr index that was allocated but not initialized. */
+
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+	spin_lock(&idrinfo->lock);
+	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
+	spin_unlock(&idrinfo->lock);
+}
+EXPORT_SYMBOL(tcf_idr_cleanup);
+
+/* Check if action with specified index exists. If actions is found, increments
+ * its reference and bind counters, and return 1. Otherwise insert temporary
+ * error pointer (to prevent concurrent users from inserting actions with same
+ * index) and return 0.
+ */
+
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
+	int ret;
+
+again:
+	spin_lock(&idrinfo->lock);
+	if (*index) {
+		p = idr_find(&idrinfo->action_idr, *index);
+		if (IS_ERR(p)) {
+			/* This means that another process allocated
+			 * index but did not assign the pointer yet.
+			 */
+			spin_unlock(&idrinfo->lock);
+			goto again;
+		}
+
+		if (p) {
+			refcount_inc(&p->tcfa_refcnt);
+			if (bind)
+				atomic_inc(&p->tcfa_bindcnt);
+			*a = p;
+			ret = 1;
+		} else {
+			*a = NULL;
+			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+					    *index, GFP_ATOMIC);
+			if (!ret)
+				idr_replace(&idrinfo->action_idr,
+					    ERR_PTR(-EBUSY), *index);
+		}
+	} else {
+		*index = 1;
+		*a = NULL;
+		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+				    UINT_MAX, GFP_ATOMIC);
+		if (!ret)
+			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
+				    *index);
+	}
+	spin_unlock(&idrinfo->lock);
+	return ret;
+}
+EXPORT_SYMBOL(tcf_idr_check_alloc);
+
 void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
 			 struct tcf_idrinfo *idrinfo)
 {
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index d3f4ac6f2c4b..06f743d8ed41 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -299,14 +299,17 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, act, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, act,
 				     &act_bpf_ops, bind, true);
-		if (ret < 0)
+		if (ret < 0) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		res = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		/* Don't override defaults. */
 		if (bind)
 			return 0;
@@ -315,6 +318,8 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			tcf_idr_release(*act, bind);
 			return -EEXIST;
 		}
+	} else {
+		return ret;
 	}
 
 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 701e90244eff..1e31f0e448e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -118,11 +118,14 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_connmark_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ci = to_connmark(*a);
 		ci->tcf_action = parm->action;
@@ -131,7 +134,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
@@ -142,6 +145,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		/* replacing action and zone */
 		ci->tcf_action = parm->action;
 		ci->zone = parm->zone;
+		ret = 0;
 	}
 
 	return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 5dbee136b0a1..bd232d3bd022 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -67,19 +67,24 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_csum_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
 		if (!ovr) {
 			tcf_idr_release(*a, bind);
 			return -EEXIST;
 		}
+	} else {
+		return err;
 	}
 
 	p = to_tcf_csum(*a);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 11c4de3f344e..661b72b9147d 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -91,19 +91,24 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_gact_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
 		if (!ovr) {
 			tcf_idr_release(*a, bind);
 			return -EEXIST;
 		}
+	} else {
+		return err;
 	}
 
 	gact = to_gact(*a);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index acea3feae762..a3eef00cd711 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -484,7 +484,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind) {
 		kfree(p);
 		return 0;
@@ -494,6 +497,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
 				     bind, true);
 		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			kfree(p);
 			return ret;
 		}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 85e85dfba401..0dc787a57798 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -119,13 +119,18 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-	exists = tcf_idr_check(tn, index, a, bind);
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
@@ -133,14 +138,18 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, index, est, a, ops, bind,
 				     false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e08aed06d7f8..6afd89a36c69 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -79,7 +79,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	struct tcf_mirred *m;
 	struct net_device *dev;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla) {
 		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
@@ -94,7 +94,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -107,6 +110,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
@@ -115,6 +120,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		if (dev == NULL) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ENODEV;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -124,13 +131,16 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 
 	if (!exists) {
 		if (!dev) {
+			tcf_idr_cleanup(tn, parm->index);
 			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 1f91e8e66c0f..4dd9188a72fd 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -57,19 +57,24 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_nat_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
 			return 0;
 		if (!ovr) {
 			tcf_idr_release(*a, bind);
 			return -EEXIST;
 		}
+	} else {
+		return err;
 	}
 	p = to_tcf_nat(*a);
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3a0e2f762f4e..cc8ffcd1ddb5 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -173,16 +173,20 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		if (!parm->nkeys) {
+			tcf_idr_cleanup(tn, parm->index);
 			NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
 			ret = -EINVAL;
 			goto out_free;
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_pedit_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			goto out_free;
+		}
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (!keys) {
@@ -191,7 +195,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			goto out_free;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
 			goto out_free;
 		if (!ovr) {
@@ -207,6 +211,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 				goto out_free;
 			}
 		}
+	} else {
+		return err;
 	}
 
 	spin_lock_bh(&p->tcf_lock);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 99335cca739e..1f3192ea8df7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -101,15 +101,20 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, NULL, a,
 				     &act_police_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index a8582e1347db..3079e7be5bde 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -46,7 +46,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	struct tc_sample *parm;
 	struct tcf_sample *s;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla)
 		return -EINVAL;
@@ -59,15 +59,20 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_sample_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 2da47c682a30..aa51152e0066 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -100,21 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_DEF_DATA] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_simp_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_defact(*a);
 		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 4616a2c1821f..86521a74ecdd 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -152,21 +152,28 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!flags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbedit_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_skbedit(*a);
 		ret = ACT_P_CREATED;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e844381af066..cdc6bacfb190 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -128,21 +128,28 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!lflags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbmod_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index ab5bf5c13f87..3ec585d58762 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -237,7 +237,10 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	}
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -325,7 +328,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
-			return ret;
+			goto err_out;
 		}
 
 		ret = ACT_P_CREATED;
@@ -364,6 +367,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 err_out:
 	if (exists)
 		tcf_idr_release(*a, bind);
+	else
+		tcf_idr_cleanup(tn, parm->index);
 	return ret;
 }
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 9b600faaccbb..ad37f308175a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -134,7 +134,10 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -146,12 +149,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
 		if (push_vid >= VLAN_VID_MASK) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ERANGE;
 		}
 
@@ -164,6 +171,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			default:
 				if (exists)
 					tcf_idr_release(*a, bind);
+				else
+					tcf_idr_cleanup(tn, parm->index);
 				return -EPROTONOSUPPORT;
 			}
 		} else {
@@ -176,6 +185,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 	action = parm->v_action;
@@ -183,8 +194,10 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_vlan_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
-- 
cgit v1.2.3


From 90b73b77d08ec395311411b545c756ca710aae59 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 5 Jul 2018 17:24:33 +0300
Subject: net: sched: change action API to use array of pointers to actions

Act API used linked list to pass set of actions to functions. It is
intrusive data structure that stores list nodes inside action structure
itself, which means it is not safe to modify such list concurrently.
However, action API doesn't use any linked list specific operations on this
set of actions, so it can be safely refactored into plain pointer array.

Refactor action API to use array of pointers to tc_actions instead of
linked list. Change argument 'actions' type of exported action init,
destroy and dump functions.

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  7 ++--
 net/sched/act_api.c   | 89 +++++++++++++++++++++++++++++----------------------
 net/sched/cls_api.c   | 21 ++++--------
 3 files changed, 60 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8090de2edab7..683ce41053d9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -168,19 +168,20 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind)
 int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
 int tcf_unregister_action(struct tc_action_ops *a,
 			  struct pernet_operations *ops);
-int tcf_action_destroy(struct list_head *actions, int bind);
+int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
 		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
+		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
 				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
+		    int ref);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9511502e1cbb..bf1c35f3deb6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -657,13 +657,15 @@ repeat:
 }
 EXPORT_SYMBOL(tcf_action_exec);
 
-int tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct tc_action *actions[], int bind)
 {
 	const struct tc_action_ops *ops;
-	struct tc_action *a, *tmp;
-	int ret = 0;
+	struct tc_action *a;
+	int ret = 0, i;
 
-	list_for_each_entry_safe(a, tmp, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
+		actions[i] = NULL;
 		ops = a->ops;
 		ret = __tcf_idr_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
@@ -679,11 +681,12 @@ static int tcf_action_put(struct tc_action *p)
 	return __tcf_action_put(p, false);
 }
 
-static void tcf_action_put_lst(struct list_head *actions)
+static void tcf_action_put_many(struct tc_action *actions[])
 {
-	struct tc_action *a, *tmp;
+	int i;
 
-	list_for_each_entry_safe(a, tmp, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
 		const struct tc_action_ops *ops = a->ops;
 
 		if (tcf_action_put(a))
@@ -735,14 +738,15 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(tcf_action_dump_1);
 
-int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
 		    int bind, int ref)
 {
 	struct tc_action *a;
-	int err = -EINVAL;
+	int err = -EINVAL, i;
 	struct nlattr *nest;
 
-	list_for_each_entry(a, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
 		nest = nla_nest_start(skb, a->order);
 		if (nest == NULL)
 			goto nla_put_failure;
@@ -878,10 +882,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
 		err = tcf_action_goto_chain_init(a, tp);
 		if (err) {
-			LIST_HEAD(actions);
+			struct tc_action *actions[] = { a, NULL };
 
-			list_add_tail(&a->list, &actions);
-			tcf_action_destroy(&actions, bind);
+			tcf_action_destroy(actions, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
@@ -899,9 +902,11 @@ err_out:
 	return ERR_PTR(err);
 }
 
+/* Returns numbers of initialized actions or negative error. */
+
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
+		    struct tc_action *actions[], size_t *attr_size,
 		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -923,11 +928,12 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		}
 		act->order = i;
 		sz += tcf_action_fill_size(act);
-		list_add_tail(&act->list, actions);
+		/* Start from index 0 */
+		actions[i - 1] = act;
 	}
 
 	*attr_size = tcf_action_full_attrs_size(sz);
-	return 0;
+	return i - 1;
 
 err:
 	tcf_action_destroy(actions, bind);
@@ -978,7 +984,7 @@ errout:
 	return -1;
 }
 
-static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 			u32 portid, u32 seq, u16 flags, int event, int bind,
 			int ref)
 {
@@ -1014,7 +1020,7 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct list_head *actions, int event,
+	       struct tc_action *actions[], int event,
 	       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -1150,14 +1156,14 @@ err_out:
 	return err;
 }
 
-static int tcf_action_delete(struct net *net, struct list_head *actions,
-			     struct netlink_ext_ack *extack)
+static int tcf_action_delete(struct net *net, struct tc_action *actions[],
+			     int *acts_deleted, struct netlink_ext_ack *extack)
 {
-	struct tc_action *a, *tmp;
 	u32 act_index;
-	int ret;
+	int ret, i;
 
-	list_for_each_entry_safe(a, tmp, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
 		const struct tc_action_ops *ops = a->ops;
 
 		/* Actions can be deleted concurrently so we must save their
@@ -1165,23 +1171,26 @@ static int tcf_action_delete(struct net *net, struct list_head *actions,
 		 */
 		act_index = a->tcfa_index;
 
-		list_del(&a->list);
 		if (tcf_action_put(a)) {
 			/* last reference, action was deleted concurrently */
 			module_put(ops->owner);
 		} else  {
 			/* now do the delete */
 			ret = ops->delete(net, act_index);
-			if (ret < 0)
+			if (ret < 0) {
+				*acts_deleted = i + 1;
 				return ret;
+			}
 		}
 	}
+	*acts_deleted = i;
 	return 0;
 }
 
 static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
+	       int *acts_deleted, u32 portid, size_t attr_size,
+	       struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -1199,7 +1208,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 	}
 
 	/* now do the delete */
-	ret = tcf_action_delete(net, actions, extack);
+	ret = tcf_action_delete(net, actions, acts_deleted, extack);
 	if (ret < 0) {
 		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
@@ -1221,7 +1230,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	size_t attr_size = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
+	int acts_deleted = 0;
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (ret < 0)
@@ -1243,26 +1253,27 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		}
 		act->order = i;
 		attr_size += tcf_action_fill_size(act);
-		list_add_tail(&act->list, &actions);
+		actions[i - 1] = act;
 	}
 
 	attr_size = tcf_action_full_attrs_size(attr_size);
 
 	if (event == RTM_GETACTION)
-		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
+		ret = tcf_get_notify(net, portid, n, actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
+		ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
+				     attr_size, extack);
 		if (ret)
 			goto err;
 		return ret;
 	}
 err:
-	tcf_action_put_lst(&actions);
+	tcf_action_put_many(&actions[acts_deleted]);
 	return ret;
 }
 
 static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -1293,15 +1304,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 {
 	size_t attr_size = 0;
 	int ret = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
 			      &attr_size, true, extack);
-	if (ret)
+	if (ret < 0)
 		return ret;
-	ret = tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
 	if (ovr)
-		tcf_action_put_lst(&actions);
+		tcf_action_put_many(actions);
 
 	return ret;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 9041f0e43e9a..73d9967c3739 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1609,10 +1609,7 @@ out:
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	LIST_HEAD(actions);
-
-	tcf_exts_to_list(exts, &actions);
-	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
 	exts->nr_actions = 0;
 #endif
@@ -1639,18 +1636,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 			exts->actions[0] = act;
 			exts->nr_actions = 1;
 		} else if (exts->action && tb[exts->action]) {
-			LIST_HEAD(actions);
-			int err, i = 0;
+			int err;
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, &attr_size, true,
+					      exts->actions, &attr_size, true,
 					      extack);
-			if (err)
+			if (err < 0)
 				return err;
-			list_for_each_entry(act, &actions, list)
-				exts->actions[i++] = act;
-			exts->nr_actions = i;
+			exts->nr_actions = err;
 		}
 		exts->net = net;
 	}
@@ -1699,14 +1693,11 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
-			LIST_HEAD(actions);
-
 			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
 
-			tcf_exts_to_list(exts, &actions);
-			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
+			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
-- 
cgit v1.2.3


From e3fe0ae129622b78e710e75ecbf7aca7af5dda47 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Wed, 27 Jun 2018 08:15:31 +0200
Subject: crypto: dh - add public key verification test

According to SP800-56A section 5.6.2.1, the public key to be processed
for the DH operation shall be checked for appropriateness. The check
shall covers the full verification test in case the domain parameter Q
is provided as defined in SP800-56A section 5.6.2.3.1. If Q is not
provided, the partial check according to SP800-56A section 5.6.2.3.2 is
performed.

The full verification test requires the presence of the domain parameter
Q. Thus, the patch adds the support to handle Q. It is permissible to
not provide the Q value as part of the domain parameters. This implies
that the interface is still backwards-compatible where so far only P and
G are to be provided. However, if Q is provided, it is imported.

Without the test, the NIST ACVP testing fails. After adding this check,
the NIST ACVP testing passes. Testing without providing the Q domain
parameter has been performed to verify the interface has not changed.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c         | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 crypto/dh_helper.c  | 15 +++++++++---
 include/crypto/dh.h |  4 ++++
 3 files changed, 79 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/crypto/dh.c b/crypto/dh.c
index 5659fe7f446d..8f79269db2b7 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -16,14 +16,16 @@
 #include <linux/mpi.h>
 
 struct dh_ctx {
-	MPI p;
-	MPI g;
-	MPI xa;
+	MPI p;	/* Value is guaranteed to be set. */
+	MPI q;	/* Value is optional. */
+	MPI g;	/* Value is guaranteed to be set. */
+	MPI xa;	/* Value is guaranteed to be set. */
 };
 
 static void dh_clear_ctx(struct dh_ctx *ctx)
 {
 	mpi_free(ctx->p);
+	mpi_free(ctx->q);
 	mpi_free(ctx->g);
 	mpi_free(ctx->xa);
 	memset(ctx, 0, sizeof(*ctx));
@@ -60,6 +62,12 @@ static int dh_set_params(struct dh_ctx *ctx, struct dh *params)
 	if (!ctx->p)
 		return -EINVAL;
 
+	if (params->q && params->q_size) {
+		ctx->q = mpi_read_raw_data(params->q, params->q_size);
+		if (!ctx->q)
+			return -EINVAL;
+	}
+
 	ctx->g = mpi_read_raw_data(params->g, params->g_size);
 	if (!ctx->g)
 		return -EINVAL;
@@ -93,6 +101,55 @@ err_clear_ctx:
 	return -EINVAL;
 }
 
+/*
+ * SP800-56A public key verification:
+ *
+ * * If Q is provided as part of the domain paramenters, a full validation
+ *   according to SP800-56A section 5.6.2.3.1 is performed.
+ *
+ * * If Q is not provided, a partial validation according to SP800-56A section
+ *   5.6.2.3.2 is performed.
+ */
+static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
+{
+	if (unlikely(!ctx->p))
+		return -EINVAL;
+
+	/*
+	 * Step 1: Verify that 2 <= y <= p - 2.
+	 *
+	 * The upper limit check is actually y < p instead of y < p - 1
+	 * as the mpi_sub_ui function is yet missing.
+	 */
+	if (mpi_cmp_ui(y, 1) < 1 || mpi_cmp(y, ctx->p) >= 0)
+		return -EINVAL;
+
+	/* Step 2: Verify that 1 = y^q mod p */
+	if (ctx->q) {
+		MPI val = mpi_alloc(0);
+		int ret;
+
+		if (!val)
+			return -ENOMEM;
+
+		ret = mpi_powm(val, y, ctx->q, ctx->p);
+
+		if (ret) {
+			mpi_free(val);
+			return ret;
+		}
+
+		ret = mpi_cmp_ui(val, 1);
+
+		mpi_free(val);
+
+		if (ret != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int dh_compute_value(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
@@ -115,6 +172,9 @@ static int dh_compute_value(struct kpp_request *req)
 			ret = -EINVAL;
 			goto err_free_val;
 		}
+		ret = dh_is_pubkey_valid(ctx, base);
+		if (ret)
+			goto err_free_val;
 	} else {
 		base = ctx->g;
 	}
diff --git a/crypto/dh_helper.c b/crypto/dh_helper.c
index 24fdb2ecaa85..a7de3d9ce5ac 100644
--- a/crypto/dh_helper.c
+++ b/crypto/dh_helper.c
@@ -30,7 +30,7 @@ static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size)
 
 static inline unsigned int dh_data_size(const struct dh *p)
 {
-	return p->key_size + p->p_size + p->g_size;
+	return p->key_size + p->p_size + p->q_size + p->g_size;
 }
 
 unsigned int crypto_dh_key_len(const struct dh *p)
@@ -56,9 +56,11 @@ int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params)
 	ptr = dh_pack_data(ptr, &secret, sizeof(secret));
 	ptr = dh_pack_data(ptr, &params->key_size, sizeof(params->key_size));
 	ptr = dh_pack_data(ptr, &params->p_size, sizeof(params->p_size));
+	ptr = dh_pack_data(ptr, &params->q_size, sizeof(params->q_size));
 	ptr = dh_pack_data(ptr, &params->g_size, sizeof(params->g_size));
 	ptr = dh_pack_data(ptr, params->key, params->key_size);
 	ptr = dh_pack_data(ptr, params->p, params->p_size);
+	ptr = dh_pack_data(ptr, params->q, params->q_size);
 	dh_pack_data(ptr, params->g, params->g_size);
 
 	return 0;
@@ -79,6 +81,7 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 
 	ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size));
 	ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size));
+	ptr = dh_unpack_data(&params->q_size, ptr, sizeof(params->q_size));
 	ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size));
 	if (secret.len != crypto_dh_key_len(params))
 		return -EINVAL;
@@ -88,7 +91,7 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	 * some drivers assume otherwise.
 	 */
 	if (params->key_size > params->p_size ||
-	    params->g_size > params->p_size)
+	    params->g_size > params->p_size || params->q_size > params->p_size)
 		return -EINVAL;
 
 	/* Don't allocate memory. Set pointers to data within
@@ -96,7 +99,9 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	 */
 	params->key = (void *)ptr;
 	params->p = (void *)(ptr + params->key_size);
-	params->g = (void *)(ptr + params->key_size + params->p_size);
+	params->q = (void *)(ptr + params->key_size + params->p_size);
+	params->g = (void *)(ptr + params->key_size + params->p_size +
+			     params->q_size);
 
 	/*
 	 * Don't permit 'p' to be 0.  It's not a prime number, and it's subject
@@ -106,6 +111,10 @@ int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params)
 	if (memchr_inv(params->p, 0, params->p_size) == NULL)
 		return -EINVAL;
 
+	/* It is permissible to not provide Q. */
+	if (params->q_size == 0)
+		params->q = NULL;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_dh_decode_key);
diff --git a/include/crypto/dh.h b/include/crypto/dh.h
index 71e1bb24d79f..7e0dad94cb2b 100644
--- a/include/crypto/dh.h
+++ b/include/crypto/dh.h
@@ -29,17 +29,21 @@
  *
  * @key:	Private DH key
  * @p:		Diffie-Hellman parameter P
+ * @q:		Diffie-Hellman parameter Q
  * @g:		Diffie-Hellman generator G
  * @key_size:	Size of the private DH key
  * @p_size:	Size of DH parameter P
+ * @q_size:	Size of DH parameter Q
  * @g_size:	Size of DH generator G
  */
 struct dh {
 	void *key;
 	void *p;
+	void *q;
 	void *g;
 	unsigned int key_size;
 	unsigned int p_size;
+	unsigned int q_size;
 	unsigned int g_size;
 };
 
-- 
cgit v1.2.3


From 69448867abcb231afaa7891ff9d9fd04b2b94a0d Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 5 Jul 2018 09:25:43 +0300
Subject: fs: shave 8 bytes off of struct inode

Here is a link to Linus' reply to Jan's concern about making
i_blkbibts byte addressable:
https://marc.info/?l=linux-fsdevel&m=152882624707975&w=2

Here is a link to an lkp.org report about potential performance
improvement in some workload, which could(?) be related to packing
i_blkbits closer to i_bytes/i_lock:
https://marc.info/?l=linux-fsdevel&m=153077048108198&w=2

Changes since v1:
- Add links to relevant discussions

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f140c11d35dd..9b00676f5d9d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -275,6 +275,7 @@ struct writeback_control;
 
 /*
  * Write life time hint values.
+ * Stored in struct inode as u8.
  */
 enum rw_hint {
 	WRITE_LIFE_NOT_SET	= 0,
@@ -609,8 +610,8 @@ struct inode {
 	struct timespec64	i_ctime;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
-	unsigned int		i_blkbits;
-	enum rw_hint		i_write_hint;
+	u8			i_blkbits;
+	u8			i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
-- 
cgit v1.2.3


From f292b87d3ac020418644d8a4bbf29814890505cb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 6 Jul 2018 14:34:29 -0700
Subject: bpf: include errno.h from bpf-cgroup.h

Commit fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency
wrt CONFIG_CGROUP_BPF") caused some build issues, detected by 0-DAY
kernel test infrastructure.

The problem is that cgroup_bpf_prog_attach/detach/query() functions
can return -EINVAL error code, which is not defined. Fix this adding
errno.h to includes.

Fixes: fdb5c4531c1e ("bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Sean Young <sean@mess.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 79795c5fa7c3..d50c2f0a655a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <uapi/linux/bpf.h>
 
-- 
cgit v1.2.3


From e74770fb6823c6fde49c3f2b38e3a2e68ae46682 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 29 Jun 2018 11:28:13 -0700
Subject: ACPICA: Update version to 20180629

Version 20180629.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 48d84f0d9547..88072c92ace2 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20180531
+#define ACPI_CA_VERSION                 0x20180629
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 5d6be70add65e3f236642ab0029e356261617cd0 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 29 Jun 2018 13:04:31 +0200
Subject: PM / Domains: Introduce option to attach a device by name to genpd

For the multiple PM domain case, let's introduce a new function called
genpd_dev_pm_attach_by_name(). This allows a device to be associated with
its PM domain through genpd, by using a name based lookup.

Note that, genpd_dev_pm_attach_by_name() shall only be called by the driver
core / PM core, similar to how the existing dev_pm_domain_attach_by_id()
makes use of genpd_dev_pm_attach_by_id(). However, this is implemented by
following changes on top.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 24 ++++++++++++++++++++++++
 include/linux/pm_domain.h   |  8 ++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9e8484189034..79bdca70a81a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2374,6 +2374,30 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
 
+/**
+ * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier using the
+ * power-domain-names DT property. For further description see
+ * genpd_dev_pm_attach_by_id().
+ */
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+{
+	int index;
+
+	if (!dev->of_node)
+		return NULL;
+
+	index = of_property_match_string(dev->of_node, "power-domain-names",
+					 name);
+	if (index < 0)
+		return NULL;
+
+	return genpd_dev_pm_attach_by_id(dev, index);
+}
+
 static const struct of_device_id idle_state_match[] = {
 	{ .compatible = "domain-idle-state", },
 	{ }
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index cb8d84090cfb..03e14a38462d 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -239,6 +239,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev,
 int genpd_dev_pm_attach(struct device *dev);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 					 unsigned int index);
+struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+					   char *name);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
 					struct generic_pm_domain *genpd)
@@ -290,6 +292,12 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 	return NULL;
 }
 
+static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+							 char *name)
+{
+	return NULL;
+}
+
 static inline
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
-- 
cgit v1.2.3


From 27dceb81f445c58b1d10d27d26eaf4ac2e9e0b00 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 29 Jun 2018 13:04:32 +0200
Subject: PM / Domains: Introduce dev_pm_domain_attach_by_name()

For the multiple PM domain case, let's introduce a new API called
dev_pm_domain_attach_by_name(). This allows a consumer driver to associate
its device with one of its PM domains, by using a name based lookup.

Do note that, currently it's only genpd that supports multiple PM domains
per device, but dev_pm_domain_attach_by_name() can easily by extended to
cover other PM domain types, if/when needed.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/common.c | 17 +++++++++++++++++
 include/linux/pm_domain.h   |  7 +++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index df41b4780b3b..b413951c6abc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -152,6 +152,23 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
 
+/**
+ * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * For a detailed function description, see dev_pm_domain_attach_by_id().
+ */
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+					    char *name)
+{
+	if (dev->pm_domain)
+		return ERR_PTR(-EEXIST);
+
+	return genpd_dev_pm_attach_by_name(dev, name);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
+
 /**
  * dev_pm_domain_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 03e14a38462d..776c546d581a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -309,6 +309,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 int dev_pm_domain_attach(struct device *dev, bool power_on);
 struct device *dev_pm_domain_attach_by_id(struct device *dev,
 					  unsigned int index);
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+					    char *name);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -321,6 +323,11 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
 {
 	return NULL;
 }
+static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
+							  char *name)
+{
+	return NULL;
+}
 static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
 static inline void dev_pm_domain_set(struct device *dev,
 				     struct dev_pm_domain *pd) {}
-- 
cgit v1.2.3


From e88728f46cfbb59cc7e7acf1d230c05ec093764e Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Wed, 27 Jun 2018 18:20:55 +0530
Subject: driver core: Rename flag AUTOREMOVE to AUTOREMOVE_CONSUMER

Now that we want to add another flag to autoremove the device link
on supplier unbind, it's fair to rename the existing flag from
DL_FLAG_AUTOREMOVE to DL_FLAG_AUTOREMOVE_CONSUMER so that we can
add similar flag for supplier later.
And, while we are touching device.h, fix a doc build warning.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/device_link.rst |  8 ++++----
 drivers/base/core.c                      | 15 ++++++++-------
 drivers/gpu/drm/tegra/dc.c               |  2 +-
 drivers/gpu/ipu-v3/ipu-pre.c             |  3 ++-
 drivers/gpu/ipu-v3/ipu-prg.c             |  3 ++-
 drivers/soc/imx/gpc.c                    |  2 +-
 include/linux/device.h                   | 12 ++++++------
 7 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index 70e328e16aad..a005b904a264 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -81,10 +81,10 @@ integration is desired.
 Two other flags are specifically targeted at use cases where the device
 link is added from the consumer's ``->probe`` callback:  ``DL_FLAG_RPM_ACTIVE``
 can be specified to runtime resume the supplier upon addition of the
-device link.  ``DL_FLAG_AUTOREMOVE`` causes the device link to be automatically
-purged when the consumer fails to probe or later unbinds.  This obviates
-the need to explicitly delete the link in the ``->remove`` callback or in
-the error path of the ``->probe`` callback.
+device link.  ``DL_FLAG_AUTOREMOVE_CONSUMER`` causes the device link to be
+automatically purged when the consumer fails to probe or later unbinds.
+This obviates the need to explicitly delete the link in the ``->remove``
+callback or in the error path of the ``->probe`` callback.
 
 Limitations
 ===========
diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..14c1e3151e08 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -178,10 +178,10 @@ void device_pm_move_to_tail(struct device *dev)
  * of the link.  If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
  * ignored.
  *
- * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
- * when the consumer device driver unbinds from it.  The combination of both
- * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
- * to be returned.
+ * If the DL_FLAG_AUTOREMOVE_CONSUMER is set, the link will be removed
+ * automatically when the consumer device driver unbinds from it.
+ * The combination of both DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_STATELESS
+ * set is invalid and will cause NULL to be returned.
  *
  * A side effect of the link creation is re-ordering of dpm_list and the
  * devices_kset list by moving the consumer device and all devices depending
@@ -198,7 +198,8 @@ struct device_link *device_link_add(struct device *consumer,
 	struct device_link *link;
 
 	if (!consumer || !supplier ||
-	    ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE)))
+	    ((flags & DL_FLAG_STATELESS) &&
+	     (flags & DL_FLAG_AUTOREMOVE_CONSUMER)))
 		return NULL;
 
 	device_links_write_lock();
@@ -479,7 +480,7 @@ static void __device_links_no_driver(struct device *dev)
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
-		if (link->flags & DL_FLAG_AUTOREMOVE)
+		if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
 			kref_put(&link->kref, __device_link_del);
 		else if (link->status != DL_STATE_SUPPLIER_UNBIND)
 			WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
@@ -515,7 +516,7 @@ void device_links_driver_cleanup(struct device *dev)
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
-		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE);
+		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
 		WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index c3afe7b2237e..965088afcfad 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2312,7 +2312,7 @@ static int tegra_dc_couple(struct tegra_dc *dc)
 	 * POWER_CONTROL registers during CRTC enabling.
 	 */
 	if (dc->soc->coupled_pm && dc->pipe == 1) {
-		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE;
+		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
 		struct device_link *link;
 		struct device *partner;
 
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index 0f70e8847540..2f8db9d62551 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -128,7 +128,8 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
 	list_for_each_entry(pre, &ipu_pre_list, list) {
 		if (pre_node == pre->dev->of_node) {
 			mutex_unlock(&ipu_pre_list_mutex);
-			device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+			device_link_add(dev, pre->dev,
+					DL_FLAG_AUTOREMOVE_CONSUMER);
 			of_node_put(pre_node);
 			return pre;
 		}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 83f9dd934a5d..38a3a9764e49 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -100,7 +100,8 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
 	list_for_each_entry(prg, &ipu_prg_list, list) {
 		if (prg_node == prg->dev->of_node) {
 			mutex_unlock(&ipu_prg_list_mutex);
-			device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
+			device_link_add(dev, prg->dev,
+					DL_FLAG_AUTOREMOVE_CONSUMER);
 			prg->id = ipu_id;
 			of_node_put(prg_node);
 			return prg;
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 32f0748fd067..aa9e65bc965e 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -202,7 +202,7 @@ static int imx_pgc_power_domain_probe(struct platform_device *pdev)
 			goto genpd_err;
 	}
 
-	device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE);
+	device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE_CONSUMER);
 
 	return 0;
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..3929805cdd59 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  * @num_vf:	Called to find out how many virtual functions a device on this
  *		bus supports.
  * @dma_configure:	Called to setup DMA configuration on a device on
-			this bus.
+ *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -784,14 +784,14 @@ enum device_link_state {
  * Device link flags.
  *
  * STATELESS: The core won't track the presence of supplier/consumer drivers.
- * AUTOREMOVE: Remove this link automatically on consumer driver unbind.
+ * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
  */
-#define DL_FLAG_STATELESS	BIT(0)
-#define DL_FLAG_AUTOREMOVE	BIT(1)
-#define DL_FLAG_PM_RUNTIME	BIT(2)
-#define DL_FLAG_RPM_ACTIVE	BIT(3)
+#define DL_FLAG_STATELESS		BIT(0)
+#define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
+#define DL_FLAG_PM_RUNTIME		BIT(2)
+#define DL_FLAG_RPM_ACTIVE		BIT(3)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From 1689cac5b32a6db6f812e8063ea418a7cf023d03 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Wed, 27 Jun 2018 18:20:56 +0530
Subject: driver core: Add flag to autoremove device link on supplier unbind

Add a flag to autoremove the device links on supplier driver
unbind. This obviates the need to explicitly delete the link
in the remove path.
We remove these links only when the supplier's link to its
consumers has gone to DL_STATE_SUPPLIER_UNBIND state.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Suggested-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/driver-api/device_link.rst |  4 ++++
 drivers/base/core.c                      | 10 ++++++++++
 include/linux/device.h                   |  2 ++
 3 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index a005b904a264..d6763272e747 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -86,6 +86,10 @@ automatically purged when the consumer fails to probe or later unbinds.
 This obviates the need to explicitly delete the link in the ``->remove``
 callback or in the error path of the ``->probe`` callback.
 
+Similarly, when the device link is added from supplier's ``->probe`` callback,
+``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically
+purged when the supplier fails to probe or later unbinds.
+
 Limitations
 ===========
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 14c1e3151e08..e721218bf352 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -518,6 +518,16 @@ void device_links_driver_cleanup(struct device *dev)
 
 		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
 		WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
+
+		/*
+		 * autoremove the links between this @dev and its consumer
+		 * devices that are not active, i.e. where the link state
+		 * has moved to DL_STATE_SUPPLIER_UNBIND.
+		 */
+		if (link->status == DL_STATE_SUPPLIER_UNBIND &&
+		    link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
+			kref_put(&link->kref, __device_link_del);
+
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 3929805cdd59..e80920452b49 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -787,11 +787,13 @@ enum device_link_state {
  * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
+ * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind.
  */
 #define DL_FLAG_STATELESS		BIT(0)
 #define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
 #define DL_FLAG_PM_RUNTIME		BIT(2)
 #define DL_FLAG_RPM_ACTIVE		BIT(3)
+#define DL_FLAG_AUTOREMOVE_SUPPLIER	BIT(4)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From 63f3fb8d7ccb813e0a1f2ceedb9a3ebe2685b620 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 2 Jul 2018 15:59:37 -0700
Subject: pinctrl: Document pin_config_group_get() return codes like
 pin_config_get()

The pinconf_generic_dump_one() function makes the assumption that
pin_config_group_get() should return -EINVAL and -ENOTSUPP just like
pin_config_get() does.  Document that so it's more obvious.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/pinconf.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 09eb80f2574a..8dd85d302b90 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -28,7 +28,8 @@ struct seq_file;
  *	is not available on this controller this should return -ENOTSUPP
  *	and if it is available but disabled it should return -EINVAL
  * @pin_config_set: configure an individual pin
- * @pin_config_group_get: get configurations for an entire pin group
+ * @pin_config_group_get: get configurations for an entire pin group; should
+ *	return -ENOTSUPP and -EINVAL using the same rules as pin_config_get.
  * @pin_config_group_set: configure all pins in a group
  * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration
  * @pin_config_dbg_show: optional debugfs display hook that will provide
-- 
cgit v1.2.3


From de3c1e71d412d7e254e3eebeb82e260fb8513566 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 4 Jul 2018 18:54:57 +0200
Subject: clk: meson: expose GEN_CLK clkid

Expose GEN_CLK clock id

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 include/dt-bindings/clock/axg-clkc.h  | 1 +
 include/dt-bindings/clock/gxbb-clkc.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/axg-clkc.h b/include/dt-bindings/clock/axg-clkc.h
index 70371228b7e0..fd1f938c38d1 100644
--- a/include/dt-bindings/clock/axg-clkc.h
+++ b/include/dt-bindings/clock/axg-clkc.h
@@ -71,5 +71,6 @@
 #define CLKID_PCIE_CML_EN0			79
 #define CLKID_PCIE_CML_EN1			80
 #define CLKID_MIPI_ENABLE			81
+#define CLKID_GEN_CLK				84
 
 #endif /* __AXG_CLKC_H */
diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h
index 7a892be90549..3979d48c025f 100644
--- a/include/dt-bindings/clock/gxbb-clkc.h
+++ b/include/dt-bindings/clock/gxbb-clkc.h
@@ -127,5 +127,6 @@
 #define CLKID_VAPB		140
 #define CLKID_VDEC_1		153
 #define CLKID_VDEC_HEVC		156
+#define CLKID_GEN_CLK		159
 
 #endif /* __GXBB_CLKC_H */
-- 
cgit v1.2.3


From 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 27 Jun 2018 16:20:28 +0200
Subject: printk/nmi: Prevent deadlock when accessing the main log buffer in
 NMI

The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI
when logbuf_lock is available") brought back the possible deadlocks
in printk() and NMI.

The check of logbuf_lock is done only in printk_nmi_enter() to prevent
mixed output. But another CPU might take the lock later, enter NMI, and:

      + Both NMIs might be serialized by yet another lock, for example,
	the one in nmi_cpu_backtrace().

      + The other CPU might get stopped in NMI, see smp_send_stop()
	in panic().

The only safe solution is to use trylock when storing the message
into the main log-buffer. It might cause reordering when some lines
go to the main lock buffer directly and others are delayed via
the per-CPU buffer. It means that it is not useful in general.

This patch replaces the problematic NMI deferred context with NMI
direct context. It can be used to mark a code that might produce
many messages in NMI and the risk of losing them is more critical
than problems with eventual reordering.

The context is then used when dumping trace buffers on oops. It was
the primary motivation for the original fix. Also the reordering is
even smaller issue there because some traces have their own time stamps.

Finally, nmi_cpu_backtrace() need not longer be serialized because
it will always us the per-CPU buffers again.

Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available")
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com
To: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/printk.h      |  4 ++++
 kernel/printk/internal.h    |  9 ++++++-
 kernel/printk/printk_safe.c | 58 +++++++++++++++++++++++++++++----------------
 kernel/trace/trace.c        |  4 +++-
 lib/nmi_backtrace.c         |  3 ---
 5 files changed, 52 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 6d7e800affd8..3ede9f46a494 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { }
 #ifdef CONFIG_PRINTK_NMI
 extern void printk_nmi_enter(void);
 extern void printk_nmi_exit(void);
+extern void printk_nmi_direct_enter(void);
+extern void printk_nmi_direct_exit(void);
 #else
 static inline void printk_nmi_enter(void) { }
 static inline void printk_nmi_exit(void) { }
+static inline void printk_nmi_direct_enter(void) { }
+static inline void printk_nmi_direct_exit(void) { }
 #endif /* PRINTK_NMI */
 
 #ifdef CONFIG_PRINTK
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 2a7d04049af4..0f1898820cba 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -19,11 +19,16 @@
 #ifdef CONFIG_PRINTK
 
 #define PRINTK_SAFE_CONTEXT_MASK	 0x3fffffff
-#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000
+#define PRINTK_NMI_DIRECT_CONTEXT_MASK	 0x40000000
 #define PRINTK_NMI_CONTEXT_MASK		 0x80000000
 
 extern raw_spinlock_t logbuf_lock;
 
+__printf(5, 0)
+int vprintk_store(int facility, int level,
+		  const char *dict, size_t dictlen,
+		  const char *fmt, va_list args);
+
 __printf(1, 0) int vprintk_default(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args);
@@ -54,6 +59,8 @@ void __printk_safe_exit(void);
 		local_irq_enable();		\
 	} while (0)
 
+void defer_console_output(void);
+
 #else
 
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index d7d091309054..a0a74c533e4b 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -308,24 +308,33 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
 
 void printk_nmi_enter(void)
 {
-	/*
-	 * The size of the extra per-CPU buffer is limited. Use it only when
-	 * the main one is locked. If this CPU is not in the safe context,
-	 * the lock must be taken on another CPU and we could wait for it.
-	 */
-	if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) &&
-	    raw_spin_is_locked(&logbuf_lock)) {
-		this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
-	} else {
-		this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);
-	}
+	this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
 }
 
 void printk_nmi_exit(void)
 {
-	this_cpu_and(printk_context,
-		     ~(PRINTK_NMI_CONTEXT_MASK |
-		       PRINTK_NMI_DEFERRED_CONTEXT_MASK));
+	this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
+}
+
+/*
+ * Marks a code that might produce many messages in NMI context
+ * and the risk of losing them is more critical than eventual
+ * reordering.
+ *
+ * It has effect only when called in NMI context. Then printk()
+ * will try to store the messages into the main logbuf directly
+ * and use the per-CPU buffers only as a fallback when the lock
+ * is not available.
+ */
+void printk_nmi_direct_enter(void)
+{
+	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
+		this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
+}
+
+void printk_nmi_direct_exit(void)
+{
+	this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
 }
 
 #else
@@ -363,6 +372,20 @@ void __printk_safe_exit(void)
 
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
+	/*
+	 * Try to use the main logbuf even in NMI. But avoid calling console
+	 * drivers that might have their own locks.
+	 */
+	if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
+	    raw_spin_trylock(&logbuf_lock)) {
+		int len;
+
+		len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+		raw_spin_unlock(&logbuf_lock);
+		defer_console_output();
+		return len;
+	}
+
 	/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
 	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
 		return vprintk_nmi(fmt, args);
@@ -371,13 +394,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 	if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
 		return vprintk_safe(fmt, args);
 
-	/*
-	 * Use the main logbuf when logbuf_lock is available in NMI.
-	 * But avoid calling console drivers that might have their own locks.
-	 */
-	if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK)
-		return vprintk_deferred(fmt, args);
-
 	/* No obstacles. */
 	return vprintk_default(fmt, args);
 }
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bcd93031d042..f106ad12f72f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8265,6 +8265,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	tracing_off();
 
 	local_irq_save(flags);
+	printk_nmi_direct_enter();
 
 	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
@@ -8344,7 +8345,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	for_each_tracing_cpu(cpu) {
 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
 	}
- 	atomic_dec(&dump_running);
+	atomic_dec(&dump_running);
+	printk_nmi_direct_exit();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(ftrace_dump);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 61a6b5aab07e..15ca78e1c7d4 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
-	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		arch_spin_lock(&lock);
 		if (regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
@@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs)
 			else
 				dump_stack();
 		}
-		arch_spin_unlock(&lock);
 		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 		return true;
 	}
-- 
cgit v1.2.3


From b3e7e7d2d668de0102264302a4d10dd9d4438a42 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:18 -0700
Subject: include/uapi/linux/blkzoned.h: Remove a superfluous __packed
 directive

Using the __packed directive for a structure that does not need
it is wrong because it makes gcc generate suboptimal code on some
architectures. Hence remove the __packed directive from the
blk_zone_report structure definition. See also
http://digitalvampire.org/blog/index.php/2006/07/31/why-you-shouldnt-use-__attribute__packed/.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/blkzoned.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index e3c70fe6bf0f..ff5a5db8906a 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -117,7 +117,7 @@ struct blk_zone_report {
 	__u32		nr_zones;
 	__u8		reserved[4];
 	struct blk_zone zones[0];
-} __packed;
+};
 
 /**
  * struct blk_zone_range - BLKRESETZONE ioctl request
-- 
cgit v1.2.3


From 6b1d83d274486615cc341e410467a98fd9c27c0a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:19 -0700
Subject: block: Remove bdev_nr_zones()

Remove this function since it has no callers. This function was
introduced in commit 6cc77e9cb080 ("block: introduce zoned block
devices zone write locking").

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matias Bjorling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 79226ca8f80f..49a400afb146 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1639,15 +1639,6 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
-static inline unsigned int bdev_nr_zones(struct block_device *bdev)
-{
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return blk_queue_nr_zones(q);
-	return 0;
-}
-
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
-- 
cgit v1.2.3


From 7c8542b7982264226cf94102950343185869b584 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:20 -0700
Subject: block: Inline blk_queue_nr_zones()

Since the implementation of blk_queue_nr_zones() is trivial and since
it only has a single caller, inline this function.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 2 +-
 include/linux/blkdev.h | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1c4532e92938..26e1f8e425a8 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -214,7 +214,7 @@ static int queue_zone_wlock_show(void *data, struct seq_file *m)
 	if (!q->seq_zones_wlock)
 		return 0;
 
-	for (i = 0; i < blk_queue_nr_zones(q); i++)
+	for (i = 0; i < q->nr_zones; i++)
 		if (test_bit(i, q->seq_zones_wlock))
 			seq_printf(m, "%u\n", i);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49a400afb146..905daa7c647e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -800,11 +800,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
-static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
-{
-	return q->nr_zones;
-}
-
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 					     sector_t sector)
 {
-- 
cgit v1.2.3


From 6a5ac9846508ad7d1d23881d9d5add35f2e6ae71 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 15 Jun 2018 14:55:21 -0700
Subject: block: Make struct request_queue smaller for CONFIG_BLK_DEV_ZONED=n

Exclude zoned block device members from struct request_queue for
CONFIG_BLK_DEV_ZONED == n. Avoid breaking the build by only building
the code that uses these struct request_queue members if
CONFIG_BLK_DEV_ZONED != n.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig                |  4 ++++
 block/Makefile               |  1 +
 block/blk-mq-debugfs-zoned.c | 24 ++++++++++++++++++++++++
 block/blk-mq-debugfs.c       | 15 ---------------
 block/blk-mq-debugfs.h       |  9 +++++++++
 include/linux/blkdev.h       |  6 ++++++
 6 files changed, 44 insertions(+), 15 deletions(-)
 create mode 100644 block/blk-mq-debugfs-zoned.c

(limited to 'include')

diff --git a/block/Kconfig b/block/Kconfig
index eb50fd4977c2..dfe7bc770fc9 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -177,6 +177,10 @@ config BLK_DEBUG_FS
 	Unless you are building a kernel for a tiny system, you should
 	say Y here.
 
+config BLK_DEBUG_FS_ZONED
+       bool
+       default BLK_DEBUG_FS && BLK_DEV_ZONED
+
 config BLK_SED_OPAL
 	bool "Logic for interfacing with Opal enabled SEDs"
 	---help---
diff --git a/block/Makefile b/block/Makefile
index 6a56303b9925..a8f94cdb75c3 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -34,4 +34,5 @@ obj-$(CONFIG_BLK_MQ_RDMA)	+= blk-mq-rdma.o
 obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
 obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
+obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c
new file mode 100644
index 000000000000..fb2c82c351e4
--- /dev/null
+++ b/block/blk-mq-debugfs-zoned.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017 Western Digital Corporation or its affiliates.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/blkdev.h>
+#include "blk-mq-debugfs.h"
+
+int queue_zone_wlock_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+	unsigned int i;
+
+	if (!q->seq_zones_wlock)
+		return 0;
+
+	for (i = 0; i < q->nr_zones; i++)
+		if (test_bit(i, q->seq_zones_wlock))
+			seq_printf(m, "%u\n", i);
+
+	return 0;
+}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 26e1f8e425a8..7efe268e4447 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -206,21 +206,6 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
 	return count;
 }
 
-static int queue_zone_wlock_show(void *data, struct seq_file *m)
-{
-	struct request_queue *q = data;
-	unsigned int i;
-
-	if (!q->seq_zones_wlock)
-		return 0;
-
-	for (i = 0; i < q->nr_zones; i++)
-		if (test_bit(i, q->seq_zones_wlock))
-			seq_printf(m, "%u\n", i);
-
-	return 0;
-}
-
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "poll_stat", 0400, queue_poll_stat_show },
 	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index b9d366e57097..a9160be12be0 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -80,4 +80,13 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc
 }
 #endif
 
+#ifdef CONFIG_BLK_DEBUG_FS_ZONED
+int queue_zone_wlock_show(void *data, struct seq_file *m);
+#else
+static inline int queue_zone_wlock_show(void *data, struct seq_file *m)
+{
+	return 0;
+}
+#endif
+
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 905daa7c647e..ca5a8b046894 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -592,6 +592,7 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+#ifdef CONFIG_BLK_DEV_ZONED
 	/*
 	 * Zoned block device information for request dispatch control.
 	 * nr_zones is the total number of zones of the device. This is always
@@ -612,6 +613,7 @@ struct request_queue {
 	unsigned int		nr_zones;
 	unsigned long		*seq_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 	/*
 	 * sg stuff
@@ -800,6 +802,7 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 	return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 					     sector_t sector)
 {
@@ -815,6 +818,7 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return false;
 	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
 {
@@ -1065,6 +1069,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
 	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
@@ -1074,6 +1079,7 @@ static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
 {
 	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
 }
+#endif /* CONFIG_BLK_DEV_ZONED */
 
 /*
  * Some commands like WRITE SAME have a payload or data transfer size which
-- 
cgit v1.2.3


From 5815839b3ca16bb1d45939270871169f6803a121 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 25 Jun 2018 19:31:47 +0800
Subject: blk-mq: introduce new lock for protecting hctx->dispatch_wait

Now hctx->lock is only acquired when adding hctx->dispatch_wait to
one wait queue, but not held when removing it from the wait queue.

IO hang can be observed easily if SCHED RESTART is disabled, that means
now RESTART exits just for fixing the issue in blk_mq_mark_tag_wait().

This patch fixes the issue by introducing hctx->dispatch_wait_lock and
holding it for removing hctx->dispatch_wait in blk_mq_dispatch_wake(),
since we need to avoid acquiring hctx->lock in irq context.

Fixes: eb619fdb2d4cb8b3d3419 ("blk-mq: fix issue with shared tag queue re-running")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 26 +++++++++++++++++---------
 include/linux/blk-mq.h |  1 +
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9eee896a3592..df84281f6af6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -998,7 +998,10 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
 
 	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
 
+	spin_lock(&hctx->dispatch_wait_lock);
 	list_del_init(&wait->entry);
+	spin_unlock(&hctx->dispatch_wait_lock);
+
 	blk_mq_run_hw_queue(hctx, true);
 	return 1;
 }
@@ -1012,7 +1015,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
 static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 				 struct request *rq)
 {
-	struct sbq_wait_state *ws;
+	struct wait_queue_head *wq;
 	wait_queue_entry_t *wait;
 	bool ret;
 
@@ -1035,14 +1038,18 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	if (!list_empty_careful(&wait->entry))
 		return false;
 
-	spin_lock(&hctx->lock);
+	wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait;
+
+	spin_lock_irq(&wq->lock);
+	spin_lock(&hctx->dispatch_wait_lock);
 	if (!list_empty(&wait->entry)) {
-		spin_unlock(&hctx->lock);
+		spin_unlock(&hctx->dispatch_wait_lock);
+		spin_unlock_irq(&wq->lock);
 		return false;
 	}
 
-	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
-	add_wait_queue(&ws->wait, wait);
+	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(wq, wait);
 
 	/*
 	 * It's possible that a tag was freed in the window between the
@@ -1051,7 +1058,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	 */
 	ret = blk_mq_get_driver_tag(rq);
 	if (!ret) {
-		spin_unlock(&hctx->lock);
+		spin_unlock(&hctx->dispatch_wait_lock);
+		spin_unlock_irq(&wq->lock);
 		return false;
 	}
 
@@ -1059,10 +1067,9 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	 * We got a tag, remove ourselves from the wait queue to ensure
 	 * someone else gets the wakeup.
 	 */
-	spin_lock_irq(&ws->wait.lock);
 	list_del_init(&wait->entry);
-	spin_unlock_irq(&ws->wait.lock);
-	spin_unlock(&hctx->lock);
+	spin_unlock(&hctx->dispatch_wait_lock);
+	spin_unlock_irq(&wq->lock);
 
 	return true;
 }
@@ -2142,6 +2149,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
 	hctx->nr_ctx = 0;
 
+	spin_lock_init(&hctx->dispatch_wait_lock);
 	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
 	INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb74222..ea690254dab7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -39,6 +39,7 @@ struct blk_mq_hw_ctx {
 	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
 
+	spinlock_t		dispatch_wait_lock;
 	wait_queue_entry_t	dispatch_wait;
 	atomic_t		wait_index;
 
-- 
cgit v1.2.3


From 97889f9ac24f8d2fc8e703ea7f80c162bab10d4d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 25 Jun 2018 19:31:48 +0800
Subject: blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()

We have to remove synchronize_rcu() from blk_queue_cleanup(),
otherwise long delay can be caused during lun probe. For removing
it, we have to avoid to iterate the set->tag_list in IO path, eg,
blk_mq_sched_restart().

This patch reverts 5b79413946d (Revert "blk-mq: don't handle
TAG_SHARED in restart"). Given we have fixed enough IO hang issue,
and there isn't any reason to restart all queues in one tags any more,
see the following reasons:

1) blk-mq core can deal with shared-tags case well via blk_mq_get_driver_tag(),
which can wake up queues waiting for driver tag.

2) SCSI is a bit special because it may return BLK_STS_RESOURCE if queue,
target or host is ready, but SCSI built-in restart can cover all these well,
see scsi_end_request(), queue will be rerun after any request initiated from
this host/target is completed.

In my test on scsi_debug(8 luns), this patch may improve IOPS by 20% ~ 30%
when running I/O on these 8 luns concurrently.

Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   | 85 +++-----------------------------------------------
 block/blk-mq.c         | 10 ++----
 include/linux/blkdev.h |  2 --
 3 files changed, 7 insertions(+), 90 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 56c493c6cd90..4e027f6108ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df84281f6af6..7c6ff13171ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2335,15 +2335,10 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared) {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_inc(&q->shared_hctx_restart);
+		if (shared)
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		} else {
-			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-				atomic_dec(&q->shared_hctx_restart);
+		else
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
 	}
 }
 
@@ -2374,7 +2369,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		blk_mq_update_tag_set_depth(set, false);
 	}
 	mutex_unlock(&set->tag_list_lock);
-	synchronize_rcu();
 	INIT_LIST_HEAD(&q->tag_set_list);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca5a8b046894..9d05646d5059 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -442,8 +442,6 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
-	atomic_t		shared_hctx_restart;
-
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 
-- 
cgit v1.2.3


From 6e768717304bdbe8d2897ca8298f6b58863fdc41 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 3 Jul 2018 09:03:16 -0600
Subject: blk-mq: dequeue request one by one from sw queue if hctx is busy

It won't be efficient to dequeue request one by one from sw queue,
but we have to do that when queue is busy for better merge performance.

This patch takes the Exponential Weighted Moving Average(EWMA) to figure
out if queue is busy, then only dequeue request one by one from sw queue
when queue is busy.

Fixes: b347689ffbca ("blk-mq-sched: improve dispatching from sw queue")
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Kashyap Desai <kashyap.desai@broadcom.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c |  9 +++++++++
 block/blk-mq-sched.c   | 11 ++---------
 block/blk-mq.c         | 33 ++++++++++++++++++++++++++++++++-
 include/linux/blk-mq.h |  3 ++-
 4 files changed, 45 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 7efe268e4447..cb1e6cf7ac48 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -622,6 +622,14 @@ static int hctx_active_show(void *data, struct seq_file *m)
 	return 0;
 }
 
+static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
+{
+	struct blk_mq_hw_ctx *hctx = data;
+
+	seq_printf(m, "%u\n", hctx->dispatch_busy);
+	return 0;
+}
+
 static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
 	__acquires(&ctx->lock)
 {
@@ -783,6 +791,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"queued", 0600, hctx_queued_show, hctx_queued_write},
 	{"run", 0600, hctx_run_show, hctx_run_write},
 	{"active", 0400, hctx_active_show},
+	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
 	{},
 };
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f3b4b5ceb4d1..fdc129e64cc4 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -206,15 +206,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 		}
 	} else if (has_sched_dispatch) {
 		blk_mq_do_dispatch_sched(hctx);
-	} else if (q->mq_ops->get_budget) {
-		/*
-		 * If we need to get budget before queuing request, we
-		 * dequeue request one by one from sw queue for avoiding
-		 * to mess up I/O merge when dispatch runs out of resource.
-		 *
-		 * TODO: get more budgets, and dequeue more requests in
-		 * one time.
-		 */
+	} else if (hctx->dispatch_busy) {
+		/* dequeue request one by one from sw queue if queue is busy */
 		blk_mq_do_dispatch_ctx(hctx);
 	} else {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 795ba859b16b..850fdd02c385 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1074,6 +1074,35 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
+#define BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT  8
+#define BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR  4
+/*
+ * Update dispatch busy with the Exponential Weighted Moving Average(EWMA):
+ * - EWMA is one simple way to compute running average value
+ * - weight(7/8 and 1/8) is applied so that it can decrease exponentially
+ * - take 4 as factor for avoiding to get too small(0) result, and this
+ *   factor doesn't matter because EWMA decreases exponentially
+ */
+static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
+{
+	unsigned int ewma;
+
+	if (hctx->queue->elevator)
+		return;
+
+	ewma = hctx->dispatch_busy;
+
+	if (!ewma && !busy)
+		return;
+
+	ewma *= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT - 1;
+	if (busy)
+		ewma += 1 << BLK_MQ_DISPATCH_BUSY_EWMA_FACTOR;
+	ewma /= BLK_MQ_DISPATCH_BUSY_EWMA_WEIGHT;
+
+	hctx->dispatch_busy = ewma;
+}
+
 #define BLK_MQ_RESOURCE_DELAY	3		/* ms units */
 
 /*
@@ -1210,8 +1239,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		else if (needs_restart && (ret == BLK_STS_RESOURCE))
 			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
 
+		blk_mq_update_dispatch_busy(hctx, true);
 		return false;
-	}
+	} else
+		blk_mq_update_dispatch_busy(hctx, false);
 
 	/*
 	 * If the host/device is unable to accept more work, inform the
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ea690254dab7..d710e92874cc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -35,9 +35,10 @@ struct blk_mq_hw_ctx {
 	struct sbitmap		ctx_map;
 
 	struct blk_mq_ctx	*dispatch_from;
+	unsigned int		dispatch_busy;
 
-	struct blk_mq_ctx	**ctxs;
 	unsigned int		nr_ctx;
+	struct blk_mq_ctx	**ctxs;
 
 	spinlock_t		dispatch_wait_lock;
 	wait_queue_entry_t	dispatch_wait;
-- 
cgit v1.2.3


From 08e18eab0c579ad84399c1899c11899734854eb2 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:50 -0400
Subject: block: add bi_blkg to the bio for cgroups

Currently io.low uses a bi_cg_private to stash its private data for the
blkg, however other blkcg policies may want to use this as well.  Since
we can get the private data out of the blkg, move this to bi_blkg in the
bio and make it generic, then we can use bio_associate_blkg() to attach
the blkg to the bio.

Theoretically we could simply replace the bi_css with this since we can
get to all the same information from the blkg, however you have to
lookup the blkg, so for example wbc_init_bio() would have to lookup and
possibly allocate the blkg for the css it was trying to attach to the
bio.  This could be problematic and result in us either not attaching
the css at all to the bio, or falling back to the root blkcg if we are
unable to allocate the corresponding blkg.

So for now do this, and in the future if possible we could just replace
the bi_css with bi_blkg and update the helpers to do the correct
translation.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               | 23 +++++++++++++++++++++++
 block/blk-throttle.c      | 21 +++++++--------------
 include/linux/bio.h       |  1 +
 include/linux/blk_types.h |  2 +-
 4 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 67eff5eddc49..044571538574 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -28,6 +28,7 @@
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
+#include <linux/blk-cgroup.h>
 
 #include <trace/events/block.h>
 #include "blk.h"
@@ -2036,6 +2037,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
+/**
+ * bio_associate_blkg - associate a bio with the specified blkg
+ * @bio: target bio
+ * @blkg: the blkg to associate
+ *
+ * Associate @bio with the blkg specified by @blkg.  This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
+ */
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
+{
+	if (unlikely(bio->bi_blkg))
+		return -EBUSY;
+	blkg_get(blkg);
+	bio->bi_blkg = blkg;
+	return 0;
+}
+
 /**
  * bio_disassociate_task - undo bio_associate_current()
  * @bio: target bio
@@ -2050,6 +2069,10 @@ void bio_disassociate_task(struct bio *bio)
 		css_put(bio->bi_css);
 		bio->bi_css = NULL;
 	}
+	if (bio->bi_blkg) {
+		blkg_put(bio->bi_blkg);
+		bio->bi_blkg = NULL;
+	}
 }
 
 /**
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 63bb261811dd..caaabbe8a7a5 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2134,12 +2134,8 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	if (bio->bi_css) {
-		if (bio->bi_cg_private)
-			blkg_put(tg_to_blkg(bio->bi_cg_private));
-		bio->bi_cg_private = tg;
-		blkg_get(tg_to_blkg(tg));
-	}
+	if (bio->bi_css)
+		bio_associate_blkg(bio, tg_to_blkg(tg));
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }
@@ -2287,6 +2283,7 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 
 void blk_throtl_bio_endio(struct bio *bio)
 {
+	struct blkcg_gq *blkg;
 	struct throtl_grp *tg;
 	u64 finish_time_ns;
 	unsigned long finish_time;
@@ -2294,20 +2291,18 @@ void blk_throtl_bio_endio(struct bio *bio)
 	unsigned long lat;
 	int rw = bio_data_dir(bio);
 
-	tg = bio->bi_cg_private;
-	if (!tg)
+	blkg = bio->bi_blkg;
+	if (!blkg)
 		return;
-	bio->bi_cg_private = NULL;
+	tg = blkg_to_tg(blkg);
 
 	finish_time_ns = ktime_get_ns();
 	tg->last_finish_time = finish_time_ns >> 10;
 
 	start_time = bio_issue_time(&bio->bi_issue) >> 10;
 	finish_time = __bio_issue_time(finish_time_ns) >> 10;
-	if (!start_time || finish_time <= start_time) {
-		blkg_put(tg_to_blkg(tg));
+	if (!start_time || finish_time <= start_time)
 		return;
-	}
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
@@ -2336,8 +2331,6 @@ void blk_throtl_bio_endio(struct bio *bio)
 		tg->bio_cnt /= 2;
 		tg->bad_bio_cnt /= 2;
 	}
-
-	blkg_put(tg_to_blkg(tg));
 }
 #endif
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f08f5fe7bd08..a279ba384da9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -555,6 +555,7 @@ do {						\
 
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3c4f390aea4b..3364d42ebe08 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -179,8 +179,8 @@ struct bio {
 	 */
 	struct io_context	*bi_ioc;
 	struct cgroup_subsys_state *bi_css;
+	struct blkcg_gq		*bi_blkg;
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	void			*bi_cg_private;
 	struct bio_issue	bi_issue;
 #endif
 #endif
-- 
cgit v1.2.3


From c7c98fd37653955d3a17dd4f1fa67aba070096a9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:51 -0400
Subject: block: introduce bio_issue_as_root_blkg

Instead of forcing all file systems to get the right context on their
bio's, simply check for REQ_META to see if we need to issue as the root
blkg.  We don't want to force all bio's to have the root blkg associated
with them if REQ_META is set, as some controllers (blk-iolatency) need
to know who the originating cgroup is so it can backcharge them for the
work they are doing.  This helper will make sure that the controllers do
the proper thing wrt the IO priority and backcharging.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..69aa71dc0c04 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -238,6 +238,22 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
+/**
+ * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
+ * @return: true if this bio needs to be submitted with the root blkg context.
+ *
+ * In order to avoid priority inversions we sometimes need to issue a bio as if
+ * it were attached to the root blkg, and then backcharge to the actual owning
+ * blkg.  The idea is we do bio_blkcg() to look up the actual context for the
+ * bio and attach the appropriate blkg to the bio.  Then we call this helper and
+ * if it is true run with the root blkg for that queue and then do any
+ * backcharging to the originating cgroup once the io is complete.
+ */
+static inline bool bio_issue_as_root_blkg(struct bio *bio)
+{
+	return (bio->bi_opf & REQ_META);
+}
+
 /**
  * blkcg_parent - get the parent of a blkcg
  * @blkcg: blkcg of interest
-- 
cgit v1.2.3


From 903d23f0a354f226fa78f1c1c34b60aaf992e812 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:52 -0400
Subject: blk-cgroup: allow controllers to output their own stats

blk-iolatency has a few stats that it would like to print out, and
instead of adding a bunch of crap to the generic code just provide a
helper so that controllers can add stuff to the stat line if they want
to.

Hide it behind a boot option since it changes the output of io.stat from
normal, and these stats are only interesting to developers.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 47 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/blk-cgroup.h |  3 +++
 2 files changed, 47 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index eb85cb87c40f..7dc6f05cc44b 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -50,6 +50,8 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
+static bool blkcg_debug_stats = false;
+
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
@@ -954,13 +956,25 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 
 	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
 		const char *dname;
+		char *buf;
 		struct blkg_rwstat rwstat;
 		u64 rbytes, wbytes, rios, wios;
+		size_t size = seq_get_buf(sf, &buf), off = 0;
+		int i;
+		bool has_stats = false;
 
 		dname = blkg_dev_name(blkg);
 		if (!dname)
 			continue;
 
+		/*
+		 * Hooray string manipulation, count is the size written NOT
+		 * INCLUDING THE \0, so size is now count+1 less than what we
+		 * had before, but we want to start writing the next bit from
+		 * the \0 so we only add count to buf.
+		 */
+		off += scnprintf(buf+off, size-off, "%s ", dname);
+
 		spin_lock_irq(blkg->q->queue_lock);
 
 		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
@@ -975,9 +989,33 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 
 		spin_unlock_irq(blkg->q->queue_lock);
 
-		if (rbytes || wbytes || rios || wios)
-			seq_printf(sf, "%s rbytes=%llu wbytes=%llu rios=%llu wios=%llu\n",
-				   dname, rbytes, wbytes, rios, wios);
+		if (rbytes || wbytes || rios || wios) {
+			has_stats = true;
+			off += scnprintf(buf+off, size-off,
+					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu",
+					 rbytes, wbytes, rios, wios);
+		}
+
+		if (!blkcg_debug_stats)
+			goto next;
+
+		for (i = 0; i < BLKCG_MAX_POLS; i++) {
+			struct blkcg_policy *pol = blkcg_policy[i];
+			size_t written;
+
+			if (!blkg->pd[i] || !pol->pd_stat_fn)
+				continue;
+
+			written = pol->pd_stat_fn(blkg->pd[i], buf+off, size-off);
+			if (written)
+				has_stats = true;
+			off += written;
+		}
+next:
+		if (has_stats) {
+			off += scnprintf(buf+off, size-off, "\n");
+			seq_commit(sf, off);
+		}
 	}
 
 	rcu_read_unlock();
@@ -1547,3 +1585,6 @@ out_unlock:
 	mutex_unlock(&blkcg_pol_register_mutex);
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
+
+module_param(blkcg_debug_stats, bool, 0644);
+MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 69aa71dc0c04..b41292726c0f 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -148,6 +148,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
+typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
+				      size_t size);
 
 struct blkcg_policy {
 	int				plid;
@@ -167,6 +169,7 @@ struct blkcg_policy {
 	blkcg_pol_offline_pd_fn		*pd_offline_fn;
 	blkcg_pol_free_pd_fn		*pd_free_fn;
 	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
+	blkcg_pol_stat_pd_fn		*pd_stat_fn;
 };
 
 extern struct blkcg blkcg_root;
-- 
cgit v1.2.3


From 0d1e0c7cd5909d6c6aa0957179318e13fcca971a Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:53 -0400
Subject: blk: introduce REQ_SWAP

Just like REQ_META, it's important to know the IO coming down is swap
in order to guard against potential IO priority inversion issues with
cgroups.  Add REQ_SWAP and use it for all swap IO, and add it to our
bio_issue_as_root_blkg helper.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 2 +-
 include/linux/blk_types.h  | 3 ++-
 mm/page_io.c               | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index b41292726c0f..a8f9ba8f33a4 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -254,7 +254,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
  */
 static inline bool bio_issue_as_root_blkg(struct bio *bio)
 {
-	return (bio->bi_opf & REQ_META);
+	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
 }
 
 /**
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3364d42ebe08..0ffc34c5cc83 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -329,7 +329,7 @@ enum req_flag_bits {
 
 	/* for driver use */
 	__REQ_DRV,
-
+	__REQ_SWAP,		/* swapping request. */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -351,6 +351,7 @@ enum req_flag_bits {
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
 
 #define REQ_DRV			(1ULL << __REQ_DRV)
+#define REQ_SWAP		(1ULL << __REQ_SWAP)
 
 #define REQ_FAILFAST_MASK \
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
diff --git a/mm/page_io.c b/mm/page_io.c
index b41cf9644585..a552cb37e220 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,7 +338,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		ret = -ENOMEM;
 		goto out;
 	}
-	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From 0d3bd88d54f513723602b361dccfc71639f50779 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 3 Jul 2018 11:14:54 -0400
Subject: swap,blkcg: issue swap io with the appropriate context

For backcharging we need to know who the page belongs to when swapping
it out.  We don't worry about things that do ->rw_page (zram etc) at the
moment, we're only worried about pages that actually go to a block
device.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 24 ++++++++++++++++++++++++
 include/linux/bio.h |  7 +++++++
 mm/page_io.c        |  1 +
 3 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 044571538574..5f84f5c3887b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2015,6 +2015,30 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
+#ifdef CONFIG_MEMCG
+/**
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
+ * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
+ */
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *blkcg_css;
+
+	if (unlikely(bio->bi_css))
+		return -EBUSY;
+	if (!page->mem_cgroup)
+		return 0;
+	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+				     &io_cgrp_subsys);
+	bio->bi_css = blkcg_css;
+	return 0;
+}
+#endif /* CONFIG_MEMCG */
+
 /**
  * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a279ba384da9..a00dfff51aa5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -553,6 +553,13 @@ do {						\
 #define bio_dev(bio) \
 	disk_devt((bio)->bi_disk)
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+#else
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+						struct page *page) {  return 0; }
+#endif
+
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
diff --git a/mm/page_io.c b/mm/page_io.c
index a552cb37e220..aafd19ec1db4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,6 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
+	bio_associate_blkcg_from_page(bio, page);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From d09d8df3a29403693d9d20cc34ed101f2c558e2b Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:14:55 -0400
Subject: blkcg: add generic throttling mechanism

Since IO can be issued from literally anywhere it's almost impossible to
do throttling without having some sort of adverse effect somewhere else
in the system because of locking or other dependencies.  The best way to
solve this is to do the throttling when we know we aren't holding any
other kernel resources.  Do this by tracking throttling in a per-blkg
basis, and if we require throttling flag the task that it needs to check
before it returns to user space and possibly sleep there.

This is to address the case where a process is doing work that is
generating IO that can't be throttled, whether that is directly with a
lot of REQ_META IO, or indirectly by allocating so much memory that it
is swamping the disk with REQ_SWAP.  We can't use task_add_work as we
don't want to induce a memory allocation in the IO path, so simply
saving the request queue in the task and flagging it to do the
notify_resume thing achieves the same result without the overhead of a
memory allocation.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c          | 220 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blk-cgroup.h  |  99 ++++++++++++++++++++
 include/linux/cgroup-defs.h |   3 +
 include/linux/sched.h       |   8 ++
 include/linux/tracehook.h   |   2 +
 5 files changed, 332 insertions(+)

(limited to 'include')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7dc6f05cc44b..d3310ec96c2a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -27,6 +27,7 @@
 #include <linux/atomic.h>
 #include <linux/ctype.h>
 #include <linux/blk-cgroup.h>
+#include <linux/tracehook.h>
 #include "blk.h"
 
 #define MAX_KEY_LEN 100
@@ -999,6 +1000,14 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 		if (!blkcg_debug_stats)
 			goto next;
 
+		if (atomic_read(&blkg->use_delay)) {
+			has_stats = true;
+			off += scnprintf(buf+off, size-off,
+					 " use_delay=%d delay_nsec=%llu",
+					 atomic_read(&blkg->use_delay),
+					(unsigned long long)atomic64_read(&blkg->delay_nsec));
+		}
+
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];
 			size_t written;
@@ -1326,6 +1335,13 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
 	mutex_unlock(&blkcg_pol_mutex);
 }
 
+static void blkcg_exit(struct task_struct *tsk)
+{
+	if (tsk->throttle_queue)
+		blk_put_queue(tsk->throttle_queue);
+	tsk->throttle_queue = NULL;
+}
+
 struct cgroup_subsys io_cgrp_subsys = {
 	.css_alloc = blkcg_css_alloc,
 	.css_offline = blkcg_css_offline,
@@ -1335,6 +1351,7 @@ struct cgroup_subsys io_cgrp_subsys = {
 	.dfl_cftypes = blkcg_files,
 	.legacy_cftypes = blkcg_legacy_files,
 	.legacy_name = "blkio",
+	.exit = blkcg_exit,
 #ifdef CONFIG_MEMCG
 	/*
 	 * This ensures that, if available, memcg is automatically enabled
@@ -1586,5 +1603,208 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
+/*
+ * Scale the accumulated delay based on how long it has been since we updated
+ * the delay.  We only call this when we are adding delay, in case it's been a
+ * while since we added delay, and when we are checking to see if we need to
+ * delay a task, to account for any delays that may have occurred.
+ */
+static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
+{
+	u64 old = atomic64_read(&blkg->delay_start);
+
+	/*
+	 * We only want to scale down every second.  The idea here is that we
+	 * want to delay people for min(delay_nsec, NSEC_PER_SEC) in a certain
+	 * time window.  We only want to throttle tasks for recent delay that
+	 * has occurred, in 1 second time windows since that's the maximum
+	 * things can be throttled.  We save the current delay window in
+	 * blkg->last_delay so we know what amount is still left to be charged
+	 * to the blkg from this point onward.  blkg->last_use keeps track of
+	 * the use_delay counter.  The idea is if we're unthrottling the blkg we
+	 * are ok with whatever is happening now, and we can take away more of
+	 * the accumulated delay as we've already throttled enough that
+	 * everybody is happy with their IO latencies.
+	 */
+	if (time_before64(old + NSEC_PER_SEC, now) &&
+	    atomic64_cmpxchg(&blkg->delay_start, old, now) == old) {
+		u64 cur = atomic64_read(&blkg->delay_nsec);
+		u64 sub = min_t(u64, blkg->last_delay, now - old);
+		int cur_use = atomic_read(&blkg->use_delay);
+
+		/*
+		 * We've been unthrottled, subtract a larger chunk of our
+		 * accumulated delay.
+		 */
+		if (cur_use < blkg->last_use)
+			sub = max_t(u64, sub, blkg->last_delay >> 1);
+
+		/*
+		 * This shouldn't happen, but handle it anyway.  Our delay_nsec
+		 * should only ever be growing except here where we subtract out
+		 * min(last_delay, 1 second), but lord knows bugs happen and I'd
+		 * rather not end up with negative numbers.
+		 */
+		if (unlikely(cur < sub)) {
+			atomic64_set(&blkg->delay_nsec, 0);
+			blkg->last_delay = 0;
+		} else {
+			atomic64_sub(sub, &blkg->delay_nsec);
+			blkg->last_delay = cur - sub;
+		}
+		blkg->last_use = cur_use;
+	}
+}
+
+/*
+ * This is called when we want to actually walk up the hierarchy and check to
+ * see if we need to throttle, and then actually throttle if there is some
+ * accumulated delay.  This should only be called upon return to user space so
+ * we're not holding some lock that would induce a priority inversion.
+ */
+static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
+{
+	u64 now = ktime_to_ns(ktime_get());
+	u64 exp;
+	u64 delay_nsec = 0;
+	int tok;
+
+	while (blkg->parent) {
+		if (atomic_read(&blkg->use_delay)) {
+			blkcg_scale_delay(blkg, now);
+			delay_nsec = max_t(u64, delay_nsec,
+					   atomic64_read(&blkg->delay_nsec));
+		}
+		blkg = blkg->parent;
+	}
+
+	if (!delay_nsec)
+		return;
+
+	/*
+	 * Let's not sleep for all eternity if we've amassed a huge delay.
+	 * Swapping or metadata IO can accumulate 10's of seconds worth of
+	 * delay, and we want userspace to be able to do _something_ so cap the
+	 * delays at 1 second.  If there's 10's of seconds worth of delay then
+	 * the tasks will be delayed for 1 second for every syscall.
+	 */
+	delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
+
+	/*
+	 * TODO: the use_memdelay flag is going to be for the upcoming psi stuff
+	 * that hasn't landed upstream yet.  Once that stuff is in place we need
+	 * to do a psi_memstall_enter/leave if memdelay is set.
+	 */
+
+	exp = ktime_add_ns(now, delay_nsec);
+	tok = io_schedule_prepare();
+	do {
+		__set_current_state(TASK_KILLABLE);
+		if (!schedule_hrtimeout(&exp, HRTIMER_MODE_ABS))
+			break;
+	} while (!fatal_signal_pending(current));
+	io_schedule_finish(tok);
+}
+
+/**
+ * blkcg_maybe_throttle_current - throttle the current task if it has been marked
+ *
+ * This is only called if we've been marked with set_notify_resume().  Obviously
+ * we can be set_notify_resume() for reasons other than blkcg throttling, so we
+ * check to see if current->throttle_queue is set and if not this doesn't do
+ * anything.  This should only ever be called by the resume code, it's not meant
+ * to be called by people willy-nilly as it will actually do the work to
+ * throttle the task if it is setup for throttling.
+ */
+void blkcg_maybe_throttle_current(void)
+{
+	struct request_queue *q = current->throttle_queue;
+	struct cgroup_subsys_state *css;
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	bool use_memdelay = current->use_memdelay;
+
+	if (!q)
+		return;
+
+	current->throttle_queue = NULL;
+	current->use_memdelay = false;
+
+	rcu_read_lock();
+	css = kthread_blkcg();
+	if (css)
+		blkcg = css_to_blkcg(css);
+	else
+		blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
+
+	if (!blkcg)
+		goto out;
+	blkg = blkg_lookup(blkcg, q);
+	if (!blkg)
+		goto out;
+	blkg = blkg_try_get(blkg);
+	if (!blkg)
+		goto out;
+	rcu_read_unlock();
+	blk_put_queue(q);
+
+	blkcg_maybe_throttle_blkg(blkg, use_memdelay);
+	blkg_put(blkg);
+	return;
+out:
+	rcu_read_unlock();
+	blk_put_queue(q);
+}
+EXPORT_SYMBOL_GPL(blkcg_maybe_throttle_current);
+
+/**
+ * blkcg_schedule_throttle - this task needs to check for throttling
+ * @q - the request queue IO was submitted on
+ * @use_memdelay - do we charge this to memory delay for PSI
+ *
+ * This is called by the IO controller when we know there's delay accumulated
+ * for the blkg for this task.  We do not pass the blkg because there are places
+ * we call this that may not have that information, the swapping code for
+ * instance will only have a request_queue at that point.  This set's the
+ * notify_resume for the task to check and see if it requires throttling before
+ * returning to user space.
+ *
+ * We will only schedule once per syscall.  You can call this over and over
+ * again and it will only do the check once upon return to user space, and only
+ * throttle once.  If the task needs to be throttled again it'll need to be
+ * re-set at the next time we see the task.
+ */
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay)
+{
+	if (unlikely(current->flags & PF_KTHREAD))
+		return;
+
+	if (!blk_get_queue(q))
+		return;
+
+	if (current->throttle_queue)
+		blk_put_queue(current->throttle_queue);
+	current->throttle_queue = q;
+	if (use_memdelay)
+		current->use_memdelay = use_memdelay;
+	set_notify_resume(current);
+}
+EXPORT_SYMBOL_GPL(blkcg_schedule_throttle);
+
+/**
+ * blkcg_add_delay - add delay to this blkg
+ * @now - the current time in nanoseconds
+ * @delta - how many nanoseconds of delay to add
+ *
+ * Charge @delta to the blkg's current delay accumulation.  This is used to
+ * throttle tasks if an IO controller thinks we need more throttling.
+ */
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
+{
+	blkcg_scale_delay(blkg, now);
+	atomic64_add(delta, &blkg->delay_nsec);
+}
+EXPORT_SYMBOL_GPL(blkcg_add_delay);
+
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a8f9ba8f33a4..de57de4831d5 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -136,6 +136,12 @@ struct blkcg_gq {
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
 
 	struct rcu_head			rcu_head;
+
+	atomic_t			use_delay;
+	atomic64_t			delay_nsec;
+	atomic64_t			delay_start;
+	u64				last_delay;
+	int				last_use;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -241,6 +247,26 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
+static inline bool blk_cgroup_congested(void)
+{
+	struct cgroup_subsys_state *css;
+	bool ret = false;
+
+	rcu_read_lock();
+	css = kthread_blkcg();
+	if (!css)
+		css = task_css(current, io_cgrp_id);
+	while (css) {
+		if (atomic_read(&css->cgroup->congestion_count)) {
+			ret = true;
+			break;
+		}
+		css = css->parent;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
  * @return: true if this bio needs to be submitted with the root blkg context.
@@ -374,6 +400,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
 	atomic_inc(&blkg->refcnt);
 }
 
+/**
+ * blkg_try_get - try and get a blkg reference
+ * @blkg: blkg to get
+ *
+ * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
+ * of freeing this blkg, so we can only use it if the refcnt is not zero.
+ */
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+{
+	if (atomic_inc_not_zero(&blkg->refcnt))
+		return blkg;
+	return NULL;
+}
+
+
 void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
@@ -734,6 +775,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	return !throtl;
 }
 
+static inline void blkcg_use_delay(struct blkcg_gq *blkg)
+{
+	if (atomic_add_return(1, &blkg->use_delay) == 1)
+		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+}
+
+static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+
+	if (old == 0)
+		return 0;
+
+	/*
+	 * We do this song and dance because we can race with somebody else
+	 * adding or removing delay.  If we just did an atomic_dec we'd end up
+	 * negative and we'd already be in trouble.  We need to subtract 1 and
+	 * then check to see if we were the last delay so we can drop the
+	 * congestion count on the cgroup.
+	 */
+	while (old) {
+		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
+		if (cur == old)
+			break;
+		old = cur;
+	}
+
+	if (old == 0)
+		return 0;
+	if (old == 1)
+		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+	return 1;
+}
+
+static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
+{
+	int old = atomic_read(&blkg->use_delay);
+	if (!old)
+		return;
+	/* We only want 1 person clearing the congestion count for this blkg. */
+	while (old) {
+		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
+		if (cur == old) {
+			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+			break;
+		}
+		old = cur;
+	}
+}
+
+void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
+void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
+void blkcg_maybe_throttle_current(void);
 #else	/* CONFIG_BLK_CGROUP */
 
 struct blkcg {
@@ -753,8 +847,13 @@ struct blkcg_policy {
 
 #define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
 
+static inline void blkcg_maybe_throttle_current(void) { }
+static inline bool blk_cgroup_congested(void) { return false; }
+
 #ifdef CONFIG_BLOCK
 
+static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
+
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index c0e68f903011..ff20b677fb9f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -438,6 +438,9 @@ struct cgroup {
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 
+	/* If there is block congestion on this cgroup. */
+	atomic_t congestion_count;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..c2e993de67ec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -734,6 +734,10 @@ struct task_struct {
 	/* disallow userland-initiated cgroup migration */
 	unsigned			no_cgroup_migration:1;
 #endif
+#ifdef CONFIG_BLK_CGROUP
+	/* to be used once the psi infrastructure lands upstream. */
+	unsigned			use_memdelay:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
@@ -1151,6 +1155,10 @@ struct task_struct {
 	unsigned int			memcg_nr_pages_over_high;
 #endif
 
+#ifdef CONFIG_BLK_CGROUP
+	struct request_queue		*throttle_queue;
+#endif
+
 #ifdef CONFIG_UPROBES
 	struct uprobe_task		*utask;
 #endif
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 4a8841963c2e..05589a3e37f4 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -51,6 +51,7 @@
 #include <linux/security.h>
 #include <linux/task_work.h>
 #include <linux/memcontrol.h>
+#include <linux/blk-cgroup.h>
 struct linux_binprm;
 
 /*
@@ -192,6 +193,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 		task_work_run();
 
 	mem_cgroup_handle_over_high();
+	blkcg_maybe_throttle_current();
 }
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit v1.2.3


From 2cf855837b89d92996cf264713f3bed2bf9b0b4f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 3 Jul 2018 11:14:56 -0400
Subject: memcontrol: schedule throttling if we are congested

Memory allocations can induce swapping via kswapd or direct reclaim.  If
we are having IO done for us by kswapd and don't actually go into direct
reclaim we may never get scheduled for throttling.  So instead check to
see if our cgroup is congested, and if so schedule the throttling.
Before we return to user space the throttling stuff will only throttle
if we actually required it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/memcontrol.h | 13 +++++++++++++
 include/linux/swap.h       | 11 ++++++++++-
 mm/huge_memory.c           |  6 +++---
 mm/memcontrol.c            | 13 +++++++++++++
 mm/memory.c                | 11 ++++++-----
 mm/shmem.c                 | 10 +++++-----
 mm/swapfile.c              | 31 +++++++++++++++++++++++++++++++
 7 files changed, 81 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6c6fb116e925..680d3395fc83 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -317,6 +317,9 @@ enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
 int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
 			  bool compound);
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound);
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare, bool compound);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg,
@@ -789,6 +792,16 @@ static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 	return 0;
 }
 
+static inline int mem_cgroup_try_charge_delay(struct page *page,
+					      struct mm_struct *mm,
+					      gfp_t gfp_mask,
+					      struct mem_cgroup **memcgp,
+					      bool compound)
+{
+	*memcgp = NULL;
+	return 0;
+}
+
 static inline void mem_cgroup_commit_charge(struct page *page,
 					    struct mem_cgroup *memcg,
 					    bool lrucare, bool compound)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c063443d8638..1a8bd05a335e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -629,7 +629,6 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 
 	return memcg->swappiness;
 }
-
 #else
 static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 {
@@ -637,6 +636,16 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
 }
 #endif
 
+#if defined(CONFIG_SWAP) && defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+extern void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+					 gfp_t gfp_mask);
+#else
+static inline void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg,
+						int node, gfp_t gfp_mask)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG_SWAP
 extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry);
 extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..b87d5b151db2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -552,7 +552,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1142,7 +1142,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
 		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
 					       vmf->address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
-			     mem_cgroup_try_charge(pages[i], vma->vm_mm,
+			     mem_cgroup_try_charge_delay(pages[i], vma->vm_mm,
 				     GFP_KERNEL, &memcg, false))) {
 			if (pages[i])
 				put_page(pages[i]);
@@ -1312,7 +1312,7 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
+	if (unlikely(mem_cgroup_try_charge_delay(new_page, vma->vm_mm,
 					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e6f0d5ef320a..64bd28d35388 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5593,6 +5593,19 @@ out:
 	return ret;
 }
 
+int mem_cgroup_try_charge_delay(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp,
+			  bool compound)
+{
+	struct mem_cgroup *memcg;
+	int ret;
+
+	ret = mem_cgroup_try_charge(page, mm, gfp_mask, memcgp, compound);
+	memcg = *memcgp;
+	mem_cgroup_throttle_swaprate(memcg, page_to_nid(page), gfp_mask);
+	return ret;
+}
+
 /**
  * mem_cgroup_commit_charge - commit a page charge
  * @page: page to charge
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..dfe80c574282 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2503,7 +2503,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 		cow_user_page(new_page, old_page, vmf->address, vma);
 	}
 
-	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
 		goto oom_free_new;
 
 	__SetPageUptodate(new_page);
@@ -3003,8 +3003,8 @@ int do_swap_page(struct vm_fault *vmf)
 		goto out_page;
 	}
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL,
-				&memcg, false)) {
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL,
+					&memcg, false)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
 	}
@@ -3165,7 +3165,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+	if (mem_cgroup_try_charge_delay(page, vma->vm_mm, GFP_KERNEL, &memcg,
+					false))
 		goto oom_free_page;
 
 	/*
@@ -3661,7 +3662,7 @@ static int do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_try_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
+	if (mem_cgroup_try_charge_delay(vmf->cow_page, vma->vm_mm, GFP_KERNEL,
 				&vmf->memcg, false)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..6206ca3510cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1239,8 +1239,8 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
 	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
 	 * Charged back to the user (not to caller) when swap account is used.
 	 */
-	error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg,
-			false);
+	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
+					    &memcg, false);
 	if (error)
 		goto out;
 	/* No radix_tree_preload: swap entry keeps a place for page in tree */
@@ -1712,7 +1712,7 @@ repeat:
 				goto failed;
 		}
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				false);
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
@@ -1818,7 +1818,7 @@ alloc_nohuge:		page = shmem_alloc_and_acct_page(gfp, inode,
 		if (sgp == SGP_WRITE)
 			__SetPageReferenced(page);
 
-		error = mem_cgroup_try_charge(page, charge_mm, gfp, &memcg,
+		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
 				PageTransHuge(page));
 		if (error)
 			goto unacct;
@@ -2291,7 +2291,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageSwapBacked(page);
 	__SetPageUptodate(page);
 
-	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
+	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
 	if (ret)
 		goto out_release;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2cc2972eedaf..db4ec8ae1c8c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3731,6 +3731,37 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
 	}
 }
 
+#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
+void mem_cgroup_throttle_swaprate(struct mem_cgroup *memcg, int node,
+				  gfp_t gfp_mask)
+{
+	struct swap_info_struct *si, *next;
+	if (!(gfp_mask & __GFP_IO) || !memcg)
+		return;
+
+	if (!blk_cgroup_congested())
+		return;
+
+	/*
+	 * We've already scheduled a throttle, avoid taking the global swap
+	 * lock.
+	 */
+	if (current->throttle_queue)
+		return;
+
+	spin_lock(&swap_avail_lock);
+	plist_for_each_entry_safe(si, next, &swap_avail_heads[node],
+				  avail_lists[node]) {
+		if (si->bdev) {
+			blkcg_schedule_throttle(bdev_get_queue(si->bdev),
+						true);
+			break;
+		}
+	}
+	spin_unlock(&swap_avail_lock);
+}
+#endif
+
 static int __init swapfile_init(void)
 {
 	int nid;
-- 
cgit v1.2.3


From a79050434b45959f397042080fd1d70ffa9bd9df Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 09:32:35 -0600
Subject: blk-rq-qos: refactor out common elements of blk-wbt

blkcg-qos is going to do essentially what wbt does, only on a cgroup
basis.  Break out the common code that will be shared between blkcg-qos
and wbt into blk-rq-qos.* so they can both utilize the same
infrastructure.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile         |   2 +-
 block/blk-core.c       |  12 +-
 block/blk-mq.c         |  12 +-
 block/blk-rq-qos.c     | 178 +++++++++++++++++++++++++++
 block/blk-rq-qos.h     | 106 ++++++++++++++++
 block/blk-settings.c   |   4 +-
 block/blk-sysfs.c      |  22 ++--
 block/blk-wbt.c        | 326 ++++++++++++++++++++++---------------------------
 block/blk-wbt.h        |  63 ++++------
 include/linux/blkdev.h |   4 +-
 10 files changed, 478 insertions(+), 251 deletions(-)
 create mode 100644 block/blk-rq-qos.c
 create mode 100644 block/blk-rq-qos.h

(limited to 'include')

diff --git a/block/Makefile b/block/Makefile
index a8f94cdb75c3..57d0f47ab05f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o partition-generic.o ioprio.o \
-			badblocks.o partitions/
+			badblocks.o partitions/ blk-rq-qos.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_SCSI_REQUEST)	+= scsi_ioctl.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ff8e131a892..b33a73bcf2d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1645,7 +1645,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1752,7 +1752,7 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, req);
+	rq_qos_done(q, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -2044,7 +2044,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 	}
 
 get_rq:
-	wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
+	wb_acct = rq_qos_throttle(q, bio, q->queue_lock);
 
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
@@ -2054,7 +2054,7 @@ get_rq:
 	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (PTR_ERR(req) == -ENOMEM)
 			bio->bi_status = BLK_STS_RESOURCE;
 		else
@@ -2983,7 +2983,7 @@ void blk_start_request(struct request *req)
 		req->throtl_size = blk_rq_sectors(req);
 #endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, req);
+		rq_qos_issue(req->q, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3207,7 +3207,7 @@ void blk_finish_request(struct request *req, blk_status_t error)
 	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, req);
+		rq_qos_done(q, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 850fdd02c385..ea2a226457fa 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -504,7 +504,7 @@ void blk_mq_free_request(struct request *rq)
 	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
 		laptop_io_completion(q->backing_dev_info);
 
-	wbt_done(q->rq_wb, rq);
+	rq_qos_done(q, rq);
 
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
@@ -527,7 +527,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
-		wbt_done(rq->q->rq_wb, rq);
+		rq_qos_done(rq->q, rq);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -641,7 +641,7 @@ void blk_mq_start_request(struct request *rq)
 		rq->throtl_size = blk_rq_sectors(rq);
 #endif
 		rq->rq_flags |= RQF_STATS;
-		wbt_issue(q->rq_wb, rq);
+		rq_qos_issue(q, rq);
 	}
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
@@ -667,7 +667,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 	blk_mq_put_driver_tag(rq);
 
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, rq);
+	rq_qos_requeue(q, rq);
 
 	if (blk_mq_request_started(rq)) {
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
@@ -1806,13 +1806,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	if (blk_mq_sched_bio_merge(q, bio))
 		return BLK_QC_T_NONE;
 
-	wb_acct = wbt_wait(q->rq_wb, bio, NULL);
+	wb_acct = rq_qos_throttle(q, bio, NULL);
 
 	trace_block_getrq(q, bio, bio->bi_opf);
 
 	rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
 	if (unlikely(!rq)) {
-		__wbt_done(q->rq_wb, wb_acct);
+		rq_qos_cleanup(q, wb_acct);
 		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
 		return BLK_QC_T_NONE;
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
new file mode 100644
index 000000000000..d2f2af8aa10c
--- /dev/null
+++ b/block/blk-rq-qos.c
@@ -0,0 +1,178 @@
+#include "blk-rq-qos.h"
+
+#include "blk-wbt.h"
+
+/*
+ * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
+ * false if 'v' + 1 would be bigger than 'below'.
+ */
+static bool atomic_inc_below(atomic_t *v, int below)
+{
+	int cur = atomic_read(v);
+
+	for (;;) {
+		int old;
+
+		if (cur >= below)
+			return false;
+		old = atomic_cmpxchg(v, cur, cur + 1);
+		if (old == cur)
+			break;
+		cur = old;
+	}
+
+	return true;
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit)
+{
+	return atomic_inc_below(&rq_wait->inflight, limit);
+}
+
+void rq_qos_cleanup(struct request_queue *q, enum wbt_flags wb_acct)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->cleanup)
+			rqos->ops->cleanup(rqos, wb_acct);
+	}
+}
+
+void rq_qos_done(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->done)
+			rqos->ops->done(rqos, rq);
+	}
+}
+
+void rq_qos_issue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->issue)
+			rqos->ops->issue(rqos, rq);
+	}
+}
+
+void rq_qos_requeue(struct request_queue *q, struct request *rq)
+{
+	struct rq_qos *rqos;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->requeue)
+			rqos->ops->requeue(rqos, rq);
+	}
+}
+
+enum wbt_flags rq_qos_throttle(struct request_queue *q, struct bio *bio,
+			       spinlock_t *lock)
+{
+	struct rq_qos *rqos;
+	enum wbt_flags flags = 0;
+
+	for(rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->ops->throttle)
+			flags |= rqos->ops->throttle(rqos, bio, lock);
+	}
+	return flags;
+}
+
+/*
+ * Return true, if we can't increase the depth further by scaling
+ */
+bool rq_depth_calc_max_depth(struct rq_depth *rqd)
+{
+	unsigned int depth;
+	bool ret = false;
+
+	/*
+	 * For QD=1 devices, this is a special case. It's important for those
+	 * to have one request ready when one completes, so force a depth of
+	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
+	 * since the device can't have more than that in flight. If we're
+	 * scaling down, then keep a setting of 1/1/1.
+	 */
+	if (rqd->queue_depth == 1) {
+		if (rqd->scale_step > 0)
+			rqd->max_depth = 1;
+		else {
+			rqd->max_depth = 2;
+			ret = true;
+		}
+	} else {
+		/*
+		 * scale_step == 0 is our default state. If we have suffered
+		 * latency spikes, step will be > 0, and we shrink the
+		 * allowed write depths. If step is < 0, we're only doing
+		 * writes, and we allow a temporarily higher depth to
+		 * increase performance.
+		 */
+		depth = min_t(unsigned int, rqd->default_depth,
+			      rqd->queue_depth);
+		if (rqd->scale_step > 0)
+			depth = 1 + ((depth - 1) >> min(31, rqd->scale_step));
+		else if (rqd->scale_step < 0) {
+			unsigned int maxd = 3 * rqd->queue_depth / 4;
+
+			depth = 1 + ((depth - 1) << -rqd->scale_step);
+			if (depth > maxd) {
+				depth = maxd;
+				ret = true;
+			}
+		}
+
+		rqd->max_depth = depth;
+	}
+
+	return ret;
+}
+
+void rq_depth_scale_up(struct rq_depth *rqd)
+{
+	/*
+	 * Hit max in previous round, stop here
+	 */
+	if (rqd->scaled_max)
+		return;
+
+	rqd->scale_step--;
+
+	rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+}
+
+/*
+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
+ * had a latency violation.
+ */
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+{
+	/*
+	 * Stop scaling down when we've hit the limit. This also prevents
+	 * ->scale_step from going to crazy values, if the device can't
+	 * keep up.
+	 */
+	if (rqd->max_depth == 1)
+		return;
+
+	if (rqd->scale_step < 0 && hard_throttle)
+		rqd->scale_step = 0;
+	else
+		rqd->scale_step++;
+
+	rqd->scaled_max = false;
+	rq_depth_calc_max_depth(rqd);
+}
+
+void rq_qos_exit(struct request_queue *q)
+{
+	while (q->rq_qos) {
+		struct rq_qos *rqos = q->rq_qos;
+		q->rq_qos = rqos->next;
+		rqos->ops->exit(rqos);
+	}
+}
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
new file mode 100644
index 000000000000..f9a39bd6ece3
--- /dev/null
+++ b/block/blk-rq-qos.h
@@ -0,0 +1,106 @@
+#ifndef RQ_QOS_H
+#define RQ_QOS_H
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/blk_types.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+
+enum rq_qos_id {
+	RQ_QOS_WBT,
+	RQ_QOS_CGROUP,
+};
+
+struct rq_wait {
+	wait_queue_head_t wait;
+	atomic_t inflight;
+};
+
+struct rq_qos {
+	struct rq_qos_ops *ops;
+	struct request_queue *q;
+	enum rq_qos_id id;
+	struct rq_qos *next;
+};
+
+struct rq_qos_ops {
+	enum wbt_flags (*throttle)(struct rq_qos *, struct bio *,
+				   spinlock_t *);
+	void (*issue)(struct rq_qos *, struct request *);
+	void (*requeue)(struct rq_qos *, struct request *);
+	void (*done)(struct rq_qos *, struct request *);
+	void (*cleanup)(struct rq_qos *, enum wbt_flags);
+	void (*exit)(struct rq_qos *);
+};
+
+struct rq_depth {
+	unsigned int max_depth;
+
+	int scale_step;
+	bool scaled_max;
+
+	unsigned int queue_depth;
+	unsigned int default_depth;
+};
+
+static inline struct rq_qos *rq_qos_id(struct request_queue *q,
+				       enum rq_qos_id id)
+{
+	struct rq_qos *rqos;
+	for (rqos = q->rq_qos; rqos; rqos = rqos->next) {
+		if (rqos->id == id)
+			break;
+	}
+	return rqos;
+}
+
+static inline struct rq_qos *wbt_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_WBT);
+}
+
+static inline struct rq_qos *blkcg_rq_qos(struct request_queue *q)
+{
+	return rq_qos_id(q, RQ_QOS_CGROUP);
+}
+
+static inline void rq_wait_init(struct rq_wait *rq_wait)
+{
+	atomic_set(&rq_wait->inflight, 0);
+	init_waitqueue_head(&rq_wait->wait);
+}
+
+static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+{
+	rqos->next = q->rq_qos;
+	q->rq_qos = rqos;
+}
+
+static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+{
+	struct rq_qos *cur, *prev = NULL;
+	for (cur = q->rq_qos; cur; cur = cur->next) {
+		if (cur == rqos) {
+			if (prev)
+				prev->next = rqos->next;
+			else
+				q->rq_qos = cur;
+			break;
+		}
+		prev = cur;
+	}
+}
+
+bool rq_wait_inc_below(struct rq_wait *rq_wait, int limit);
+void rq_depth_scale_up(struct rq_depth *rqd);
+void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_calc_max_depth(struct rq_depth *rqd);
+
+void rq_qos_cleanup(struct request_queue *, enum wbt_flags);
+void rq_qos_done(struct request_queue *, struct request *);
+void rq_qos_issue(struct request_queue *, struct request *);
+void rq_qos_requeue(struct request_queue *, struct request *);
+enum wbt_flags rq_qos_throttle(struct request_queue *, struct bio *, spinlock_t *);
+void rq_qos_exit(struct request_queue *);
+#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d1de71124656..053de87d1fda 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -875,7 +875,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
 void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 	q->queue_depth = depth;
-	wbt_set_queue_depth(q->rq_wb, depth);
+	wbt_set_queue_depth(q, depth);
 }
 EXPORT_SYMBOL(blk_set_queue_depth);
 
@@ -900,7 +900,7 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 		queue_flag_clear(QUEUE_FLAG_FUA, q);
 	spin_unlock_irq(q->queue_lock);
 
-	wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 94987b1f69e1..49c29a5d06bb 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -422,16 +422,16 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
 {
-	if (!q->rq_wb)
+	if (!wbt_rq_qos(q))
 		return -EINVAL;
 
-	return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+	return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
 }
 
 static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 				  size_t count)
 {
-	struct rq_wb *rwb;
+	struct rq_qos *rqos;
 	ssize_t ret;
 	s64 val;
 
@@ -441,23 +441,21 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 	if (val < -1)
 		return -EINVAL;
 
-	rwb = q->rq_wb;
-	if (!rwb) {
+	rqos = wbt_rq_qos(q);
+	if (!rqos) {
 		ret = wbt_init(q);
 		if (ret)
 			return ret;
 	}
 
-	rwb = q->rq_wb;
 	if (val == -1)
-		rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+		val = wbt_default_latency_nsec(q);
 	else if (val >= 0)
-		rwb->min_lat_nsec = val * 1000ULL;
+		val *= 1000ULL;
 
-	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		rwb->enable_state = WBT_STATE_ON_MANUAL;
+	wbt_set_min_lat(q, val);
 
-	wbt_update_limits(rwb);
+	wbt_update_limits(q);
 	return count;
 }
 
@@ -964,7 +962,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	wbt_exit(q);
+	rq_qos_exit(q);
 
 	mutex_lock(&q->sysfs_lock);
 	if (q->request_fn || (q->mq_ops && q->elevator))
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 4f89b28fa652..6fe20fb823e4 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -25,6 +25,7 @@
 #include <linux/swap.h>
 
 #include "blk-wbt.h"
+#include "blk-rq-qos.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
@@ -78,28 +79,6 @@ static inline bool rwb_enabled(struct rq_wb *rwb)
 	return rwb && rwb->wb_normal != 0;
 }
 
-/*
- * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
- * false if 'v' + 1 would be bigger than 'below'.
- */
-static bool atomic_inc_below(atomic_t *v, int below)
-{
-	int cur = atomic_read(v);
-
-	for (;;) {
-		int old;
-
-		if (cur >= below)
-			return false;
-		old = atomic_cmpxchg(v, cur, cur + 1);
-		if (old == cur)
-			break;
-		cur = old;
-	}
-
-	return true;
-}
-
 static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
 {
 	if (rwb_enabled(rwb)) {
@@ -116,7 +95,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
  */
 static bool wb_recent_wait(struct rq_wb *rwb)
 {
-	struct bdi_writeback *wb = &rwb->queue->backing_dev_info->wb;
+	struct bdi_writeback *wb = &rwb->rqos.q->backing_dev_info->wb;
 
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
@@ -144,8 +123,9 @@ static void rwb_wake_all(struct rq_wb *rwb)
 	}
 }
 
-void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
+static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	struct rq_wait *rqw;
 	int inflight, limit;
 
@@ -194,10 +174,9 @@ void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct request *rq)
+static void wbt_done(struct rq_qos *rqos, struct request *rq)
 {
-	if (!rwb)
-		return;
+	struct rq_wb *rwb = RQWB(rqos);
 
 	if (!wbt_is_tracked(rq)) {
 		if (rwb->sync_cookie == rq) {
@@ -209,72 +188,11 @@ void wbt_done(struct rq_wb *rwb, struct request *rq)
 			wb_timestamp(rwb, &rwb->last_comp);
 	} else {
 		WARN_ON_ONCE(rq == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_flags(rq));
+		__wbt_done(rqos, wbt_flags(rq));
 	}
 	wbt_clear_state(rq);
 }
 
-/*
- * Return true, if we can't increase the depth further by scaling
- */
-static bool calc_wb_limits(struct rq_wb *rwb)
-{
-	unsigned int depth;
-	bool ret = false;
-
-	if (!rwb->min_lat_nsec) {
-		rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
-		return false;
-	}
-
-	/*
-	 * For QD=1 devices, this is a special case. It's important for those
-	 * to have one request ready when one completes, so force a depth of
-	 * 2 for those devices. On the backend, it'll be a depth of 1 anyway,
-	 * since the device can't have more than that in flight. If we're
-	 * scaling down, then keep a setting of 1/1/1.
-	 */
-	if (rwb->queue_depth == 1) {
-		if (rwb->scale_step > 0)
-			rwb->wb_max = rwb->wb_normal = 1;
-		else {
-			rwb->wb_max = rwb->wb_normal = 2;
-			ret = true;
-		}
-		rwb->wb_background = 1;
-	} else {
-		/*
-		 * scale_step == 0 is our default state. If we have suffered
-		 * latency spikes, step will be > 0, and we shrink the
-		 * allowed write depths. If step is < 0, we're only doing
-		 * writes, and we allow a temporarily higher depth to
-		 * increase performance.
-		 */
-		depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
-		if (rwb->scale_step > 0)
-			depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
-		else if (rwb->scale_step < 0) {
-			unsigned int maxd = 3 * rwb->queue_depth / 4;
-
-			depth = 1 + ((depth - 1) << -rwb->scale_step);
-			if (depth > maxd) {
-				depth = maxd;
-				ret = true;
-			}
-		}
-
-		/*
-		 * Set our max/normal/bg queue depths based on how far
-		 * we have scaled down (->scale_step).
-		 */
-		rwb->wb_max = depth;
-		rwb->wb_normal = (rwb->wb_max + 1) / 2;
-		rwb->wb_background = (rwb->wb_max + 3) / 4;
-	}
-
-	return ret;
-}
-
 static inline bool stat_sample_valid(struct blk_rq_stat *stat)
 {
 	/*
@@ -307,7 +225,8 @@ enum {
 
 static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	u64 thislat;
 
 	/*
@@ -351,7 +270,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 		return LAT_EXCEEDED;
 	}
 
-	if (rwb->scale_step)
+	if (rqd->scale_step)
 		trace_wbt_stat(bdi, stat);
 
 	return LAT_OK;
@@ -359,58 +278,48 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 
 static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 {
-	struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
+	struct backing_dev_info *bdi = rwb->rqos.q->backing_dev_info;
+	struct rq_depth *rqd = &rwb->rq_depth;
 
-	trace_wbt_step(bdi, msg, rwb->scale_step, rwb->cur_win_nsec,
-			rwb->wb_background, rwb->wb_normal, rwb->wb_max);
+	trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
+			rwb->wb_background, rwb->wb_normal, rqd->max_depth);
 }
 
-static void scale_up(struct rq_wb *rwb)
+static void calc_wb_limits(struct rq_wb *rwb)
 {
-	/*
-	 * Hit max in previous round, stop here
-	 */
-	if (rwb->scaled_max)
-		return;
+	if (rwb->min_lat_nsec == 0) {
+		rwb->wb_normal = rwb->wb_background = 0;
+	} else if (rwb->rq_depth.max_depth <= 2) {
+		rwb->wb_normal = rwb->rq_depth.max_depth;
+		rwb->wb_background = 1;
+	} else {
+		rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2;
+		rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4;
+	}
+}
 
-	rwb->scale_step--;
+static void scale_up(struct rq_wb *rwb)
+{
+	rq_depth_scale_up(&rwb->rq_depth);
+	calc_wb_limits(rwb);
 	rwb->unknown_cnt = 0;
-
-	rwb->scaled_max = calc_wb_limits(rwb);
-
-	rwb_wake_all(rwb);
-
-	rwb_trace_step(rwb, "step up");
+	rwb_trace_step(rwb, "scale up");
 }
 
-/*
- * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
- */
 static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
-	/*
-	 * Stop scaling down when we've hit the limit. This also prevents
-	 * ->scale_step from going to crazy values, if the device can't
-	 * keep up.
-	 */
-	if (rwb->wb_max == 1)
-		return;
-
-	if (rwb->scale_step < 0 && hard_throttle)
-		rwb->scale_step = 0;
-	else
-		rwb->scale_step++;
-
-	rwb->scaled_max = false;
-	rwb->unknown_cnt = 0;
+	rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
 	calc_wb_limits(rwb);
-	rwb_trace_step(rwb, "step down");
+	rwb->unknown_cnt = 0;
+	rwb_wake_all(rwb);
+	rwb_trace_step(rwb, "scale down");
 }
 
 static void rwb_arm_timer(struct rq_wb *rwb)
 {
-	if (rwb->scale_step > 0) {
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	if (rqd->scale_step > 0) {
 		/*
 		 * We should speed this up, using some variant of a fast
 		 * integer inverse square root calculation. Since we only do
@@ -418,7 +327,7 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 		 * though.
 		 */
 		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
-					int_sqrt((rwb->scale_step + 1) << 8));
+					int_sqrt((rqd->scale_step + 1) << 8));
 	} else {
 		/*
 		 * For step < 0, we don't want to increase/decrease the
@@ -433,12 +342,13 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 static void wb_timer_fn(struct blk_stat_callback *cb)
 {
 	struct rq_wb *rwb = cb->data;
+	struct rq_depth *rqd = &rwb->rq_depth;
 	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
 	status = latency_exceeded(rwb, cb->stat);
 
-	trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
+	trace_wbt_timer(rwb->rqos.q->backing_dev_info, status, rqd->scale_step,
 			inflight);
 
 	/*
@@ -469,9 +379,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 		 * currently don't have a valid read/write sample. For that
 		 * case, slowly return to center state (step == 0).
 		 */
-		if (rwb->scale_step > 0)
+		if (rqd->scale_step > 0)
 			scale_up(rwb);
-		else if (rwb->scale_step < 0)
+		else if (rqd->scale_step < 0)
 			scale_down(rwb, false);
 		break;
 	default:
@@ -481,19 +391,50 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
 	/*
 	 * Re-arm timer, if we have IO in flight
 	 */
-	if (rwb->scale_step || inflight)
+	if (rqd->scale_step || inflight)
 		rwb_arm_timer(rwb);
 }
 
-void wbt_update_limits(struct rq_wb *rwb)
+static void __wbt_update_limits(struct rq_wb *rwb)
 {
-	rwb->scale_step = 0;
-	rwb->scaled_max = false;
+	struct rq_depth *rqd = &rwb->rq_depth;
+
+	rqd->scale_step = 0;
+	rqd->scaled_max = false;
+
+	rq_depth_calc_max_depth(rqd);
 	calc_wb_limits(rwb);
 
 	rwb_wake_all(rwb);
 }
 
+void wbt_update_limits(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+u64 wbt_get_min_lat(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return 0;
+	return RQWB(rqos)->min_lat_nsec;
+}
+
+void wbt_set_min_lat(struct request_queue *q, u64 val)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (!rqos)
+		return;
+	RQWB(rqos)->min_lat_nsec = val;
+	RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL;
+	__wbt_update_limits(RQWB(rqos));
+}
+
+
 static bool close_io(struct rq_wb *rwb)
 {
 	const unsigned long now = jiffies;
@@ -520,7 +461,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	 * IO for a bit.
 	 */
 	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
-		limit = rwb->wb_max;
+		limit = rwb->rq_depth.max_depth;
 	else if ((rw & REQ_BACKGROUND) || close_io(rwb)) {
 		/*
 		 * If less than 100ms since we completed unrelated IO,
@@ -554,7 +495,7 @@ static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
 	    rqw->wait.head.next != &wait->entry)
 		return false;
 
-	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
+	return rq_wait_inc_below(rqw, get_limit(rwb, rw));
 }
 
 /*
@@ -614,8 +555,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
  */
-enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
+static enum wbt_flags wbt_wait(struct rq_qos *rqos, struct bio *bio,
+			       spinlock_t *lock)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	enum wbt_flags ret = 0;
 
 	if (!rwb_enabled(rwb))
@@ -643,8 +586,10 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 	return ret | WBT_TRACKED;
 }
 
-void wbt_issue(struct rq_wb *rwb, struct request *rq)
+void wbt_issue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
+
 	if (!rwb_enabled(rwb))
 		return;
 
@@ -661,8 +606,9 @@ void wbt_issue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+void wbt_requeue(struct rq_qos *rqos, struct request *rq)
 {
+	struct rq_wb *rwb = RQWB(rqos);
 	if (!rwb_enabled(rwb))
 		return;
 	if (rq == rwb->sync_cookie) {
@@ -671,39 +617,30 @@ void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 	}
 }
 
-void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
-	if (rwb) {
-		rwb->queue_depth = depth;
-		wbt_update_limits(rwb);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos) {
+		RQWB(rqos)->rq_depth.queue_depth = depth;
+		__wbt_update_limits(RQWB(rqos));
 	}
 }
 
-void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
-{
-	if (rwb)
-		rwb->wc = write_cache_on;
-}
-
-/*
- * Disable wbt, if enabled by default.
- */
-void wbt_disable_default(struct request_queue *q)
+void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
 {
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT)
-		wbt_exit(q);
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	if (rqos)
+		RQWB(rqos)->wc = write_cache_on;
 }
-EXPORT_SYMBOL_GPL(wbt_disable_default);
 
 /*
  * Enable wbt if defaults are configured that way
  */
 void wbt_enable_default(struct request_queue *q)
 {
+	struct rq_qos *rqos = wbt_rq_qos(q);
 	/* Throttling already enabled? */
-	if (q->rq_wb)
+	if (rqos)
 		return;
 
 	/* Queue not registered? Maybe shutting down... */
@@ -741,6 +678,41 @@ static int wbt_data_dir(const struct request *rq)
 	return -1;
 }
 
+static void wbt_exit(struct rq_qos *rqos)
+{
+	struct rq_wb *rwb = RQWB(rqos);
+	struct request_queue *q = rqos->q;
+
+	blk_stat_remove_callback(q, rwb->cb);
+	blk_stat_free_callback(rwb->cb);
+	kfree(rwb);
+}
+
+/*
+ * Disable wbt, if enabled by default.
+ */
+void wbt_disable_default(struct request_queue *q)
+{
+	struct rq_qos *rqos = wbt_rq_qos(q);
+	struct rq_wb *rwb;
+	if (!rqos)
+		return;
+	rwb = RQWB(rqos);
+	if (rwb->enable_state == WBT_STATE_ON_DEFAULT)
+		rwb->wb_normal = 0;
+}
+EXPORT_SYMBOL_GPL(wbt_disable_default);
+
+
+static struct rq_qos_ops wbt_rqos_ops = {
+	.throttle = wbt_wait,
+	.issue = wbt_issue,
+	.requeue = wbt_requeue,
+	.done = wbt_done,
+	.cleanup = __wbt_done,
+	.exit = wbt_exit,
+};
+
 int wbt_init(struct request_queue *q)
 {
 	struct rq_wb *rwb;
@@ -756,39 +728,29 @@ int wbt_init(struct request_queue *q)
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < WBT_NUM_RWQ; i++) {
-		atomic_set(&rwb->rq_wait[i].inflight, 0);
-		init_waitqueue_head(&rwb->rq_wait[i].wait);
-	}
+	for (i = 0; i < WBT_NUM_RWQ; i++)
+		rq_wait_init(&rwb->rq_wait[i]);
 
+	rwb->rqos.id = RQ_QOS_WBT;
+	rwb->rqos.ops = &wbt_rqos_ops;
+	rwb->rqos.q = q;
 	rwb->last_comp = rwb->last_issue = jiffies;
-	rwb->queue = q;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
 	rwb->enable_state = WBT_STATE_ON_DEFAULT;
-	wbt_update_limits(rwb);
+	rwb->wc = 1;
+	rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
+	__wbt_update_limits(rwb);
 
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	q->rq_wb = rwb;
+	rq_qos_add(q, &rwb->rqos);
 	blk_stat_add_callback(q, rwb->cb);
 
 	rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 
-	wbt_set_queue_depth(rwb, blk_queue_depth(q));
-	wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+	wbt_set_queue_depth(q, blk_queue_depth(q));
+	wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
 
 	return 0;
 }
-
-void wbt_exit(struct request_queue *q)
-{
-	struct rq_wb *rwb = q->rq_wb;
-
-	if (rwb) {
-		blk_stat_remove_callback(q, rwb->cb);
-		blk_stat_free_callback(rwb->cb);
-		q->rq_wb = NULL;
-		kfree(rwb);
-	}
-}
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 300df531d0a6..53b20a58c0a2 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -9,6 +9,7 @@
 #include <linux/ktime.h>
 
 #include "blk-stat.h"
+#include "blk-rq-qos.h"
 
 enum wbt_flags {
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
@@ -35,20 +36,12 @@ enum {
 	WBT_STATE_ON_MANUAL	= 2,
 };
 
-struct rq_wait {
-	wait_queue_head_t wait;
-	atomic_t inflight;
-};
-
 struct rq_wb {
 	/*
 	 * Settings that govern how we throttle
 	 */
 	unsigned int wb_background;		/* background writeback */
 	unsigned int wb_normal;			/* normal writeback */
-	unsigned int wb_max;			/* max throughput writeback */
-	int scale_step;
-	bool scaled_max;
 
 	short enable_state;			/* WBT_STATE_* */
 
@@ -67,15 +60,20 @@ struct rq_wb {
 	void *sync_cookie;
 
 	unsigned int wc;
-	unsigned int queue_depth;
 
 	unsigned long last_issue;		/* last non-throttled issue */
 	unsigned long last_comp;		/* last non-throttled comp */
 	unsigned long min_lat_nsec;
-	struct request_queue *queue;
+	struct rq_qos rqos;
 	struct rq_wait rq_wait[WBT_NUM_RWQ];
+	struct rq_depth rq_depth;
 };
 
+static inline struct rq_wb *RQWB(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct rq_wb, rqos);
+}
+
 static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 {
 	unsigned int i, ret = 0;
@@ -86,6 +84,7 @@ static inline unsigned int wbt_inflight(struct rq_wb *rwb)
 	return ret;
 }
 
+
 #ifdef CONFIG_BLK_WBT
 
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
@@ -93,19 +92,16 @@ static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 	rq->wbt_flags |= flags;
 }
 
-void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct request *);
-enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
-void wbt_exit(struct request_queue *);
-void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct request *);
-void wbt_issue(struct rq_wb *, struct request *);
+void wbt_update_limits(struct request_queue *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);
 
-void wbt_set_queue_depth(struct rq_wb *, unsigned int);
-void wbt_set_write_cache(struct rq_wb *, bool);
+u64 wbt_get_min_lat(struct request_queue *q);
+void wbt_set_min_lat(struct request_queue *q, u64 val);
+
+void wbt_set_queue_depth(struct request_queue *, unsigned int);
+void wbt_set_write_cache(struct request_queue *, bool);
 
 u64 wbt_default_latency_nsec(struct request_queue *);
 
@@ -114,43 +110,30 @@ u64 wbt_default_latency_nsec(struct request_queue *);
 static inline void wbt_track(struct request *rq, enum wbt_flags flags)
 {
 }
-static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
-{
-}
-static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
-{
-}
-static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
-				      spinlock_t *lock)
-{
-	return 0;
-}
 static inline int wbt_init(struct request_queue *q)
 {
 	return -EINVAL;
 }
-static inline void wbt_exit(struct request_queue *q)
-{
-}
-static inline void wbt_update_limits(struct rq_wb *rwb)
+static inline void wbt_update_limits(struct request_queue *q)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_disable_default(struct request_queue *q)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
+static inline void wbt_enable_default(struct request_queue *q)
 {
 }
-static inline void wbt_disable_default(struct request_queue *q)
+static inline void wbt_set_queue_depth(struct request_queue *q, unsigned int depth)
 {
 }
-static inline void wbt_enable_default(struct request_queue *q)
+static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
 {
 }
-static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+static inline u64 wbt_get_min_lat(struct request_queue *q)
 {
+	return 0;
 }
-static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc)
+static inline void wbt_set_min_lat(struct request_queue *q, u64 val)
 {
 }
 static inline u64 wbt_default_latency_nsec(struct request_queue *q)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9d05646d5059..137759862f07 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -42,7 +42,7 @@ struct bsg_job;
 struct blkcg_gq;
 struct blk_flush_queue;
 struct pr_ops;
-struct rq_wb;
+struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
 
@@ -443,7 +443,7 @@ struct request_queue {
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
 	struct blk_queue_stats	*stats;
-	struct rq_wb		*rq_wb;
+	struct rq_qos		*rq_qos;
 
 	/*
 	 * If blkcg is not used, @q->root_rl serves all requests.  If blkcg
-- 
cgit v1.2.3


From d70675121546c35feaceebf7ed9caed8716640f3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Tue, 3 Jul 2018 11:15:01 -0400
Subject: block: introduce blk-iolatency io controller

Current IO controllers for the block layer are less than ideal for our
use case.  The io.max controller is great at hard limiting, but it is
not work conserving.  This patch introduces io.latency.  You provide a
latency target for your group and we monitor the io in short windows to
make sure we are not exceeding those latency targets.  This makes use of
the rq-qos infrastructure and works much like the wbt stuff.  There are
a few differences from wbt

 - It's bio based, so the latency covers the whole block layer in addition to
   the actual io.
 - We will throttle all IO types that comes in here if we need to.
 - We use the mean latency over the 100ms window.  This is because writes can
   be particularly fast, which could give us a false sense of the impact of
   other workloads on our protected workload.
 - By default there's no throttling, we set the queue_depth to INT_MAX so that
   we can have as many outstanding bio's as we're allowed to.  Only at
   throttle time do we pay attention to the actual queue depth.
 - We backcharge cgroups for root cg issued IO and induce artificial
   delays in order to deal with cases like metadata only or swap heavy
   workloads.

In testing this has worked out relatively well.  Protected workloads
will throttle noisy workloads down to 1 io at time if they are doing
normal IO on their own, or induce up to a 1 second delay per syscall if
they are doing a lot of root issued IO (metadata/swap IO).

Our testing has revolved mostly around our production web servers where
we have hhvm (the web server application) in a protected group and
everything else in another group.  We see slightly higher requests per
second (RPS) on the test tier vs the control tier, and much more stable
RPS across all machines in the test tier vs the control tier.

Another test we run is a slow memory allocator in the unprotected group.
Before this would eventually push us into swap and cause the whole box
to die and not recover at all.  With these patches we see slight RPS
drops (usually 10-15%) before the memory consumer is properly killed and
things recover within seconds.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig             |  12 +
 block/Makefile            |   1 +
 block/blk-cgroup.c        |   8 +
 block/blk-iolatency.c     | 930 ++++++++++++++++++++++++++++++++++++++++++++++
 block/blk.h               |   6 +
 include/linux/blk_types.h |   2 -
 6 files changed, 957 insertions(+), 2 deletions(-)
 create mode 100644 block/blk-iolatency.c

(limited to 'include')

diff --git a/block/Kconfig b/block/Kconfig
index dfe7bc770fc9..1f2469a0123c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -149,6 +149,18 @@ config BLK_WBT
 	dynamically on an algorithm loosely based on CoDel, factoring in
 	the realtime performance of the disk.
 
+config BLK_CGROUP_IOLATENCY
+	bool "Enable support for latency based cgroup IO protection"
+	depends on BLK_CGROUP=y
+	default n
+	---help---
+	Enabling this option enables the .latency interface for IO throttling.
+	The IO controller will attempt to maintain average IO latencies below
+	the configured latency target, throttling anybody with a higher latency
+	target than the victimized group.
+
+	Note, this is an experimental interface and could be changed someday.
+
 config BLK_WBT_SQ
 	bool "Single queue writeback throttling"
 	default n
diff --git a/block/Makefile b/block/Makefile
index 57d0f47ab05f..572b33f32c07 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
+obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
 obj-$(CONFIG_IOSCHED_DEADLINE)	+= deadline-iosched.o
 obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d3310ec96c2a..7e2c19ce1a08 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1238,6 +1238,14 @@ int blkcg_init_queue(struct request_queue *q)
 	if (preloaded)
 		radix_tree_preload_end();
 
+	ret = blk_iolatency_init(q);
+	if (ret) {
+		spin_lock_irq(q->queue_lock);
+		blkg_destroy_all(q);
+		spin_unlock_irq(q->queue_lock);
+		return ret;
+	}
+
 	ret = blk_throtl_init(q);
 	if (ret) {
 		spin_lock_irq(q->queue_lock);
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
new file mode 100644
index 000000000000..a35a1f580337
--- /dev/null
+++ b/block/blk-iolatency.c
@@ -0,0 +1,930 @@
+/*
+ * Block rq-qos base io controller
+ *
+ * This works similar to wbt with a few exceptions
+ *
+ * - It's bio based, so the latency covers the whole block layer in addition to
+ *   the actual io.
+ * - We will throttle all IO that comes in here if we need to.
+ * - We use the mean latency over the 100ms window.  This is because writes can
+ *   be particularly fast, which could give us a false sense of the impact of
+ *   other workloads on our protected workload.
+ * - By default there's no throttling, we set the queue_depth to INT_MAX so that
+ *   we can have as many outstanding bio's as we're allowed to.  Only at
+ *   throttle time do we pay attention to the actual queue depth.
+ *
+ * The hierarchy works like the cpu controller does, we track the latency at
+ * every configured node, and each configured node has it's own independent
+ * queue depth.  This means that we only care about our latency targets at the
+ * peer level.  Some group at the bottom of the hierarchy isn't going to affect
+ * a group at the end of some other path if we're only configred at leaf level.
+ *
+ * Consider the following
+ *
+ *                   root blkg
+ *             /                     \
+ *        fast (target=5ms)     slow (target=10ms)
+ *         /     \                  /        \
+ *       a        b          normal(15ms)   unloved
+ *
+ * "a" and "b" have no target, but their combined io under "fast" cannot exceed
+ * an average latency of 5ms.  If it does then we will throttle the "slow"
+ * group.  In the case of "normal", if it exceeds its 15ms target, we will
+ * throttle "unloved", but nobody else.
+ *
+ * In this example "fast", "slow", and "normal" will be the only groups actually
+ * accounting their io latencies.  We have to walk up the heirarchy to the root
+ * on every submit and complete so we can do the appropriate stat recording and
+ * adjust the queue depth of ourselves if needed.
+ *
+ * There are 2 ways we throttle IO.
+ *
+ * 1) Queue depth throttling.  As we throttle down we will adjust the maximum
+ * number of IO's we're allowed to have in flight.  This starts at (u64)-1 down
+ * to 1.  If the group is only ever submitting IO for itself then this is the
+ * only way we throttle.
+ *
+ * 2) Induced delay throttling.  This is for the case that a group is generating
+ * IO that has to be issued by the root cg to avoid priority inversion. So think
+ * REQ_META or REQ_SWAP.  If we are already at qd == 1 and we're getting a lot
+ * of work done for us on behalf of the root cg and are being asked to scale
+ * down more then we induce a latency at userspace return.  We accumulate the
+ * total amount of time we need to be punished by doing
+ *
+ * total_time += min_lat_nsec - actual_io_completion
+ *
+ * and then at throttle time will do
+ *
+ * throttle_time = min(total_time, NSEC_PER_SEC)
+ *
+ * This induced delay will throttle back the activity that is generating the
+ * root cg issued io's, wethere that's some metadata intensive operation or the
+ * group is using so much memory that it is pushing us into swap.
+ *
+ * Copyright (C) 2018 Josef Bacik
+ */
+#include <linux/kernel.h>
+#include <linux/blk_types.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/memcontrol.h>
+#include <linux/sched/signal.h>
+#include <trace/events/block.h>
+#include "blk-rq-qos.h"
+#include "blk-stat.h"
+
+#define DEFAULT_SCALE_COOKIE 1000000U
+
+static struct blkcg_policy blkcg_policy_iolatency;
+struct iolatency_grp;
+
+struct blk_iolatency {
+	struct rq_qos rqos;
+	struct timer_list timer;
+	atomic_t enabled;
+};
+
+static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct blk_iolatency, rqos);
+}
+
+static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat)
+{
+	return atomic_read(&blkiolat->enabled) > 0;
+}
+
+struct child_latency_info {
+	spinlock_t lock;
+
+	/* Last time we adjusted the scale of everybody. */
+	u64 last_scale_event;
+
+	/* The latency that we missed. */
+	u64 scale_lat;
+
+	/* Total io's from all of our children for the last summation. */
+	u64 nr_samples;
+
+	/* The guy who actually changed the latency numbers. */
+	struct iolatency_grp *scale_grp;
+
+	/* Cookie to tell if we need to scale up or down. */
+	atomic_t scale_cookie;
+};
+
+struct iolatency_grp {
+	struct blkg_policy_data pd;
+	struct blk_rq_stat __percpu *stats;
+	struct blk_iolatency *blkiolat;
+	struct rq_depth rq_depth;
+	struct rq_wait rq_wait;
+	atomic64_t window_start;
+	atomic_t scale_cookie;
+	u64 min_lat_nsec;
+	u64 cur_win_nsec;
+
+	/* total running average of our io latency. */
+	u64 total_lat_avg;
+	u64 total_lat_nr;
+
+	/* Our current number of IO's for the last summation. */
+	u64 nr_samples;
+
+	struct child_latency_info child_lat;
+};
+
+static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd)
+{
+	return pd ? container_of(pd, struct iolatency_grp, pd) : NULL;
+}
+
+static inline struct iolatency_grp *blkg_to_lat(struct blkcg_gq *blkg)
+{
+	return pd_to_lat(blkg_to_pd(blkg, &blkcg_policy_iolatency));
+}
+
+static inline struct blkcg_gq *lat_to_blkg(struct iolatency_grp *iolat)
+{
+	return pd_to_blkg(&iolat->pd);
+}
+
+static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
+				       wait_queue_entry_t *wait,
+				       bool first_block)
+{
+	struct rq_wait *rqw = &iolat->rq_wait;
+
+	if (first_block && waitqueue_active(&rqw->wait) &&
+	    rqw->wait.head.next != &wait->entry)
+		return false;
+	return rq_wait_inc_below(rqw, iolat->rq_depth.max_depth);
+}
+
+static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
+				       struct iolatency_grp *iolat,
+				       spinlock_t *lock, bool issue_as_root,
+				       bool use_memdelay)
+	__releases(lock)
+	__acquires(lock)
+{
+	struct rq_wait *rqw = &iolat->rq_wait;
+	unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);
+	DEFINE_WAIT(wait);
+	bool first_block = true;
+
+	if (use_delay)
+		blkcg_schedule_throttle(rqos->q, use_memdelay);
+
+	/*
+	 * To avoid priority inversions we want to just take a slot if we are
+	 * issuing as root.  If we're being killed off there's no point in
+	 * delaying things, we may have been killed by OOM so throttling may
+	 * make recovery take even longer, so just let the IO's through so the
+	 * task can go away.
+	 */
+	if (issue_as_root || fatal_signal_pending(current)) {
+		atomic_inc(&rqw->inflight);
+		return;
+	}
+
+	if (iolatency_may_queue(iolat, &wait, first_block))
+		return;
+
+	do {
+		prepare_to_wait_exclusive(&rqw->wait, &wait,
+					  TASK_UNINTERRUPTIBLE);
+
+		if (iolatency_may_queue(iolat, &wait, first_block))
+			break;
+		first_block = false;
+
+		if (lock) {
+			spin_unlock_irq(lock);
+			io_schedule();
+			spin_lock_irq(lock);
+		} else {
+			io_schedule();
+		}
+	} while (1);
+
+	finish_wait(&rqw->wait, &wait);
+}
+
+#define SCALE_DOWN_FACTOR 2
+#define SCALE_UP_FACTOR 4
+
+static inline unsigned long scale_amount(unsigned long qd, bool up)
+{
+	return max(up ? qd >> SCALE_UP_FACTOR : qd >> SCALE_DOWN_FACTOR, 1UL);
+}
+
+/*
+ * We scale the qd down faster than we scale up, so we need to use this helper
+ * to adjust the scale_cookie accordingly so we don't prematurely get
+ * scale_cookie at DEFAULT_SCALE_COOKIE and unthrottle too much.
+ *
+ * Each group has their own local copy of the last scale cookie they saw, so if
+ * the global scale cookie goes up or down they know which way they need to go
+ * based on their last knowledge of it.
+ */
+static void scale_cookie_change(struct blk_iolatency *blkiolat,
+				struct child_latency_info *lat_info,
+				bool up)
+{
+	unsigned long qd = blk_queue_depth(blkiolat->rqos.q);
+	unsigned long scale = scale_amount(qd, up);
+	unsigned long old = atomic_read(&lat_info->scale_cookie);
+	unsigned long max_scale = qd << 1;
+	unsigned long diff = 0;
+
+	if (old < DEFAULT_SCALE_COOKIE)
+		diff = DEFAULT_SCALE_COOKIE - old;
+
+	if (up) {
+		if (scale + old > DEFAULT_SCALE_COOKIE)
+			atomic_set(&lat_info->scale_cookie,
+				   DEFAULT_SCALE_COOKIE);
+		else if (diff > qd)
+			atomic_inc(&lat_info->scale_cookie);
+		else
+			atomic_add(scale, &lat_info->scale_cookie);
+	} else {
+		/*
+		 * We don't want to dig a hole so deep that it takes us hours to
+		 * dig out of it.  Just enough that we don't throttle/unthrottle
+		 * with jagged workloads but can still unthrottle once pressure
+		 * has sufficiently dissipated.
+		 */
+		if (diff > qd) {
+			if (diff < max_scale)
+				atomic_dec(&lat_info->scale_cookie);
+		} else {
+			atomic_sub(scale, &lat_info->scale_cookie);
+		}
+	}
+}
+
+/*
+ * Change the queue depth of the iolatency_grp.  We add/subtract 1/16th of the
+ * queue depth at a time so we don't get wild swings and hopefully dial in to
+ * fairer distribution of the overall queue depth.
+ */
+static void scale_change(struct iolatency_grp *iolat, bool up)
+{
+	unsigned long qd = blk_queue_depth(iolat->blkiolat->rqos.q);
+	unsigned long scale = scale_amount(qd, up);
+	unsigned long old = iolat->rq_depth.max_depth;
+	bool changed = false;
+
+	if (old > qd)
+		old = qd;
+
+	if (up) {
+		if (old == 1 && blkcg_unuse_delay(lat_to_blkg(iolat)))
+			return;
+
+		if (old < qd) {
+			changed = true;
+			old += scale;
+			old = min(old, qd);
+			iolat->rq_depth.max_depth = old;
+			wake_up_all(&iolat->rq_wait.wait);
+		}
+	} else if (old > 1) {
+		old >>= 1;
+		changed = true;
+		iolat->rq_depth.max_depth = max(old, 1UL);
+	}
+}
+
+/* Check our parent and see if the scale cookie has changed. */
+static void check_scale_change(struct iolatency_grp *iolat)
+{
+	struct iolatency_grp *parent;
+	struct child_latency_info *lat_info;
+	unsigned int cur_cookie;
+	unsigned int our_cookie = atomic_read(&iolat->scale_cookie);
+	u64 scale_lat;
+	unsigned int old;
+	int direction = 0;
+
+	if (lat_to_blkg(iolat)->parent == NULL)
+		return;
+
+	parent = blkg_to_lat(lat_to_blkg(iolat)->parent);
+	if (!parent)
+		return;
+
+	lat_info = &parent->child_lat;
+	cur_cookie = atomic_read(&lat_info->scale_cookie);
+	scale_lat = READ_ONCE(lat_info->scale_lat);
+
+	if (cur_cookie < our_cookie)
+		direction = -1;
+	else if (cur_cookie > our_cookie)
+		direction = 1;
+	else
+		return;
+
+	old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie);
+
+	/* Somebody beat us to the punch, just bail. */
+	if (old != our_cookie)
+		return;
+
+	if (direction < 0 && iolat->min_lat_nsec) {
+		u64 samples_thresh;
+
+		if (!scale_lat || iolat->min_lat_nsec <= scale_lat)
+			return;
+
+		/*
+		 * Sometimes high priority groups are their own worst enemy, so
+		 * instead of taking it out on some poor other group that did 5%
+		 * or less of the IO's for the last summation just skip this
+		 * scale down event.
+		 */
+		samples_thresh = lat_info->nr_samples * 5;
+		samples_thresh = div64_u64(samples_thresh, 100);
+		if (iolat->nr_samples <= samples_thresh)
+			return;
+	}
+
+	/* We're as low as we can go. */
+	if (iolat->rq_depth.max_depth == 1 && direction < 0) {
+		blkcg_use_delay(lat_to_blkg(iolat));
+		return;
+	}
+
+	/* We're back to the default cookie, unthrottle all the things. */
+	if (cur_cookie == DEFAULT_SCALE_COOKIE) {
+		blkcg_clear_delay(lat_to_blkg(iolat));
+		iolat->rq_depth.max_depth = INT_MAX;
+		wake_up_all(&iolat->rq_wait.wait);
+		return;
+	}
+
+	scale_change(iolat, direction > 0);
+}
+
+static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
+				     spinlock_t *lock)
+{
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	struct request_queue *q = rqos->q;
+	bool issue_as_root = bio_issue_as_root_blkg(bio);
+
+	if (!blk_iolatency_enabled(blkiolat))
+		return;
+
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+	bio_associate_blkcg(bio, &blkcg->css);
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		if (!lock)
+			spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		if (!lock)
+			spin_unlock_irq(q->queue_lock);
+	}
+	if (!blkg)
+		goto out;
+
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+	bio_associate_blkg(bio, blkg);
+out:
+	rcu_read_unlock();
+	while (blkg && blkg->parent) {
+		struct iolatency_grp *iolat = blkg_to_lat(blkg);
+		if (!iolat) {
+			blkg = blkg->parent;
+			continue;
+		}
+
+		check_scale_change(iolat);
+		__blkcg_iolatency_throttle(rqos, iolat, lock, issue_as_root,
+				     (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+		blkg = blkg->parent;
+	}
+	if (!timer_pending(&blkiolat->timer))
+		mod_timer(&blkiolat->timer, jiffies + HZ);
+}
+
+static void iolatency_record_time(struct iolatency_grp *iolat,
+				  struct bio_issue *issue, u64 now,
+				  bool issue_as_root)
+{
+	struct blk_rq_stat *rq_stat;
+	u64 start = bio_issue_time(issue);
+	u64 req_time;
+
+	if (now <= start)
+		return;
+
+	req_time = now - start;
+
+	/*
+	 * We don't want to count issue_as_root bio's in the cgroups latency
+	 * statistics as it could skew the numbers downwards.
+	 */
+	if (unlikely(issue_as_root && iolat->rq_depth.max_depth != (u64)-1)) {
+		u64 sub = iolat->min_lat_nsec;
+		if (req_time < sub)
+			blkcg_add_delay(lat_to_blkg(iolat), now, sub - req_time);
+		return;
+	}
+
+	rq_stat = get_cpu_ptr(iolat->stats);
+	blk_rq_stat_add(rq_stat, req_time);
+	put_cpu_ptr(rq_stat);
+}
+
+#define BLKIOLATENCY_MIN_ADJUST_TIME (500 * NSEC_PER_MSEC)
+#define BLKIOLATENCY_MIN_GOOD_SAMPLES 5
+
+static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now)
+{
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct iolatency_grp *parent;
+	struct child_latency_info *lat_info;
+	struct blk_rq_stat stat;
+	unsigned long flags;
+	int cpu;
+
+	blk_rq_stat_init(&stat);
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		struct blk_rq_stat *s;
+		s = per_cpu_ptr(iolat->stats, cpu);
+		blk_rq_stat_sum(&stat, s);
+		blk_rq_stat_init(s);
+	}
+	preempt_enable();
+
+	/*
+	 * Our average exceeded our window, scale up our window so we are more
+	 * accurate, but not more than the global timer.
+	 */
+	if (stat.mean > iolat->cur_win_nsec) {
+		iolat->cur_win_nsec <<= 1;
+		iolat->cur_win_nsec =
+			max_t(u64, iolat->cur_win_nsec, NSEC_PER_SEC);
+	}
+
+	parent = blkg_to_lat(blkg->parent);
+	if (!parent)
+		return;
+
+	lat_info = &parent->child_lat;
+
+	iolat->total_lat_avg =
+		div64_u64((iolat->total_lat_avg * iolat->total_lat_nr) +
+			  stat.mean, iolat->total_lat_nr + 1);
+
+	iolat->total_lat_nr++;
+
+	/* Everything is ok and we don't need to adjust the scale. */
+	if (stat.mean <= iolat->min_lat_nsec &&
+	    atomic_read(&lat_info->scale_cookie) == DEFAULT_SCALE_COOKIE)
+		return;
+
+	/* Somebody beat us to the punch, just bail. */
+	spin_lock_irqsave(&lat_info->lock, flags);
+	lat_info->nr_samples -= iolat->nr_samples;
+	lat_info->nr_samples += stat.nr_samples;
+	iolat->nr_samples = stat.nr_samples;
+
+	if ((lat_info->last_scale_event >= now ||
+	    now - lat_info->last_scale_event < BLKIOLATENCY_MIN_ADJUST_TIME) &&
+	    lat_info->scale_lat <= iolat->min_lat_nsec)
+		goto out;
+
+	if (stat.mean <= iolat->min_lat_nsec &&
+	    stat.nr_samples >= BLKIOLATENCY_MIN_GOOD_SAMPLES) {
+		if (lat_info->scale_grp == iolat) {
+			lat_info->last_scale_event = now;
+			scale_cookie_change(iolat->blkiolat, lat_info, true);
+		}
+	} else if (stat.mean > iolat->min_lat_nsec) {
+		lat_info->last_scale_event = now;
+		if (!lat_info->scale_grp ||
+		    lat_info->scale_lat > iolat->min_lat_nsec) {
+			WRITE_ONCE(lat_info->scale_lat, iolat->min_lat_nsec);
+			lat_info->scale_grp = iolat;
+		}
+		scale_cookie_change(iolat->blkiolat, lat_info, false);
+	}
+out:
+	spin_unlock_irqrestore(&lat_info->lock, flags);
+}
+
+static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
+{
+	struct blkcg_gq *blkg;
+	struct rq_wait *rqw;
+	struct iolatency_grp *iolat;
+	u64 window_start;
+	u64 now = ktime_to_ns(ktime_get());
+	bool issue_as_root = bio_issue_as_root_blkg(bio);
+	bool enabled = false;
+
+	blkg = bio->bi_blkg;
+	if (!blkg)
+		return;
+
+	iolat = blkg_to_lat(bio->bi_blkg);
+	if (!iolat)
+		return;
+
+	enabled = blk_iolatency_enabled(iolat->blkiolat);
+	while (blkg && blkg->parent) {
+		iolat = blkg_to_lat(blkg);
+		if (!iolat) {
+			blkg = blkg->parent;
+			continue;
+		}
+		rqw = &iolat->rq_wait;
+
+		atomic_dec(&rqw->inflight);
+		if (!enabled || iolat->min_lat_nsec == 0)
+			goto next;
+		iolatency_record_time(iolat, &bio->bi_issue, now,
+				      issue_as_root);
+		window_start = atomic64_read(&iolat->window_start);
+		if (now > window_start &&
+		    (now - window_start) >= iolat->cur_win_nsec) {
+			if (atomic64_cmpxchg(&iolat->window_start,
+					window_start, now) == window_start)
+				iolatency_check_latencies(iolat, now);
+		}
+next:
+		wake_up(&rqw->wait);
+		blkg = blkg->parent;
+	}
+}
+
+static void blkcg_iolatency_cleanup(struct rq_qos *rqos, struct bio *bio)
+{
+	struct blkcg_gq *blkg;
+
+	blkg = bio->bi_blkg;
+	while (blkg && blkg->parent) {
+		struct rq_wait *rqw;
+		struct iolatency_grp *iolat;
+
+		iolat = blkg_to_lat(blkg);
+		if (!iolat)
+			goto next;
+
+		rqw = &iolat->rq_wait;
+		atomic_dec(&rqw->inflight);
+		wake_up(&rqw->wait);
+next:
+		blkg = blkg->parent;
+	}
+}
+
+static void blkcg_iolatency_exit(struct rq_qos *rqos)
+{
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+
+	del_timer_sync(&blkiolat->timer);
+	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
+	kfree(blkiolat);
+}
+
+static struct rq_qos_ops blkcg_iolatency_ops = {
+	.throttle = blkcg_iolatency_throttle,
+	.cleanup = blkcg_iolatency_cleanup,
+	.done_bio = blkcg_iolatency_done_bio,
+	.exit = blkcg_iolatency_exit,
+};
+
+static void blkiolatency_timer_fn(struct timer_list *t)
+{
+	struct blk_iolatency *blkiolat = from_timer(blkiolat, t, timer);
+	struct blkcg_gq *blkg;
+	struct cgroup_subsys_state *pos_css;
+	u64 now = ktime_to_ns(ktime_get());
+
+	rcu_read_lock();
+	blkg_for_each_descendant_pre(blkg, pos_css,
+				     blkiolat->rqos.q->root_blkg) {
+		struct iolatency_grp *iolat;
+		struct child_latency_info *lat_info;
+		unsigned long flags;
+		u64 cookie;
+
+		/*
+		 * We could be exiting, don't access the pd unless we have a
+		 * ref on the blkg.
+		 */
+		if (!blkg_try_get(blkg))
+			continue;
+
+		iolat = blkg_to_lat(blkg);
+		if (!iolat)
+			continue;
+
+		lat_info = &iolat->child_lat;
+		cookie = atomic_read(&lat_info->scale_cookie);
+
+		if (cookie >= DEFAULT_SCALE_COOKIE)
+			goto next;
+
+		spin_lock_irqsave(&lat_info->lock, flags);
+		if (lat_info->last_scale_event >= now)
+			goto next_lock;
+
+		/*
+		 * We scaled down but don't have a scale_grp, scale up and carry
+		 * on.
+		 */
+		if (lat_info->scale_grp == NULL) {
+			scale_cookie_change(iolat->blkiolat, lat_info, true);
+			goto next_lock;
+		}
+
+		/*
+		 * It's been 5 seconds since our last scale event, clear the
+		 * scale grp in case the group that needed the scale down isn't
+		 * doing any IO currently.
+		 */
+		if (now - lat_info->last_scale_event >=
+		    ((u64)NSEC_PER_SEC * 5))
+			lat_info->scale_grp = NULL;
+next_lock:
+		spin_unlock_irqrestore(&lat_info->lock, flags);
+next:
+		blkg_put(blkg);
+	}
+	rcu_read_unlock();
+}
+
+int blk_iolatency_init(struct request_queue *q)
+{
+	struct blk_iolatency *blkiolat;
+	struct rq_qos *rqos;
+	int ret;
+
+	blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
+	if (!blkiolat)
+		return -ENOMEM;
+
+	rqos = &blkiolat->rqos;
+	rqos->id = RQ_QOS_CGROUP;
+	rqos->ops = &blkcg_iolatency_ops;
+	rqos->q = q;
+
+	rq_qos_add(q, rqos);
+
+	ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+	if (ret) {
+		rq_qos_del(q, rqos);
+		kfree(blkiolat);
+		return ret;
+	}
+
+	timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+
+	return 0;
+}
+
+static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+{
+	struct iolatency_grp *iolat = blkg_to_lat(blkg);
+	struct blk_iolatency *blkiolat = iolat->blkiolat;
+	u64 oldval = iolat->min_lat_nsec;
+
+	iolat->min_lat_nsec = val;
+	iolat->cur_win_nsec = max_t(u64, val << 4, 100 * NSEC_PER_MSEC);
+	iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, NSEC_PER_SEC);
+
+	if (!oldval && val)
+		atomic_inc(&blkiolat->enabled);
+	if (oldval && !val)
+		atomic_dec(&blkiolat->enabled);
+}
+
+static void iolatency_clear_scaling(struct blkcg_gq *blkg)
+{
+	if (blkg->parent) {
+		struct iolatency_grp *iolat = blkg_to_lat(blkg->parent);
+		struct child_latency_info *lat_info;
+		if (!iolat)
+			return;
+
+		lat_info = &iolat->child_lat;
+		spin_lock(&lat_info->lock);
+		atomic_set(&lat_info->scale_cookie, DEFAULT_SCALE_COOKIE);
+		lat_info->last_scale_event = 0;
+		lat_info->scale_grp = NULL;
+		lat_info->scale_lat = 0;
+		spin_unlock(&lat_info->lock);
+	}
+}
+
+static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
+			     size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct blkcg_gq *blkg;
+	struct blk_iolatency *blkiolat;
+	struct blkg_conf_ctx ctx;
+	struct iolatency_grp *iolat;
+	char *p, *tok;
+	u64 lat_val = 0;
+	u64 oldval;
+	int ret;
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
+	if (ret)
+		return ret;
+
+	iolat = blkg_to_lat(ctx.blkg);
+	blkiolat = iolat->blkiolat;
+	p = ctx.body;
+
+	ret = -EINVAL;
+	while ((tok = strsep(&p, " "))) {
+		char key[16];
+		char val[21];	/* 18446744073709551616 */
+
+		if (sscanf(tok, "%15[^=]=%20s", key, val) != 2)
+			goto out;
+
+		if (!strcmp(key, "target")) {
+			u64 v;
+
+			if (!strcmp(val, "max"))
+				lat_val = 0;
+			else if (sscanf(val, "%llu", &v) == 1)
+				lat_val = v * NSEC_PER_USEC;
+			else
+				goto out;
+		} else {
+			goto out;
+		}
+	}
+
+	/* Walk up the tree to see if our new val is lower than it should be. */
+	blkg = ctx.blkg;
+	oldval = iolat->min_lat_nsec;
+
+	iolatency_set_min_lat_nsec(blkg, lat_val);
+	if (oldval != iolat->min_lat_nsec) {
+		iolatency_clear_scaling(blkg);
+	}
+
+	ret = 0;
+out:
+	blkg_conf_finish(&ctx);
+	return ret ?: nbytes;
+}
+
+static u64 iolatency_prfill_limit(struct seq_file *sf,
+				  struct blkg_policy_data *pd, int off)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	const char *dname = blkg_dev_name(pd->blkg);
+
+	if (!dname || !iolat->min_lat_nsec)
+		return 0;
+	seq_printf(sf, "%s target=%llu\n",
+		   dname,
+		   (unsigned long long)iolat->min_lat_nsec / NSEC_PER_USEC);
+	return 0;
+}
+
+static int iolatency_print_limit(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  iolatency_prfill_limit,
+			  &blkcg_policy_iolatency, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
+				size_t size)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	unsigned long long avg_lat = div64_u64(iolat->total_lat_avg, NSEC_PER_USEC);
+
+	if (iolat->rq_depth.max_depth == (u64)-1)
+		return scnprintf(buf, size, " depth=max avg_lat=%llu",
+				 avg_lat);
+
+	return scnprintf(buf, size, " depth=%u avg_lat=%llu",
+			 iolat->rq_depth.max_depth, avg_lat);
+}
+
+
+static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
+{
+	struct iolatency_grp *iolat;
+
+	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
+	if (!iolat)
+		return NULL;
+	iolat->stats = __alloc_percpu_gfp(sizeof(struct blk_rq_stat),
+				       __alignof__(struct blk_rq_stat), gfp);
+	if (!iolat->stats) {
+		kfree(iolat);
+		return NULL;
+	}
+	return &iolat->pd;
+}
+
+static void iolatency_pd_init(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+	struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
+	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
+	u64 now = ktime_to_ns(ktime_get());
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct blk_rq_stat *stat;
+		stat = per_cpu_ptr(iolat->stats, cpu);
+		blk_rq_stat_init(stat);
+	}
+
+	rq_wait_init(&iolat->rq_wait);
+	spin_lock_init(&iolat->child_lat.lock);
+	iolat->rq_depth.queue_depth = blk_queue_depth(blkg->q);
+	iolat->rq_depth.max_depth = INT_MAX;
+	iolat->rq_depth.default_depth = iolat->rq_depth.queue_depth;
+	iolat->blkiolat = blkiolat;
+	iolat->cur_win_nsec = 100 * NSEC_PER_MSEC;
+	atomic64_set(&iolat->window_start, now);
+
+	/*
+	 * We init things in list order, so the pd for the parent may not be
+	 * init'ed yet for whatever reason.
+	 */
+	if (blkg->parent && blkg_to_pd(blkg->parent, &blkcg_policy_iolatency)) {
+		struct iolatency_grp *parent = blkg_to_lat(blkg->parent);
+		atomic_set(&iolat->scale_cookie,
+			   atomic_read(&parent->child_lat.scale_cookie));
+	} else {
+		atomic_set(&iolat->scale_cookie, DEFAULT_SCALE_COOKIE);
+	}
+
+	atomic_set(&iolat->child_lat.scale_cookie, DEFAULT_SCALE_COOKIE);
+}
+
+static void iolatency_pd_offline(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	struct blkcg_gq *blkg = lat_to_blkg(iolat);
+
+	iolatency_set_min_lat_nsec(blkg, 0);
+	iolatency_clear_scaling(blkg);
+}
+
+static void iolatency_pd_free(struct blkg_policy_data *pd)
+{
+	struct iolatency_grp *iolat = pd_to_lat(pd);
+	free_percpu(iolat->stats);
+	kfree(iolat);
+}
+
+static struct cftype iolatency_files[] = {
+	{
+		.name = "latency",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = iolatency_print_limit,
+		.write = iolatency_set_limit,
+	},
+	{}
+};
+
+static struct blkcg_policy blkcg_policy_iolatency = {
+	.dfl_cftypes	= iolatency_files,
+	.pd_alloc_fn	= iolatency_pd_alloc,
+	.pd_init_fn	= iolatency_pd_init,
+	.pd_offline_fn	= iolatency_pd_offline,
+	.pd_free_fn	= iolatency_pd_free,
+	.pd_stat_fn	= iolatency_pd_stat,
+};
+
+static int __init iolatency_init(void)
+{
+	return blkcg_policy_register(&blkcg_policy_iolatency);
+}
+
+static void __exit iolatency_exit(void)
+{
+	return blkcg_policy_unregister(&blkcg_policy_iolatency);
+}
+
+module_init(iolatency_init);
+module_exit(iolatency_exit);
diff --git a/block/blk.h b/block/blk.h
index 8d23aea96ce9..69b14cd2bb22 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -412,4 +412,10 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 
 extern void blk_drain_queue(struct request_queue *q);
 
+#ifdef CONFIG_BLK_CGROUP_IOLATENCY
+extern int blk_iolatency_init(struct request_queue *q);
+#else
+static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
+#endif
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0ffc34c5cc83..e13449a379a1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -180,9 +180,7 @@ struct bio {
 	struct io_context	*bi_ioc;
 	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	struct bio_issue	bi_issue;
-#endif
 #endif
 	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-- 
cgit v1.2.3


From c33e73af2183fb9fcd993e37abcdecc058b22d91 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 11:32:06 +0300
Subject: IB/uverbs: Add a uobj_perform_destroy helper

This consolidates a bunch of repeated code patterns into a helper.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c  | 22 +++++++++++
 drivers/infiniband/core/uverbs_cmd.c | 76 +++++++-----------------------------
 include/rdma/uverbs_std_types.h      |  5 +++
 3 files changed, 41 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index c67bcdda5760..38d3929f6e65 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -128,6 +128,28 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
 }
 
+/*
+ * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then
+ * returns success_res on success (negative errno on failure). For use by
+ * callers that do not need the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+			   struct ib_uverbs_file *ufile, int success_res)
+{
+	struct ib_uobject *uobj;
+	int ret;
+
+	uobj = rdma_lookup_get_uobject(type, ufile->ucontext, id, true);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	ret = rdma_remove_commit_uobject(uobj);
+	if (ret)
+		return ret;
+
+	return success_res;
+}
+
 static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
 				     const struct uverbs_obj_type *type)
 {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5d0fd36b009d..b751c196e2c6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -367,20 +367,12 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
 			     int in_len, int out_len)
 {
 	struct ib_uverbs_dealloc_pd cmd;
-	struct ib_uobject          *uobj;
-	int                         ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
+				    in_len);
 }
 
 struct xrcd_table_entry {
@@ -597,19 +589,12 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
 			     int out_len)
 {
 	struct ib_uverbs_close_xrcd cmd;
-	struct ib_uobject           *uobj;
-	int                         ret = 0;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
+				    in_len);
 }
 
 int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
@@ -829,20 +814,12 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 			   int out_len)
 {
 	struct ib_uverbs_dereg_mr cmd;
-	struct ib_uobject	 *uobj;
-	int                       ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -921,19 +898,12 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 			     int out_len)
 {
 	struct ib_uverbs_dealloc_mw cmd;
-	struct ib_uobject	   *uobj;
-	int                         ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof(cmd)))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -2641,19 +2611,12 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 			     const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_destroy_ah cmd;
-	struct ib_uobject	   *uobj;
-	int			    ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -3445,7 +3408,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
 				       struct ib_udata *uhw)
 {
 	struct ib_uverbs_ex_destroy_rwq_ind_table	cmd = {};
-	struct ib_uobject		*uobj;
 	int			ret;
 	size_t required_cmd_sz;
 
@@ -3466,12 +3428,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EOPNOTSUPP;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	return uobj_remove_commit(uobj);
+	return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+				    cmd.ind_tbl_handle, file, 0);
 }
 
 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3658,7 +3616,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 			      struct ib_udata *uhw)
 {
 	struct ib_uverbs_destroy_flow	cmd;
-	struct ib_uobject		*uobj;
 	int				ret;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3671,13 +3628,8 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EINVAL;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret;
+	return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
+				    0);
 }
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 4c151b67fb6d..27c24453fc12 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -71,6 +71,11 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
 #define uobj_get_write(_type, _id, _ucontext)				\
 	 __uobj_get(uobj_get_type(_type), true, _ucontext, _id)
 
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+			   struct ib_uverbs_file *ufile, int success_res);
+#define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
+	__uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res)
+
 static inline void uobj_put_read(struct ib_uobject *uobj)
 {
 	rdma_lookup_put_uobject(uobj, false);
-- 
cgit v1.2.3


From 6a5e9c88419828a487204e35291ae4459697a9bd Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 11:32:07 +0300
Subject: IB/uverbs: Move non driver related elements from ib_ucontext to
 ib_ufile

The IDR is part of the ib_ufile so all the machinery to lock it, handle
closing and disassociation rightly belongs to the ufile not the ucontext.

This changes the lifetime of that data to match the lifetime of the file
descriptor which is always strictly longer than the lifetime of the
ucontext.

We need the entire locking machinery to continue to exist after ucontext
destruction to allow us to return the destroy data after a device has been
disassociated.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c   | 72 +++++++++++++++++------------------
 drivers/infiniband/core/rdma_core.h   |  1 -
 drivers/infiniband/core/uverbs.h      |  8 ++++
 drivers/infiniband/core/uverbs_cmd.c  |  1 -
 drivers/infiniband/core/uverbs_main.c |  4 ++
 include/rdma/ib_verbs.h               |  9 ++---
 6 files changed, 49 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 38d3929f6e65..11c6f271be00 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -161,6 +161,7 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
 	 * user_handle should be filled by the handler,
 	 * The object is added to the list in the commit stage.
 	 */
+	uobj->ufile = context->ufile;
 	uobj->context = context;
 	uobj->type = type;
 	/*
@@ -286,7 +287,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 
 	ret = uverbs_try_lock_object(uobj, exclusive);
 	if (ret) {
-		WARN(ucontext->cleanup_reason,
+		WARN(uobj->ufile->cleanup_reason,
 		     "ib_uverbs: Trying to lookup_get while cleanup context\n");
 		goto free;
 	}
@@ -441,8 +442,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
 static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
 						    enum rdma_remove_reason why)
 {
+	struct ib_uverbs_file *ufile = uobj->ufile;
 	int ret;
-	struct ib_ucontext *ucontext = uobj->context;
 
 	ret = uobj->type->type_class->remove_commit(uobj, why);
 	if (ib_is_destroy_retryable(ret, why, uobj)) {
@@ -450,9 +451,9 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
 		atomic_set(&uobj->usecnt, 0);
 		uobj->type->type_class->lookup_put(uobj, true);
 	} else {
-		mutex_lock(&ucontext->uobjects_lock);
+		mutex_lock(&ufile->uobjects_lock);
 		list_del(&uobj->list);
-		mutex_unlock(&ucontext->uobjects_lock);
+		mutex_unlock(&ufile->uobjects_lock);
 		/* put the ref we took when we created the object */
 		uverbs_uobject_put(uobj);
 	}
@@ -464,19 +465,19 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
 {
 	int ret;
-	struct ib_ucontext *ucontext = uobj->context;
+	struct ib_uverbs_file *ufile = uobj->ufile;
 
 	/* put the ref count we took at lookup_get */
 	uverbs_uobject_put(uobj);
 	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+	if (!down_read_trylock(&ufile->cleanup_rwsem)) {
 		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
 		return 0;
 	}
 	assert_uverbs_usecnt(uobj, true);
 	ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
 
-	up_read(&ucontext->cleanup_rwsem);
+	up_read(&ufile->cleanup_rwsem);
 	return ret;
 }
 
@@ -496,10 +497,10 @@ static const struct uverbs_obj_type null_obj_type = {
 int rdma_explicit_destroy(struct ib_uobject *uobject)
 {
 	int ret;
-	struct ib_ucontext *ucontext = uobject->context;
+	struct ib_uverbs_file *ufile = uobject->ufile;
 
 	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+	if (!down_read_trylock(&ufile->cleanup_rwsem)) {
 		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
 		return 0;
 	}
@@ -512,7 +513,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
 	uobject->type = &null_obj_type;
 
 out:
-	up_read(&ucontext->cleanup_rwsem);
+	up_read(&ufile->cleanup_rwsem);
 	return ret;
 }
 
@@ -542,8 +543,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 
 int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 {
+	struct ib_uverbs_file *ufile = uobj->ufile;
+
 	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+	if (!down_read_trylock(&ufile->cleanup_rwsem)) {
 		int ret;
 
 		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
@@ -559,12 +562,12 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	assert_uverbs_usecnt(uobj, true);
 	atomic_set(&uobj->usecnt, 0);
 
-	mutex_lock(&uobj->context->uobjects_lock);
-	list_add(&uobj->list, &uobj->context->uobjects);
-	mutex_unlock(&uobj->context->uobjects_lock);
+	mutex_lock(&ufile->uobjects_lock);
+	list_add(&uobj->list, &ufile->uobjects);
+	mutex_unlock(&ufile->uobjects_lock);
 
 	uobj->type->type_class->alloc_commit(uobj);
-	up_read(&uobj->context->cleanup_rwsem);
+	up_read(&ufile->cleanup_rwsem);
 
 	return 0;
 }
@@ -638,20 +641,18 @@ EXPORT_SYMBOL(uverbs_idr_class);
 
 static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
 {
-	struct ib_ucontext *ucontext;
 	struct ib_uverbs_file *ufile = uobj_file->ufile;
 	int ret;
 
-	mutex_lock(&uobj_file->ufile->cleanup_mutex);
+	mutex_lock(&ufile->cleanup_mutex);
 
 	/* uobject was either already cleaned up or is cleaned up right now anyway */
 	if (!uobj_file->uobj.context ||
-	    !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+	    !down_read_trylock(&ufile->cleanup_rwsem))
 		goto unlock;
 
-	ucontext = uobj_file->uobj.context;
 	ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
-	up_read(&ucontext->cleanup_rwsem);
+	up_read(&ufile->cleanup_rwsem);
 	if (ret)
 		pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
 unlock:
@@ -671,6 +672,7 @@ void uverbs_close_fd(struct file *f)
 static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
 				    enum rdma_remove_reason reason)
 {
+	struct ib_uverbs_file *ufile = ucontext->ufile;
 	struct ib_uobject *obj, *next_obj;
 	int ret = -EINVAL;
 	int err = 0;
@@ -684,9 +686,9 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
 	 * We take and release the lock per traversal in order to let
 	 * other threads (which might still use the FDs) chance to run.
 	 */
-	mutex_lock(&ucontext->uobjects_lock);
-	ucontext->cleanup_reason = reason;
-	list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects, list) {
+	mutex_lock(&ufile->uobjects_lock);
+	ufile->cleanup_reason = reason;
+	list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
 		/*
 		 * if we hit this WARN_ON, that means we are
 		 * racing with a lookup_get.
@@ -710,7 +712,7 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
 		uverbs_uobject_put(obj);
 		ret = 0;
 	}
-	mutex_unlock(&ucontext->uobjects_lock);
+	mutex_unlock(&ufile->uobjects_lock);
 	return ret;
 }
 
@@ -719,14 +721,16 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
 	enum rdma_remove_reason reason = device_removed ?
 					RDMA_REMOVE_DRIVER_REMOVE :
 					RDMA_REMOVE_CLOSE;
+	struct ib_uverbs_file *ufile = ucontext->ufile;
+
 	/*
 	 * Waits for all remove_commit and alloc_commit to finish. Logically, We
 	 * want to hold this forever as the context is going to be destroyed,
 	 * but we'll release it since it causes a "held lock freed" BUG message.
 	 */
-	down_write(&ucontext->cleanup_rwsem);
-	ucontext->cleanup_retryable = true;
-	while (!list_empty(&ucontext->uobjects))
+	down_write(&ufile->cleanup_rwsem);
+	ufile->ucontext->cleanup_retryable = true;
+	while (!list_empty(&ufile->uobjects))
 		if (__uverbs_cleanup_ucontext(ucontext, reason)) {
 			/*
 			 * No entry was cleaned-up successfully during this
@@ -735,19 +739,11 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
 			break;
 		}
 
-	ucontext->cleanup_retryable = false;
-	if (!list_empty(&ucontext->uobjects))
+	ufile->ucontext->cleanup_retryable = false;
+	if (!list_empty(&ufile->uobjects))
 		__uverbs_cleanup_ucontext(ucontext, reason);
 
-	up_write(&ucontext->cleanup_rwsem);
-}
-
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
-{
-	ucontext->cleanup_reason = 0;
-	mutex_init(&ucontext->uobjects_lock);
-	INIT_LIST_HEAD(&ucontext->uobjects);
-	init_rwsem(&ucontext->cleanup_rwsem);
+	up_write(&ufile->cleanup_rwsem);
 }
 
 const struct uverbs_obj_type_class uverbs_fd_class = {
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 8cede4546b25..f7f157e78f8c 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -56,7 +56,6 @@ const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_sp
  * cleanup_ucontext removes all uobjects from the context and puts them.
  */
 void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
 
 /*
  * uverbs_uobject_get is called in order to increase the reference count on
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a663e2cdc3d0..8b0a8ec98ac8 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -145,6 +145,14 @@ struct ib_uverbs_file {
 	struct list_head			list;
 	int					is_closed;
 
+	/* locking the uobjects_list */
+	struct mutex		uobjects_lock;
+	struct list_head	uobjects;
+
+	/* protects cleanup process from other actions */
+	struct rw_semaphore	cleanup_rwsem;
+	enum rdma_remove_reason cleanup_reason;
+
 	struct idr		idr;
 	/* spinlock protects write access to idr */
 	spinlock_t		idr_lock;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b751c196e2c6..aa84246c0bfe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -110,7 +110,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	ucontext->cg_obj = cg_obj;
 	/* ufile is required when some objects are released */
 	ucontext->ufile = file;
-	uverbs_initialize_ucontext(ucontext);
 
 	rcu_read_lock();
 	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index c05ce5ae5415..82168b53e2ae 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -888,6 +888,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	mutex_init(&file->mutex);
 	mutex_init(&file->cleanup_mutex);
 
+	mutex_init(&file->uobjects_lock);
+	INIT_LIST_HEAD(&file->uobjects);
+	init_rwsem(&file->cleanup_rwsem);
+
 	filp->private_data = file;
 	kobject_get(&dev->kobj);
 	list_add_tail(&file->list, &dev->uverbs_file_list);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8784d5bfc252..9c04cb5e4041 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1500,12 +1500,6 @@ struct ib_ucontext {
 	struct ib_uverbs_file  *ufile;
 	int			closing;
 
-	/* locking the uobjects_list */
-	struct mutex		uobjects_lock;
-	struct list_head	uobjects;
-	/* protects cleanup process from other actions */
-	struct rw_semaphore	cleanup_rwsem;
-	enum rdma_remove_reason cleanup_reason;
 	bool cleanup_retryable;
 
 	struct pid             *tgid;
@@ -1531,6 +1525,9 @@ struct ib_ucontext {
 
 struct ib_uobject {
 	u64			user_handle;	/* handle given to us by userspace */
+	/* ufile & ucontext owning this object */
+	struct ib_uverbs_file  *ufile;
+	/* FIXME, save memory: ufile->context == context */
 	struct ib_ucontext     *context;	/* associated user context */
 	void		       *object;		/* containing object */
 	struct list_head	list;		/* link to context's list */
-- 
cgit v1.2.3


From 6ef1c82821b2ae9bfa26fe65e6f0a66dfd79b7d7 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 11:32:08 +0300
Subject: IB/uverbs: Replace ib_ucontext with ib_uverbs_file in core function
 calls

The correct handle to refer to the idr/etc is ib_uverbs_file, revise all
the core APIs to use this instead. The user API are left as wrappers
that automatically convert a ucontext to a ufile for now.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c    | 66 +++++++++++++++++-----------------
 drivers/infiniband/core/rdma_core.h    | 21 +++++------
 drivers/infiniband/core/uverbs_ioctl.c |  4 +--
 drivers/infiniband/core/uverbs_main.c  | 15 ++++----
 include/rdma/uverbs_std_types.h        | 16 ++++-----
 include/rdma/uverbs_types.h            |  8 ++---
 6 files changed, 62 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 11c6f271be00..dcaf3813ee78 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -139,7 +139,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = rdma_lookup_get_uobject(type, ufile->ucontext, id, true);
+	uobj = rdma_lookup_get_uobject(type, ufile, id, true);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -150,7 +150,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
 	return success_res;
 }
 
-static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 				     const struct uverbs_obj_type *type)
 {
 	struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
@@ -161,8 +161,8 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
 	 * user_handle should be filled by the handler,
 	 * The object is added to the list in the commit stage.
 	 */
-	uobj->ufile = context->ufile;
-	uobj->context = context;
+	uobj->ufile = ufile;
+	uobj->context = ufile->ucontext;
 	uobj->type = type;
 	/*
 	 * Allocated objects start out as write locked to deny any other
@@ -210,15 +210,15 @@ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
 }
 
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
-static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
-						 struct ib_ucontext *ucontext,
-						 int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+		       struct ib_uverbs_file *ufile, int id, bool exclusive)
 {
 	struct ib_uobject *uobj;
 
 	rcu_read_lock();
 	/* object won't be released as we're protected in rcu */
-	uobj = idr_find(&ucontext->ufile->idr, id);
+	uobj = idr_find(&ufile->idr, id);
 	if (!uobj) {
 		uobj = ERR_PTR(-ENOENT);
 		goto free;
@@ -239,7 +239,7 @@ free:
 }
 
 static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
-						struct ib_ucontext *ucontext,
+						struct ib_uverbs_file *ufile,
 						int id, bool exclusive)
 {
 	struct file *f;
@@ -270,13 +270,13 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
 }
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_ucontext *ucontext,
-					   int id, bool exclusive)
+					   struct ib_uverbs_file *ufile, int id,
+					   bool exclusive)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+	uobj = type->type_class->lookup_get(type, ufile, id, exclusive);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -300,12 +300,12 @@ free:
 }
 
 static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
-						  struct ib_ucontext *ucontext)
+						  struct ib_uverbs_file *ufile)
 {
 	int ret;
 	struct ib_uobject *uobj;
 
-	uobj = alloc_uobj(ucontext, type);
+	uobj = alloc_uobj(ufile, type);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -313,7 +313,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
 	if (ret)
 		goto uobj_put;
 
-	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device,
 				   RDMACG_RESOURCE_HCA_OBJECT);
 	if (ret)
 		goto idr_remove;
@@ -328,7 +328,7 @@ uobj_put:
 }
 
 static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
-						 struct ib_ucontext *ucontext)
+						 struct ib_uverbs_file *ufile)
 {
 	const struct uverbs_obj_fd_type *fd_type =
 		container_of(type, struct uverbs_obj_fd_type, type);
@@ -341,7 +341,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 	if (new_fd < 0)
 		return ERR_PTR(new_fd);
 
-	uobj = alloc_uobj(ucontext, type);
+	uobj = alloc_uobj(ufile, type);
 	if (IS_ERR(uobj)) {
 		put_unused_fd(new_fd);
 		return uobj;
@@ -360,7 +360,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 
 	uobj_file->uobj.id = new_fd;
 	uobj_file->uobj.object = filp;
-	uobj_file->ufile = ucontext->ufile;
+	uobj_file->ufile = ufile;
 	INIT_LIST_HEAD(&uobj->list);
 	kref_get(&uobj_file->ufile->ref);
 
@@ -368,9 +368,9 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 }
 
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
-					    struct ib_ucontext *ucontext)
+					    struct ib_uverbs_file *ufile)
 {
-	return type->type_class->alloc_begin(type, ucontext);
+	return type->type_class->alloc_begin(type, ufile);
 }
 
 static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
@@ -669,10 +669,9 @@ void uverbs_close_fd(struct file *f)
 	kref_put(uverbs_file_ref, ib_uverbs_release_file);
 }
 
-static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
-				    enum rdma_remove_reason reason)
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+				  enum rdma_remove_reason reason)
 {
-	struct ib_uverbs_file *ufile = ucontext->ufile;
 	struct ib_uobject *obj, *next_obj;
 	int ret = -EINVAL;
 	int err = 0;
@@ -716,12 +715,11 @@ static int __uverbs_cleanup_ucontext(struct ib_ucontext *ucontext,
 	return ret;
 }
 
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
 {
 	enum rdma_remove_reason reason = device_removed ?
 					RDMA_REMOVE_DRIVER_REMOVE :
 					RDMA_REMOVE_CLOSE;
-	struct ib_uverbs_file *ufile = ucontext->ufile;
 
 	/*
 	 * Waits for all remove_commit and alloc_commit to finish. Logically, We
@@ -731,7 +729,7 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
 	down_write(&ufile->cleanup_rwsem);
 	ufile->ucontext->cleanup_retryable = true;
 	while (!list_empty(&ufile->uobjects))
-		if (__uverbs_cleanup_ucontext(ucontext, reason)) {
+		if (__uverbs_cleanup_ufile(ufile, reason)) {
 			/*
 			 * No entry was cleaned-up successfully during this
 			 * iteration
@@ -741,7 +739,7 @@ void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
 
 	ufile->ucontext->cleanup_retryable = false;
 	if (!list_empty(&ufile->uobjects))
-		__uverbs_cleanup_ucontext(ucontext, reason);
+		__uverbs_cleanup_ufile(ufile, reason);
 
 	up_write(&ufile->cleanup_rwsem);
 }
@@ -757,19 +755,19 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
 };
 EXPORT_SYMBOL(uverbs_fd_class);
 
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
-						   struct ib_ucontext *ucontext,
-						   enum uverbs_obj_access access,
-						   int id)
+struct ib_uobject *
+uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+			     struct ib_uverbs_file *ufile,
+			     enum uverbs_obj_access access, int id)
 {
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
+		return rdma_lookup_get_uobject(type_attrs, ufile, id, false);
 	case UVERBS_ACCESS_DESTROY:
 	case UVERBS_ACCESS_WRITE:
-		return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
+		return rdma_lookup_get_uobject(type_attrs, ufile, id, true);
 	case UVERBS_ACCESS_NEW:
-		return rdma_alloc_begin_uobject(type_attrs, ucontext);
+		return rdma_alloc_begin_uobject(type_attrs, ufile);
 	default:
 		WARN_ON(true);
 		return ERR_PTR(-EOPNOTSUPP);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index f7f157e78f8c..1bba60e960c1 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -48,14 +48,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
 						   uint16_t object);
 const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
 						   uint16_t method);
-/*
- * These functions initialize the context and cleanups its uobjects.
- * The context has a list of objects which is protected by a mutex
- * on the context. initialize_ucontext should be called when we create
- * a context.
- * cleanup_ucontext removes all uobjects from the context and puts them.
- */
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
+
+void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed);
 
 /*
  * uverbs_uobject_get is called in order to increase the reference count on
@@ -81,7 +75,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
 void uverbs_close_fd(struct file *f);
 
 /*
- * Get an ib_uobject that corresponds to the given id from ucontext, assuming
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
  * the object is from the given type. Lock it to the required access when
  * applicable.
  * This function could create (access == NEW), destroy (access == DESTROY)
@@ -89,10 +83,11 @@ void uverbs_close_fd(struct file *f);
  * The action will be finalized only when uverbs_finalize_object or
  * uverbs_finalize_objects are called.
  */
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
-						   struct ib_ucontext *ucontext,
-						   enum uverbs_obj_access access,
-						   int id);
+struct ib_uobject *
+uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+			     struct ib_uverbs_file *ufile,
+			     enum uverbs_obj_access access, int id);
+
 /*
  * Note that certain finalize stages could return a status:
  *   (a) alloc_commit could return a failure if the object is committed at the
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 5b59c6f0feed..d3bf82cfaa2b 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -152,9 +152,9 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		if (!object)
 			return -EINVAL;
 
-		o_attr->uobject = uverbs_get_uobject_from_context(
+		o_attr->uobject = uverbs_get_uobject_from_file(
 					object->type_attrs,
-					ufile->ucontext,
+					ufile,
 					spec->u.obj.access,
 					(int)uattr->data);
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 82168b53e2ae..037c8975d9f0 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -227,12 +227,13 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 	}
 }
 
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
-				      struct ib_ucontext *context,
-				      bool device_removed)
+static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
+				   bool device_removed)
 {
+	struct ib_ucontext *context = file->ucontext;
+
 	context->closing = 1;
-	uverbs_cleanup_ucontext(context, device_removed);
+	uverbs_cleanup_ufile(file, device_removed);
 	put_pid(context->tgid);
 
 	ib_rdmacg_uncharge(&context->cg_obj, context->device,
@@ -918,7 +919,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 
 	mutex_lock(&file->cleanup_mutex);
 	if (file->ucontext) {
-		ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
+		ib_uverbs_cleanup_ufile(file, false);
 		file->ucontext = NULL;
 	}
 	mutex_unlock(&file->cleanup_mutex);
@@ -1176,7 +1177,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 		mutex_unlock(&file->cleanup_mutex);
 
 		/* At this point ib_uverbs_close cannot be running
-		 * ib_uverbs_cleanup_ucontext
+		 * ib_uverbs_cleanup_ufile
 		 */
 		if (ucontext) {
 			/* We must release the mutex before going ahead and
@@ -1188,7 +1189,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 			ib_uverbs_event_handler(&file->event_handler, &event);
 			ib_uverbs_disassociate_ucontext(ucontext);
 			mutex_lock(&file->cleanup_mutex);
-			ib_uverbs_cleanup_ucontext(file, ucontext, true);
+			ib_uverbs_cleanup_ufile(file, true);
 			mutex_unlock(&file->cleanup_mutex);
 		}
 
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 27c24453fc12..13b92020edd0 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -48,28 +48,28 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
 
 static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
 					    bool write,
-					    struct ib_ucontext *ucontext,
+					    struct ib_uverbs_file *ufile,
 					    int id)
 {
-	return rdma_lookup_get_uobject(type, ucontext, id, write);
+	return rdma_lookup_get_uobject(type, ufile, id, write);
 }
 
 #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
 
-#define uobj_get_read(_type, _id, _ucontext)				\
-	 __uobj_get(uobj_get_type(_type), false, _ucontext, _id)
+#define uobj_get_read(_type, _id, _ucontext)                                   \
+	__uobj_get(uobj_get_type(_type), false, (_ucontext)->ufile, _id)
 
 #define uobj_get_obj_read(_object, _type, _id, _ucontext)		\
 ({									\
 	struct ib_uobject *__uobj =					\
 		__uobj_get(uobj_get_type(_type),			\
-			   false, _ucontext, _id);			\
+			   false, (_ucontext)->ufile, _id);		\
 									\
 	(struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
 })
 
-#define uobj_get_write(_type, _id, _ucontext)				\
-	 __uobj_get(uobj_get_type(_type), true, _ucontext, _id)
+#define uobj_get_write(_type, _id, _ucontext)                                  \
+	__uobj_get(uobj_get_type(_type), true, (_ucontext)->ufile, _id)
 
 int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
 			   struct ib_uverbs_file *ufile, int success_res);
@@ -107,7 +107,7 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
 					      struct ib_ucontext *ucontext)
 {
-	return rdma_alloc_begin_uobject(type, ucontext);
+	return rdma_alloc_begin_uobject(type, ucontext->ufile);
 }
 
 #define uobj_alloc(_type, ucontext)	\
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 175495d1b0b8..5290d8d34e9a 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -72,12 +72,12 @@ struct uverbs_obj_type_class {
 	 *		 reset flow).
 	 */
 	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
-					  struct ib_ucontext *ucontext);
+					  struct ib_uverbs_file *ufile);
 	void (*alloc_commit)(struct ib_uobject *uobj);
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
-					 struct ib_ucontext *ucontext, int id,
+					 struct ib_uverbs_file *ufile, int id,
 					 bool exclusive);
 	void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
 	/*
@@ -120,11 +120,11 @@ struct uverbs_obj_idr_type {
 };
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_ucontext *ucontext,
+					   struct ib_uverbs_file *ufile,
 					   int id, bool exclusive);
 void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
-					    struct ib_ucontext *ucontext);
+					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
 int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
-- 
cgit v1.2.3


From 2cc1e3b80942a7de7dce81f8a86c27a4ba3a695e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 11:32:09 +0300
Subject: IB/uverbs: Replace file->ucontext with file in uverbs_cmd.c

The ucontext isn't needed any more, just pass the uverbs_file directly.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 153 ++++++++++++++++-------------------
 include/rdma/uverbs_std_types.h      |  35 ++++----
 2 files changed, 90 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index aa84246c0bfe..ed61bd5b9c2b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,10 +48,10 @@
 #include "core_priv.h"
 
 static struct ib_uverbs_completion_event_file *
-ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
-						fd, context);
+						fd, ufile);
 	struct ib_uobject_file *uobj_file;
 
 	if (IS_ERR(uobj))
@@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                    out_len - sizeof(resp));
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_PD, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -509,8 +509,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 		}
 	}
 
-	obj  = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
-						    file->ucontext);
+	obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
 		goto err_tree_mutex_unlock;
@@ -652,11 +651,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_MR, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_free;
@@ -748,8 +747,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
 			return -EINVAL;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -767,7 +765,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 	}
 
 	if (cmd.flags & IB_MR_REREG_PD) {
-		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+				       file);
 		if (!pd) {
 			ret = -EINVAL;
 			goto put_uobjs;
@@ -840,11 +839,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof(cmd)))
 		return -EFAULT;
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_MW, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_free;
@@ -921,7 +920,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -966,14 +965,12 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (cmd->comp_vector >= file->device->num_comp_vectors)
 		return ERR_PTR(-EINVAL);
 
-	obj  = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
-						  file->ucontext);
+	obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file);
 	if (IS_ERR(obj))
 		return obj;
 
 	if (cmd->comp_channel >= 0) {
-		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
-						     file->ucontext);
+		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
 		if (IS_ERR(ev_file)) {
 			ret = PTR_ERR(ev_file);
 			goto err;
@@ -1155,7 +1152,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1220,7 +1217,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1267,7 +1264,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1294,8 +1291,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -1353,8 +1349,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
-						  file->ucontext);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 	obj->uxrcd = NULL;
@@ -1364,9 +1359,9 @@ static int create_qp(struct ib_uverbs_file *file,
 	if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
 		      sizeof(cmd->rwq_ind_tbl_handle) &&
 		      (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
-		ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
-					    cmd->rwq_ind_tbl_handle,
-					    file->ucontext);
+		ind_tbl = uobj_get_obj_read(rwq_ind_table,
+					    UVERBS_OBJECT_RWQ_IND_TBL,
+					    cmd->rwq_ind_tbl_handle, file);
 		if (!ind_tbl) {
 			ret = -EINVAL;
 			goto err_put;
@@ -1392,7 +1387,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
 	if (cmd->qp_type == IB_QPT_XRC_TGT) {
 		xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
-					  file->ucontext);
+					  file);
 
 		if (IS_ERR(xrcd_uobj)) {
 			ret = -EINVAL;
@@ -1411,8 +1406,8 @@ static int create_qp(struct ib_uverbs_file *file,
 			cmd->max_recv_sge = 0;
 		} else {
 			if (cmd->is_srq) {
-				srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
-							file->ucontext);
+				srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+							cmd->srq_handle, file);
 				if (!srq || srq->srq_type == IB_SRQT_XRC) {
 					ret = -EINVAL;
 					goto err_put;
@@ -1421,8 +1416,9 @@ static int create_qp(struct ib_uverbs_file *file,
 
 			if (!ind_tbl) {
 				if (cmd->recv_cq_handle != cmd->send_cq_handle) {
-					rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
-								file->ucontext);
+					rcq = uobj_get_obj_read(
+						cq, UVERBS_OBJECT_CQ,
+						cmd->recv_cq_handle, file);
 					if (!rcq) {
 						ret = -EINVAL;
 						goto err_put;
@@ -1432,11 +1428,12 @@ static int create_qp(struct ib_uverbs_file *file,
 		}
 
 		if (has_sq)
-			scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
-						file->ucontext);
+			scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+						cmd->send_cq_handle, file);
 		if (!ind_tbl)
 			rcq = rcq ?: scq;
-		pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+				       file);
 		if (!pd || (!scq && has_sq)) {
 			ret = -EINVAL;
 			goto err_put;
@@ -1733,13 +1730,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
-						  file->ucontext);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
-				  file->ucontext);
+	xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
 	if (IS_ERR(xrcd_uobj)) {
 		ret = -EINVAL;
 		goto err_put;
@@ -1841,7 +1836,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
 		goto out;
 	}
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp) {
 		ret = -EINVAL;
 		goto out;
@@ -1946,7 +1941,7 @@ static int modify_qp(struct ib_uverbs_file *file,
 	if (!attr)
 		return -ENOMEM;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
 	if (!qp) {
 		ret = -EINVAL;
 		goto out;
@@ -2101,8 +2096,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 
 	memset(&resp, 0, sizeof resp);
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -2167,7 +2161,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	if (!user_wr)
 		return -ENOMEM;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		goto out;
 
@@ -2203,8 +2197,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 				goto out_put;
 			}
 
-			ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
-						   file->ucontext);
+			ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+						   user_wr->wr.ud.ah, file);
 			if (!ud->ah) {
 				kfree(ud);
 				ret = -EINVAL;
@@ -2438,7 +2432,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		goto out;
 
@@ -2487,7 +2481,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		goto out;
 
@@ -2544,11 +2538,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_AH, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err;
@@ -2632,7 +2626,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		return -EINVAL;
 
@@ -2683,7 +2677,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		return -EINVAL;
 
@@ -2791,7 +2785,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
 	uflow_res->num++;
 }
 
-static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
 				       struct ib_uverbs_flow_spec *kern_spec,
 				       union ib_flow_spec *ib_spec,
 				       struct ib_uflow_resources *uflow_res)
@@ -2820,7 +2814,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 		ib_spec->action.act = uobj_get_obj_read(flow_action,
 							UVERBS_OBJECT_FLOW_ACTION,
 							kern_spec->action.handle,
-							ucontext);
+							ufile);
 		if (!ib_spec->action.act)
 			return -EINVAL;
 		ib_spec->action.size =
@@ -2838,7 +2832,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 			uobj_get_obj_read(counters,
 					  UVERBS_OBJECT_COUNTERS,
 					  kern_spec->flow_count.handle,
-					  ucontext);
+					  ufile);
 		if (!ib_spec->flow_count.counters)
 			return -EINVAL;
 		ib_spec->flow_count.size =
@@ -3019,7 +3013,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
 						     kern_filter_sz, ib_spec);
 }
 
-static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
 				struct ib_uverbs_flow_spec *kern_spec,
 				union ib_flow_spec *ib_spec,
 				struct ib_uflow_resources *uflow_res)
@@ -3028,7 +3022,7 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
 		return -EINVAL;
 
 	if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
-		return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+		return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
 						   uflow_res);
 	else
 		return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
@@ -3071,18 +3065,17 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EOPNOTSUPP;
 
-	obj  = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
-						  file->ucontext);
+	obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		err = -EINVAL;
 		goto err_uobj;
 	}
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq) {
 		err = -EINVAL;
 		goto err_put_pd;
@@ -3186,8 +3179,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 		return -EOPNOTSUPP;
 
 	resp.response_length = required_resp_len;
-	uobj  = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -3237,7 +3229,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
 		return -EINVAL;
 
-	wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
+	wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
 	if (!wq)
 		return -EINVAL;
 
@@ -3331,8 +3323,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 
 	for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
 			num_read_wqs++) {
-		wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
-				       file->ucontext);
+		wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+				       wqs_handles[num_read_wqs], file);
 		if (!wq) {
 			err = -EINVAL;
 			goto put_wqs;
@@ -3341,7 +3333,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 		wqs[num_read_wqs] = wq;
 	}
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto put_wqs;
@@ -3504,13 +3496,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		kern_flow_attr = &cmd.flow_attr;
 	}
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto err_free_attr;
 	}
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp) {
 		err = -EINVAL;
 		goto err_uobj;
@@ -3546,8 +3538,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	     cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
 	     cmd.flow_attr.size >=
 	     ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
-		err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
-					   uflow_res);
+		err = kern_spec_to_ib_spec(file, kern_spec, ib_spec, uflow_res);
 		if (err)
 			goto err_free;
 
@@ -3644,8 +3635,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	struct ib_srq_init_attr          attr;
 	int ret;
 
-	obj  = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
-						   file->ucontext);
+	obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3654,7 +3644,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 
 	if (cmd->srq_type == IB_SRQT_XRC) {
 		xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
-					  file->ucontext);
+					  file);
 		if (IS_ERR(xrcd_uobj)) {
 			ret = -EINVAL;
 			goto err;
@@ -3671,15 +3661,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	}
 
 	if (ib_srq_has_cq(cmd->srq_type)) {
-		attr.ext.cq  = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
-						 file->ucontext);
+		attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+						cmd->cq_handle, file);
 		if (!attr.ext.cq) {
 			ret = -EINVAL;
 			goto err_put_xrcd;
 		}
 	}
 
-	pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_put_cq;
@@ -3851,7 +3841,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
 		   out_len);
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		return -EINVAL;
 
@@ -3882,7 +3872,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		return -EINVAL;
 
@@ -3919,8 +3909,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -4098,7 +4087,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
 	if (cmd.attr_mask > IB_CQ_MODERATE)
 		return -EOPNOTSUPP;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 13b92020edd0..3e3f108f0912 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -56,20 +56,24 @@ static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
 
 #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
 
-#define uobj_get_read(_type, _id, _ucontext)                                   \
-	__uobj_get(uobj_get_type(_type), false, (_ucontext)->ufile, _id)
+#define uobj_get_read(_type, _id, _ufile)                                      \
+	__uobj_get(uobj_get_type(_type), false, _ufile, _id)
 
-#define uobj_get_obj_read(_object, _type, _id, _ucontext)		\
-({									\
-	struct ib_uobject *__uobj =					\
-		__uobj_get(uobj_get_type(_type),			\
-			   false, (_ucontext)->ufile, _id);		\
-									\
-	(struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
-})
+static inline void *_uobj_get_obj_read(const struct uverbs_obj_type *type,
+				       int id, struct ib_uverbs_file *ufile)
+{
+	struct ib_uobject *uobj = __uobj_get(type, false, ufile, id);
+
+	if (IS_ERR(uobj))
+		return NULL;
+	return uobj->object;
+}
+#define uobj_get_obj_read(_object, _type, _id, _ufile)                         \
+	((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id,  \
+						   _ufile))
 
-#define uobj_get_write(_type, _id, _ucontext)                                  \
-	__uobj_get(uobj_get_type(_type), true, (_ucontext)->ufile, _id)
+#define uobj_get_write(_type, _id, _ufile)                                     \
+	__uobj_get(uobj_get_type(_type), true, _ufile, _id)
 
 int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
 			   struct ib_uverbs_file *ufile, int success_res);
@@ -105,13 +109,12 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 }
 
 static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
-					      struct ib_ucontext *ucontext)
+					      struct ib_uverbs_file *ufile)
 {
-	return rdma_alloc_begin_uobject(type, ucontext->ufile);
+	return rdma_alloc_begin_uobject(type, ufile);
 }
 
-#define uobj_alloc(_type, ucontext)	\
-	__uobj_alloc(uobj_get_type(_type), ucontext)
+#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile)
 
 #endif
 
-- 
cgit v1.2.3


From d0259e82e7d214340aed33732e9a5ce448564921 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 11:32:11 +0300
Subject: IB/uverbs: Remove ib_uobject_file

The only purpose for this structure was to hold the ib_uobject_file
pointer, but now that is part of the standard ib_uobject the structure
no longer makes any sense, so get rid of it.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c           | 47 +++++++++++----------------
 drivers/infiniband/core/uverbs.h              |  2 +-
 drivers/infiniband/core/uverbs_cmd.c          |  8 ++---
 drivers/infiniband/core/uverbs_main.c         |  8 +++--
 drivers/infiniband/core/uverbs_std_types.c    |  6 ++--
 drivers/infiniband/core/uverbs_std_types_cq.c |  2 +-
 include/rdma/ib_verbs.h                       |  6 ----
 include/rdma/uverbs_types.h                   |  5 +--
 8 files changed, 35 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 98e55cb118ab..847c6a2f1346 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -334,7 +334,6 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 		container_of(type, struct uverbs_obj_fd_type, type);
 	int new_fd;
 	struct ib_uobject *uobj;
-	struct ib_uobject_file *uobj_file;
 	struct file *filp;
 
 	new_fd = get_unused_fd_flags(O_CLOEXEC);
@@ -347,10 +346,9 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 		return uobj;
 	}
 
-	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
 	filp = anon_inode_getfile(fd_type->name,
 				  fd_type->fops,
-				  uobj_file,
+				  uobj,
 				  fd_type->flags);
 	if (IS_ERR(filp)) {
 		put_unused_fd(new_fd);
@@ -358,11 +356,11 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 		return (void *)filp;
 	}
 
-	uobj_file->uobj.id = new_fd;
-	uobj_file->uobj.object = filp;
-	uobj_file->ufile = ufile;
+	uobj->id = new_fd;
+	uobj->object = filp;
+	uobj->ufile = ufile;
 	INIT_LIST_HEAD(&uobj->list);
-	kref_get(&uobj_file->ufile->ref);
+	kref_get(&ufile->ref);
 
 	return uobj;
 }
@@ -398,10 +396,8 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 
 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 {
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
 	struct file *filp = uobj->object;
-	int id = uobj_file->uobj.id;
+	int id = uobj->id;
 
 	/* Unsuccessful NEW */
 	fput(filp);
@@ -413,9 +409,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 {
 	const struct uverbs_obj_fd_type *fd_type =
 		container_of(uobj->type, struct uverbs_obj_fd_type, type);
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
-	int ret = fd_type->context_closed(uobj_file, why);
+	int ret = fd_type->context_closed(uobj, why);
 
 	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
@@ -425,7 +419,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 		return ret;
 	}
 
-	uobj_file->uobj.context = NULL;
+	uobj->context = NULL;
 	return ret;
 }
 
@@ -530,14 +524,11 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
 
 static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 {
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
-
-	fd_install(uobj_file->uobj.id, uobj->object);
+	fd_install(uobj->id, uobj->object);
 	/* This shouldn't be used anymore. Use the file object instead */
-	uobj_file->uobj.id = 0;
+	uobj->id = 0;
 	/* Get another reference as we export this to the fops */
-	uverbs_uobject_get(&uobj_file->uobj);
+	uverbs_uobject_get(uobj);
 }
 
 int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
@@ -638,19 +629,19 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
 };
 EXPORT_SYMBOL(uverbs_idr_class);
 
-static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+static void _uverbs_close_fd(struct ib_uobject *uobj)
 {
-	struct ib_uverbs_file *ufile = uobj_file->ufile;
+	struct ib_uverbs_file *ufile = uobj->ufile;
 	int ret;
 
 	mutex_lock(&ufile->cleanup_mutex);
 
 	/* uobject was either already cleaned up or is cleaned up right now anyway */
-	if (!uobj_file->uobj.context ||
+	if (!uobj->context ||
 	    !down_read_trylock(&ufile->cleanup_rwsem))
 		goto unlock;
 
-	ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
+	ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE);
 	up_read(&ufile->cleanup_rwsem);
 	if (ret)
 		pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
@@ -660,11 +651,11 @@ unlock:
 
 void uverbs_close_fd(struct file *f)
 {
-	struct ib_uobject_file *uobj_file = f->private_data;
-	struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+	struct ib_uobject *uobj = f->private_data;
+	struct kref *uverbs_file_ref = &uobj->ufile->ref;
 
-	_uverbs_close_fd(uobj_file);
-	uverbs_uobject_put(&uobj_file->uobj);
+	_uverbs_close_fd(uobj);
+	uverbs_uobject_put(uobj);
 	kref_put(uverbs_file_ref, ib_uverbs_release_file);
 }
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 8b0a8ec98ac8..cbb727f0959f 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -130,7 +130,7 @@ struct ib_uverbs_async_event_file {
 };
 
 struct ib_uverbs_completion_event_file {
-	struct ib_uobject_file			uobj_file;
+	struct ib_uobject			uobj;
 	struct ib_uverbs_event_queue		ev_queue;
 };
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ed61bd5b9c2b..bb2df271d3ff 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -52,7 +52,6 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
 						fd, ufile);
-	struct ib_uobject_file *uobj_file;
 
 	if (IS_ERR(uobj))
 		return (void *)uobj;
@@ -60,9 +59,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
 	uverbs_uobject_get(uobj);
 	uobj_put_read(uobj);
 
-	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
-	return container_of(uobj_file, struct ib_uverbs_completion_event_file,
-			    uobj_file);
+	return container_of(uobj, struct ib_uverbs_completion_event_file,
+			    uobj);
 }
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -927,7 +925,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	resp.fd = uobj->id;
 
 	ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
-			       uobj_file.uobj);
+			       uobj);
 	ib_uverbs_init_event_queue(&ev_file->ev_queue);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5d64b9c481b9..e9a50ee0ba76 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -191,7 +191,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 		}
 		spin_unlock_irq(&ev_file->ev_queue.lock);
 
-		uverbs_uobject_put(&ev_file->uobj_file.uobj);
+		uverbs_uobject_put(&ev_file->uobj);
 	}
 
 	spin_lock_irq(&file->async_file->ev_queue.lock);
@@ -346,7 +346,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
 		filp->private_data;
 
 	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
-				    comp_ev_file->uobj_file.ufile, filp,
+				    comp_ev_file->uobj.ufile, filp,
 				    buf, count, pos,
 				    sizeof(struct ib_uverbs_comp_event_desc));
 }
@@ -428,7 +428,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
 
 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
 {
-	struct ib_uverbs_completion_event_file *file = filp->private_data;
+	struct ib_uobject *uobj = filp->private_data;
+	struct ib_uverbs_completion_event_file *file = container_of(
+		uobj, struct ib_uverbs_completion_event_file, uobj);
 	struct ib_uverbs_event *entry, *tmp;
 
 	spin_lock_irq(&file->ev_queue.lock);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 2f1a0b6598fe..912519fda3ba 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -190,12 +190,12 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 	return 0;
 }
 
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
 						   enum rdma_remove_reason why)
 {
 	struct ib_uverbs_completion_event_file *comp_event_file =
-		container_of(uobj_file, struct ib_uverbs_completion_event_file,
-			     uobj_file);
+		container_of(uobj, struct ib_uverbs_completion_event_file,
+			     uobj);
 	struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
 
 	spin_lock_irq(&event_queue->lock);
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 32c75cdd162f..9d39a9e1e411 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -93,7 +93,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	if (!IS_ERR(ev_file_uobj)) {
 		ev_file = container_of(ev_file_uobj,
 				       struct ib_uverbs_completion_event_file,
-				       uobj_file.uobj);
+				       uobj);
 		uverbs_uobject_get(ev_file_uobj);
 	}
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9c04cb5e4041..031d121190fd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1540,12 +1540,6 @@ struct ib_uobject {
 	const struct uverbs_obj_type *type;
 };
 
-struct ib_uobject_file {
-	struct ib_uobject	uobj;
-	/* ufile contains the lock between context release and file close */
-	struct ib_uverbs_file	*ufile;
-};
-
 struct ib_udata {
 	const void __user *inbuf;
 	void __user *outbuf;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 5290d8d34e9a..e2fc9db466d3 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -139,7 +139,7 @@ struct uverbs_obj_fd_type {
 	 * the driver is removed or the process terminated.
 	 */
 	struct uverbs_obj_type  type;
-	int (*context_closed)(struct ib_uobject_file *uobj_file,
+	int (*context_closed)(struct ib_uobject *uobj,
 			      enum rdma_remove_reason why);
 	const struct file_operations	*fops;
 	const char			*name;
@@ -156,7 +156,8 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
 	 {.type = {							\
 		.type_class = &uverbs_fd_class,				\
 		.obj_size = (_obj_size) +				\
-			UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \
+			UVERBS_BUILD_BUG_ON((_obj_size) <               \
+					    sizeof(struct ib_uobject)), \
 	 },								\
 	 .context_closed = _context_closed,				\
 	 .fops = _fops,							\
-- 
cgit v1.2.3


From d8095f94e19581057bcad35b8a725aa739e77595 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 2 Jul 2018 15:08:10 +0200
Subject: dmaengine: add support for reporting pause and resume separately

'cmd_pause' DMA channel capability means that respective DMA engine
supports both pausing and resuming given DMA channel. However, in some
cases it is important to know if DMA channel can be paused without the
need to resume it. This is a typical requirement for proper residue
reading on transfer timeout in UART drivers. There are also some DMA
engines with limited hardware, which doesn't really support resuming.

Reporting pause and resume capabilities separately allows UART drivers to
properly check for the really required capabilities and operate in DMA
mode also in systems with limited DMA hardware. On the other hand drivers,
which rely on full channel suspend/resume support, should now check for
both 'pause' and 'resume' features.

Existing clients of dma_get_slave_caps() have been checked and the only
driver which rely on proper channel resuming is soc-generic-dmaengine-pcm
driver, which has been updated to check the newly added capability.
Existing 'cmd_pause' now only indicates that DMA engine support pausing
given DMA channel.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmaengine.c               | 8 ++------
 include/linux/dmaengine.h             | 5 ++++-
 sound/soc/soc-generic-dmaengine-pcm.c | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 08ba8473a284..84ac38dbdb65 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -500,12 +500,8 @@ int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
 	caps->max_burst = device->max_burst;
 	caps->residue_granularity = device->residue_granularity;
 	caps->descriptor_reuse = device->descriptor_reuse;
-
-	/*
-	 * Some devices implement only pause (e.g. to get residuum) but no
-	 * resume. However cmd_pause is advertised as pause AND resume.
-	 */
-	caps->cmd_pause = !!(device->device_pause && device->device_resume);
+	caps->cmd_pause = !!device->device_pause;
+	caps->cmd_resume = !!device->device_resume;
 	caps->cmd_terminate = !!device->device_terminate_all;
 
 	return 0;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 861be5cab1df..c8c3a7a93802 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -415,7 +415,9 @@ enum dma_residue_granularity {
  *	each type, the dma controller should set BIT(<TYPE>) and same
  *	should be checked by controller as well
  * @max_burst: max burst capability per-transfer
- * @cmd_pause: true, if pause and thereby resume is supported
+ * @cmd_pause: true, if pause is supported (i.e. for reading residue or
+ *	       for resume later)
+ * @cmd_resume: true, if resume is supported
  * @cmd_terminate: true, if terminate cmd is supported
  * @residue_granularity: granularity of the reported transfer residue
  * @descriptor_reuse: if a descriptor can be reused by client and
@@ -427,6 +429,7 @@ struct dma_slave_caps {
 	u32 directions;
 	u32 max_burst;
 	bool cmd_pause;
+	bool cmd_resume;
 	bool cmd_terminate;
 	enum dma_residue_granularity residue_granularity;
 	bool descriptor_reuse;
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 56a541b9ff9e..76c46d793843 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -156,7 +156,7 @@ static int dmaengine_pcm_set_runtime_hwparams(struct snd_pcm_substream *substrea
 
 	ret = dma_get_slave_caps(chan, &dma_caps);
 	if (ret == 0) {
-		if (dma_caps.cmd_pause)
+		if (dma_caps.cmd_pause && dma_caps.cmd_resume)
 			hw.info |= SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME;
 		if (dma_caps.residue_granularity <= DMA_RESIDUE_GRANULARITY_SEGMENT)
 			hw.info |= SNDRV_PCM_INFO_BATCH;
-- 
cgit v1.2.3


From ffcfe25bb50f27395e15fa999f1a7eb769f55360 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:38 -0400
Subject: net: Add support for subordinate device traffic classes

This patch is meant to provide the basic tools needed to allow us to create
subordinate device traffic classes. The general idea here is to allow
subdividing the queues of a device into queue groups accessible through an
upper device such as a macvlan.

The idea here is to enforce the idea that an upper device has to be a
single queue device, ideally with IFF_NO_QUQUE set. With that being the
case we can pretty much guarantee that the tc_to_txq mappings and XPS maps
for the upper device are unused. As such we could reuse those in order to
support subdividing the lower device and distributing those queues between
the subordinate devices.

In order to distinguish between a regular set of traffic classes and if a
device is carrying subordinate traffic classes I changed num_tc from a u8
to a s16 value and use the negative values to represent the subordinate
pool values. So starting at -1 and running to -32768 we can encode those as
pool values, and the existing values of 0 to 15 can be maintained.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/netdevice.h | 16 ++++++++-
 net/core/dev.c            | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      | 21 ++++++++++-
 3 files changed, 124 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..b1ff77276bc4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -575,6 +575,9 @@ struct netdev_queue {
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
 	unsigned long		trans_timeout;
+
+	/* Subordinate device that the queue has been assigned to */
+	struct net_device	*sb_dev;
 /*
  * write-mostly part
  */
@@ -1991,7 +1994,7 @@ struct net_device {
 #ifdef CONFIG_DCB
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
-	u8			num_tc;
+	s16			num_tc;
 	struct netdev_tc_txq	tc_to_txq[TC_MAX_QUEUE];
 	u8			prio_tc_map[TC_BITMASK + 1];
 
@@ -2045,6 +2048,17 @@ int netdev_get_num_tc(struct net_device *dev)
 	return dev->num_tc;
 }
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev);
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset);
+int netdev_set_sb_channel(struct net_device *dev, u16 channel);
+static inline int netdev_get_sb_channel(struct net_device *dev)
+{
+	return max_t(int, -dev->num_tc, 0);
+}
+
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
diff --git a/net/core/dev.c b/net/core/dev.c
index 89825c1eccdc..cc1d6bba017a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2067,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
 		int i;
 
+		/* walk through the TCs and see if it falls into any of them */
 		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
 			if ((txq - tc->offset) < tc->count)
 				return i;
 		}
 
+		/* didn't find it, just return -1 to indicate no match */
 		return -1;
 	}
 
@@ -2260,7 +2262,14 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	unsigned int nr_ids;
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
@@ -2448,11 +2457,25 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+	/* Unbind any subordinate channels */
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev)
+			netdev_unbind_sb_channel(dev, txq->sb_dev);
+	}
+}
+
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
+	/* Reset TC configuration of device */
 	dev->num_tc = 0;
 	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
 	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2481,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
 	dev->num_tc = num_tc;
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_num_tc);
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev == sb_dev)
+			txq->sb_dev = NULL;
+	}
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset)
+{
+	/* Make certain the sb_dev and dev are already configured */
+	if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+		return -EINVAL;
+
+	/* We cannot hand out queues we don't have */
+	if ((offset + count) > dev->real_num_tx_queues)
+		return -EINVAL;
+
+	/* Record the mapping */
+	sb_dev->tc_to_txq[tc].count = count;
+	sb_dev->tc_to_txq[tc].offset = offset;
+
+	/* Provide a way for Tx queue to find the tc_to_txq map or
+	 * XPS map for itself.
+	 */
+	while (count--)
+		netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+	/* Do not use a multiqueue device to represent a subordinate channel */
+	if (netif_is_multiqueue(dev))
+		return -ENODEV;
+
+	/* We allow channels 1 - 32767 to be used for subordinate channels.
+	 * Channel 0 is meant to be "native" mode and used only to represent
+	 * the main root device. We allow writing 0 to reset the device back
+	 * to normal mode after being used as a subordinate channel.
+	 */
+	if (channel > S16_MAX)
+		return -EINVAL;
+
+	dev->num_tc = -channel;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index dce3ae0fbca2..ffa1d18f2c2c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1054,11 +1054,23 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
 		return -ENOENT;
 
 	index = get_netdev_queue_index(queue);
+
+	/* If queue belongs to subordinate dev use its TC mapping */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 	tc = netdev_txq_to_tc(dev, index);
 	if (tc < 0)
 		return -EINVAL;
 
-	return sprintf(buf, "%u\n", tc);
+	/* We can report the traffic class one of two ways:
+	 * Subordinate device traffic classes are reported with the traffic
+	 * class first, and then the subordinate class so for example TC0 on
+	 * subordinate device 2 will be reported as "0-2". If the queue
+	 * belongs to the root device it will be reported with just the
+	 * traffic class, so just "0" for TC 0 for example.
+	 */
+	return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
+				 sprintf(buf, "%u\n", tc);
 }
 
 #ifdef CONFIG_XPS
@@ -1225,7 +1237,14 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	index = get_netdev_queue_index(queue);
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
-- 
cgit v1.2.3


From eadec877ce9ca46a94e9036b5a44e7941d4fc501 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:48 -0400
Subject: net: Add support for subordinate traffic classes to netdev_pick_tx

This change makes it so that we can support the concept of subordinate
device traffic classes to the core networking code. In doing this we can
start pulling out the driver specific bits needed to support selecting a
queue based on an upper device.

The solution at is currently stands is only partially implemented. I have
the start of some XPS bits in here, but I would still need to allow for
configuration of the XPS maps on the queues reserved for the subordinate
devices. For now I am using the reference to the sb_dev XPS map as just a
way to skip the lookup of the lower device XPS map for now as that would
result in the wrong queue being picked.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 19 +++------
 drivers/net/macvlan.c                         | 10 +----
 include/linux/netdevice.h                     |  4 +-
 net/core/dev.c                                | 58 ++++++++++++++++-----------
 4 files changed, 45 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 80225af2acb1..abb176df2e7f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8208,20 +8208,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 					      input, common, ring->queue_index);
 }
 
+#ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 			      void *accel_priv, select_queue_fallback_t fallback)
 {
-	struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
-#ifdef IXGBE_FCOE
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
-#endif
 	int txq;
 
-	if (fwd_adapter) {
-		u8 tc = netdev_get_num_tc(dev) ?
-			netdev_get_prio_tc_map(dev, skb->priority) : 0;
-		struct net_device *vdev = fwd_adapter->netdev;
+	if (accel_priv) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		struct net_device *vdev = accel_priv;
 
 		txq = vdev->tc_to_txq[tc].offset;
 		txq += reciprocal_scale(skb_get_hash(skb),
@@ -8230,8 +8227,6 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		return txq;
 	}
 
-#ifdef IXGBE_FCOE
-
 	/*
 	 * only execute the code below if protocol is FCoE
 	 * or FIP and we have FCoE enabled on the adapter
@@ -8257,11 +8252,9 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 		txq -= f->indices;
 
 	return txq + f->offset;
-#else
-	return fallback(dev, skb);
-#endif
 }
 
+#endif
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 			       struct xdp_frame *xdpf)
 {
@@ -10058,7 +10051,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
 	.ndo_start_xmit		= ixgbe_xmit_frame,
-	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ixgbe_set_mac,
@@ -10081,6 +10073,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
 #ifdef IXGBE_FCOE
+	.ndo_select_queue	= ixgbe_select_queue,
 	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
 	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
 	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index adde8fc45588..401e1d1ce1ec 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -514,7 +514,6 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 	const struct macvlan_dev *vlan = netdev_priv(dev);
 	const struct macvlan_port *port = vlan->port;
 	const struct macvlan_dev *dest;
-	void *accel_priv = NULL;
 
 	if (vlan->mode == MACVLAN_MODE_BRIDGE) {
 		const struct ethhdr *eth = (void *)skb->data;
@@ -533,15 +532,10 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NET_XMIT_SUCCESS;
 		}
 	}
-
-	/* For packets that are non-multicast and not bridged we will pass
-	 * the necessary information so that the lowerdev can distinguish
-	 * the source of the packets via the accel_priv value.
-	 */
-	accel_priv = vlan->accel_priv;
 xmit_world:
 	skb->dev = vlan->lowerdev;
-	return dev_queue_xmit_accel(skb, accel_priv);
+	return dev_queue_xmit_accel(skb,
+				    netdev_get_sb_channel(dev) ? dev : NULL);
 }
 
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b1ff77276bc4..fda0bcda7a42 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2103,7 +2103,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv);
+				    struct net_device *sb_dev);
 
 /* returns the headroom that the master device needs to take in account
  * when forwarding to this dev
@@ -2568,7 +2568,7 @@ void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
diff --git a/net/core/dev.c b/net/core/dev.c
index cc1d6bba017a..09a7cc2f3c55 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2786,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor a Tx queues' number
  * to be used as a distribution range.
  */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+		       const struct net_device *sb_dev,
+		       struct sk_buff *skb)
 {
 	u32 hash;
 	u16 qoffset = 0;
 	u16 qcount = dev->real_num_tx_queues;
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+		qoffset = sb_dev->tc_to_txq[tc].offset;
+		qcount = sb_dev->tc_to_txq[tc].count;
+	}
+
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
 		while (unlikely(hash >= qcount))
 			hash -= qcount;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
+		return hash + qoffset;
 	}
 
 	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
@@ -3573,7 +3575,8 @@ static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
 }
 #endif
 
-static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+			 struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
@@ -3587,7 +3590,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 	if (!static_key_false(&xps_rxqs_needed))
 		goto get_cpus_map;
 
-	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
 	if (dev_maps) {
 		int tci = sk_rx_queue_get(sk);
 
@@ -3598,7 +3601,7 @@ static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 
 get_cpus_map:
 	if (queue_index < 0) {
-		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 		if (dev_maps) {
 			unsigned int tci = skb->sender_cpu - 1;
 
@@ -3614,17 +3617,20 @@ get_cpus_map:
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			     struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
 
+	sb_dev = sb_dev ? : dev;
+
 	if (queue_index < 0 || skb->ooo_okay ||
 	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
+		int new_index = get_xps_queue(dev, sb_dev, skb);
 
 		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+			new_index = skb_tx_hash(dev, sb_dev, skb);
 
 		if (queue_index != new_index && sk &&
 		    sk_fullsock(sk) &&
@@ -3637,9 +3643,15 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 	return queue_index;
 }
 
+static u16 __netdev_pick_tx(struct net_device *dev,
+			    struct sk_buff *skb)
+{
+	return ___netdev_pick_tx(dev, skb, NULL);
+}
+
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv)
+				    struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3654,10 +3666,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = __netdev_pick_tx(dev, skb);
+			queue_index = ___netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3669,7 +3681,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 /**
  *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
- *	@accel_priv: private data used for L2 forwarding offload
+ *	@sb_dev: suboordinate device used for L2 forwarding offload
  *
  *	Queue a buffer for transmission to a network device. The caller must
  *	have set the device and priority and built the buffer before calling
@@ -3692,7 +3704,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -3731,7 +3743,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, accel_priv);
+	txq = netdev_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3805,9 +3817,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_queue_xmit);
 
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 {
-	return __dev_queue_xmit(skb, accel_priv);
+	return __dev_queue_xmit(skb, sb_dev);
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
-- 
cgit v1.2.3


From a4ea8a3dacc312c3402c78f6e4843afdda9b43a0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:54 -0400
Subject: net: Add generic ndo_select_queue functions

This patch adds a generic version of the ndo_select_queue functions for
either returning 0 or selecting a queue based on the processor ID. This is
generally meant to just reduce the number of functions we have to change
in the future when we have to deal with ndo_select_queue changes.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/lantiq_etop.c   | 10 +---------
 drivers/net/ethernet/ti/netcp_core.c |  9 +--------
 drivers/staging/netlogic/xlr_net.c   |  9 +--------
 include/linux/netdevice.h            |  4 ++++
 net/core/dev.c                       | 14 ++++++++++++++
 net/packet/af_packet.c               |  2 +-
 6 files changed, 22 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index afc810069440..7a637b51c7d2 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -563,14 +563,6 @@ ltq_etop_set_multicast_list(struct net_device *dev)
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-static u16
-ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
-		      void *accel_priv, select_queue_fallback_t fallback)
-{
-	/* we are currently only using the first queue */
-	return 0;
-}
-
 static int
 ltq_etop_init(struct net_device *dev)
 {
@@ -641,7 +633,7 @@ static const struct net_device_ops ltq_eth_netdev_ops = {
 	.ndo_set_mac_address = ltq_etop_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_set_rx_mode = ltq_etop_set_multicast_list,
-	.ndo_select_queue = ltq_etop_select_queue,
+	.ndo_select_queue = dev_pick_tx_zero,
 	.ndo_init = ltq_etop_init,
 	.ndo_tx_timeout = ltq_etop_tx_timeout,
 };
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 6ebf110cd594..a1d335a3c5e4 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1889,13 +1889,6 @@ static int netcp_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid)
 	return err;
 }
 
-static u16 netcp_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv,
-			      select_queue_fallback_t fallback)
-{
-	return 0;
-}
-
 static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
@@ -1972,7 +1965,7 @@ static const struct net_device_ops netcp_netdev_ops = {
 	.ndo_vlan_rx_add_vid	= netcp_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= netcp_rx_kill_vid,
 	.ndo_tx_timeout		= netcp_ndo_tx_timeout,
-	.ndo_select_queue	= netcp_select_queue,
+	.ndo_select_queue	= dev_pick_tx_zero,
 	.ndo_setup_tc		= netcp_setup_tc,
 };
 
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index e461168313bf..4e6611e4c59b 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -290,13 +290,6 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
-				void *accel_priv,
-				select_queue_fallback_t fallback)
-{
-	return (u16)smp_processor_id();
-}
-
 static void xlr_hw_set_mac_addr(struct net_device *ndev)
 {
 	struct xlr_net_priv *priv = netdev_priv(ndev);
@@ -403,7 +396,7 @@ static const struct net_device_ops xlr_netdev_ops = {
 	.ndo_open = xlr_net_open,
 	.ndo_stop = xlr_net_stop,
 	.ndo_start_xmit = xlr_net_start_xmit,
-	.ndo_select_queue = xlr_net_select_queue,
+	.ndo_select_queue = dev_pick_tx_cpu_id,
 	.ndo_set_mac_address = xlr_net_set_mac_addr,
 	.ndo_set_rx_mode = xlr_set_rx_mode,
 	.ndo_get_stats64 = xlr_stats,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fda0bcda7a42..46f4c44ce3e4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2567,6 +2567,10 @@ void dev_close(struct net_device *dev);
 void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     void *accel_priv, select_queue_fallback_t fallback);
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv, select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
diff --git a/net/core/dev.c b/net/core/dev.c
index 09a7cc2f3c55..b5e538032d5e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3617,6 +3617,20 @@ get_cpus_map:
 #endif
 }
 
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     void *accel_priv, select_queue_fallback_t fallback)
+{
+	return 0;
+}
+EXPORT_SYMBOL(dev_pick_tx_zero);
+
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       void *accel_priv, select_queue_fallback_t fallback)
+{
+	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+EXPORT_SYMBOL(dev_pick_tx_cpu_id);
+
 static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 			     struct net_device *sb_dev)
 {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 47931ebfaef3..f37d087ae652 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -277,7 +277,7 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 
 static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
 {
-	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+	return dev_pick_tx_cpu_id(dev, skb, NULL, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
-- 
cgit v1.2.3


From 4f49dec9075aa0277b8c9c657ec31e6361f88724 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:19:59 -0400
Subject: net: allow ndo_select_queue to pass netdev

This patch makes it so that instead of passing a void pointer as the
accel_priv we instead pass a net_device pointer as sb_dev. Making this
change allows us to pass the subordinate device through to the fallback
function eventually so that we can keep the actual code in the
ndo_select_queue call as focused on possible on the exception cases.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/infiniband/hw/hfi1/vnic_main.c            |  2 +-
 drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c |  4 ++--
 drivers/net/bonding/bond_main.c                   |  3 ++-
 drivers/net/ethernet/amazon/ena/ena_netdev.c      |  3 ++-
 drivers/net/ethernet/broadcom/bcmsysport.c        |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  3 ++-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  7 ++++---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c        |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   |  3 ++-
 drivers/net/ethernet/renesas/ravb_main.c          |  3 ++-
 drivers/net/ethernet/sun/ldmvsw.c                 |  3 ++-
 drivers/net/ethernet/sun/sunvnet.c                |  3 ++-
 drivers/net/hyperv/netvsc_drv.c                   |  4 ++--
 drivers/net/net_failover.c                        |  5 +++--
 drivers/net/team/team.c                           |  3 ++-
 drivers/net/tun.c                                 |  3 ++-
 drivers/net/wireless/marvell/mwifiex/main.c       |  3 ++-
 drivers/net/xen-netback/interface.c               |  2 +-
 drivers/net/xen-netfront.c                        |  3 ++-
 drivers/staging/rtl8188eu/os_dep/os_intfs.c       |  3 ++-
 drivers/staging/rtl8723bs/os_dep/os_intfs.c       |  7 +++----
 include/linux/netdevice.h                         | 11 +++++++----
 net/core/dev.c                                    |  6 ++++--
 net/mac80211/iface.c                              |  4 ++--
 29 files changed, 66 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 5d65582fe4d9..616fc9b6fad8 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -423,7 +423,7 @@ tx_finish:
 
 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
 				  struct sk_buff *skb,
-				  void *accel_priv,
+				  struct net_device *sb_dev,
 				  select_queue_fallback_t fallback)
 {
 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 0c8aec62a425..61558788b3fa 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -95,7 +95,7 @@ static netdev_tx_t opa_netdev_start_xmit(struct sk_buff *skb,
 }
 
 static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
-				 void *accel_priv,
+				 struct net_device *sb_dev,
 				 select_queue_fallback_t fallback)
 {
 	struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
@@ -107,7 +107,7 @@ static u16 opa_vnic_select_queue(struct net_device *netdev, struct sk_buff *skb,
 	mdata->entropy = opa_vnic_calc_entropy(skb);
 	mdata->vl = opa_vnic_get_vl(adapter, skb);
 	rc = adapter->rn_ops->ndo_select_queue(netdev, skb,
-					       accel_priv, fallback);
+					       sb_dev, fallback);
 	skb_pull(skb, sizeof(*mdata));
 	return rc;
 }
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 63e3844c5bec..9a2ea3c1f949 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4094,7 +4094,8 @@ static inline int bond_slave_override(struct bonding *bond,
 
 
 static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* This helper function exists to help dev_pick_tx get the correct
 	 * destination queue.  Using a helper function skips a call to
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index f2af87d70594..e3befb1f9204 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2213,7 +2213,8 @@ static void ena_netpoll(struct net_device *netdev)
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	u16 qid;
 	/* we suspect that this is good for in--kernel network services that
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index d5fca2e5a9bc..32f548e6431d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2107,7 +2107,7 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 };
 
 static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
-				    void *accel_priv,
+				    struct net_device *sb_dev,
 				    select_queue_fallback_t fallback)
 {
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index af7b5a4d8ba0..e4e1cf907ac6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1910,7 +1910,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
 }
 
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a8ce5c55bbb0..0e508e5defce 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -497,7 +497,8 @@ int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 
 static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 					struct bnx2x_fastpath *fp,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0d91716a2566..5dc5e5604f05 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -930,7 +930,8 @@ freeout:
 }
 
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	int txq;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ef9ef703d13a..ff7a74ec8f11 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2022,7 +2022,8 @@ static void hns_nic_get_stats64(struct net_device *ndev,
 
 static u16
 hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback)
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
 {
 	struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index abb176df2e7f..8c7a68c57afa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8210,15 +8210,16 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 
 #ifdef IXGBE_FCOE
 static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
-			      void *accel_priv, select_queue_fallback_t fallback)
+			      struct net_device *sb_dev,
+			      select_queue_fallback_t fallback)
 {
 	struct ixgbe_adapter *adapter;
 	struct ixgbe_ring_feature *f;
 	int txq;
 
-	if (accel_priv) {
+	if (sb_dev) {
 		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-		struct net_device *vdev = accel_priv;
+		struct net_device *vdev = sb_dev;
 
 		txq = vdev->tc_to_txq[tc].offset;
 		txq += reciprocal_scale(skb_get_hash(skb),
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0227786308af..df2996618cd1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -688,7 +688,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc,
 }
 
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback)
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index ace6545f82e6..c3228b89df46 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -699,7 +699,8 @@ void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
-			 void *accel_priv, select_queue_fallback_t fallback);
+			 struct net_device *sb_dev,
+			 select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e2b7586ed7a0..e1b237ccdf56 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -865,7 +865,8 @@ struct mlx5e_profile {
 void mlx5e_build_ptys2ethtool_map(void);
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5e_tx_wqe *wqe, u16 pi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index f0739dae7b56..dfcc3710b65f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -111,7 +111,8 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 #endif
 
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int channel_ix = fallback(dev, skb);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 68f122140966..4a7f54c8e7aa 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1656,7 +1656,8 @@ drop:
 }
 
 static u16 ravb_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/* If skb needs TX timestamp, it is handled in network control queue */
 	return (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) ? RAVB_NC :
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index a5dd627fe2f9..d42f47f6c632 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -101,7 +101,8 @@ static struct vnet_port *vsw_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct vnet_port *port = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a94f50442613..12539b357a78 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -234,7 +234,8 @@ static struct vnet_port *vnet_tx_port_find(struct sk_buff *skb,
 }
 
 static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	struct vnet *vp = netdev_priv(dev);
 	struct vnet_port *port = __tx_port_find(vp, skb);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index dd1d6e115145..98c0107d6ca1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -329,7 +329,7 @@ static u16 netvsc_pick_tx(struct net_device *ndev, struct sk_buff *skb)
 }
 
 static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct net_device_context *ndc = netdev_priv(ndev);
@@ -343,7 +343,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 
 		if (vf_ops->ndo_select_queue)
 			txq = vf_ops->ndo_select_queue(vf_netdev, skb,
-						       accel_priv, fallback);
+						       sb_dev, fallback);
 		else
 			txq = fallback(vf_netdev, skb);
 
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 4f390fa557e4..78b549698b7b 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -115,7 +115,8 @@ static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb,
 }
 
 static u16 net_failover_select_queue(struct net_device *dev,
-				     struct sk_buff *skb, void *accel_priv,
+				     struct sk_buff *skb,
+				     struct net_device *sb_dev,
 				     select_queue_fallback_t fallback)
 {
 	struct net_failover_info *nfo_info = netdev_priv(dev);
@@ -128,7 +129,7 @@ static u16 net_failover_select_queue(struct net_device *dev,
 
 		if (ops->ndo_select_queue)
 			txq = ops->ndo_select_queue(primary_dev, skb,
-						    accel_priv, fallback);
+						    sb_dev, fallback);
 		else
 			txq = fallback(primary_dev, skb);
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index b070959737ff..3a95eaae0c98 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1707,7 +1707,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
-			     void *accel_priv, select_queue_fallback_t fallback)
+			     struct net_device *sb_dev,
+			     select_queue_fallback_t fallback)
 {
 	/*
 	 * This helper function exists to help dev_pick_tx get the correct
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a192a017cc68..76f0f4131197 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -607,7 +607,8 @@ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 }
 
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	u16 ret;
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index 510f6b8e717d..fa3e8ddfe9a9 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1279,7 +1279,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
 
 static u16
 mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
-				void *accel_priv, select_queue_fallback_t fallback)
+				struct net_device *sb_dev,
+				select_queue_fallback_t fallback)
 {
 	skb->priority = cfg80211_classify8021d(skb, NULL);
 	return mwifiex_1d_to_wmm_queue[skb->priority];
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 78ebe494fef0..19c4c585f472 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -148,7 +148,7 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
 }
 
 static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv,
+			       struct net_device *sb_dev,
 			       select_queue_fallback_t fallback)
 {
 	struct xenvif *vif = netdev_priv(dev);
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a57daecf1d57..d67cd379d156 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -545,7 +545,8 @@ static int xennet_count_skb_slots(struct sk_buff *skb)
 }
 
 static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb,
-			       void *accel_priv, select_queue_fallback_t fallback)
+			       struct net_device *sb_dev,
+			       select_queue_fallback_t fallback)
 {
 	unsigned int num_queues = dev->real_num_tx_queues;
 	u32 hash;
diff --git a/drivers/staging/rtl8188eu/os_dep/os_intfs.c b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
index add1ba00f3e9..38e85c8a85c8 100644
--- a/drivers/staging/rtl8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8188eu/os_dep/os_intfs.c
@@ -253,7 +253,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
-			    void *accel_priv, select_queue_fallback_t fallback)
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/drivers/staging/rtl8723bs/os_dep/os_intfs.c b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
index ace68f023b49..181642358e3f 100644
--- a/drivers/staging/rtl8723bs/os_dep/os_intfs.c
+++ b/drivers/staging/rtl8723bs/os_dep/os_intfs.c
@@ -403,10 +403,9 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
 }
 
 
-static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb
-				, void *accel_priv
-				, select_queue_fallback_t fallback
-)
+static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev,
+			    select_queue_fallback_t fallback)
 {
 	struct adapter	*padapter = rtw_netdev_priv(dev);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 46f4c44ce3e4..bbf062c1ca8a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -957,7 +957,8 @@ struct dev_ifalias {
  *	those the driver believes to be appropriate.
  *
  * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
- *                         void *accel_priv, select_queue_fallback_t fallback);
+ *                         struct net_device *sb_dev,
+ *                         select_queue_fallback_t fallback);
  *	Called to decide which queue to use when device supports multiple
  *	transmit queues.
  *
@@ -1229,7 +1230,7 @@ struct net_device_ops {
 						      netdev_features_t features);
 	u16			(*ndo_select_queue)(struct net_device *dev,
 						    struct sk_buff *skb,
-						    void *accel_priv,
+						    struct net_device *sb_dev,
 						    select_queue_fallback_t fallback);
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
@@ -2568,9 +2569,11 @@ void dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback);
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback);
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback);
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev);
 int dev_direct_xmit(struct sk_buff *skb, u16 queue_id);
diff --git a/net/core/dev.c b/net/core/dev.c
index b5e538032d5e..a051ce27198b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3618,14 +3618,16 @@ get_cpus_map:
 }
 
 u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
-		     void *accel_priv, select_queue_fallback_t fallback)
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
 {
 	return 0;
 }
 EXPORT_SYMBOL(dev_pick_tx_zero);
 
 u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
-		       void *accel_priv, select_queue_fallback_t fallback)
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
 {
 	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
 }
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 555e389b7dfa..5e6cf2cee965 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1130,7 +1130,7 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 void *accel_priv,
+					 struct net_device *sb_dev,
 					 select_queue_fallback_t fallback)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
@@ -1176,7 +1176,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  void *accel_priv,
+					  struct net_device *sb_dev,
 					  select_queue_fallback_t fallback)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
-- 
cgit v1.2.3


From 8ec56fc3c5ee6f9700adac190e9ce5b8859a58b6 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Mon, 9 Jul 2018 12:20:04 -0400
Subject: net: allow fallback function to pass netdev

For most of these calls we can just pass NULL through to the fallback
function as the sb_dev. The only cases where we cannot are the cases where
we might be dealing with either an upper device or a driver that would
have configured things to support an sb_dev itself.

The only driver that has any significant change in this patch set should be
ixgbe as we can drop the redundant functionality that existed in both the
ndo_select_queue function and the fallback function that was passed through
to us.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c    |  2 +-
 drivers/net/ethernet/broadcom/bcmsysport.c      |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c   |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c   |  4 ++--
 drivers/net/ethernet/mellanox/mlx4/en_tx.c      |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c |  2 +-
 drivers/net/hyperv/netvsc_drv.c                 |  2 +-
 drivers/net/net_failover.c                      |  2 +-
 drivers/net/xen-netback/interface.c             |  2 +-
 include/linux/netdevice.h                       |  3 ++-
 net/core/dev.c                                  | 12 +++---------
 net/packet/af_packet.c                          |  7 ++++---
 14 files changed, 24 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index e3befb1f9204..c673ac2df65b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2224,7 +2224,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb))
 		qid = skb_get_rx_queue(skb);
 	else
-		qid = fallback(dev, skb);
+		qid = fallback(dev, skb, NULL);
 
 	return qid;
 }
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 32f548e6431d..eb890c4b3b2d 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -2116,7 +2116,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	unsigned int q, port;
 
 	if (!netdev_uses_dsa(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	/* DSA tagging layer will have configured the correct queue */
 	q = BRCM_TAG_GET_QUEUE(queue);
@@ -2124,7 +2124,7 @@ static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
 	tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
 
 	if (unlikely(!tx_ring))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
 	return tx_ring->index;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index e4e1cf907ac6..5a727d4729da 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1933,7 +1933,8 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
 	}
 
 	/* select a non-FCoE queue */
-	return fallback(dev, skb) % (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
+	return fallback(dev, skb, NULL) %
+	       (BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos);
 }
 
 void bnx2x_set_num_queues(struct bnx2x *bp)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5dc5e5604f05..40cf8dc9f163 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -973,7 +973,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 		return txq;
 	}
 
-	return fallback(dev, skb) % dev->real_num_tx_queues;
+	return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 }
 
 static int closest_timer(const struct sge *s, int time)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ff7a74ec8f11..948b3e0d18f4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2033,7 +2033,7 @@ hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
 	    is_multicast_ether_addr(eth_hdr->h_dest))
 		return 0;
 	else
-		return fallback(ndev, skb);
+		return fallback(ndev, skb, NULL);
 }
 
 static const struct net_device_ops hns_nic_netdev_ops = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8c7a68c57afa..bd6d9ea27b4b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8237,11 +8237,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 	case htons(ETH_P_FIP):
 		adapter = netdev_priv(dev);
 
-		if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+		if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
 			break;
 		/* fall through */
 	default:
-		return fallback(dev, skb);
+		return fallback(dev, skb, sb_dev);
 	}
 
 	f = &adapter->ring_feature[RING_F_FCOE];
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index df2996618cd1..1857ee0f0871 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -695,9 +695,9 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 	u16 rings_p_up = priv->num_tx_rings_p_up;
 
 	if (netdev_get_num_tc(dev))
-		return fallback(dev, skb);
+		return fallback(dev, skb, NULL);
 
-	return fallback(dev, skb) % rings_p_up;
+	return fallback(dev, skb, NULL) % rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, const void *src,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index dfcc3710b65f..9106ea45e3cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -115,7 +115,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       select_queue_fallback_t fallback)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	int channel_ix = fallback(dev, skb);
+	int channel_ix = fallback(dev, skb, NULL);
 	u16 num_channels;
 	int up = 0;
 
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 98c0107d6ca1..cf4f40a04194 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -345,7 +345,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
 			txq = vf_ops->ndo_select_queue(vf_netdev, skb,
 						       sb_dev, fallback);
 		else
-			txq = fallback(vf_netdev, skb);
+			txq = fallback(vf_netdev, skb, NULL);
 
 		/* Record the queue selected by VF so that it can be
 		 * used for common case where VF has more queues than
diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 78b549698b7b..d00d42c845b7 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -131,7 +131,7 @@ static u16 net_failover_select_queue(struct net_device *dev,
 			txq = ops->ndo_select_queue(primary_dev, skb,
 						    sb_dev, fallback);
 		else
-			txq = fallback(primary_dev, skb);
+			txq = fallback(primary_dev, skb, NULL);
 
 		qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 19c4c585f472..92274c237200 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -155,7 +155,7 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
 	unsigned int size = vif->hash.size;
 
 	if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
-		return fallback(dev, skb) % dev->real_num_tx_queues;
+		return fallback(dev, skb, NULL) % dev->real_num_tx_queues;
 
 	xenvif_set_skb_hash(vif, skb);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bbf062c1ca8a..2daf2fa6554f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -793,7 +793,8 @@ static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a,
 }
 
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
-				       struct sk_buff *skb);
+				       struct sk_buff *skb,
+				       struct net_device *sb_dev);
 
 enum tc_setup_type {
 	TC_SETUP_QDISC_MQPRIO,
diff --git a/net/core/dev.c b/net/core/dev.c
index a051ce27198b..e18d81837a6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3633,8 +3633,8 @@ u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(dev_pick_tx_cpu_id);
 
-static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
-			     struct net_device *sb_dev)
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
@@ -3659,12 +3659,6 @@ static u16 ___netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 	return queue_index;
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev,
-			    struct sk_buff *skb)
-{
-	return ___netdev_pick_tx(dev, skb, NULL);
-}
-
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
 				    struct net_device *sb_dev)
@@ -3685,7 +3679,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = ___netdev_pick_tx(dev, skb, sb_dev);
+			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f37d087ae652..00189a3b07f2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+				  struct net_device *sb_dev)
 {
-	return dev_pick_tx_cpu_id(dev, skb, NULL, NULL);
+	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
 						    __packet_pick_tx_queue);
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	} else {
-		queue_index = __packet_pick_tx_queue(dev, skb);
+		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
 	}
 
 	return queue_index;
-- 
cgit v1.2.3


From 9f17dbf04ddf55ae48f5bbafea4c4920ea943215 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 9 Jul 2018 18:10:02 +0100
Subject: netfilter: fix use-after-free in NF_HOOK_LIST

nf_hook() can free the skb, so we need to remove it from the list before
 calling, and add passed skbs to a sublist afterwards.

Fixes: 17266ee93984 ("net: ipv4: listified version of ip_rcv")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 5a5e0a2ab2a3..23b48de8c2e2 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -294,12 +294,16 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct sk_buff *skb, *next;
+	struct list_head sublist;
 
+	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
-		int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
-		if (ret != 1)
-			list_del(&skb->list);
+		list_del(&skb->list);
+		if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
+			list_add_tail(&skb->list, &sublist);
 	}
+	/* Put passed packets back on main list */
+	list_splice(&sublist, head);
 }
 
 /* Call setsockopt() */
-- 
cgit v1.2.3


From 52b509218f0ab5946f9cbaf5501d88f69333f0e3 Mon Sep 17 00:00:00 2001
From: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Date: Mon, 9 Jul 2018 16:20:56 -0700
Subject: net: Use __u32 in uapi net_stamp.h

We are not supposed to use u32 in uapi, so change the flags member of
struct sock_txtime from u32 to __u32 instead.

Fixes: 80b14dee2bea ("net: Add a new socket option for a future transmit time")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_tstamp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f8f4539f1135..97ff3c17ec4d 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -155,8 +155,8 @@ enum txtime_flags {
 };
 
 struct sock_txtime {
-	clockid_t       clockid;        /* reference clockid */
-	u32             flags;          /* flags defined by enum txtime_flags */
+	clockid_t	clockid;	/* reference clockid */
+	__u32		flags;		/* as defined by enum txtime_flags */
 };
 
 #endif /* _NET_TIMESTAMPING_H */
-- 
cgit v1.2.3


From 91c17a7006d2e8313afb9666f66313fdc992bfda Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Mon, 9 Jul 2018 22:18:37 +0530
Subject: clk: ti: dra7: Add clkctrl clock data for the mcan clocks

Add clkctrl data for the m_can clocks and register it within the
clkctrl driver

Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Stephen Boyd <sboyd@kernel.org>
CC: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk-7xx.c         | 1 +
 include/dt-bindings/clock/dra7.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
index fb249a1637a5..71a122b2dc67 100644
--- a/drivers/clk/ti/clk-7xx.c
+++ b/drivers/clk/ti/clk-7xx.c
@@ -708,6 +708,7 @@ static const struct omap_clkctrl_reg_data dra7_wkupaon_clkctrl_regs[] __initcons
 	{ DRA7_COUNTER_32K_CLKCTRL, NULL, 0, "wkupaon_iclk_mux" },
 	{ DRA7_UART10_CLKCTRL, dra7_uart10_bit_data, CLKF_SW_SUP, "wkupaon_cm:clk:0060:24" },
 	{ DRA7_DCAN1_CLKCTRL, dra7_dcan1_bit_data, CLKF_SW_SUP, "wkupaon_cm:clk:0068:24" },
+	{ DRA7_ADC_CLKCTRL, NULL, CLKF_SW_SUP, "mcan_clk"},
 	{ 0 },
 };
 
diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h
index 5e1061b15aed..d7549c57cac3 100644
--- a/include/dt-bindings/clock/dra7.h
+++ b/include/dt-bindings/clock/dra7.h
@@ -168,5 +168,6 @@
 #define DRA7_COUNTER_32K_CLKCTRL	DRA7_CLKCTRL_INDEX(0x50)
 #define DRA7_UART10_CLKCTRL	DRA7_CLKCTRL_INDEX(0x80)
 #define DRA7_DCAN1_CLKCTRL	DRA7_CLKCTRL_INDEX(0x88)
+#define DRA7_ADC_CLKCTRL	DRA7_CLKCTRL_INDEX(0xa0)
 
 #endif
-- 
cgit v1.2.3


From 7f35e63dbfcb627bd30bac45702ffdf1ddde1516 Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Mon, 9 Jul 2018 22:18:38 +0530
Subject: bus: ti-sysc: Add support for using ti-sysc for MCAN on dra76x

The dra76x MCAN generic interconnect module has a its own
format for the bits in the control registers.

Therefore add a new module type, new regbits and new capabilities
specific to the MCAN module.

Acked-by: Rob Herring <robh@kernel.org>
CC: Tony Lindgren <tony@atomide.com>
Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 Documentation/devicetree/bindings/bus/ti-sysc.txt |  1 +
 drivers/bus/ti-sysc.c                             | 18 ++++++++++++++++++
 include/dt-bindings/bus/ti-sysc.h                 |  2 ++
 include/linux/platform_data/ti-sysc.h             |  1 +
 4 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.txt b/Documentation/devicetree/bindings/bus/ti-sysc.txt
index d8ed5b780ed9..91dc2333af01 100644
--- a/Documentation/devicetree/bindings/bus/ti-sysc.txt
+++ b/Documentation/devicetree/bindings/bus/ti-sysc.txt
@@ -36,6 +36,7 @@ Required standard properties:
 		"ti,sysc-omap-aes"
 		"ti,sysc-mcasp"
 		"ti,sysc-usb-host-fs"
+		"ti,sysc-dra7-mcan"
 
 - reg		shall have register areas implemented for the interconnect
 		target module in question such as revision, sysc and syss
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 80d60f43db56..c9db5369e2ec 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1552,6 +1552,23 @@ static const struct sysc_capabilities sysc_omap4_usb_host_fs = {
 	.regbits = &sysc_regbits_omap4_usb_host_fs,
 };
 
+static const struct sysc_regbits sysc_regbits_dra7_mcan = {
+	.dmadisable_shift = -ENODEV,
+	.midle_shift = -ENODEV,
+	.sidle_shift = -ENODEV,
+	.clkact_shift = -ENODEV,
+	.enwkup_shift = 4,
+	.srst_shift = 0,
+	.emufree_shift = -ENODEV,
+	.autoidle_shift = -ENODEV,
+};
+
+static const struct sysc_capabilities sysc_dra7_mcan = {
+	.type = TI_SYSC_DRA7_MCAN,
+	.sysc_mask = SYSC_DRA7_MCAN_ENAWAKEUP | SYSC_OMAP4_SOFTRESET,
+	.regbits = &sysc_regbits_dra7_mcan,
+};
+
 static int sysc_init_pdata(struct sysc *ddata)
 {
 	struct ti_sysc_platform_data *pdata = dev_get_platdata(ddata->dev);
@@ -1743,6 +1760,7 @@ static const struct of_device_id sysc_match[] = {
 	{ .compatible = "ti,sysc-mcasp", .data = &sysc_omap4_mcasp, },
 	{ .compatible = "ti,sysc-usb-host-fs",
 	  .data = &sysc_omap4_usb_host_fs, },
+	{ .compatible = "ti,sysc-dra7-mcan", .data = &sysc_dra7_mcan, },
 	{  },
 };
 MODULE_DEVICE_TABLE(of, sysc_match);
diff --git a/include/dt-bindings/bus/ti-sysc.h b/include/dt-bindings/bus/ti-sysc.h
index 2c005376ac0e..7138384e2ef9 100644
--- a/include/dt-bindings/bus/ti-sysc.h
+++ b/include/dt-bindings/bus/ti-sysc.h
@@ -15,6 +15,8 @@
 /* SmartReflex sysc found on 36xx and later */
 #define SYSC_OMAP3_SR_ENAWAKEUP		(1 << 26)
 
+#define SYSC_DRA7_MCAN_ENAWAKEUP	(1 << 4)
+
 /* SYSCONFIG STANDBYMODE/MIDLEMODE/SIDLEMODE supported by hardware */
 #define SYSC_IDLE_FORCE			0
 #define SYSC_IDLE_NO			1
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 990aad477458..2efa3470a451 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -14,6 +14,7 @@ enum ti_sysc_module_type {
 	TI_SYSC_OMAP4_SR,
 	TI_SYSC_OMAP4_MCASP,
 	TI_SYSC_OMAP4_USB_HOST_FS,
+	TI_SYSC_DRA7_MCAN,
 };
 
 struct ti_sysc_cookie {
-- 
cgit v1.2.3


From 74655749a58405e259eaaba66bfc391fdbe1e34e Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Mon, 9 Jul 2018 13:03:16 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Make sleep actions configurable

Add an argument to the sleep33xx and sleep43xx code to allow us to set
flags to determine which portions of the code get called in order to use
the same code for multiple power saving modes. This patch allows us to
decide whether or not we flush and disable caches, save EMIF context,
put the memory into self refresh and disable the EMIF, and/or invoke
the wkup_m3 when entering into WFI.

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pm33xx-core.c    | 10 ++++---
 arch/arm/mach-omap2/sleep33xx.S      | 47 ++++++++++++++++++++++++++++++
 arch/arm/mach-omap2/sleep43xx.S      | 56 +++++++++++++++++++++++++++++++++---
 drivers/soc/ti/pm33xx.c              | 15 +++++++++-
 include/linux/platform_data/pm33xx.h | 26 ++++++++++++++++-
 5 files changed, 144 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/pm33xx-core.c b/arch/arm/mach-omap2/pm33xx-core.c
index 9b3755a2e2ec..e363b9717fa5 100644
--- a/arch/arm/mach-omap2/pm33xx-core.c
+++ b/arch/arm/mach-omap2/pm33xx-core.c
@@ -106,12 +106,13 @@ static void amx3_post_suspend_common(void)
 		pr_err("PM: GFX domain did not transition: %x\n", status);
 }
 
-static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long))
+static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long),
+			  unsigned long args)
 {
 	int ret = 0;
 
 	amx3_pre_suspend_common();
-	ret = cpu_suspend(0, fn);
+	ret = cpu_suspend(args, fn);
 	amx3_post_suspend_common();
 
 	/*
@@ -128,13 +129,14 @@ static int am33xx_suspend(unsigned int state, int (*fn)(unsigned long))
 	return ret;
 }
 
-static int am43xx_suspend(unsigned int state, int (*fn)(unsigned long))
+static int am43xx_suspend(unsigned int state, int (*fn)(unsigned long),
+			  unsigned long args)
 {
 	int ret = 0;
 
 	amx3_pre_suspend_common();
 	scu_power_mode(scu_base, SCU_PM_POWEROFF);
-	ret = cpu_suspend(0, fn);
+	ret = cpu_suspend(args, fn);
 	scu_power_mode(scu_base, SCU_PM_NORMAL);
 	amx3_post_suspend_common();
 
diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S
index 322b3bb868b4..8d0d53b671f9 100644
--- a/arch/arm/mach-omap2/sleep33xx.S
+++ b/arch/arm/mach-omap2/sleep33xx.S
@@ -8,6 +8,7 @@
 
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
+#include <linux/platform_data/pm33xx.h>
 #include <linux/ti-emif-sram.h>
 #include <asm/assembler.h>
 #include <asm/memory.h>
@@ -19,12 +20,25 @@
 #define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE			0x0003
 #define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE			0x0002
 
+/* replicated define because linux/bitops.h cannot be included in assembly */
+#define BIT(nr)			(1 << (nr))
+
 	.arm
 	.align 3
 
 ENTRY(am33xx_do_wfi)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 
+	/* Save wfi_flags arg to data space */
+	mov	r4, r0
+	adr	r3, am33xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	str	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+	/* Only flush cache is we know we are losing MPU context */
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_flush
+
 	/*
 	 * Flush all data from the L1 and L2 data cache before disabling
 	 * SCTLR.C bit.
@@ -48,14 +62,33 @@ ENTRY(am33xx_do_wfi)
 	ldr	r1, kernel_flush
 	blx	r1
 
+	adr	r3, am33xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+cache_skip_flush:
+	/* Check if we want self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_enter_sr
+
 	adr	r9, am33xx_emif_sram_table
 
 	ldr	r3, [r9, #EMIF_PM_ENTER_SR_OFFSET]
 	blx	r3
 
+emif_skip_enter_sr:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SAVE_EMIF
+	beq	emif_skip_save
+
 	ldr	r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET]
 	blx	r3
 
+emif_skip_save:
+	/* Only can disable EMIF if we have entered self refresh */
+	tst     r4, #WFI_FLAG_SELF_REFRESH
+	beq     emif_skip_disable
+
 	/* Disable EMIF */
 	ldr     r1, virt_emif_clkctrl
 	ldr     r2, [r1]
@@ -69,6 +102,10 @@ wait_emif_disable:
 	cmp	r2, r3
 	bne	wait_emif_disable
 
+emif_skip_disable:
+	tst	r4, #WFI_FLAG_WAKE_M3
+	beq	wkup_m3_skip
+
 	/*
 	 * For the MPU WFI to be registered as an interrupt
 	 * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set
@@ -79,6 +116,7 @@ wait_emif_disable:
 	bic	r2, r2, #AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE
 	str	r2, [r1]
 
+wkup_m3_skip:
 	/*
 	 * Execute an ISB instruction to ensure that all of the
 	 * CP15 register changes have been committed.
@@ -132,10 +170,18 @@ wait_emif_enable:
 	cmp	r2, r3
 	bne	wait_emif_enable
 
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_exit_sr_abt
 
+	adr	r9, am33xx_emif_sram_table
 	ldr	r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
 	blx	r1
 
+emif_skip_exit_sr_abt:
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_restore
+
 	/*
 	 * Set SCTLR.C bit to allow data cache allocation
 	 */
@@ -144,6 +190,7 @@ wait_emif_enable:
 	mcr	p15, 0, r0, c1, c0, 0
 	isb
 
+cache_skip_restore:
 	/* Let the suspend code know about the abort */
 	mov	r0, #1
 	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S
index 8903814a6677..cd7e95f48a73 100644
--- a/arch/arm/mach-omap2/sleep43xx.S
+++ b/arch/arm/mach-omap2/sleep43xx.S
@@ -9,7 +9,7 @@
 #include <generated/ti-pm-asm-offsets.h>
 #include <linux/linkage.h>
 #include <linux/ti-emif-sram.h>
-
+#include <linux/platform_data/pm33xx.h>
 #include <asm/assembler.h>
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/memory.h>
@@ -22,6 +22,9 @@
 #include "prm33xx.h"
 #include "prcm43xx.h"
 
+/* replicated define because linux/bitops.h cannot be included in assembly */
+#define BIT(nr)			(1 << (nr))
+
 #define AM33XX_CM_CLKCTRL_MODULESTATE_DISABLED		0x00030000
 #define AM33XX_CM_CLKCTRL_MODULEMODE_DISABLE		0x0003
 #define AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE		0x0002
@@ -51,6 +54,12 @@
 ENTRY(am43xx_do_wfi)
 	stmfd	sp!, {r4 - r11, lr}	@ save registers on stack
 
+	/* Save wfi_flags arg to data space */
+	mov	r4, r0
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	str	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
 #ifdef CONFIG_CACHE_L2X0
 	/* Retrieve l2 cache virt address BEFORE we shut off EMIF */
 	ldr	r1, get_l2cache_base
@@ -58,6 +67,10 @@ ENTRY(am43xx_do_wfi)
 	mov	r8, r0
 #endif
 
+	/* Only flush cache is we know we are losing MPU context */
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_flush
+
 	/*
 	 * Flush all data from the L1 and L2 data cache before disabling
 	 * SCTLR.C bit.
@@ -128,13 +141,33 @@ sync:
 	bne	sync
 #endif
 
+	/* Restore wfi_flags */
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r2, [r3, #AMX3_PM_RO_SRAM_DATA_VIRT_OFFSET]
+	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
+
+cache_skip_flush:
+	/* Check if we want self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_enter_sr
+
 	adr     r9, am43xx_emif_sram_table
 
 	ldr     r3, [r9, #EMIF_PM_ENTER_SR_OFFSET]
 	blx     r3
 
+emif_skip_enter_sr:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SAVE_EMIF
+	beq	emif_skip_save
+
 	ldr     r3, [r9, #EMIF_PM_SAVE_CONTEXT_OFFSET]
-	blx     r3
+	blx	r3
+
+emif_skip_save:
+	/* Only can disable EMIF if we have entered self refresh */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_disable
 
 	/* Disable EMIF */
 	ldr	r1, am43xx_virt_emif_clkctrl
@@ -148,6 +181,10 @@ wait_emif_disable:
 	cmp	r2, r3
 	bne	wait_emif_disable
 
+emif_skip_disable:
+	tst	r4, #WFI_FLAG_WAKE_M3
+	beq	wkup_m3_skip
+
 	/*
 	 * For the MPU WFI to be registered as an interrupt
 	 * to WKUP_M3, MPU_CLKCTRL.MODULEMODE needs to be set
@@ -165,6 +202,7 @@ wait_emif_disable:
 	mov	r2, #AM43XX_CM_CLKSTCTRL_CLKTRCTRL_SW_SLEEP
 	str	r2, [r1]
 
+wkup_m3_skip:
 	/*
 	 * Execute a barrier instruction to ensure that all cache,
 	 * TLB and branch predictor maintenance operations issued
@@ -218,6 +256,9 @@ wait_emif_enable:
 	cmp	r2, r3
 	bne	wait_emif_enable
 
+	tst	r4, #WFI_FLAG_FLUSH_CACHE
+	beq	cache_skip_restore
+
 	/*
 	 * Set SCTLR.C bit to allow data cache allocation
 	 */
@@ -226,9 +267,16 @@ wait_emif_enable:
 	mcr	p15, 0, r0, c1, c0, 0
 	isb
 
-	ldr     r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
-	blx     r1
+cache_skip_restore:
+	/* Only necessary if PER is losing context */
+	tst	r4, #WFI_FLAG_SELF_REFRESH
+	beq	emif_skip_exit_sr_abt
+
+	adr	r9, am43xx_emif_sram_table
+	ldr	r1, [r9, #EMIF_PM_ABORT_SR_OFFSET]
+	blx	r1
 
+emif_skip_exit_sr_abt:
 	/* Let the suspend code know about the abort */
 	mov	r0, #1
 	ldmfd	sp!, {r4 - r11, pc}	@ restore regs and return
diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c
index 652739c7f718..0011c8f5ecd7 100644
--- a/drivers/soc/ti/pm33xx.c
+++ b/drivers/soc/ti/pm33xx.c
@@ -41,6 +41,8 @@ static struct am33xx_pm_sram_addr *pm_sram;
 static struct device *pm33xx_dev;
 static struct wkup_m3_ipc *m3_ipc;
 
+static unsigned long suspend_wfi_flags;
+
 static u32 sram_suspend_address(unsigned long addr)
 {
 	return ((unsigned long)am33xx_do_wfi_sram +
@@ -53,7 +55,7 @@ static int am33xx_pm_suspend(suspend_state_t suspend_state)
 	int i, ret = 0;
 
 	ret = pm_ops->soc_suspend((unsigned long)suspend_state,
-				  am33xx_do_wfi_sram);
+				  am33xx_do_wfi_sram, suspend_wfi_flags);
 
 	if (ret) {
 		dev_err(pm33xx_dev, "PM: Kernel suspend failure\n");
@@ -310,6 +312,17 @@ static int am33xx_pm_probe(struct platform_device *pdev)
 	suspend_set_ops(&am33xx_pm_ops);
 #endif /* CONFIG_SUSPEND */
 
+	/*
+	 * For a system suspend we must flush the caches, we want
+	 * the DDR in self-refresh, we want to save the context
+	 * of the EMIF, and we want the wkup_m3 to handle low-power
+	 * transition.
+	 */
+	suspend_wfi_flags |= WFI_FLAG_FLUSH_CACHE;
+	suspend_wfi_flags |= WFI_FLAG_SELF_REFRESH;
+	suspend_wfi_flags |= WFI_FLAG_SAVE_EMIF;
+	suspend_wfi_flags |= WFI_FLAG_WAKE_M3;
+
 	ret = pm_ops->init();
 	if (ret) {
 		dev_err(dev, "Unable to call core pm init!\n");
diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index f9bed2a0af9d..d231265c135c 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -12,6 +12,29 @@
 #include <linux/kbuild.h>
 #include <linux/types.h>
 
+/*
+ * WFI Flags for sleep code control
+ *
+ * These flags allow PM code to exclude certain operations from happening
+ * in the low level ASM code found in sleep33xx.S and sleep43xx.S
+ *
+ * WFI_FLAG_FLUSH_CACHE: Flush the ARM caches and disable caching. Only
+ *			 needed when MPU will lose context.
+ * WFI_FLAG_SELF_REFRESH: Let EMIF place DDR memory into self-refresh and
+ *			  disable EMIF.
+ * WFI_FLAG_SAVE_EMIF: Save context of all EMIF registers and restore in
+ *		       resume path. Only needed if PER domain loses context
+ *		       and must also have WFI_FLAG_SELF_REFRESH set.
+ * WFI_FLAG_WAKE_M3: Disable MPU clock or clockdomain to cause wkup_m3 to
+ *		     execute when WFI instruction executes.
+ * WFI_FLAG_RTC_ONLY: Configure the RTC to enter RTC+DDR mode.
+ */
+#define WFI_FLAG_FLUSH_CACHE		BIT(0)
+#define WFI_FLAG_SELF_REFRESH		BIT(1)
+#define WFI_FLAG_SAVE_EMIF		BIT(2)
+#define WFI_FLAG_WAKE_M3		BIT(3)
+#define WFI_FLAG_RTC_ONLY		BIT(4)
+
 #ifndef __ASSEMBLER__
 struct am33xx_pm_sram_addr {
 	void (*do_wfi)(void);
@@ -23,7 +46,8 @@ struct am33xx_pm_sram_addr {
 
 struct am33xx_pm_platform_data {
 	int	(*init)(void);
-	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long));
+	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
+			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
 };
 
-- 
cgit v1.2.3


From 8c5a916f4c8815196cc8a86b9582ca89422aac25 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 9 Jul 2018 13:03:17 +0530
Subject: ARM: OMAP2+: sleep33/43xx: Add RTC-Mode support

Add support for RTC mode to low level suspend code. This includes
providing the rtc base address for the assembly code to configuring the
PMIC_PWR_EN line late in suspend to enter RTC+DDR mode.

Note: This patch also fold in left out space parameter for
am33xx_emif_sram_table and am43xx_emif_sram_table

Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/pm-asm-offsets.c |  2 ++
 arch/arm/mach-omap2/pm33xx-core.c    | 10 +++++++
 arch/arm/mach-omap2/sleep33xx.S      |  5 ++--
 arch/arm/mach-omap2/sleep43xx.S      | 54 ++++++++++++++++++++++++++++++++++--
 drivers/soc/ti/pm33xx.c              |  1 +
 include/linux/platform_data/pm33xx.h |  3 ++
 6 files changed, 71 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/pm-asm-offsets.c b/arch/arm/mach-omap2/pm-asm-offsets.c
index b9846b19e5e2..d8ae8a85b14b 100644
--- a/arch/arm/mach-omap2/pm-asm-offsets.c
+++ b/arch/arm/mach-omap2/pm-asm-offsets.c
@@ -27,6 +27,8 @@ int main(void)
 	       offsetof(struct am33xx_pm_ro_sram_data, amx3_pm_sram_data_virt));
 	DEFINE(AMX3_PM_RO_SRAM_DATA_PHYS_OFFSET,
 	       offsetof(struct am33xx_pm_ro_sram_data, amx3_pm_sram_data_phys));
+	DEFINE(AMX3_PM_RTC_BASE_VIRT_OFFSET,
+	       offsetof(struct am33xx_pm_ro_sram_data, rtc_base_virt));
 	DEFINE(AMX3_PM_RO_SRAM_DATA_SIZE,
 	       sizeof(struct am33xx_pm_ro_sram_data));
 
diff --git a/arch/arm/mach-omap2/pm33xx-core.c b/arch/arm/mach-omap2/pm33xx-core.c
index e363b9717fa5..f4971e4a86b2 100644
--- a/arch/arm/mach-omap2/pm33xx-core.c
+++ b/arch/arm/mach-omap2/pm33xx-core.c
@@ -26,6 +26,7 @@
 static struct powerdomain *cefuse_pwrdm, *gfx_pwrdm, *per_pwrdm, *mpu_pwrdm;
 static struct clockdomain *gfx_l4ls_clkdm;
 static void __iomem *scu_base;
+static struct omap_hwmod *rtc_oh;
 
 static int __init am43xx_map_scu(void)
 {
@@ -153,16 +154,25 @@ static struct am33xx_pm_sram_addr *amx3_get_sram_addrs(void)
 		return NULL;
 }
 
+void __iomem *am43xx_get_rtc_base_addr(void)
+{
+	rtc_oh = omap_hwmod_lookup("rtc");
+
+	return omap_hwmod_get_mpu_rt_va(rtc_oh);
+}
+
 static struct am33xx_pm_platform_data am33xx_ops = {
 	.init = am33xx_suspend_init,
 	.soc_suspend = am33xx_suspend,
 	.get_sram_addrs = amx3_get_sram_addrs,
+	.get_rtc_base_addr = am43xx_get_rtc_base_addr,
 };
 
 static struct am33xx_pm_platform_data am43xx_ops = {
 	.init = am43xx_suspend_init,
 	.soc_suspend = am43xx_suspend,
 	.get_sram_addrs = amx3_get_sram_addrs,
+	.get_rtc_base_addr = am43xx_get_rtc_base_addr,
 };
 
 static struct am33xx_pm_platform_data *am33xx_pm_get_pdata(void)
diff --git a/arch/arm/mach-omap2/sleep33xx.S b/arch/arm/mach-omap2/sleep33xx.S
index 8d0d53b671f9..47a816468cdb 100644
--- a/arch/arm/mach-omap2/sleep33xx.S
+++ b/arch/arm/mach-omap2/sleep33xx.S
@@ -228,8 +228,6 @@ ENDPROC(am33xx_resume_from_deep_sleep)
  * Local variables
  */
 	.align
-resume_addr:
-	.word	cpu_resume - PAGE_OFFSET + 0x80000000
 kernel_flush:
 	.word   v7_flush_dcache_all
 virt_mpu_clkctrl:
@@ -252,6 +250,9 @@ ENTRY(am33xx_pm_sram)
 	.word am33xx_emif_sram_table
 	.word am33xx_pm_ro_sram_data
 
+resume_addr:
+.word  cpu_resume - PAGE_OFFSET + 0x80000000
+
 .align 3
 ENTRY(am33xx_pm_ro_sram_data)
 	.space AMX3_PM_RO_SRAM_DATA_SIZE
diff --git a/arch/arm/mach-omap2/sleep43xx.S b/arch/arm/mach-omap2/sleep43xx.S
index cd7e95f48a73..5b9343b58fc7 100644
--- a/arch/arm/mach-omap2/sleep43xx.S
+++ b/arch/arm/mach-omap2/sleep43xx.S
@@ -48,6 +48,13 @@
 					AM43XX_CM_PER_EMIF_CLKCTRL_OFFSET)
 #define AM43XX_PRM_EMIF_CTRL_OFFSET			0x0030
 
+#define RTC_SECONDS_REG					0x0
+#define RTC_PMIC_REG					0x98
+#define RTC_PMIC_POWER_EN				BIT(16)
+#define RTC_PMIC_EXT_WAKEUP_STS				BIT(12)
+#define RTC_PMIC_EXT_WAKEUP_POL				BIT(4)
+#define RTC_PMIC_EXT_WAKEUP_EN				BIT(0)
+
 	.arm
 	.align 3
 
@@ -147,6 +154,20 @@ sync:
 	ldr	r4, [r2, #AMX3_PM_WFI_FLAGS_OFFSET]
 
 cache_skip_flush:
+	/*
+	 * If we are trying to enter RTC+DDR mode we must perform
+	 * a read from the rtc address space to ensure translation
+	 * presence in the TLB to avoid page table walk after DDR
+	 * is unavailable.
+	 */
+	tst	r4, #WFI_FLAG_RTC_ONLY
+	beq	skip_rtc_va_refresh
+
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET]
+	ldr	r0, [r1]
+
+skip_rtc_va_refresh:
 	/* Check if we want self refresh */
 	tst	r4, #WFI_FLAG_SELF_REFRESH
 	beq	emif_skip_enter_sr
@@ -182,6 +203,34 @@ wait_emif_disable:
 	bne	wait_emif_disable
 
 emif_skip_disable:
+	tst	r4, #WFI_FLAG_RTC_ONLY
+	beq	skip_rtc_only
+
+	adr	r3, am43xx_pm_ro_sram_data
+	ldr	r1, [r3, #AMX3_PM_RTC_BASE_VIRT_OFFSET]
+
+	ldr	r0, [r1, #RTC_PMIC_REG]
+	orr	r0, r0, #RTC_PMIC_POWER_EN
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_STS
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_EN
+	orr	r0, r0, #RTC_PMIC_EXT_WAKEUP_POL
+	str	r0, [r1, #RTC_PMIC_REG]
+	ldr	r0, [r1, #RTC_PMIC_REG]
+	/* Wait for 2 seconds to lose power */
+	mov	r3, #2
+	ldr	r2, [r1, #RTC_SECONDS_REG]
+rtc_loop:
+	ldr	r0, [r1, #RTC_SECONDS_REG]
+	cmp	r0, r2
+	beq	rtc_loop
+	mov	r2, r0
+	subs	r3, r3, #1
+	bne	rtc_loop
+
+	b	re_enable_emif
+
+skip_rtc_only:
+
 	tst	r4, #WFI_FLAG_WAKE_M3
 	beq	wkup_m3_skip
 
@@ -247,6 +296,7 @@ wkup_m3_skip:
 	mov	r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE
 	str	r2, [r1]
 
+re_enable_emif:
 	/* Re-enable EMIF */
 	ldr	r1, am43xx_virt_emif_clkctrl
 	mov	r2, #AM33XX_CM_CLKCTRL_MODULEMODE_ENABLE
@@ -381,8 +431,6 @@ ENDPROC(am43xx_resume_from_deep_sleep)
  * Local variables
  */
 	.align
-resume_addr:
-	.word	cpu_resume - PAGE_OFFSET + 0x80000000
 kernel_flush:
 	.word   v7_flush_dcache_all
 ddr_start:
@@ -429,6 +477,8 @@ ENTRY(am43xx_pm_sram)
 	.word am43xx_emif_sram_table
 	.word am43xx_pm_ro_sram_data
 
+resume_addr:
+	.word   cpu_resume - PAGE_OFFSET + 0x80000000
 .align 3
 
 ENTRY(am43xx_pm_ro_sram_data)
diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c
index 0011c8f5ecd7..d0dab323651f 100644
--- a/drivers/soc/ti/pm33xx.c
+++ b/drivers/soc/ti/pm33xx.c
@@ -229,6 +229,7 @@ static int am33xx_push_sram_idle(void)
 	ro_sram_data.amx3_pm_sram_data_virt = ocmcram_location_data;
 	ro_sram_data.amx3_pm_sram_data_phys =
 		gen_pool_virt_to_phys(sram_pool_data, ocmcram_location_data);
+	ro_sram_data.rtc_base_virt = pm_ops->get_rtc_base_addr();
 
 	/* Save physical address to calculate resume offset during pm init */
 	am33xx_do_wfi_sram_phys = gen_pool_virt_to_phys(sram_pool,
diff --git a/include/linux/platform_data/pm33xx.h b/include/linux/platform_data/pm33xx.h
index d231265c135c..fbf5ed73c7cc 100644
--- a/include/linux/platform_data/pm33xx.h
+++ b/include/linux/platform_data/pm33xx.h
@@ -42,6 +42,7 @@ struct am33xx_pm_sram_addr {
 	unsigned long *resume_offset;
 	unsigned long *emif_sram_table;
 	unsigned long *ro_sram_data;
+	unsigned long resume_address;
 };
 
 struct am33xx_pm_platform_data {
@@ -49,6 +50,7 @@ struct am33xx_pm_platform_data {
 	int	(*soc_suspend)(unsigned int state, int (*fn)(unsigned long),
 			       unsigned long args);
 	struct  am33xx_pm_sram_addr *(*get_sram_addrs)(void);
+	void __iomem *(*get_rtc_base_addr)(void);
 };
 
 struct am33xx_pm_sram_data {
@@ -60,6 +62,7 @@ struct am33xx_pm_sram_data {
 struct am33xx_pm_ro_sram_data {
 	u32 amx3_pm_sram_data_virt;
 	u32 amx3_pm_sram_data_phys;
+	void __iomem *rtc_base_virt;
 } __packed __aligned(8);
 
 #endif /* __ASSEMBLER__ */
-- 
cgit v1.2.3


From 7f69ae7fadd9dbebe563bfc28b993a2bc9c8acf9 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 Jun 2018 11:57:48 +0200
Subject: ARM: davinci: unduplicate aemif support

All users now register platform devices using the ti-aemif driver.
Remove the handcrafted aemif API.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/Makefile                  |   2 +-
 arch/arm/mach-davinci/aemif.c                   | 218 ------------------------
 include/linux/platform_data/mtd-davinci-aemif.h |   1 -
 3 files changed, 1 insertion(+), 220 deletions(-)
 delete mode 100644 arch/arm/mach-davinci/aemif.c

(limited to 'include')

diff --git a/arch/arm/mach-davinci/Makefile b/arch/arm/mach-davinci/Makefile
index 8725d8bea567..93d271b4d84b 100644
--- a/arch/arm/mach-davinci/Makefile
+++ b/arch/arm/mach-davinci/Makefile
@@ -6,7 +6,7 @@
 
 # Common objects
 obj-y 					:= time.o serial.o usb.o \
-					   common.o sram.o aemif.o
+					   common.o sram.o
 
 obj-$(CONFIG_DAVINCI_MUX)		+= mux.o
 
diff --git a/arch/arm/mach-davinci/aemif.c b/arch/arm/mach-davinci/aemif.c
deleted file mode 100644
index e4ab3f3a2a1f..000000000000
--- a/arch/arm/mach-davinci/aemif.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * AEMIF support for DaVinci SoCs
- *
- * Copyright (C) 2010 Texas Instruments Incorporated. http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/module.h>
-#include <linux/time.h>
-
-#include <linux/platform_data/mtd-davinci-aemif.h>
-#include <linux/platform_data/mtd-davinci.h>
-
-/* Timing value configuration */
-
-#define TA(x)		((x) << 2)
-#define RHOLD(x)	((x) << 4)
-#define RSTROBE(x)	((x) << 7)
-#define RSETUP(x)	((x) << 13)
-#define WHOLD(x)	((x) << 17)
-#define WSTROBE(x)	((x) << 20)
-#define WSETUP(x)	((x) << 26)
-
-#define TA_MAX		0x3
-#define RHOLD_MAX	0x7
-#define RSTROBE_MAX	0x3f
-#define RSETUP_MAX	0xf
-#define WHOLD_MAX	0x7
-#define WSTROBE_MAX	0x3f
-#define WSETUP_MAX	0xf
-
-#define TIMING_MASK	(TA(TA_MAX) | \
-				RHOLD(RHOLD_MAX) | \
-				RSTROBE(RSTROBE_MAX) |	\
-				RSETUP(RSETUP_MAX) | \
-				WHOLD(WHOLD_MAX) | \
-				WSTROBE(WSTROBE_MAX) | \
-				WSETUP(WSETUP_MAX))
-
-static inline unsigned int davinci_aemif_readl(void __iomem *base, int offset)
-{
-	return readl_relaxed(base + offset);
-}
-
-static inline void davinci_aemif_writel(void __iomem *base,
-					int offset, unsigned long value)
-{
-	writel_relaxed(value, base + offset);
-}
-
-/*
- * aemif_calc_rate - calculate timing data.
- * @wanted: The cycle time needed in nanoseconds.
- * @clk: The input clock rate in kHz.
- * @max: The maximum divider value that can be programmed.
- *
- * On success, returns the calculated timing value minus 1 for easy
- * programming into AEMIF timing registers, else negative errno.
- */
-static int aemif_calc_rate(int wanted, unsigned long clk, int max)
-{
-	int result;
-
-	result = DIV_ROUND_UP((wanted * clk), NSEC_PER_MSEC) - 1;
-
-	pr_debug("%s: result %d from %ld, %d\n", __func__, result, clk, wanted);
-
-	/* It is generally OK to have a more relaxed timing than requested... */
-	if (result < 0)
-		result = 0;
-
-	/* ... But configuring tighter timings is not an option. */
-	else if (result > max)
-		result = -EINVAL;
-
-	return result;
-}
-
-/**
- * davinci_aemif_setup_timing - setup timing values for a given AEMIF interface
- * @t: timing values to be progammed
- * @base: The virtual base address of the AEMIF interface
- * @cs: chip-select to program the timing values for
- * @clkrate: the AEMIF clkrate
- *
- * This function programs the given timing values (in real clock) into the
- * AEMIF registers taking the AEMIF clock into account.
- *
- * This function does not use any locking while programming the AEMIF
- * because it is expected that there is only one user of a given
- * chip-select.
- *
- * Returns 0 on success, else negative errno.
- */
-static int davinci_aemif_setup_timing(struct davinci_aemif_timing *t,
-					void __iomem *base, unsigned cs,
-					unsigned long clkrate)
-{
-	unsigned set, val;
-	int ta, rhold, rstrobe, rsetup, whold, wstrobe, wsetup;
-	unsigned offset = A1CR_OFFSET + cs * 4;
-
-	if (!t)
-		return 0;	/* Nothing to do */
-
-	clkrate /= 1000;	/* turn clock into kHz for ease of use */
-
-	ta	= aemif_calc_rate(t->ta, clkrate, TA_MAX);
-	rhold	= aemif_calc_rate(t->rhold, clkrate, RHOLD_MAX);
-	rstrobe	= aemif_calc_rate(t->rstrobe, clkrate, RSTROBE_MAX);
-	rsetup	= aemif_calc_rate(t->rsetup, clkrate, RSETUP_MAX);
-	whold	= aemif_calc_rate(t->whold, clkrate, WHOLD_MAX);
-	wstrobe	= aemif_calc_rate(t->wstrobe, clkrate, WSTROBE_MAX);
-	wsetup	= aemif_calc_rate(t->wsetup, clkrate, WSETUP_MAX);
-
-	if (ta < 0 || rhold < 0 || rstrobe < 0 || rsetup < 0 ||
-			whold < 0 || wstrobe < 0 || wsetup < 0) {
-		pr_err("%s: cannot get suitable timings\n", __func__);
-		return -EINVAL;
-	}
-
-	set = TA(ta) | RHOLD(rhold) | RSTROBE(rstrobe) | RSETUP(rsetup) |
-		WHOLD(whold) | WSTROBE(wstrobe) | WSETUP(wsetup);
-
-	val = __raw_readl(base + offset);
-	val &= ~TIMING_MASK;
-	val |= set;
-	__raw_writel(val, base + offset);
-
-	return 0;
-}
-
-/**
- * davinci_aemif_setup - setup AEMIF interface by davinci_nand_pdata
- * @pdev - link to platform device to setup settings for
- *
- * This function does not use any locking while programming the AEMIF
- * because it is expected that there is only one user of a given
- * chip-select.
- *
- * Returns 0 on success, else negative errno.
- */
-int davinci_aemif_setup(struct platform_device *pdev)
-{
-	struct davinci_nand_pdata *pdata = dev_get_platdata(&pdev->dev);
-	uint32_t val;
-	unsigned long clkrate;
-	struct resource	*res;
-	void __iomem *base;
-	struct clk *clk;
-	int ret = 0;
-
-	clk = clk_get(&pdev->dev, "aemif");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
-		return ret;
-	}
-
-	ret = clk_prepare_enable(clk);
-	if (ret < 0) {
-		dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
-			ret);
-		goto err_put;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!res) {
-		dev_err(&pdev->dev, "cannot get IORESOURCE_MEM\n");
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	base = ioremap(res->start, resource_size(res));
-	if (!base) {
-		dev_err(&pdev->dev, "ioremap failed for resource %pR\n", res);
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	/*
-	 * Setup Async configuration register in case we did not boot
-	 * from NAND and so bootloader did not bother to set it up.
-	 */
-	val = davinci_aemif_readl(base, A1CR_OFFSET + pdata->core_chipsel * 4);
-	/*
-	 * Extended Wait is not valid and Select Strobe mode is not
-	 * used
-	 */
-	val &= ~(ACR_ASIZE_MASK | ACR_EW_MASK | ACR_SS_MASK);
-	if (pdata->options & NAND_BUSWIDTH_16)
-		val |= 0x1;
-
-	davinci_aemif_writel(base, A1CR_OFFSET + pdata->core_chipsel * 4, val);
-
-	clkrate = clk_get_rate(clk);
-
-	if (pdata->timing)
-		ret = davinci_aemif_setup_timing(pdata->timing, base,
-						 pdata->core_chipsel, clkrate);
-
-	if (ret < 0)
-		dev_dbg(&pdev->dev, "NAND timing values setup fail\n");
-
-	iounmap(base);
-err:
-	clk_disable_unprepare(clk);
-err_put:
-	clk_put(clk);
-	return ret;
-}
diff --git a/include/linux/platform_data/mtd-davinci-aemif.h b/include/linux/platform_data/mtd-davinci-aemif.h
index 97948ac2bb9b..a403dd51dacc 100644
--- a/include/linux/platform_data/mtd-davinci-aemif.h
+++ b/include/linux/platform_data/mtd-davinci-aemif.h
@@ -33,5 +33,4 @@ struct davinci_aemif_timing {
 	u8	ta;
 };
 
-int davinci_aemif_setup(struct platform_device *pdev);
 #endif
-- 
cgit v1.2.3


From 25b4e70dcce92168eab4d8113817bb4dd130ebd2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 9 Jul 2018 09:41:48 -0600
Subject: driver core: allow stopping deferred probe after init

Deferred probe will currently wait forever on dependent devices to probe,
but sometimes a driver will never exist. It's also not always critical for
a driver to exist. Platforms can rely on default configuration from the
bootloader or reset defaults for things such as pinctrl and power domains.
This is often the case with initial platform support until various drivers
get enabled. There's at least 2 scenarios where deferred probe can render
a platform broken. Both involve using a DT which has more devices and
dependencies than the kernel supports. The 1st case is a driver may be
disabled in the kernel config. The 2nd case is the kernel version may
simply not have the dependent driver. This can happen if using a newer DT
(provided by firmware perhaps) with a stable kernel version. Deferred
probe issues can be difficult to debug especially if the console has
dependencies or userspace fails to boot to a shell.

There are also cases like IOMMUs where only built-in drivers are
supported, so deferring probe after initcalls is not needed. The IOMMU
subsystem implemented its own mechanism to handle this using OF_DECLARE
linker sections.

This commit adds makes ending deferred probe conditional on initcalls
being completed or a debug timeout. Subsystems or drivers may opt-in by
calling driver_deferred_probe_check_init_done() instead of
unconditionally returning -EPROBE_DEFER. They may use additional
information from DT or kernel's config to decide whether to continue to
defer probe or not.

The timeout mechanism is intended for debug purposes and WARNs loudly.
The remaining deferred probe pending list will also be dumped after the
timeout. Not that this timeout won't work for the console which needs
to be enabled before userspace starts. However, if the console's
dependencies are resolved, then the kernel log will be printed (as
opposed to no output).

Cc: Alexander Graf <agraf@suse.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  9 ++++
 drivers/base/dd.c                               | 59 +++++++++++++++++++++++++
 include/linux/device.h                          |  2 +
 3 files changed, 70 insertions(+)

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..e83ef4648ea4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -804,6 +804,15 @@
 			Defaults to the default architecture's huge page size
 			if not specified.
 
+	deferred_probe_timeout=
+			[KNL] Debugging option to set a timeout in seconds for
+			deferred probe to give up waiting on dependencies to
+			probe. Only specific dependencies (subsystems or
+			drivers) that have opted in will be ignored. A timeout of 0
+			will timeout at the end of initcalls. This option will also
+			dump out devices still on the deferred probe list after
+			retrying.
+
 	dhash_entries=	[KNL]
 			Set number of hash buckets for dentry cache.
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index e85705e84407..fb62f1be40d3 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -55,6 +55,7 @@ static LIST_HEAD(deferred_probe_pending_list);
 static LIST_HEAD(deferred_probe_active_list);
 static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
 static struct dentry *deferred_devices;
+static bool initcalls_done;
 
 /*
  * In some cases, like suspend to RAM or hibernation, It might be reasonable
@@ -219,6 +220,51 @@ static int deferred_devs_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(deferred_devs);
 
+static int deferred_probe_timeout = -1;
+static int __init deferred_probe_timeout_setup(char *str)
+{
+	deferred_probe_timeout = simple_strtol(str, NULL, 10);
+	return 1;
+}
+__setup("deferred_probe_timeout=", deferred_probe_timeout_setup);
+
+/**
+ * driver_deferred_probe_check_state() - Check deferred probe state
+ * @dev: device to check
+ *
+ * Returns -ENODEV if init is done and all built-in drivers have had a chance
+ * to probe (i.e. initcalls are done), -ETIMEDOUT if deferred probe debug
+ * timeout has expired, or -EPROBE_DEFER if none of those conditions are met.
+ *
+ * Drivers or subsystems can opt-in to calling this function instead of directly
+ * returning -EPROBE_DEFER.
+ */
+int driver_deferred_probe_check_state(struct device *dev)
+{
+	if (initcalls_done) {
+		if (!deferred_probe_timeout) {
+			dev_WARN(dev, "deferred probe timeout, ignoring dependency");
+			return -ETIMEDOUT;
+		}
+		dev_warn(dev, "ignoring dependency for device, assuming no driver");
+		return -ENODEV;
+	}
+	return -EPROBE_DEFER;
+}
+
+static void deferred_probe_timeout_work_func(struct work_struct *work)
+{
+	struct device_private *private, *p;
+
+	deferred_probe_timeout = 0;
+	driver_deferred_probe_trigger();
+	flush_work(&deferred_probe_work);
+
+	list_for_each_entry_safe(private, p, &deferred_probe_pending_list, deferred_probe)
+		dev_info(private->device, "deferred probe pending");
+}
+static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func);
+
 /**
  * deferred_probe_initcall() - Enable probing of deferred devices
  *
@@ -235,6 +281,19 @@ static int deferred_probe_initcall(void)
 	driver_deferred_probe_trigger();
 	/* Sort as many dependencies as possible before exiting initcalls */
 	flush_work(&deferred_probe_work);
+	initcalls_done = true;
+
+	/*
+	 * Trigger deferred probe again, this time we won't defer anything
+	 * that is optional
+	 */
+	driver_deferred_probe_trigger();
+	flush_work(&deferred_probe_work);
+
+	if (deferred_probe_timeout > 0) {
+		schedule_delayed_work(&deferred_probe_timeout_work,
+			deferred_probe_timeout * HZ);
+	}
 	return 0;
 }
 late_initcall(deferred_probe_initcall);
diff --git a/include/linux/device.h b/include/linux/device.h
index 575c5a35ece5..d2acc78d279b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -339,6 +339,8 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, void *data,
 				  int (*match)(struct device *dev, void *data));
 
+int driver_deferred_probe_check_state(struct device *dev);
+
 /**
  * struct subsys_interface - interfaces to device functions
  * @name:       name of the device function
-- 
cgit v1.2.3


From ac6bbf0cdf4206c517ac9789814c23e372ebce4d Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 9 Jul 2018 09:41:52 -0600
Subject: iommu: Remove IOMMU_OF_DECLARE

Now that we use the driver core to stop deferred probe for missing
drivers, IOMMU_OF_DECLARE can be removed.

This is slightly less optimal than having a list of built-in drivers in
that we'll now defer probe twice before giving up. This shouldn't have a
significant impact on boot times as past discussions about deferred
probe have given no evidence of deferred probe having a substantial
impact.

Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Kukjin Kim <kgene@kernel.org>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-samsung-soc@vger.kernel.org
Cc: linux-arm-msm@vger.kernel.org
Cc: linux-rockchip@lists.infradead.org
Cc: devicetree@vger.kernel.org
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/arm-smmu-v3.c       |  2 --
 drivers/iommu/arm-smmu.c          |  7 -------
 drivers/iommu/exynos-iommu.c      |  2 --
 drivers/iommu/ipmmu-vmsa.c        |  3 ---
 drivers/iommu/msm_iommu.c         |  2 --
 drivers/iommu/of_iommu.c          | 19 +------------------
 drivers/iommu/qcom_iommu.c        |  2 --
 drivers/iommu/rockchip-iommu.c    |  2 --
 include/asm-generic/vmlinux.lds.h |  2 --
 include/linux/of_iommu.h          |  4 ----
 10 files changed, 1 insertion(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1d647104bccc..22bdabd3d8e0 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2915,8 +2915,6 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3");
-
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7a96bcf94a6..c73cfce1ccc0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -2211,13 +2211,6 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
-IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
-IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
-IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
-IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
-IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");
-
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 85879cfec52f..b128cb4372d3 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1390,5 +1390,3 @@ err_reg_driver:
 	return ret;
 }
 core_initcall(exynos_iommu_init);
-
-IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu");
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 40ae6e87cb88..f026aa16d5f1 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1108,9 +1108,6 @@ static void __exit ipmmu_exit(void)
 subsys_initcall(ipmmu_init);
 module_exit(ipmmu_exit);
 
-IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa");
-IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795");
-
 MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
 MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 0d3350463a3f..27377742600d 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -877,7 +877,5 @@ static void __exit msm_iommu_driver_exit(void)
 subsys_initcall(msm_iommu_driver_init);
 module_exit(msm_iommu_driver_exit);
 
-IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu");
-
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 78ddf47dd67a..f7787e757244 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -27,9 +27,6 @@
 
 #define NO_IOMMU	1
 
-static const struct of_device_id __iommu_of_table_sentinel
-	__used __section(__iommu_of_table_end);
-
 /**
  * of_get_dma_window - Parse *dma-window property and returns 0 if found.
  *
@@ -98,19 +95,6 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
 }
 EXPORT_SYMBOL_GPL(of_get_dma_window);
 
-static bool of_iommu_driver_present(struct device_node *np)
-{
-	/*
-	 * If the IOMMU still isn't ready by the time we reach init, assume
-	 * it never will be. We don't want to defer indefinitely, nor attempt
-	 * to dereference __iommu_of_table after it's been freed.
-	 */
-	if (system_state >= SYSTEM_RUNNING)
-		return false;
-
-	return of_match_node(&__iommu_of_table, np);
-}
-
 static int of_iommu_xlate(struct device *dev,
 			  struct of_phandle_args *iommu_spec)
 {
@@ -120,8 +104,7 @@ static int of_iommu_xlate(struct device *dev,
 
 	ops = iommu_ops_from_fwnode(fwnode);
 	if ((ops && !ops->of_xlate) ||
-	    !of_device_is_available(iommu_spec->np) ||
-	    (!ops && !of_iommu_driver_present(iommu_spec->np)))
+	    !of_device_is_available(iommu_spec->np))
 		return NO_IOMMU;
 
 	err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops);
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index fe88a4880d3a..b48aee82d14b 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -945,7 +945,5 @@ static void __exit qcom_iommu_exit(void)
 module_init(qcom_iommu_init);
 module_exit(qcom_iommu_exit);
 
-IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1");
-
 MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..de8d3bf91b23 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1284,8 +1284,6 @@ static int __init rk_iommu_init(void)
 }
 subsys_initcall(rk_iommu_init);
 
-IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
-
 MODULE_DESCRIPTION("IOMMU API for Rockchip");
 MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
 MODULE_ALIAS("platform:rockchip-iommu");
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e373e2e10f6a..f173b5f30dbe 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -218,7 +218,6 @@
 #define TIMER_OF_TABLES()	OF_TABLE(CONFIG_TIMER_OF, timer)
 #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip)
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
-#define IOMMU_OF_TABLES()	OF_TABLE(CONFIG_OF_IOMMU, iommu)
 #define RESERVEDMEM_OF_TABLES()	OF_TABLE(CONFIG_OF_RESERVED_MEM, reservedmem)
 #define CPU_METHOD_OF_TABLES()	OF_TABLE(CONFIG_SMP, cpu_method)
 #define CPUIDLE_METHOD_OF_TABLES() OF_TABLE(CONFIG_CPU_IDLE, cpuidle_method)
@@ -601,7 +600,6 @@
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
 	TIMER_OF_TABLES()						\
-	IOMMU_OF_TABLES()						\
 	CPU_METHOD_OF_TABLES()						\
 	CPUIDLE_METHOD_OF_TABLES()					\
 	KERNEL_DTB()							\
diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h
index 4fa654e4b5a9..f3d40dd7bb66 100644
--- a/include/linux/of_iommu.h
+++ b/include/linux/of_iommu.h
@@ -32,8 +32,4 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 #endif	/* CONFIG_OF_IOMMU */
 
-extern struct of_device_id __iommu_of_table;
-
-#define IOMMU_OF_DECLARE(name, compat)	OF_DECLARE_1(iommu, name, compat, NULL)
-
 #endif /* __OF_IOMMU_H */
-- 
cgit v1.2.3


From aef92a8bed25d03b8f03ce499a56e8e8e5e2c05e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 10 Jul 2018 13:42:10 +0200
Subject: watchdog/softlockup: Fix the SOFTLOCKUP_DETECTOR=n build

I got confused by all the various CONFIG options here about and
conflated CONFIG_LOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR.

This results in a build failure for:

   CONFIG_LOCKUP_DETECTOR=y && CONFIG_SOFTLOCKUP_DETECTOR=n

As reported by Abdul.

Reported-and-tested-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-next <linux-next@vger.kernel.org>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: mpe <mpe@ellerman.id.au>
Cc: sachinp <sachinp@linux.vnet.ibm.com>
Cc: stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180710114210.GI2476@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 80664bbeca43..08f9247e9827 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -33,15 +33,10 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif /* !CONFIG_SMP */
 
-extern int lockup_detector_online_cpu(unsigned int cpu);
-extern int lockup_detector_offline_cpu(unsigned int cpu);
-
 #else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
-#define lockup_detector_online_cpu	NULL
-#define lockup_detector_offline_cpu	NULL
 #endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -50,12 +45,18 @@ extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
-#else
+
+extern int lockup_detector_online_cpu(unsigned int cpu);
+extern int lockup_detector_offline_cpu(unsigned int cpu);
+#else /* CONFIG_SOFTLOCKUP_DETECTOR */
 static inline void touch_softlockup_watchdog_sched(void) { }
 static inline void touch_softlockup_watchdog(void) { }
 static inline void touch_softlockup_watchdog_sync(void) { }
 static inline void touch_all_softlockup_watchdogs(void) { }
-#endif
+
+#define lockup_detector_online_cpu	NULL
+#define lockup_detector_offline_cpu	NULL
+#endif /* CONFIG_SOFTLOCKUP_DETECTOR */
 
 #ifdef CONFIG_DETECT_HUNG_TASK
 void reset_hung_task_detector(void);
-- 
cgit v1.2.3


From 2f944c0fbf58b1f390e5e61affd98ba0061214c6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 4 Jul 2018 15:57:48 +0300
Subject: RDMA: Fix storage of PortInfo CapabilityMask in the kernel

The internal flag IP_BASED_GIDS was added to a field that was being used
to hold the port Info CapabilityMask without considering the effects this
will have. Since most drivers just use the value from the HW MAD it means
IP_BASED_GIDS will also become set on any HW that sets the IBA flag
IsOtherLocalChangesNoticeSupported - which is not intended.

Fix this by keeping port_cap_flags only for the IBA CapabilityMask value
and store unrelated flags externally. Move the bit definitions for this to
ib_mad.h to make it clear what is happening.

To keep the uAPI unchanged define a new set of flags in the uapi header
that are only used by ib_uverbs_query_port_resp.port_cap_flags which match
the current flags supported in rdma-core, and the values exposed by the
current kernel.

Fixes: b4a26a27287a ("IB: Report using RoCE IP based gids in port caps")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c            | 23 ++++++++++++++-
 drivers/infiniband/hw/bnxt_re/ib_verbs.c        |  4 +--
 drivers/infiniband/hw/mlx4/main.c               |  3 +-
 drivers/infiniband/hw/mlx5/main.c               |  4 +--
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     |  9 +++---
 drivers/infiniband/hw/qedr/verbs.c              |  2 +-
 drivers/infiniband/hw/qib/qib_verbs.h           |  3 --
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c |  3 +-
 include/rdma/ib_mad.h                           | 33 +++++++++++++++++++++
 include/rdma/ib_verbs.h                         | 31 ++------------------
 include/uapi/rdma/ib_user_ioctl_verbs.h         | 38 +++++++++++++++++++++++++
 include/uapi/rdma/ib_user_verbs.h               |  2 +-
 12 files changed, 110 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d048cabc4246..1bc9ceb16b70 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -241,6 +241,27 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 	return in_len;
 }
 
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+	u32 res;
+
+	/* All IBA CapabilityMask bits are passed through here, except bit 26,
+	 * which is overridden with IP_BASED_GIDS. This is due to a historical
+	 * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+	 * bits match the IBA definition across all kernel versions.
+	 */
+	res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+	if (attr->ip_gids)
+		res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+	return res;
+}
+
 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 			     struct ib_device *ib_dev,
 			     const char __user *buf,
@@ -267,7 +288,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	resp.max_mtu 	     = attr.max_mtu;
 	resp.active_mtu      = attr.active_mtu;
 	resp.gid_tbl_len     = attr.gid_tbl_len;
-	resp.port_cap_flags  = attr.port_cap_flags;
+	resp.port_cap_flags  = make_port_cap_flags(&attr);
 	resp.max_msg_sz      = attr.max_msg_sz;
 	resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
 	resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 6c0c6d3426e0..492c750f7ed6 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -244,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 	port_attr->gid_tbl_len = dev_attr->max_sgid;
 	port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
 				    IB_PORT_DEVICE_MGMT_SUP |
-				    IB_PORT_VENDOR_CLASS_SUP |
-				    IB_PORT_IP_BASED_GIDS;
+				    IB_PORT_VENDOR_CLASS_SUP;
+	port_attr->ip_gids = true;
 
 	port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
 	port_attr->bad_pkey_cntr = 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 5bc522ca9431..ca0f1ee26091 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -762,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
 					   IB_WIDTH_4X : IB_WIDTH_1X;
 	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 					   IB_SPEED_FDR : IB_SPEED_QDR;
-	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags	= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
 	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
 	props->pkey_tbl_len	= 1;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0f482d2a760f..d1f1beefe599 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
 				 &props->active_width);
 
-	props->port_cap_flags  |= IB_PORT_CM_SUP;
-	props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags |= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 
 	props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
 						roce_address_table_size);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 1f057fdb3a8c..9d0431e01dce 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -197,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	props->sm_lid = 0;
 	props->sm_sl = 0;
 	props->state = port_state;
-	props->port_cap_flags =
-	    IB_PORT_CM_SUP |
-	    IB_PORT_REINIT_SUP |
-	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP |
-	    IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+				IB_PORT_DEVICE_MGMT_SUP |
+				IB_PORT_VENDOR_CLASS_SUP;
+	props->ip_gids = true;
 	props->gid_tbl_len = OCRDMA_MAX_SGID;
 	props->pkey_tbl_len = 1;
 	props->bad_pkey_cntr = 0;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0c41d54f586b..b82c5d5fb0e3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -225,7 +225,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	attr->lmc = 0;
 	attr->sm_lid = 0;
 	attr->sm_sl = 0;
-	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+	attr->ip_gids = true;
 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
 		attr->gid_tbl_len = 1;
 		attr->pkey_tbl_len = 1;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index f9a46768a19a..e72562a8959a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -78,9 +78,6 @@ struct qib_verbs_txreq;
 
 #define QIB_VENDOR_IPG		cpu_to_be16(0xFFA0)
 
-/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
-#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-
 #define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
 
 /* Values for set/get portinfo VLCap OperationalVLs */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index 816cc285daf6..b65d10b0a875 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -155,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
 	props->gid_tbl_len = resp->attrs.gid_tbl_len;
 	props->port_cap_flags =
 		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
-	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags |= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 	props->max_msg_sz = resp->attrs.max_msg_sz;
 	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
 	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2f4f1768ded4..f6ba366051c7 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,39 @@ struct ib_class_port_info {
 	__be32			trap_qkey;
 };
 
+/* PortInfo CapabilityMask */
+enum ib_port_capability_mask_bits {
+	IB_PORT_SM = 1 << 1,
+	IB_PORT_NOTICE_SUP = 1 << 2,
+	IB_PORT_TRAP_SUP = 1 << 3,
+	IB_PORT_OPT_IPD_SUP = 1 << 4,
+	IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+	IB_PORT_SL_MAP_SUP = 1 << 6,
+	IB_PORT_MKEY_NVRAM = 1 << 7,
+	IB_PORT_PKEY_NVRAM = 1 << 8,
+	IB_PORT_LED_INFO_SUP = 1 << 9,
+	IB_PORT_SM_DISABLED = 1 << 10,
+	IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+	IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_PORT_CM_SUP = 1 << 16,
+	IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+	IB_PORT_REINIT_SUP = 1 << 18,
+	IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+	IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+	IB_PORT_DR_NOTICE_SUP = 1 << 21,
+	IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+	IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+	IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+	IB_PORT_CLIENT_REG_SUP = 1 << 25,
+	IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
+	IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+	IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+	IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+	IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
+	IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
+};
+
 #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
 
 struct opa_class_port_info {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 031d121190fd..98e025759791 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -432,33 +432,6 @@ enum ib_port_state {
 	IB_PORT_ACTIVE_DEFER	= 5
 };
 
-enum ib_port_cap_flags {
-	IB_PORT_SM				= 1 <<  1,
-	IB_PORT_NOTICE_SUP			= 1 <<  2,
-	IB_PORT_TRAP_SUP			= 1 <<  3,
-	IB_PORT_OPT_IPD_SUP                     = 1 <<  4,
-	IB_PORT_AUTO_MIGR_SUP			= 1 <<  5,
-	IB_PORT_SL_MAP_SUP			= 1 <<  6,
-	IB_PORT_MKEY_NVRAM			= 1 <<  7,
-	IB_PORT_PKEY_NVRAM			= 1 <<  8,
-	IB_PORT_LED_INFO_SUP			= 1 <<  9,
-	IB_PORT_SM_DISABLED			= 1 << 10,
-	IB_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
-	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
-	IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
-	IB_PORT_CM_SUP				= 1 << 16,
-	IB_PORT_SNMP_TUNNEL_SUP			= 1 << 17,
-	IB_PORT_REINIT_SUP			= 1 << 18,
-	IB_PORT_DEVICE_MGMT_SUP			= 1 << 19,
-	IB_PORT_VENDOR_CLASS_SUP		= 1 << 20,
-	IB_PORT_DR_NOTICE_SUP			= 1 << 21,
-	IB_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22,
-	IB_PORT_BOOT_MGMT_SUP			= 1 << 23,
-	IB_PORT_LINK_LATENCY_SUP		= 1 << 24,
-	IB_PORT_CLIENT_REG_SUP			= 1 << 25,
-	IB_PORT_IP_BASED_GIDS			= 1 << 26,
-};
-
 enum ib_port_width {
 	IB_WIDTH_1X	= 1,
 	IB_WIDTH_4X	= 2,
@@ -597,6 +570,9 @@ struct ib_port_attr {
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
 	int			gid_tbl_len;
+	unsigned int		grh_required:1;
+	unsigned int		ip_gids:1;
+	/* This is the value from PortInfo CapabilityMask, defined by IBA */
 	u32			port_cap_flags;
 	u32			max_msg_sz;
 	u32			bad_pkey_cntr;
@@ -612,7 +588,6 @@ struct ib_port_attr {
 	u8			active_width;
 	u8			active_speed;
 	u8                      phys_state;
-	bool			grh_required;
 };
 
 enum ib_device_modify_flags {
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 625545d862d7..1220f1df3ded 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -40,6 +40,44 @@
 #define RDMA_UAPI_PTR(_type, _name)	__aligned_u64 _name
 #endif
 
+enum ib_uverbs_query_port_cap_flags {
+	IB_UVERBS_PCF_SM = 1 << 1,
+	IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
+	IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
+	IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
+	IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
+	IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
+	IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
+	IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
+	IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
+	IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
+	IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
+	IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+	IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_UVERBS_PCF_CM_SUP = 1 << 16,
+	IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
+	IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
+	IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
+	IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
+	IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
+	IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
+	IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
+	IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
+	IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
+	/*
+	 * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
+	 * is inaccessible
+	 */
+	IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+	IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+	IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+	IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
+	IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
+
+	/* NOTE this is an internal flag, not an IBA flag */
+	IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
+};
+
 enum ib_uverbs_flow_action_esp_keymat {
 	IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
 };
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 4f9991de8e3a..0a9070abb3f8 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -279,7 +279,7 @@ struct ib_uverbs_query_port {
 };
 
 struct ib_uverbs_query_port_resp {
-	__u32 port_cap_flags;
+	__u32 port_cap_flags;		/* see ib_uverbs_query_port_cap_flags */
 	__u32 max_msg_sz;
 	__u32 bad_pkey_cntr;
 	__u32 qkey_viol_cntr;
-- 
cgit v1.2.3


From b02289b3d60f79ba0831051a7743d8fdb4110355 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 4 Jul 2018 15:57:50 +0300
Subject: RDMA: Validate grh_required when handling AVs

Extend the existing grh_required flag to check when AV's are handled that
a GRH is present.

Since we don't want to do query_port during the AV checks for performance
reasons move the flag into the immutable_data.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/sa_query.c |  8 ++++++--
 drivers/infiniband/core/verbs.c    |  3 ++-
 drivers/infiniband/hw/mlx5/main.c  | 23 ++++++++++++++++-------
 include/rdma/ib_verbs.h            | 13 ++++++++++++-
 4 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index fdfdbb2bea7a..7b794a14d6e8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -2276,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work)
 	struct ib_sa_sm_ah *new_ah;
 	struct ib_port_attr port_attr;
 	struct rdma_ah_attr   ah_attr;
+	bool grh_required;
 
 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
 		pr_warn("Couldn't query port\n");
@@ -2301,6 +2302,9 @@ static void update_sm_ah(struct work_struct *work)
 	rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
 	rdma_ah_set_port_num(&ah_attr, port->port_num);
 
+	grh_required = rdma_is_grh_required(port->agent->device,
+					    port->port_num);
+
 	/*
 	 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
 	 * differentiated from a permissive LID of 0xFFFF.  We set the
@@ -2308,11 +2312,11 @@ static void update_sm_ah(struct work_struct *work)
 	 * address handle appropriately
 	 */
 	if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
-	    (port_attr.grh_required ||
+	    (grh_required ||
 	     port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
 		rdma_ah_set_make_grd(&ah_attr, true);
 
-	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && port_attr.grh_required) {
+	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
 		rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
 		rdma_ah_set_subnet_prefix(&ah_attr,
 					  cpu_to_be64(port_attr.subnet_prefix));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 1bb6b6ff3341..b6ceb6fd6a67 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -390,7 +390,8 @@ static int rdma_check_ah_attr(struct ib_device *device,
 	if (!rdma_is_port_valid(device, ah_attr->port_num))
 		return -EINVAL;
 
-	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+	if ((rdma_is_grh_required(device, ah_attr->port_num) ||
+	     ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
 	    !(ah_attr->ah_flags & IB_AH_GRH))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d1f1beefe599..b7f94bc3811a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1220,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	props->qkey_viol_cntr	= rep->qkey_violation_counter;
 	props->subnet_timeout	= rep->subnet_timeout;
 	props->init_type_reply	= rep->init_type_reply;
-	props->grh_required	= rep->grh_required;
 
 	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
 	if (err)
@@ -4462,7 +4461,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
 		cancel_work_sync(&devr->ports[port].pkey_change_work);
 }
 
-static u32 get_core_cap_flags(struct ib_device *ibdev)
+static u32 get_core_cap_flags(struct ib_device *ibdev,
+			      struct mlx5_hca_vport_context *rep)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
@@ -4471,11 +4471,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
 	bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
 	u32 ret = 0;
 
+	if (rep->grh_required)
+		ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+
 	if (ll == IB_LINK_LAYER_INFINIBAND)
-		return RDMA_CORE_PORT_IBA_IB;
+		return ret | RDMA_CORE_PORT_IBA_IB;
 
 	if (raw_support)
-		ret = RDMA_CORE_PORT_RAW_PACKET;
+		ret |= RDMA_CORE_PORT_RAW_PACKET;
 
 	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
 		return ret;
@@ -4498,17 +4501,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
+	struct mlx5_hca_vport_context rep = {0};
 	int err;
 
-	immutable->core_cap_flags = get_core_cap_flags(ibdev);
-
 	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
+	if (ll == IB_LINK_LAYER_INFINIBAND) {
+		err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
+						   &rep);
+		if (err)
+			return err;
+	}
+
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = get_core_cap_flags(ibdev);
+	immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
 	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
 		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 98e025759791..b523298d486b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -529,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_AF_IB             0x00001000
 #define RDMA_CORE_CAP_ETH_AH            0x00002000
 #define RDMA_CORE_CAP_OPA_AH            0x00004000
+#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
 
 /* Protocol                             0xFFF00000 */
 #define RDMA_CORE_CAP_PROT_IB           0x00100000
@@ -538,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
 #define RDMA_CORE_CAP_PROT_USNIC        0x02000000
 
+#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
+					| RDMA_CORE_CAP_PROT_ROCE     \
+					| RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
+
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
 					| RDMA_CORE_CAP_IB_MAD \
 					| RDMA_CORE_CAP_IB_SMI \
@@ -570,7 +575,6 @@ struct ib_port_attr {
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
 	int			gid_tbl_len;
-	unsigned int		grh_required:1;
 	unsigned int		ip_gids:1;
 	/* This is the value from PortInfo CapabilityMask, defined by IBA */
 	u32			port_cap_flags;
@@ -2771,6 +2775,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
 		port <= rdma_end_port(device));
 }
 
+static inline bool rdma_is_grh_required(const struct ib_device *device,
+					u8 port_num)
+{
+	return device->port_immutable[port_num].core_cap_flags &
+		RDMA_CORE_PORT_IB_GRH_REQUIRED;
+}
+
 static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 {
 	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
-- 
cgit v1.2.3


From 8942acea3723ff9424dc89350d2ab6e969fdd093 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Wed, 4 Jul 2018 15:57:51 +0300
Subject: IB/uverbs: Pass IB_UVERBS_QPF_GRH_REQUIRED to user space

Userspace also needs to know if the port requires GRHs to properly form
the AVs it creates.

Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c    | 3 +++
 include/uapi/rdma/ib_user_ioctl_verbs.h | 4 ++++
 include/uapi/rdma/ib_user_verbs.h       | 3 ++-
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1bc9ceb16b70..bd6eefaecbd6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -294,6 +294,9 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
 	resp.pkey_tbl_len    = attr.pkey_tbl_len;
 
+	if (rdma_is_grh_required(ib_dev, cmd.port_num))
+		resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
 	if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
 		resp.lid     = OPA_TO_IB_UCAST_LID(attr.lid);
 		resp.sm_lid  = OPA_TO_IB_UCAST_LID(attr.sm_lid);
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 1220f1df3ded..a81d853bf25d 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -78,6 +78,10 @@ enum ib_uverbs_query_port_cap_flags {
 	IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
 };
 
+enum ib_uverbs_query_port_flags {
+	IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0,
+};
+
 enum ib_uverbs_flow_action_esp_keymat {
 	IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
 };
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 0a9070abb3f8..25a16760de2a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp {
 	__u8  active_speed;
 	__u8  phys_state;
 	__u8  link_layer;
-	__u8  reserved[2];
+	__u8  flags;			/* see ib_uverbs_query_port_flags */
+	__u8  reserved;
 };
 
 struct ib_uverbs_alloc_pd {
-- 
cgit v1.2.3


From 8d3e994241e6bcc7ead2b918c4f15b7683afa90a Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:57:58 +0100
Subject: arm_pmu: Clean up maximum period handling

Each PMU defines their max_period of the counter as the maximum
value that can be counted. Since all the PMU backends support
32bit counters by default, let us remove the redundant field.

No functional changes.

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_v6.c     |  2 --
 arch/arm/kernel/perf_event_v7.c     |  1 -
 arch/arm/kernel/perf_event_xscale.c |  2 --
 arch/arm64/kernel/perf_event.c      |  1 -
 drivers/perf/arm_pmu.c              | 16 ++++++++++++----
 include/linux/perf/arm_pmu.h        |  1 -
 6 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index be42c4f66a40..f64a6bfebcec 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -495,7 +495,6 @@ static void armv6pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= armv6pmu_stop;
 	cpu_pmu->map_event	= armv6_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 }
 
 static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
@@ -546,7 +545,6 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= armv6pmu_stop;
 	cpu_pmu->map_event	= armv6mpcore_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 5a5116794440..2cf1ca2925c8 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1170,7 +1170,6 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start		= armv7pmu_start;
 	cpu_pmu->stop		= armv7pmu_stop;
 	cpu_pmu->reset		= armv7pmu_reset;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 };
 
 static void armv7_read_num_pmnc_events(void *info)
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 88d1a76f5367..c4f029458b52 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -374,7 +374,6 @@ static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= xscale1pmu_stop;
 	cpu_pmu->map_event	= xscale_map_event;
 	cpu_pmu->num_events	= 3;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
@@ -743,7 +742,6 @@ static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop		= xscale2pmu_stop;
 	cpu_pmu->map_event	= xscale_map_event;
 	cpu_pmu->num_events	= 5;
-	cpu_pmu->max_period	= (1LLU << 32) - 1;
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 33147aacdafd..678ecffd3724 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -960,7 +960,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->start			= armv8pmu_start,
 	cpu_pmu->stop			= armv8pmu_stop,
 	cpu_pmu->reset			= armv8pmu_reset,
-	cpu_pmu->max_period		= (1LLU << 32) - 1,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
 
 	return 0;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a6347d487635..6ddc00da5373 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -28,6 +28,11 @@
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 
+static inline u64 arm_pmu_max_period(void)
+{
+	return (1ULL << 32) - 1;
+}
+
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
 				      [PERF_COUNT_HW_CACHE_MAX]
@@ -114,8 +119,10 @@ int armpmu_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = local64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
+	u64 max_period;
 	int ret = 0;
 
+	max_period = arm_pmu_max_period();
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -136,8 +143,8 @@ int armpmu_event_set_period(struct perf_event *event)
 	 * effect we are reducing max_period to account for
 	 * interrupt latency (and we are being very conservative).
 	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
+	if (left > (max_period >> 1))
+		left = (max_period >> 1);
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
@@ -153,6 +160,7 @@ u64 armpmu_event_update(struct perf_event *event)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
+	u64 max_period = arm_pmu_max_period();
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -162,7 +170,7 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
+	delta = (new_raw_count - prev_raw_count) & max_period;
 
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
@@ -402,7 +410,7 @@ __hw_perf_event_init(struct perf_event *event)
 		 * is far less likely to overtake the previous one unless
 		 * you have some serious IRQ latency issues.
 		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
+		hwc->sample_period  = arm_pmu_max_period() >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index ad5444491975..12c30a22fc8d 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -94,7 +94,6 @@ struct arm_pmu {
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
 	int		num_events;
-	u64		max_period;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
-- 
cgit v1.2.3


From 3a95200d3f89afd8b67f39d88d36cc7ec96ce385 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:57:59 +0100
Subject: arm_pmu: Change API to support 64bit counter values

Convert the {read/write}_counter APIs to handle 64bit values
to enable supporting chained event counters. The backends still
use 32bit values and we pass them 32bit values only. So in effect
there are no functional changes.

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/kernel/perf_event_v6.c     | 4 ++--
 arch/arm/kernel/perf_event_v7.c     | 4 ++--
 arch/arm/kernel/perf_event_xscale.c | 8 ++++----
 arch/arm64/kernel/perf_event.c      | 9 ++++-----
 include/linux/perf/arm_pmu.h        | 4 ++--
 5 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f64a6bfebcec..0729f9841ef4 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -233,7 +233,7 @@ armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
 	return ret;
 }
 
-static inline u32 armv6pmu_read_counter(struct perf_event *event)
+static inline u64 armv6pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -251,7 +251,7 @@ static inline u32 armv6pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv6pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv6pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 2cf1ca2925c8..973043dd6187 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -743,7 +743,7 @@ static inline void armv7_pmnc_select_counter(int idx)
 	isb();
 }
 
-static inline u32 armv7pmu_read_counter(struct perf_event *event)
+static inline u64 armv7pmu_read_counter(struct perf_event *event)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -763,7 +763,7 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index c4f029458b52..942230fe0426 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -316,7 +316,7 @@ static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32 xscale1pmu_read_counter(struct perf_event *event)
+static inline u64 xscale1pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -337,7 +337,7 @@ static inline u32 xscale1pmu_read_counter(struct perf_event *event)
 	return val;
 }
 
-static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val)
+static inline void xscale1pmu_write_counter(struct perf_event *event, u64 val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -678,7 +678,7 @@ static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static inline u32 xscale2pmu_read_counter(struct perf_event *event)
+static inline u64 xscale2pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
@@ -705,7 +705,7 @@ static inline u32 xscale2pmu_read_counter(struct perf_event *event)
 	return val;
 }
 
-static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
+static inline void xscale2pmu_write_counter(struct perf_event *event, u64 val)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int counter = hwc->idx;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 678ecffd3724..66a2ffdca6dd 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -512,7 +512,7 @@ static inline int armv8pmu_select_counter(int idx)
 	return idx;
 }
 
-static inline u32 armv8pmu_read_counter(struct perf_event *event)
+static inline u64 armv8pmu_read_counter(struct perf_event *event)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -530,7 +530,7 @@ static inline u32 armv8pmu_read_counter(struct perf_event *event)
 	return value;
 }
 
-static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u64 value)
 {
 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
@@ -545,9 +545,8 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
 		 * count using the lower 32bits and we want an interrupt when
 		 * it overflows.
 		 */
-		u64 value64 = 0xffffffff00000000ULL | value;
-
-		write_sysreg(value64, pmccntr_el0);
+		value |= 0xffffffff00000000ULL;
+		write_sysreg(value, pmccntr_el0);
 	} else if (armv8pmu_select_counter(idx) == idx)
 		write_sysreg(value, pmxevcntr_el0);
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 12c30a22fc8d..f7126a21df30 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -87,8 +87,8 @@ struct arm_pmu {
 					 struct perf_event *event);
 	int		(*set_event_filter)(struct hw_perf_event *evt,
 					    struct perf_event_attr *attr);
-	u32		(*read_counter)(struct perf_event *event);
-	void		(*write_counter)(struct perf_event *event, u32 val);
+	u64		(*read_counter)(struct perf_event *event);
+	void		(*write_counter)(struct perf_event *event, u64 val);
 	void		(*start)(struct arm_pmu *);
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
-- 
cgit v1.2.3


From e2da97d328d4951d25f6634eda7213f7257417b6 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 10 Jul 2018 09:58:00 +0100
Subject: arm_pmu: Add support for 64bit event counters

Each PMU has a set of 32bit event counters. But in some
special cases, the events could be counted using counters
which are effectively 64bit wide.

e.g, Arm V8 PMUv3 has a 64 bit cycle counter which can count
only the CPU cycles. Also, the PMU can chain the event counters
to effectively count as a 64bit counter.

Add support for tracking the events that uses 64bit counters.
This only affects the periods set for each counter in the core
driver.

Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/perf/arm_pmu.c       | 16 ++++++++++------
 include/linux/perf/arm_pmu.h |  6 ++++++
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 6ddc00da5373..8cad6b535a2c 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -28,9 +28,12 @@
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 
-static inline u64 arm_pmu_max_period(void)
+static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
-	return (1ULL << 32) - 1;
+	if (event->hw.flags & ARMPMU_EVT_64BIT)
+		return GENMASK_ULL(63, 0);
+	else
+		return GENMASK_ULL(31, 0);
 }
 
 static int
@@ -122,7 +125,7 @@ int armpmu_event_set_period(struct perf_event *event)
 	u64 max_period;
 	int ret = 0;
 
-	max_period = arm_pmu_max_period();
+	max_period = arm_pmu_event_max_period(event);
 	if (unlikely(left <= -period)) {
 		left = period;
 		local64_set(&hwc->period_left, left);
@@ -148,7 +151,7 @@ int armpmu_event_set_period(struct perf_event *event)
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
-	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);
+	armpmu->write_counter(event, (u64)(-left) & max_period);
 
 	perf_event_update_userpage(event);
 
@@ -160,7 +163,7 @@ u64 armpmu_event_update(struct perf_event *event)
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	u64 delta, prev_raw_count, new_raw_count;
-	u64 max_period = arm_pmu_max_period();
+	u64 max_period = arm_pmu_event_max_period(event);
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -368,6 +371,7 @@ __hw_perf_event_init(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int mapping;
 
+	hwc->flags = 0;
 	mapping = armpmu->map_event(event);
 
 	if (mapping < 0) {
@@ -410,7 +414,7 @@ __hw_perf_event_init(struct perf_event *event)
 		 * is far less likely to overtake the previous one unless
 		 * you have some serious IRQ latency issues.
 		 */
-		hwc->sample_period  = arm_pmu_max_period() >> 1;
+		hwc->sample_period  = arm_pmu_event_max_period(event) >> 1;
 		hwc->last_period    = hwc->sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
 	}
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index f7126a21df30..10f92e1d8e7b 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -25,6 +25,12 @@
  */
 #define ARMPMU_MAX_HWEVENTS		32
 
+/*
+ * ARM PMU hw_event flags
+ */
+/* Event uses a 64bit counter */
+#define ARMPMU_EVT_64BIT		1
+
 #define HW_OP_UNSUPPORTED		0xFFFF
 #define C(_x)				PERF_COUNT_HW_CACHE_##_x
 #define CACHE_OP_UNSUPPORTED		0xFFFF
-- 
cgit v1.2.3


From 528922afd41cdd1da6a4b33e2c82e38c1746561c Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 8 Jul 2018 13:24:39 +0300
Subject: IB: Enable uverbs_destroy_def_handler to be used by drivers

Enable uverbs_destroy_def_handler to be used by drivers and replace
current code to use it.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs.h           |  3 ---
 drivers/infiniband/core/uverbs_std_types.c |  1 +
 drivers/infiniband/hw/mlx5/devx.c          | 18 ++----------------
 include/rdma/ib_verbs.h                    |  3 +++
 4 files changed, 6 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3ddd39e435e1..d0a1a54275e5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -246,9 +246,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
-			       struct ib_uverbs_file *file,
-			       struct uverbs_attr_bundle *attrs);
 
 struct ib_uverbs_flow_spec {
 	union {
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 912519fda3ba..718c8430d364 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -215,6 +215,7 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev,
 {
 	return 0;
 }
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 192844bf6016..60ac1fbe940e 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -682,13 +682,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	return ret;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_DESTROY)(struct ib_device *ib_dev,
-				    struct ib_uverbs_file *file,
-				    struct uverbs_attr_bundle *attrs)
-{
-	return 0;
-}
-
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev,
 				   struct ib_uverbs_file *file,
 				   struct uverbs_attr_bundle *attrs)
@@ -961,13 +954,6 @@ err_obj_free:
 	return err;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_DEREG)(struct ib_device *ib_dev,
-				   struct ib_uverbs_file *file,
-				   struct uverbs_attr_bundle *attrs)
-{
-	return 0;
-}
-
 static int devx_umem_cleanup(struct ib_uobject *uobject,
 			     enum rdma_remove_reason why)
 {
@@ -1003,7 +989,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_MANDATORY));
 
-DECLARE_UVERBS_NAMED_METHOD(
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
 			MLX5_IB_OBJECT_DEVX_UMEM,
@@ -1056,7 +1042,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
 		UA_MANDATORY));
 
-DECLARE_UVERBS_NAMED_METHOD(
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
 			MLX5_IB_OBJECT_DEVX_OBJ,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b523298d486b..2696f1d730a1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4146,4 +4146,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev);
 
 struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
 
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+			       struct ib_uverbs_file *file,
+			       struct uverbs_attr_bundle *attrs);
 #endif /* IB_VERBS_H */
-- 
cgit v1.2.3


From 89c815ef07a1db0ac2cc09d06cb2d5c3d86d6322 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <contact@tzimmermann.org>
Date: Thu, 21 Jun 2018 15:21:35 +0200
Subject: drm/ttm: Introduce ttm_bo_get() and ttm_bo_put() for ref counting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TTM buffer-object interface provides ttm_bo_reference() and
ttm_bo_unref() for managing reference counts. Replacing them with
ttm_bo_get() and ttm_bo_put() aligns the API with conventions used
throughout the Linux kernel.

The implementation of ttm_bo_unref() clears the supplied pointer
to NULL. This leads to workarounds where the caller saves the
pointer's value before de-referencing the BO. ttm_bo_put() does
not clear the supplied pointer.

Signed-off-by: Thomas Zimmermann <contact@tzimmermann.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c |  8 +++++++-
 include/drm/ttm/ttm_bo_api.h | 25 ++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 5142dcb8ce39..183e67c4197a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -587,12 +587,18 @@ static void ttm_bo_release(struct kref *kref)
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 }
 
+void ttm_bo_put(struct ttm_buffer_object *bo)
+{
+	kref_put(&bo->kref, ttm_bo_release);
+}
+EXPORT_SYMBOL(ttm_bo_put);
+
 void ttm_bo_unref(struct ttm_buffer_object **p_bo)
 {
 	struct ttm_buffer_object *bo = *p_bo;
 
 	*p_bo = NULL;
-	kref_put(&bo->kref, ttm_bo_release);
+	ttm_bo_put(bo);
 }
 EXPORT_SYMBOL(ttm_bo_unref);
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index c67977aa1a0e..a01ba2032f0e 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -283,18 +283,30 @@ struct ttm_operation_ctx {
 /* when serving page fault or suspend, allow alloc anyway */
 #define TTM_OPT_FLAG_FORCE_ALLOC		0x2
 
+/**
+ * ttm_bo_get - reference a struct ttm_buffer_object
+ *
+ * @bo: The buffer object.
+ */
+static inline void ttm_bo_get(struct ttm_buffer_object *bo)
+{
+	kref_get(&bo->kref);
+}
+
 /**
  * ttm_bo_reference - reference a struct ttm_buffer_object
  *
  * @bo: The buffer object.
  *
  * Returns a refcounted pointer to a buffer object.
+ *
+ * This function is deprecated. Use @ttm_bo_get instead.
  */
 
 static inline struct ttm_buffer_object *
 ttm_bo_reference(struct ttm_buffer_object *bo)
 {
-	kref_get(&bo->kref);
+	ttm_bo_get(bo);
 	return bo;
 }
 
@@ -345,12 +357,23 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 		    struct ttm_placement *placement,
 		    struct ttm_operation_ctx *ctx);
 
+/**
+ * ttm_bo_put
+ *
+ * @bo: The buffer object.
+ *
+ * Unreference a buffer object.
+ */
+void ttm_bo_put(struct ttm_buffer_object *bo);
+
 /**
  * ttm_bo_unref
  *
  * @bo: The buffer object.
  *
  * Unreference and clear a pointer to a buffer object.
+ *
+ * This function is deprecated. Use @ttm_bo_put instead.
  */
 void ttm_bo_unref(struct ttm_buffer_object **bo);
 
-- 
cgit v1.2.3


From e96d71359e9bbea846a2111e4469a03a055dfa6f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:50 -0400
Subject: rseq: Use __u64 for rseq_cs fields, validate user inputs

Change the rseq ABI so rseq_cs start_ip, post_commit_offset and abort_ip
fields are seen as 64-bit fields by both 32-bit and 64-bit kernels rather
that ignoring the 32 upper bits on 32-bit kernels. This ensures we have a
consistent behavior for a 32-bit binary executed on 32-bit kernels and in
compat mode on 64-bit kernels.

Validating the value of abort_ip field to be below TASK_SIZE ensures the
kernel don't return to an invalid address when returning to userspace
after an abort. I don't fully trust each architecture code to consistently
deal with invalid return addresses.

Validating the value of the start_ip and post_commit_offset fields
prevents overflow on arithmetic performed on those values, used to
check whether abort_ip is within the rseq critical section.

If validation fails, the process is killed with a segmentation fault.

When the signature encountered before abort_ip does not match the expected
signature, return -EINVAL rather than -EPERM to be consistent with other
input validation return codes from rseq_get_rseq_cs().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-2-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h |  6 +++---
 kernel/rseq.c             | 14 ++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index d620fa43756c..519ad6e176d1 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -52,10 +52,10 @@ struct rseq_cs {
 	__u32 version;
 	/* enum rseq_cs_flags */
 	__u32 flags;
-	LINUX_FIELD_u32_u64(start_ip);
+	__u64 start_ip;
 	/* Offset from start_ip. */
-	LINUX_FIELD_u32_u64(post_commit_offset);
-	LINUX_FIELD_u32_u64(abort_ip);
+	__u64 post_commit_offset;
+	__u64 abort_ip;
 } __attribute__((aligned(4 * sizeof(__u64))));
 
 /*
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 22b6acf1ad63..16b38c5342f9 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -130,14 +130,20 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	urseq_cs = (struct rseq_cs __user *)ptr;
 	if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
 		return -EFAULT;
-	if (rseq_cs->version > 0)
-		return -EINVAL;
 
+	if (rseq_cs->start_ip >= TASK_SIZE ||
+	    rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
+	    rseq_cs->abort_ip >= TASK_SIZE ||
+	    rseq_cs->version > 0)
+		return -EINVAL;
+	/* Check for overflow. */
+	if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
+		return -EINVAL;
 	/* Ensure that abort_ip is not in the critical section. */
 	if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
 		return -EINVAL;
 
-	usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32));
+	usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
 	ret = get_user(sig, usig);
 	if (ret)
 		return ret;
@@ -146,7 +152,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 		printk_ratelimited(KERN_WARNING
 			"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
 			sig, current->rseq_sig, current->pid, usig);
-		return -EPERM;
+		return -EINVAL;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 0fb9a1abc8c97f858997e962694eb36b4517144e Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:52 -0400
Subject: rseq: uapi: Update uapi comments

Update rseq uapi header comments to reflect that user-space need to do
thread-local loads/stores from/to the struct rseq fields.

As a consequence of this added requirement, the kernel does not need
to perform loads/stores with single-copy atomicity.

Update the comment associated to the "flags" fields to describe
more accurately that it's only useful to facilitate single-stepping
through rseq critical sections with debuggers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-4-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h | 69 ++++++++++++++++++++++++-----------------------
 kernel/rseq.c             |  2 +-
 2 files changed, 37 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 519ad6e176d1..bf4188c13bec 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -67,28 +67,30 @@ struct rseq_cs {
 struct rseq {
 	/*
 	 * Restartable sequences cpu_id_start field. Updated by the
-	 * kernel, and read by user-space with single-copy atomicity
-	 * semantics. Aligned on 32-bit. Always contains a value in the
-	 * range of possible CPUs, although the value may not be the
-	 * actual current CPU (e.g. if rseq is not initialized). This
-	 * CPU number value should always be compared against the value
-	 * of the cpu_id field before performing a rseq commit or
-	 * returning a value read from a data structure indexed using
-	 * the cpu_id_start value.
+	 * kernel. Read by user-space with single-copy atomicity
+	 * semantics. This field should only be read by the thread which
+	 * registered this data structure. Aligned on 32-bit. Always
+	 * contains a value in the range of possible CPUs, although the
+	 * value may not be the actual current CPU (e.g. if rseq is not
+	 * initialized). This CPU number value should always be compared
+	 * against the value of the cpu_id field before performing a rseq
+	 * commit or returning a value read from a data structure indexed
+	 * using the cpu_id_start value.
 	 */
 	__u32 cpu_id_start;
 	/*
-	 * Restartable sequences cpu_id field. Updated by the kernel,
-	 * and read by user-space with single-copy atomicity semantics.
-	 * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and
-	 * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the
-	 * former means "rseq uninitialized", and latter means "rseq
-	 * initialization failed". This value is meant to be read within
-	 * rseq critical sections and compared with the cpu_id_start
-	 * value previously read, before performing the commit instruction,
-	 * or read and compared with the cpu_id_start value before returning
-	 * a value loaded from a data structure indexed using the
-	 * cpu_id_start value.
+	 * Restartable sequences cpu_id field. Updated by the kernel.
+	 * Read by user-space with single-copy atomicity semantics. This
+	 * field should only be read by the thread which registered this
+	 * data structure. Aligned on 32-bit. Values
+	 * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
+	 * have a special semantic: the former means "rseq uninitialized",
+	 * and latter means "rseq initialization failed". This value is
+	 * meant to be read within rseq critical sections and compared
+	 * with the cpu_id_start value previously read, before performing
+	 * the commit instruction, or read and compared with the
+	 * cpu_id_start value before returning a value loaded from a data
+	 * structure indexed using the cpu_id_start value.
 	 */
 	__u32 cpu_id;
 	/*
@@ -105,27 +107,28 @@ struct rseq {
 	 * targeted by the rseq_cs. Also needs to be set to NULL by user-space
 	 * before reclaiming memory that contains the targeted struct rseq_cs.
 	 *
-	 * Read and set by the kernel with single-copy atomicity semantics.
-	 * Set by user-space with single-copy atomicity semantics. Aligned
-	 * on 64-bit.
+	 * Read and set by the kernel. Set by user-space with single-copy
+	 * atomicity semantics. This field should only be updated by the
+	 * thread which registered this data structure. Aligned on 64-bit.
 	 */
 	LINUX_FIELD_u32_u64(rseq_cs);
 	/*
-	 * - RSEQ_DISABLE flag:
+	 * Restartable sequences flags field.
+	 *
+	 * This field should only be updated by the thread which
+	 * registered this data structure. Read by the kernel.
+	 * Mainly used for single-stepping through rseq critical sections
+	 * with debuggers.
 	 *
-	 * Fallback fast-track flag for single-stepping.
-	 * Set by user-space if lack of progress is detected.
-	 * Cleared by user-space after rseq finish.
-	 * Read by the kernel.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on preemption for this thread.
+	 *     Inhibit instruction sequence block restart on preemption
+	 *     for this thread.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on signal delivery for this thread.
+	 *     Inhibit instruction sequence block restart on signal
+	 *     delivery for this thread.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on migration for this thread.
+	 *     Inhibit instruction sequence block restart on migration for
+	 *     this thread.
 	 */
 	__u32 flags;
 } __attribute__((aligned(4 * sizeof(__u64))));
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2c8463acb50d..2a7748675be7 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -201,7 +201,7 @@ static int clear_rseq_cs(struct task_struct *t)
 	 * of code outside of the rseq assembly block. This performs
 	 * a lazy clear of the rseq_cs field.
 	 *
-	 * Set rseq_cs to NULL with single-copy atomicity.
+	 * Set rseq_cs to NULL.
 	 */
 	return put_user(0UL, &t->rseq->rseq_cs);
 }
-- 
cgit v1.2.3


From ec9c82e03a744e5698bd95eab872855861a821fa Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:53 -0400
Subject: rseq: uapi: Declare rseq_cs field as union, update includes

Declaring the rseq_cs field as a union between __u64 and two __u32
allows both 32-bit and 64-bit kernels to read the full __u64, and
therefore validate that a 32-bit user-space cleared the upper 32
bits, thus ensuring a consistent behavior between native 32-bit
kernels and 32-bit compat tasks on 64-bit kernels.

Check that the rseq_cs value read is < TASK_SIZE.

The asm/byteorder.h header needs to be included by rseq.h, now
that it is not using linux/types_32_64.h anymore.

Considering that only __32 and __u64 types are declared in linux/rseq.h,
the linux/types.h header should always be included for both kernel and
user-space code: including stdint.h is just for u64 and u32, which are
not used in this header at all.

Use copy_from_user()/clear_user() to interact with a 64-bit field,
because arm32 does not implement 64-bit __get_user, and ppc32 does not
64-bit get_user. Considering that the rseq_cs pointer does not need to
be loaded/stored with single-copy atomicity from the kernel anymore, we
can simply use copy_from_user()/clear_user().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-5-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h           | 27 +++++++++++++++++++--------
 kernel/rseq.c                       | 15 +++++++++------
 tools/testing/selftests/rseq/rseq.h | 11 ++++++++++-
 3 files changed, 38 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index bf4188c13bec..9a402fdb60e9 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -10,13 +10,8 @@
  * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  */
 
-#ifdef __KERNEL__
-# include <linux/types.h>
-#else
-# include <stdint.h>
-#endif
-
-#include <linux/types_32_64.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
 
 enum rseq_cpu_id_state {
 	RSEQ_CPU_ID_UNINITIALIZED		= -1,
@@ -111,7 +106,23 @@ struct rseq {
 	 * atomicity semantics. This field should only be updated by the
 	 * thread which registered this data structure. Aligned on 64-bit.
 	 */
-	LINUX_FIELD_u32_u64(rseq_cs);
+	union {
+		__u64 ptr64;
+#ifdef __LP64__
+		__u64 ptr;
+#else
+		struct {
+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
+			__u32 padding;		/* Initialized to zero. */
+			__u32 ptr32;
+#else /* LITTLE */
+			__u32 ptr32;
+			__u32 padding;		/* Initialized to zero. */
+#endif /* ENDIAN */
+		} ptr;
+#endif
+	} rseq_cs;
+
 	/*
 	 * Restartable sequences flags field.
 	 *
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2a7748675be7..c6242d8594dc 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -115,19 +115,20 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 {
 	struct rseq_cs __user *urseq_cs;
-	unsigned long ptr;
+	u64 ptr;
 	u32 __user *usig;
 	u32 sig;
 	int ret;
 
-	ret = get_user(ptr, &t->rseq->rseq_cs);
-	if (ret)
-		return ret;
+	if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
+		return -EFAULT;
 	if (!ptr) {
 		memset(rseq_cs, 0, sizeof(*rseq_cs));
 		return 0;
 	}
-	urseq_cs = (struct rseq_cs __user *)ptr;
+	if (ptr >= TASK_SIZE)
+		return -EINVAL;
+	urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
 	if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
 		return -EFAULT;
 
@@ -203,7 +204,9 @@ static int clear_rseq_cs(struct task_struct *t)
 	 *
 	 * Set rseq_cs to NULL.
 	 */
-	return put_user(0UL, &t->rseq->rseq_cs);
+	if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
+		return -EFAULT;
+	return 0;
 }
 
 /*
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index a4684112676c..f2073cfa4448 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -133,6 +133,15 @@ static inline uint32_t rseq_current_cpu(void)
 	return cpu;
 }
 
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+	__rseq_abi.rseq_cs.ptr = 0;
+#else
+	__rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
 /*
  * rseq_prepare_unload() should be invoked by each thread using rseq_finish*()
  * at least once between their last rseq_finish*() and library unload of the
@@ -143,7 +152,7 @@ static inline uint32_t rseq_current_cpu(void)
  */
 static inline void rseq_prepare_unload(void)
 {
-	__rseq_abi.rseq_cs = 0;
+	rseq_clear_rseq_cs();
 }
 
 #endif  /* RSEQ_H_ */
-- 
cgit v1.2.3


From 4f4c0acdf4652a964da869d578a3c8bf6df14ce2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:54 -0400
Subject: rseq: Remove unused types_32_64.h uapi header

This header was introduced in the 4.18 merge window, and rseq does
not need it anymore. Nuke it before the final release.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-6-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/types_32_64.h | 50 ----------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 include/uapi/linux/types_32_64.h

(limited to 'include')

diff --git a/include/uapi/linux/types_32_64.h b/include/uapi/linux/types_32_64.h
deleted file mode 100644
index 0a87ace34a57..000000000000
--- a/include/uapi/linux/types_32_64.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_TYPES_32_64_H
-#define _UAPI_LINUX_TYPES_32_64_H
-
-/*
- * linux/types_32_64.h
- *
- * Integer type declaration for pointers across 32-bit and 64-bit systems.
- *
- * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- */
-
-#ifdef __KERNEL__
-# include <linux/types.h>
-#else
-# include <stdint.h>
-#endif
-
-#include <asm/byteorder.h>
-
-#ifdef __BYTE_ORDER
-# if (__BYTE_ORDER == __BIG_ENDIAN)
-#  define LINUX_BYTE_ORDER_BIG_ENDIAN
-# else
-#  define LINUX_BYTE_ORDER_LITTLE_ENDIAN
-# endif
-#else
-# ifdef __BIG_ENDIAN
-#  define LINUX_BYTE_ORDER_BIG_ENDIAN
-# else
-#  define LINUX_BYTE_ORDER_LITTLE_ENDIAN
-# endif
-#endif
-
-#ifdef __LP64__
-# define LINUX_FIELD_u32_u64(field)			__u64 field
-# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	field = (intptr_t)v
-#else
-# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN
-#  define LINUX_FIELD_u32_u64(field)	__u32 field ## _padding, field
-#  define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	\
-	field ## _padding = 0, field = (intptr_t)v
-# else
-#  define LINUX_FIELD_u32_u64(field)	__u32 field, field ## _padding
-#  define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	\
-	field = (intptr_t)v, field ## _padding = 0
-# endif
-#endif
-
-#endif /* _UAPI_LINUX_TYPES_32_64_H */
-- 
cgit v1.2.3


From db0c8d8b031d2b5960f6407f7f2ca20e97e00605 Mon Sep 17 00:00:00 2001
From: Paulo Zanoni <paulo.r.zanoni@intel.com>
Date: Fri, 4 May 2018 13:32:51 -0700
Subject: x86/gpu: reserve ICL's graphics stolen memory

ICL changes the registers and addresses to 64 bits.

I also briefly looked at implementing an u64 version of the PCI config
read functions, but I concluded this wouldn't be trivial, so it's not
worth doing it for a single user that can't have any racing problems
while reading the register in two separate operations.

v2:
 - Scrub the development (non-public) changelog (Joonas).
 - Remove the i915.ko bits so this can be easily backported in order
   to properly avoid stolen memory even on machines without i915.ko
   (Joonas).
 - CC stable for the reasons above.

Issue: VIZ-9250
CC: stable@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: x86@kernel.org
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Fixes: 412310019a20 ("drm/i915/icl: Add initial Icelake definitions.")
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180504203252.28048-1-paulo.r.zanoni@intel.com
---
 arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++
 include/drm/i915_drm.h         |  4 +++-
 2 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bae0d32e327b..72c2cf961d44 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -340,6 +340,18 @@ static resource_size_t __init gen3_stolen_base(int num, int slot, int func,
 	return bsm & INTEL_BSM_MASK;
 }
 
+static resource_size_t __init gen11_stolen_base(int num, int slot, int func,
+						resource_size_t stolen_size)
+{
+	u64 bsm;
+
+	bsm = read_pci_config(num, slot, func, INTEL_GEN11_BSM_DW0);
+	bsm &= INTEL_BSM_MASK;
+	bsm |= (u64)read_pci_config(num, slot, func, INTEL_GEN11_BSM_DW1) << 32;
+
+	return bsm;
+}
+
 static resource_size_t __init i830_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
@@ -500,6 +512,11 @@ static const struct intel_early_ops chv_early_ops __initconst = {
 	.stolen_size = chv_stolen_size,
 };
 
+static const struct intel_early_ops gen11_early_ops __initconst = {
+	.stolen_base = gen11_stolen_base,
+	.stolen_size = gen9_stolen_size,
+};
+
 static const struct pci_device_id intel_early_ids[] __initconst = {
 	INTEL_I830_IDS(&i830_early_ops),
 	INTEL_I845G_IDS(&i845_early_ops),
@@ -531,6 +548,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
 	INTEL_CFL_IDS(&gen9_early_ops),
 	INTEL_GLK_IDS(&gen9_early_ops),
 	INTEL_CNL_IDS(&gen9_early_ops),
+	INTEL_ICL_11_IDS(&gen11_early_ops),
 };
 
 struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c9e5a6621b95..c44703f471b3 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -95,7 +95,9 @@ extern struct resource intel_graphics_stolen_res;
 #define    I845_TSEG_SIZE_512K	(2 << 1)
 #define    I845_TSEG_SIZE_1M	(3 << 1)
 
-#define INTEL_BSM 0x5c
+#define INTEL_BSM		0x5c
+#define INTEL_GEN11_BSM_DW0	0xc0
+#define INTEL_GEN11_BSM_DW1	0xc4
 #define   INTEL_BSM_MASK	(-(1u << 20))
 
 #endif				/* _I915_DRM_H_ */
-- 
cgit v1.2.3


From 14b470b56840dbb093abd71f214e9d63770c87b8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 22:19:07 +0200
Subject: scsi: target: sbitmap: add seq_file forward declaration

The target core runs into a warning in the linux/sbitmap.h
file in some configurations:

In file included from include/target/target_core_base.h:7,
                 from drivers/target/target_core_fabric_lib.c:41:
include/linux/sbitmap.h:331:46: error: 'struct seq_file' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
                                              ^~~~~~~~

In general, headers should not depend on others being included first,
so this fixes it with a forward declaration for that struct name, but
we probably want to merge the patch through the scsi tree to help
bisection.

Fixes: 10e9cbb6b531 ("scsi: target: Convert target drivers to use sbitmap")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/sbitmap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index e6539536dea9..804a50983ec5 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -23,6 +23,8 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 
+struct seq_file;
+
 /**
  * struct sbitmap_word - Word in a &struct sbitmap.
  */
-- 
cgit v1.2.3


From 046f6fd5daefac7f5abdafb436b30f63bc7c602b Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Fri, 6 Jul 2018 17:37:19 +0200
Subject: sched: Add Common Applications Kept Enhanced (cake) qdisc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sch_cake targets the home router use case and is intended to squeeze the
most bandwidth and latency out of even the slowest ISP links and routers,
while presenting an API simple enough that even an ISP can configure it.

Example of use on a cable ISP uplink:

tc qdisc add dev eth0 cake bandwidth 20Mbit nat docsis ack-filter

To shape a cable download link (ifb and tc-mirred setup elided)

tc qdisc add dev ifb0 cake bandwidth 200mbit nat docsis ingress wash

CAKE is filled with:

* A hybrid Codel/Blue AQM algorithm, "Cobalt", tied to an FQ_Codel
  derived Flow Queuing system, which autoconfigures based on the bandwidth.
* A novel "triple-isolate" mode (the default) which balances per-host
  and per-flow FQ even through NAT.
* An deficit based shaper, that can also be used in an unlimited mode.
* 8 way set associative hashing to reduce flow collisions to a minimum.
* A reasonable interpretation of various diffserv latency/loss tradeoffs.
* Support for zeroing diffserv markings for entering and exiting traffic.
* Support for interacting well with Docsis 3.0 shaper framing.
* Extensive support for DSL framing types.
* Support for ack filtering.
* Extensive statistics for measuring, loss, ecn markings, latency
  variation.

A paper describing the design of CAKE is available at
https://arxiv.org/abs/1804.07617, and will be published at the 2018 IEEE
International Symposium on Local and Metropolitan Area Networks (LANMAN).

This patch adds the base shaper and packet scheduler, while subsequent
commits add the optional (configurable) features. The full userspace API
and most data structures are included in this commit, but options not
understood in the base version will be ignored.

Various versions baking have been available as an out of tree build for
kernel versions going back to 3.10, as the embedded router world has been
running a few years behind mainline Linux. A stable version has been
generally available on lede-17.01 and later.

sch_cake replaces a combination of iptables, tc filter, htb and fq_codel
in the sqm-scripts, with sane defaults and vastly simpler configuration.

CAKE's principal author is Jonathan Morton, with contributions from
Kevin Darbyshire-Bryant, Toke Høiland-Jørgensen, Sebastian Moeller,
Ryan Mounce, Tony Ambardar, Dean Scarff, Nils Andreas Svee, Dave Täht,
and Loganaden Velvindron.

Testing from Pete Heist, Georgios Amanakis, and the many other members of
the cake@lists.bufferbloat.net mailing list.

tc -s qdisc show dev eth2
 qdisc cake 8017: root refcnt 2 bandwidth 1Gbit diffserv3 triple-isolate split-gso rtt 100.0ms noatm overhead 38 mpu 84
 Sent 51504294511 bytes 37724591 pkt (dropped 6, overlimits 64958695 requeues 12)
  backlog 0b 0p requeues 12
  memory used: 1053008b of 15140Kb
  capacity estimate: 970Mbit
  min/max network layer size:           28 /    1500
  min/max overhead-adjusted size:       84 /    1538
  average network hdr offset:           14
                    Bulk  Best Effort        Voice
   thresh      62500Kbit        1Gbit      250Mbit
   target          5.0ms        5.0ms        5.0ms
   interval      100.0ms      100.0ms      100.0ms
   pk_delay          5us          5us          6us
   av_delay          3us          2us          2us
   sp_delay          2us          1us          1us
   backlog            0b           0b           0b
   pkts          3164050     25030267      9530280
   bytes      3227519915  35396974782  12879808898
   way_inds            0            8            0
   way_miss           21          366           25
   way_cols            0            0            0
   drops               5            0            1
   marks               0            0            0
   ack_drop            0            0            0
   sp_flows            1            3            0
   bk_flows            0            1            1
   un_flows            0            0            0
   max_len         68130        68130        68130

Tested-by: Pete Heist <peteheist@gmail.com>
Tested-by: Georgios Amanakis <gamanakis@gmail.com>
Signed-off-by: Dave Taht <dave.taht@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  114 +++
 net/sched/Kconfig              |   11 +
 net/sched/Makefile             |    1 +
 net/sched/sch_cake.c           | 1867 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1993 insertions(+)
 create mode 100644 net/sched/sch_cake.c

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 949118461009..d9cc9dc4f547 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -955,4 +955,118 @@ enum {
 
 #define TCA_ETF_MAX (__TCA_ETF_MAX - 1)
 
+
+/* CAKE */
+enum {
+	TCA_CAKE_UNSPEC,
+	TCA_CAKE_PAD,
+	TCA_CAKE_BASE_RATE64,
+	TCA_CAKE_DIFFSERV_MODE,
+	TCA_CAKE_ATM,
+	TCA_CAKE_FLOW_MODE,
+	TCA_CAKE_OVERHEAD,
+	TCA_CAKE_RTT,
+	TCA_CAKE_TARGET,
+	TCA_CAKE_AUTORATE,
+	TCA_CAKE_MEMORY,
+	TCA_CAKE_NAT,
+	TCA_CAKE_RAW,
+	TCA_CAKE_WASH,
+	TCA_CAKE_MPU,
+	TCA_CAKE_INGRESS,
+	TCA_CAKE_ACK_FILTER,
+	TCA_CAKE_SPLIT_GSO,
+	__TCA_CAKE_MAX
+};
+#define TCA_CAKE_MAX	(__TCA_CAKE_MAX - 1)
+
+enum {
+	__TCA_CAKE_STATS_INVALID,
+	TCA_CAKE_STATS_PAD,
+	TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
+	TCA_CAKE_STATS_MEMORY_LIMIT,
+	TCA_CAKE_STATS_MEMORY_USED,
+	TCA_CAKE_STATS_AVG_NETOFF,
+	TCA_CAKE_STATS_MIN_NETLEN,
+	TCA_CAKE_STATS_MAX_NETLEN,
+	TCA_CAKE_STATS_MIN_ADJLEN,
+	TCA_CAKE_STATS_MAX_ADJLEN,
+	TCA_CAKE_STATS_TIN_STATS,
+	TCA_CAKE_STATS_DEFICIT,
+	TCA_CAKE_STATS_COBALT_COUNT,
+	TCA_CAKE_STATS_DROPPING,
+	TCA_CAKE_STATS_DROP_NEXT_US,
+	TCA_CAKE_STATS_P_DROP,
+	TCA_CAKE_STATS_BLUE_TIMER_US,
+	__TCA_CAKE_STATS_MAX
+};
+#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1)
+
+enum {
+	__TCA_CAKE_TIN_STATS_INVALID,
+	TCA_CAKE_TIN_STATS_PAD,
+	TCA_CAKE_TIN_STATS_SENT_PACKETS,
+	TCA_CAKE_TIN_STATS_SENT_BYTES64,
+	TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
+	TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
+	TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
+	TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
+	TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
+	TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
+	TCA_CAKE_TIN_STATS_TARGET_US,
+	TCA_CAKE_TIN_STATS_INTERVAL_US,
+	TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
+	TCA_CAKE_TIN_STATS_WAY_MISSES,
+	TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
+	TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
+	TCA_CAKE_TIN_STATS_AVG_DELAY_US,
+	TCA_CAKE_TIN_STATS_BASE_DELAY_US,
+	TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
+	TCA_CAKE_TIN_STATS_BULK_FLOWS,
+	TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
+	TCA_CAKE_TIN_STATS_MAX_SKBLEN,
+	TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
+	__TCA_CAKE_TIN_STATS_MAX
+};
+#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1)
+#define TC_CAKE_MAX_TINS (8)
+
+enum {
+	CAKE_FLOW_NONE = 0,
+	CAKE_FLOW_SRC_IP,
+	CAKE_FLOW_DST_IP,
+	CAKE_FLOW_HOSTS,    /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
+	CAKE_FLOW_FLOWS,
+	CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_TRIPLE,   /* = CAKE_FLOW_HOSTS  | CAKE_FLOW_FLOWS */
+	CAKE_FLOW_MAX,
+};
+
+enum {
+	CAKE_DIFFSERV_DIFFSERV3 = 0,
+	CAKE_DIFFSERV_DIFFSERV4,
+	CAKE_DIFFSERV_DIFFSERV8,
+	CAKE_DIFFSERV_BESTEFFORT,
+	CAKE_DIFFSERV_PRECEDENCE,
+	CAKE_DIFFSERV_MAX
+};
+
+enum {
+	CAKE_ACK_NONE = 0,
+	CAKE_ACK_FILTER,
+	CAKE_ACK_AGGRESSIVE,
+	CAKE_ACK_MAX
+};
+
+enum {
+	CAKE_ATM_NONE = 0,
+	CAKE_ATM_ATM,
+	CAKE_ATM_PTM,
+	CAKE_ATM_MAX
+};
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index fcc89706745b..7af246764a35 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -295,6 +295,17 @@ config NET_SCH_FQ_CODEL
 
 	  If unsure, say N.
 
+config NET_SCH_CAKE
+	tristate "Common Applications Kept Enhanced (CAKE)"
+	help
+	  Say Y here if you want to use the Common Applications Kept Enhanced
+          (CAKE) queue management algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_cake.
+
+	  If unsure, say N.
+
 config NET_SCH_FQ
 	tristate "Fair Queue"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 9a5a7077d217..673ee7d26ff2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_CAKE)	+= sch_cake.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
new file mode 100644
index 000000000000..ea0272615d63
--- /dev/null
+++ b/net/sched/sch_cake.c
@@ -0,0 +1,1867 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* COMMON Applications Kept Enhanced (CAKE) discipline
+ *
+ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
+ * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
+ * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
+ * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
+ * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+ * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
+ *
+ * The CAKE Principles:
+ *		   (or, how to have your cake and eat it too)
+ *
+ * This is a combination of several shaping, AQM and FQ techniques into one
+ * easy-to-use package:
+ *
+ * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
+ *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
+ *   eliminating the need for any sort of burst parameter (eg. token bucket
+ *   depth).  Burst support is limited to that necessary to overcome scheduling
+ *   latency.
+ *
+ * - A Diffserv-aware priority queue, giving more priority to certain classes,
+ *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
+ *   the priority is reduced to avoid starving other tins.
+ *
+ * - Each priority tin has a separate Flow Queue system, to isolate traffic
+ *   flows from each other.  This prevents a burst on one flow from increasing
+ *   the delay to another.  Flows are distributed to queues using a
+ *   set-associative hash function.
+ *
+ * - Each queue is actively managed by Cobalt, which is a combination of the
+ *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
+ *   congestion early via ECN (if available) and/or packet drops, to keep
+ *   latency low.  The codel parameters are auto-tuned based on the bandwidth
+ *   setting, as is necessary at low bandwidths.
+ *
+ * The configuration parameters are kept deliberately simple for ease of use.
+ * Everything has sane defaults.  Complete generality of configuration is *not*
+ * a goal.
+ *
+ * The priority queue operates according to a weighted DRR scheme, combined with
+ * a bandwidth tracker which reuses the shaper logic to detect which side of the
+ * bandwidth sharing threshold the tin is operating.  This determines whether a
+ * priority-based weight (high) or a bandwidth-based weight (low) is used for
+ * that tin in the current pass.
+ *
+ * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
+ * granted us permission to leverage.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/reciprocal_div.h>
+#include <net/netlink.h>
+#include <linux/version.h>
+#include <linux/if_vlan.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/flow_dissector.h>
+
+#define CAKE_SET_WAYS (8)
+#define CAKE_MAX_TINS (8)
+#define CAKE_QUEUES (1024)
+#define CAKE_FLOW_MASK 63
+#define CAKE_FLOW_NAT_FLAG 64
+
+/* struct cobalt_params - contains codel and blue parameters
+ * @interval:	codel initial drop rate
+ * @target:     maximum persistent sojourn time & blue update rate
+ * @mtu_time:   serialisation delay of maximum-size packet
+ * @p_inc:      increment of blue drop probability (0.32 fxp)
+ * @p_dec:      decrement of blue drop probability (0.32 fxp)
+ */
+struct cobalt_params {
+	u64	interval;
+	u64	target;
+	u64	mtu_time;
+	u32	p_inc;
+	u32	p_dec;
+};
+
+/* struct cobalt_vars - contains codel and blue variables
+ * @count:		codel dropping frequency
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
+ * @drop_next:		time to drop next packet, or when we dropped last
+ * @blue_timer:		Blue time to next drop
+ * @p_drop:		BLUE drop probability (0.32 fxp)
+ * @dropping:		set if in dropping state
+ * @ecn_marked:		set if marked
+ */
+struct cobalt_vars {
+	u32	count;
+	u32	rec_inv_sqrt;
+	ktime_t	drop_next;
+	ktime_t	blue_timer;
+	u32     p_drop;
+	bool	dropping;
+	bool    ecn_marked;
+};
+
+enum {
+	CAKE_SET_NONE = 0,
+	CAKE_SET_SPARSE,
+	CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
+	CAKE_SET_BULK,
+	CAKE_SET_DECAYING
+};
+
+struct cake_flow {
+	/* this stuff is all needed per-flow at dequeue time */
+	struct sk_buff	  *head;
+	struct sk_buff	  *tail;
+	struct list_head  flowchain;
+	s32		  deficit;
+	u32		  dropped;
+	struct cobalt_vars cvars;
+	u16		  srchost; /* index into cake_host table */
+	u16		  dsthost;
+	u8		  set;
+}; /* please try to keep this structure <= 64 bytes */
+
+struct cake_host {
+	u32 srchost_tag;
+	u32 dsthost_tag;
+	u16 srchost_refcnt;
+	u16 dsthost_refcnt;
+};
+
+struct cake_heap_entry {
+	u16 t:3, b:10;
+};
+
+struct cake_tin_data {
+	struct cake_flow flows[CAKE_QUEUES];
+	u32	backlogs[CAKE_QUEUES];
+	u32	tags[CAKE_QUEUES]; /* for set association */
+	u16	overflow_idx[CAKE_QUEUES];
+	struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
+	u16	flow_quantum;
+
+	struct cobalt_params cparams;
+	u32	drop_overlimit;
+	u16	bulk_flow_count;
+	u16	sparse_flow_count;
+	u16	decaying_flow_count;
+	u16	unresponsive_flow_count;
+
+	u32	max_skblen;
+
+	struct list_head new_flows;
+	struct list_head old_flows;
+	struct list_head decaying_flows;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	ktime_t	time_next_packet;
+	u64	tin_rate_ns;
+	u64	tin_rate_bps;
+	u16	tin_rate_shft;
+
+	u16	tin_quantum_prio;
+	u16	tin_quantum_band;
+	s32	tin_deficit;
+	u32	tin_backlog;
+	u32	tin_dropped;
+	u32	tin_ecn_mark;
+
+	u32	packets;
+	u64	bytes;
+
+	u32	ack_drops;
+
+	/* moving averages */
+	u64 avge_delay;
+	u64 peak_delay;
+	u64 base_delay;
+
+	/* hash function stats */
+	u32	way_directs;
+	u32	way_hits;
+	u32	way_misses;
+	u32	way_collisions;
+}; /* number of tins is small, so size of this struct doesn't matter much */
+
+struct cake_sched_data {
+	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
+	struct cake_tin_data *tins;
+
+	struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
+	u16		overflow_timeout;
+
+	u16		tin_cnt;
+	u8		tin_mode;
+	u8		flow_mode;
+	u8		ack_filter;
+	u8		atm_mode;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	u16		rate_shft;
+	ktime_t		time_next_packet;
+	ktime_t		failsafe_next_packet;
+	u64		rate_ns;
+	u64		rate_bps;
+	u16		rate_flags;
+	s16		rate_overhead;
+	u16		rate_mpu;
+	u64		interval;
+	u64		target;
+
+	/* resource tracking */
+	u32		buffer_used;
+	u32		buffer_max_used;
+	u32		buffer_limit;
+	u32		buffer_config_limit;
+
+	/* indices for dequeue */
+	u16		cur_tin;
+	u16		cur_flow;
+
+	struct qdisc_watchdog watchdog;
+	const u8	*tin_index;
+	const u8	*tin_order;
+
+	/* bandwidth capacity estimate */
+	ktime_t		last_packet_time;
+	ktime_t		avg_window_begin;
+	u64		avg_packet_interval;
+	u64		avg_window_bytes;
+	u64		avg_peak_bandwidth;
+	ktime_t		last_reconfig_time;
+
+	/* packet length stats */
+	u32		avg_netoff;
+	u16		max_netlen;
+	u16		max_adjlen;
+	u16		min_netlen;
+	u16		min_adjlen;
+};
+
+enum {
+	CAKE_FLAG_OVERHEAD	   = BIT(0),
+	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+	CAKE_FLAG_INGRESS	   = BIT(2),
+	CAKE_FLAG_WASH		   = BIT(3),
+	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
+};
+
+/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+ * obtain the best features of each.  Codel is excellent on flows which
+ * respond to congestion signals in a TCP-like way.  BLUE is more effective on
+ * unresponsive flows.
+ */
+
+struct cobalt_skb_cb {
+	ktime_t enqueue_time;
+};
+
+static u64 us_to_ns(u64 us)
+{
+	return us * NSEC_PER_USEC;
+}
+
+static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
+	return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
+{
+	return get_cobalt_cb(skb)->enqueue_time;
+}
+
+static void cobalt_set_enqueue_time(struct sk_buff *skb,
+				    ktime_t now)
+{
+	get_cobalt_cb(skb)->enqueue_time = now;
+}
+
+static u16 quantum_div[CAKE_QUEUES + 1] = {0};
+
+#define REC_INV_SQRT_CACHE (16)
+static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+
+/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+
+static void cobalt_newton_step(struct cobalt_vars *vars)
+{
+	u32 invsqrt, invsqrt2;
+	u64 val;
+
+	invsqrt = vars->rec_inv_sqrt;
+	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+	val >>= 2; /* avoid overflow in following multiply */
+	val = (val * invsqrt) >> (32 - 2 + 1);
+
+	vars->rec_inv_sqrt = val;
+}
+
+static void cobalt_invsqrt(struct cobalt_vars *vars)
+{
+	if (vars->count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+	else
+		cobalt_newton_step(vars);
+}
+
+/* There is a big difference in timing between the accurate values placed in
+ * the cache and the approximations given by a single Newton step for small
+ * count values, particularly when stepping from count 1 to 2 or vice versa.
+ * Above 16, a single Newton step gives sufficient accuracy in either
+ * direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give
+ * the value that *should* have been produced at count 4.
+ */
+
+static void cobalt_cache_init(void)
+{
+	struct cobalt_vars v;
+
+	memset(&v, 0, sizeof(v));
+	v.rec_inv_sqrt = ~0U;
+	cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
+
+	for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+
+		cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
+	}
+}
+
+static void cobalt_vars_init(struct cobalt_vars *vars)
+{
+	memset(vars, 0, sizeof(*vars));
+
+	if (!cobalt_rec_inv_sqrt_cache[0]) {
+		cobalt_cache_init();
+		cobalt_rec_inv_sqrt_cache[0] = ~0;
+	}
+}
+
+/* CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static ktime_t cobalt_control(ktime_t t,
+			      u64 interval,
+			      u32 rec_inv_sqrt)
+{
+	return ktime_add_ns(t, reciprocal_scale(interval,
+						rec_inv_sqrt));
+}
+
+/* Call this when a packet had to be dropped due to queue overflow.  Returns
+ * true if the BLUE state was quiescent before but active after this call.
+ */
+static bool cobalt_queue_full(struct cobalt_vars *vars,
+			      struct cobalt_params *p,
+			      ktime_t now)
+{
+	bool up = false;
+
+	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		up = !vars->p_drop;
+		vars->p_drop += p->p_inc;
+		if (vars->p_drop < p->p_inc)
+			vars->p_drop = ~0;
+		vars->blue_timer = now;
+	}
+	vars->dropping = true;
+	vars->drop_next = now;
+	if (!vars->count)
+		vars->count = 1;
+
+	return up;
+}
+
+/* Call this when the queue was serviced but turned out to be empty.  Returns
+ * true if the BLUE state was active before but quiescent after this call.
+ */
+static bool cobalt_queue_empty(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now)
+{
+	bool down = false;
+
+	if (vars->p_drop &&
+	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		if (vars->p_drop < p->p_dec)
+			vars->p_drop = 0;
+		else
+			vars->p_drop -= p->p_dec;
+		vars->blue_timer = now;
+		down = !vars->p_drop;
+	}
+	vars->dropping = false;
+
+	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
+		vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+	}
+
+	return down;
+}
+
+/* Call this with a freshly dequeued packet for possible congestion marking.
+ * Returns true as an instruction to drop the packet, false for delivery.
+ */
+static bool cobalt_should_drop(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now,
+			       struct sk_buff *skb)
+{
+	bool next_due, over_target, drop = false;
+	ktime_t schedule;
+	u64 sojourn;
+
+/* The 'schedule' variable records, in its sign, whether 'now' is before or
+ * after 'drop_next'.  This allows 'drop_next' to be updated before the next
+ * scheduling decision is actually branched, without destroying that
+ * information.  Similarly, the first 'schedule' value calculated is preserved
+ * in the boolean 'next_due'.
+ *
+ * As for 'drop_next', we take advantage of the fact that 'interval' is both
+ * the delay between first exceeding 'target' and the first signalling event,
+ * *and* the scaling factor for the signalling frequency.  It's therefore very
+ * natural to use a single mechanism for both purposes, and eliminates a
+ * significant amount of reference Codel's spaghetti code.  To help with this,
+ * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
+ * as possible to 1.0 in fixed-point.
+ */
+
+	sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	schedule = ktime_sub(now, vars->drop_next);
+	over_target = sojourn > p->target &&
+		      sojourn > p->mtu_time * 4;
+	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+
+	vars->ecn_marked = false;
+
+	if (over_target) {
+		if (!vars->dropping) {
+			vars->dropping = true;
+			vars->drop_next = cobalt_control(now,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+		}
+		if (!vars->count)
+			vars->count = 1;
+	} else if (vars->dropping) {
+		vars->dropping = false;
+	}
+
+	if (next_due && vars->dropping) {
+		/* Use ECN mark if possible, otherwise drop */
+		drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
+
+		vars->count++;
+		if (!vars->count)
+			vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+		schedule = ktime_sub(now, vars->drop_next);
+	} else {
+		while (next_due) {
+			vars->count--;
+			cobalt_invsqrt(vars);
+			vars->drop_next = cobalt_control(vars->drop_next,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+			schedule = ktime_sub(now, vars->drop_next);
+			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+		}
+	}
+
+	/* Simple BLUE implementation.  Lack of ECN is deliberate. */
+	if (vars->p_drop)
+		drop |= (prandom_u32() < vars->p_drop);
+
+	/* Overload the drop_next field as an activity timeout */
+	if (!vars->count)
+		vars->drop_next = ktime_add_ns(now, p->interval);
+	else if (ktime_to_ns(schedule) > 0 && !drop)
+		vars->drop_next = now;
+
+	return drop;
+}
+
+/* Cake has several subtle multiple bit settings. In these cases you
+ *  would be matching triple isolate mode as well.
+ */
+
+static bool cake_dsrc(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
+}
+
+static bool cake_ddst(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
+}
+
+static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+		     int flow_mode)
+{
+	u32 flow_hash = 0, srchost_hash, dsthost_hash;
+	u16 reduced_hash, srchost_idx, dsthost_idx;
+	struct flow_keys keys, host_keys;
+
+	if (unlikely(flow_mode == CAKE_FLOW_NONE))
+		return 0;
+
+	skb_flow_dissect_flow_keys(skb, &keys,
+				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+	/* flow_hash_from_keys() sorts the addresses by value, so we have
+	 * to preserve their order in a separate data structure to treat
+	 * src and dst host addresses as independently selectable.
+	 */
+	host_keys = keys;
+	host_keys.ports.ports     = 0;
+	host_keys.basic.ip_proto  = 0;
+	host_keys.keyid.keyid     = 0;
+	host_keys.tags.flow_label = 0;
+
+	switch (host_keys.control.addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		host_keys.addrs.v4addrs.src = 0;
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		host_keys.addrs.v4addrs.dst = 0;
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+		memset(&host_keys.addrs.v6addrs.src, 0,
+		       sizeof(host_keys.addrs.v6addrs.src));
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		memset(&host_keys.addrs.v6addrs.dst, 0,
+		       sizeof(host_keys.addrs.v6addrs.dst));
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	default:
+		dsthost_hash = 0;
+		srchost_hash = 0;
+	}
+
+	/* This *must* be after the above switch, since as a
+	 * side-effect it sorts the src and dst addresses.
+	 */
+	if (flow_mode & CAKE_FLOW_FLOWS)
+		flow_hash = flow_hash_from_keys(&keys);
+
+	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
+		if (flow_mode & CAKE_FLOW_SRC_IP)
+			flow_hash ^= srchost_hash;
+
+		if (flow_mode & CAKE_FLOW_DST_IP)
+			flow_hash ^= dsthost_hash;
+	}
+
+	reduced_hash = flow_hash % CAKE_QUEUES;
+
+	/* set-associative hashing */
+	/* fast path if no hash collision (direct lookup succeeds) */
+	if (likely(q->tags[reduced_hash] == flow_hash &&
+		   q->flows[reduced_hash].set)) {
+		q->way_directs++;
+	} else {
+		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
+		u32 outer_hash = reduced_hash - inner_hash;
+		bool allocate_src = false;
+		bool allocate_dst = false;
+		u32 i, k;
+
+		/* check if any active queue in the set is reserved for
+		 * this flow.
+		 */
+		for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+		     i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (q->tags[outer_hash + k] == flow_hash) {
+				if (i)
+					q->way_hits++;
+
+				if (!q->flows[outer_hash + k].set) {
+					/* need to increment host refcnts */
+					allocate_src = cake_dsrc(flow_mode);
+					allocate_dst = cake_ddst(flow_mode);
+				}
+
+				goto found;
+			}
+		}
+
+		/* no queue is reserved for this flow, look for an
+		 * empty one.
+		 */
+		for (i = 0; i < CAKE_SET_WAYS;
+			 i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (!q->flows[outer_hash + k].set) {
+				q->way_misses++;
+				allocate_src = cake_dsrc(flow_mode);
+				allocate_dst = cake_ddst(flow_mode);
+				goto found;
+			}
+		}
+
+		/* With no empty queues, default to the original
+		 * queue, accept the collision, update the host tags.
+		 */
+		q->way_collisions++;
+		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+		allocate_src = cake_dsrc(flow_mode);
+		allocate_dst = cake_ddst(flow_mode);
+found:
+		/* reserve queue for future packets in same flow */
+		reduced_hash = outer_hash + k;
+		q->tags[reduced_hash] = flow_hash;
+
+		if (allocate_src) {
+			srchost_idx = srchost_hash % CAKE_QUEUES;
+			inner_hash = srchost_idx % CAKE_SET_WAYS;
+			outer_hash = srchost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].srchost_tag ==
+				    srchost_hash)
+					goto found_src;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].srchost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+found_src:
+			srchost_idx = outer_hash + k;
+			q->hosts[srchost_idx].srchost_refcnt++;
+			q->flows[reduced_hash].srchost = srchost_idx;
+		}
+
+		if (allocate_dst) {
+			dsthost_idx = dsthost_hash % CAKE_QUEUES;
+			inner_hash = dsthost_idx % CAKE_SET_WAYS;
+			outer_hash = dsthost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].dsthost_tag ==
+				    dsthost_hash)
+					goto found_dst;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+found_dst:
+			dsthost_idx = outer_hash + k;
+			q->hosts[dsthost_idx].dsthost_refcnt++;
+			q->flows[reduced_hash].dsthost = dsthost_idx;
+		}
+	}
+
+	return reduced_hash;
+}
+
+/* helper functions : might be changed when/if skb use a standard list_head */
+/* remove one skb from head of slot queue */
+
+static struct sk_buff *dequeue_head(struct cake_flow *flow)
+{
+	struct sk_buff *skb = flow->head;
+
+	if (skb) {
+		flow->head = skb->next;
+		skb->next = NULL;
+	}
+
+	return skb;
+}
+
+/* add skb to flow queue (tail add) */
+
+static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
+{
+	if (!flow->head)
+		flow->head = skb;
+	else
+		flow->tail->next = skb;
+	flow->tail = skb;
+	skb->next = NULL;
+}
+
+static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
+{
+	avg -= avg >> shift;
+	avg += sample >> shift;
+	return avg;
+}
+
+static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+	struct cake_heap_entry jj = q->overflow_heap[j];
+
+	q->overflow_heap[i] = jj;
+	q->overflow_heap[j] = ii;
+
+	q->tins[ii.t].overflow_idx[ii.b] = j;
+	q->tins[jj.t].overflow_idx[jj.b] = i;
+}
+
+static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+
+	return q->tins[ii.t].backlogs[ii.b];
+}
+
+static void cake_heapify(struct cake_sched_data *q, u16 i)
+{
+	static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
+	u32 mb = cake_heap_get_backlog(q, i);
+	u32 m = i;
+
+	while (m < a) {
+		u32 l = m + m + 1;
+		u32 r = l + 1;
+
+		if (l < a) {
+			u32 lb = cake_heap_get_backlog(q, l);
+
+			if (lb > mb) {
+				m  = l;
+				mb = lb;
+			}
+		}
+
+		if (r < a) {
+			u32 rb = cake_heap_get_backlog(q, r);
+
+			if (rb > mb) {
+				m  = r;
+				mb = rb;
+			}
+		}
+
+		if (m != i) {
+			cake_heap_swap(q, i, m);
+			i = m;
+		} else {
+			break;
+		}
+	}
+}
+
+static void cake_heapify_up(struct cake_sched_data *q, u16 i)
+{
+	while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
+		u16 p = (i - 1) >> 1;
+		u32 ib = cake_heap_get_backlog(q, i);
+		u32 pb = cake_heap_get_backlog(q, p);
+
+		if (ib > pb) {
+			cake_heap_swap(q, i, p);
+			i = p;
+		} else {
+			break;
+		}
+	}
+}
+
+static int cake_advance_shaper(struct cake_sched_data *q,
+			       struct cake_tin_data *b,
+			       struct sk_buff *skb,
+			       ktime_t now, bool drop)
+{
+	u32 len = qdisc_pkt_len(skb);
+
+	/* charge packet bandwidth to this tin
+	 * and to the global shaper.
+	 */
+	if (q->rate_ns) {
+		u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
+		u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
+		u64 failsafe_dur = global_dur + (global_dur >> 1);
+
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = ktime_add_ns(b->time_next_packet,
+							   tin_dur);
+
+		else if (ktime_before(b->time_next_packet,
+				      ktime_add_ns(now, tin_dur)))
+			b->time_next_packet = ktime_add_ns(now, tin_dur);
+
+		q->time_next_packet = ktime_add_ns(q->time_next_packet,
+						   global_dur);
+		if (!drop)
+			q->failsafe_next_packet = \
+				ktime_add_ns(q->failsafe_next_packet,
+					     failsafe_dur);
+	}
+	return len;
+}
+
+static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	ktime_t now = ktime_get();
+	u32 idx = 0, tin = 0, len;
+	struct cake_heap_entry qq;
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	struct sk_buff *skb;
+
+	if (!q->overflow_timeout) {
+		int i;
+		/* Build fresh max-heap */
+		for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+			cake_heapify(q, i);
+	}
+	q->overflow_timeout = 65535;
+
+	/* select longest queue for pruning */
+	qq  = q->overflow_heap[0];
+	tin = qq.t;
+	idx = qq.b;
+
+	b = &q->tins[tin];
+	flow = &b->flows[idx];
+	skb = dequeue_head(flow);
+	if (unlikely(!skb)) {
+		/* heap has gone wrong, rebuild it next time */
+		q->overflow_timeout = 0;
+		return idx + (tin << 16);
+	}
+
+	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
+		b->unresponsive_flow_count++;
+
+	len = qdisc_pkt_len(skb);
+	q->buffer_used      -= skb->truesize;
+	b->backlogs[idx]    -= len;
+	b->tin_backlog      -= len;
+	sch->qstats.backlog -= len;
+	qdisc_tree_reduce_backlog(sch, 1, len);
+
+	flow->dropped++;
+	b->tin_dropped++;
+	sch->qstats.drops++;
+
+	__qdisc_drop(skb, to_free);
+	sch->q.qlen--;
+
+	cake_heapify(q, 0);
+
+	return idx + (tin << 16);
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data *t,
+			 struct sk_buff *skb, int flow_mode, int *qerr)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *filter;
+	struct tcf_result res;
+	int result;
+
+	filter = rcu_dereference_bh(q->filter_list);
+	if (!filter)
+		return cake_hash(t, skb, flow_mode) + 1;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tcf_classify(skb, filter, &res, false);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#endif
+		if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
+			return TC_H_MIN(res.classid);
+	}
+	return 0;
+}
+
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int len = qdisc_pkt_len(skb);
+	int uninitialized_var(ret);
+	ktime_t now = ktime_get();
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	u32 idx, tin;
+
+	tin = 0;
+	b = &q->tins[tin];
+
+	/* choose flow to insert into */
+	idx = cake_classify(sch, b, skb, q->flow_mode, &ret);
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
+	flow = &b->flows[idx];
+
+	/* ensure shaper state isn't stale */
+	if (!b->tin_backlog) {
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = now;
+
+		if (!sch->q.qlen) {
+			if (ktime_before(q->time_next_packet, now)) {
+				q->failsafe_next_packet = now;
+				q->time_next_packet = now;
+			} else if (ktime_after(q->time_next_packet, now) &&
+				   ktime_after(q->failsafe_next_packet, now)) {
+				u64 next = \
+					min(ktime_to_ns(q->time_next_packet),
+					    ktime_to_ns(
+						   q->failsafe_next_packet));
+				sch->qstats.overlimits++;
+				qdisc_watchdog_schedule_ns(&q->watchdog, next);
+			}
+		}
+	}
+
+	if (unlikely(len > b->max_skblen))
+		b->max_skblen = len;
+
+	cobalt_set_enqueue_time(skb, now);
+	flow_queue_add(flow, skb);
+
+	sch->q.qlen++;
+	q->buffer_used      += skb->truesize;
+
+	/* stats */
+	b->packets++;
+	b->bytes	    += len;
+	b->backlogs[idx]    += len;
+	b->tin_backlog      += len;
+	sch->qstats.backlog += len;
+	q->avg_window_bytes += len;
+
+	if (q->overflow_timeout)
+		cake_heapify_up(q, b->overflow_idx[idx]);
+
+	/* incoming bandwidth capacity estimate */
+	q->avg_window_bytes = 0;
+	q->last_packet_time = now;
+
+	/* flowchain */
+	if (!flow->set || flow->set == CAKE_SET_DECAYING) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+		u16 host_load = 1;
+
+		if (!flow->set) {
+			list_add_tail(&flow->flowchain, &b->new_flows);
+		} else {
+			b->decaying_flow_count--;
+			list_move_tail(&flow->flowchain, &b->new_flows);
+		}
+		flow->set = CAKE_SET_SPARSE;
+		b->sparse_flow_count++;
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_refcnt);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_refcnt);
+
+		flow->deficit = (b->flow_quantum *
+				 quantum_div[host_load]) >> 16;
+	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		/* this flow was empty, accounted as a sparse flow, but actually
+		 * in the bulk rotation.
+		 */
+		flow->set = CAKE_SET_BULK;
+		b->sparse_flow_count--;
+		b->bulk_flow_count++;
+	}
+
+	if (q->buffer_used > q->buffer_max_used)
+		q->buffer_max_used = q->buffer_used;
+
+	if (q->buffer_used > q->buffer_limit) {
+		u32 dropped = 0;
+
+		while (q->buffer_used > q->buffer_limit) {
+			dropped++;
+			cake_drop(sch, to_free);
+		}
+		b->drop_overlimit += dropped;
+	}
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_flow *flow = &b->flows[q->cur_flow];
+	struct sk_buff *skb = NULL;
+	u32 len;
+
+	if (flow->head) {
+		skb = dequeue_head(flow);
+		len = qdisc_pkt_len(skb);
+		b->backlogs[q->cur_flow] -= len;
+		b->tin_backlog		 -= len;
+		sch->qstats.backlog      -= len;
+		q->buffer_used		 -= skb->truesize;
+		sch->q.qlen--;
+
+		if (q->overflow_timeout)
+			cake_heapify(q, b->overflow_idx[q->cur_flow]);
+	}
+	return skb;
+}
+
+/* Discard leftover packets from a tin no longer in use. */
+static void cake_clear_tin(struct Qdisc *sch, u16 tin)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	q->cur_tin = tin;
+	for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
+		while (!!(skb = cake_dequeue_one(sch)))
+			kfree_skb(skb);
+}
+
+static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_host *srchost, *dsthost;
+	ktime_t now = ktime_get();
+	struct cake_flow *flow;
+	struct list_head *head;
+	bool first_flow = true;
+	struct sk_buff *skb;
+	u16 host_load;
+	u64 delay;
+	u32 len;
+
+begin:
+	if (!sch->q.qlen)
+		return NULL;
+
+	/* global hard shaper */
+	if (ktime_after(q->time_next_packet, now) &&
+	    ktime_after(q->failsafe_next_packet, now)) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		sch->qstats.overlimits++;
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+		return NULL;
+	}
+
+	/* Choose a class to work on. */
+	if (!q->rate_ns) {
+		/* In unlimited mode, can't rely on shaper timings, just balance
+		 * with DRR
+		 */
+		bool wrapped = false, empty = true;
+
+		while (b->tin_deficit < 0 ||
+		       !(b->sparse_flow_count + b->bulk_flow_count)) {
+			if (b->tin_deficit <= 0)
+				b->tin_deficit += b->tin_quantum_band;
+			if (b->sparse_flow_count + b->bulk_flow_count)
+				empty = false;
+
+			q->cur_tin++;
+			b++;
+			if (q->cur_tin >= q->tin_cnt) {
+				q->cur_tin = 0;
+				b = q->tins;
+
+				if (wrapped) {
+					/* It's possible for q->qlen to be
+					 * nonzero when we actually have no
+					 * packets anywhere.
+					 */
+					if (empty)
+						return NULL;
+				} else {
+					wrapped = true;
+				}
+			}
+		}
+	} else {
+		/* In shaped mode, choose:
+		 * - Highest-priority tin with queue and meeting schedule, or
+		 * - The earliest-scheduled tin with queue.
+		 */
+		ktime_t best_time = KTIME_MAX;
+		int tin, best_tin = 0;
+
+		for (tin = 0; tin < q->tin_cnt; tin++) {
+			b = q->tins + tin;
+			if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
+				ktime_t time_to_pkt = \
+					ktime_sub(b->time_next_packet, now);
+
+				if (ktime_to_ns(time_to_pkt) <= 0 ||
+				    ktime_compare(time_to_pkt,
+						  best_time) <= 0) {
+					best_time = time_to_pkt;
+					best_tin = tin;
+				}
+			}
+		}
+
+		q->cur_tin = best_tin;
+		b = q->tins + best_tin;
+
+		/* No point in going further if no packets to deliver. */
+		if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
+			return NULL;
+	}
+
+retry:
+	/* service this class */
+	head = &b->decaying_flows;
+	if (!first_flow || list_empty(head)) {
+		head = &b->new_flows;
+		if (list_empty(head)) {
+			head = &b->old_flows;
+			if (unlikely(list_empty(head))) {
+				head = &b->decaying_flows;
+				if (unlikely(list_empty(head)))
+					goto begin;
+			}
+		}
+	}
+	flow = list_first_entry(head, struct cake_flow, flowchain);
+	q->cur_flow = flow - b->flows;
+	first_flow = false;
+
+	/* triple isolation (modified DRR++) */
+	srchost = &b->hosts[flow->srchost];
+	dsthost = &b->hosts[flow->dsthost];
+	host_load = 1;
+
+	if (cake_dsrc(q->flow_mode))
+		host_load = max(host_load, srchost->srchost_refcnt);
+
+	if (cake_ddst(q->flow_mode))
+		host_load = max(host_load, dsthost->dsthost_refcnt);
+
+	WARN_ON(host_load > CAKE_QUEUES);
+
+	/* flow isolation (DRR++) */
+	if (flow->deficit <= 0) {
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
+		/* Keep all flows with deficits out of the sparse and decaying
+		 * rotations.  No non-empty flow can go into the decaying
+		 * rotation, so they can't get deficits
+		 */
+		if (flow->set == CAKE_SET_SPARSE) {
+			if (flow->head) {
+				b->sparse_flow_count--;
+				b->bulk_flow_count++;
+				flow->set = CAKE_SET_BULK;
+			} else {
+				/* we've moved it to the bulk rotation for
+				 * correct deficit accounting but we still want
+				 * to count it as a sparse flow, not a bulk one.
+				 */
+				flow->set = CAKE_SET_SPARSE_WAIT;
+			}
+		}
+		goto retry;
+	}
+
+	/* Retrieve a packet via the AQM */
+	while (1) {
+		skb = cake_dequeue_one(sch);
+		if (!skb) {
+			/* this queue was actually empty */
+			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
+				b->unresponsive_flow_count--;
+
+			if (flow->cvars.p_drop || flow->cvars.count ||
+			    ktime_before(now, flow->cvars.drop_next)) {
+				/* keep in the flowchain until the state has
+				 * decayed to rest
+				 */
+				list_move_tail(&flow->flowchain,
+					       &b->decaying_flows);
+				if (flow->set == CAKE_SET_BULK) {
+					b->bulk_flow_count--;
+					b->decaying_flow_count++;
+				} else if (flow->set == CAKE_SET_SPARSE ||
+					   flow->set == CAKE_SET_SPARSE_WAIT) {
+					b->sparse_flow_count--;
+					b->decaying_flow_count++;
+				}
+				flow->set = CAKE_SET_DECAYING;
+			} else {
+				/* remove empty queue from the flowchain */
+				list_del_init(&flow->flowchain);
+				if (flow->set == CAKE_SET_SPARSE ||
+				    flow->set == CAKE_SET_SPARSE_WAIT)
+					b->sparse_flow_count--;
+				else if (flow->set == CAKE_SET_BULK)
+					b->bulk_flow_count--;
+				else
+					b->decaying_flow_count--;
+
+				flow->set = CAKE_SET_NONE;
+				srchost->srchost_refcnt--;
+				dsthost->dsthost_refcnt--;
+			}
+			goto begin;
+		}
+
+		/* Last packet in queue may be marked, shouldn't be dropped */
+		if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb) ||
+		    !flow->head)
+			break;
+
+		flow->dropped++;
+		b->tin_dropped++;
+		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+		qdisc_qstats_drop(sch);
+		kfree_skb(skb);
+	}
+
+	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	qdisc_bstats_update(sch, skb);
+
+	/* collect delay stats */
+	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
+	b->peak_delay = cake_ewma(b->peak_delay, delay,
+				  delay > b->peak_delay ? 2 : 8);
+	b->base_delay = cake_ewma(b->base_delay, delay,
+				  delay < b->base_delay ? 2 : 8);
+
+	len = cake_advance_shaper(q, b, skb, now, false);
+	flow->deficit -= len;
+	b->tin_deficit -= len;
+
+	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+	} else if (!sch->q.qlen) {
+		int i;
+
+		for (i = 0; i < q->tin_cnt; i++) {
+			if (q->tins[i].decaying_flow_count) {
+				ktime_t next = \
+					ktime_add_ns(now,
+						     q->tins[i].cparams.target);
+
+				qdisc_watchdog_schedule_ns(&q->watchdog,
+							   ktime_to_ns(next));
+				break;
+			}
+		}
+	}
+
+	if (q->overflow_timeout)
+		q->overflow_timeout--;
+
+	return skb;
+}
+
+static void cake_reset(struct Qdisc *sch)
+{
+	u32 c;
+
+	for (c = 0; c < CAKE_MAX_TINS; c++)
+		cake_clear_tin(sch, c);
+}
+
+static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
+	[TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
+	[TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
+	[TCA_CAKE_ATM]		 = { .type = NLA_U32 },
+	[TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
+	[TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
+	[TCA_CAKE_RTT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_TARGET]	 = { .type = NLA_U32 },
+	[TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
+	[TCA_CAKE_MEMORY]	 = { .type = NLA_U32 },
+	[TCA_CAKE_NAT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_RAW]		 = { .type = NLA_U32 },
+	[TCA_CAKE_WASH]		 = { .type = NLA_U32 },
+	[TCA_CAKE_MPU]		 = { .type = NLA_U32 },
+	[TCA_CAKE_INGRESS]	 = { .type = NLA_U32 },
+	[TCA_CAKE_ACK_FILTER]	 = { .type = NLA_U32 },
+};
+
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+			  u64 target_ns, u64 rtt_est_ns)
+{
+	/* convert byte-rate into time-per-byte
+	 * so it will always unwedge in reasonable time.
+	 */
+	static const u64 MIN_RATE = 64;
+	u32 byte_target = mtu;
+	u64 byte_target_ns;
+	u8  rate_shft = 0;
+	u64 rate_ns = 0;
+
+	b->flow_quantum = 1514;
+	if (rate) {
+		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		rate_shft = 34;
+		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
+		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
+		while (!!(rate_ns >> 34)) {
+			rate_ns >>= 1;
+			rate_shft--;
+		}
+	} /* else unlimited, ie. zero delay */
+
+	b->tin_rate_bps  = rate;
+	b->tin_rate_ns   = rate_ns;
+	b->tin_rate_shft = rate_shft;
+
+	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
+
+	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
+	b->cparams.interval = max(rtt_est_ns +
+				     b->cparams.target - target_ns,
+				     b->cparams.target * 2);
+	b->cparams.mtu_time = byte_target_ns;
+	b->cparams.p_inc = 1 << 24; /* 1/256 */
+	b->cparams.p_dec = 1 << 20; /* 1/4096 */
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[0];
+	int c, ft = 0;
+
+	q->tin_cnt = 1;
+	cake_set_rate(b, q->rate_bps, psched_mtu(qdisc_dev(sch)),
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	b->tin_quantum_band = 65535;
+	b->tin_quantum_prio = 65535;
+
+	for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+		cake_clear_tin(sch, c);
+		q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+	}
+
+	q->rate_ns   = q->tins[ft].tin_rate_ns;
+	q->rate_shft = q->tins[ft].tin_rate_shft;
+
+	if (q->buffer_config_limit) {
+		q->buffer_limit = q->buffer_config_limit;
+	} else if (q->rate_bps) {
+		u64 t = q->rate_bps * q->interval;
+
+		do_div(t, USEC_PER_SEC / 4);
+		q->buffer_limit = max_t(u32, t, 4U << 20);
+	} else {
+		q->buffer_limit = ~0;
+	}
+
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	q->buffer_limit = min(q->buffer_limit,
+			      max(sch->limit * psched_mtu(qdisc_dev(sch)),
+				  q->buffer_config_limit));
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	int err;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CAKE_BASE_RATE64])
+		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+
+	if (tb[TCA_CAKE_FLOW_MODE])
+		q->flow_mode = (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
+				CAKE_FLOW_MASK);
+
+	if (tb[TCA_CAKE_RTT]) {
+		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+
+		if (!q->interval)
+			q->interval = 1;
+	}
+
+	if (tb[TCA_CAKE_TARGET]) {
+		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+
+		if (!q->target)
+			q->target = 1;
+	}
+
+	if (tb[TCA_CAKE_MEMORY])
+		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+
+	if (q->tins) {
+		sch_tree_lock(sch);
+		cake_reconfigure(sch);
+		sch_tree_unlock(sch);
+	}
+
+	return 0;
+}
+
+static void cake_destroy(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	qdisc_watchdog_cancel(&q->watchdog);
+	tcf_block_put(q->block);
+	kvfree(q->tins);
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+		     struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int i, j, err;
+
+	sch->limit = 10240;
+	q->tin_mode = CAKE_DIFFSERV_BESTEFFORT;
+	q->flow_mode  = CAKE_FLOW_TRIPLE;
+
+	q->rate_bps = 0; /* unlimited by default */
+
+	q->interval = 100000; /* 100ms default */
+	q->target   =   5000; /* 5ms: codel RFC argues
+			       * for 5 to 10% of interval
+			       */
+
+	q->cur_tin = 0;
+	q->cur_flow  = 0;
+
+	qdisc_watchdog_init(&q->watchdog, sch);
+
+	if (opt) {
+		int err = cake_change(sch, opt, extack);
+
+		if (err)
+			return err;
+	}
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+	if (err)
+		return err;
+
+	quantum_div[0] = ~0;
+	for (i = 1; i <= CAKE_QUEUES; i++)
+		quantum_div[i] = 65535 / i;
+
+	q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+			   GFP_KERNEL);
+	if (!q->tins)
+		goto nomem;
+
+	for (i = 0; i < CAKE_MAX_TINS; i++) {
+		struct cake_tin_data *b = q->tins + i;
+
+		INIT_LIST_HEAD(&b->new_flows);
+		INIT_LIST_HEAD(&b->old_flows);
+		INIT_LIST_HEAD(&b->decaying_flows);
+		b->sparse_flow_count = 0;
+		b->bulk_flow_count = 0;
+		b->decaying_flow_count = 0;
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			struct cake_flow *flow = b->flows + j;
+			u32 k = j * CAKE_MAX_TINS + i;
+
+			INIT_LIST_HEAD(&flow->flowchain);
+			cobalt_vars_init(&flow->cvars);
+
+			q->overflow_heap[k].t = i;
+			q->overflow_heap[k].b = j;
+			b->overflow_idx[j] = k;
+		}
+	}
+
+	cake_reconfigure(sch);
+	q->avg_peak_bandwidth = q->rate_bps;
+	q->min_netlen = ~0;
+	q->min_adjlen = ~0;
+	return 0;
+
+nomem:
+	cake_destroy(sch);
+	return -ENOMEM;
+}
+
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
+			      TCA_CAKE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
+			q->flow_mode & CAKE_FLOW_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	return -1;
+}
+
+static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tstats, *ts;
+	int i;
+
+	if (!stats)
+		return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_U64(attr, data) do {				       \
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
+					data, TCA_CAKE_STATS_PAD)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
+	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
+	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
+	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
+	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
+	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
+	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+
+#undef PUT_STAT_U32
+#undef PUT_STAT_U64
+
+	tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+	if (!tstats)
+		goto nla_put_failure;
+
+#define PUT_TSTAT_U32(attr, data) do {					\
+		if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
+			goto nla_put_failure;				\
+	} while (0)
+#define PUT_TSTAT_U64(attr, data) do {					\
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
+					data, TCA_CAKE_TIN_STATS_PAD))	\
+			goto nla_put_failure;				\
+	} while (0)
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		ts = nla_nest_start(d->skb, i + 1);
+		if (!ts)
+			goto nla_put_failure;
+
+		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
+		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
+		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+
+		PUT_TSTAT_U32(TARGET_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+		PUT_TSTAT_U32(INTERVAL_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+
+		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
+		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+
+		PUT_TSTAT_U32(PEAK_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+		PUT_TSTAT_U32(AVG_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+		PUT_TSTAT_U32(BASE_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->base_delay)));
+
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
+		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
+		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+
+		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
+					    b->decaying_flow_count);
+		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
+		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+
+		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		nla_nest_end(d->skb, ts);
+	}
+
+#undef PUT_TSTAT_U32
+#undef PUT_TSTAT_U64
+
+	nla_nest_end(d->skb, tstats);
+	return nla_nest_end(d->skb, stats);
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long cake_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
+			       u32 classid)
+{
+	return 0;
+}
+
+static void cake_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
+					struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return q->block;
+}
+
+static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
+			   struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	const struct cake_flow *flow = NULL;
+	struct gnet_stats_queue qs = { 0 };
+	struct nlattr *stats;
+	u32 idx = cl - 1;
+
+	if (idx < CAKE_QUEUES * q->tin_cnt) {
+		const struct cake_tin_data *b = &q->tins[idx / CAKE_QUEUES];
+		const struct sk_buff *skb;
+
+		flow = &b->flows[idx % CAKE_QUEUES];
+
+		if (flow->head) {
+			sch_tree_lock(sch);
+			skb = flow->head;
+			while (skb) {
+				qs.qlen++;
+				skb = skb->next;
+			}
+			sch_tree_unlock(sch);
+		}
+		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
+		qs.drops = flow->dropped;
+	}
+	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+		return -1;
+	if (flow) {
+		ktime_t now = ktime_get();
+
+		stats = nla_nest_start(d->skb, TCA_STATS_APP);
+		if (!stats)
+			return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_S32(attr, data) do {				       \
+		if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+		PUT_STAT_S32(DEFICIT, flow->deficit);
+		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
+		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
+		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
+		if (flow->cvars.p_drop) {
+			PUT_STAT_S32(BLUE_TIMER_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						     flow->cvars.blue_timer)));
+		}
+		if (flow->cvars.dropping) {
+			PUT_STAT_S32(DROP_NEXT_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						       flow->cvars.drop_next)));
+		}
+
+		if (nla_nest_end(d->skb, stats) < 0)
+			return -1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	unsigned int i, j;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			if (list_empty(&b->flows[j].flowchain) ||
+			    arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
+				arg->stop = 1;
+				break;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static const struct Qdisc_class_ops cake_class_ops = {
+	.leaf		=	cake_leaf,
+	.find		=	cake_find,
+	.tcf_block	=	cake_tcf_block,
+	.bind_tcf	=	cake_bind,
+	.unbind_tcf	=	cake_unbind,
+	.dump		=	cake_dump_class,
+	.dump_stats	=	cake_dump_class_stats,
+	.walk		=	cake_walk,
+};
+
+static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
+	.cl_ops		=	&cake_class_ops,
+	.id		=	"cake",
+	.priv_size	=	sizeof(struct cake_sched_data),
+	.enqueue	=	cake_enqueue,
+	.dequeue	=	cake_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cake_init,
+	.reset		=	cake_reset,
+	.destroy	=	cake_destroy,
+	.change		=	cake_change,
+	.dump		=	cake_dump,
+	.dump_stats	=	cake_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cake_module_init(void)
+{
+	return register_qdisc(&cake_qdisc_ops);
+}
+
+static void __exit cake_module_exit(void)
+{
+	unregister_qdisc(&cake_qdisc_ops);
+}
+
+module_init(cake_module_init)
+module_exit(cake_module_exit)
+MODULE_AUTHOR("Jonathan Morton");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("The CAKE shaper.");
-- 
cgit v1.2.3


From b60a60405fb95a688eb2ef4ef20f5fcaa7b64f68 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Fri, 6 Jul 2018 17:37:19 +0200
Subject: netfilter: Add nf_ct_get_tuple_skb global lookup function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a global netfilter function to extract a conntrack tuple from an
skb. The function uses a new function added to nf_ct_hook, which will try
to get the tuple from skb->_nfct, and do a full lookup if that fails. This
makes it possible to use the lookup function before the skb has passed
through the conntrack init hooks (e.g., in an ingress qdisc). The tuple is
copied to the caller to avoid issues with reference counting.

The function returns false if conntrack is not loaded, allowing it to be
used without incurring a module dependency on conntrack. This is used by
the NAT mode in sch_cake.

Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h         | 11 +++++++++++
 net/netfilter/core.c              | 15 +++++++++++++++
 net/netfilter/nf_conntrack_core.c | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 23b48de8c2e2..07efffd0c759 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -414,8 +414,17 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
 void nf_ct_attach(struct sk_buff *, const struct sk_buff *);
+struct nf_conntrack_tuple;
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct nf_conntrack_tuple;
+static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	return false;
+}
 #endif
 
 struct nf_conn;
@@ -424,6 +433,8 @@ enum ip_conntrack_info;
 struct nf_ct_hook {
 	int (*update)(struct net *net, struct sk_buff *skb);
 	void (*destroy)(struct nf_conntrack *);
+	bool (*get_tuple_skb)(struct nf_conntrack_tuple *,
+			      const struct sk_buff *);
 };
 extern struct nf_ct_hook __rcu *nf_ct_hook;
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54db975..dc240cb47ddf 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb)
+{
+	struct nf_ct_hook *ct_hook;
+	bool ret = false;
+
+	rcu_read_lock();
+	ct_hook = rcu_dereference(nf_ct_hook);
+	if (ct_hook)
+		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
 	.id	= NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..85ab2fd6a665 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	const struct nf_conntrack_tuple *src_tuple;
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple srctuple;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+		return true;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       NFPROTO_IPV4, dev_net(skb->dev),
+			       &srctuple))
+		return false;
+
+	hash = nf_conntrack_find_get(dev_net(skb->dev),
+				     &nf_ct_zone_dflt,
+				     &srctuple);
+	if (!hash)
+		return false;
+
+	ct = nf_ct_tuplehash_to_ctrack(hash);
+	src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+	nf_ct_put(ct);
+
+	return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ err_cachep:
 static struct nf_ct_hook nf_conntrack_hook = {
 	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
+	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)
-- 
cgit v1.2.3


From 19f391eb05b8b005f2907ddc8f284487b446abf3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:19:32 -0400
Subject: turn filp_clone_open() into inline wrapper for dentry_open()

it's exactly the same thing as
	dentry_open(&file->f_path, file->f_flags, file->f_cred)

... and rename it to file_clone_open(), while we are at it.
'filp' naming convention is bogus; sure, it's "file pointer",
but we generally don't do that kind of Hungarian notation.
Some of the instances have too many callers to touch, but this
one has only two, so let's sanitize it while we can...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_lease.c |  2 +-
 fs/binfmt_misc.c            |  2 +-
 fs/open.c                   | 20 --------------------
 include/linux/fs.h          |  5 ++++-
 4 files changed, 6 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d638c0fb3418..b54fb78a283c 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -553,7 +553,7 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
 	/* Clone the lessor file to create a new file for us */
 	DRM_DEBUG_LEASE("Allocating lease file\n");
-	lessee_file = filp_clone_open(lessor_file);
+	lessee_file = file_clone_open(lessor_file);
 	if (IS_ERR(lessee_file)) {
 		ret = PTR_ERR(lessee_file);
 		goto out_lessee;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 4b5fff31ef27..aa4a7a23ff99 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -205,7 +205,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		goto error;
 
 	if (fmt->flags & MISC_FMT_OPEN_FILE) {
-		interp_file = filp_clone_open(fmt->interp_file);
+		interp_file = file_clone_open(fmt->interp_file);
 		if (!IS_ERR(interp_file))
 			deny_write_access(interp_file);
 	} else {
diff --git a/fs/open.c b/fs/open.c
index d0e955b558ad..76c56966e297 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1063,26 +1063,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(file_open_root);
 
-struct file *filp_clone_open(struct file *oldfile)
-{
-	struct file *file;
-	int retval;
-
-	file = get_empty_filp();
-	if (IS_ERR(file))
-		return file;
-
-	file->f_flags = oldfile->f_flags;
-	retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred);
-	if (retval) {
-		put_filp(file);
-		return ERR_PTR(retval);
-	}
-
-	return file;
-}
-EXPORT_SYMBOL(filp_clone_open);
-
 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index aa9b4c169ed2..c4ca4c9c1130 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2422,7 +2422,10 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
+static inline struct file *file_clone_open(struct file *file)
+{
+	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
+}
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname_flags(const char __user *, int, int *);
-- 
cgit v1.2.3


From b4e7a7a88b5d060650094b8d3454bc521d669f6a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:17:54 -0400
Subject: drm_mode_create_lease_ioctl(): fix open-coded filp_clone_open()

Failure of ->open() should *not* be followed by fput().  Fixed by
using filp_clone_open(), which gets the cleanups right.

Cc: stable@vger.kernel.org
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/gpu/drm/drm_lease.c | 16 +---------------
 fs/internal.h               |  1 -
 include/linux/fs.h          |  1 +
 3 files changed, 2 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 50c73c0a20b9..d638c0fb3418 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -553,24 +553,13 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 
 	/* Clone the lessor file to create a new file for us */
 	DRM_DEBUG_LEASE("Allocating lease file\n");
-	path_get(&lessor_file->f_path);
-	lessee_file = alloc_file(&lessor_file->f_path,
-				 lessor_file->f_mode,
-				 fops_get(lessor_file->f_inode->i_fop));
-
+	lessee_file = filp_clone_open(lessor_file);
 	if (IS_ERR(lessee_file)) {
 		ret = PTR_ERR(lessee_file);
 		goto out_lessee;
 	}
 
-	/* Initialize the new file for DRM */
-	DRM_DEBUG_LEASE("Initializing the file with %p\n", lessee_file->f_op->open);
-	ret = lessee_file->f_op->open(lessee_file->f_inode, lessee_file);
-	if (ret)
-		goto out_lessee_file;
-
 	lessee_priv = lessee_file->private_data;
-
 	/* Change the file to a master one */
 	drm_master_put(&lessee_priv->master);
 	lessee_priv->master = lessee;
@@ -588,9 +577,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev,
 	DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n");
 	return 0;
 
-out_lessee_file:
-	fput(lessee_file);
-
 out_lessee:
 	drm_master_put(&lessee);
 
diff --git a/fs/internal.h b/fs/internal.h
index 980d005b21b4..5645b4ebf494 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -127,7 +127,6 @@ int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 
 extern int open_check_o_direct(struct file *f);
 extern int vfs_open(const struct path *, struct file *, const struct cred *);
-extern struct file *filp_clone_open(struct file *);
 
 /*
  * inode.c
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..aa9b4c169ed2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2422,6 +2422,7 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file *filp_clone_open(struct file *);
 extern int filp_close(struct file *, fl_owner_t id);
 
 extern struct filename *getname_flags(const char __user *, int, int *);
-- 
cgit v1.2.3


From baacd8d100d571aa713c3c60b1471b9962e6ec8a Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 5 Jul 2018 12:13:49 +0200
Subject: ASoC: dpcm: add rate merge to the BE stream merge

As done for format and channels, add the possibility to merge
the backend rates on the frontend rates.

This useful if the backend does not support all rates supported by the
frontend, or if several backends (cpu and codecs) with different
capabilities are connected to the same frontend.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h |  2 ++
 sound/soc/soc-pcm.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 870ba6b64817..a4915148f739 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -964,6 +964,8 @@ struct snd_soc_dai_link {
 	unsigned int dpcm_merged_format:1;
 	/* DPCM used FE & BE merged channel */
 	unsigned int dpcm_merged_chan:1;
+	/* DPCM used FE & BE merged rate */
+	unsigned int dpcm_merged_rate:1;
 
 	/* pmdown_time is ignored at stop */
 	unsigned int ignore_pmdown_time:1;
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 114e6c060cae..4019bc10897c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1769,6 +1769,64 @@ static void dpcm_runtime_merge_chan(struct snd_pcm_substream *substream,
 	}
 }
 
+static void dpcm_runtime_merge_rate(struct snd_pcm_substream *substream,
+				    unsigned int *rates,
+				    unsigned int *rate_min,
+				    unsigned int *rate_max)
+{
+	struct snd_soc_pcm_runtime *fe = substream->private_data;
+	struct snd_soc_dpcm *dpcm;
+	int stream = substream->stream;
+
+	if (!fe->dai_link->dpcm_merged_rate)
+		return;
+
+	/*
+	 * It returns merged BE codec channel;
+	 * if FE want to use it (= dpcm_merged_chan)
+	 */
+
+	list_for_each_entry(dpcm, &fe->dpcm[stream].be_clients, list_be) {
+		struct snd_soc_pcm_runtime *be = dpcm->be;
+		struct snd_soc_dai_driver *cpu_dai_drv =  be->cpu_dai->driver;
+		struct snd_soc_dai_driver *codec_dai_drv;
+		struct snd_soc_pcm_stream *codec_stream;
+		struct snd_soc_pcm_stream *cpu_stream;
+		int i;
+
+		if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+			cpu_stream = &cpu_dai_drv->playback;
+		else
+			cpu_stream = &cpu_dai_drv->capture;
+
+		*rate_min = max(*rate_min, cpu_stream->rate_min);
+		*rate_max = min_not_zero(*rate_max, cpu_stream->rate_max);
+		*rates = snd_pcm_rate_mask_intersect(*rates, cpu_stream->rates);
+
+		for (i = 0; i < be->num_codecs; i++) {
+			/*
+			 * Skip CODECs which don't support the current stream
+			 * type. See soc_pcm_init_runtime_hw() for more details
+			 */
+			if (!snd_soc_dai_stream_valid(be->codec_dais[i],
+						      stream))
+				continue;
+
+			codec_dai_drv = be->codec_dais[i]->driver;
+			if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+				codec_stream = &codec_dai_drv->playback;
+			else
+				codec_stream = &codec_dai_drv->capture;
+
+			*rate_min = max(*rate_min, codec_stream->rate_min);
+			*rate_max = min_not_zero(*rate_max,
+						 codec_stream->rate_max);
+			*rates = snd_pcm_rate_mask_intersect(*rates,
+						codec_stream->rates);
+		}
+	}
+}
+
 static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -1784,6 +1842,8 @@ static void dpcm_set_fe_runtime(struct snd_pcm_substream *substream)
 	dpcm_runtime_merge_format(substream, &runtime->hw.formats);
 	dpcm_runtime_merge_chan(substream, &runtime->hw.channels_min,
 				&runtime->hw.channels_max);
+	dpcm_runtime_merge_rate(substream, &runtime->hw.rates,
+				&runtime->hw.rate_min, &runtime->hw.rate_max);
 }
 
 static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd);
-- 
cgit v1.2.3


From db7a2d1809a5b6b08d138ff68837f805fc073351 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 20 Jun 2018 14:58:10 +0100
Subject: asm-generic: unistd.h: Wire up sys_rseq

The new rseq call arrived in 4.18-rc1, so provide it in the asm-generic
unistd.h for architectures such as arm64.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/uapi/asm-generic/unistd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 42990676a55e..df4bedb9b01c 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -734,9 +734,11 @@ __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 __SYSCALL(__NR_statx,     sys_statx)
 #define __NR_io_pgetevents 292
 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_rseq 293
+__SYSCALL(__NR_rseq, sys_rseq)
 
 #undef __NR_syscalls
-#define __NR_syscalls 293
+#define __NR_syscalls 294
 
 /*
  * 32 bit systems traditionally used different
-- 
cgit v1.2.3


From 03dc7a35fcc83a199121a5156c4a7a976b836682 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:19:14 +0200
Subject: ipv6: xfrm: use 64-bit timestamps

get_seconds() is deprecated because it can overflow on 32-bit
architectures.  For the xfrm_state->lastused member, we treat the data
as a 64-bit number already, so we just need to use the right accessor
that works on both 32-bit and 64-bit machines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h       | 2 +-
 net/ipv6/xfrm6_mode_ro.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a5378613a49c..1350e2cf0749 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -227,7 +227,7 @@ struct xfrm_state {
 	long		saved_tmo;
 
 	/* Last used time */
-	unsigned long		lastused;
+	time64_t		lastused;
 
 	struct page_frag xfrag;
 
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 07d36573f50b..da28e4407b8f 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -55,7 +55,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	__skb_pull(skb, hdr_len);
 	memmove(ipv6_hdr(skb), iph, hdr_len);
 
-	x->lastused = get_seconds();
+	x->lastused = ktime_get_real_seconds();
 
 	return 0;
 }
-- 
cgit v1.2.3


From fae68031f7fbc8b6db58d87830ba7ed1d696fbb1 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 6 Jul 2018 07:35:50 +0200
Subject: w1: core: match sub-nodes of bus masters in devicetree

Once a new slave device is detected, match it against all sub-nodes of the
master bus controller. If a match is found, set the slave device's of_node
pointer.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
---
 drivers/w1/w1.c    | 3 +++
 include/linux/w1.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index caef0e0fd817..890c038c25f8 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -26,6 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/hwmon.h>
+#include <linux/of.h>
 
 #include <linux/atomic.h>
 
@@ -686,6 +687,8 @@ static int __w1_attach_slave_device(struct w1_slave *sl)
 	sl->dev.bus = &w1_bus_type;
 	sl->dev.release = &w1_slave_release;
 	sl->dev.groups = w1_slave_groups;
+	sl->dev.of_node = of_find_matching_node(sl->master->dev.of_node,
+						sl->family->of_match_table);
 
 	dev_set_name(&sl->dev, "%02x-%012llx",
 		 (unsigned int) sl->reg_num.family,
diff --git a/include/linux/w1.h b/include/linux/w1.h
index 694101f744c7..3111585c371f 100644
--- a/include/linux/w1.h
+++ b/include/linux/w1.h
@@ -274,6 +274,8 @@ struct w1_family {
 
 	struct w1_family_ops	*fops;
 
+	const struct of_device_id *of_match_table;
+
 	atomic_t		refcnt;
 };
 
-- 
cgit v1.2.3


From 23ebda2fc715534d383d59ae6740d4e3ebd43798 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 11 Jul 2018 17:21:05 +0200
Subject: libata: remove ata_sff_data_xfer_noirq()

ata_sff_data_xfer_noirq() is invoked via the ->sff_data_xfer hook. The
latter is invoked by ata_pio_sector(), atapi_send_cdb() and
__atapi_pio_bytes() which in turn is invoked by ata_sff_hsm_move().
The latter function requires that the "ap->lock" lock is held which
needs to be taken with disabled interrupts.

There is no need have to have ata_sff_data_xfer_noirq() which invokes
ata_sff_data_xfer32() with disabled interrupts because at this point the
interrupts are already disabled.
Remove the function and its references to it and replace all callers
with ata_sff_data_xfer32().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 Documentation/driver-api/libata.rst |  3 +--
 drivers/ata/libata-sff.c            | 30 ------------------------------
 drivers/ata/pata_cmd640.c           |  2 +-
 drivers/ata/pata_icside.c           |  2 +-
 drivers/ata/pata_imx.c              |  2 +-
 drivers/ata/pata_legacy.c           |  6 +++---
 drivers/ata/pata_palmld.c           |  2 +-
 drivers/ata/pata_pcmcia.c           |  2 +-
 drivers/ata/pata_platform.c         |  2 +-
 drivers/ata/pata_via.c              |  2 +-
 include/linux/libata.h              |  2 --
 11 files changed, 11 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index 4adc056f7635..70e180e6b93d 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -118,8 +118,7 @@ PIO data read/write
 All bmdma-style drivers must implement this hook. This is the low-level
 operation that actually copies the data bytes during a PIO data
 transfer. Typically the driver will choose one of
-:c:func:`ata_sff_data_xfer_noirq`, :c:func:`ata_sff_data_xfer`, or
-:c:func:`ata_sff_data_xfer32`.
+:c:func:`ata_sff_data_xfer`, or :c:func:`ata_sff_data_xfer32`.
 
 ATA command execute
 ~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index cc2f2e35f4c2..c5ea0fc635e5 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -657,36 +657,6 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
 
-/**
- *	ata_sff_data_xfer_noirq - Transfer data by PIO
- *	@qc: queued command
- *	@buf: data buffer
- *	@buflen: buffer length
- *	@rw: read/write
- *
- *	Transfer data from/to the device data register by PIO. Do the
- *	transfer with interrupts disabled.
- *
- *	LOCKING:
- *	Inherited from caller.
- *
- *	RETURNS:
- *	Bytes consumed.
- */
-unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc, unsigned char *buf,
-				     unsigned int buflen, int rw)
-{
-	unsigned long flags;
-	unsigned int consumed;
-
-	local_irq_save(flags);
-	consumed = ata_sff_data_xfer32(qc, buf, buflen, rw);
-	local_irq_restore(flags);
-
-	return consumed;
-}
-EXPORT_SYMBOL_GPL(ata_sff_data_xfer_noirq);
-
 /**
  *	ata_pio_sector - Transfer a sector of data.
  *	@qc: Command on going
diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index c47caa807fa9..e3532eda7b05 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c
@@ -178,7 +178,7 @@ static struct scsi_host_template cmd640_sht = {
 static struct ata_port_operations cmd640_port_ops = {
 	.inherits	= &ata_sff_port_ops,
 	/* In theory xfer_noirq is not needed once we kill the prefetcher */
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.sff_irq_check	= cmd640_sff_irq_check,
 	.qc_issue	= cmd640_qc_issue,
 	.cable_detect	= ata_cable_40wire,
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index 188f2f2eb21f..c272f2cbb47c 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -324,7 +324,7 @@ static struct ata_port_operations pata_icside_port_ops = {
 	.inherits		= &ata_bmdma_port_ops,
 	/* no need to build any PRD tables for DMA */
 	.qc_prep		= ata_noop_qc_prep,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.bmdma_setup		= pata_icside_bmdma_setup,
 	.bmdma_start		= pata_icside_bmdma_start,
 	.bmdma_stop		= pata_icside_bmdma_stop,
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index d4caa23f5a88..108101325efd 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c
@@ -102,7 +102,7 @@ static struct scsi_host_template pata_imx_sht = {
 
 static struct ata_port_operations pata_imx_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_unknown,
 	.set_piomode		= pata_imx_set_piomode,
 };
diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c
index 53828b6c3044..8ea4b8431fc8 100644
--- a/drivers/ata/pata_legacy.c
+++ b/drivers/ata/pata_legacy.c
@@ -246,12 +246,12 @@ static const struct ata_port_operations legacy_base_port_ops = {
 
 static struct ata_port_operations simple_port_ops = {
 	.inherits	= &legacy_base_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 };
 
 static struct ata_port_operations legacy_port_ops = {
 	.inherits	= &legacy_base_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.set_mode	= legacy_set_mode,
 };
 
@@ -341,7 +341,7 @@ static unsigned int pdc_data_xfer_vlb(struct ata_queued_cmd *qc,
 		}
 		local_irq_restore(flags);
 	} else
-		buflen = ata_sff_data_xfer_noirq(qc, buf, buflen, rw);
+		buflen = ata_sff_data_xfer32(qc, buf, buflen, rw);
 
 	return buflen;
 }
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
index 8c0d7d736b7a..d071ab6864a8 100644
--- a/drivers/ata/pata_palmld.c
+++ b/drivers/ata/pata_palmld.c
@@ -44,7 +44,7 @@ static struct scsi_host_template palmld_sht = {
 
 static struct ata_port_operations palmld_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_40wire,
 };
 
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index a541eacc5e95..9b0e6c72e3f9 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -151,7 +151,7 @@ static struct scsi_host_template pcmcia_sht = {
 
 static struct ata_port_operations pcmcia_port_ops = {
 	.inherits	= &ata_sff_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 	.cable_detect	= ata_cable_40wire,
 	.set_mode	= pcmcia_set_mode,
 };
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index c503ded87bb8..d6f8f5406442 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -49,7 +49,7 @@ static struct scsi_host_template pata_platform_sht = {
 
 static struct ata_port_operations pata_platform_port_ops = {
 	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer_noirq,
+	.sff_data_xfer		= ata_sff_data_xfer32,
 	.cable_detect		= ata_cable_unknown,
 	.set_mode		= pata_platform_set_mode,
 };
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 1ca6bcab369f..fd19f1ce83aa 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -471,7 +471,7 @@ static struct ata_port_operations via_port_ops = {
 
 static struct ata_port_operations via_port_ops_noirq = {
 	.inherits	= &via_port_ops,
-	.sff_data_xfer	= ata_sff_data_xfer_noirq,
+	.sff_data_xfer	= ata_sff_data_xfer32,
 };
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..aa8583655a18 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1832,8 +1832,6 @@ extern unsigned int ata_sff_data_xfer(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern unsigned int ata_sff_data_xfer_noirq(struct ata_queued_cmd *qc,
-			unsigned char *buf, unsigned int buflen, int rw);
 extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
-- 
cgit v1.2.3


From e4f8d81c738db6d3ffdabfb8329aa2feaa310699 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 9 Jul 2018 17:48:54 -0400
Subject: cgroup/tracing: Move taking of spin lock out of trace event handlers

It is unwise to take spin locks from the handlers of trace events.
Mainly, because they can introduce lockups, because it introduces locks
in places that are normally not tested. Worse yet, because trace events
are tucked away in the include/trace/events/ directory, locks that are
taken there are forgotten about.

As a general rule, I tell people never to take any locks in a trace
event handler.

Several cgroup trace event handlers call cgroup_path() which eventually
takes the kernfs_rename_lock spinlock. This injects the spinlock in the
code without people realizing it. It also can cause issues for the
PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event
handlers are called with preemption disabled.

By moving the calculation of the cgroup_path() out of the trace event
handlers and into a macro (surrounded by a
trace_cgroup_##type##_enabled()), then we could place the cgroup_path
into a string, and pass that to the trace event. Not only does this
remove the taking of the spinlock out of the trace event handler, but
it also means that the cgroup_path() only needs to be called once (it
is currently called twice, once to get the length to reserver the
buffer for, and once again to get the path itself. Now it only needs to
be done once.

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/trace/events/cgroup.h   | 47 ++++++++++++++++++++---------------------
 kernel/cgroup/cgroup-internal.h | 26 +++++++++++++++++++++++
 kernel/cgroup/cgroup-v1.c       |  4 ++--
 kernel/cgroup/cgroup.c          | 12 ++++++-----
 4 files changed, 58 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index d74722c2ac8b..a401ff5e7847 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -53,24 +53,22 @@ DEFINE_EVENT(cgroup_root, cgroup_remount,
 
 DECLARE_EVENT_CLASS(cgroup,
 
-	TP_PROTO(struct cgroup *cgrp),
+	TP_PROTO(struct cgroup *cgrp, const char *path),
 
-	TP_ARGS(cgrp),
+	TP_ARGS(cgrp, path),
 
 	TP_STRUCT__entry(
 		__field(	int,		root			)
 		__field(	int,		id			)
 		__field(	int,		level			)
-		__dynamic_array(char,		path,
-				cgroup_path(cgrp, NULL, 0) + 1)
+		__string(	path,		path			)
 	),
 
 	TP_fast_assign(
 		__entry->root = cgrp->root->hierarchy_id;
 		__entry->id = cgrp->id;
 		__entry->level = cgrp->level;
-		cgroup_path(cgrp, __get_dynamic_array(path),
-				  __get_dynamic_array_len(path));
+		__assign_str(path, path);
 	),
 
 	TP_printk("root=%d id=%d level=%d path=%s",
@@ -79,45 +77,45 @@ DECLARE_EVENT_CLASS(cgroup,
 
 DEFINE_EVENT(cgroup, cgroup_mkdir,
 
-	TP_PROTO(struct cgroup *cgroup),
+	TP_PROTO(struct cgroup *cgrp, const char *path),
 
-	TP_ARGS(cgroup)
+	TP_ARGS(cgrp, path)
 );
 
 DEFINE_EVENT(cgroup, cgroup_rmdir,
 
-	TP_PROTO(struct cgroup *cgroup),
+	TP_PROTO(struct cgroup *cgrp, const char *path),
 
-	TP_ARGS(cgroup)
+	TP_ARGS(cgrp, path)
 );
 
 DEFINE_EVENT(cgroup, cgroup_release,
 
-	TP_PROTO(struct cgroup *cgroup),
+	TP_PROTO(struct cgroup *cgrp, const char *path),
 
-	TP_ARGS(cgroup)
+	TP_ARGS(cgrp, path)
 );
 
 DEFINE_EVENT(cgroup, cgroup_rename,
 
-	TP_PROTO(struct cgroup *cgroup),
+	TP_PROTO(struct cgroup *cgrp, const char *path),
 
-	TP_ARGS(cgroup)
+	TP_ARGS(cgrp, path)
 );
 
 DECLARE_EVENT_CLASS(cgroup_migrate,
 
-	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+	TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+		 struct task_struct *task, bool threadgroup),
 
-	TP_ARGS(dst_cgrp, task, threadgroup),
+	TP_ARGS(dst_cgrp, path, task, threadgroup),
 
 	TP_STRUCT__entry(
 		__field(	int,		dst_root		)
 		__field(	int,		dst_id			)
 		__field(	int,		dst_level		)
-		__dynamic_array(char,		dst_path,
-				cgroup_path(dst_cgrp, NULL, 0) + 1)
 		__field(	int,		pid			)
+		__string(	dst_path,	path			)
 		__string(	comm,		task->comm		)
 	),
 
@@ -125,8 +123,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 		__entry->dst_root = dst_cgrp->root->hierarchy_id;
 		__entry->dst_id = dst_cgrp->id;
 		__entry->dst_level = dst_cgrp->level;
-		cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
-				      __get_dynamic_array_len(dst_path));
+		__assign_str(dst_path, path);
 		__entry->pid = task->pid;
 		__assign_str(comm, task->comm);
 	),
@@ -138,16 +135,18 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
 
 DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
 
-	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+	TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+		 struct task_struct *task, bool threadgroup),
 
-	TP_ARGS(dst_cgrp, task, threadgroup)
+	TP_ARGS(dst_cgrp, path, task, threadgroup)
 );
 
 DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
 
-	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+	TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+		 struct task_struct *task, bool threadgroup),
 
-	TP_ARGS(dst_cgrp, task, threadgroup)
+	TP_ARGS(dst_cgrp, path, task, threadgroup)
 );
 
 #endif /* _TRACE_CGROUP_H */
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 77ff1cd6a252..75568fcf2180 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -8,6 +8,32 @@
 #include <linux/list.h>
 #include <linux/refcount.h>
 
+#define TRACE_CGROUP_PATH_LEN 1024
+extern spinlock_t trace_cgroup_path_lock;
+extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
+/*
+ * cgroup_path() takes a spin lock. It is good practice not to take
+ * spin locks within trace point handlers, as they are mostly hidden
+ * from normal view. As cgroup_path() can take the kernfs_rename_lock
+ * spin lock, it is best to not call that function from the trace event
+ * handler.
+ *
+ * Note: trace_cgroup_##type##_enabled() is a static branch that will only
+ *       be set when the trace event is enabled.
+ */
+#define TRACE_CGROUP_PATH(type, cgrp, ...)				\
+	do {								\
+		if (trace_cgroup_##type##_enabled()) {			\
+			spin_lock(&trace_cgroup_path_lock);		\
+			cgroup_path(cgrp, trace_cgroup_path,		\
+				    TRACE_CGROUP_PATH_LEN);		\
+			trace_cgroup_##type(cgrp, trace_cgroup_path,	\
+					    ##__VA_ARGS__);		\
+			spin_unlock(&trace_cgroup_path_lock);		\
+		}							\
+	} while (0)
+
 /*
  * A cgroup can be associated with multiple css_sets as different tasks may
  * belong to different cgroups on different hierarchies.  In the other
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8b4f0768efd6..51063e7a93c2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 		if (task) {
 			ret = cgroup_migrate(task, false, &mgctx);
 			if (!ret)
-				trace_cgroup_transfer_tasks(to, task, false);
+				TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
 			put_task_struct(task);
 		}
 	} while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
 
 	ret = kernfs_rename(kn, new_parent, new_name_str);
 	if (!ret)
-		trace_cgroup_rename(cgrp);
+		TRACE_CGROUP_PATH(rename, cgrp);
 
 	mutex_unlock(&cgroup_mutex);
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..4a439de621bd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
 EXPORT_SYMBOL_GPL(css_set_lock);
 #endif
 
+DEFINE_SPINLOCK(trace_cgroup_path_lock);
+char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
 /*
  * Protects cgroup_idr and css_idr so that IDs can be released without
  * grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
 	cgroup_migrate_finish(&mgctx);
 
 	if (!ret)
-		trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+		TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
 
 	return ret;
 }
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
 		struct cgroup *tcgrp;
 
 		/* cgroup release path */
-		trace_cgroup_release(cgrp);
+		TRACE_CGROUP_PATH(release, cgrp);
 
 		if (cgroup_on_dfl(cgrp))
 			cgroup_rstat_flush(cgrp);
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
 	if (ret)
 		goto out_destroy;
 
-	trace_cgroup_mkdir(cgrp);
+	TRACE_CGROUP_PATH(mkdir, cgrp);
 
 	/* let's create and online css's */
 	kernfs_activate(kn);
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
 		return 0;
 
 	ret = cgroup_destroy_locked(cgrp);
-
 	if (!ret)
-		trace_cgroup_rmdir(cgrp);
+		TRACE_CGROUP_PATH(rmdir, cgrp);
 
 	cgroup_kn_unlock(kn);
 	return ret;
-- 
cgit v1.2.3


From 777fd524ba197475a60198aa1666408662acbcbc Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 29 May 2018 19:28:38 +1000
Subject: fsi: master-gpio: Add more tracepoints

This adds a few more tracepoints that have proven useful when
debugging issues with the FSI bus.

This also makes echo_delay() use clock_zeros() instead of
open-code it in order to share the tracepoint.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
---
 drivers/fsi/fsi-master-gpio.c          | 16 +++++----
 include/trace/events/fsi_master_gpio.h | 59 ++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c
index a8445ec21b4a..40cbaf96547c 100644
--- a/drivers/fsi/fsi-master-gpio.c
+++ b/drivers/fsi/fsi-master-gpio.c
@@ -130,10 +130,17 @@ static void set_sda_output(struct fsi_master_gpio *master, int value)
 
 static void clock_zeros(struct fsi_master_gpio *master, int count)
 {
+	trace_fsi_master_gpio_clock_zeros(master, count);
 	set_sda_output(master, 1);
 	clock_toggle(master, count);
 }
 
+static void echo_delay(struct fsi_master_gpio *master)
+{
+	clock_zeros(master, master->t_echo_delay);
+}
+
+
 static void serial_in(struct fsi_master_gpio *master, struct fsi_gpio_msg *msg,
 			uint8_t num_bits)
 {
@@ -279,16 +286,19 @@ static void build_ar_command(struct fsi_master_gpio *master,
 		addr_bits = 2;
 		opcode_bits = 2;
 		opcode = FSI_GPIO_CMD_SAME_AR;
+		trace_fsi_master_gpio_cmd_same_addr(master);
 
 	} else if (check_relative_address(master, id, addr, &rel_addr)) {
 		/* 8 bits plus sign */
 		addr_bits = 9;
 		addr = rel_addr;
 		opcode = FSI_GPIO_CMD_REL_AR;
+		trace_fsi_master_gpio_cmd_rel_addr(master, rel_addr);
 
 	} else {
 		addr_bits = 21;
 		opcode = FSI_GPIO_CMD_ABS_AR;
+		trace_fsi_master_gpio_cmd_abs_addr(master, addr);
 	}
 
 	/*
@@ -337,12 +347,6 @@ static void build_epoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
 	msg_push_crc(cmd);
 }
 
-static void echo_delay(struct fsi_master_gpio *master)
-{
-	set_sda_output(master, 1);
-	clock_toggle(master, master->t_echo_delay);
-}
-
 static void build_term_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
 {
 	cmd->bits = 0;
diff --git a/include/trace/events/fsi_master_gpio.h b/include/trace/events/fsi_master_gpio.h
index 389082132433..70ef66e63e84 100644
--- a/include/trace/events/fsi_master_gpio.h
+++ b/include/trace/events/fsi_master_gpio.h
@@ -50,6 +50,22 @@ TRACE_EVENT(fsi_master_gpio_out,
 	)
 );
 
+TRACE_EVENT(fsi_master_gpio_clock_zeros,
+	TP_PROTO(const struct fsi_master_gpio *master, int clocks),
+	TP_ARGS(master, clocks),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(int,	clocks)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->clocks = clocks;
+	),
+	TP_printk("fsi-gpio%d clock %d zeros",
+		  __entry->master_idx, __entry->clocks
+	)
+);
+
 TRACE_EVENT(fsi_master_gpio_break,
 	TP_PROTO(const struct fsi_master_gpio *master),
 	TP_ARGS(master),
@@ -107,6 +123,49 @@ TRACE_EVENT(fsi_master_gpio_poll_response_busy,
 		__entry->master_idx, __entry->busy)
 );
 
+TRACE_EVENT(fsi_master_gpio_cmd_abs_addr,
+	TP_PROTO(const struct fsi_master_gpio *master, u32 addr),
+	TP_ARGS(master, addr),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(u32,	addr)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->addr = addr;
+	),
+	TP_printk("fsi-gpio%d: Sending ABS_ADR %06x",
+		__entry->master_idx, __entry->addr)
+);
+
+TRACE_EVENT(fsi_master_gpio_cmd_rel_addr,
+	TP_PROTO(const struct fsi_master_gpio *master, u32 rel_addr),
+	TP_ARGS(master, rel_addr),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(u32,	rel_addr)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->rel_addr = rel_addr;
+	),
+	TP_printk("fsi-gpio%d: Sending REL_ADR %03x",
+		__entry->master_idx, __entry->rel_addr)
+);
+
+TRACE_EVENT(fsi_master_gpio_cmd_same_addr,
+	TP_PROTO(const struct fsi_master_gpio *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-gpio%d: Sending SAME_ADR",
+		__entry->master_idx)
+);
+
 #endif /* _TRACE_FSI_MASTER_GPIO_H */
 
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From 2640c3facbd6e21e63c95f19588cc24913a263cd Mon Sep 17 00:00:00 2001
From: shaoyunl <Shaoyun.Liu@amd.com>
Date: Wed, 11 Jul 2018 22:32:50 -0400
Subject: drm/amdkfd: Handle VM faults in KFD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Pre-GFX9 the amdgpu ISR saves the vm-fault status and address per
   per-vmid. amdkfd needs to get the information from amdgpu through the
   new get_vm_fault_info interface. On GFX9 and later, all the required
   information is in the IH ring
2. amdkfd unmaps all queues from the faulting process and create new
   run-list without the guilty process
3. amdkfd notifies the runtime of the vm fault trap via EVENT_TYPE_MEMORY

Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c   | 25 ++++++++++++---
 drivers/gpu/drm/amd/amdkfd/cik_int.h               |  2 ++
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 17 ++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.c            | 37 ++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c    | 18 +++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  4 +++
 include/uapi/linux/kfd_ioctl.h                     |  2 +-
 7 files changed, 98 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 49df6c791cfc..cc33870e7edb 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -48,18 +48,19 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 	return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
 		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
 		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
-		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
+		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
+		ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT;
 }
 
 static void cik_event_interrupt_wq(struct kfd_dev *dev,
 					const uint32_t *ih_ring_entry)
 {
-	unsigned int pasid;
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
 	uint32_t context_id = ihre->data & 0xfffffff;
-
-	pasid = (ihre->ring_id & 0xffff0000) >> 16;
+	unsigned int vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
+	unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16;
 
 	if (pasid == 0)
 		return;
@@ -72,6 +73,22 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 		kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
 	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
 		kfd_signal_hw_exception_event(pasid);
+	else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
+		struct kfd_vm_fault_info info;
+
+		kfd_process_vm_fault(dev->dqm, pasid);
+
+		memset(&info, 0, sizeof(info));
+		dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info);
+		if (!info.page_addr && !info.status)
+			return;
+
+		if (info.vmid == vmid)
+			kfd_signal_vm_fault_event(dev, pasid, &info);
+		else
+			kfd_signal_vm_fault_event(dev, pasid, NULL);
+	}
 }
 
 const struct kfd_event_interrupt_class event_interrupt_class_cik = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index 109298b9d507..a2079a04a673 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -37,6 +37,8 @@ struct cik_ih_ring_entry {
 #define CIK_INTSRC_DEQUEUE_COMPLETE	0xC6
 #define CIK_INTSRC_SDMA_TRAP		0xE0
 #define CIK_INTSRC_SQ_INTERRUPT_MSG	0xEF
+#define CIK_INTSRC_GFX_PAGE_INV_FAULT	0x92
+#define CIK_INTSRC_GFX_MEM_PROT_FAULT	0x93
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f2f81d26db0c..44fc2038770e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1684,6 +1684,23 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
 	kfree(dqm);
 }
 
+int kfd_process_vm_fault(struct device_queue_manager *dqm,
+			 unsigned int pasid)
+{
+	struct kfd_process_device *pdd;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	int ret = 0;
+
+	if (!p)
+		return -EINVAL;
+	pdd = kfd_get_process_device_data(dqm->dev, p);
+	if (pdd)
+		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
+	kfd_unref_process(p);
+
+	return ret;
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 static void seq_reg_dump(struct seq_file *m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 3d5a8332e8c0..b58a0e665ebc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -963,3 +963,40 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	mutex_unlock(&p->event_mutex);
 	kfd_unref_process(p);
 }
+
+void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+				struct kfd_vm_fault_info *info)
+{
+	struct kfd_event *ev;
+	uint32_t id;
+	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	struct kfd_hsa_memory_exception_data memory_exception_data;
+
+	if (!p)
+		return; /* Presumably process exited. */
+	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+	memory_exception_data.gpu_id = dev->id;
+	memory_exception_data.failure.imprecise = 1;
+	/* Set failure reason */
+	if (info) {
+		memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
+		memory_exception_data.failure.NotPresent =
+			info->prot_valid ? 1 : 0;
+		memory_exception_data.failure.NoExecute =
+			info->prot_exec ? 1 : 0;
+		memory_exception_data.failure.ReadOnly =
+			info->prot_write ? 1 : 0;
+		memory_exception_data.failure.imprecise = 0;
+	}
+	mutex_lock(&p->event_mutex);
+
+	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_for_each_entry_continue(&p->event_idr, ev, id)
+		if (ev->type == KFD_EVENT_TYPE_MEMORY) {
+			ev->memory_exception_data = memory_exception_data;
+			set_event(ev);
+		}
+
+	mutex_unlock(&p->event_mutex);
+	kfd_unref_process(p);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 37029baa3346..d6b64e692760 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -57,7 +57,9 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
 	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
 		source_id == SOC15_INTSRC_SDMA_TRAP ||
 		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
-		source_id == SOC15_INTSRC_CP_BAD_OPCODE;
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||
+		client_id == SOC15_IH_CLIENTID_VMC ||
+		client_id == SOC15_IH_CLIENTID_UTCL2;
 }
 
 static void event_interrupt_wq_v9(struct kfd_dev *dev,
@@ -82,7 +84,19 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
 		kfd_signal_hw_exception_event(pasid);
 	else if (client_id == SOC15_IH_CLIENTID_VMC ||
 		 client_id == SOC15_IH_CLIENTID_UTCL2) {
-		/* TODO */
+		struct kfd_vm_fault_info info = {0};
+		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+		info.vmid = vmid;
+		info.mc_id = client_id;
+		info.page_addr = ih_ring_entry[4] |
+			(uint64_t)(ih_ring_entry[5] & 0xf) << 32;
+		info.prot_valid = ring_id & 0x08;
+		info.prot_read  = ring_id & 0x10;
+		info.prot_write = ring_id & 0x20;
+
+		kfd_process_vm_fault(dev->dqm, pasid);
+		kfd_signal_vm_fault_event(dev, pasid, &info);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 5e3990bb4c4b..91a3368421b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -838,6 +838,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 					enum kfd_queue_type type);
 void kernel_queue_uninit(struct kernel_queue *kq);
+int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
 
 /* Process Queue Manager */
 struct process_queue_node {
@@ -964,6 +965,9 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint64_t *event_page_offset, uint32_t *event_slot_index);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
+void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+				struct kfd_vm_fault_info *info);
+
 void kfd_flush_tlb(struct kfd_process_device *pdd);
 
 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index b4f5073dbac2..46a54ab1e728 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -219,7 +219,7 @@ struct kfd_memory_exception_failure {
 	__u32 NotPresent;	/* Page not present or supervisor privilege */
 	__u32 ReadOnly;	/* Write access to a read-only page */
 	__u32 NoExecute;	/* Execute access to a page marked NX */
-	__u32 pad;
+	__u32 imprecise;	/* Can't determine the	exact fault address */
 };
 
 /* memory exception data*/
-- 
cgit v1.2.3


From 0c119abad7f0d7987c3ce4ea76b30bde76d0436e Mon Sep 17 00:00:00 2001
From: Shaoyun Liu <Shaoyun.Liu@amd.com>
Date: Wed, 11 Jul 2018 22:32:53 -0400
Subject: drm/amd: Add kfd ioctl defines for hw_exception event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 46a54ab1e728..88d17c39dbf9 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -189,6 +189,15 @@ struct kfd_ioctl_dbg_wave_control_args {
 
 #define KFD_SIGNAL_EVENT_LIMIT			4096
 
+/* For kfd_event_data.hw_exception_data.reset_type. */
+#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET	0
+#define KFD_HW_EXCEPTION_PER_ENGINE_RESET	1
+
+/* For kfd_event_data.hw_exception_data.reset_cause. */
+#define KFD_HW_EXCEPTION_GPU_HANG	0
+#define KFD_HW_EXCEPTION_ECC		1
+
+
 struct kfd_ioctl_create_event_args {
 	__u64 event_page_offset;	/* from KFD */
 	__u32 event_trigger_data;	/* from KFD - signal events only */
@@ -230,10 +239,19 @@ struct kfd_hsa_memory_exception_data {
 	__u32 pad;
 };
 
-/* Event data*/
+/* hw exception data */
+struct kfd_hsa_hw_exception_data {
+	uint32_t reset_type;
+	uint32_t reset_cause;
+	uint32_t memory_lost;
+	uint32_t gpu_id;
+};
+
+/* Event data */
 struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
+		struct kfd_hsa_hw_exception_data hw_exception_data;
 	};				/* From KFD */
 	__u64 kfd_event_data_ext;	/* pointer to an extension structure
 					   for future exception types */
-- 
cgit v1.2.3


From 9dc55f1389f9569acf9659e58dd836a9c70df217 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 22:26:05 -0700
Subject: iomap: add support for sub-pagesize buffered I/O without buffer heads

After already supporting a simple implementation of buffered writes for
the blocksize == PAGE_SIZE case in the last commit this adds full support
even for smaller block sizes.   There are three bits of per-block
information in the buffer_head structure that really matter for the iomap
read and write path:

 - uptodate status (BH_uptodate)
 - marked as currently under read I/O (BH_Async_Read)
 - marked as currently under write I/O (BH_Async_Write)

Instead of having new per-block structures this now adds a per-page
structure called struct iomap_page to track this information in a slightly
different form:

 - a bitmap for the per-block uptodate status.  For worst case of a 64k
   page size system this bitmap needs to contain 128 bits.  For the
   typical 4k page size case it only needs 8 bits, although we still
   need a full unsigned long due to the way the atomic bitmap API works.
 - two atomic_t counters are used to track the outstanding read and write
   counts

There is quite a bit of boilerplate code as the buffered I/O path uses
various helper methods, but the actual code is very straight forward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c            | 280 ++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/iomap.h |  31 ++++++
 2 files changed, 290 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/iomap.c b/fs/iomap.c
index 13cdcf33e6c0..07501a647d13 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -17,6 +17,7 @@
 #include <linux/iomap.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 #include <linux/mm.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
@@ -104,6 +105,138 @@ iomap_sector(struct iomap *iomap, loff_t pos)
 	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
 }
 
+static struct iomap_page *
+iomap_page_create(struct inode *inode, struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (iop || i_blocksize(inode) == PAGE_SIZE)
+		return iop;
+
+	iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL);
+	atomic_set(&iop->read_count, 0);
+	atomic_set(&iop->write_count, 0);
+	bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+	set_page_private(page, (unsigned long)iop);
+	SetPagePrivate(page);
+	return iop;
+}
+
+static void
+iomap_page_release(struct page *page)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (!iop)
+		return;
+	WARN_ON_ONCE(atomic_read(&iop->read_count));
+	WARN_ON_ONCE(atomic_read(&iop->write_count));
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	kfree(iop);
+}
+
+/*
+ * Calculate the range inside the page that we actually need to read.
+ */
+static void
+iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
+		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
+{
+	unsigned block_bits = inode->i_blkbits;
+	unsigned block_size = (1 << block_bits);
+	unsigned poff = *pos & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	unsigned first = poff >> block_bits;
+	unsigned last = (poff + plen - 1) >> block_bits;
+	unsigned end = (i_size_read(inode) & (PAGE_SIZE - 1)) >> block_bits;
+
+	/*
+	 * If the block size is smaller than the page size we need to check the
+	 * per-block uptodate status and adjust the offset and length if needed
+	 * to avoid reading in already uptodate ranges.
+	 */
+	if (iop) {
+		unsigned int i;
+
+		/* move forward for each leading block marked uptodate */
+		for (i = first; i <= last; i++) {
+			if (!test_bit(i, iop->uptodate))
+				break;
+			*pos += block_size;
+			poff += block_size;
+			plen -= block_size;
+			first++;
+		}
+
+		/* truncate len if we find any trailing uptodate block(s) */
+		for ( ; i <= last; i++) {
+			if (test_bit(i, iop->uptodate)) {
+				plen -= (last - i + 1) * block_size;
+				last = i - 1;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * If the extent spans the block that contains the i_size we need to
+	 * handle both halves separately so that we properly zero data in the
+	 * page cache for blocks that are entirely outside of i_size.
+	 */
+	if (first <= end && last > end)
+		plen -= (last - end) * block_size;
+
+	*offp = poff;
+	*lenp = plen;
+}
+
+static void
+iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = off >> inode->i_blkbits;
+	unsigned last = (off + len - 1) >> inode->i_blkbits;
+	unsigned int i;
+	bool uptodate = true;
+
+	if (iop) {
+		for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) {
+			if (i >= first && i <= last)
+				set_bit(i, iop->uptodate);
+			else if (!test_bit(i, iop->uptodate))
+				uptodate = false;
+		}
+	}
+
+	if (uptodate && !PageError(page))
+		SetPageUptodate(page);
+}
+
+static void
+iomap_read_finish(struct iomap_page *iop, struct page *page)
+{
+	if (!iop || atomic_dec_and_test(&iop->read_count))
+		unlock_page(page);
+}
+
+static void
+iomap_read_page_end_io(struct bio_vec *bvec, int error)
+{
+	struct page *page = bvec->bv_page;
+	struct iomap_page *iop = to_iomap_page(page);
+
+	if (unlikely(error)) {
+		ClearPageUptodate(page);
+		SetPageError(page);
+	} else {
+		iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
+	}
+
+	iomap_read_finish(iop, page);
+}
+
 static void
 iomap_read_inline_data(struct inode *inode, struct page *page,
 		struct iomap *iomap)
@@ -132,7 +265,7 @@ iomap_read_end_io(struct bio *bio)
 	int i;
 
 	bio_for_each_segment_all(bvec, bio, i)
-		page_endio(bvec->bv_page, false, error);
+		iomap_read_page_end_io(bvec, error);
 	bio_put(bio);
 }
 
@@ -150,9 +283,10 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 {
 	struct iomap_readpage_ctx *ctx = data;
 	struct page *page = ctx->cur_page;
-	unsigned poff = pos & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	bool is_contig = false;
+	loff_t orig_pos = pos;
+	unsigned poff, plen;
 	sector_t sector;
 
 	if (iomap->type == IOMAP_INLINE) {
@@ -161,13 +295,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		return PAGE_SIZE;
 	}
 
-	/* we don't support blocksize < PAGE_SIZE quite yet. */
-	WARN_ON_ONCE(pos != page_offset(page));
-	WARN_ON_ONCE(plen != PAGE_SIZE);
+	/* zero post-eof blocks as the page may be mapped */
+	iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
+	if (plen == 0)
+		goto done;
 
 	if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
 		zero_user(page, poff, plen);
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, poff, plen);
 		goto done;
 	}
 
@@ -183,6 +318,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		is_contig = true;
 	}
 
+	/*
+	 * If we start a new segment we need to increase the read count, and we
+	 * need to do so before submitting any previous full bio to make sure
+	 * that we don't prematurely unlock the page.
+	 */
+	if (iop)
+		atomic_inc(&iop->read_count);
+
 	if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
 		gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
 		int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -203,7 +346,13 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 
 	__bio_add_page(ctx->bio, page, plen, poff);
 done:
-	return plen;
+	/*
+	 * Move the caller beyond our range so that it keeps making progress.
+	 * For that we have to include any leading non-uptodate ranges, but
+	 * we can skip trailing ones as they will be handled in the next
+	 * iteration.
+	 */
+	return pos - orig_pos + plen;
 }
 
 int
@@ -214,8 +363,6 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops)
 	unsigned poff;
 	loff_t ret;
 
-	WARN_ON_ONCE(page_has_buffers(page));
-
 	for (poff = 0; poff < PAGE_SIZE; poff += ret) {
 		ret = iomap_apply(inode, page_offset(page) + poff,
 				PAGE_SIZE - poff, 0, ops, &ctx,
@@ -341,6 +488,84 @@ done:
 }
 EXPORT_SYMBOL_GPL(iomap_readpages);
 
+int
+iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count)
+{
+	struct iomap_page *iop = to_iomap_page(page);
+	struct inode *inode = page->mapping->host;
+	unsigned first = from >> inode->i_blkbits;
+	unsigned last = (from + count - 1) >> inode->i_blkbits;
+	unsigned i;
+
+	if (iop) {
+		for (i = first; i <= last; i++)
+			if (!test_bit(i, iop->uptodate))
+				return 0;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
+
+int
+iomap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	/*
+	 * mm accommodates an old ext3 case where clean pages might not have had
+	 * the dirty bit cleared. Thus, it can send actual dirty pages to
+	 * ->releasepage() via shrink_active_list(), skip those here.
+	 */
+	if (PageDirty(page) || PageWriteback(page))
+		return 0;
+	iomap_page_release(page);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(iomap_releasepage);
+
+void
+iomap_invalidatepage(struct page *page, unsigned int offset, unsigned int len)
+{
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * and release it to avoid unnecessary buildup of the LRU.
+	 */
+	if (offset == 0 && len == PAGE_SIZE) {
+		WARN_ON_ONCE(PageWriteback(page));
+		cancel_dirty_page(page);
+		iomap_page_release(page);
+	}
+}
+EXPORT_SYMBOL_GPL(iomap_invalidatepage);
+
+#ifdef CONFIG_MIGRATION
+int
+iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode)
+{
+	int ret;
+
+	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (ret != MIGRATEPAGE_SUCCESS)
+		return ret;
+
+	if (page_has_private(page)) {
+		ClearPagePrivate(page);
+		set_page_private(newpage, page_private(page));
+		set_page_private(page, 0);
+		SetPagePrivate(newpage);
+	}
+
+	if (mode != MIGRATE_SYNC_NO_COPY)
+		migrate_page_copy(newpage, page);
+	else
+		migrate_page_states(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(iomap_migrate_page);
+#endif /* CONFIG_MIGRATION */
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -364,6 +589,7 @@ iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
 
 	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
 		zero_user_segments(page, poff, from, to, poff + plen);
+		iomap_set_range_uptodate(page, poff, plen);
 		return 0;
 	}
 
@@ -379,21 +605,33 @@ static int
 __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
 		struct page *page, struct iomap *iomap)
 {
+	struct iomap_page *iop = iomap_page_create(inode, page);
 	loff_t block_size = i_blocksize(inode);
 	loff_t block_start = pos & ~(block_size - 1);
 	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
-	unsigned poff = block_start & (PAGE_SIZE - 1);
-	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
-	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
-
-	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len, poff, plen;
+	int status = 0;
 
 	if (PageUptodate(page))
 		return 0;
-	if (from <= poff && to >= poff + plen)
-		return 0;
-	return iomap_read_page_sync(inode, block_start, page,
-			poff, plen, from, to, iomap);
+
+	do {
+		iomap_adjust_read_range(inode, iop, &block_start,
+				block_end - block_start, &poff, &plen);
+		if (plen == 0)
+			break;
+
+		if ((from > poff && from < poff + plen) ||
+		    (to > poff && to < poff + plen)) {
+			status = iomap_read_page_sync(inode, block_start, page,
+					poff, plen, from, to, iomap);
+			if (status)
+				break;
+		}
+
+	} while ((block_start += plen) < block_end);
+
+	return status;
 }
 
 static int
@@ -476,7 +714,7 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 	if (unlikely(copied < len && !PageUptodate(page))) {
 		copied = 0;
 	} else {
-		SetPageUptodate(page);
+		iomap_set_range_uptodate(page, pos & (PAGE_SIZE - 1), len);
 		iomap_set_page_dirty(page);
 	}
 	return __generic_write_end(inode, pos, copied, page);
@@ -812,7 +1050,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 		block_commit_write(page, 0, length);
 	} else {
 		WARN_ON_ONCE(!PageUptodate(page));
-		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+		iomap_page_create(inode, page);
 	}
 
 	return length;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5eb9ca8d7ce5..3555d54bf79a 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -2,6 +2,9 @@
 #ifndef LINUX_IOMAP_H
 #define LINUX_IOMAP_H 1
 
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 
 struct address_space;
@@ -98,12 +101,40 @@ struct iomap_ops {
 			ssize_t written, unsigned flags, struct iomap *iomap);
 };
 
+/*
+ * Structure allocate for each page when block size < PAGE_SIZE to track
+ * sub-page uptodate status and I/O completions.
+ */
+struct iomap_page {
+	atomic_t		read_count;
+	atomic_t		write_count;
+	DECLARE_BITMAP(uptodate, PAGE_SIZE / 512);
+};
+
+static inline struct iomap_page *to_iomap_page(struct page *page)
+{
+	if (page_has_private(page))
+		return (struct iomap_page *)page_private(page);
+	return NULL;
+}
+
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
 int iomap_set_page_dirty(struct page *page);
+int iomap_is_partially_uptodate(struct page *page, unsigned long from,
+		unsigned long count);
+int iomap_releasepage(struct page *page, gfp_t gfp_mask);
+void iomap_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int len);
+#ifdef CONFIG_MIGRATION
+int iomap_migrate_page(struct address_space *mapping, struct page *newpage,
+		struct page *page, enum migrate_mode mode);
+#else
+#define iomap_migrate_page NULL
+#endif
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
-- 
cgit v1.2.3


From 4929c9428a171145f82f81aae0c3c25ef7d82837 Mon Sep 17 00:00:00 2001
From: Deepti Raghavan <deeptir@mit.edu>
Date: Mon, 9 Jul 2018 17:53:39 +0000
Subject: tcp: expose both send and receive intervals for rate sample

Congestion control algorithms, which access the rate sample
through the tcp_cong_control function, only have access to the maximum
of the send and receive interval, for cases where the acknowledgment
rate may be inaccurate due to ACK compression or decimation. Algorithms
may want to use send rates and receive rates as separate signals.

Signed-off-by: Deepti Raghavan <deeptir@mit.edu>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   | 2 ++
 net/ipv4/tcp_rate.c | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index cce37694776e..f6cb20e6e524 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -954,6 +954,8 @@ struct rate_sample {
 	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
 	s32  delivered;		/* number of packets delivered over interval */
 	long interval_us;	/* time for tp->delivered to incr "delivered" */
+	u32 snd_interval_us;	/* snd interval for delivered packets */
+	u32 rcv_interval_us;	/* rcv interval for delivered packets */
 	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
 	int  losses;		/* number of packets marked lost upon ACK */
 	u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c61240e43923..4dff40dad4dc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
+	/* Record both segment send and ack receive intervals */
+	rs->snd_interval_us = snd_us;
+	rs->rcv_interval_us = ack_us;
+
 	/* Normally we expect interval_us >= min-rtt.
 	 * Note that rate may still be over-estimated when a spuriously
 	 * retransmistted skb was first (s)acked because "interval_us"
-- 
cgit v1.2.3


From 3443b00e07eed5798605ba6524de37e1d3f1a4bf Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 10 Jul 2018 10:02:57 +0300
Subject: team: Publish team_port_get_rcu()

A follow-up patch adds a new entry point, team_port_dev_txable(). Making
it an ordinary exported function would mean that any module that may
need the service in one of the supported configurations also
unconditionally needs to pull in the team module, whether or not the
user actually intends to create team interfaces.

To prevent that, team_port_dev_txable() is defined in if_team.h, and
therefore all dependencies of that function also need to be
publicly-visible.

Therefore move team_port_get_rcu() from team.c to if_team.h.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 5 -----
 include/linux/if_team.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 3a95eaae0c98..6a047d30e8c6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -41,11 +41,6 @@
 
 #define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
 
-static struct team_port *team_port_get_rcu(const struct net_device *dev)
-{
-	return rcu_dereference(dev->rx_handler_data);
-}
-
 static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 {
 	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index d95cae09dea0..0d07c6655cce 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -74,6 +74,11 @@ struct team_port {
 	long mode_priv[0];
 };
 
+static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	return rcu_dereference(dev->rx_handler_data);
+}
+
 static inline bool team_port_enabled(struct team_port *port)
 {
 	return port->index != -1;
-- 
cgit v1.2.3


From eeed992b776c54af6108187c87ac60d028e69d37 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 10 Jul 2018 10:02:58 +0300
Subject: net: Add lag.h, net_lag_port_dev_txable()

LAG devices (team or bond) recognize for each one of their slave devices
whether LAG traffic is going to be sent through that device. Bond calls
such devices "active", team calls them "txable". When this state
changes, a NETDEV_CHANGELOWERSTATE notification is distributed, together
with a netdev_notifier_changelowerstate_info structure that for LAG
devices includes a tx_enabled flag that refers to the new state. The
notification thus makes it possible to react to the changes in txability
in drivers.

However there's no way to query txability from the outside on demand.
That is problematic namely for mlxsw, which when resolving ERSPAN packet
path, may encounter a LAG device, and needs to determine which of the
slaves it should choose.

To that end, introduce a new function, net_lag_port_dev_txable(), which
determines whether a given slave device is "active" or
"txable" (depending on the flavor of the LAG device). That function then
dispatches to per-LAG-flavor helpers, bond_is_active_slave_dev() resp.
team_port_dev_txable().

Because there currently is no good place where net_lag_port_dev_txable()
should be added, introduce a new header file, lag.h, which should from
now on hold any logic common to both team and bond. (But keep
netif_is_lag_master() together with the rest of netif_is_*_master()
functions).

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_team.h | 13 +++++++++++++
 include/net/bonding.h   | 13 +++++++++++++
 include/net/lag.h       | 17 +++++++++++++++++
 3 files changed, 43 insertions(+)
 create mode 100644 include/net/lag.h

(limited to 'include')

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 0d07c6655cce..ac42da56f7a2 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -89,6 +89,19 @@ static inline bool team_port_txable(struct team_port *port)
 	return port->linkup && team_port_enabled(port);
 }
 
+static inline bool team_port_dev_txable(const struct net_device *port_dev)
+{
+	struct team_port *port;
+	bool txable;
+
+	rcu_read_lock();
+	port = team_port_get_rcu(port_dev);
+	txable = port ? team_port_txable(port) : false;
+	rcu_read_unlock();
+
+	return txable;
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static inline void team_netpoll_send_skb(struct team_port *port,
 					 struct sk_buff *skb)
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 808f1d167349..a2d058170ea3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -411,6 +411,19 @@ static inline bool bond_slave_can_tx(struct slave *slave)
 	       bond_is_active_slave(slave);
 }
 
+static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev)
+{
+	struct slave *slave;
+	bool active;
+
+	rcu_read_lock();
+	slave = bond_slave_get_rcu(slave_dev);
+	active = bond_is_active_slave(slave);
+	rcu_read_unlock();
+
+	return active;
+}
+
 static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len)
 {
 	if (len == ETH_ALEN) {
diff --git a/include/net/lag.h b/include/net/lag.h
new file mode 100644
index 000000000000..95b880e6fdde
--- /dev/null
+++ b/include/net/lag.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IF_LAG_H
+#define _LINUX_IF_LAG_H
+
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+#include <net/bonding.h>
+
+static inline bool net_lag_port_dev_txable(const struct net_device *port_dev)
+{
+	if (netif_is_team_port(port_dev))
+		return team_port_dev_txable(port_dev);
+	else
+		return bond_is_active_slave_dev(port_dev);
+}
+
+#endif /* _LINUX_IF_LAG_H */
-- 
cgit v1.2.3


From bf1c77b4644f46d2986b7ca5e43e012f0fc8984b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 14:56:50 +0100
Subject: kernel: add ksys_personality()

Using this helper allows us to avoid the in-kernel call to the
sys_personality() syscall. The ksys_ prefix denotes that this function
is meant as a drop-in replacement for the syscall. In particular, it
uses the same calling convention as sys_personality().

Since ksys_personality is trivial, it is implemented directly in
<linux/syscalls.h>, as we do for ksys_close() and friends.

This helper is necessary to enable conversion of arm64's syscall
handling to use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Martin <dave.martin@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 include/linux/syscalls.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a368a68cb667..abfe12d8a9c5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -80,6 +80,7 @@ union bpf_attr;
 #include <linux/unistd.h>
 #include <linux/quota.h>
 #include <linux/key.h>
+#include <linux/personality.h>
 #include <trace/syscall.h>
 
 #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
@@ -1281,4 +1282,14 @@ static inline long ksys_truncate(const char __user *pathname, loff_t length)
 	return do_sys_truncate(pathname, length);
 }
 
+static inline unsigned int ksys_personality(unsigned int personality)
+{
+	unsigned int old = current->personality;
+
+	if (personality != 0xffffffff)
+		set_personality(personality);
+
+	return old;
+}
+
 #endif
-- 
cgit v1.2.3


From 9b54bf9d6a5b30e2cc22b18793e9a4158c5b4882 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 14:56:51 +0100
Subject: kernel: add kcompat_sys_{f,}statfs64()

Using this helper allows us to avoid the in-kernel calls to the
compat_sys_{f,}statfs64() sycalls, as are necessary for parameter
mangling in arm64's compat handling.

Following the example of ksys_* functions, kcompat_sys_* functions are
intended to be a drop-in replacement for their compat_sys_*
counterparts, with the same calling convention.

This is necessary to enable conversion of arm64's syscall handling to
use pt_regs wrappers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 fs/statfs.c            | 14 ++++++++++++--
 include/linux/compat.h | 11 +++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/statfs.c b/fs/statfs.c
index 5b2a24f0f263..f0216629621d 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -335,7 +335,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -349,7 +349,12 @@ COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, s
 	return error;
 }
 
-COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_statfs64(pathname, sz, buf);
+}
+
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user * buf)
 {
 	struct kstatfs tmp;
 	int error;
@@ -363,6 +368,11 @@ COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct co
 	return error;
 }
 
+COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
+{
+	return kcompat_sys_fstatfs64(fd, sz, buf);
+}
+
 /*
  * This is a copy of sys_ustat, just dealing with a structure layout.
  * Given how simple this syscall is that apporach is more maintainable
diff --git a/include/linux/compat.h b/include/linux/compat.h
index c68acc47da57..43f4ed44c5d5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1028,6 +1028,17 @@ static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
 	return ctv;
 }
 
+/*
+ * Kernel code should not call compat syscalls (i.e., compat_sys_xyzyyz())
+ * directly.  Instead, use one of the functions which work equivalently, such
+ * as the kcompat_sys_xyzyyz() functions prototyped below.
+ */
+
+int kcompat_sys_statfs64(const char __user * pathname, compat_size_t sz,
+		     struct compat_statfs64 __user * buf);
+int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
+			  struct compat_statfs64 __user * buf);
+
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-- 
cgit v1.2.3


From c9c554f21490bbc96cc554f80024d27d09670480 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 11 Jul 2018 14:19:04 -0400
Subject: alloc_file(): switch to passing O_... flags instead of FMODE_... mode

... so that it could set both ->f_flags and ->f_mode, without callers
having to set ->f_flags manually.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/misc/cxl/api.c          |  3 +--
 drivers/scsi/cxlflash/ocxl_hw.c |  3 +--
 fs/aio.c                        |  8 ++------
 fs/anon_inodes.c                |  3 +--
 fs/file_table.c                 | 17 +++++++++--------
 fs/hugetlbfs/inode.c            |  3 +--
 fs/pipe.c                       |  8 ++++----
 include/linux/file.h            |  2 +-
 ipc/shm.c                       |  8 ++++----
 mm/memfd.c                      |  2 +-
 mm/shmem.c                      |  3 +--
 net/socket.c                    |  3 +--
 12 files changed, 27 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6b16946f9b05..0b5cb6cf91a0 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -102,12 +102,11 @@ static struct file *cxl_getfile(const char *name,
 	path.mnt = mntget(cxl_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		path_put(&path);
 		goto err_fs;
 	}
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c
index 497a68389461..99bb393a8a34 100644
--- a/drivers/scsi/cxlflash/ocxl_hw.c
+++ b/drivers/scsi/cxlflash/ocxl_hw.c
@@ -129,7 +129,7 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 	path.mnt = mntget(ocxlflash_vfs_mount);
 	d_instantiate(path.dentry, inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file)) {
 		rc = PTR_ERR(file);
 		dev_err(dev, "%s: alloc_file failed rc=%d\n",
@@ -138,7 +138,6 @@ static struct file *ocxlflash_getfile(struct device *dev, const char *name,
 		goto err3;
 	}
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 out:
 	return file;
diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..9eea53887d6c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -234,13 +234,9 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	path.mnt = mntget(aio_mnt);
 
 	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &aio_ring_fops);
-	if (IS_ERR(file)) {
+	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	if (IS_ERR(file))
 		path_put(&path);
-		return file;
-	}
-
-	file->f_flags = O_RDWR;
 	return file;
 }
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3168ee4e77f4..6b235ab1df6c 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,12 +102,11 @@ struct file *anon_inode_getfile(const char *name,
 
 	d_instantiate(path.dentry, anon_inode_inode);
 
-	file = alloc_file(&path, OPEN_FMODE(flags), fops);
+	file = alloc_file(&path, flags & (O_ACCMODE | O_NONBLOCK), fops);
 	if (IS_ERR(file))
 		goto err_dput;
 	file->f_mapping = anon_inode_inode->i_mapping;
 
-	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
 	file->private_data = priv;
 
 	return file;
diff --git a/fs/file_table.c b/fs/file_table.c
index eee7cf629e52..086c3f5ec31a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -153,10 +153,10 @@ over:
  * alloc_file - allocate and initialize a 'struct file'
  *
  * @path: the (dentry, vfsmount) pair for the new file
- * @mode: the mode with which the new file will be opened
+ * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, fmode_t mode,
+struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -165,19 +165,20 @@ struct file *alloc_file(const struct path *path, fmode_t mode,
 	if (IS_ERR(file))
 		return file;
 
+	file->f_mode = OPEN_FMODE(flags);
+	file->f_flags = flags;
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
 	file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
-	if ((mode & FMODE_READ) &&
+	if ((file->f_mode & FMODE_READ) &&
 	     likely(fop->read || fop->read_iter))
-		mode |= FMODE_CAN_READ;
-	if ((mode & FMODE_WRITE) &&
+		file->f_mode |= FMODE_CAN_READ;
+	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
-		mode |= FMODE_CAN_WRITE;
-	file->f_mode = mode;
+		file->f_mode |= FMODE_CAN_WRITE;
 	file->f_op = fop;
-	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d508c7844681..71aed47422e2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1375,8 +1375,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode->i_size = size;
 	clear_nlink(inode);
 
-	file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-			&hugetlbfs_file_operations);
+	file = alloc_file(&path, O_RDWR, &hugetlbfs_file_operations);
 	if (IS_ERR(file))
 		goto out_dentry; /* inode is already attached */
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 9405e455f5b1..1909422e5a78 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -760,16 +760,17 @@ int create_pipe_files(struct file **res, int flags)
 
 	d_instantiate(path.dentry, inode);
 
-	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
+	f = alloc_file(&path, O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
+			&pipefifo_fops);
 	if (IS_ERR(f)) {
 		err = PTR_ERR(f);
 		goto err_dentry;
 	}
 
-	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
+	res[0] = alloc_file(&path, O_RDONLY | (flags & O_NONBLOCK),
+			&pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
@@ -778,7 +779,6 @@ int create_pipe_files(struct file **res, int flags)
 
 	path_get(&path);
 	res[0]->private_data = inode->i_pipe;
-	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
 	res[1] = f;
 	return 0;
 
diff --git a/include/linux/file.h b/include/linux/file.h
index 279720db984a..6d34a1262b31 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -18,7 +18,7 @@ struct file_operations;
 struct vfsmount;
 struct dentry;
 struct path;
-extern struct file *alloc_file(const struct path *, fmode_t mode,
+extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 
 static inline void fput_light(struct file *file, int fput_needed)
diff --git a/ipc/shm.c b/ipc/shm.c
index 051a3e1fb8df..c702abd578a7 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1362,7 +1362,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
-	fmode_t f_mode;
+	int f_flags;
 	unsigned long populate = 0;
 
 	err = -EINVAL;
@@ -1395,11 +1395,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	if (shmflg & SHM_RDONLY) {
 		prot = PROT_READ;
 		acc_mode = S_IRUGO;
-		f_mode = FMODE_READ;
+		f_flags = O_RDONLY;
 	} else {
 		prot = PROT_READ | PROT_WRITE;
 		acc_mode = S_IRUGO | S_IWUGO;
-		f_mode = FMODE_READ | FMODE_WRITE;
+		f_flags = O_RDWR;
 	}
 	if (shmflg & SHM_EXEC) {
 		prot |= PROT_EXEC;
@@ -1449,7 +1449,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 		goto out_nattch;
 	}
 
-	file = alloc_file(&path, f_mode,
+	file = alloc_file(&path, f_flags,
 			  is_file_hugepages(shp->shm_file) ?
 				&shm_file_operations_huge :
 				&shm_file_operations);
diff --git a/mm/memfd.c b/mm/memfd.c
index 27069518e3c5..2bb5e257080e 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -326,7 +326,7 @@ SYSCALL_DEFINE2(memfd_create,
 		goto err_fd;
 	}
 	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
-	file->f_flags |= O_RDWR | O_LARGEFILE;
+	file->f_flags |= O_LARGEFILE;
 
 	if (flags & MFD_ALLOW_SEALING) {
 		file_seals = memfd_file_seals_ptr(file);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..84844e52bf24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3942,8 +3942,7 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
 	if (IS_ERR(res))
 		goto put_path;
 
-	res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
-		  &shmem_file_operations);
+	res = alloc_file(&path, O_RDWR, &shmem_file_operations);
 	if (IS_ERR(res))
 		goto put_path;
 
diff --git a/net/socket.c b/net/socket.c
index 8a109012608a..2cdbe8f71b7f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -411,7 +411,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
 
-	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
+	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
 		  &socket_file_ops);
 	if (IS_ERR(file)) {
 		/* drop dentry, keep inode for a bit */
@@ -423,7 +423,6 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 	}
 
 	sock->file = file;
-	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 	file->private_data = sock;
 	return file;
 }
-- 
cgit v1.2.3


From e3f20ae21079ecac282df65d83865c5771f4bca0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 13:25:29 -0400
Subject: security_file_open(): lose cred argument

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c                | 2 +-
 include/linux/security.h | 5 ++---
 security/security.c      | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index 0a9f00b7f3d5..4c65edefa487 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -776,7 +776,7 @@ static int do_dentry_open(struct file *f,
 		goto cleanup_all;
 	}
 
-	error = security_file_open(f, f->f_cred);
+	error = security_file_open(f);
 	if (error)
 		goto cleanup_all;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 63030c85ee19..88d30fc975e7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -309,7 +309,7 @@ void security_file_set_fowner(struct file *file);
 int security_file_send_sigiotask(struct task_struct *tsk,
 				 struct fown_struct *fown, int sig);
 int security_file_receive(struct file *file);
-int security_file_open(struct file *file, const struct cred *cred);
+int security_file_open(struct file *file);
 int security_task_alloc(struct task_struct *task, unsigned long clone_flags);
 void security_task_free(struct task_struct *task);
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
@@ -858,8 +858,7 @@ static inline int security_file_receive(struct file *file)
 	return 0;
 }
 
-static inline int security_file_open(struct file *file,
-				     const struct cred *cred)
+static inline int security_file_open(struct file *file)
 {
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 68f46d849abe..235b35f58a65 100644
--- a/security/security.c
+++ b/security/security.c
@@ -970,11 +970,11 @@ int security_file_receive(struct file *file)
 	return call_int_hook(file_receive, 0, file);
 }
 
-int security_file_open(struct file *file, const struct cred *cred)
+int security_file_open(struct file *file)
 {
 	int ret;
 
-	ret = call_int_hook(file_open, 0, file, cred);
+	ret = call_int_hook(file_open, 0, file, file->f_cred);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 9481769208b5e39b871ae4e89f5328c776ec38dc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 10 Jul 2018 14:13:18 -0400
Subject: ->file_open(): lose cred argument

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lsm_hooks.h  | 2 +-
 security/apparmor/lsm.c    | 4 ++--
 security/security.c        | 2 +-
 security/selinux/hooks.c   | 4 ++--
 security/smack/smack_lsm.c | 6 +++---
 security/tomoyo/tomoyo.c   | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 8f1131c8dd54..a8ee106b865d 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1569,7 +1569,7 @@ union security_list_options {
 	int (*file_send_sigiotask)(struct task_struct *tsk,
 					struct fown_struct *fown, int sig);
 	int (*file_receive)(struct file *file);
-	int (*file_open)(struct file *file, const struct cred *cred);
+	int (*file_open)(struct file *file);
 
 	int (*task_alloc)(struct task_struct *task, unsigned long clone_flags);
 	void (*task_free)(struct task_struct *task);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 74f17376202b..8b8b70620bbe 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -395,7 +395,7 @@ static int apparmor_inode_getattr(const struct path *path)
 	return common_perm_cond(OP_GETATTR, path, AA_MAY_GETATTR);
 }
 
-static int apparmor_file_open(struct file *file, const struct cred *cred)
+static int apparmor_file_open(struct file *file)
 {
 	struct aa_file_ctx *fctx = file_ctx(file);
 	struct aa_label *label;
@@ -414,7 +414,7 @@ static int apparmor_file_open(struct file *file, const struct cred *cred)
 		return 0;
 	}
 
-	label = aa_get_newest_cred_label(cred);
+	label = aa_get_newest_cred_label(file->f_cred);
 	if (!unconfined(label)) {
 		struct inode *inode = file_inode(file);
 		struct path_cond cond = { inode->i_uid, inode->i_mode };
diff --git a/security/security.c b/security/security.c
index 235b35f58a65..5dce67070cdf 100644
--- a/security/security.c
+++ b/security/security.c
@@ -974,7 +974,7 @@ int security_file_open(struct file *file)
 {
 	int ret;
 
-	ret = call_int_hook(file_open, 0, file, file->f_cred);
+	ret = call_int_hook(file_open, 0, file);
 	if (ret)
 		return ret;
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2b5ee5fbd652..18006be15713 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3862,7 +3862,7 @@ static int selinux_file_receive(struct file *file)
 	return file_has_perm(cred, file, file_to_av(file));
 }
 
-static int selinux_file_open(struct file *file, const struct cred *cred)
+static int selinux_file_open(struct file *file)
 {
 	struct file_security_struct *fsec;
 	struct inode_security_struct *isec;
@@ -3886,7 +3886,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred)
 	 * new inode label or new policy.
 	 * This check is not redundant - do not remove.
 	 */
-	return file_path_has_perm(cred, file, open_file_to_av(file));
+	return file_path_has_perm(file->f_cred, file, open_file_to_av(file));
 }
 
 /* task security operations */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 7ad226018f51..e7b6c012431d 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1927,9 +1927,9 @@ static int smack_file_receive(struct file *file)
  *
  * Returns 0
  */
-static int smack_file_open(struct file *file, const struct cred *cred)
+static int smack_file_open(struct file *file)
 {
-	struct task_smack *tsp = cred->security;
+	struct task_smack *tsp = file->f_cred->security;
 	struct inode *inode = file_inode(file);
 	struct smk_audit_info ad;
 	int rc;
@@ -1937,7 +1937,7 @@ static int smack_file_open(struct file *file, const struct cred *cred)
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
 	smk_ad_setfield_u_fs_path(&ad, file->f_path);
 	rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad);
-	rc = smk_bu_credfile(cred, file, MAY_READ, rc);
+	rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc);
 
 	return rc;
 }
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 213b8c593668..9f932e2d6852 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -320,7 +320,7 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
  *
  * Returns 0 on success, negative value otherwise.
  */
-static int tomoyo_file_open(struct file *f, const struct cred *cred)
+static int tomoyo_file_open(struct file *f)
 {
 	int flags = f->f_flags;
 	/* Don't check read permission here if called from do_execve(). */
-- 
cgit v1.2.3


From f5d11409e61dadf1f9af91b22bbedc28a60a2e2c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 02:35:08 -0400
Subject: introduce FMODE_OPENED

basically, "is that instance set up enough for regular fput(), or
do we want put_filp() for that one".

NOTE: the only alloc_file() caller that could be followed by put_filp()
is in arch/ia64/kernel/perfmon.c, which is (Kconfig-level) broken.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c    | 1 +
 fs/open.c          | 3 ++-
 include/linux/fs.h | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 705f486f7007..d664d10acfeb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -176,6 +176,7 @@ struct file *alloc_file(const struct path *path, int flags,
 	if ((file->f_mode & FMODE_WRITE) &&
 	     likely(fop->write || fop->write_iter))
 		file->f_mode |= FMODE_CAN_WRITE;
+	file->f_mode |= FMODE_OPENED;
 	file->f_op = fop;
 	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(path->dentry->d_inode);
diff --git a/fs/open.c b/fs/open.c
index 4c65edefa487..f3c6cb6a57b9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -749,7 +749,7 @@ static int do_dentry_open(struct file *f,
 	f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
 
 	if (unlikely(f->f_flags & O_PATH)) {
-		f->f_mode = FMODE_PATH;
+		f->f_mode = FMODE_PATH | FMODE_OPENED;
 		f->f_op = &empty_fops;
 		return 0;
 	}
@@ -793,6 +793,7 @@ static int do_dentry_open(struct file *f,
 		if (error)
 			goto cleanup_all;
 	}
+	f->f_mode |= FMODE_OPENED;
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
 	if ((f->f_mode & FMODE_READ) &&
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c4ca4c9c1130..05f34726e29c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -148,6 +148,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* Has write method(s) */
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
+#define FMODE_OPENED		((__force fmode_t)0x80000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
-- 
cgit v1.2.3


From 4d27f3266f14e4d1d13125ce32cb49a40f3122c3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Jul 2018 11:14:39 -0400
Subject: fold put_filp() into fput()

Just check FMODE_OPENED in __fput() and be done with that...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 15 +++++----------
 fs/namei.c           |  4 ++--
 fs/open.c            | 11 +++--------
 include/linux/file.h |  1 -
 4 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index d664d10acfeb..9b70ed2bbc4e 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -192,6 +192,9 @@ static void __fput(struct file *file)
 	struct vfsmount *mnt = file->f_path.mnt;
 	struct inode *inode = file->f_inode;
 
+	if (unlikely(!(file->f_mode & FMODE_OPENED)))
+		goto out;
+
 	might_sleep();
 
 	fsnotify_close(file);
@@ -221,12 +224,10 @@ static void __fput(struct file *file)
 		put_write_access(inode);
 		__mnt_drop_write(mnt);
 	}
-	file->f_path.dentry = NULL;
-	file->f_path.mnt = NULL;
-	file->f_inode = NULL;
-	file_free(file);
 	dput(dentry);
 	mntput(mnt);
+out:
+	file_free(file);
 }
 
 static LLIST_HEAD(delayed_fput_list);
@@ -301,12 +302,6 @@ void __fput_sync(struct file *file)
 
 EXPORT_SYMBOL(fput);
 
-void put_filp(struct file *file)
-{
-	if (atomic_long_dec_and_test(&file->f_count))
-		file_free(file);
-}
-
 void __init files_init(void)
 {
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
diff --git a/fs/namei.c b/fs/namei.c
index 3cf02804d5ff..503c4b968415 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3531,7 +3531,7 @@ static struct file *path_openat(struct nameidata *nd,
 
 	s = path_init(nd, flags);
 	if (IS_ERR(s)) {
-		put_filp(file);
+		fput(file);
 		return ERR_CAST(s);
 	}
 	while (!(error = link_path_walk(s, nd)) &&
@@ -3547,7 +3547,7 @@ static struct file *path_openat(struct nameidata *nd,
 out2:
 	if (!(opened & FILE_OPENED)) {
 		BUG_ON(!error);
-		put_filp(file);
+		fput(file);
 	}
 	if (unlikely(error)) {
 		if (error == -EOPENSTALE) {
diff --git a/fs/open.c b/fs/open.c
index f3c6cb6a57b9..3d09b823f12b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -921,15 +921,10 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = alloc_empty_file(flags, cred);
 	if (!IS_ERR(f)) {
 		error = vfs_open(path, f);
-		if (!error) {
-			/* from now on we need fput() to dispose of f */
+		if (!error)
 			error = open_check_o_direct(f);
-			if (error) {
-				fput(f);
-				f = ERR_PTR(error);
-			}
-		} else { 
-			put_filp(f);
+		if (error) {
+			fput(f);
 			f = ERR_PTR(error);
 		}
 	}
diff --git a/include/linux/file.h b/include/linux/file.h
index 6d34a1262b31..aed45d69811e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -78,7 +78,6 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
 extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
 extern void set_close_on_exec(unsigned int fd, int flag);
 extern bool get_close_on_exec(unsigned int fd);
-extern void put_filp(struct file *);
 extern int get_unused_fd_flags(unsigned flags);
 extern void put_unused_fd(unsigned int fd);
 
-- 
cgit v1.2.3


From 73a09dd94377e4b186b300bd5461920710c7c3d5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:22:02 -0400
Subject: introduce FMODE_CREATED and switch to it

Parallel to FILE_CREATED, goes into ->f_mode instead of *opened.
NFS is a bit of a wart here - it doesn't have file at the point
where FILE_CREATED used to be set, so we need to propagate it
there (for now).  IMA is another one (here and everywhere)...

Note that this needs do_dentry_open() to leave old bits in ->f_mode
alone - we want it to preserve FMODE_CREATED if it had been already
set (no other bit can be there).

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      |  2 +-
 fs/9p/vfs_inode_dotl.c |  2 +-
 fs/ceph/file.c         |  2 +-
 fs/cifs/dir.c          |  2 +-
 fs/fuse/dir.c          |  2 +-
 fs/gfs2/inode.c        |  2 +-
 fs/namei.c             | 15 ++++++++-------
 fs/nfs/dir.c           |  5 ++++-
 fs/nfs/nfs4proc.c      |  2 +-
 include/linux/fs.h     |  1 +
 10 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 42e102e2e74a..566929792480 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -925,7 +925,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(d_inode(dentry), file);
 
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	dput(res);
 	return err;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 7f6ae21a27b3..ee65db5c7eb0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -358,7 +358,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	file->private_data = ofid;
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(inode, file);
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 out:
 	v9fs_put_acl(dacl, pacl);
 	dput(res);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..38a63fff7903 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -507,7 +507,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		dout("atomic_open finish_open on dn %p\n", dn);
 		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
 			ceph_init_inode_acls(d_inode(dentry), &acls);
-			*opened |= FILE_CREATED;
+			file->f_mode |= FMODE_CREATED;
 		}
 		err = finish_open(file, dentry, ceph_open, opened);
 	}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ddae52bd1993..21d7e393900e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -539,7 +539,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	}
 
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 
 	rc = finish_open(file, direntry, generic_file_open, opened);
 	if (rc) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..d4bdcf51e6cb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -508,7 +508,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 		goto no_open;
 
 	/* Only creates */
-	*opened |= FILE_CREATED;
+	file->f_mode |= FMODE_CREATED;
 
 	if (fc->no_create)
 		goto mknod;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 67c588edf8d8..4aba00a6004b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -767,7 +767,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
 	if (file) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common, opened);
 	}
 	gfs2_glock_dq_uninit(ghs);
diff --git a/fs/namei.c b/fs/namei.c
index 8a1ae074c1c1..4bd7cc0d7522 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3061,7 +3061,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 		 * permission here.
 		 */
 		int acc_mode = op->acc_mode;
-		if (*opened & FILE_CREATED) {
+		if (file->f_mode & FMODE_CREATED) {
 			WARN_ON(!(open_flag & O_CREAT));
 			fsnotify_create(dir, dentry);
 			acc_mode = 0;
@@ -3077,7 +3077,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 				dput(dentry);
 				dentry = file->f_path.dentry;
 			}
-			if (*opened & FILE_CREATED)
+			if (file->f_mode & FMODE_CREATED)
 				fsnotify_create(dir, dentry);
 			if (unlikely(d_is_negative(dentry))) {
 				error = -ENOENT;
@@ -3126,7 +3126,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
 	if (unlikely(IS_DEADDIR(dir_inode)))
 		return -ENOENT;
 
-	*opened &= ~FILE_CREATED;
+	file->f_mode &= ~FMODE_CREATED;
 	dentry = d_lookup(dir, &nd->last);
 	for (;;) {
 		if (!dentry) {
@@ -3211,7 +3211,7 @@ no_open:
 
 	/* Negative dentry, just create the file */
 	if (!dentry->d_inode && (open_flag & O_CREAT)) {
-		*opened |= FILE_CREATED;
+		file->f_mode |= FMODE_CREATED;
 		audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 		if (!dir_inode->i_op->create) {
 			error = -EACCES;
@@ -3318,7 +3318,7 @@ static int do_last(struct nameidata *nd,
 		if (error)
 			goto out;
 
-		if ((*opened & FILE_CREATED) ||
+		if ((file->f_mode & FMODE_CREATED) ||
 		    !S_ISREG(file_inode(file)->i_mode))
 			will_truncate = false;
 
@@ -3326,7 +3326,7 @@ static int do_last(struct nameidata *nd,
 		goto opened;
 	}
 
-	if (*opened & FILE_CREATED) {
+	if (file->f_mode & FMODE_CREATED) {
 		/* Don't check for write permission, don't truncate */
 		open_flag &= ~O_TRUNC;
 		will_truncate = false;
@@ -3400,7 +3400,8 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode, *opened);
+	error = ima_file_check(file, op->acc_mode,
+				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7a9c14426855..0ac50983fc4e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1461,6 +1461,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct inode *inode;
 	unsigned int lookup_flags = 0;
 	bool switched = false;
+	int created = 0;
 	int err;
 
 	/* Expect a negative dentry */
@@ -1521,7 +1522,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		goto out;
 
 	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
-	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
+	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
+	if (created)
+		file->f_mode |= FMODE_CREATED;
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ed45090e4df6..2c4df0ffbca1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2951,7 +2951,7 @@ static int _nfs4_do_open(struct inode *dir,
 		}
 	}
 	if (opened && opendata->file_created)
-		*opened |= FILE_CREATED;
+		*opened = 1;
 
 	if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) {
 		*ctx_th = opendata->f_attr.mdsthreshold;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..ca668c7e48a7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -149,6 +149,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 #define FMODE_OPENED		((__force fmode_t)0x80000)
+#define FMODE_CREATED		((__force fmode_t)0x100000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
-- 
cgit v1.2.3


From 6035a27b25ab9dadc8c3d5c5df5eae3fca62fc95 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:40:10 -0400
Subject: IMA: don't propagate opened through the entire thing

just check ->f_mode in ima_appraise_measurement()

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c                            |  3 +--
 fs/nfsd/vfs.c                         |  2 +-
 include/linux/ima.h                   |  4 ++--
 security/integrity/ima/ima.h          |  4 ++--
 security/integrity/ima/ima_appraise.c |  4 ++--
 security/integrity/ima/ima_main.c     | 16 ++++++++--------
 6 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 4bd7cc0d7522..d2aeb282ed05 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3400,8 +3400,7 @@ finish_open_created:
 	if (error)
 		goto out;
 opened:
-	error = ima_file_check(file, op->acc_mode,
-				file->f_mode & FMODE_CREATED ? FILE_CREATED : 0);
+	error = ima_file_check(file, op->acc_mode);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b0555d7d8200..55a099e47ba2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -763,7 +763,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 		goto out_nfserr;
 	}
 
-	host_err = ima_file_check(file, may_flags, 0);
+	host_err = ima_file_check(file, may_flags);
 	if (host_err) {
 		fput(file);
 		goto out_nfserr;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e4647e0eb60..d9ba3fc363b7 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -16,7 +16,7 @@ struct linux_binprm;
 
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_file_check(struct file *file, int mask, int opened);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
@@ -34,7 +34,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
-static inline int ima_file_check(struct file *file, int mask, int opened)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 354bb5716ce3..e4c1a236976c 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -238,7 +238,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened);
+			     int xattr_len);
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
@@ -254,7 +254,7 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
 					   struct file *file,
 					   const unsigned char *filename,
 					   struct evm_ima_xattr_data *xattr_value,
-					   int xattr_len, int opened)
+					   int xattr_len)
 {
 	return INTEGRITY_UNKNOWN;
 }
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 8bd7a0733e51..deec1804a00a 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -212,7 +212,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     struct integrity_iint_cache *iint,
 			     struct file *file, const unsigned char *filename,
 			     struct evm_ima_xattr_data *xattr_value,
-			     int xattr_len, int opened)
+			     int xattr_len)
 {
 	static const char op[] = "appraise_data";
 	const char *cause = "unknown";
@@ -231,7 +231,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 		cause = iint->flags & IMA_DIGSIG_REQUIRED ?
 				"IMA-signature-required" : "missing-hash";
 		status = INTEGRITY_NOLABEL;
-		if (opened & FILE_CREATED)
+		if (file->f_mode & FMODE_CREATED)
 			iint->flags |= IMA_NEW_FILE;
 		if ((iint->flags & IMA_NEW_FILE) &&
 		    (!(iint->flags & IMA_DIGSIG_REQUIRED) ||
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dca44cf7838e..b286f37712d5 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -168,7 +168,7 @@ void ima_file_free(struct file *file)
 
 static int process_measurement(struct file *file, const struct cred *cred,
 			       u32 secid, char *buf, loff_t size, int mask,
-			       enum ima_hooks func, int opened)
+			       enum ima_hooks func)
 {
 	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint = NULL;
@@ -294,7 +294,7 @@ static int process_measurement(struct file *file, const struct cred *cred,
 	if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
 		inode_lock(inode);
 		rc = ima_appraise_measurement(func, iint, file, pathname,
-					      xattr_value, xattr_len, opened);
+					      xattr_value, xattr_len);
 		inode_unlock(inode);
 	}
 	if (action & IMA_AUDIT)
@@ -338,7 +338,7 @@ int ima_file_mmap(struct file *file, unsigned long prot)
 	if (file && (prot & PROT_EXEC)) {
 		security_task_getsecid(current, &secid);
 		return process_measurement(file, current_cred(), secid, NULL,
-					   0, MAY_EXEC, MMAP_CHECK, 0);
+					   0, MAY_EXEC, MMAP_CHECK);
 	}
 
 	return 0;
@@ -364,13 +364,13 @@ int ima_bprm_check(struct linux_binprm *bprm)
 
 	security_task_getsecid(current, &secid);
 	ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0,
-				  MAY_EXEC, BPRM_CHECK, 0);
+				  MAY_EXEC, BPRM_CHECK);
 	if (ret)
 		return ret;
 
 	security_cred_getsecid(bprm->cred, &secid);
 	return process_measurement(bprm->file, bprm->cred, secid, NULL, 0,
-				   MAY_EXEC, CREDS_CHECK, 0);
+				   MAY_EXEC, CREDS_CHECK);
 }
 
 /**
@@ -383,14 +383,14 @@ int ima_bprm_check(struct linux_binprm *bprm)
  * On success return 0.  On integrity appraisal error, assuming the file
  * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
  */
-int ima_file_check(struct file *file, int mask, int opened)
+int ima_file_check(struct file *file, int mask)
 {
 	u32 secid;
 
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, NULL, 0,
 				   mask & (MAY_READ | MAY_WRITE | MAY_EXEC |
-					   MAY_APPEND), FILE_CHECK, opened);
+					   MAY_APPEND), FILE_CHECK);
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
@@ -493,7 +493,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 	func = read_idmap[read_id] ?: FILE_CHECK;
 	security_task_getsecid(current, &secid);
 	return process_measurement(file, current_cred(), secid, buf, size,
-				   MAY_READ, func, 0);
+				   MAY_READ, func);
 }
 
 static int __init init_ima(void)
-- 
cgit v1.2.3


From be12af3ef5e61ebc44d065e121424ac605d7bb8e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 11:44:56 -0400
Subject: getting rid of 'opened' argument of ->atomic_open() - part 1

'opened' argument of finish_open() is unused.  Kill it.

Signed-off-by Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 2 +-
 fs/9p/vfs_inode_dotl.c | 2 +-
 fs/ceph/file.c         | 2 +-
 fs/cifs/dir.c          | 2 +-
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 6 +++---
 fs/namei.c             | 2 +-
 fs/nfs/dir.c           | 2 +-
 fs/open.c              | 3 +--
 include/linux/fs.h     | 3 +--
 10 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 566929792480..7b6ff3275d9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -917,7 +917,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto error;
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ee65db5c7eb0..c6939b7cb18c 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -352,7 +352,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	/* Since we are opening a file, assign the open fid to the file */
-	err = finish_open(file, dentry, generic_file_open, opened);
+	err = finish_open(file, dentry, generic_file_open);
 	if (err)
 		goto err_clunk_old_fid;
 	file->private_data = ofid;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38a63fff7903..38b28cb2fac1 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -509,7 +509,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			ceph_init_inode_acls(d_inode(dentry), &acls);
 			file->f_mode |= FMODE_CREATED;
 		}
-		err = finish_open(file, dentry, ceph_open, opened);
+		err = finish_open(file, dentry, ceph_open);
 	}
 out_req:
 	if (!req->r_err && req->r_target_inode)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 21d7e393900e..891bfd62e67a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -541,7 +541,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
 		file->f_mode |= FMODE_CREATED;
 
-	rc = finish_open(file, direntry, generic_file_open, opened);
+	rc = finish_open(file, direntry, generic_file_open);
 	if (rc) {
 		if (server->ops->close)
 			server->ops->close(xid, tcon, &fid);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d4bdcf51e6cb..a5b1f5ff8cb7 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -469,7 +469,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	d_instantiate(entry, inode);
 	fuse_change_entry_timeout(entry, &outentry);
 	fuse_invalidate_attr(dir);
-	err = finish_open(file, entry, generic_file_open, opened);
+	err = finish_open(file, entry, generic_file_open);
 	if (err) {
 		fuse_sync_release(ff, flags);
 	} else {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 4aba00a6004b..59f695e96d63 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -626,7 +626,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		error = 0;
 		if (file) {
 			if (S_ISREG(inode->i_mode))
-				error = finish_open(file, dentry, gfs2_open_common, opened);
+				error = finish_open(file, dentry, gfs2_open_common);
 			else
 				error = finish_no_open(file, NULL);
 		}
@@ -768,7 +768,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	d_instantiate(dentry, inode);
 	if (file) {
 		file->f_mode |= FMODE_CREATED;
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 	}
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_glock_dq_uninit(ghs + 1);
@@ -866,7 +866,7 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry,
 		return d;
 	}
 	if (file && S_ISREG(inode->i_mode))
-		error = finish_open(file, dentry, gfs2_open_common, opened);
+		error = finish_open(file, dentry, gfs2_open_common);
 
 	gfs2_glock_dq_uninit(&gh);
 	if (error) {
diff --git a/fs/namei.c b/fs/namei.c
index d2aeb282ed05..117b118853f2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3475,7 +3475,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 	if (error)
 		goto out2;
 	file->f_path.mnt = path.mnt;
-	error = finish_open(file, child, NULL, opened);
+	error = finish_open(file, child, NULL);
 out2:
 	mnt_drop_write(path.mnt);
 out:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0ac50983fc4e..22176a3818d5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1439,7 +1439,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 {
 	int err;
 
-	err = finish_open(file, dentry, do_open, opened);
+	err = finish_open(file, dentry, do_open);
 	if (err)
 		goto out;
 	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
diff --git a/fs/open.c b/fs/open.c
index d2030a3c5c52..dbaac9efc7fc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -843,8 +843,7 @@ cleanup_file:
  * Returns zero on success or -errno if the open failed.
  */
 int finish_open(struct file *file, struct dentry *dentry,
-		int (*open)(struct inode *, struct file *),
-		int *opened)
+		int (*open)(struct inode *, struct file *))
 {
 	BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ca668c7e48a7..70be3e4c26ac 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2441,8 +2441,7 @@ enum {
 	FILE_OPENED = 2
 };
 extern int finish_open(struct file *file, struct dentry *dentry,
-			int (*open)(struct inode *, struct file *),
-			int *opened);
+			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
 
 /* fs/ioctl.c */
-- 
cgit v1.2.3


From 44907d79002466049fdbb8ef15730d185e0808b4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 13:32:02 -0400
Subject: get rid of 'opened' argument of ->atomic_open() - part 3

now it can be done...

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c      | 3 +--
 fs/9p/vfs_inode_dotl.c | 3 +--
 fs/bad_inode.c         | 2 +-
 fs/ceph/file.c         | 3 +--
 fs/ceph/super.h        | 3 +--
 fs/cifs/cifsfs.h       | 3 +--
 fs/cifs/dir.c          | 3 +--
 fs/fuse/dir.c          | 2 +-
 fs/gfs2/inode.c        | 3 +--
 fs/namei.c             | 3 +--
 fs/nfs/dir.c           | 2 +-
 fs/nfs/nfs4_fs.h       | 2 +-
 include/linux/fs.h     | 2 +-
 13 files changed, 13 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7b6ff3275d9c..85ff859d3af5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -859,8 +859,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 static int
 v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	int err;
 	u32 perm;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c6939b7cb18c..4823e1c46999 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -241,8 +241,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 
 static int
 v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
-			  struct file *file, unsigned flags, umode_t omode,
-			  int *opened)
+			  struct file *file, unsigned flags, umode_t omode)
 {
 	int err = 0;
 	kgid_t gid;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 125e8bbd22a2..8035d2a44561 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -134,7 +134,7 @@ static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
 
 static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
 				 struct file *file, unsigned int open_flag,
-				 umode_t create_mode, int *opened)
+				 umode_t create_mode)
 {
 	return -EIO;
 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 38b28cb2fac1..e2679e8a2535 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -429,8 +429,7 @@ out:
  * file or symlink, return 1 so the VFS can retry.
  */
 int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-		     struct file *file, unsigned flags, umode_t mode,
-		     int *opened)
+		     struct file *file, unsigned flags, umode_t mode)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a7077a0c989f..971328b99ede 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1025,8 +1025,7 @@ extern const struct file_operations ceph_file_fops;
 extern int ceph_renew_caps(struct inode *inode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
-			    struct file *file, unsigned flags, umode_t mode,
-			    int *opened);
+			    struct file *file, unsigned flags, umode_t mode);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 				  char *data, size_t len);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 5f0231803431..f3a78efc3109 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -65,8 +65,7 @@ extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, umode_t,
 		       bool excl);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
-			    struct file *, unsigned, umode_t,
-			    int *);
+			    struct file *, unsigned, umode_t);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
 				  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 891bfd62e67a..3713d22b95a7 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -465,8 +465,7 @@ out_err:
 
 int
 cifs_atomic_open(struct inode *inode, struct dentry *direntry,
-		 struct file *file, unsigned oflags, umode_t mode,
-		 int *opened)
+		 struct file *file, unsigned oflags, umode_t mode)
 {
 	int rc;
 	unsigned int xid;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b8d7e9d423c8..c979329311c8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -489,7 +489,7 @@ out_err:
 static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	int err;
 	struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 15e2a8a3b917..648f0ca1ad57 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1228,14 +1228,13 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
  * @file: The proposed new struct file
  * @flags: open flags
  * @mode: File mode
- * @opened: Flag to say whether the file has been opened or not
  *
  * Returns: error code or 0 for success
  */
 
 static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags,
-			    umode_t mode, int *opened)
+			    umode_t mode)
 {
 	struct dentry *d;
 	bool excl = !!(flags & O_EXCL);
diff --git a/fs/namei.c b/fs/namei.c
index 117b118853f2..1da272bf8ed3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3052,8 +3052,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 	file->f_path.dentry = DENTRY_NOT_SET;
 	file->f_path.mnt = nd->path.mnt;
 	error = dir->i_op->atomic_open(dir, dentry, file,
-				       open_to_namei_flags(open_flag),
-				       mode, opened);
+				       open_to_namei_flags(open_flag), mode);
 	d_lookup_done(dentry);
 	if (!error) {
 		/*
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 71ae3cc3e53a..f447b1a24350 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1451,7 +1451,7 @@ out:
 
 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		    struct file *file, unsigned open_flags,
-		    umode_t mode, int *opened)
+		    umode_t mode)
 {
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 	struct nfs_open_context *ctx;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 137e18abb7e7..51beb6e38c90 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -258,7 +258,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
 
 /* dir.c */
 int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
-		    unsigned, umode_t, int *);
+		    unsigned, umode_t);
 
 /* super.c */
 extern struct file_system_type nfs4_fs_type;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70be3e4c26ac..c25896b30e9f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1781,7 +1781,7 @@ struct inode_operations {
 	int (*update_time)(struct inode *, struct timespec64 *, int);
 	int (*atomic_open)(struct inode *, struct dentry *,
 			   struct file *, unsigned open_flag,
-			   umode_t create_mode, int *opened);
+			   umode_t create_mode);
 	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
 	int (*set_acl)(struct inode *, struct posix_acl *, int);
 } ____cacheline_aligned;
-- 
cgit v1.2.3


From dbae8f2ca2f0586f4b80201c78ff0aed2a012ab5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 8 Jun 2018 17:51:47 -0400
Subject: kill FILE_{CREATED,OPENED}

no users left

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c25896b30e9f..bd904c496878 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2436,10 +2436,6 @@ extern struct filename *getname(const char __user *);
 extern struct filename *getname_kernel(const char *);
 extern void putname(struct filename *name);
 
-enum {
-	FILE_CREATED = 1,
-	FILE_OPENED = 2
-};
 extern int finish_open(struct file *file, struct dentry *dentry,
 			int (*open)(struct inode *, struct file *));
 extern int finish_no_open(struct file *file, struct dentry *dentry);
-- 
cgit v1.2.3


From d93aa9d82aea80b80f225dbf9c7986df444d8106 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 Jun 2018 09:40:05 -0400
Subject: new wrapper: alloc_file_pseudo()

takes inode, vfsmount, name, O_... flags and file_operations and
either returns a new struct file (in which case inode reference we
held is consumed) or returns ERR_PTR(), in which case no refcounts
are altered.

converted aio_private_file() and sock_alloc_file() to it

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c             | 20 ++++----------------
 fs/file_table.c      | 27 +++++++++++++++++++++++++++
 include/linux/file.h |  3 +++
 net/socket.c         | 28 +++++-----------------------
 4 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index 9eea53887d6c..c3a8bac16374 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -215,9 +215,7 @@ static const struct address_space_operations aio_ctx_aops;
 
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
-	struct qstr this = QSTR_INIT("[aio]", 5);
 	struct file *file;
-	struct path path;
 	struct inode *inode = alloc_anon_inode(aio_mnt->mnt_sb);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
@@ -226,27 +224,17 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 	inode->i_mapping->private_data = ctx;
 	inode->i_size = PAGE_SIZE * nr_pages;
 
-	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
-	if (!path.dentry) {
-		iput(inode);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(aio_mnt);
-
-	d_instantiate(path.dentry, inode);
-	file = alloc_file(&path, O_RDWR, &aio_ring_fops);
+	file = alloc_file_pseudo(inode, aio_mnt, "[aio]",
+				O_RDWR, &aio_ring_fops);
 	if (IS_ERR(file))
-		path_put(&path);
+		iput(inode);
 	return file;
 }
 
 static struct dentry *aio_mount(struct file_system_type *fs_type,
 				int flags, const char *dev_name, void *data)
 {
-	static const struct dentry_operations ops = {
-		.d_dname	= simple_dname,
-	};
-	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, NULL,
 					   AIO_RING_MAGIC);
 
 	if (!IS_ERR(root))
diff --git a/fs/file_table.c b/fs/file_table.c
index 9b70ed2bbc4e..6b3723909342 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -184,6 +184,33 @@ struct file *alloc_file(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(alloc_file);
 
+struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
+				const char *name, int flags,
+				const struct file_operations *fops)
+{
+	static const struct dentry_operations anon_ops = {
+		.d_dname = simple_dname
+	};
+	struct qstr this = QSTR_INIT(name, strlen(name));
+	struct path path;
+	struct file *file;
+
+	path.dentry = d_alloc_pseudo(mnt->mnt_sb, &this);
+	if (!path.dentry)
+		return ERR_PTR(-ENOMEM);
+	if (!mnt->mnt_sb->s_d_op)
+		d_set_d_op(path.dentry, &anon_ops);
+	path.mnt = mntget(mnt);
+	d_instantiate(path.dentry, inode);
+	file = alloc_file(&path, flags, fops);
+	if (IS_ERR(file)) {
+		ihold(inode);
+		path_put(&path);
+	}
+	return file;
+}
+EXPORT_SYMBOL(alloc_file_pseudo);
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/include/linux/file.h b/include/linux/file.h
index aed45d69811e..5b25388f2f79 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -17,9 +17,12 @@ extern void fput(struct file *);
 struct file_operations;
 struct vfsmount;
 struct dentry;
+struct inode;
 struct path;
 extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
+extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
+	const char *, int flags, const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
diff --git a/net/socket.c b/net/socket.c
index 2cdbe8f71b7f..4cf3568caf9f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -391,33 +391,15 @@ static struct file_system_type sock_fs_type = {
 
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 {
-	struct qstr name = { .name = "" };
-	struct path path;
 	struct file *file;
 
-	if (dname) {
-		name.name = dname;
-		name.len = strlen(name.name);
-	} else if (sock->sk) {
-		name.name = sock->sk->sk_prot_creator->name;
-		name.len = strlen(name.name);
-	}
-	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
-	if (unlikely(!path.dentry)) {
-		sock_release(sock);
-		return ERR_PTR(-ENOMEM);
-	}
-	path.mnt = mntget(sock_mnt);
-
-	d_instantiate(path.dentry, SOCK_INODE(sock));
+	if (!dname)
+		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
 
-	file = alloc_file(&path, O_RDWR | (flags & O_NONBLOCK),
-		  &socket_file_ops);
+	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
+				O_RDWR | (flags & O_NONBLOCK),
+				&socket_file_ops);
 	if (IS_ERR(file)) {
-		/* drop dentry, keep inode for a bit */
-		ihold(d_inode(path.dentry));
-		path_put(&path);
-		/* ... and now kill it properly */
 		sock_release(sock);
 		return file;
 	}
-- 
cgit v1.2.3


From 183266f26f45a47958afb5c9aa1b3d4651e2eb8c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:15:10 -0400
Subject: new helper: alloc_file_clone()

alloc_file_clone(old_file, mode, ops): create a new struct file with
->f_path equal to that of old_file.  pipe converted.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 11 +++++++++++
 fs/pipe.c            |  6 ++----
 include/linux/file.h |  2 ++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 6b3723909342..78b067ddb386 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -211,6 +211,17 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(alloc_file_pseudo);
 
+struct file *alloc_file_clone(struct file *base, int flags,
+				const struct file_operations *fops)
+{
+	struct file *f = alloc_file(&base->f_path, flags, fops);
+	if (!IS_ERR(f)) {
+		path_get(&f->f_path);
+		f->f_mapping = base->f_mapping;
+	}
+	return f;
+}
+
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
diff --git a/fs/pipe.c b/fs/pipe.c
index 9701bd3458d1..5f50070774bc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -761,15 +761,13 @@ int create_pipe_files(struct file **res, int flags)
 
 	f->private_data = inode->i_pipe;
 
-	res[0] = alloc_file(&f->f_path, O_RDONLY | (flags & O_NONBLOCK),
-			&pipefifo_fops);
+	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
+				  &pipefifo_fops);
 	if (IS_ERR(res[0])) {
 		put_pipe_info(inode, inode->i_pipe);
 		fput(f);
 		return PTR_ERR(res[0]);
 	}
-
-	path_get(&f->f_path);
 	res[0]->private_data = inode->i_pipe;
 	res[1] = f;
 	return 0;
diff --git a/include/linux/file.h b/include/linux/file.h
index 5b25388f2f79..60914843c737 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -23,6 +23,8 @@ extern struct file *alloc_file(const struct path *, int flags,
 	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
+extern struct file *alloc_file_clone(struct file *, int flags,
+	const struct file_operations *);
 
 static inline void fput_light(struct file *file, int fput_needed)
 {
-- 
cgit v1.2.3


From ee1904ba44bd4a242b453e8fe179b374906da173 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 17 Jun 2018 14:21:27 -0400
Subject: make alloc_file() static

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c      | 3 +--
 include/linux/file.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 78b067ddb386..d6eccd04d703 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -157,7 +157,7 @@ over:
  * @flags: O_... flags with which the new file will be opened
  * @fop: the 'struct file_operations' for the new file
  */
-struct file *alloc_file(const struct path *path, int flags,
+static struct file *alloc_file(const struct path *path, int flags,
 		const struct file_operations *fop)
 {
 	struct file *file;
@@ -182,7 +182,6 @@ struct file *alloc_file(const struct path *path, int flags,
 		i_readcount_inc(path->dentry->d_inode);
 	return file;
 }
-EXPORT_SYMBOL(alloc_file);
 
 struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt,
 				const char *name, int flags,
diff --git a/include/linux/file.h b/include/linux/file.h
index 60914843c737..6b2fb032416c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -19,8 +19,6 @@ struct vfsmount;
 struct dentry;
 struct inode;
 struct path;
-extern struct file *alloc_file(const struct path *, int flags,
-	const struct file_operations *fop);
 extern struct file *alloc_file_pseudo(struct inode *, struct vfsmount *,
 	const char *, int flags, const struct file_operations *);
 extern struct file *alloc_file_clone(struct file *, int flags,
-- 
cgit v1.2.3


From 0238d2b4a4c7bb1b2268e1ef789629878e16ab02 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 12 Jul 2018 09:57:00 +0800
Subject: serial: 8250: introduce get_divisor() and set_divisor() hook

Add these two hooks so that they can be overridden with driver specific
implementations.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c |  4 ++++
 drivers/tty/serial/8250/8250_port.c | 27 +++++++++++++++++++++++----
 include/linux/serial_core.h         |  7 +++++++
 3 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 9342fc2ee7df..a0bb77290747 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -1023,6 +1023,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up)
 			uart->port.get_mctrl = up->port.get_mctrl;
 		if (up->port.set_mctrl)
 			uart->port.set_mctrl = up->port.set_mctrl;
+		if (up->port.get_divisor)
+			uart->port.get_divisor = up->port.get_divisor;
+		if (up->port.set_divisor)
+			uart->port.set_divisor = up->port.set_divisor;
 		if (up->port.startup)
 			uart->port.startup = up->port.startup;
 		if (up->port.shutdown)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 709fe6b4265c..ce0dc17f18ee 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2498,9 +2498,9 @@ static unsigned int npcm_get_divisor(struct uart_8250_port *up,
 	return DIV_ROUND_CLOSEST(port->uartclk, 16 * baud + 2) - 2;
 }
 
-static unsigned int serial8250_get_divisor(struct uart_port *port,
-					   unsigned int baud,
-					   unsigned int *frac)
+static unsigned int serial8250_do_get_divisor(struct uart_port *port,
+					      unsigned int baud,
+					      unsigned int *frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 	unsigned int quot;
@@ -2532,6 +2532,16 @@ static unsigned int serial8250_get_divisor(struct uart_port *port,
 	return quot;
 }
 
+static unsigned int serial8250_get_divisor(struct uart_port *port,
+					   unsigned int baud,
+					   unsigned int *frac)
+{
+	if (port->get_divisor)
+		return port->get_divisor(port, baud, frac);
+
+	return serial8250_do_get_divisor(port, baud, frac);
+}
+
 static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 					    tcflag_t c_cflag)
 {
@@ -2570,7 +2580,7 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 	return cval;
 }
 
-static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
+static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
 			    unsigned int quot, unsigned int quot_frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -2603,6 +2613,15 @@ static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
 	}
 }
 
+static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
+				   unsigned int quot, unsigned int quot_frac)
+{
+	if (port->set_divisor)
+		port->set_divisor(port, baud, quot, quot_frac);
+	else
+		serial8250_do_set_divisor(port, baud, quot, quot_frac);
+}
+
 static unsigned int serial8250_get_baud_rate(struct uart_port *port,
 					     struct ktermios *termios,
 					     struct ktermios *old)
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 06ea4eeb09ab..406edae44ca3 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -127,6 +127,13 @@ struct uart_port {
 					     struct ktermios *);
 	unsigned int		(*get_mctrl)(struct uart_port *);
 	void			(*set_mctrl)(struct uart_port *, unsigned int);
+	unsigned int		(*get_divisor)(struct uart_port *,
+					       unsigned int baud,
+					       unsigned int *frac);
+	void			(*set_divisor)(struct uart_port *,
+					       unsigned int baud,
+					       unsigned int quot,
+					       unsigned int quot_frac);
 	int			(*startup)(struct uart_port *port);
 	void			(*shutdown)(struct uart_port *port);
 	void			(*throttle)(struct uart_port *port);
-- 
cgit v1.2.3


From 6226e5f31a786b58f33781852c897ac32a325ce6 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Thu, 12 Jul 2018 09:58:14 +0800
Subject: serial: 8250: export serial8250_do_set_divisor()

Some drivers could call serial8250_do_set_divisor() to complete its
own set_divisor routine. Export this symbol for code reusing.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 5 +++--
 include/linux/serial_8250.h         | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index ce0dc17f18ee..945f8dc2d50f 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -2580,8 +2580,8 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up,
 	return cval;
 }
 
-static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
-			    unsigned int quot, unsigned int quot_frac)
+void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
+			       unsigned int quot, unsigned int quot_frac)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
 
@@ -2612,6 +2612,7 @@ static void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
 		serial_port_out(port, 0x2, quot_frac);
 	}
 }
+EXPORT_SYMBOL_GPL(serial8250_do_set_divisor);
 
 static void serial8250_set_divisor(struct uart_port *port, unsigned int baud,
 				   unsigned int quot, unsigned int quot_frac)
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 76b9db71e489..18e21427bce4 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -160,6 +160,9 @@ extern void serial8250_do_shutdown(struct uart_port *port);
 extern void serial8250_do_pm(struct uart_port *port, unsigned int state,
 			     unsigned int oldstate);
 extern void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl);
+extern void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
+				      unsigned int quot,
+				      unsigned int quot_frac);
 extern int fsl8250_handle_irq(struct uart_port *port);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
 unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr);
-- 
cgit v1.2.3


From 2abc77af89e17582db9039293c8ac881c8c96d79 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Jul 2018 11:18:42 -0400
Subject: new helper: open_with_fake_path()

open a file by given inode, faking ->f_path.  Use with shitloads
of caution - at the very least you'd damn better make sure that
some dentry alias of that inode is pinned down by the path in
question.  Again, this is no general-purpose interface and I hope
it will eventually go away.  Right now overlayfs wants something
like that, but nothing else should.

Any out-of-tree code with bright idea of using this one *will*
eventually get hurt, with zero notice and great delight on my part.
I refuse to use EXPORT_SYMBOL_GPL(), especially in situations when
it's really EXPORT_SYMBOL_DONT_USE_IT(), but don't take that export
as "you are welcome to use it".

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/open.c          | 18 ++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index ee893240d199..dd15711eb658 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -925,6 +925,24 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
+struct file *open_with_fake_path(const struct path *path, int flags,
+				struct inode *inode, const struct cred *cred)
+{
+	struct file *f = alloc_empty_file(flags, cred);
+	if (!IS_ERR(f)) {
+		int error;
+
+		f->f_path = *path;
+		error = do_dentry_open(f, inode, NULL);
+		if (error) {
+			fput(f);
+			f = ERR_PTR(error);
+		}
+	}
+	return f;
+}
+EXPORT_SYMBOL(open_with_fake_path);
+
 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 05f34726e29c..4ff7b7012186 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2424,6 +2424,8 @@ extern struct file *filp_open(const char *, int, umode_t);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
 				   const char *, int, umode_t);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file * open_with_fake_path(const struct path *, int,
+					 struct inode*, const struct cred *);
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
-- 
cgit v1.2.3


From 2bae79d2d38f3dc50bfef81d3b4f7328b2883a17 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 12 Jul 2018 12:52:22 +0100
Subject: bpf: fix documentation for eBPF helpers

Minor formatting edits for eBPF helpers documentation, including blank
lines removal, fix of item list for return values in bpf_fib_lookup(),
and missing prefix on bpf_skb_load_bytes_relative().

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b7db3261c62d..6bcb287a888d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1826,7 +1826,7 @@ union bpf_attr {
  * 		A non-negative value equal to or less than *size* on success,
  * 		or a negative error in case of failure.
  *
- * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
  * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
  * 		it provides an easy way to load *len* bytes from *offset*
@@ -1877,7 +1877,7 @@ union bpf_attr {
  *		* < 0 if any input argument is invalid
  *		*   0 on success (packet is forwarded, nexthop neighbor exists)
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
- *		*     packet is not forwarded or needs assist from full stack
+ *		  packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
@@ -2033,7 +2033,6 @@ union bpf_attr {
  *		This helper is only available is the kernel was compiled with
  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *		"**y**".
- *
  *	Return
  *		0
  *
@@ -2053,7 +2052,6 @@ union bpf_attr {
  *		This helper is only available is the kernel was compiled with
  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *		"**y**".
- *
  *	Return
  *		0
  *
-- 
cgit v1.2.3


From a8802d97e73346bc81609df9dfba7d3306f40d87 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 11 Jul 2018 11:16:41 -0700
Subject: ktime: Provide typesafe ktime_to_ns()

Using ktime_to_ns() is nice to help backports to stable kernels.

Having a typesafe function instead of a macro avoid stupid typos
and waste of time tracking these typos.

Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20180711181641.10369-1-edumazet@google.com
---
 include/linux/ktime.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 5b9fddbaac41..b2bb44f87f5a 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -93,8 +93,11 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
 /* Map the ktime_t to timeval conversion to ns_to_timeval function */
 #define ktime_to_timeval(kt)		ns_to_timeval((kt))
 
-/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
-#define ktime_to_ns(kt)			(kt)
+/* Convert ktime_t to nanoseconds */
+static inline s64 ktime_to_ns(const ktime_t kt)
+{
+	return kt;
+}
 
 /**
  * ktime_compare - Compares two ktime_t variables for less, greater or equal
-- 
cgit v1.2.3


From cca9bab1b72cd2296097c75f59ef11ef80461279 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Jul 2018 12:16:12 +0200
Subject: tcp: use monotonic timestamps for PAWS

Using get_seconds() for timestamps is deprecated since it can lead
to overflows on 32-bit systems. While the interface generally doesn't
overflow until year 2106, the specific implementation of the TCP PAWS
algorithm breaks in 2038 when the intermediate signed 32-bit timestamps
overflow.

A related problem is that the local timestamps in CLOCK_REALTIME form
lead to unexpected behavior when settimeofday is called to set the system
clock backwards or forwards by more than 24 days.

While the first problem could be solved by using an overflow-safe method
of comparing the timestamps, a nicer solution is to use a monotonic
clocksource with ktime_get_seconds() that simply doesn't overflow (at
least not until 136 years after boot) and that doesn't change during
settimeofday().

To make 32-bit and 64-bit architectures behave the same way here, and
also save a few bytes in the tcp_options_received structure, I'm changing
the type to a 32-bit integer, which is now safe on all architectures.

Finally, the ts_recent_stamp field also (confusingly) gets used to store
a jiffies value in tcp_synq_overflow()/tcp_synq_no_recent_overflow().
This is currently safe, but changing the type to 32-bit requires
some small changes there to keep it working.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_cm.c |  2 +-
 include/linux/tcp.h                     |  4 ++--
 include/net/tcp.h                       | 17 ++++++++++-------
 net/ipv4/tcp_input.c                    |  2 +-
 net/ipv4/tcp_ipv4.c                     |  3 ++-
 net/ipv4/tcp_minisocks.c                |  8 ++++----
 6 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 2bb6f0380758..0997e166ea57 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1673,7 +1673,7 @@ static void chtls_timewait(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->rcv_nxt++;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 	tp->srtt_us = 0;
 	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
 }
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3dbea6610304..58a8d7d71354 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -89,7 +89,7 @@ struct tcp_sack_block {
 
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
-	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
+	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 	u32	ts_recent;	/* Time stamp to echo next		*/
 	u32	rcv_tsval;	/* Time stamp value             	*/
 	u32	rcv_tsecr;	/* Time stamp echo reply        	*/
@@ -426,7 +426,7 @@ struct tcp_timewait_sock {
 	/* The time we sent the last out-of-window ACK: */
 	u32			  tw_last_oow_ack_time;
 
-	long			  tw_ts_recent_stamp;
+	int			  tw_ts_recent_stamp;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f6cb20e6e524..582304955087 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -472,19 +472,20 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-	unsigned long now = jiffies;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	if (time_after(now, last_overflow + HZ))
+	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
 
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int now = jiffies;
 
-	return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
 static inline u32 tcp_cookie_time(void)
@@ -1375,7 +1376,8 @@ static inline bool tcp_paws_check(const struct tcp_options_received *rx_opt,
 {
 	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
 		return true;
-	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+	if (unlikely(!time_before32(ktime_get_seconds(),
+				    rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)))
 		return true;
 	/*
 	 * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0,
@@ -1405,7 +1407,8 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt,
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && !time_before32(ktime_get_seconds(),
+				  rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
 		return false;
 	return true;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 814ea43dd12f..d3b6390ecf23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3462,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1e8302..dc415c66a33a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	   and use initial timestamp retrieved from peer table.
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
-	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+	    (!twp || (reuse && time_after32(ktime_get_seconds(),
+					    tcptw->tw_ts_recent_stamp)))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dac5893a52b4..75ef332a7caf 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -189,7 +189,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 	if (newtp->rx_opt.tstamp_ok) {
 		newtp->rx_opt.ts_recent = req->ts_recent;
-		newtp->rx_opt.ts_recent_stamp = get_seconds();
+		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 	} else {
 		newtp->rx_opt.ts_recent_stamp = 0;
@@ -603,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+			tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
-- 
cgit v1.2.3


From c749cdda9089eb1fdb6a9ab98f945124d12f2595 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 11 Jul 2018 16:04:50 +0200
Subject: net/sched: act_skbedit: don't use spinlock in the data path

use RCU instead of spin_{,un}lock_bh, to protect concurrent read/write on
act_skbedit configuration. This reduces the effects of contention in the
data path, in case multiple readers are present.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_skbedit.h |  37 ++++++++++----
 net/sched/act_skbedit.c         | 107 +++++++++++++++++++++++++---------------
 2 files changed, 95 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 19cd3d345804..911bbac838a2 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -22,14 +22,19 @@
 #include <net/act_api.h>
 #include <linux/tc_act/tc_skbedit.h>
 
+struct tcf_skbedit_params {
+	u32 flags;
+	u32 priority;
+	u32 mark;
+	u32 mask;
+	u16 queue_mapping;
+	u16 ptype;
+	struct rcu_head rcu;
+};
+
 struct tcf_skbedit {
-	struct tc_action	common;
-	u32		flags;
-	u32		priority;
-	u32		mark;
-	u32		mask;
-	u16		queue_mapping;
-	u16		ptype;
+	struct tc_action common;
+	struct tcf_skbedit_params __rcu *params;
 };
 #define to_skbedit(a) ((struct tcf_skbedit *)a)
 
@@ -37,15 +42,27 @@ struct tcf_skbedit {
 static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
-		return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+	u32 flags;
+
+	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+		rcu_read_lock();
+		flags = rcu_dereference(to_skbedit(a)->params)->flags;
+		rcu_read_unlock();
+		return flags == SKBEDIT_F_MARK;
+	}
 #endif
 	return false;
 }
 
 static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 {
-	return to_skbedit(a)->mark;
+	u32 mark;
+
+	rcu_read_lock();
+	mark = rcu_dereference(to_skbedit(a)->params)->mark;
+	rcu_read_unlock();
+
+	return mark;
 }
 
 #endif /* __NET_TC_SKBEDIT_H */
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 8651b5bd6b59..da56e6938c9e 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -37,14 +37,19 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
 {
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+	int action;
 
 	tcf_lastuse_update(&d->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
 
-	spin_lock(&d->tcf_lock);
-	if (d->flags & SKBEDIT_F_PRIORITY)
-		skb->priority = d->priority;
-	if (d->flags & SKBEDIT_F_INHERITDSFIELD) {
+	rcu_read_lock();
+	params = rcu_dereference(d->params);
+	action = READ_ONCE(d->tcf_action);
+
+	if (params->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = params->priority;
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
 		int wlen = skb_network_offset(skb);
 
 		switch (tc_skb_protocol(skb)) {
@@ -63,23 +68,23 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 			break;
 		}
 	}
-	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
-	    skb->dev->real_num_tx_queues > d->queue_mapping)
-		skb_set_queue_mapping(skb, d->queue_mapping);
-	if (d->flags & SKBEDIT_F_MARK) {
-		skb->mark &= ~d->mask;
-		skb->mark |= d->mark & d->mask;
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
+	    skb->dev->real_num_tx_queues > params->queue_mapping)
+		skb_set_queue_mapping(skb, params->queue_mapping);
+	if (params->flags & SKBEDIT_F_MARK) {
+		skb->mark &= ~params->mask;
+		skb->mark |= params->mark & params->mask;
 	}
-	if (d->flags & SKBEDIT_F_PTYPE)
-		skb->pkt_type = d->ptype;
-
-	spin_unlock(&d->tcf_lock);
-	return d->tcf_action;
+	if (params->flags & SKBEDIT_F_PTYPE)
+		skb->pkt_type = params->ptype;
 
+unlock:
+	rcu_read_unlock();
+	return action;
 err:
-	spin_unlock(&d->tcf_lock);
 	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
-	return TC_ACT_SHOT;
+	action = TC_ACT_SHOT;
+	goto unlock;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -98,6 +103,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+	struct tcf_skbedit_params *params_old, *params_new;
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
@@ -185,25 +191,34 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		}
 	}
 
-	spin_lock_bh(&d->tcf_lock);
+	ASSERT_RTNL();
 
-	d->flags = flags;
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_idr_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	params_new->flags = flags;
 	if (flags & SKBEDIT_F_PRIORITY)
-		d->priority = *priority;
+		params_new->priority = *priority;
 	if (flags & SKBEDIT_F_QUEUE_MAPPING)
-		d->queue_mapping = *queue_mapping;
+		params_new->queue_mapping = *queue_mapping;
 	if (flags & SKBEDIT_F_MARK)
-		d->mark = *mark;
+		params_new->mark = *mark;
 	if (flags & SKBEDIT_F_PTYPE)
-		d->ptype = *ptype;
+		params_new->ptype = *ptype;
 	/* default behaviour is to use all the bits */
-	d->mask = 0xffffffff;
+	params_new->mask = 0xffffffff;
 	if (flags & SKBEDIT_F_MASK)
-		d->mask = *mask;
+		params_new->mask = *mask;
 
 	d->tcf_action = parm->action;
-
-	spin_unlock_bh(&d->tcf_lock);
+	params_old = rtnl_dereference(d->params);
+	rcu_assign_pointer(d->params, params_new);
+	if (params_old)
+		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
@@ -215,33 +230,36 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
 	struct tc_skbedit opt = {
 		.index   = d->tcf_index,
 		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
 		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
-	struct tcf_t t;
 	u64 pure_flags = 0;
+	struct tcf_t t;
+
+	params = rtnl_dereference(d->params);
 
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PRIORITY) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
+	if ((params->flags & SKBEDIT_F_PRIORITY) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
+	if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MARK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+	if ((params->flags & SKBEDIT_F_MARK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PTYPE) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
+	if ((params->flags & SKBEDIT_F_PTYPE) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MASK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+	if ((params->flags & SKBEDIT_F_MASK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
 		goto nla_put_failure;
-	if (d->flags & SKBEDIT_F_INHERITDSFIELD)
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
 		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
 	if (pure_flags != 0 &&
 	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
@@ -257,6 +275,16 @@ nla_put_failure:
 	return -1;
 }
 
+static void tcf_skbedit_cleanup(struct tc_action *a)
+{
+	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+
+	params = rcu_dereference_protected(d->params, 1);
+	if (params)
+		kfree_rcu(params, rcu);
+}
+
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 			      struct netlink_callback *cb, int type,
 			      const struct tc_action_ops *ops,
@@ -289,6 +317,7 @@ static struct tc_action_ops act_skbedit_ops = {
 	.act		=	tcf_skbedit,
 	.dump		=	tcf_skbedit_dump,
 	.init		=	tcf_skbedit_init,
+	.cleanup	=	tcf_skbedit_cleanup,
 	.walk		=	tcf_skbedit_walker,
 	.lookup		=	tcf_skbedit_search,
 	.delete		=	tcf_skbedit_delete,
-- 
cgit v1.2.3


From 3f3a89e1d7c31558c070692241e3d6146d2cf1bf Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 07:18:03 +0200
Subject: i2c: remove i2c_lock_adapter and use i2c_lock_bus directly

The i2c_lock_adapter name is ambiguous since it is unclear if it
refers to the root adapter or the adapter you name in the argument.
The natural interpretation is the adapter you name in the argument,
but there are historical reasons for that not being the case; it
in fact locks the root adapter. Just remove the function and force
users to spell out the I2C_LOCK_ROOT_ADAPTER name to indicate what
is really going on. Also remove i2c_unlock_adapter, of course.

This patch was generated with

git grep -l 'i2c_\(un\)\?lock_adapter' \
| xargs sed -i 's/i2c_\(un\)\?lock_adapter(\([^)]*\))/'\
'i2c_\1lock_bus(\2, I2C_LOCK_ROOT_ADAPTER)/g'

followed by white-space touch-up.

Signed-off-by: Peter Rosin <peda@axentia.se>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-brcmstb.c   |  8 ++++----
 drivers/i2c/busses/i2c-davinci.c   |  4 ++--
 drivers/i2c/busses/i2c-gpio.c      | 40 +++++++++++++++++++-------------------
 drivers/i2c/busses/i2c-s3c2410.c   |  4 ++--
 drivers/i2c/busses/i2c-sprd.c      |  8 ++++----
 drivers/i2c/i2c-core-slave.c       |  8 ++++----
 drivers/iio/temperature/mlx90614.c |  4 ++--
 include/linux/i2c.h                | 12 ------------
 8 files changed, 38 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 78792b4d6437..826d32049996 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -689,9 +689,9 @@ static int brcmstb_i2c_suspend(struct device *dev)
 {
 	struct brcmstb_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
-	i2c_lock_adapter(&i2c_dev->adapter);
+	i2c_lock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = true;
-	i2c_unlock_adapter(&i2c_dev->adapter);
+	i2c_unlock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
@@ -700,10 +700,10 @@ static int brcmstb_i2c_resume(struct device *dev)
 {
 	struct brcmstb_i2c_dev *i2c_dev = dev_get_drvdata(dev);
 
-	i2c_lock_adapter(&i2c_dev->adapter);
+	i2c_lock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	brcmstb_i2c_set_bsc_reg_defaults(i2c_dev);
 	i2c_dev->is_suspended = false;
-	i2c_unlock_adapter(&i2c_dev->adapter);
+	i2c_unlock_bus(&i2c_dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c
index 75d6ab177055..d945a2654c2f 100644
--- a/drivers/i2c/busses/i2c-davinci.c
+++ b/drivers/i2c/busses/i2c-davinci.c
@@ -714,14 +714,14 @@ static int i2c_davinci_cpufreq_transition(struct notifier_block *nb,
 
 	dev = container_of(nb, struct davinci_i2c_dev, freq_transition);
 
-	i2c_lock_adapter(&dev->adapter);
+	i2c_lock_bus(&dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 	if (val == CPUFREQ_PRECHANGE) {
 		davinci_i2c_reset_ctrl(dev, 0);
 	} else if (val == CPUFREQ_POSTCHANGE) {
 		i2c_davinci_calc_clk_dividers(dev);
 		davinci_i2c_reset_ctrl(dev, 1);
 	}
-	i2c_unlock_adapter(&dev->adapter);
+	i2c_unlock_bus(&dev->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index 66f85bbf3591..20b3dd756fc7 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -78,24 +78,24 @@ static struct dentry *i2c_gpio_debug_dir;
 #define getscl(bd)	((bd)->getscl((bd)->data))
 
 #define WIRE_ATTRIBUTE(wire) \
-static int fops_##wire##_get(void *data, u64 *val)	\
-{							\
-	struct i2c_gpio_private_data *priv = data;	\
-							\
-	i2c_lock_adapter(&priv->adap);			\
-	*val = get##wire(&priv->bit_data);		\
-	i2c_unlock_adapter(&priv->adap);		\
-	return 0;					\
-}							\
-static int fops_##wire##_set(void *data, u64 val)	\
-{							\
-	struct i2c_gpio_private_data *priv = data;	\
-							\
-	i2c_lock_adapter(&priv->adap);			\
-	set##wire(&priv->bit_data, val);		\
-	i2c_unlock_adapter(&priv->adap);		\
-	return 0;					\
-}							\
+static int fops_##wire##_get(void *data, u64 *val)		\
+{								\
+	struct i2c_gpio_private_data *priv = data;		\
+								\
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	*val = get##wire(&priv->bit_data);			\
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	return 0;						\
+}								\
+static int fops_##wire##_set(void *data, u64 val)		\
+{								\
+	struct i2c_gpio_private_data *priv = data;		\
+								\
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	set##wire(&priv->bit_data, val);			\
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);	\
+	return 0;						\
+}								\
 DEFINE_DEBUGFS_ATTRIBUTE(fops_##wire, fops_##wire##_get, fops_##wire##_set, "%llu\n")
 
 WIRE_ATTRIBUTE(scl);
@@ -113,7 +113,7 @@ static int fops_incomplete_transfer_set(void *data, u64 addr)
 	/* ADDR (7 bit) + RD (1 bit) + SDA hi (1 bit) */
 	pattern = (addr << 2) | 3;
 
-	i2c_lock_adapter(&priv->adap);
+	i2c_lock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	/* START condition */
 	setsda(bit_data, 0);
@@ -129,7 +129,7 @@ static int fops_incomplete_transfer_set(void *data, u64 addr)
 		udelay(bit_data->udelay);
 	}
 
-	i2c_unlock_adapter(&priv->adap);
+	i2c_unlock_bus(&priv->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index 9fe2b6951895..2f2e28d60ef5 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -919,9 +919,9 @@ static int s3c24xx_i2c_cpufreq_transition(struct notifier_block *nb,
 
 	if ((val == CPUFREQ_POSTCHANGE && delta_f < 0) ||
 	    (val == CPUFREQ_PRECHANGE && delta_f > 0)) {
-		i2c_lock_adapter(&i2c->adap);
+		i2c_lock_bus(&i2c->adap, I2C_LOCK_ROOT_ADAPTER);
 		ret = s3c24xx_i2c_clockrate(i2c, &got);
-		i2c_unlock_adapter(&i2c->adap);
+		i2c_unlock_bus(&i2c->adap, I2C_LOCK_ROOT_ADAPTER);
 
 		if (ret < 0)
 			dev_err(i2c->dev, "cannot find frequency (%d)\n", ret);
diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c
index 4053259bccb8..a94e724f51dc 100644
--- a/drivers/i2c/busses/i2c-sprd.c
+++ b/drivers/i2c/busses/i2c-sprd.c
@@ -590,9 +590,9 @@ static int __maybe_unused sprd_i2c_suspend_noirq(struct device *pdev)
 {
 	struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
 
-	i2c_lock_adapter(&i2c_dev->adap);
+	i2c_lock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = true;
-	i2c_unlock_adapter(&i2c_dev->adap);
+	i2c_unlock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return pm_runtime_force_suspend(pdev);
 }
@@ -601,9 +601,9 @@ static int __maybe_unused sprd_i2c_resume_noirq(struct device *pdev)
 {
 	struct sprd_i2c *i2c_dev = dev_get_drvdata(pdev);
 
-	i2c_lock_adapter(&i2c_dev->adap);
+	i2c_lock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 	i2c_dev->is_suspended = false;
-	i2c_unlock_adapter(&i2c_dev->adap);
+	i2c_unlock_bus(&i2c_dev->adap, I2C_LOCK_ROOT_ADAPTER);
 
 	return pm_runtime_force_resume(pdev);
 }
diff --git a/drivers/i2c/i2c-core-slave.c b/drivers/i2c/i2c-core-slave.c
index 4a78c65e9971..47a9f70a24a9 100644
--- a/drivers/i2c/i2c-core-slave.c
+++ b/drivers/i2c/i2c-core-slave.c
@@ -47,9 +47,9 @@ int i2c_slave_register(struct i2c_client *client, i2c_slave_cb_t slave_cb)
 
 	client->slave_cb = slave_cb;
 
-	i2c_lock_adapter(client->adapter);
+	i2c_lock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	ret = client->adapter->algo->reg_slave(client);
-	i2c_unlock_adapter(client->adapter);
+	i2c_unlock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	if (ret) {
 		client->slave_cb = NULL;
@@ -69,9 +69,9 @@ int i2c_slave_unregister(struct i2c_client *client)
 		return -EOPNOTSUPP;
 	}
 
-	i2c_lock_adapter(client->adapter);
+	i2c_lock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	ret = client->adapter->algo->unreg_slave(client);
-	i2c_unlock_adapter(client->adapter);
+	i2c_unlock_bus(client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	if (ret == 0)
 		client->slave_cb = NULL;
diff --git a/drivers/iio/temperature/mlx90614.c b/drivers/iio/temperature/mlx90614.c
index d619e8634a00..13a4cec64ea8 100644
--- a/drivers/iio/temperature/mlx90614.c
+++ b/drivers/iio/temperature/mlx90614.c
@@ -433,11 +433,11 @@ static int mlx90614_wakeup(struct mlx90614_data *data)
 
 	dev_dbg(&data->client->dev, "Requesting wake-up");
 
-	i2c_lock_adapter(data->client->adapter);
+	i2c_lock_bus(data->client->adapter, I2C_LOCK_ROOT_ADAPTER);
 	gpiod_direction_output(data->wakeup_gpio, 0);
 	msleep(MLX90614_TIMING_WAKEUP);
 	gpiod_direction_input(data->wakeup_gpio);
-	i2c_unlock_adapter(data->client->adapter);
+	i2c_unlock_bus(data->client->adapter, I2C_LOCK_ROOT_ADAPTER);
 
 	data->ready_timestamp = jiffies +
 			msecs_to_jiffies(MLX90614_TIMING_STARTUP);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..795e3a860afe 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -754,18 +754,6 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
 	adapter->lock_ops->unlock_bus(adapter, flags);
 }
 
-static inline void
-i2c_lock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_lock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
-static inline void
-i2c_unlock_adapter(struct i2c_adapter *adapter)
-{
-	i2c_unlock_bus(adapter, I2C_LOCK_ROOT_ADAPTER);
-}
-
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
-- 
cgit v1.2.3


From 8b7008620b8452728cadead460a36f64ed78c460 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Wed, 11 Jul 2018 14:39:42 +0200
Subject: net: Don't copy pfmemalloc flag in __copy_skb_header()

The pfmemalloc flag indicates that the skb was allocated from
the PFMEMALLOC reserves, and the flag is currently copied on skb
copy and clone.

However, an skb copied from an skb flagged with pfmemalloc
wasn't necessarily allocated from PFMEMALLOC reserves, and on
the other hand an skb allocated that way might be copied from an
skb that wasn't.

So we should not copy the flag on skb copy, and rather decide
whether to allow an skb to be associated with sockets unrelated
to page reclaim depending only on how it was allocated.

Move the pfmemalloc flag before headers_start[0] using an
existing 1-bit hole, so that __copy_skb_header() doesn't copy
it.

When cloning, we'll now take care of this flag explicitly,
contravening to the warning comment of __skb_clone().

While at it, restore the newline usage introduced by commit
b19372273164 ("net: reorganize sk_buff for faster
__copy_skb_header()") to visually separate bytes used in
bitfields after headers_start[0], that was gone after commit
a9e419dc7be6 ("netfilter: merge ctinfo into nfct pointer storage
area"), and describe the pfmemalloc flag in the kernel-doc
structure comment.

This doesn't change the size of sk_buff or cacheline boundaries,
but consolidates the 15 bits hole before tc_index into a 2 bytes
hole before csum, that could now be filled more easily.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Fixes: c93bdd0e03e8 ("netvm: allow skb allocation to use PFMEMALLOC reserves")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 +++++-----
 net/core/skbuff.c      |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 164cdedf6012..610a201126ee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -735,7 +736,7 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-				__unused:1; /* one bit hole */
+				pfmemalloc:1;
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
@@ -754,31 +755,30 @@ struct sk_buff {
 
 	__u8			__pkt_type_offset[0];
 	__u8			pkt_type:3;
-	__u8			pfmemalloc:1;
 	__u8			ignore_df:1;
-
 	__u8			nf_trace:1;
 	__u8			ip_summed:2;
 	__u8			ooo_okay:1;
+
 	__u8			l4_hash:1;
 	__u8			sw_hash:1;
 	__u8			wifi_acked_valid:1;
 	__u8			wifi_acked:1;
-
 	__u8			no_fcs:1;
 	/* Indicates the inner headers are valid in the skbuff. */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
+
 	__u8			csum_complete_sw:1;
 	__u8			csum_level:2;
 	__u8			csum_not_inet:1;
-
 	__u8			dst_pending_confirm:1;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
 	__u8			ndisc_nodetype:2;
 #endif
 	__u8			ipvs_property:1;
+
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
 #ifdef CONFIG_NET_SWITCHDEV
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eba8dae22c25..4df3164bb5fc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -858,6 +858,8 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->cloned = 1;
 	n->nohdr = 0;
 	n->peeked = 0;
+	if (skb->pfmemalloc)
+		n->pfmemalloc = 1;
 	n->destructor = NULL;
 	C(tail);
 	C(end);
-- 
cgit v1.2.3


From 477351f7829d2268769c5d545511081555066529 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 12:54:11 -0700
Subject: rcu: Convert rcu_grace_period tracepoint to gp_seq

This commit makes the rcu_grace_period tracepoint use gp_seq instead
of ->gpnum or ->completed.  It also introduces a "cpuofl-bgp" string to
less obscurely indicate when a CPU has gone offline while a grace period
is waiting on it.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 13 +++++++------
 kernel/rcu/tree.c          | 43 +++++++++++++++++++++----------------------
 kernel/rcu/tree_plugin.h   |  2 +-
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 5936aac357ab..cd229e82ec8b 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -52,6 +52,7 @@ TRACE_EVENT(rcu_utilization,
  *	"cpuqs": CPU passes through a quiescent state.
  *	"cpuonl": CPU comes online.
  *	"cpuofl": CPU goes offline.
+ *	"cpuofl-bgp": CPU goes offline while blocking a grace period.
  *	"reqwait": GP kthread sleeps waiting for grace-period request.
  *	"reqwaitsig": GP kthread awakened by signal from reqwait state.
  *	"fqswait": GP kthread waiting until time to force quiescent states.
@@ -63,24 +64,24 @@ TRACE_EVENT(rcu_utilization,
  */
 TRACE_EVENT(rcu_grace_period,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum, const char *gpevent),
+	TP_PROTO(const char *rcuname, unsigned long gp_seq, const char *gpevent),
 
-	TP_ARGS(rcuname, gpnum, gpevent),
+	TP_ARGS(rcuname, gp_seq, gpevent),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(const char *, gpevent)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->gpevent = gpevent;
 	),
 
 	TP_printk("%s %lu %s",
-		  __entry->rcuname, __entry->gpnum, __entry->gpevent)
+		  __entry->rcuname, __entry->gp_seq, __entry->gpevent)
 );
 
 /*
@@ -753,7 +754,7 @@ TRACE_EVENT(rcu_barrier,
 
 #else /* #ifdef CONFIG_RCU_TRACE */
 
-#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
+#define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0)
 #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4826598867c3..7ce85ad39af6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -234,7 +234,7 @@ void rcu_sched_qs(void)
 	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
 		return;
 	trace_rcu_grace_period(TPS("rcu_sched"),
-			       __this_cpu_read(rcu_sched_data.gpnum),
+			       __this_cpu_read(rcu_sched_data.gp_seq),
 			       TPS("cpuqs"));
 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
 	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
@@ -249,7 +249,7 @@ void rcu_bh_qs(void)
 	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
 	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_bh"),
-				       __this_cpu_read(rcu_bh_data.gpnum),
+				       __this_cpu_read(rcu_bh_data.gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
 	}
@@ -1615,7 +1615,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
 		goto unlock_out;
 	}
-	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq"));
 	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
 	/* Push furthest requested GP to leaf node and rcu_data structure. */
@@ -1702,9 +1702,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
+		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB"));
 	else
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
+		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB"));
 	return ret;
 }
 
@@ -1774,7 +1774,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
 		 * go looking for one.
 		 */
 		rdp->gpnum = rnp->gpnum;
-		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
+		trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpustart"));
 		need_gp = !!(rnp->qsmask & rdp->grpmask);
 		rdp->cpu_no_qs.b.norm = need_gp;
 		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
@@ -1851,7 +1851,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 	rcu_seq_start(&rsp->gp_seq);
 	if (WARN_ON_ONCE(((rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT) != rcu_seq_ctr(rnp->gp_seq))) /* Catch any ->completed/->gp_seq mismatches. */
 		pr_info("%s ->completed: %#lx (%#lx) ->gp_seq %#lx (%#lx)\n", __func__, rnp->completed, (rnp->completed << RCU_SEQ_CTR_SHIFT) >> RCU_SEQ_CTR_SHIFT, rnp->gp_seq, rcu_seq_ctr(rnp->gp_seq));
-	trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
+	trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start"));
 	raw_spin_unlock_irq_rcu_node(rnp);
 
 	/*
@@ -1928,7 +1928,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 		if (rnp == rdp->mynode)
 			(void)__note_gp_changes(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
-		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
+		trace_rcu_grace_period_init(rsp->name, rnp->gp_seq,
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq_rcu_node(rnp);
@@ -2048,7 +2048,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	/* Declare grace period done. */
 	WRITE_ONCE(rsp->completed, rsp->gpnum);
 	rcu_seq_end(&rsp->gp_seq);
-	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
+	trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end"));
 	rsp->gp_state = RCU_GP_IDLE;
 	/* Check for GP requests since above loop. */
 	rdp = this_cpu_ptr(rsp->rda);
@@ -2061,7 +2061,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
 		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
 		rsp->gp_req_activity = jiffies;
-		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
+		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq),
 				       TPS("newreq"));
 	} else {
 		WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
@@ -2087,7 +2087,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 		/* Handle grace-period start. */
 		for (;;) {
 			trace_rcu_grace_period(rsp->name,
-					       READ_ONCE(rsp->gpnum),
+					       READ_ONCE(rsp->gp_seq),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
 			swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
@@ -2100,7 +2100,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			WRITE_ONCE(rsp->gp_activity, jiffies);
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
-					       READ_ONCE(rsp->gpnum),
+					       READ_ONCE(rsp->gp_seq),
 					       TPS("reqwaitsig"));
 		}
 
@@ -2119,7 +2119,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					   jiffies + 3 * j);
 			}
 			trace_rcu_grace_period(rsp->name,
-					       READ_ONCE(rsp->gpnum),
+					       READ_ONCE(rsp->gp_seq),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
 			ret = swait_event_idle_timeout(rsp->gp_wq,
@@ -2134,12 +2134,12 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
 			    (gf & RCU_GP_FLAG_FQS)) {
 				trace_rcu_grace_period(rsp->name,
-						       READ_ONCE(rsp->gpnum),
+						       READ_ONCE(rsp->gp_seq),
 						       TPS("fqsstart"));
 				rcu_gp_fqs(rsp, first_gp_fqs);
 				first_gp_fqs = false;
 				trace_rcu_grace_period(rsp->name,
-						       READ_ONCE(rsp->gpnum),
+						       READ_ONCE(rsp->gp_seq),
 						       TPS("fqsend"));
 				cond_resched_tasks_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
@@ -2158,7 +2158,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 				WRITE_ONCE(rsp->gp_activity, jiffies);
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
-						       READ_ONCE(rsp->gpnum),
+						       READ_ONCE(rsp->gp_seq),
 						       TPS("fqswaitsig"));
 				ret = 1; /* Keep old FQS timing. */
 				j = jiffies;
@@ -2388,17 +2388,16 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
-	RCU_TRACE(unsigned long mask;)
+	RCU_TRACE(bool blkd;)
 	RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);)
 	RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)
 
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return;
 
-	RCU_TRACE(mask = rdp->grpmask;)
-	trace_rcu_grace_period(rsp->name,
-			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
-			       TPS("cpuofl"));
+	RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);)
+	trace_rcu_grace_period(rsp->name, rnp->gp_seq,
+			       blkd ? TPS("cpuofl") : TPS("cpuofl-bgp"));
 }
 
 /*
@@ -3538,7 +3537,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->core_needs_qs = false;
 	rdp->rcu_iw_pending = false;
 	rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
-	trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
+	trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl"));
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index ca73931f7b30..aca9d187c509 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -301,7 +301,7 @@ static void rcu_preempt_qs(void)
 	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
-				       __this_cpu_read(rcu_data_p->gpnum),
+				       __this_cpu_read(rcu_data_p->gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
 		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
-- 
cgit v1.2.3


From abd13fdd9516e5baae2257721b921684ecb090d3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:08:46 -0700
Subject: rcu: Convert rcu_future_grace_period tracepoint to gp_seq

This commit makes the rcu_future_grace_period tracepoint use gp_seq
instead of ->gpnum and ->completed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 22 +++++++++-------------
 kernel/rcu/tree.c          |  7 +++----
 2 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index cd229e82ec8b..286047d22314 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -103,16 +103,14 @@ TRACE_EVENT(rcu_grace_period,
  */
 TRACE_EVENT(rcu_future_grace_period,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum, unsigned long completed,
-		 unsigned long c, u8 level, int grplo, int grphi,
-		 const char *gpevent),
+	TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long c,
+		 u8 level, int grplo, int grphi, const char *gpevent),
 
-	TP_ARGS(rcuname, gpnum, completed, c, level, grplo, grphi, gpevent),
+	TP_ARGS(rcuname, gp_seq, c, level, grplo, grphi, gpevent),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
-		__field(unsigned long, completed)
+		__field(unsigned long, gp_seq)
 		__field(unsigned long, c)
 		__field(u8, level)
 		__field(int, grplo)
@@ -122,8 +120,7 @@ TRACE_EVENT(rcu_future_grace_period,
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
-		__entry->completed = completed;
+		__entry->gp_seq = gp_seq;
 		__entry->c = c;
 		__entry->level = level;
 		__entry->grplo = grplo;
@@ -131,10 +128,9 @@ TRACE_EVENT(rcu_future_grace_period,
 		__entry->gpevent = gpevent;
 	),
 
-	TP_printk("%s %lu %lu %lu %u %d %d %s",
-		  __entry->rcuname, __entry->gpnum, __entry->completed,
-		  __entry->c, __entry->level, __entry->grplo, __entry->grphi,
-		  __entry->gpevent)
+	TP_printk("%s %lu %lu %u %d %d %s",
+		  __entry->rcuname, __entry->gp_seq, __entry->c, __entry->level,
+		  __entry->grplo, __entry->grphi, __entry->gpevent)
 );
 
 /*
@@ -755,7 +751,7 @@ TRACE_EVENT(rcu_barrier,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0)
-#define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
+#define trace_rcu_future_grace_period(rcuname, gp_seq, c, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7ce85ad39af6..066dbaacec30 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1552,9 +1552,8 @@ void rcu_cpu_stall_reset(void)
 static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 			      unsigned long c, const char *s)
 {
-	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
-				      rnp->completed, c, rnp->level,
-				      rnp->grplo, rnp->grphi, s);
+	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, c,
+				      rnp->level, rnp->grplo, rnp->grphi, s);
 }
 
 /*
@@ -2053,7 +2052,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	/* Check for GP requests since above loop. */
 	rdp = this_cpu_ptr(rsp->rda);
 	if (ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
-		trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+		trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
 				  TPS("CleanupMore"));
 		needgp = true;
 	}
-- 
cgit v1.2.3


From 63d86a7e85f84b8ac3b2f394570965aedbb03787 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:08:46 -0700
Subject: rcu: Convert rcu_grace_period_init tracepoint to gp_seq

This commit makes the rcu_grace_period_init tracepoint use gp_seq instead
of ->gpnum.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 286047d22314..892016ad0647 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -142,14 +142,14 @@ TRACE_EVENT(rcu_future_grace_period,
  */
 TRACE_EVENT(rcu_grace_period_init,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum, u8 level,
+	TP_PROTO(const char *rcuname, unsigned long gp_seq, u8 level,
 		 int grplo, int grphi, unsigned long qsmask),
 
-	TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask),
+	TP_ARGS(rcuname, gp_seq, level, grplo, grphi, qsmask),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(u8, level)
 		__field(int, grplo)
 		__field(int, grphi)
@@ -158,7 +158,7 @@ TRACE_EVENT(rcu_grace_period_init,
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->level = level;
 		__entry->grplo = grplo;
 		__entry->grphi = grphi;
@@ -166,7 +166,7 @@ TRACE_EVENT(rcu_grace_period_init,
 	),
 
 	TP_printk("%s %lu %u %d %d %lx",
-		  __entry->rcuname, __entry->gpnum, __entry->level,
+		  __entry->rcuname, __entry->gp_seq, __entry->level,
 		  __entry->grplo, __entry->grphi, __entry->qsmask)
 );
 
@@ -754,7 +754,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_future_grace_period(rcuname, gp_seq, c, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+#define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \
 				    qsmask) do { } while (0)
 #define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \
 	do { } while (0)
-- 
cgit v1.2.3


From 598ce09480efb6b48799df60c66bac70bea5ef54 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:35:20 -0700
Subject: rcu: Convert rcu_preempt_task tracepoint to ->gp_seq

This commit makes the rcu_preempt_task tracepoint use ->gp_seq instead
of ->gpnum.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 12 ++++++------
 kernel/rcu/tree_plugin.h   |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 892016ad0647..38dbd97d65a3 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -298,24 +298,24 @@ TRACE_EVENT(rcu_nocb_wake,
  */
 TRACE_EVENT(rcu_preempt_task,
 
-	TP_PROTO(const char *rcuname, int pid, unsigned long gpnum),
+	TP_PROTO(const char *rcuname, int pid, unsigned long gp_seq),
 
-	TP_ARGS(rcuname, pid, gpnum),
+	TP_ARGS(rcuname, pid, gp_seq),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(int, pid)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->pid = pid;
 	),
 
 	TP_printk("%s %lu %d",
-		  __entry->rcuname, __entry->gpnum, __entry->pid)
+		  __entry->rcuname, __entry->gp_seq, __entry->pid)
 );
 
 /*
@@ -761,7 +761,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
 	do { } while (0)
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
-#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
+#define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
 					 grplo, grphi, gp_tasks) do { } \
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index aca9d187c509..02ca3b4e6a8f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -350,8 +350,8 @@ static void rcu_preempt_note_context_switch(bool preempt)
 		trace_rcu_preempt_task(rdp->rsp->name,
 				       t->pid,
 				       (rnp->qsmask & rdp->grpmask)
-				       ? rnp->gpnum
-				       : rnp->gpnum + 1);
+				       ? rnp->gp_seq
+				       : rcu_seq_snap(&rnp->gp_seq));
 		rcu_preempt_ctxt_queue(rnp, rdp);
 	} else if (t->rcu_read_lock_nesting < 0 &&
 		   t->rcu_read_unlock_special.s) {
-- 
cgit v1.2.3


From 865aa1e08d8aefdfd1f5d30ecfce1b8ef8cd520a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:35:20 -0700
Subject: rcu: Convert rcu_unlock_preempted_task tracepoint to ->gp_seq

This commit makes the rcu_unlock_preempted_task tracepoint use ->gp_seq
instead of ->gpnum.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 12 ++++++------
 kernel/rcu/tree_plugin.h   |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 38dbd97d65a3..95b7491196aa 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -325,23 +325,23 @@ TRACE_EVENT(rcu_preempt_task,
  */
 TRACE_EVENT(rcu_unlock_preempted_task,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum, int pid),
+	TP_PROTO(const char *rcuname, unsigned long gp_seq, int pid),
 
-	TP_ARGS(rcuname, gpnum, pid),
+	TP_ARGS(rcuname, gp_seq, pid),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(int, pid)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->pid = pid;
 	),
 
-	TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid)
+	TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid)
 );
 
 /*
@@ -762,7 +762,7 @@ TRACE_EVENT(rcu_barrier,
 	do { } while (0)
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0)
-#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
+#define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0)
 #define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
 					 grplo, grphi, gp_tasks) do { } \
 	while (0)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 02ca3b4e6a8f..a10b0e26ce19 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -545,7 +545,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 		list_del_init(&t->rcu_node_entry);
 		t->rcu_blocked_node = NULL;
 		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
-						rnp->gpnum, t->pid);
+						rnp->gp_seq, t->pid);
 		if (&t->rcu_node_entry == rnp->gp_tasks)
 			rnp->gp_tasks = np;
 		if (&t->rcu_node_entry == rnp->exp_tasks)
@@ -708,7 +708,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 		t = container_of(rnp->gp_tasks, struct task_struct,
 				 rcu_node_entry);
 		trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
-						rnp->gpnum, t->pid);
+						rnp->gp_seq, t->pid);
 	}
 	WARN_ON_ONCE(rnp->qsmask);
 }
-- 
cgit v1.2.3


From db023296f0115d2fe01fdabad54678f2b806da23 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:35:20 -0700
Subject: rcu: Convert rcu_quiescent_state_report tracepoint to ->gp_seq

This commit makes the rcu_quiescent_state_report tracepoint use ->gp_seq
instead of ->gpnum.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 12 ++++++------
 kernel/rcu/tree.c          |  2 +-
 kernel/rcu/tree_plugin.h   |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 95b7491196aa..ac4d9d4a1ebf 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -354,15 +354,15 @@ TRACE_EVENT(rcu_unlock_preempted_task,
  */
 TRACE_EVENT(rcu_quiescent_state_report,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum,
+	TP_PROTO(const char *rcuname, unsigned long gp_seq,
 		 unsigned long mask, unsigned long qsmask,
 		 u8 level, int grplo, int grphi, int gp_tasks),
 
-	TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks),
+	TP_ARGS(rcuname, gp_seq, mask, qsmask, level, grplo, grphi, gp_tasks),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(unsigned long, mask)
 		__field(unsigned long, qsmask)
 		__field(u8, level)
@@ -373,7 +373,7 @@ TRACE_EVENT(rcu_quiescent_state_report,
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->mask = mask;
 		__entry->qsmask = qsmask;
 		__entry->level = level;
@@ -383,7 +383,7 @@ TRACE_EVENT(rcu_quiescent_state_report,
 	),
 
 	TP_printk("%s %lu %lx>%lx %u %d %d %u",
-		  __entry->rcuname, __entry->gpnum,
+		  __entry->rcuname, __entry->gp_seq,
 		  __entry->mask, __entry->qsmask, __entry->level,
 		  __entry->grplo, __entry->grphi, __entry->gp_tasks)
 );
@@ -763,7 +763,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gp_seq) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gp_seq, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+#define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \
 					 grplo, grphi, gp_tasks) do { } \
 	while (0)
 #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 066dbaacec30..7c6c11d5479c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2229,7 +2229,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 		WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
 			     rcu_preempt_blocked_readers_cgp(rnp));
 		rnp->qsmask &= ~mask;
-		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
+		trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq,
 						 mask, rnp->qsmask, rnp->level,
 						 rnp->grplo, rnp->grphi,
 						 !!rnp->gp_tasks);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a10b0e26ce19..c8a2c7760121 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -566,7 +566,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 		empty_exp_now = sync_rcu_preempt_exp_done(rnp);
 		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
 			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
-							 rnp->gpnum,
+							 rnp->gp_seq,
 							 0, rnp->qsmask,
 							 rnp->level,
 							 rnp->grplo,
-- 
cgit v1.2.3


From fee5997c17562e95fb1fecc142efb2da0934baa4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 1 May 2018 13:35:20 -0700
Subject: rcu: Convert rcu_fqs tracepoint to ->gp_seq

This commit makes the rcu_fqs tracepoint use ->gp_seq instead of ->gpnum.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 12 ++++++------
 kernel/rcu/tree.c          |  8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ac4d9d4a1ebf..7d3650cc9d30 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -398,26 +398,26 @@ TRACE_EVENT(rcu_quiescent_state_report,
  */
 TRACE_EVENT(rcu_fqs,
 
-	TP_PROTO(const char *rcuname, unsigned long gpnum, int cpu, const char *qsevent),
+	TP_PROTO(const char *rcuname, unsigned long gp_seq, int cpu, const char *qsevent),
 
-	TP_ARGS(rcuname, gpnum, cpu, qsevent),
+	TP_ARGS(rcuname, gp_seq, cpu, qsevent),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
-		__field(unsigned long, gpnum)
+		__field(unsigned long, gp_seq)
 		__field(int, cpu)
 		__field(const char *, qsevent)
 	),
 
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
-		__entry->gpnum = gpnum;
+		__entry->gp_seq = gp_seq;
 		__entry->cpu = cpu;
 		__entry->qsevent = qsevent;
 	),
 
 	TP_printk("%s %lu %d %s",
-		  __entry->rcuname, __entry->gpnum,
+		  __entry->rcuname, __entry->gp_seq,
 		  __entry->cpu, __entry->qsevent)
 );
 
@@ -766,7 +766,7 @@ TRACE_EVENT(rcu_barrier,
 #define trace_rcu_quiescent_state_report(rcuname, gp_seq, mask, qsmask, level, \
 					 grplo, grphi, gp_tasks) do { } \
 	while (0)
-#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
+#define trace_rcu_fqs(rcuname, gp_seq, cpu, qsevent) do { } while (0)
 #define trace_rcu_dyntick(polarity, oldnesting, newnesting, dyntick) do { } while (0)
 #define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
 #define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7c6c11d5479c..42e89d4f1e33 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1111,7 +1111,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
 	rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
 	if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
+		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));
 		rcu_gpnum_ovf(rdp->mynode, rdp);
 		return 1;
 	}
@@ -1161,7 +1161,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	 * of the current RCU grace period.
 	 */
 	if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
+		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti"));
 		rdp->dynticks_fqs++;
 		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
@@ -1178,7 +1178,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
 	    READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
 	    rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc"));
+		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("rqc"));
 		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
 	} else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
@@ -1188,7 +1188,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 	/* Check for the CPU being offline. */
 	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("ofl"));
+		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("ofl"));
 		rdp->offline_fqs++;
 		rcu_gpnum_ovf(rnp, rdp);
 		return 1;
-- 
cgit v1.2.3


From a2165e416878b325747f871df4b236b49bf61486 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 12 May 2018 07:42:20 -0700
Subject: rcu: Don't funnel-lock above leaf node if GP in progress

The old grace-period start code would acquire only the leaf's rcu_node
structure's ->lock if that structure believed that a grace period was
in progress.  The new code advances to the leaf's parent in this case,
needlessly acquiring then leaf's parent's ->lock.  This commit therefore
checks the grace-period state after marking the leaf with the need for
the specified grace period, and if the leaf believes that a grace period
is in progress, takes an early exit.

Reported-by: Joel Fernandes <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Add "Startedleaf" tracing as suggested by Joel Fernandes. ]
---
 include/trace/events/rcu.h | 4 ++--
 kernel/rcu/tree.c          | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 7d3650cc9d30..5ab1df0a2801 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -91,8 +91,8 @@ TRACE_EVENT(rcu_grace_period,
  *
  * "Startleaf": Request a grace period based on leaf-node data.
  * "Prestarted": Someone beat us to the request
- * "Startedleaf": Leaf-node start proved sufficient.
- * "Startedleafroot": Leaf-node start proved sufficient after checking root.
+ * "Startedleaf": Leaf node marked for future GP.
+ * "Startedleafroot": All nodes from leaf to root marked for future GP.
  * "Startedroot": Requested a nocb grace period based on root-node data.
  * "NoGPkthread": The RCU grace-period kthread has not yet started.
  * "StartWait": Start waiting for the requested grace period.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5643c135fb06..24a79e85b81f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1590,6 +1590,15 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 			goto unlock_out;
 		}
 		rnp_root->gp_seq_needed = c;
+		if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
+			/*
+			 * We just marked the leaf, and a grace period
+			 * is in progress, which means that rcu_gp_cleanup()
+			 * will see the marking.  Bail to reduce contention.
+			 */
+			trace_rcu_this_gp(rnp, rdp, c, TPS("Startedleaf"));
+			goto unlock_out;
+		}
 		if (rnp_root != rnp && rnp_root->parent != NULL)
 			raw_spin_unlock_rcu_node(rnp_root);
 		if (!rnp_root->parent)
-- 
cgit v1.2.3


From b73de91d6a4c97ed586b2a5a6ce7c6fe395d9a3b Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@google.com>
Date: Sun, 20 May 2018 21:42:18 -0700
Subject: rcu: Rename the grace-period-request variables and parameters

The name 'c' is used for variables and parameters holding the requested
grace-period sequence number.  However it is no longer very meaningful
given the conversions from ->gpnum and (especially) ->completed to
->gp_seq. This commit therefore renames 'c' to 'gp_seq_req'.

Previous patch discussion is at:
https://patchwork.kernel.org/patch/10396579/

Signed-off-by: Joel Fernandes <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 15 ++++++++-------
 kernel/rcu/tree.c          | 46 +++++++++++++++++++++++++++-------------------
 2 files changed, 35 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 5ab1df0a2801..759e7e83733d 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -103,15 +103,16 @@ TRACE_EVENT(rcu_grace_period,
  */
 TRACE_EVENT(rcu_future_grace_period,
 
-	TP_PROTO(const char *rcuname, unsigned long gp_seq, unsigned long c,
-		 u8 level, int grplo, int grphi, const char *gpevent),
+	TP_PROTO(const char *rcuname, unsigned long gp_seq,
+		 unsigned long gp_seq_req, u8 level, int grplo, int grphi,
+		 const char *gpevent),
 
-	TP_ARGS(rcuname, gp_seq, c, level, grplo, grphi, gpevent),
+	TP_ARGS(rcuname, gp_seq, gp_seq_req, level, grplo, grphi, gpevent),
 
 	TP_STRUCT__entry(
 		__field(const char *, rcuname)
 		__field(unsigned long, gp_seq)
-		__field(unsigned long, c)
+		__field(unsigned long, gp_seq_req)
 		__field(u8, level)
 		__field(int, grplo)
 		__field(int, grphi)
@@ -121,7 +122,7 @@ TRACE_EVENT(rcu_future_grace_period,
 	TP_fast_assign(
 		__entry->rcuname = rcuname;
 		__entry->gp_seq = gp_seq;
-		__entry->c = c;
+		__entry->gp_seq_req = gp_seq_req;
 		__entry->level = level;
 		__entry->grplo = grplo;
 		__entry->grphi = grphi;
@@ -129,7 +130,7 @@ TRACE_EVENT(rcu_future_grace_period,
 	),
 
 	TP_printk("%s %lu %lu %u %d %d %s",
-		  __entry->rcuname, __entry->gp_seq, __entry->c, __entry->level,
+		  __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level,
 		  __entry->grplo, __entry->grphi, __entry->gpevent)
 );
 
@@ -751,7 +752,7 @@ TRACE_EVENT(rcu_barrier,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gp_seq, gpevent) do { } while (0)
-#define trace_rcu_future_grace_period(rcuname, gp_seq, c, \
+#define trace_rcu_future_grace_period(rcuname, gp_seq, gp_seq_req, \
 				      level, grplo, grphi, event) \
 				      do { } while (0)
 #define trace_rcu_grace_period_init(rcuname, gp_seq, level, grplo, grphi, \
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 161e8fb8b83f..8c31b1740afc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1546,13 +1546,18 @@ void rcu_cpu_stall_reset(void)
 
 /* Trace-event wrapper function for trace_rcu_future_grace_period.  */
 static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-			      unsigned long c, const char *s)
+			      unsigned long gp_seq_req, const char *s)
 {
-	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, c,
+	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, gp_seq_req,
 				      rnp->level, rnp->grplo, rnp->grphi, s);
 }
 
 /*
+ * rcu_start_this_gp - Request the start of a particular grace period
+ * @rnp: The leaf node of the CPU from which to start.
+ * @rdp: The rcu_data corresponding to the CPU from which to start.
+ * @gp_seq_req: The gp_seq of the grace period to start.
+ *
  * Start the specified grace period, as needed to handle newly arrived
  * callbacks.  The required future grace periods are recorded in each
  * rcu_node structure's ->gp_seq_needed field.  Returns true if there
@@ -1560,9 +1565,11 @@ static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
  *
  * The caller must hold the specified rcu_node structure's ->lock, which
  * is why the caller is responsible for waking the grace-period kthread.
+ *
+ * Returns true if the GP thread needs to be awakened else false.
  */
 static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-			      unsigned long c)
+			      unsigned long gp_seq_req)
 {
 	bool ret = false;
 	struct rcu_state *rsp = rdp->rsp;
@@ -1578,25 +1585,27 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 	 * not be released.
 	 */
 	raw_lockdep_assert_held_rcu_node(rnp);
-	trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+	trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startleaf"));
 	for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
 		if (rnp_root != rnp)
 			raw_spin_lock_rcu_node(rnp_root);
-		if (ULONG_CMP_GE(rnp_root->gp_seq_needed, c) ||
-		    rcu_seq_started(&rnp_root->gp_seq, c) ||
+		if (ULONG_CMP_GE(rnp_root->gp_seq_needed, gp_seq_req) ||
+		    rcu_seq_started(&rnp_root->gp_seq, gp_seq_req) ||
 		    (rnp != rnp_root &&
 		     rcu_seq_state(rcu_seq_current(&rnp_root->gp_seq)))) {
-			trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+			trace_rcu_this_gp(rnp_root, rdp, gp_seq_req,
+					  TPS("Prestarted"));
 			goto unlock_out;
 		}
-		rnp_root->gp_seq_needed = c;
+		rnp_root->gp_seq_needed = gp_seq_req;
 		if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) {
 			/*
 			 * We just marked the leaf, and a grace period
 			 * is in progress, which means that rcu_gp_cleanup()
 			 * will see the marking.  Bail to reduce contention.
 			 */
-			trace_rcu_this_gp(rnp, rdp, c, TPS("Startedleaf"));
+			trace_rcu_this_gp(rnp, rdp, gp_seq_req,
+					  TPS("Startedleaf"));
 			goto unlock_out;
 		}
 		if (rnp_root != rnp && rnp_root->parent != NULL)
@@ -1607,21 +1616,21 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
 
 	/* If GP already in progress, just leave, otherwise start one. */
 	if (rcu_gp_in_progress(rsp)) {
-		trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
+		trace_rcu_this_gp(rnp_root, rdp, gp_seq_req, TPS("Startedleafroot"));
 		goto unlock_out;
 	}
-	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+	trace_rcu_this_gp(rnp_root, rdp, gp_seq_req, TPS("Startedroot"));
 	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
 	rsp->gp_req_activity = jiffies;
 	if (!rsp->gp_kthread) {
-		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+		trace_rcu_this_gp(rnp_root, rdp, gp_seq_req, TPS("NoGPkthread"));
 		goto unlock_out;
 	}
 	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq"));
 	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
 	/* Push furthest requested GP to leaf node and rcu_data structure. */
-	if (ULONG_CMP_LT(c, rnp_root->gp_seq_needed)) {
+	if (ULONG_CMP_LT(gp_seq_req, rnp_root->gp_seq_needed)) {
 		rnp->gp_seq_needed = rnp_root->gp_seq_needed;
 		rdp->gp_seq_needed = rnp_root->gp_seq_needed;
 	}
@@ -1636,14 +1645,13 @@ unlock_out:
  */
 static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	unsigned long c = rnp->gp_seq;
 	bool needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
 	needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
 	if (!needmore)
 		rnp->gp_seq_needed = rnp->gp_seq; /* Avoid counter wrap. */
-	trace_rcu_this_gp(rnp, rdp, c,
+	trace_rcu_this_gp(rnp, rdp, rnp->gp_seq,
 			  needmore ? TPS("CleanupMore") : TPS("Cleanup"));
 	return needmore;
 }
@@ -1679,7 +1687,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
-	unsigned long c;
+	unsigned long gp_seq_req;
 	bool ret = false;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
@@ -1698,9 +1706,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 	 * accelerating callback invocation to an earlier grace-period
 	 * number.
 	 */
-	c = rcu_seq_snap(&rsp->gp_seq);
-	if (rcu_segcblist_accelerate(&rdp->cblist, c))
-		ret = rcu_start_this_gp(rnp, rdp, c);
+	gp_seq_req = rcu_seq_snap(&rsp->gp_seq);
+	if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
+		ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
-- 
cgit v1.2.3


From e05121ba5b81e2f85349f038642410578457f6db Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 7 May 2018 12:07:48 -0700
Subject: rcu: Remove CPU-hotplug failsafe from force-quiescent-state code path

Now that quiescent states for newly offlined CPUs are reported either
when that CPU goes offline or at the end of grace-period initialization,
the CPU-hotplug failsafe in the force-quiescent-state code path is no
longer needed.

This commit therefore removes this failsafe.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/trace/events/rcu.h | 10 +++++-----
 kernel/rcu/tree.c          |  9 +--------
 kernel/rcu/tree.h          |  1 -
 3 files changed, 6 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 759e7e83733d..a8d07feff6a0 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -391,11 +391,11 @@ TRACE_EVENT(rcu_quiescent_state_report,
 
 /*
  * Tracepoint for quiescent states detected by force_quiescent_state().
- * These trace events include the type of RCU, the grace-period number that
- * was blocked by the CPU, the CPU itself, and the type of quiescent state,
- * which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, "kick"
- * when kicking a CPU that has been in dyntick-idle mode for too long, or
- * "rqc" if the CPU got a quiescent state via its rcu_qs_ctr.
+ * These trace events include the type of RCU, the grace-period number
+ * that was blocked by the CPU, the CPU itself, and the type of quiescent
+ * state, which can be "dti" for dyntick-idle mode, "kick" when kicking
+ * a CPU that has been in dyntick-idle mode for too long, or "rqc" if the
+ * CPU got a quiescent state via its rcu_qs_ctr.
  */
 TRACE_EVENT(rcu_fqs,
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 770f0df54015..5f1a11f1f7bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1188,14 +1188,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 		smp_store_release(ruqp, true);
 	}
 
-	/* Check for the CPU being offline. */
-	if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp))) {
-		trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("ofl"));
-		rdp->offline_fqs++;
-		rcu_gpnum_ovf(rnp, rdp);
-		return 1;
-	}
-
 	/*
 	 * A CPU running for an extended time within the kernel can
 	 * delay RCU grace periods.  When the CPU is in NO_HZ_FULL mode,
@@ -3718,6 +3710,7 @@ void rcu_cpu_starting(unsigned int cpu)
 		nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
 		/* Allow lockless access for expedited grace periods. */
 		smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
+		rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
 		if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
 			/* Report QS -after- changing ->qsmaskinitnext! */
 			rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6683da6e4ecc..795d469c6f67 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -217,7 +217,6 @@ struct rcu_data {
 
 	/* 4) reasons this CPU needed to be kicked by force_quiescent_state */
 	unsigned long dynticks_fqs;	/* Kicked due to dynticks idle. */
-	unsigned long offline_fqs;	/* Kicked due to being offline. */
 	unsigned long cond_resched_completed;
 					/* Grace period that needs help */
 					/*  from cond_resched(). */
-- 
cgit v1.2.3


From 3949fa9bac090ad217534c30bc3b6572289abf21 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 May 2018 15:29:10 -0700
Subject: rcu: Make rcu_read_unlock_special() static

Because rcu_read_unlock_special() is no longer used outside of
kernel/rcu/tree_plugin.h, this commit makes it static.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 1 -
 kernel/rcu/tree_plugin.h | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 65163aa0bb04..67ec077c7ee5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -64,7 +64,6 @@ void rcu_barrier_tasks(void);
 
 void __rcu_read_lock(void);
 void __rcu_read_unlock(void);
-void rcu_read_unlock_special(struct task_struct *t);
 void synchronize_rcu(void);
 
 /*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 613372246a07..54a251640f53 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -127,6 +127,7 @@ static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
 
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
+static void rcu_read_unlock_special(struct task_struct *t);
 
 /*
  * Tell them what RCU they are running.
@@ -461,7 +462,7 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
-void rcu_read_unlock_special(struct task_struct *t)
+static void rcu_read_unlock_special(struct task_struct *t)
 {
 	bool empty_exp;
 	bool empty_norm;
-- 
cgit v1.2.3


From 07f27570dcd148a5f4de7dc3513c1d1cd069b362 Mon Sep 17 00:00:00 2001
From: Byungchul Park <byungchul.park@lge.com>
Date: Fri, 11 May 2018 17:30:34 +0900
Subject: rcu: Improve rcu_note_voluntary_context_switch() reporting

We expect a quiescent state of TASKS_RCU when cond_resched_tasks_rcu_qs()
is called, no matter whether it actually be scheduled or not. However,
it currently doesn't report the quiescent state when the task enters
into __schedule() as it's called with preempt = true. So make it report
the quiescent state unconditionally when cond_resched_tasks_rcu_qs() is
called.

And in TINY_RCU, even though the quiescent state of rcu_bh also should
be reported when the tick interrupt comes from user, it doesn't. So make
it reported.

Lastly in TREE_RCU, rcu_note_voluntary_context_switch() should be
reported when the tick interrupt comes from not only user but also idle,
as an extended quiescent state.

Signed-off-by: Byungchul Park <byungchul.park@lge.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Simplify rcutiny portion given no RCU-tasks for !PREEMPT. ]
---
 include/linux/rcupdate.h | 4 ++--
 kernel/rcu/tiny.c        | 4 +---
 kernel/rcu/tree.c        | 4 ++--
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 67ec077c7ee5..5cab15e7ec83 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -194,8 +194,8 @@ static inline void exit_tasks_rcu_finish(void) { }
  */
 #define cond_resched_tasks_rcu_qs() \
 do { \
-	if (!cond_resched()) \
-		rcu_note_voluntary_context_switch_lite(current); \
+	rcu_note_voluntary_context_switch_lite(current); \
+	cond_resched(); \
 } while (0)
 
 /*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index a64eee0db39e..befc9321a89c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -122,10 +122,8 @@ void rcu_check_callbacks(int user)
 {
 	if (user)
 		rcu_sched_qs();
-	else if (!in_softirq())
+	if (user || !in_softirq())
 		rcu_bh_qs();
-	if (user)
-		rcu_note_voluntary_context_switch(current);
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d3333ee2c6f5..19beabe73629 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2645,6 +2645,7 @@ void rcu_check_callbacks(int user)
 
 		rcu_sched_qs();
 		rcu_bh_qs();
+		rcu_note_voluntary_context_switch(current);
 
 	} else if (!in_softirq()) {
 
@@ -2660,8 +2661,7 @@ void rcu_check_callbacks(int user)
 	rcu_preempt_check_callbacks();
 	if (rcu_pending())
 		invoke_rcu_core();
-	if (user)
-		rcu_note_voluntary_context_switch(current);
+
 	trace_rcu_utilization(TPS("End scheduler-tick"));
 }
 
-- 
cgit v1.2.3


From 1445e9175bead409bb9930f3c745246a09f22cf6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 7 May 2018 06:35:46 -0300
Subject: rcu: rcupdate.h: Get rid of Sphinx warnings at rcu_pointer_handoff()

The code example at rcupdate.h currently produce lots of warnings:

	./include/linux/rcupdate.h:572: WARNING: Unexpected indentation.
	./include/linux/rcupdate.h:576: WARNING: Unexpected indentation.
	./include/linux/rcupdate.h:580: WARNING: Block quote ends without a blank line; unexpected unindent.
	./include/linux/rcupdate.h:582: WARNING: Block quote ends without a blank line; unexpected unindent.
	./include/linux/rcupdate.h:582: WARNING: Inline literal start-string without end-string.

This commit therefore changes it to a code-block.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5cab15e7ec83..dacc90358b33 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -566,8 +566,8 @@ static inline void rcu_preempt_sleep_check(void) { }
  * This is simply an identity function, but it documents where a pointer
  * is handed off from RCU to some other synchronization mechanism, for
  * example, reference counting or locking.  In C11, it would map to
- * kill_dependency().  It could be used as follows:
- * ``
+ * kill_dependency().  It could be used as follows::
+ *
  *	rcu_read_lock();
  *	p = rcu_dereference(gp);
  *	long_lived = is_long_lived(p);
@@ -578,7 +578,6 @@ static inline void rcu_preempt_sleep_check(void) { }
  *			p = rcu_pointer_handoff(p);
  *	}
  *	rcu_read_unlock();
- *``
  */
 #define rcu_pointer_handoff(p) (p)
 
-- 
cgit v1.2.3


From 6f56f714db067056c80f5d71510118f82872e34c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 14 May 2018 13:52:27 -0700
Subject: rcu: Improve RCU-tasks naming and comments

The naming and comments associated with some RCU-tasks code make
the faulty assumption that context switches due to cond_resched()
are voluntary.  As several people pointed out, this is not the case.
This commit therefore updates function names and comments to better
reflect current reality.

Reported-by: Byungchul Park <byungchul.park@lge.com>
Reported-by: Joel Fernandes <joel@joelfernandes.org>
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 12 ++++++------
 include/linux/rcutiny.h  |  2 +-
 kernel/rcu/tree.c        |  2 +-
 kernel/rcu/update.c      | 27 ++++++++++++++-------------
 4 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dacc90358b33..75e5b393cf44 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -158,11 +158,11 @@ static inline void rcu_init_nohz(void) { }
 	} while (0)
 
 /*
- * Note a voluntary context switch for RCU-tasks benefit.  This is a
- * macro rather than an inline function to avoid #include hell.
+ * Note a quasi-voluntary context switch for RCU-tasks's benefit.
+ * This is a macro rather than an inline function to avoid #include hell.
  */
 #ifdef CONFIG_TASKS_RCU
-#define rcu_note_voluntary_context_switch_lite(t) \
+#define rcu_tasks_qs(t) \
 	do { \
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
@@ -170,14 +170,14 @@ static inline void rcu_init_nohz(void) { }
 #define rcu_note_voluntary_context_switch(t) \
 	do { \
 		rcu_all_qs(); \
-		rcu_note_voluntary_context_switch_lite(t); \
+		rcu_tasks_qs(t); \
 	} while (0)
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
-#define rcu_note_voluntary_context_switch_lite(t)	do { } while (0)
+#define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #define call_rcu_tasks call_rcu_sched
 #define synchronize_rcu_tasks synchronize_sched
@@ -194,7 +194,7 @@ static inline void exit_tasks_rcu_finish(void) { }
  */
 #define cond_resched_tasks_rcu_qs() \
 do { \
-	rcu_note_voluntary_context_switch_lite(current); \
+	rcu_tasks_qs(current); \
 	cond_resched(); \
 } while (0)
 
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7b3c82e8a625..8d9a0ea8f0b5 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -93,7 +93,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 #define rcu_note_context_switch(preempt) \
 	do { \
 		rcu_sched_qs(); \
-		rcu_note_voluntary_context_switch_lite(current); \
+		rcu_tasks_qs(current); \
 	} while (0)
 
 static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6f2922168216..ccc061acf887 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -457,7 +457,7 @@ void rcu_note_context_switch(bool preempt)
 		rcu_momentary_dyntick_idle();
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	if (!preempt)
-		rcu_note_voluntary_context_switch_lite(current);
+		rcu_tasks_qs(current);
 out:
 	trace_rcu_utilization(TPS("End context switch"));
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c230a60ece4..5783bdf86e5a 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -507,14 +507,15 @@ early_initcall(check_cpu_stall_init);
 #ifdef CONFIG_TASKS_RCU
 
 /*
- * Simple variant of RCU whose quiescent states are voluntary context switch,
- * user-space execution, and idle.  As such, grace periods can take one good
- * long time.  There are no read-side primitives similar to rcu_read_lock()
- * and rcu_read_unlock() because this implementation is intended to get
- * the system into a safe state for some of the manipulations involved in
- * tracing and the like.  Finally, this implementation does not support
- * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
- * per-CPU callback lists will be needed.
+ * Simple variant of RCU whose quiescent states are voluntary context
+ * switch, cond_resched_rcu_qs(), user-space execution, and idle.
+ * As such, grace periods can take one good long time.  There are no
+ * read-side primitives similar to rcu_read_lock() and rcu_read_unlock()
+ * because this implementation is intended to get the system into a safe
+ * state for some of the manipulations involved in tracing and the like.
+ * Finally, this implementation does not support high call_rcu_tasks()
+ * rates from multiple CPUs.  If this is required, per-CPU callback lists
+ * will be needed.
  */
 
 /* Global list of callbacks and associated lock. */
@@ -542,11 +543,11 @@ static struct task_struct *rcu_tasks_kthread_ptr;
  * period elapses, in other words after all currently executing RCU
  * read-side critical sections have completed. call_rcu_tasks() assumes
  * that the read-side critical sections end at a voluntary context
- * switch (not a preemption!), entry into idle, or transition to usermode
- * execution.  As such, there are no read-side primitives analogous to
- * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
- * to determine that all tasks have passed through a safe state, not so
- * much for data-strcuture synchronization.
+ * switch (not a preemption!), cond_resched_rcu_qs(), entry into idle,
+ * or transition to usermode execution.  As such, there are no read-side
+ * primitives analogous to rcu_read_lock() and rcu_read_unlock() because
+ * this primitive is intended to determine that all tasks have passed
+ * through a safe state, not so much for data-strcuture synchronization.
  *
  * See the description of call_rcu() for more detailed information on
  * memory ordering guarantees.
-- 
cgit v1.2.3


From b7b6f94cf6e6d961f78064315b6f5de5d9c6414b Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 18 Jun 2018 14:22:40 +1000
Subject: rculist: Improve documentation for list_for_each_entry_from_rcu()

Unfortunately the patch for adding list_for_each_entry_from_rcu()
wasn't the final patch after all review.  It is functionally
correct but the documentation was incomplete.

This patch adds this missing documentation which includes an update to
the documentation for list_for_each_entry_continue_rcu() to match the
documentation for the new list_for_each_entry_from_rcu(), and adds
list_for_each_entry_from_rcu() and the already existing
hlist_for_each_entry_from_rcu() to section 7 of whatisRCU.txt.

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 Documentation/RCU/whatisRCU.txt |  2 ++
 include/linux/rculist.h         | 19 ++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 94288f1b8759..c2a7facf7ff9 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -820,11 +820,13 @@ RCU list traversal:
 	list_next_rcu
 	list_for_each_entry_rcu
 	list_for_each_entry_continue_rcu
+	list_for_each_entry_from_rcu
 	hlist_first_rcu
 	hlist_next_rcu
 	hlist_pprev_rcu
 	hlist_for_each_entry_rcu
 	hlist_for_each_entry_rcu_bh
+	hlist_for_each_entry_from_rcu
 	hlist_for_each_entry_continue_rcu
 	hlist_for_each_entry_continue_rcu_bh
 	hlist_nulls_first_rcu
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 36df6ccbc874..4786c2235b98 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -396,7 +396,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @member:	the name of the list_head within the struct.
  *
  * Continue to iterate over list of given type, continuing after
- * the current position.
+ * the current position which must have been in the list when the RCU read
+ * lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_from_rcu() except
+ * this starts after the given position and that one starts at the given
+ * position.
  */
 #define list_for_each_entry_continue_rcu(pos, head, member) 		\
 	for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
@@ -411,6 +420,14 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  *
  * Iterate over the tail of a list starting from a given position,
  * which must have been in the list when the RCU read lock was taken.
+ * This would typically require either that you obtained the node from a
+ * previous walk of the list in the same RCU read-side critical section, or
+ * that you held some sort of non-RCU reference (such as a reference count)
+ * to keep the node alive *and* in the list.
+ *
+ * This iterator is similar to list_for_each_entry_continue_rcu() except
+ * this starts from the given position and that one starts from the position
+ * after the given position.
  */
 #define list_for_each_entry_from_rcu(pos, head, member)			\
 	for (; &(pos)->member != (head);					\
-- 
cgit v1.2.3


From 3025520ec424df8b0fd5cdc319ad6b83406d9954 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 22 May 2018 11:38:47 -0700
Subject: rcutorture: Use per-CPU random state for rcu_torture_timer()

Currently, the rcu_torture_timer() function uses a single global
torture_random_state structure protected by a single global lock.
This conflicts to some extent with performance and scalability,
but even more with the goal of consolidating read-side testing
with rcu_torture_reader().  This commit therefore creates a per-CPU
torture_random_state structure for use by rcu_torture_timer() and
eliminates the lock.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Make rcu_torture_timer_rand static, per 0day Test Robot report. ]
---
 include/linux/torture.h |  2 ++
 kernel/rcu/rcutorture.c | 10 +++++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index a55e80817dae..61dfd93b6ee4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -64,6 +64,8 @@ struct torture_random_state {
 	long trs_count;
 };
 #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 }
+#define DEFINE_TORTURE_RANDOM_PERCPU(name) \
+	DEFINE_PER_CPU(struct torture_random_state, name)
 unsigned long torture_random(struct torture_random_state *trsp);
 
 /* Task shuffler, which causes CPUs to occasionally go idle. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 2452e4a29923..d5a5465d2507 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1143,6 +1143,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
 	return true;
 }
 
+static DEFINE_TORTURE_RANDOM_PERCPU(rcu_torture_timer_rand);
+
 /*
  * RCU torture reader from timer handler.  Dereferences rcu_torture_current,
  * incrementing the corresponding element of the pipeline array.  The
@@ -1154,12 +1156,12 @@ static void rcu_torture_timer(struct timer_list *unused)
 	int idx;
 	unsigned long started;
 	unsigned long completed;
-	static DEFINE_TORTURE_RANDOM(rand);
-	static DEFINE_SPINLOCK(rand_lock);
 	struct rcu_torture *p;
 	int pipe_count;
+	struct torture_random_state *trsp;
 	unsigned long long ts;
 
+	trsp = this_cpu_ptr(&rcu_torture_timer_rand);
 	atomic_long_inc(&n_rcu_torture_timers);
 	idx = cur_ops->readlock();
 	started = cur_ops->get_gp_seq();
@@ -1176,9 +1178,7 @@ static void rcu_torture_timer(struct timer_list *unused)
 	}
 	if (p->rtort_mbtest == 0)
 		atomic_inc(&n_rcu_torture_mberror);
-	spin_lock(&rand_lock);
-	cur_ops->read_delay(&rand);
-	spin_unlock(&rand_lock);
+	cur_ops->read_delay(trsp);
 	preempt_disable();
 	pipe_count = p->rtort_pipe_count;
 	if (pipe_count > RCU_TORTURE_PIPE_LEN) {
-- 
cgit v1.2.3


From b16ebe925a4400a2ec3dc663c81dce2fd9bf0998 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:08 +0300
Subject: devlink: Add support for creating and destroying regions

This allows a device to register its supported address regions.
Each address region can be accessed directly for example reading
the snapshots taken of this address space.
Drivers are not limited in the name selection for different regions.
An example of a region-name can be: pci cr-space, register-space.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 22 ++++++++++++++
 net/core/devlink.c    | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index f67c29cede15..e5397652f2fb 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -28,6 +28,7 @@ struct devlink {
 	struct list_head dpipe_table_list;
 	struct list_head resource_list;
 	struct list_head param_list;
+	struct list_head region_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -397,6 +398,8 @@ enum devlink_param_generic_id {
 	.validate = _validate,						\
 }
 
+struct devlink_region;
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -543,6 +546,11 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size);
+void devlink_region_destroy(struct devlink_region *region);
 
 #else
 
@@ -770,6 +778,20 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
 }
 
+static inline struct devlink_region *
+devlink_region_create(struct devlink *devlink,
+		      const char *region_name,
+		      u32 region_max_snapshots,
+		      u64 region_size)
+{
+	return NULL;
+}
+
+static inline void
+devlink_region_destroy(struct devlink_region *region)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 470f3dbfecfe..cac856136ac6 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -326,6 +326,28 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
 						  pool_type, p_tc_index);
 }
 
+struct devlink_region {
+	struct devlink *devlink;
+	struct list_head list;
+	const char *name;
+	struct list_head snapshot_list;
+	u32 max_snapshots;
+	u32 cur_snapshots;
+	u64 size;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		if (!strcmp(region->name, region_name))
+			return region;
+
+	return NULL;
+}
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
@@ -3358,6 +3380,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
+	INIT_LIST_HEAD(&devlink->region_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -4109,6 +4132,67 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 }
 EXPORT_SYMBOL_GPL(devlink_param_value_changed);
 
+/**
+ *	devlink_region_create - create a new address region
+ *
+ *	@devlink: devlink
+ *	@region_name: region name
+ *	@region_max_snapshots: Maximum supported number of snapshots for region
+ *	@region_size: size of region
+ */
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size)
+{
+	struct devlink_region *region;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+
+	if (devlink_region_get_by_name(devlink, region_name)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	region->devlink = devlink;
+	region->max_snapshots = region_max_snapshots;
+	region->name = region_name;
+	region->size = region_size;
+	INIT_LIST_HEAD(&region->snapshot_list);
+	list_add_tail(&region->list, &devlink->region_list);
+
+	mutex_unlock(&devlink->lock);
+	return region;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
+
+/**
+ *	devlink_region_destroy - destroy address region
+ *
+ *	@region: devlink region to destroy
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+	struct devlink *devlink = region->devlink;
+
+	mutex_lock(&devlink->lock);
+	list_del(&region->list);
+	mutex_unlock(&devlink->lock);
+	kfree(region);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From ccadfa444b34c6ec7bb458eee17fdd8c9a456c63 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:09 +0300
Subject: devlink: Add callback to query for snapshot id before snapshot create

To restrict the driver with the snapshot ID selection a new callback
is introduced for the driver to get the snapshot ID before creating
a new snapshot. This will also allow giving the same ID for multiple
snapshots taken of different regions on the same time.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h |  8 ++++++++
 net/core/devlink.c    | 21 +++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index e5397652f2fb..f27d8593687a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -29,6 +29,7 @@ struct devlink {
 	struct list_head resource_list;
 	struct list_head param_list;
 	struct list_head region_list;
+	u32 snapshot_id;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -551,6 +552,7 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     u32 region_max_snapshots,
 					     u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
+u32 devlink_region_shapshot_id_get(struct devlink *devlink);
 
 #else
 
@@ -792,6 +794,12 @@ devlink_region_destroy(struct devlink_region *region)
 {
 }
 
+static inline u32
+devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index cac856136ac6..6c92ddd2465d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4193,6 +4193,27 @@ void devlink_region_destroy(struct devlink_region *region)
 }
 EXPORT_SYMBOL_GPL(devlink_region_destroy);
 
+/**
+ *	devlink_region_shapshot_id_get - get snapshot ID
+ *
+ *	This callback should be called when adding a new snapshot,
+ *	Driver should use the same id for multiple snapshots taken
+ *	on multiple regions at the same time/by the same trigger.
+ *
+ *	@devlink: devlink
+ */
+u32 devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	u32 id;
+
+	mutex_lock(&devlink->lock);
+	id = ++devlink->snapshot_id;
+	mutex_unlock(&devlink->lock);
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From d7e5272282d93bedbbeb6174b8af8425d7dcfd6f Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:10 +0300
Subject: devlink: Add support for creating region snapshots

Each device address region can store multiple snapshots,
each snapshot is identified using a different numerical ID.
This ID is used when deleting a snapshot or showing an address
region specific snapshot. This patch exposes a callback to add
a new snapshot to an address region.
The snapshot will be deleted using the destructor function
when destroying a region or when a snapshot delete command
from devlink user tool.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 13 +++++++
 net/core/devlink.c    | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index f27d8593687a..905f0bb7b4ba 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -401,6 +401,8 @@ enum devlink_param_generic_id {
 
 struct devlink_region;
 
+typedef void devlink_snapshot_data_dest_t(const void *data);
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -553,6 +555,9 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
 u32 devlink_region_shapshot_id_get(struct devlink *devlink);
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor);
 
 #else
 
@@ -800,6 +805,14 @@ devlink_region_shapshot_id_get(struct devlink *devlink)
 	return 0;
 }
 
+static inline int
+devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+			       u8 *data, u32 snapshot_id,
+			       devlink_snapshot_data_dest_t *data_destructor)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6c92ddd2465d..7d09fe60fa4b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -336,6 +336,15 @@ struct devlink_region {
 	u64 size;
 };
 
+struct devlink_snapshot {
+	struct list_head list;
+	struct devlink_region *region;
+	devlink_snapshot_data_dest_t *data_destructor;
+	u64 data_len;
+	u8 *data;
+	u32 id;
+};
+
 static struct devlink_region *
 devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
 {
@@ -348,6 +357,26 @@ devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
 	return NULL;
 }
 
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+	struct devlink_snapshot *snapshot;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list)
+		if (snapshot->id == id)
+			return snapshot;
+
+	return NULL;
+}
+
+static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
+{
+	snapshot->region->cur_snapshots--;
+	list_del(&snapshot->list);
+	(*snapshot->data_destructor)(snapshot->data);
+	kfree(snapshot);
+}
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
@@ -4185,8 +4214,14 @@ EXPORT_SYMBOL_GPL(devlink_region_create);
 void devlink_region_destroy(struct devlink_region *region)
 {
 	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot, *ts;
 
 	mutex_lock(&devlink->lock);
+
+	/* Free all snapshots of region */
+	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+		devlink_region_snapshot_del(snapshot);
+
 	list_del(&region->list);
 	mutex_unlock(&devlink->lock);
 	kfree(region);
@@ -4214,6 +4249,66 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
 
+/**
+ *	devlink_region_snapshot_create - create a new snapshot
+ *	This will add a new snapshot of a region. The snapshot
+ *	will be stored on the region struct and can be accessed
+ *	from devlink. This is useful for future	analyses of snapshots.
+ *	Multiple snapshots can be created on a region.
+ *	The @snapshot_id should be obtained using the getter function.
+ *
+ *	@devlink_region: devlink region of the snapshot
+ *	@data_len: size of snapshot data
+ *	@data: snapshot data
+ *	@snapshot_id: snapshot id to be created
+ *	@data_destructor: pointer to destructor function to free data
+ */
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot;
+	int err;
+
+	mutex_lock(&devlink->lock);
+
+	/* check if region can hold one more snapshot */
+	if (region->cur_snapshots == region->max_snapshots) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	snapshot->id = snapshot_id;
+	snapshot->region = region;
+	snapshot->data = data;
+	snapshot->data_len = data_len;
+	snapshot->data_destructor = data_destructor;
+
+	list_add_tail(&snapshot->list, &region->snapshot_list);
+
+	region->cur_snapshots++;
+
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
-- 
cgit v1.2.3


From d8db7ea55f2ff5890ad31137233a3808d80c7f62 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:11 +0300
Subject: devlink: Add support for region get command

Add support for DEVLINK_CMD_REGION_GET command which is used for
querying for the supported DEV/REGION values of devlink devices.
The support is both for doit and dumpit.

Reply includes:
  BUS_NAME, DEVICE_NAME, REGION_NAME, REGION_SIZE

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |   6 +++
 net/core/devlink.c           | 114 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 68641fb56654..28bfa8aa3d91 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -83,6 +83,9 @@ enum devlink_command {
 	DEVLINK_CMD_PARAM_NEW,
 	DEVLINK_CMD_PARAM_DEL,
 
+	DEVLINK_CMD_REGION_GET,
+	DEVLINK_CMD_REGION_SET,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -262,6 +265,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_PARAM_VALUE_DATA,		/* dynamic */
 	DEVLINK_ATTR_PARAM_VALUE_CMODE,		/* u8 */
 
+	DEVLINK_ATTR_REGION_NAME,               /* string */
+	DEVLINK_ATTR_REGION_SIZE,               /* u64 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7d09fe60fa4b..221ddb6bae48 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3149,6 +3149,111 @@ static void devlink_param_unregister_one(struct devlink *devlink,
 	kfree(param_item);
 }
 
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+				  enum devlink_command cmd, u32 portid,
+				  u32 seq, int flags,
+				  struct devlink_region *region)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+				region->size,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_region *region;
+	const char *region_name;
+	struct sk_buff *msg;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+				     info->snd_portid, info->snd_seq, 0,
+				     region);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
+					    struct netlink_callback *cb)
+{
+	struct devlink_region *region;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(region, &devlink->region_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_region_fill(msg, devlink,
+						     DEVLINK_CMD_REGION_GET,
+						     NETLINK_CB(cb->skb).portid,
+						     cb->nlh->nlmsg_seq,
+						     NLM_F_MULTI, region);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+	cb->args[0] = idx;
+	return msg->len;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3172,6 +3277,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -3370,6 +3476,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_REGION_GET,
+		.doit = devlink_nl_cmd_region_get_doit,
+		.dumpit = devlink_nl_cmd_region_get_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From a006d467fbf1d405e73cd167829d7a9e3df600e3 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:12 +0300
Subject: devlink: Extend the support querying for region snapshot IDs

Extend the support for DEVLINK_CMD_REGION_GET command to also
return the IDs of the snapshot currently present on the region.
Each reply will include a nested snapshots attribute that
can contain multiple snapshot attributes each with an ID.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  3 +++
 net/core/devlink.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 28bfa8aa3d91..abde4e306375 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -267,6 +267,9 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_REGION_NAME,               /* string */
 	DEVLINK_ATTR_REGION_SIZE,               /* u64 */
+	DEVLINK_ATTR_REGION_SNAPSHOTS,          /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT,           /* nested */
+	DEVLINK_ATTR_REGION_SNAPSHOT_ID,        /* u32 */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 221ddb6bae48..cb75e26d70ff 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3149,6 +3149,55 @@ static void devlink_param_unregister_one(struct devlink *devlink,
 	kfree(param_item);
 }
 
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+					     struct devlink *devlink,
+					     struct devlink_snapshot *snapshot)
+{
+	struct nlattr *snap_attr;
+	int err;
+
+	snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+	if (!snap_attr)
+		return -EINVAL;
+
+	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, snap_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snap_attr);
+	return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+					      struct devlink *devlink,
+					      struct devlink_region *region)
+{
+	struct devlink_snapshot *snapshot;
+	struct nlattr *snapshots_attr;
+	int err;
+
+	snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS);
+	if (!snapshots_attr)
+		return -EINVAL;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list) {
+		err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, snapshots_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snapshots_attr);
+	return err;
+}
+
 static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
 				  enum devlink_command cmd, u32 portid,
 				  u32 seq, int flags,
@@ -3175,6 +3224,10 @@ static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (err)
 		goto nla_put_failure;
 
+	err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+	if (err)
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
-- 
cgit v1.2.3


From 866319bb9437614407ca36f8b16f89ab77a6a831 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:13 +0300
Subject: devlink: Add support for region snapshot delete command

Add support for DEVLINK_CMD_REGION_DEL used
for deleting a snapshot from a region. The snapshot ID is required.
Also added notification support for NEW and DEL of snapshots.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  2 +
 net/core/devlink.c           | 93 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index abde4e306375..d212e02f843f 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -85,6 +85,8 @@ enum devlink_command {
 
 	DEVLINK_CMD_REGION_GET,
 	DEVLINK_CMD_REGION_SET,
+	DEVLINK_CMD_REGION_NEW,
+	DEVLINK_CMD_REGION_DEL,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index cb75e26d70ff..fc0836371a71 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3236,6 +3236,58 @@ nla_put_failure:
 	return err;
 }
 
+static void devlink_nl_region_notify(struct devlink_region *region,
+				     struct devlink_snapshot *snapshot,
+				     enum devlink_command cmd)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+	if (!hdr)
+		goto out_free_msg;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto out_cancel_msg;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+			     region->name);
+	if (err)
+		goto out_cancel_msg;
+
+	if (snapshot) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+				  snapshot->id);
+		if (err)
+			goto out_cancel_msg;
+	} else {
+		err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+					region->size, DEVLINK_ATTR_PAD);
+		if (err)
+			goto out_cancel_msg;
+	}
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+
+	return;
+
+out_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+out_free_msg:
+	nlmsg_free(msg);
+}
+
 static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
 					  struct genl_info *info)
 {
@@ -3307,6 +3359,35 @@ out:
 	return msg->len;
 }
 
+static int devlink_nl_cmd_region_del(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct devlink_region *region;
+	const char *region_name;
+	u32 snapshot_id;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+	devlink_region_snapshot_del(snapshot);
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3331,6 +3412,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -3537,6 +3619,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_REGION_DEL,
+		.doit = devlink_nl_cmd_region_del,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -4363,6 +4452,7 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 	region->size = region_size;
 	INIT_LIST_HEAD(&region->snapshot_list);
 	list_add_tail(&region->list, &devlink->region_list);
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
 
 	mutex_unlock(&devlink->lock);
 	return region;
@@ -4390,6 +4480,8 @@ void devlink_region_destroy(struct devlink_region *region)
 		devlink_region_snapshot_del(snapshot);
 
 	list_del(&region->list);
+
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
 	mutex_unlock(&devlink->lock);
 	kfree(region);
 }
@@ -4467,6 +4559,7 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 
 	region->cur_snapshots++;
 
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
 	mutex_unlock(&devlink->lock);
 	return 0;
 
-- 
cgit v1.2.3


From 4e54795a27f56102649f121a34b8445e42f79ccd Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:14 +0300
Subject: devlink: Add support for region snapshot read command

Add support for DEVLINK_CMD_REGION_READ_GET used for both reading
and dumping region data. Read allows reading from a region specific
address for given length. Dump allows reading the full region.
If only snapshot ID is provided a snapshot dump will be done.
If snapshot ID, Address and Length are provided a snapshot read
will done.

This is used for both snapshot access and will be used in the same
way to access current data on the region.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |   7 ++
 net/core/devlink.c           | 182 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index d212e02f843f..79407bbd296d 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -87,6 +87,7 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_SET,
 	DEVLINK_CMD_REGION_NEW,
 	DEVLINK_CMD_REGION_DEL,
+	DEVLINK_CMD_REGION_READ,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
@@ -273,6 +274,12 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_SNAPSHOT,           /* nested */
 	DEVLINK_ATTR_REGION_SNAPSHOT_ID,        /* u32 */
 
+	DEVLINK_ATTR_REGION_CHUNKS,             /* nested */
+	DEVLINK_ATTR_REGION_CHUNK,              /* nested */
+	DEVLINK_ATTR_REGION_CHUNK_DATA,         /* binary */
+	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
+	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fc0836371a71..e5118dba6bb4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3388,6 +3388,181 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb,
 	return 0;
 }
 
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+						 struct devlink *devlink,
+						 u8 *chunk, u32 chunk_size,
+						 u64 addr)
+{
+	struct nlattr *chunk_attr;
+	int err;
+
+	chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK);
+	if (!chunk_attr)
+		return -EINVAL;
+
+	err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, chunk_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, chunk_attr);
+	return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
+						struct devlink *devlink,
+						struct devlink_region *region,
+						struct nlattr **attrs,
+						u64 start_offset,
+						u64 end_offset,
+						bool dump,
+						u64 *new_offset)
+{
+	struct devlink_snapshot *snapshot;
+	u64 curr_offset = start_offset;
+	u32 snapshot_id;
+	int err = 0;
+
+	*new_offset = start_offset;
+
+	snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	if (end_offset > snapshot->data_len || dump)
+		end_offset = snapshot->data_len;
+
+	while (curr_offset < end_offset) {
+		u32 data_size;
+		u8 *data;
+
+		if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
+			data_size = end_offset - curr_offset;
+		else
+			data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+
+		data = &snapshot->data[curr_offset];
+		err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
+							    data, data_size,
+							    curr_offset);
+		if (err)
+			break;
+
+		curr_offset += data_size;
+	}
+	*new_offset = curr_offset;
+
+	return err;
+}
+
+static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+					     struct netlink_callback *cb)
+{
+	u64 ret_offset, start_offset, end_offset = 0;
+	struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
+	const struct genl_ops *ops = cb->data;
+	struct devlink_region *region;
+	struct nlattr *chunks_attr;
+	const char *region_name;
+	struct devlink *devlink;
+	bool dump = true;
+	void *hdr;
+	int err;
+
+	start_offset = *((u64 *)&cb->args[0]);
+
+	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
+			  attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+	if (err)
+		goto out;
+
+	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink))
+		goto out;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
+
+	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		goto out_unlock;
+
+	region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		goto out_unlock;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+			  DEVLINK_CMD_REGION_READ);
+	if (!hdr)
+		goto out_unlock;
+
+	err = devlink_nl_put_handle(skb, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+	if (err)
+		goto nla_put_failure;
+
+	chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
+	if (!chunks_attr)
+		goto nla_put_failure;
+
+	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+		if (!start_offset)
+			start_offset =
+				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+		dump = false;
+	}
+
+	err = devlink_nl_region_read_snapshot_fill(skb, devlink,
+						   region, attrs,
+						   start_offset,
+						   end_offset, dump,
+						   &ret_offset);
+
+	if (err && err != -EMSGSIZE)
+		goto nla_put_failure;
+
+	/* Check if there was any progress done to prevent infinite loop */
+	if (ret_offset == start_offset)
+		goto nla_put_failure;
+
+	*((u64 *)&cb->args[0]) = ret_offset;
+
+	nla_nest_end(skb, chunks_attr);
+	genlmsg_end(skb, hdr);
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+
+	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+out_unlock:
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+out:
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3626,6 +3801,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_REGION_READ,
+		.dumpit = devlink_nl_cmd_region_read_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 523f9eb1ef25aab4aaf9aeb5356160e8039411ef Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:15 +0300
Subject: net/mlx4_core: Add health buffer address capability

Health buffer address is a 32 bit PCI address offset provided by
the FW. This offset is used for reading FW health debug data
located on the shared CR space. Cr space is accessible in both
driver and FW and allows for different queries and configurations.
Health buffer size is always 64B of readable data followed by a
lock which is used to block volatile CR space access.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 5 ++++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   | 1 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 1 +
 include/linux/mlx4/device.h               | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 46dcbfbe4c5e..babcfd9c0571 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -825,7 +825,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2
-
+#define QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET	0xe4
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -1082,6 +1082,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev_cap->rl_caps.min_unit = size >> 14;
 	}
 
+	MLX4_GET(dev_cap->health_buffer_addrs, outbox,
+		 QUERY_DEV_CAP_HEALTH_BUFFER_ADDRESS_OFFSET);
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index cd6399c76bfd..650ae08c71de 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -128,6 +128,7 @@ struct mlx4_dev_cap {
 	u32 dmfs_high_rate_qpn_base;
 	u32 dmfs_high_rate_qpn_range;
 	struct mlx4_rate_limit_caps rl_caps;
+	u32 health_buffer_addrs;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
 	bool wol_port[MLX4_MAX_PORTS + 1];
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index c42eddfcd2b5..806d441b3701 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -523,6 +523,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
 	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
 	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
+	dev->caps.health_buffer_addrs  = dev_cap->health_buffer_addrs;
 
 	/* Save uar page shift */
 	if (!mlx4_is_slave(dev)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 122e7e9d3091..e3bfe76aea98 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -630,6 +630,7 @@ struct mlx4_caps {
 	u32			vf_caps;
 	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
+	u32			health_buffer_addrs;
 };
 
 struct mlx4_buf_list {
-- 
cgit v1.2.3


From bedc989b0c98285b277ff8a08ff9514e580913f4 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:16 +0300
Subject: net/mlx4_core: Add Crdump FW snapshot support

Crdump allows the driver to create a snapshot of the FW PCI
crspace and health buffer during a critical FW issue.
In case of a FW command timeout, FW getting stuck or a non zero
value on the catastrophic buffer, a snapshot will be taken.

The snapshot is exposed using devlink, cr-space, fw-health
address regions are registered on init and snapshots are attached
once a new snapshot is collected by the driver.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/Makefile |   2 +-
 drivers/net/ethernet/mellanox/mlx4/catas.c  |   6 +-
 drivers/net/ethernet/mellanox/mlx4/crdump.c | 231 ++++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/main.c   |  10 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h   |   4 +
 include/linux/mlx4/device.h                 |   6 +
 6 files changed, 255 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx4/crdump.c

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/Makefile b/drivers/net/ethernet/mellanox/mlx4/Makefile
index 16b10d01fcf4..3f400770fcd8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx4/Makefile
@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX4_CORE)		+= mlx4_core.o
 
 mlx4_core-y :=	alloc.o catas.o cmd.o cq.o eq.o fw.o fw_qos.o icm.o intf.o \
 		main.o mcg.o mr.o pd.o port.o profile.o qp.o reset.o sense.o \
-		srq.o resource_tracker.o
+		srq.o resource_tracker.o crdump.o
 
 obj-$(CONFIG_MLX4_EN)               += mlx4_en.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 8afe4b5fb09b..c81d15bf259c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -178,10 +178,12 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 
 	dev = persist->dev;
 	mlx4_err(dev, "device is going to be reset\n");
-	if (mlx4_is_slave(dev))
+	if (mlx4_is_slave(dev)) {
 		err = mlx4_reset_slave(dev);
-	else
+	} else {
+		mlx4_crdump_collect(dev);
 		err = mlx4_reset_master(dev);
+	}
 
 	if (!err) {
 		mlx4_err(dev, "device was reset successfully\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
new file mode 100644
index 000000000000..4d5524dffec4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx4.h"
+
+#define BAD_ACCESS			0xBADACCE5
+#define HEALTH_BUFFER_SIZE		0x40
+#define CR_ENABLE_BIT			swab32(BIT(6))
+#define CR_ENABLE_BIT_OFFSET		0xF3F04
+#define MAX_NUM_OF_DUMPS_TO_STORE	(8)
+
+static const char *region_cr_space_str = "cr-space";
+static const char *region_fw_health_str = "fw-health";
+
+/* Set to true in case cr enable bit was set to true before crdump */
+static bool crdump_enbale_bit_set;
+
+static void crdump_enable_crspace_access(struct mlx4_dev *dev,
+					 u8 __iomem *cr_space)
+{
+	/* Get current enable bit value */
+	crdump_enbale_bit_set =
+		readl(cr_space + CR_ENABLE_BIT_OFFSET) & CR_ENABLE_BIT;
+
+	/* Enable FW CR filter (set bit6 to 0) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) & ~CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+
+	/* Enable block volatile crspace accesses */
+	writel(swab32(1), cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+}
+
+static void crdump_disable_crspace_access(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space)
+{
+	/* Disable block volatile crspace accesses */
+	writel(0, cr_space + dev->caps.health_buffer_addrs +
+	       HEALTH_BUFFER_SIZE);
+
+	/* Restore FW CR filter value (set bit6 to original value) */
+	if (crdump_enbale_bit_set)
+		writel(readl(cr_space + CR_ENABLE_BIT_OFFSET) | CR_ENABLE_BIT,
+		       cr_space + CR_ENABLE_BIT_OFFSET);
+}
+
+static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
+					u8 __iomem *cr_space,
+					u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 *crspace_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_crspace) {
+		mlx4_err(dev, "crdump: cr-space region is NULL\n");
+		return;
+	}
+
+	/* Try to collect CR space */
+	cr_res_size = pci_resource_len(pdev, 0);
+	crspace_data = kvmalloc(cr_res_size, GFP_KERNEL);
+	if (crspace_data) {
+		for (offset = 0; offset < cr_res_size; offset += 4)
+			*(u32 *)(crspace_data + offset) =
+					readl(cr_space + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_crspace,
+						     cr_res_size, crspace_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(crspace_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_cr_space_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_cr_space_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate crspace buffer\n");
+	}
+}
+
+static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
+					  u8 __iomem *cr_space,
+					  u32 id)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	u8 *health_data;
+	int offset;
+	int err;
+
+	if (!crdump->region_fw_health) {
+		mlx4_err(dev, "crdump: fw-health region is NULL\n");
+		return;
+	}
+
+	/* Try to collect health buffer */
+	health_data = kvmalloc(HEALTH_BUFFER_SIZE, GFP_KERNEL);
+	if (health_data) {
+		u8 __iomem *health_buf_start =
+				cr_space + dev->caps.health_buffer_addrs;
+
+		for (offset = 0; offset < HEALTH_BUFFER_SIZE; offset += 4)
+			*(u32 *)(health_data + offset) =
+					readl(health_buf_start + offset);
+
+		err = devlink_region_snapshot_create(crdump->region_fw_health,
+						     HEALTH_BUFFER_SIZE,
+						     health_data,
+						     id, &kvfree);
+		if (err) {
+			kvfree(health_data);
+			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
+				  region_fw_health_str, id, err);
+		} else {
+			mlx4_info(dev, "crdump: added snapshot %d to devlink region %s\n",
+				  id, region_fw_health_str);
+		}
+	} else {
+		mlx4_err(dev, "crdump: Failed to allocate health buffer\n");
+	}
+}
+
+int mlx4_crdump_collect(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct pci_dev *pdev = dev->persist->pdev;
+	unsigned long cr_res_size;
+	u8 __iomem *cr_space;
+	u32 id;
+
+	if (!dev->caps.health_buffer_addrs) {
+		mlx4_info(dev, "crdump: FW doesn't support health buffer access, skipping\n");
+		return 0;
+	}
+
+	cr_res_size = pci_resource_len(pdev, 0);
+
+	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
+	if (!cr_space) {
+		mlx4_err(dev, "crdump: Failed to map pci cr region\n");
+		return -ENODEV;
+	}
+
+	crdump_enable_crspace_access(dev, cr_space);
+
+	/* Get the available snapshot ID for the dumps */
+	id = devlink_region_shapshot_id_get(devlink);
+
+	/* Try to capture dumps */
+	mlx4_crdump_collect_crspace(dev, cr_space, id);
+	mlx4_crdump_collect_fw_health(dev, cr_space, id);
+
+	crdump_disable_crspace_access(dev, cr_space);
+
+	iounmap(cr_space);
+	return 0;
+}
+
+int mlx4_crdump_init(struct mlx4_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+	struct pci_dev *pdev = dev->persist->pdev;
+
+	/* Create cr-space region */
+	crdump->region_crspace =
+		devlink_region_create(devlink,
+				      region_cr_space_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      pci_resource_len(pdev, 0));
+	if (IS_ERR(crdump->region_crspace))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_cr_space_str,
+			  PTR_ERR(crdump->region_crspace));
+
+	/* Create fw-health region */
+	crdump->region_fw_health =
+		devlink_region_create(devlink,
+				      region_fw_health_str,
+				      MAX_NUM_OF_DUMPS_TO_STORE,
+				      HEALTH_BUFFER_SIZE);
+	if (IS_ERR(crdump->region_fw_health))
+		mlx4_warn(dev, "crdump: create devlink region %s err %ld\n",
+			  region_fw_health_str,
+			  PTR_ERR(crdump->region_fw_health));
+
+	return 0;
+}
+
+void mlx4_crdump_end(struct mlx4_dev *dev)
+{
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
+
+	devlink_region_destroy(crdump->region_fw_health);
+	devlink_region_destroy(crdump->region_crspace);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 806d441b3701..46b021409b8b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3807,10 +3807,14 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 		}
 	}
 
-	err = mlx4_catas_init(&priv->dev);
+	err = mlx4_crdump_init(&priv->dev);
 	if (err)
 		goto err_release_regions;
 
+	err = mlx4_catas_init(&priv->dev);
+	if (err)
+		goto err_crdump;
+
 	err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
 	if (err)
 		goto err_catas;
@@ -3820,6 +3824,9 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
 err_catas:
 	mlx4_catas_end(&priv->dev);
 
+err_crdump:
+	mlx4_crdump_end(&priv->dev);
+
 err_release_regions:
 	pci_release_regions(pdev);
 
@@ -4081,6 +4088,7 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	else
 		mlx4_info(dev, "%s: interface is down\n", __func__);
 	mlx4_catas_end(dev);
+	mlx4_crdump_end(dev);
 	if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
 		mlx4_warn(dev, "Disabling SR-IOV\n");
 		pci_disable_sriov(pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2ebaa3bee42f..6e016092a6f1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1042,6 +1042,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 int mlx4_catas_init(struct mlx4_dev *dev);
 void mlx4_catas_end(struct mlx4_dev *dev);
+int mlx4_crdump_init(struct mlx4_dev *dev);
+void mlx4_crdump_end(struct mlx4_dev *dev);
 int mlx4_restart_one(struct pci_dev *pdev, bool reload,
 		     struct devlink *devlink);
 int mlx4_register_device(struct mlx4_dev *dev);
@@ -1228,6 +1230,8 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
 int mlx4_comm_internal_err(u32 slave_read);
 
+int mlx4_crdump_collect(struct mlx4_dev *dev);
+
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 		    enum mlx4_port_type *type);
 void mlx4_do_sense_ports(struct mlx4_dev *dev,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e3bfe76aea98..300b944e6e1e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -852,6 +852,11 @@ struct mlx4_vf_dev {
 	u8			n_ports;
 };
 
+struct mlx4_fw_crdump {
+	struct devlink_region *region_crspace;
+	struct devlink_region *region_fw_health;
+};
+
 enum mlx4_pci_status {
 	MLX4_PCI_STATUS_DISABLED,
 	MLX4_PCI_STATUS_ENABLED,
@@ -872,6 +877,7 @@ struct mlx4_dev_persistent {
 	u8	interface_state;
 	struct mutex		pci_status_mutex; /* sync pci state */
 	enum mlx4_pci_status	pci_status;
+	struct mlx4_fw_crdump	crdump;
 };
 
 struct mlx4_dev {
-- 
cgit v1.2.3


From f6a69885f2e38be0229ab9f6a2d9d4a1b4ba2be5 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:17 +0300
Subject: devlink: Add generic parameters region_snapshot

region_snapshot - When set enables capturing region snapshots

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 net/core/devlink.c    | 5 +++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 905f0bb7b4ba..b9b89d6604d4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -361,6 +361,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -376,6 +377,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME "enable_sriov"
 #define DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
+#define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e5118dba6bb4..65fc366a78a4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2671,6 +2671,11 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
 		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
 	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
-- 
cgit v1.2.3


From 3c641ba4a852cf4e90e3d7f29c5df08e24213c5d Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 12 Jul 2018 15:13:18 +0300
Subject: net/mlx4_core: Use devlink region_snapshot parameter

This parameter enables capturing region snapshot of the crspace
during critical errors. The default value of this parameter is
disabled, it can be enabled using devlink param commands.
It is possible to configure during runtime and also driver init.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/crdump.c |  8 ++++++
 drivers/net/ethernet/mellanox/mlx4/main.c   | 41 +++++++++++++++++++++++++++++
 include/linux/mlx4/device.h                 |  1 +
 3 files changed, 50 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 4d5524dffec4..88316c743820 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -158,6 +158,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
 int mlx4_crdump_collect(struct mlx4_dev *dev)
 {
 	struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	struct pci_dev *pdev = dev->persist->pdev;
 	unsigned long cr_res_size;
 	u8 __iomem *cr_space;
@@ -168,6 +169,11 @@ int mlx4_crdump_collect(struct mlx4_dev *dev)
 		return 0;
 	}
 
+	if (!crdump->snapshot_enable) {
+		mlx4_info(dev, "crdump: devlink snapshot disabled, skipping\n");
+		return 0;
+	}
+
 	cr_res_size = pci_resource_len(pdev, 0);
 
 	cr_space = ioremap(pci_resource_start(pdev, 0), cr_res_size);
@@ -197,6 +203,8 @@ int mlx4_crdump_init(struct mlx4_dev *dev)
 	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	struct pci_dev *pdev = dev->persist->pdev;
 
+	crdump->snapshot_enable = false;
+
 	/* Create cr-space region */
 	crdump->region_crspace =
 		devlink_region_create(devlink,
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 46b021409b8b..2d979a652b7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -191,6 +191,26 @@ static int mlx4_devlink_ierr_reset_set(struct devlink *devlink, u32 id,
 	return 0;
 }
 
+static int mlx4_devlink_crdump_snapshot_get(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	ctx->val.vbool = dev->persist->crdump.snapshot_enable;
+	return 0;
+}
+
+static int mlx4_devlink_crdump_snapshot_set(struct devlink *devlink, u32 id,
+					    struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+
+	dev->persist->crdump.snapshot_enable = ctx->val.vbool;
+	return 0;
+}
+
 static int
 mlx4_devlink_max_macs_validate(struct devlink *devlink, u32 id,
 			       union devlink_param_value val,
@@ -224,6 +244,11 @@ static const struct devlink_param mlx4_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(MAX_MACS,
 			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			      NULL, NULL, mlx4_devlink_max_macs_validate),
+	DEVLINK_PARAM_GENERIC(REGION_SNAPSHOT,
+			      BIT(DEVLINK_PARAM_CMODE_RUNTIME) |
+			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+			      mlx4_devlink_crdump_snapshot_get,
+			      mlx4_devlink_crdump_snapshot_set, NULL),
 	DEVLINK_PARAM_DRIVER(MLX4_DEVLINK_PARAM_ID_ENABLE_64B_CQE_EQE,
 			     "enable_64b_cqe_eqe", DEVLINK_PARAM_TYPE_BOOL,
 			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
@@ -270,6 +295,11 @@ static void mlx4_devlink_set_params_init_values(struct devlink *devlink)
 	mlx4_devlink_set_init_value(devlink,
 				    MLX4_DEVLINK_PARAM_ID_ENABLE_4K_UAR,
 				    value);
+
+	value.vbool = false;
+	mlx4_devlink_set_init_value(devlink,
+				    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+				    value);
 }
 
 static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
@@ -3862,6 +3892,9 @@ static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port,
 
 static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 {
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_fw_crdump *crdump = &dev->persist->crdump;
 	union devlink_param_value saved_value;
 	int err;
 
@@ -3889,6 +3922,14 @@ static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink)
 						 &saved_value);
 	if (!err)
 		enable_4k_uar = saved_value.vbool;
+	err = devlink_param_driverinit_value_get(devlink,
+						 DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+						 &saved_value);
+	if (!err && crdump->snapshot_enable != saved_value.vbool) {
+		crdump->snapshot_enable = saved_value.vbool;
+		devlink_param_value_changed(devlink,
+					    DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT);
+	}
 }
 
 static int mlx4_devlink_reload(struct devlink *devlink,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 300b944e6e1e..dca6ab4eaa99 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -853,6 +853,7 @@ struct mlx4_vf_dev {
 };
 
 struct mlx4_fw_crdump {
+	bool snapshot_enable;
 	struct devlink_region *region_crspace;
 	struct devlink_region *region_fw_health;
 };
-- 
cgit v1.2.3


From 7a78c1e116d2a786fcd541c8828472d462c8821f Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jul 2018 17:08:16 +0200
Subject: media: cec-notifier: Get notifier by device and connector name

In non device-tree world, we can need to get the notifier by the driver
name directly and eventually defer probe if not yet created.

This patch adds a variant of the get function by using the device name
instead and will not create a notifier if not yet created.

But the i915 driver exposes at least 2 HDMI connectors, this patch also
adds the possibility to add a connector name tied to the notifier device
to form a tuple and associate different CEC controllers for each HDMI
connectors.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/media/cec/cec-notifier.c | 11 ++++++++---
 include/media/cec-notifier.h     | 27 ++++++++++++++++++++++++---
 2 files changed, 32 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/media/cec/cec-notifier.c b/drivers/media/cec/cec-notifier.c
index 16dffa06c913..dd2078b27a41 100644
--- a/drivers/media/cec/cec-notifier.c
+++ b/drivers/media/cec/cec-notifier.c
@@ -21,6 +21,7 @@ struct cec_notifier {
 	struct list_head head;
 	struct kref kref;
 	struct device *dev;
+	const char *conn;
 	struct cec_adapter *cec_adap;
 	void (*callback)(struct cec_adapter *adap, u16 pa);
 
@@ -30,13 +31,14 @@ struct cec_notifier {
 static LIST_HEAD(cec_notifiers);
 static DEFINE_MUTEX(cec_notifiers_lock);
 
-struct cec_notifier *cec_notifier_get(struct device *dev)
+struct cec_notifier *cec_notifier_get_conn(struct device *dev, const char *conn)
 {
 	struct cec_notifier *n;
 
 	mutex_lock(&cec_notifiers_lock);
 	list_for_each_entry(n, &cec_notifiers, head) {
-		if (n->dev == dev) {
+		if (n->dev == dev &&
+		    (!conn || !strcmp(n->conn, conn))) {
 			kref_get(&n->kref);
 			mutex_unlock(&cec_notifiers_lock);
 			return n;
@@ -46,6 +48,8 @@ struct cec_notifier *cec_notifier_get(struct device *dev)
 	if (!n)
 		goto unlock;
 	n->dev = dev;
+	if (conn)
+		n->conn = kstrdup(conn, GFP_KERNEL);
 	n->phys_addr = CEC_PHYS_ADDR_INVALID;
 	mutex_init(&n->lock);
 	kref_init(&n->kref);
@@ -54,7 +58,7 @@ unlock:
 	mutex_unlock(&cec_notifiers_lock);
 	return n;
 }
-EXPORT_SYMBOL_GPL(cec_notifier_get);
+EXPORT_SYMBOL_GPL(cec_notifier_get_conn);
 
 static void cec_notifier_release(struct kref *kref)
 {
@@ -62,6 +66,7 @@ static void cec_notifier_release(struct kref *kref)
 		container_of(kref, struct cec_notifier, kref);
 
 	list_del(&n->head);
+	kfree(n->conn);
 	kfree(n);
 }
 
diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h
index cf0add70b0e7..814eeef35a5c 100644
--- a/include/media/cec-notifier.h
+++ b/include/media/cec-notifier.h
@@ -20,8 +20,10 @@ struct cec_notifier;
 #if IS_REACHABLE(CONFIG_CEC_CORE) && IS_ENABLED(CONFIG_CEC_NOTIFIER)
 
 /**
- * cec_notifier_get - find or create a new cec_notifier for the given device.
+ * cec_notifier_get_conn - find or create a new cec_notifier for the given
+ * device and connector tuple.
  * @dev: device that sends the events.
+ * @conn: the connector name from which the event occurs
  *
  * If a notifier for device @dev already exists, then increase the refcount
  * and return that notifier.
@@ -31,7 +33,8 @@ struct cec_notifier;
  *
  * Return NULL if the memory could not be allocated.
  */
-struct cec_notifier *cec_notifier_get(struct device *dev);
+struct cec_notifier *cec_notifier_get_conn(struct device *dev,
+					   const char *conn);
 
 /**
  * cec_notifier_put - decrease refcount and delete when the refcount reaches 0.
@@ -85,7 +88,8 @@ void cec_register_cec_notifier(struct cec_adapter *adap,
 			       struct cec_notifier *notifier);
 
 #else
-static inline struct cec_notifier *cec_notifier_get(struct device *dev)
+static inline struct cec_notifier *cec_notifier_get_conn(struct device *dev,
+							 const char *conn)
 {
 	/* A non-NULL pointer is expected on success */
 	return (struct cec_notifier *)0xdeadfeed;
@@ -120,6 +124,23 @@ static inline void cec_register_cec_notifier(struct cec_adapter *adap,
 }
 #endif
 
+/**
+ * cec_notifier_get - find or create a new cec_notifier for the given device.
+ * @dev: device that sends the events.
+ *
+ * If a notifier for device @dev already exists, then increase the refcount
+ * and return that notifier.
+ *
+ * If it doesn't exist, then allocate a new notifier struct and return a
+ * pointer to that new struct.
+ *
+ * Return NULL if the memory could not be allocated.
+ */
+static inline struct cec_notifier *cec_notifier_get(struct device *dev)
+{
+	return cec_notifier_get_conn(dev, NULL);
+}
+
 /**
  * cec_notifier_phys_addr_invalidate() - set the physical address to INVALID
  *
-- 
cgit v1.2.3


From 57e94c8b974db2d83c60e1139c89a70806abbea0 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jul 2018 17:08:18 +0200
Subject: mfd: cros-ec: Increase maximum mkbp event size

Having a 16 byte mkbp event size makes it possible to send CEC
messages from the EC to the AP directly inside the mkbp event
instead of first doing a notification and then a read.

Signed-off-by: Stefan Adolfsson <sadolfsson@chromium.org>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/platform/chrome/cros_ec_proto.c | 40 +++++++++++++++++++++++++--------
 include/linux/mfd/cros_ec.h             |  2 +-
 include/linux/mfd/cros_ec_commands.h    | 16 +++++++++++++
 3 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 8350ca2311c7..398393ab5df8 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -506,10 +506,31 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
 }
 EXPORT_SYMBOL(cros_ec_cmd_xfer_status);
 
+static int get_next_event_xfer(struct cros_ec_device *ec_dev,
+			       struct cros_ec_command *msg,
+			       int version, uint32_t size)
+{
+	int ret;
+
+	msg->version = version;
+	msg->command = EC_CMD_GET_NEXT_EVENT;
+	msg->insize = size;
+	msg->outsize = 0;
+
+	ret = cros_ec_cmd_xfer(ec_dev, msg);
+	if (ret > 0) {
+		ec_dev->event_size = ret - 1;
+		memcpy(&ec_dev->event_data, msg->data, ec_dev->event_size);
+	}
+
+	return ret;
+}
+
 static int get_next_event(struct cros_ec_device *ec_dev)
 {
 	u8 buffer[sizeof(struct cros_ec_command) + sizeof(ec_dev->event_data)];
 	struct cros_ec_command *msg = (struct cros_ec_command *)&buffer;
+	static int cmd_version = 1;
 	int ret;
 
 	if (ec_dev->suspended) {
@@ -517,18 +538,19 @@ static int get_next_event(struct cros_ec_device *ec_dev)
 		return -EHOSTDOWN;
 	}
 
-	msg->version = 0;
-	msg->command = EC_CMD_GET_NEXT_EVENT;
-	msg->insize = sizeof(ec_dev->event_data);
-	msg->outsize = 0;
+	if (cmd_version == 1) {
+		ret = get_next_event_xfer(ec_dev, msg, cmd_version,
+				sizeof(struct ec_response_get_next_event_v1));
+		if (ret < 0 || msg->result != EC_RES_INVALID_VERSION)
+			return ret;
 
-	ret = cros_ec_cmd_xfer(ec_dev, msg);
-	if (ret > 0) {
-		ec_dev->event_size = ret - 1;
-		memcpy(&ec_dev->event_data, msg->data,
-		       sizeof(ec_dev->event_data));
+		/* Fallback to version 0 for future send attempts */
+		cmd_version = 0;
 	}
 
+	ret = get_next_event_xfer(ec_dev, msg, cmd_version,
+				  sizeof(struct ec_response_get_next_event));
+
 	return ret;
 }
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 32421dfeb996..20949dde35cd 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -147,7 +147,7 @@ struct cros_ec_device {
 	bool mkbp_event_supported;
 	struct blocking_notifier_head event_notifier;
 
-	struct ec_response_get_next_event event_data;
+	struct ec_response_get_next_event_v1 event_data;
 	int event_size;
 	u32 host_event_wake_mask;
 };
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index f2edd9969b40..c4f0caa642be 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2093,12 +2093,28 @@ union ec_response_get_next_data {
 	uint32_t   sysrq;
 } __packed;
 
+union ec_response_get_next_data_v1 {
+	uint8_t key_matrix[16];
+	uint32_t host_event;
+	uint32_t buttons;
+	uint32_t switches;
+	uint32_t sysrq;
+	uint32_t cec_events;
+	uint8_t cec_message[16];
+} __packed;
+
 struct ec_response_get_next_event {
 	uint8_t event_type;
 	/* Followed by event data if any */
 	union ec_response_get_next_data data;
 } __packed;
 
+struct ec_response_get_next_event_v1 {
+	uint8_t event_type;
+	/* Followed by event data if any */
+	union ec_response_get_next_data_v1 data;
+} __packed;
+
 /* Bit indices for buttons and switches.*/
 /* Buttons */
 #define EC_MKBP_POWER_BUTTON	0
-- 
cgit v1.2.3


From f47674e5263dfe0f89a640fc8b7dc54f9b79d5db Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 4 Jul 2018 17:08:19 +0200
Subject: mfd: cros-ec: Introduce CEC commands and events definitions.

The EC can expose a CEC bus, this patch adds the CEC related definitions
needed by the cros-ec-cec driver.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 81 ++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index c4f0caa642be..5079bc2d4378 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -804,6 +804,8 @@ enum ec_feature_code {
 	EC_FEATURE_MOTION_SENSE_FIFO = 24,
 	/* EC has RTC feature that can be controlled by host commands */
 	EC_FEATURE_RTC = 27,
+	/* EC supports CEC commands */
+	EC_FEATURE_CEC = 35,
 };
 
 #define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32))
@@ -2078,6 +2080,12 @@ enum ec_mkbp_event {
 	/* EC sent a sysrq command */
 	EC_MKBP_EVENT_SYSRQ = 6,
 
+	/* Notify the AP that something happened on CEC */
+	EC_MKBP_EVENT_CEC_EVENT = 8,
+
+	/* Send an incoming CEC message to the AP */
+	EC_MKBP_EVENT_CEC_MESSAGE = 9,
+
 	/* Number of MKBP events */
 	EC_MKBP_EVENT_COUNT,
 };
@@ -2845,6 +2853,79 @@ struct ec_params_reboot_ec {
 #define EC_ACPI_MEM_VERSION_CURRENT 1
 
 
+/*****************************************************************************/
+/*
+ * HDMI CEC commands
+ *
+ * These commands are for sending and receiving message via HDMI CEC
+ */
+#define EC_MAX_CEC_MSG_LEN 16
+
+/* CEC message from the AP to be written on the CEC bus */
+#define EC_CMD_CEC_WRITE_MSG 0x00B8
+
+/**
+ * struct ec_params_cec_write - Message to write to the CEC bus
+ * @msg: message content to write to the CEC bus
+ */
+struct ec_params_cec_write {
+	uint8_t msg[EC_MAX_CEC_MSG_LEN];
+} __packed;
+
+/* Set various CEC parameters */
+#define EC_CMD_CEC_SET 0x00BA
+
+/**
+ * struct ec_params_cec_set - CEC parameters set
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC
+ *	or 1 to enable CEC functionality, in case cmd is CEC_CMD_LOGICAL_ADDRESS,
+ *	this field encodes the requested logical address between 0 and 15
+ *	or 0xff to unregister
+ */
+struct ec_params_cec_set {
+	uint8_t cmd; /* enum cec_command */
+	uint8_t val;
+} __packed;
+
+/* Read various CEC parameters */
+#define EC_CMD_CEC_GET 0x00BB
+
+/**
+ * struct ec_params_cec_get - CEC parameters get
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ */
+struct ec_params_cec_get {
+	uint8_t cmd; /* enum cec_command */
+} __packed;
+
+/**
+ * struct ec_response_cec_get - CEC parameters get response
+ * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is
+ *	disabled or 1 if CEC functionality is enabled,
+ *	in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the
+ *	configured logical address between 0 and 15 or 0xff if unregistered
+ */
+struct ec_response_cec_get {
+	uint8_t val;
+} __packed;
+
+/* CEC parameters command */
+enum ec_cec_command {
+	/* CEC reading, writing and events enable */
+	CEC_CMD_ENABLE,
+	/* CEC logical address  */
+	CEC_CMD_LOGICAL_ADDRESS,
+};
+
+/* Events from CEC to AP */
+enum mkbp_cec_event {
+	/* Outgoing message was acknowledged by a follower */
+	EC_MKBP_CEC_SEND_OK			= BIT(0),
+	/* Outgoing message was not acknowledged */
+	EC_MKBP_CEC_SEND_FAILED			= BIT(1),
+};
+
 /*****************************************************************************/
 /*
  * Special commands
-- 
cgit v1.2.3


From 03133347b4452ef9b1f1456b92f5fafa467c0655 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Mon, 30 Apr 2018 18:33:24 +0200
Subject: KVM: s390: a utility function for migration

Introduce a utility function that will be used later on for storage
attributes migration, and use it in kvm_main.c to replace existing code
that does the same thing.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Message-Id: <1525106005-13931-2-git-send-email-imbrenda@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h | 7 +++++++
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4ee7bc548a83..5f138b40e433 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -309,6 +309,13 @@ static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memsl
 	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 }
 
+static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
+{
+	unsigned long len = kvm_dirty_bitmap_bytes(memslot);
+
+	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
+}
+
 struct kvm_s390_adapter_int {
 	u64 ind_addr;
 	u64 summary_addr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47507faab5..f519eb8d06b1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1169,7 +1169,7 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 
 	n = kvm_dirty_bitmap_bytes(memslot);
 
-	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
+	dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
 	memset(dirty_bitmap_buffer, 0, n);
 
 	spin_lock(&kvm->mmu_lock);
-- 
cgit v1.2.3


From afed7bcf9487bb28e2e2b016a195085c07416c0b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 11 Jul 2018 10:36:07 +0100
Subject: locking/refcount: Always allow checked forms

In many cases, it would be useful to be able to use the full
sanity-checked refcount helpers regardless of CONFIG_REFCOUNT_FULL,
as this would help to avoid duplicate warnings where callers try to
sanity-check refcount manipulation.

This patch refactors things such that the full refcount helpers were
always built, as refcount_${op}_checked(), such that they can be used
regardless of CONFIG_REFCOUNT_FULL. This will allow code which *always*
wants a checked refcount to opt-in, avoiding the need to duplicate the
logic for warnings.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180711093607.1644-1-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/refcount.h | 27 +++++++++++++++++-------
 lib/refcount.c           | 53 +++++++++++++++++++++++-------------------------
 2 files changed, 45 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index c36addd27dd5..53c5eca24d83 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -43,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
+extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r);
+extern void refcount_add_checked(unsigned int i, refcount_t *r);
+
+extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r);
+extern void refcount_inc_checked(refcount_t *r);
+
+extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r);
+
+extern __must_check bool refcount_dec_and_test_checked(refcount_t *r);
+extern void refcount_dec_checked(refcount_t *r);
+
 #ifdef CONFIG_REFCOUNT_FULL
-extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r);
-extern void refcount_add(unsigned int i, refcount_t *r);
 
-extern __must_check bool refcount_inc_not_zero(refcount_t *r);
-extern void refcount_inc(refcount_t *r);
+#define refcount_add_not_zero	refcount_add_not_zero_checked
+#define refcount_add		refcount_add_checked
+
+#define refcount_inc_not_zero	refcount_inc_not_zero_checked
+#define refcount_inc		refcount_inc_checked
+
+#define refcount_sub_and_test	refcount_sub_and_test_checked
 
-extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r);
+#define refcount_dec_and_test	refcount_dec_and_test_checked
+#define refcount_dec		refcount_dec_checked
 
-extern __must_check bool refcount_dec_and_test(refcount_t *r);
-extern void refcount_dec(refcount_t *r);
 #else
 # ifdef CONFIG_ARCH_HAS_REFCOUNT
 #  include <asm/refcount.h>
diff --git a/lib/refcount.c b/lib/refcount.c
index 4bd842f20749..5c4aaefc0682 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -40,10 +40,8 @@
 #include <linux/spinlock.h>
 #include <linux/bug.h>
 
-#ifdef CONFIG_REFCOUNT_FULL
-
 /**
- * refcount_add_not_zero - add a value to a refcount unless it is 0
+ * refcount_add_not_zero_checked - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -60,7 +58,7 @@
  *
  * Return: false if the passed refcount is 0, true otherwise
  */
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
+bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -81,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_add_not_zero);
+EXPORT_SYMBOL(refcount_add_not_zero_checked);
 
 /**
- * refcount_add - add a value to a refcount
+ * refcount_add_checked - add a value to a refcount
  * @i: the value to add to the refcount
  * @r: the refcount
  *
@@ -99,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero);
  * cases, refcount_inc(), or one of its variants, should instead be used to
  * increment a reference count.
  */
-void refcount_add(unsigned int i, refcount_t *r)
+void refcount_add_checked(unsigned int i, refcount_t *r)
 {
-	WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_add);
+EXPORT_SYMBOL(refcount_add_checked);
 
 /**
- * refcount_inc_not_zero - increment a refcount unless it is 0
+ * refcount_inc_not_zero_checked - increment a refcount unless it is 0
  * @r: the refcount to increment
  *
  * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN.
@@ -117,7 +115,7 @@ EXPORT_SYMBOL(refcount_add);
  *
  * Return: true if the increment was successful, false otherwise
  */
-bool refcount_inc_not_zero(refcount_t *r)
+bool refcount_inc_not_zero_checked(refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -136,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r)
 
 	return true;
 }
-EXPORT_SYMBOL(refcount_inc_not_zero);
+EXPORT_SYMBOL(refcount_inc_not_zero_checked);
 
 /**
- * refcount_inc - increment a refcount
+ * refcount_inc_checked - increment a refcount
  * @r: the refcount to increment
  *
  * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN.
@@ -150,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero);
  * Will WARN if the refcount is 0, as this represents a possible use-after-free
  * condition.
  */
-void refcount_inc(refcount_t *r)
+void refcount_inc_checked(refcount_t *r)
 {
-	WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+	WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n");
 }
-EXPORT_SYMBOL(refcount_inc);
+EXPORT_SYMBOL(refcount_inc_checked);
 
 /**
- * refcount_sub_and_test - subtract from a refcount and test if it is 0
+ * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0
  * @i: amount to subtract from the refcount
  * @r: the refcount
  *
@@ -176,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r)
 {
 	unsigned int new, val = atomic_read(&r->refs);
 
@@ -194,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r)
 
 	return !new;
 }
-EXPORT_SYMBOL(refcount_sub_and_test);
+EXPORT_SYMBOL(refcount_sub_and_test_checked);
 
 /**
- * refcount_dec_and_test - decrement a refcount and test if it is 0
+ * refcount_dec_and_test_checked - decrement a refcount and test if it is 0
  * @r: the refcount
  *
  * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
@@ -209,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test);
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-bool refcount_dec_and_test(refcount_t *r)
+bool refcount_dec_and_test_checked(refcount_t *r)
 {
-	return refcount_sub_and_test(1, r);
+	return refcount_sub_and_test_checked(1, r);
 }
-EXPORT_SYMBOL(refcount_dec_and_test);
+EXPORT_SYMBOL(refcount_dec_and_test_checked);
 
 /**
- * refcount_dec - decrement a refcount
+ * refcount_dec_checked - decrement a refcount
  * @r: the refcount
  *
  * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
@@ -225,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test);
  * Provides release memory ordering, such that prior loads and stores are done
  * before.
  */
-void refcount_dec(refcount_t *r)
+void refcount_dec_checked(refcount_t *r)
 {
-	WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n");
+	WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n");
 }
-EXPORT_SYMBOL(refcount_dec);
-#endif /* CONFIG_REFCOUNT_FULL */
+EXPORT_SYMBOL(refcount_dec_checked);
 
 /**
  * refcount_dec_if_one - decrement a refcount if it is 1
-- 
cgit v1.2.3


From 05814a10370b3252fe2b0898b6adac3cdd531096 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Fri, 13 Jul 2018 17:07:26 +0300
Subject: block: remove blkdev_entry_to_request() macro

Remove blkdev_entry_to_request() macro, which remained unused through
the observable history, also note that it repeats list_entry_rq() macro
verbatim.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 137759862f07..1939ed95f936 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1436,8 +1436,6 @@ enum blk_default_limits {
 	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
 };
 
-#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
-
 static inline unsigned long queue_segment_boundary(struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
-- 
cgit v1.2.3


From 8e1b706b6e819bed215c0db16345568864660393 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Fri, 13 Jul 2018 16:23:23 +0200
Subject: cpu/hotplug: Expose SMT control init function

The L1TF mitigation will gain a commend line parameter which allows to set
a combination of hypervisor mitigation and SMT control.

Expose cpu_smt_disable() so the command line parser can tweak SMT settings.

[ tglx: Split out of larger patch and made it preserve an already existing
  	force off state ]

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.039715135@linutronix.de
---
 include/linux/cpu.h |  2 ++
 kernel/cpu.c        | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7532cbf27b1d..c9b23ad27b38 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,8 +177,10 @@ enum cpuhp_smt_control {
 
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
+extern void cpu_smt_disable(bool force);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
+static inline void cpu_smt_disable(bool force) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d79e24df2420..8453e31f2d1a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -347,13 +347,23 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
-static int __init smt_cmdline_disable(char *str)
+void __init cpu_smt_disable(bool force)
 {
-	cpu_smt_control = CPU_SMT_DISABLED;
-	if (str && !strcmp(str, "force")) {
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		return;
+
+	if (force) {
 		pr_info("SMT: Force disabled\n");
 		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
+	} else {
+		cpu_smt_control = CPU_SMT_DISABLED;
 	}
+}
+
+static int __init smt_cmdline_disable(char *str)
+{
+	cpu_smt_disable(str && !strcmp(str, "force"));
 	return 0;
 }
 early_param("nosmt", smt_cmdline_disable);
-- 
cgit v1.2.3


From fee0aede6f4739c87179eca76136f83210953b86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 13 Jul 2018 16:23:24 +0200
Subject: cpu/hotplug: Set CPU_SMT_NOT_SUPPORTED early

The CPU_SMT_NOT_SUPPORTED state is set (if the processor does not support
SMT) when the sysfs SMT control file is initialized.

That was fine so far as this was only required to make the output of the
control file correct and to prevent writes in that case.

With the upcoming l1tf command line parameter, this needs to be set up
before the L1TF mitigation selection and command line parsing happens.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20180713142323.121795971@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c |  6 ++++++
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 13 ++++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7df7df9a2753..4be69a7bef0b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -58,6 +58,12 @@ void __init check_bugs(void)
 {
 	identify_boot_cpu();
 
+	/*
+	 * identify_boot_cpu() initialized SMT support information, let the
+	 * core code know.
+	 */
+	cpu_smt_check_topology();
+
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
 		print_cpu_info(&boot_cpu_data);
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c9b23ad27b38..ccdf3a67ce56 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -178,9 +178,11 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology(void) { }
 #endif
 
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 8453e31f2d1a..37eec872042b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -361,6 +361,16 @@ void __init cpu_smt_disable(bool force)
 	}
 }
 
+/*
+ * The decision whether SMT is supported can only be done after the full
+ * CPU identification. Called from architecture code.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!topology_smt_supported())
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -2115,9 +2125,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	if (!topology_smt_supported())
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }
-- 
cgit v1.2.3


From 3e977ac6179b39faa3c0eda5fce4f00663ae298d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 12 Jul 2018 19:53:13 +0100
Subject: drm/i915: Prevent writing into a read-only object via a GGTT mmap

If the user has created a read-only object, they should not be allowed
to circumvent the write protection by using a GGTT mmapping. Deny it.

Also most machines do not support read-only GGTT PTEs, so again we have
to reject attempted writes. Fortunately, this is known a priori, so we
can at least reject in the call to create the mmap (with a sanity check
in the fault handler).

v2: Check the vma->vm_flags during mmap() to allow readonly access.
v3: Remove VM_MAYWRITE to curtail mprotect()

Testcase: igt/gem_userptr_blits/readonly_mmap*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com> #v1
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-4-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/drm_gem.c                         |  9 +++++++++
 drivers/gpu/drm/i915/i915_gem.c                   |  4 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.c               | 12 +++++++-----
 drivers/gpu/drm/i915/i915_gem_object.h            | 13 ++++++++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.c           |  2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_context.c |  5 +++--
 include/drm/drm_vma_manager.h                     |  1 +
 7 files changed, 37 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4a16d7b26c89..bf90625df3c5 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1036,6 +1036,15 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 		return -EACCES;
 	}
 
+	if (node->readonly) {
+		if (vma->vm_flags & VM_WRITE) {
+			drm_gem_object_put_unlocked(obj);
+			return -EINVAL;
+		}
+
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+
 	ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT,
 			       vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed2be33ec58a..1910c66f48e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2012,6 +2012,10 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
 	pgoff_t page_offset;
 	int ret;
 
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
 	/* We don't use vmf->pgoff since that has the fake offset */
 	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 86a9618bab24..3d75f2bb5623 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -206,7 +206,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 
 	/* Applicable to VLV, and gen8+ */
 	pte_flags = 0;
-	if (vma->obj->gt_ro)
+	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
@@ -2491,8 +2491,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
 	dma_addr_t addr;
 
-	/* The GTT does not support read-only mappings */
-	GEM_BUG_ON(flags & PTE_READ_ONLY);
+	/*
+	 * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+	 * not to allow the user to override access to a read only page.
+	 */
 
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
 	gtt_entries += vma->node.start >> PAGE_SHIFT;
@@ -2731,7 +2733,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 	pte_flags = 0;
-	if (obj->gt_ro)
+	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	intel_runtime_pm_get(i915);
@@ -2769,7 +2771,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 
 	/* Currently applicable only to VLV */
 	pte_flags = 0;
-	if (vma->obj->gt_ro)
+	if (i915_gem_object_is_readonly(vma->obj))
 		pte_flags |= PTE_READ_ONLY;
 
 	if (flags & I915_VMA_LOCAL_BIND) {
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index c3c6f2e588fb..56e9f00d2c4c 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -141,7 +141,6 @@ struct drm_i915_gem_object {
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
 	 */
-	unsigned long gt_ro:1;
 	unsigned int cache_level:3;
 	unsigned int cache_coherent:2;
 #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
@@ -358,6 +357,18 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
 	reservation_object_unlock(obj->resv);
 }
 
+static inline void
+i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
+{
+	obj->base.vma_node.readonly = true;
+}
+
+static inline bool
+i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
+{
+	return obj->base.vma_node.readonly;
+}
+
 static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6de88add508a..69bd7f697f6d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1100,7 +1100,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 	 * if supported by the platform's GGTT.
 	 */
 	if (vm->has_read_only)
-		obj->gt_ro = 1;
+		i915_gem_object_set_readonly(obj);
 
 	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma))
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 2e2a3c44b8e9..1c92560d35da 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -487,7 +487,8 @@ static int igt_ctx_readonly(void *arg)
 					goto out_unlock;
 				}
 
-				obj->gt_ro = prandom_u32_state(&prng);
+				if (prandom_u32_state(&prng) & 1)
+					i915_gem_object_set_readonly(obj);
 			}
 
 			intel_runtime_pm_get(i915);
@@ -518,7 +519,7 @@ static int igt_ctx_readonly(void *arg)
 		unsigned int num_writes;
 
 		num_writes = rem;
-		if (obj->gt_ro)
+		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
 		err = cpu_check(obj, num_writes);
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
index 8758df94e9a0..c7987daeaed0 100644
--- a/include/drm/drm_vma_manager.h
+++ b/include/drm/drm_vma_manager.h
@@ -41,6 +41,7 @@ struct drm_vma_offset_node {
 	rwlock_t vm_lock;
 	struct drm_mm_node vm_node;
 	struct rb_root vm_files;
+	bool readonly:1;
 };
 
 struct drm_vma_offset_manager {
-- 
cgit v1.2.3


From 811e299f4645588cc7a1b78d97b6847c155324b9 Mon Sep 17 00:00:00 2001
From: Romuald CARI <romuald.cari@devialet.com>
Date: Thu, 7 Jun 2018 16:08:02 +0200
Subject: ieee802154: add rx LQI from userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Link Quality Indication data exposed by drivers could not be accessed from
userspace. Since this data is per-datagram received, it makes sense to make it
available to userspace application through the ancillary data mechanism in
recvmsg rather than through ioctls. This can be activated using the socket
option WPAN_WANTLQI under SOL_IEEE802154 protocol.

This LQI data is available in the ancillary data buffer under the SOL_IEEE802154
level as the type WPAN_LQI. The value is an unsigned byte indicating the link
quality with values ranging 0-255.

Signed-off-by: Romuald Cari <romuald.cari@devialet.com>
Signed-off-by: Clément Peron <clement.peron@devialet.com>
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 include/net/af_ieee802154.h |  1 +
 net/ieee802154/socket.c     | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h
index a5563d27a3eb..8003a9f6eb43 100644
--- a/include/net/af_ieee802154.h
+++ b/include/net/af_ieee802154.h
@@ -56,6 +56,7 @@ struct sockaddr_ieee802154 {
 #define WPAN_WANTACK		0
 #define WPAN_SECURITY		1
 #define WPAN_SECURITY_LEVEL	2
+#define WPAN_WANTLQI		3
 
 #define WPAN_SECURITY_DEFAULT	0
 #define WPAN_SECURITY_OFF	1
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a60658c85a9a..bc6b912603f1 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -25,6 +25,7 @@
 #include <linux/termios.h>	/* For TIOCOUTQ/INQ */
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/socket.h>
 #include <net/datalink.h>
 #include <net/psnap.h>
 #include <net/sock.h>
@@ -452,6 +453,7 @@ struct dgram_sock {
 	unsigned int bound:1;
 	unsigned int connected:1;
 	unsigned int want_ack:1;
+	unsigned int want_lqi:1;
 	unsigned int secen:1;
 	unsigned int secen_override:1;
 	unsigned int seclevel:3;
@@ -486,6 +488,7 @@ static int dgram_init(struct sock *sk)
 	struct dgram_sock *ro = dgram_sk(sk);
 
 	ro->want_ack = 1;
+	ro->want_lqi = 0;
 	return 0;
 }
 
@@ -713,6 +716,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	size_t copied = 0;
 	int err = -EOPNOTSUPP;
 	struct sk_buff *skb;
+	struct dgram_sock *ro = dgram_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_ieee802154 *, saddr, msg->msg_name);
 
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
@@ -744,6 +748,13 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		*addr_len = sizeof(*saddr);
 	}
 
+	if (ro->want_lqi) {
+		err = put_cmsg(msg, SOL_IEEE802154, WPAN_WANTLQI,
+			       sizeof(uint8_t), &(mac_cb(skb)->lqi));
+		if (err)
+			goto done;
+	}
+
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
 done:
@@ -847,6 +858,9 @@ static int dgram_getsockopt(struct sock *sk, int level, int optname,
 	case WPAN_WANTACK:
 		val = ro->want_ack;
 		break;
+	case WPAN_WANTLQI:
+		val = ro->want_lqi;
+		break;
 	case WPAN_SECURITY:
 		if (!ro->secen_override)
 			val = WPAN_SECURITY_DEFAULT;
@@ -892,6 +906,9 @@ static int dgram_setsockopt(struct sock *sk, int level, int optname,
 	case WPAN_WANTACK:
 		ro->want_ack = !!val;
 		break;
+	case WPAN_WANTLQI:
+		ro->want_lqi = !!val;
+		break;
 	case WPAN_SECURITY:
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN) &&
 		    !ns_capable(net->user_ns, CAP_NET_RAW)) {
-- 
cgit v1.2.3


From 65ca8d9670b70aa8076054c0c23be032c6ac5c77 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Thu, 5 Jul 2018 18:26:01 +0530
Subject: rdma/cxgb4: Add support for 64Byte cqes

This patch adds support for iw_cxb4 to extend cqes from existing 32Byte
size to 64Byte.

Also includes adds backward compatibility support (for 32Byte) to work
with older libraries.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/cxgb4/cq.c       | 43 +++++++++++++++++++++++++++++-----
 drivers/infiniband/hw/cxgb4/ev.c       |  5 ++--
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h |  1 +
 drivers/infiniband/hw/cxgb4/t4.h       | 18 +++++++++++---
 include/uapi/rdma/cxgb4-abi.h          | 12 +++++++++-
 5 files changed, 67 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index a3a829951ac4..a055f9f08e76 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	int user = (uctx != &rdev->uctx);
 	int ret;
 	struct sk_buff *skb;
+	struct c4iw_ucontext *ucontext = NULL;
+
+	if (user)
+		ucontext = container_of(uctx, struct c4iw_ucontext, uctx);
 
 	cq->cqid = c4iw_get_cqid(rdev, uctx);
 	if (!cq->cqid) {
@@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
 	memset(cq->queue, 0, cq->memsize);
 
+	if (user && ucontext->is_32b_cqe) {
+		cq->qp_errp = &((struct t4_status_page *)
+		((u8 *)cq->queue + (cq->size - 1) *
+		 (sizeof(*cq->queue) / 2)))->qp_err;
+	} else {
+		cq->qp_errp = &((struct t4_status_page *)
+		((u8 *)cq->queue + (cq->size - 1) *
+		 sizeof(*cq->queue)))->qp_err;
+	}
+
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + sizeof *res;
 
@@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			FW_RI_RES_WR_IQPCIECH_V(2) |
 			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
 			FW_RI_RES_WR_IQO_F |
-			FW_RI_RES_WR_IQESIZE_V(1));
+			((user && ucontext->is_32b_cqe) ?
+			 FW_RI_RES_WR_IQESIZE_V(1) :
+			 FW_RI_RES_WR_IQESIZE_V(2)));
 	res->u.cq.iqsize = cpu_to_be16(cq->size);
 	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
 
@@ -884,6 +900,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	int vector = attr->comp_vector;
 	struct c4iw_dev *rhp;
 	struct c4iw_cq *chp;
+	struct c4iw_create_cq ucmd;
 	struct c4iw_create_cq_resp uresp;
 	struct c4iw_ucontext *ucontext = NULL;
 	int ret, wr_len;
@@ -899,9 +916,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	if (vector >= rhp->rdev.lldi.nciq)
 		return ERR_PTR(-EINVAL);
 
+	if (ib_context) {
+		ucontext = to_c4iw_ucontext(ib_context);
+		if (udata->inlen < sizeof(ucmd))
+			ucontext->is_32b_cqe = 1;
+	}
+
 	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
 	if (!chp)
 		return ERR_PTR(-ENOMEM);
+
 	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
 	if (!chp->wr_waitp) {
 		ret = -ENOMEM;
@@ -916,9 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		goto err_free_wr_wait;
 	}
 
-	if (ib_context)
-		ucontext = to_c4iw_ucontext(ib_context);
-
 	/* account for the status page. */
 	entries++;
 
@@ -942,13 +963,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	if (hwentries < 64)
 		hwentries = 64;
 
-	memsize = hwentries * sizeof *chp->cq.queue;
+	memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
+			(sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
 
 	/*
 	 * memsize must be a multiple of the page size if its a user cq.
 	 */
 	if (ucontext)
 		memsize = roundup(memsize, PAGE_SIZE);
+
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
 	chp->cq.vector = vector;
@@ -979,6 +1002,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		if (!mm2)
 			goto err_free_mm;
 
+		memset(&uresp, 0, sizeof(uresp));
 		uresp.qid_mask = rhp->rdev.cqmask;
 		uresp.cqid = chp->cq.cqid;
 		uresp.size = chp->cq.size;
@@ -988,9 +1012,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		ucontext->key += PAGE_SIZE;
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
+		/* communicate to the userspace that
+		 * kernel driver supports 64B CQE
+		 */
+		uresp.flags |= C4IW_64B_CQE;
+
 		spin_unlock(&ucontext->mmap_lock);
 		ret = ib_copy_to_udata(udata, &uresp,
-				       sizeof(uresp) - sizeof(uresp.reserved));
+				       ucontext->is_32b_cqe ?
+				       sizeof(uresp) - sizeof(uresp.flags) :
+				       sizeof(uresp));
 		if (ret)
 			goto err_free_mm2;
 
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 3e9d8b277ab9..8741d23168f3 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
 		CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
 		CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
 
-	pr_debug("%016llx %016llx %016llx %016llx\n",
+	pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n",
 		 be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
-		 be64_to_cpu(p[3]));
+		 be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]),
+		 be64_to_cpu(p[6]), be64_to_cpu(p[7]));
 
 	/*
 	 * Ingress WRITE and READ_RESP errors provide
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 870649ff049c..8866bf992316 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -566,6 +566,7 @@ struct c4iw_ucontext {
 	spinlock_t mmap_lock;
 	struct list_head mmaps;
 	struct kref kref;
+	bool is_32b_cqe;
 };
 
 static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 8369c7c8de83..838a7dee48bd 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -179,9 +179,20 @@ struct t4_cqe {
 			__be32 wrid_hi;
 			__be32 wrid_low;
 		} gen;
+		struct {
+			__be32 stag;
+			__be32 msn;
+			__be32 reserved;
+			__be32 abs_rqe_idx;
+		} srcqe;
+		struct {
+			__be64 imm_data;
+		} imm_data_rcqe;
+
 		u64 drain_cookie;
+		__be64 flits[3];
 	} u;
-	__be64 reserved;
+	__be64 reserved[3];
 	__be64 bits_type_ts;
 };
 
@@ -565,6 +576,7 @@ struct t4_cq {
 	u16 cidx_inc;
 	u8 gen;
 	u8 error;
+	u8 *qp_errp;
 	unsigned long flags;
 };
 
@@ -698,12 +710,12 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 
 static inline int t4_cq_in_error(struct t4_cq *cq)
 {
-	return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
+	return *cq->qp_errp;
 }
 
 static inline void t4_set_cq_in_error(struct t4_cq *cq)
 {
-	((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
+	*cq->qp_errp = 1;
 }
 #endif
 
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index a159ba8dcf8f..65c9eacd3ffb 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@
  * In particular do not use pointer types -- pass pointers in __aligned_u64
  * instead.
  */
+
+enum {
+	C4IW_64B_CQE = (1 << 0)
+};
+
+struct c4iw_create_cq {
+	__u32 flags;
+	__u32 reserved;
+};
+
 struct c4iw_create_cq_resp {
 	__aligned_u64 key;
 	__aligned_u64 gts_key;
@@ -51,7 +61,7 @@ struct c4iw_create_cq_resp {
 	__u32 cqid;
 	__u32 size;
 	__u32 qid_mask;
-	__u32 reserved; /* explicit padding (optional for i386) */
+	__u32 flags;
 };
 
 enum {
-- 
cgit v1.2.3


From 4f91da26c81145f255cb153152ffed70014b1c41 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:38 -0700
Subject: xdp: add per mode attributes for attached programs

In preparation for support of simultaneous driver and hardware XDP
support add per-mode attributes.  The catch-all IFLA_XDP_PROG_ID
will still be reported, but user space can now also access the
program ID in a new IFLA_XDP_<mode>_PROG_ID attribute.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/if_link.h |  3 +++
 net/core/rtnetlink.c         | 30 ++++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index cf01b6824244..bc86c2b105ec 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -928,6 +928,9 @@ enum {
 	IFLA_XDP_ATTACHED,
 	IFLA_XDP_FLAGS,
 	IFLA_XDP_PROG_ID,
+	IFLA_XDP_DRV_PROG_ID,
+	IFLA_XDP_SKB_PROG_ID,
+	IFLA_XDP_HW_PROG_ID,
 	__IFLA_XDP_MAX,
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ef61222fdef..b40242459907 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
 			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4);	/* XDP_PROG_ID */
+			  nla_total_size(4) +	/* XDP_PROG_ID */
+			  nla_total_size(4);	/* XDP_<mode>_PROG_ID */
 
 	return xdp_size;
 }
@@ -1378,16 +1379,17 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
+	u32 prog_attr, prog_id;
 	struct nlattr *xdp;
-	u32 prog_id;
 	int err;
+	u8 mode;
 
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
 
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
-			 rtnl_xdp_attached_mode(dev, &prog_id));
+	mode = rtnl_xdp_attached_mode(dev, &prog_id);
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
 	if (err)
 		goto err_cancel;
 
@@ -1395,6 +1397,26 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 		err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
 		if (err)
 			goto err_cancel;
+
+		switch (mode) {
+		case XDP_ATTACHED_DRV:
+			prog_attr = IFLA_XDP_DRV_PROG_ID;
+			break;
+		case XDP_ATTACHED_SKB:
+			prog_attr = IFLA_XDP_SKB_PROG_ID;
+			break;
+		case XDP_ATTACHED_HW:
+			prog_attr = IFLA_XDP_HW_PROG_ID;
+			break;
+		case XDP_ATTACHED_NONE:
+		default:
+			err = -EINVAL;
+			goto err_cancel;
+		}
+
+		err = nla_put_u32(skb, prog_attr, prog_id);
+		if (err)
+			goto err_cancel;
 	}
 
 	nla_nest_end(skb, xdp);
-- 
cgit v1.2.3


From 6b8675897338f874c41612655a85d8e10cdb23d8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:39 -0700
Subject: xdp: don't make drivers report attachment mode

prog_attached of struct netdev_bpf should have been superseded
by simply setting prog_id long time ago, but we kept it around
to allow offloading drivers to communicate attachment mode (drv
vs hw).  Subsequently drivers were also allowed to report back
attachment flags (prog_flags), and since nowadays only programs
attached will XDP_FLAGS_HW_MODE can get offloaded, we can tell
the attachment mode from the flags driver reports.  Remove
prog_attached member.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c       | 1 -
 drivers/net/ethernet/cavium/thunder/nicvf_main.c    | 1 -
 drivers/net/ethernet/intel/i40e/i40e_main.c         | 1 -
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c       | 1 -
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c   | 1 -
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c      | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   | 1 -
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ---
 drivers/net/ethernet/qlogic/qede/qede_filter.c      | 1 -
 drivers/net/netdevsim/bpf.c                         | 1 -
 drivers/net/tun.c                                   | 1 -
 drivers/net/virtio_net.c                            | 1 -
 include/linux/netdevice.h                           | 5 -----
 net/core/dev.c                                      | 7 +++----
 net/core/rtnetlink.c                                | 8 ++++++--
 15 files changed, 9 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 1f0e872d0667..0584d07c8c33 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -219,7 +219,6 @@ int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		rc = bnxt_xdp_set(bp, xdp->prog);
 		break;
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!bp->xdp_prog;
 		xdp->prog_id = bp->xdp_prog ? bp->xdp_prog->aux->id : 0;
 		rc = 0;
 		break;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 135766c4296b..768f584f8392 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1848,7 +1848,6 @@ static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return nicvf_xdp_setup(nic, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!nic->xdp_prog;
 		xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 426b0ccb1fc6..51762428b40e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11841,7 +11841,6 @@ static int i40e_xdp(struct net_device *dev,
 	case XDP_SETUP_PROG:
 		return i40e_xdp_setup(vsi, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a8e21becb619..3862fea1c923 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9966,7 +9966,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return ixgbe_xdp_setup(dev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!(adapter->xdp_prog);
 		xdp->prog_id = adapter->xdp_prog ?
 			adapter->xdp_prog->aux->id : 0;
 		return 0;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 59416eddd840..d86446d202d5 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4462,7 +4462,6 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return ixgbevf_xdp_setup(dev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!(adapter->xdp_prog);
 		xdp->prog_id = adapter->xdp_prog ?
 			       adapter->xdp_prog->aux->id : 0;
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 65eb06e017e4..6785661d1a72 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2926,7 +2926,6 @@ static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return mlx4_xdp_set(dev, xdp->prog);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = mlx4_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bbd2fd0b2e06..e4a9a0768a81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4192,7 +4192,6 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return mlx5e_xdp_set(dev, xdp->prog);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = mlx5e_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 7df5ca37bfb8..d20714598613 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3459,9 +3459,6 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 		return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
 					 xdp->extack);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!nn->xdp_prog;
-		if (nn->dp.bpf_offload_xdp)
-			xdp->prog_attached = XDP_ATTACHED_HW;
 		xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
 		xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index b823bfe2ea4d..f9a327c821eb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1116,7 +1116,6 @@ int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG:
 		return qede_xdp_set(edev, xdp->prog);
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!edev->xdp_prog;
 		xdp->prog_id = edev->xdp_prog ? edev->xdp_prog->aux->id : 0;
 		return 0;
 	default:
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 75c25306d234..712e6f918065 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -567,7 +567,6 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
 	case XDP_QUERY_PROG:
-		bpf->prog_attached = ns->xdp_prog_mode;
 		bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
 		bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
 		return 0;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a192a017cc68..49a50219d0da 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1268,7 +1268,6 @@ static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return tun_xdp_set(dev, xdp->prog, xdp->extack);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = tun_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 53085c63277b..2ff08bc103a9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2343,7 +2343,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
 	case XDP_QUERY_PROG:
 		xdp->prog_id = virtnet_xdp_query(dev);
-		xdp->prog_attached = !!xdp->prog_id;
 		return 0;
 	default:
 		return -EINVAL;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b683971e500d..69a664789b33 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -819,10 +819,6 @@ enum bpf_netdev_command {
 	 */
 	XDP_SETUP_PROG,
 	XDP_SETUP_PROG_HW,
-	/* Check if a bpf program is set on the device.  The callee should
-	 * set @prog_attached to one of XDP_ATTACHED_* values, note that "true"
-	 * is equivalent to XDP_ATTACHED_DRV.
-	 */
 	XDP_QUERY_PROG,
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_VERIFIER_PREP,
@@ -849,7 +845,6 @@ struct netdev_bpf {
 		};
 		/* XDP_QUERY_PROG */
 		struct {
-			u8 prog_attached;
 			u32 prog_id;
 			/* flags with which program was installed */
 			u32 prog_flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index 89825c1eccdc..9fa3b3705a8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4926,7 +4926,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 		break;
 
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!old;
 		xdp->prog_id = old ? old->aux->id : 0;
 		break;
 
@@ -7593,13 +7592,13 @@ void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
 	WARN_ON(bpf_op(dev, xdp) < 0);
 }
 
-static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
 {
 	struct netdev_bpf xdp;
 
 	__dev_xdp_query(dev, bpf_op, &xdp);
 
-	return xdp.prog_attached;
+	return xdp.prog_id;
 }
 
 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
@@ -7634,7 +7633,7 @@ static void dev_xdp_uninstall(struct net_device *dev)
 		return;
 
 	__dev_xdp_query(dev, ndo_bpf, &xdp);
-	if (xdp.prog_attached == XDP_ATTACHED_NONE)
+	if (!xdp.prog_id)
 		return;
 
 	/* Program removal should always succeed */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b40242459907..02ebc056a688 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1372,9 +1372,13 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
 		return XDP_ATTACHED_NONE;
 
 	__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-	*prog_id = xdp.prog_id;
+	if (!xdp.prog_id)
+		return XDP_ATTACHED_NONE;
 
-	return xdp.prog_attached;
+	*prog_id = xdp.prog_id;
+	if (xdp.prog_flags & XDP_FLAGS_HW_MODE)
+		return XDP_ATTACHED_HW;
+	return XDP_ATTACHED_DRV;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
-- 
cgit v1.2.3


From 05296620f6d14dce0030b87e1e57891a770fb65c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:40 -0700
Subject: xdp: factor out common program/flags handling from drivers

Basic operations drivers perform during xdp setup and query can
be moved to helpers in the core.  Encapsulate program and flags
into a structure and add helpers.  Note that the structure is
intended as the "main" program information source in the driver.
Most drivers will additionally place the program pointer in their
fast path or ring structures.

The helpers don't have a huge impact now, but they will
decrease the code duplication when programs can be installed
in HW and driver at the same time.  Encapsulating the basic
operations in helpers will hopefully also reduce the number
of changes to drivers which adopt them.

Helpers could really be static inline, but they depend on
definition of struct netdev_bpf which means they'd have
to be placed in netdevice.h, an already 4500 line header.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  6 ++--
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 28 +++++++-----------
 drivers/net/netdevsim/bpf.c                        | 16 +++-------
 drivers/net/netdevsim/netdevsim.h                  |  4 +--
 include/net/xdp.h                                  | 13 +++++++++
 net/core/xdp.c                                     | 34 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_offload.py        |  4 +--
 7 files changed, 67 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 2a71a9ffd095..2021dda595b7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -553,8 +553,7 @@ struct nfp_net_dp {
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
- * @xdp_flags:		Flags with which XDP prog was loaded
- * @xdp_prog:		XDP prog (for ctrl path, both DRV and HW modes)
+ * @xdp:		Information about the attached XDP program
  * @max_r_vecs:		Number of allocated interrupt vectors for RX/TX
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
@@ -610,8 +609,7 @@ struct nfp_net {
 	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
-	u32 xdp_flags;
-	struct bpf_prog *xdp_prog;
+	struct xdp_attachment_info xdp;
 
 	unsigned int max_tx_rings;
 	unsigned int max_rx_rings;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index d20714598613..4bb589dbffbc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3417,34 +3417,29 @@ nfp_net_xdp_setup_drv(struct nfp_net *nn, struct bpf_prog *prog,
 	return nfp_net_ring_reconfig(nn, dp, extack);
 }
 
-static int
-nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
-		  struct netlink_ext_ack *extack)
+static int nfp_net_xdp_setup(struct nfp_net *nn, struct netdev_bpf *bpf)
 {
 	struct bpf_prog *drv_prog, *offload_prog;
 	int err;
 
-	if (nn->xdp_prog && (flags ^ nn->xdp_flags) & XDP_FLAGS_MODES)
+	if (!xdp_attachment_flags_ok(&nn->xdp, bpf))
 		return -EBUSY;
 
 	/* Load both when no flags set to allow easy activation of driver path
 	 * when program is replaced by one which can't be offloaded.
 	 */
-	drv_prog     = flags & XDP_FLAGS_HW_MODE  ? NULL : prog;
-	offload_prog = flags & XDP_FLAGS_DRV_MODE ? NULL : prog;
+	drv_prog     = bpf->flags & XDP_FLAGS_HW_MODE  ? NULL : bpf->prog;
+	offload_prog = bpf->flags & XDP_FLAGS_DRV_MODE ? NULL : bpf->prog;
 
-	err = nfp_net_xdp_setup_drv(nn, drv_prog, extack);
+	err = nfp_net_xdp_setup_drv(nn, drv_prog, bpf->extack);
 	if (err)
 		return err;
 
-	err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack);
-	if (err && flags & XDP_FLAGS_HW_MODE)
+	err = nfp_app_xdp_offload(nn->app, nn, offload_prog, bpf->extack);
+	if (err && bpf->flags & XDP_FLAGS_HW_MODE)
 		return err;
 
-	if (nn->xdp_prog)
-		bpf_prog_put(nn->xdp_prog);
-	nn->xdp_prog = prog;
-	nn->xdp_flags = flags;
+	xdp_attachment_setup(&nn->xdp, bpf);
 
 	return 0;
 }
@@ -3456,12 +3451,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 	case XDP_SETUP_PROG_HW:
-		return nfp_net_xdp_setup(nn, xdp->prog, xdp->flags,
-					 xdp->extack);
+		return nfp_net_xdp_setup(nn, xdp);
 	case XDP_QUERY_PROG:
-		xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0;
-		xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0;
-		return 0;
+		return xdp_attachment_query(&nn->xdp, xdp);
 	default:
 		return nfp_app_bpf(nn->app, nn, xdp);
 	}
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 712e6f918065..c485d97b5df4 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -199,10 +199,8 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
 {
 	int err;
 
-	if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) {
-		NSIM_EA(bpf->extack, "program loaded with different flags");
+	if (!xdp_attachment_flags_ok(&ns->xdp, bpf))
 		return -EBUSY;
-	}
 
 	if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) {
 		NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS");
@@ -219,11 +217,7 @@ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
 			return err;
 	}
 
-	if (ns->xdp_prog)
-		bpf_prog_put(ns->xdp_prog);
-
-	ns->xdp_prog = bpf->prog;
-	ns->xdp_flags = bpf->flags;
+	xdp_attachment_setup(&ns->xdp, bpf);
 
 	if (!bpf->prog)
 		ns->xdp_prog_mode = XDP_ATTACHED_NONE;
@@ -567,9 +561,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
 	case XDP_QUERY_PROG:
-		bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
-		bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
-		return 0;
+		return xdp_attachment_query(&ns->xdp, bpf);
 	case XDP_SETUP_PROG:
 		err = nsim_setup_prog_checks(ns, bpf);
 		if (err)
@@ -636,6 +628,6 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 {
 	WARN_ON(!list_empty(&ns->bpf_bound_progs));
 	WARN_ON(!list_empty(&ns->bpf_bound_maps));
-	WARN_ON(ns->xdp_prog);
+	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->bpf_offloaded);
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index d8a7cc995e88..69ffb4a2d14b 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -18,6 +18,7 @@
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
 
 #define DRV_NAME	"netdevsim"
 
@@ -67,9 +68,8 @@ struct netdevsim {
 	struct bpf_prog	*bpf_offloaded;
 	u32 bpf_offloaded_id;
 
-	u32 xdp_flags;
+	struct xdp_attachment_info xdp;
 	int xdp_prog_mode;
-	struct bpf_prog	*xdp_prog;
 
 	u32 prog_id_gen;
 
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 2deea7166a34..fcb033f51d8c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -144,4 +144,17 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
 	return unlikely(xdp->data_meta > xdp->data);
 }
 
+struct xdp_attachment_info {
+	struct bpf_prog *prog;
+	u32 flags;
+};
+
+struct netdev_bpf;
+int xdp_attachment_query(struct xdp_attachment_info *info,
+			 struct netdev_bpf *bpf);
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf);
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+			  struct netdev_bpf *bpf);
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 31c58719b5a9..57285383ed00 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -3,8 +3,11 @@
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  * Released under terms in GPL version 2.  See COPYING.
  */
+#include <linux/bpf.h>
+#include <linux/filter.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/rhashtable.h>
@@ -370,3 +373,34 @@ void xdp_return_buff(struct xdp_buff *xdp)
 	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
+
+int xdp_attachment_query(struct xdp_attachment_info *info,
+			 struct netdev_bpf *bpf)
+{
+	bpf->prog_id = info->prog ? info->prog->aux->id : 0;
+	bpf->prog_flags = info->prog ? info->flags : 0;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_query);
+
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf)
+{
+	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
+		NL_SET_ERR_MSG(bpf->extack,
+			       "program loaded with different flags");
+		return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
+
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+			  struct netdev_bpf *bpf)
+{
+	if (info->prog)
+		bpf_prog_put(info->prog);
+	info->prog = bpf->prog;
+	info->flags = bpf->flags;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_setup);
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index f8d9bd81d9a4..40401e9e9351 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -821,7 +821,7 @@ try:
     ret, _, err = sim.set_xdp(obj, "", force=True,
                               fail=False, include_stderr=True)
     fail(ret == 0, "Replaced XDP program with a program in different mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test XDP prog remove with bad flags...")
     ret, _, err = sim.unset_xdp("offload", force=True,
@@ -831,7 +831,7 @@ try:
     ret, _, err = sim.unset_xdp("", force=True,
                                 fail=False, include_stderr=True)
     fail(ret == 0, "Removed program with a bad mode")
-    check_extack_nsim(err, "program loaded with different flags.", args)
+    check_extack(err, "program loaded with different flags.", args)
 
     start_test("Test MTU restrictions...")
     ret, _ = sim.set_mtu(9000, fail=False)
-- 
cgit v1.2.3


From a25717d2b604347d9af8da81deea7b08e8c94220 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 11 Jul 2018 20:36:41 -0700
Subject: xdp: support simultaneous driver and hw XDP attachment

Split the query of HW-attached program from the software one.
Introduce new .ndo_bpf command to query HW-attached program.
This will allow drivers to install different programs in HW
and SW at the same time.  Netlink can now also carry multiple
programs on dump (in which case mode will be set to
XDP_ATTACHED_MULTI and user has to check per-attachment point
attributes, IFLA_XDP_PROG_ID will not be present).  We reuse
IFLA_XDP_PROG_ID skb space for second mode, so rtnl_xdp_size()
doesn't need to be updated.

Note that the installation side is still not there, since all
drivers currently reject installing more than one program at
the time.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  6 ++
 drivers/net/netdevsim/bpf.c                        |  6 ++
 include/linux/netdevice.h                          |  7 +-
 include/uapi/linux/if_link.h                       |  1 +
 net/core/dev.c                                     | 45 ++++++-----
 net/core/rtnetlink.c                               | 93 ++++++++++++----------
 6 files changed, 96 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 4bb589dbffbc..bb1e72e8dbc2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3453,6 +3453,12 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
 	case XDP_SETUP_PROG_HW:
 		return nfp_net_xdp_setup(nn, xdp);
 	case XDP_QUERY_PROG:
+		if (nn->dp.bpf_offload_xdp)
+			return 0;
+		return xdp_attachment_query(&nn->xdp, xdp);
+	case XDP_QUERY_PROG_HW:
+		if (!nn->dp.bpf_offload_xdp)
+			return 0;
 		return xdp_attachment_query(&nn->xdp, xdp);
 	default:
 		return nfp_app_bpf(nn->app, nn, xdp);
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index c485d97b5df4..5544c9b51173 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -561,6 +561,12 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 		nsim_bpf_destroy_prog(bpf->offload.prog);
 		return 0;
 	case XDP_QUERY_PROG:
+		if (ns->xdp_prog_mode != XDP_ATTACHED_DRV)
+			return 0;
+		return xdp_attachment_query(&ns->xdp, bpf);
+	case XDP_QUERY_PROG_HW:
+		if (ns->xdp_prog_mode != XDP_ATTACHED_HW)
+			return 0;
 		return xdp_attachment_query(&ns->xdp, bpf);
 	case XDP_SETUP_PROG:
 		err = nsim_setup_prog_checks(ns, bpf);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69a664789b33..2422c0e88f5c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -820,6 +820,7 @@ enum bpf_netdev_command {
 	XDP_SETUP_PROG,
 	XDP_SETUP_PROG_HW,
 	XDP_QUERY_PROG,
+	XDP_QUERY_PROG_HW,
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_VERIFIER_PREP,
 	BPF_OFFLOAD_TRANSLATE,
@@ -843,7 +844,7 @@ struct netdev_bpf {
 			struct bpf_prog *prog;
 			struct netlink_ext_ack *extack;
 		};
-		/* XDP_QUERY_PROG */
+		/* XDP_QUERY_PROG, XDP_QUERY_PROG_HW */
 		struct {
 			u32 prog_id;
 			/* flags with which program was installed */
@@ -3533,8 +3534,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
-void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
-		     struct netdev_bpf *xdp);
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
+		    enum bpf_netdev_command cmd);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index bc86c2b105ec..8759cfb8aa2e 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -920,6 +920,7 @@ enum {
 	XDP_ATTACHED_DRV,
 	XDP_ATTACHED_SKB,
 	XDP_ATTACHED_HW,
+	XDP_ATTACHED_MULTI,
 };
 
 enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 9fa3b3705a8e..993cdc3cd086 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7582,21 +7582,19 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-		     struct netdev_bpf *xdp)
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+		    enum bpf_netdev_command cmd)
 {
-	memset(xdp, 0, sizeof(*xdp));
-	xdp->command = XDP_QUERY_PROG;
+	struct netdev_bpf xdp;
 
-	/* Query must always succeed. */
-	WARN_ON(bpf_op(dev, xdp) < 0);
-}
+	if (!bpf_op)
+		return 0;
 
-static bool __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
-{
-	struct netdev_bpf xdp;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = cmd;
 
-	__dev_xdp_query(dev, bpf_op, &xdp);
+	/* Query must always succeed. */
+	WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
 
 	return xdp.prog_id;
 }
@@ -7632,12 +7630,19 @@ static void dev_xdp_uninstall(struct net_device *dev)
 	if (!ndo_bpf)
 		return;
 
-	__dev_xdp_query(dev, ndo_bpf, &xdp);
-	if (!xdp.prog_id)
-		return;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG;
+	WARN_ON(ndo_bpf(dev, &xdp));
+	if (xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 
-	/* Program removal should always succeed */
-	WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+	/* Remove HW offload */
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG_HW;
+	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 }
 
 /**
@@ -7653,12 +7658,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	enum bpf_netdev_command query;
 	struct bpf_prog *prog = NULL;
 	bpf_op_t bpf_op, bpf_chk;
 	int err;
 
 	ASSERT_RTNL();
 
+	query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+
 	bpf_op = bpf_chk = ops->ndo_bpf;
 	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
 		return -EOPNOTSUPP;
@@ -7668,10 +7676,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
+		if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
+		    __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, bpf_op))
+		    __dev_xdp_query(dev, bpf_op, query))
 			return -EBUSY;
 
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02ebc056a688..c9929ef17539 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -964,7 +964,7 @@ static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
 			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4) +	/* XDP_PROG_ID */
+			  nla_total_size(4) +	/* XDP_PROG_ID (or 1st mode) */
 			  nla_total_size(4);	/* XDP_<mode>_PROG_ID */
 
 	return xdp_size;
@@ -1354,37 +1354,57 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
+static u32 rtnl_xdp_prog_skb(struct net_device *dev)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	const struct bpf_prog *generic_xdp_prog;
-	struct netdev_bpf xdp;
 
 	ASSERT_RTNL();
 
-	*prog_id = 0;
 	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-	if (generic_xdp_prog) {
-		*prog_id = generic_xdp_prog->aux->id;
-		return XDP_ATTACHED_SKB;
-	}
-	if (!ops->ndo_bpf)
-		return XDP_ATTACHED_NONE;
+	if (!generic_xdp_prog)
+		return 0;
+	return generic_xdp_prog->aux->id;
+}
+
+static u32 rtnl_xdp_prog_drv(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+}
+
+static u32 rtnl_xdp_prog_hw(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
+			       XDP_QUERY_PROG_HW);
+}
+
+static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
+			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
+			       u32 (*get_prog_id)(struct net_device *dev))
+{
+	u32 curr_id;
+	int err;
+
+	curr_id = get_prog_id(dev);
+	if (!curr_id)
+		return 0;
+
+	*prog_id = curr_id;
+	err = nla_put_u32(skb, attr, curr_id);
+	if (err)
+		return err;
 
-	__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-	if (!xdp.prog_id)
-		return XDP_ATTACHED_NONE;
+	if (*mode != XDP_ATTACHED_NONE)
+		*mode = XDP_ATTACHED_MULTI;
+	else
+		*mode = tgt_mode;
 
-	*prog_id = xdp.prog_id;
-	if (xdp.prog_flags & XDP_FLAGS_HW_MODE)
-		return XDP_ATTACHED_HW;
-	return XDP_ATTACHED_DRV;
+	return 0;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 {
-	u32 prog_attr, prog_id;
 	struct nlattr *xdp;
+	u32 prog_id;
 	int err;
 	u8 mode;
 
@@ -1392,35 +1412,26 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 	if (!xdp)
 		return -EMSGSIZE;
 
-	mode = rtnl_xdp_attached_mode(dev, &prog_id);
+	prog_id = 0;
+	mode = XDP_ATTACHED_NONE;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
+				IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb))
+		goto err_cancel;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
+				IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv))
+		goto err_cancel;
+	if (rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
+				IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw))
+		goto err_cancel;
+
 	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
 	if (err)
 		goto err_cancel;
 
-	if (prog_id) {
+	if (prog_id && mode != XDP_ATTACHED_MULTI) {
 		err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
 		if (err)
 			goto err_cancel;
-
-		switch (mode) {
-		case XDP_ATTACHED_DRV:
-			prog_attr = IFLA_XDP_DRV_PROG_ID;
-			break;
-		case XDP_ATTACHED_SKB:
-			prog_attr = IFLA_XDP_SKB_PROG_ID;
-			break;
-		case XDP_ATTACHED_HW:
-			prog_attr = IFLA_XDP_HW_PROG_ID;
-			break;
-		case XDP_ATTACHED_NONE:
-		default:
-			err = -EINVAL;
-			goto err_cancel;
-		}
-
-		err = nla_put_u32(skb, prog_attr, prog_id);
-		if (err)
-			goto err_cancel;
 	}
 
 	nla_nest_end(skb, xdp);
-- 
cgit v1.2.3


From 8dc9fbbf274b7b2a647e06141aee70ffabf6dbc0 Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Fri, 13 Jul 2018 15:21:13 +0530
Subject: drm/scheduler: add a pointer to scheduler in the rq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch is in preparation for a better load balancing in
scheduler. It allows us to associate entities with the
run queues instead of binding them to a scheduler.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 ++++--
 include/drm/gpu_scheduler.h               | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 7d2560699b84..429b1328653a 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -69,11 +69,13 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
  *
  * Initializes a scheduler runqueue.
  */
-static void drm_sched_rq_init(struct drm_sched_rq *rq)
+static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
+			      struct drm_sched_rq *rq)
 {
 	spin_lock_init(&rq->lock);
 	INIT_LIST_HEAD(&rq->entities);
 	rq->current_entity = NULL;
+	rq->sched = sched;
 }
 
 /**
@@ -926,7 +928,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	sched->timeout = timeout;
 	sched->hang_limit = hang_limit;
 	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_MAX; i++)
-		drm_sched_rq_init(&sched->sched_rq[i]);
+		drm_sched_rq_init(sched, &sched->sched_rq[i]);
 
 	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 4214ceb71c05..43e93d6077cf 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -93,6 +93,7 @@ struct drm_sched_entity {
  * struct drm_sched_rq - queue of entities to be scheduled.
  *
  * @lock: to modify the entities list.
+ * @sched: the scheduler to which this rq belongs to.
  * @entities: list of the entities to be scheduled.
  * @current_entity: the entity which is to be scheduled.
  *
@@ -102,6 +103,7 @@ struct drm_sched_entity {
  */
 struct drm_sched_rq {
 	spinlock_t			lock;
+	struct drm_gpu_scheduler	*sched;
 	struct list_head		entities;
 	struct drm_sched_entity		*current_entity;
 };
-- 
cgit v1.2.3


From aa16b6c6b4d979234f830a48add47d02c12bb569 Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Fri, 13 Jul 2018 15:21:14 +0530
Subject: drm/scheduler: modify args of drm_sched_entity_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

replace run queue by a list of run queues and remove the
sched arg as that is part of run queue itself

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  3 +--
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c     |  4 ++--
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c     |  4 ++--
 drivers/gpu/drm/etnaviv/etnaviv_drv.c     |  8 ++++----
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 20 ++++++++++++--------
 drivers/gpu/drm/v3d/v3d_drv.c             |  7 +++----
 include/drm/gpu_scheduler.h               |  6 +++---
 11 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0120b24fae1b..83e3b320a793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 		if (ring == &adev->gfx.kiq.ring)
 			continue;
 
-		r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
-					  rq, &ctx->guilty);
+		r = drm_sched_entity_init(&ctx->rings[i].entity,
+					  &rq, 1, &ctx->guilty);
 		if (r)
 			goto failed;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6a3fead5c1f0..11a12483c995 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1918,8 +1918,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 
 		ring = adev->mman.buffer_funcs_ring;
 		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-		r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
-					  rq, NULL);
+		r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
 				  r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 3e70eb61a960..a6c2cace4b9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -266,8 +266,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 
 		ring = &adev->uvd.inst[j].ring;
 		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
-					  rq, NULL);
+		r = drm_sched_entity_init(&adev->uvd.inst[j].entity, &rq,
+					  1, NULL);
 		if (r != 0) {
 			DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 6ae1ad7e83b3..ffb0fcc9707e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -190,8 +190,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 
 	ring = &adev->vce.ring[0];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
-				  rq, NULL);
+	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0fd0a718763b..484e2c19c027 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2564,8 +2564,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	ring_instance %= adev->vm_manager.vm_pte_num_rings;
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-	r = drm_sched_entity_init(&ring->sched, &vm->entity,
-				  rq, NULL);
+	r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 2623f249cb7a..1c118c02e8cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -430,8 +430,8 @@ static int uvd_v6_0_sw_init(void *handle)
 		struct drm_sched_rq *rq;
 		ring = &adev->uvd.inst->ring_enc[0];
 		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
-					  rq, NULL);
+		r = drm_sched_entity_init(&adev->uvd.inst->entity_enc,
+					  &rq, 1, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up UVD ENC run queue.\n");
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index ce360ad16856..d48bc3393545 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -432,8 +432,8 @@ static int uvd_v7_0_sw_init(void *handle)
 	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
 		ring = &adev->uvd.inst[j].ring_enc[0];
 		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
-					  rq, NULL);
+		r = drm_sched_entity_init(&adev->uvd.inst[j].entity_enc,
+					  &rq, 1, NULL);
 		if (r) {
 			DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
 			return r;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 45bfdf4cc107..36414ba56b22 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -49,12 +49,12 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
+		struct drm_sched_rq *rq;
 
 		if (gpu) {
-			drm_sched_entity_init(&gpu->sched,
-				&ctx->sched_entity[i],
-				&gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
-				NULL);
+			rq = &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+			drm_sched_entity_init(&ctx->sched_entity[i],
+					      &rq, 1, NULL);
 			}
 	}
 
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 429b1328653a..16bf446aa6b3 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -162,26 +162,30 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq)
  * drm_sched_entity_init - Init a context entity used by scheduler when
  * submit to HW ring.
  *
- * @sched: scheduler instance
  * @entity: scheduler entity to init
- * @rq: the run queue this entity belongs
+ * @rq_list: the list of run queue on which jobs from this
+ *           entity can be submitted
+ * @num_rq_list: number of run queue in rq_list
  * @guilty: atomic_t set to 1 when a job on this queue
  *          is found to be guilty causing a timeout
  *
+ * Note: the rq_list should have atleast one element to schedule
+ *       the entity
+ *
  * Returns 0 on success or a negative error code on failure.
 */
-int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
-			  struct drm_sched_entity *entity,
-			  struct drm_sched_rq *rq,
+int drm_sched_entity_init(struct drm_sched_entity *entity,
+			  struct drm_sched_rq **rq_list,
+			  unsigned int num_rq_list,
 			  atomic_t *guilty)
 {
-	if (!(sched && entity && rq))
+	if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
 		return -EINVAL;
 
 	memset(entity, 0, sizeof(struct drm_sched_entity));
 	INIT_LIST_HEAD(&entity->list);
-	entity->rq = rq;
-	entity->sched = sched;
+	entity->rq = rq_list[0];
+	entity->sched = rq_list[0]->sched;
 	entity->guilty = guilty;
 	entity->last_scheduled = NULL;
 
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 567f7d46d912..1dceba2b42fd 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -123,6 +123,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv;
+	struct drm_sched_rq *rq;
 	int i;
 
 	v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
@@ -132,10 +133,8 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 	v3d_priv->v3d = v3d;
 
 	for (i = 0; i < V3D_MAX_QUEUES; i++) {
-		drm_sched_entity_init(&v3d->queue[i].sched,
-				      &v3d_priv->sched_entity[i],
-				      &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
-				      NULL);
+		rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
 	}
 
 	file->driver_priv = v3d_priv;
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 43e93d6077cf..2205e89722f6 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -282,9 +282,9 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const char *name);
 void drm_sched_fini(struct drm_gpu_scheduler *sched);
 
-int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
-			  struct drm_sched_entity *entity,
-			  struct drm_sched_rq *rq,
+int drm_sched_entity_init(struct drm_sched_entity *entity,
+			  struct drm_sched_rq **rq_list,
+			  unsigned int num_rq_list,
 			  atomic_t *guilty);
 long drm_sched_entity_flush(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity, long timeout);
-- 
cgit v1.2.3


From c921c2077b32081617789a645120148bc8b60c98 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 13 Jul 2018 12:16:43 +0300
Subject: net: ipmr: add support for passing full packet on wrong vif

This patch adds support for IGMPMSG_WRVIFWHOLE which is used to pass
full packet and real vif id when the incoming interface is wrong.
While the RP and FHR are setting up state we need to be sending the
registers encapsulated with all the data inside otherwise we lose it.
The RP then decapsulates it and forwards it to the interested parties.
Currently with WRONGVIF we can only be sending empty register packets
and will lose that data.
This behaviour can be enabled by using MRT_PIM with
val == IGMPMSG_WRVIFWHOLE. This doesn't prevent IGMPMSG_WRONGVIF from
happening, it happens in addition to it, also it is controlled by the same
throttling parameters as WRONGVIF (i.e. 1 packet per 3 seconds currently).
Both messages are generated to keep backwards compatibily and avoid
breaking someone who was enabling MRT_PIM with val == 4, since any
positive val is accepted and treated the same.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h |  1 +
 include/uapi/linux/mroute.h |  2 ++
 net/ipv4/ipmr.c             | 21 ++++++++++++++++-----
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index fd436cdd4725..6675b9f81979 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -254,6 +254,7 @@ struct mr_table {
 	atomic_t		cache_resolve_queue_len;
 	bool			mroute_do_assert;
 	bool			mroute_do_pim;
+	bool			mroute_do_wrvifwhole;
 	int			mroute_reg_vif_num;
 };
 
diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 10f9ff9426a2..5d37a9ccce63 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -120,6 +120,7 @@ enum {
 	IPMRA_TABLE_MROUTE_DO_ASSERT,
 	IPMRA_TABLE_MROUTE_DO_PIM,
 	IPMRA_TABLE_VIFS,
+	IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
 	__IPMRA_TABLE_MAX
 };
 #define IPMRA_TABLE_MAX (__IPMRA_TABLE_MAX - 1)
@@ -173,5 +174,6 @@ enum {
 #define IGMPMSG_NOCACHE		1		/* Kern cache fill request to mrouted */
 #define IGMPMSG_WRONGVIF	2		/* For PIM assert processing (unused) */
 #define IGMPMSG_WHOLEPKT	3		/* For PIM Register processing */
+#define IGMPMSG_WRVIFWHOLE	4		/* For PIM Register and assert processing */
 
 #endif /* _UAPI__LINUX_MROUTE_H */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 82f914122f1b..5660adcf7a04 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1052,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	struct sk_buff *skb;
 	int ret;
 
-	if (assert == IGMPMSG_WHOLEPKT)
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 	else
 		skb = alloc_skb(128, GFP_ATOMIC);
@@ -1060,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (assert == IGMPMSG_WHOLEPKT) {
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
 		/* Ugly, but we have no choice with this interface.
 		 * Duplicate old header, fix ihl, length etc.
 		 * And all this only to mangle msg->im_msgtype and
@@ -1071,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		skb_reset_transport_header(skb);
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
-		msg->im_msgtype = IGMPMSG_WHOLEPKT;
+		msg->im_msgtype = assert;
 		msg->im_mbz = 0;
-		msg->im_vif = mrt->mroute_reg_vif_num;
+		if (assert == IGMPMSG_WRVIFWHOLE)
+			msg->im_vif = vifi;
+		else
+			msg->im_vif = mrt->mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -1372,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	struct mr_table *mrt;
 	struct vifctl vif;
 	struct mfcctl mfc;
+	bool do_wrvifwhole;
 	u32 uval;
 
 	/* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1502,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 			break;
 		}
 
+		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
 		val = !!val;
 		if (val != mrt->mroute_do_pim) {
 			mrt->mroute_do_pim = val;
 			mrt->mroute_do_assert = val;
+			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
 		}
 		break;
 	case MRT_TABLE:
@@ -1983,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			       MFC_ASSERT_THRESH)) {
 			c->_c.mfc_un.res.last_assert = jiffies;
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+			if (mrt->mroute_do_wrvifwhole)
+				ipmr_cache_report(mrt, skb, true_vifi,
+						  IGMPMSG_WRVIFWHOLE);
 		}
 		goto dont_forward;
 	}
@@ -2659,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 			mrt->mroute_reg_vif_num) ||
 	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
 		       mrt->mroute_do_assert) ||
-	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+		       mrt->mroute_do_wrvifwhole))
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 01683a1469995cc7aaf833d6f8b3f1c1d2fc3b92 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 9 Jul 2018 13:29:11 +0300
Subject: net: sched: refactor flower walk to iterate over idr

Extend struct tcf_walker with additional 'cookie' field. It is intended to
be used by classifier walk implementations to continue iteration directly
from particular filter, instead of iterating 'skip' number of times.

Change flower walk implementation to save filter handle in 'cookie'. Each
time flower walk is called, it looks up filter with saved handle directly
with idr, instead of iterating over filter linked list 'skip' number of
times. This change improves complexity of dumping flower classifier from
quadratic to linearithmic. (assuming idr lookup has logarithmic complexity)

Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reported-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  |  1 +
 net/sched/cls_api.c    |  2 ++
 net/sched/cls_flower.c | 20 +++++++++-----------
 3 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2081e4219f81..e4252a176eec 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -13,6 +13,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
+	unsigned long cookie;
 	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 73d9967c3739..c51b1b12450d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1508,7 +1508,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		arg.w.stop = 0;
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
+		arg.w.cookie = cb->args[2];
 		tp->ops->walk(tp, &arg.w);
+		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
 			return false;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8b2474293db1..c53fdd411f90 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1099,19 +1099,17 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f;
-	struct fl_flow_mask *mask;
 
-	list_for_each_entry_rcu(mask, &head->masks, list) {
-		list_for_each_entry_rcu(f, &mask->filters, list) {
-			if (arg->count < arg->skip)
-				goto skip;
-			if (arg->fn(tp, f, arg) < 0) {
-				arg->stop = 1;
-				break;
-			}
-skip:
-			arg->count++;
+	arg->count = arg->skip;
+
+	while ((f = idr_get_next_ul(&head->handle_idr,
+				    &arg->cookie)) != NULL) {
+		if (arg->fn(tp, f, arg) < 0) {
+			arg->stop = 1;
+			break;
 		}
+		arg->cookie = f->handle + 1;
+		arg->count++;
 	}
 }
 
-- 
cgit v1.2.3


From c3086637b0d7dbee0925697f8dbee2bcf9637b9f Mon Sep 17 00:00:00 2001
From: Michael Heimpold <mhei@heimpold.de>
Date: Wed, 11 Jul 2018 23:10:55 +0200
Subject: net: ethtool: fix spelling mistake: "tubale" -> "tunable"

Signed-off-by: Michael Heimpold <mhei@heimpold.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 4ca65b56084f..7363f18e65a5 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -226,7 +226,7 @@ enum tunable_id {
 	ETHTOOL_TX_COPYBREAK,
 	ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */
 	/*
-	 * Add your fresh new tubale attribute above and remember to update
+	 * Add your fresh new tunable attribute above and remember to update
 	 * tunable_strings[] in net/core/ethtool.c
 	 */
 	__ETHTOOL_TUNABLE_COUNT,
-- 
cgit v1.2.3


From a69258f7aa2623e0930212f09c586fd06674ad79 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 12 Jul 2018 06:04:53 -0700
Subject: tcp: remove DELAYED ACK events in DCTCP

After fixing the way DCTCP tracking delayed ACKs, the delayed-ACK
related callbacks are no longer needed

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 --
 net/ipv4/tcp_dctcp.c  | 25 -------------------------
 net/ipv4/tcp_output.c |  4 ----
 3 files changed, 31 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index af3ec72d5d41..3482d13d655b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -912,8 +912,6 @@ enum tcp_ca_event {
 	CA_EVENT_LOSS,		/* loss timeout */
 	CA_EVENT_ECN_NO_CE,	/* ECT set, but not CE marked */
 	CA_EVENT_ECN_IS_CE,	/* received CE marked IP packet */
-	CA_EVENT_DELAYED_ACK,	/* Delayed ack is sent */
-	CA_EVENT_NON_DELAYED_ACK,
 };
 
 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 89f88b0d8167..5869f89ca656 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -55,7 +55,6 @@ struct dctcp {
 	u32 dctcp_alpha;
 	u32 next_seq;
 	u32 ce_state;
-	u32 delayed_ack_reserved;
 	u32 loss_cwnd;
 };
 
@@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk)
 
 		ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 
-		ca->delayed_ack_reserved = 0;
 		ca->loss_cwnd = 0;
 		ca->ce_state = 0;
 
@@ -250,25 +248,6 @@ static void dctcp_state(struct sock *sk, u8 new_state)
 	}
 }
 
-static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
-{
-	struct dctcp *ca = inet_csk_ca(sk);
-
-	switch (ev) {
-	case CA_EVENT_DELAYED_ACK:
-		if (!ca->delayed_ack_reserved)
-			ca->delayed_ack_reserved = 1;
-		break;
-	case CA_EVENT_NON_DELAYED_ACK:
-		if (ca->delayed_ack_reserved)
-			ca->delayed_ack_reserved = 0;
-		break;
-	default:
-		/* Don't care for the rest. */
-		break;
-	}
-}
-
 static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 {
 	switch (ev) {
@@ -278,10 +257,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	case CA_EVENT_ECN_NO_CE:
 		dctcp_ce_state_1_to_0(sk);
 		break;
-	case CA_EVENT_DELAYED_ACK:
-	case CA_EVENT_NON_DELAYED_ACK:
-		dctcp_update_ack_reserved(sk, ev);
-		break;
 	default:
 		/* Don't care for the rest. */
 		break;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e08b409c71e..00e5a300ddb9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3523,8 +3523,6 @@ void tcp_send_delayed_ack(struct sock *sk)
 	int ato = icsk->icsk_ack.ato;
 	unsigned long timeout;
 
-	tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
-
 	if (ato > TCP_DELACK_MIN) {
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
@@ -3581,8 +3579,6 @@ void tcp_send_ack(struct sock *sk)
 	if (sk->sk_state == TCP_CLOSE)
 		return;
 
-	tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
-
 	/* We are not putting this on the write queue, so
 	 * tcp_transmit_skb() will set the ownership to this
 	 * sock.
-- 
cgit v1.2.3


From a90744bac57c3c07d0d4422af62f3e44549ade30 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 13 Jul 2018 16:59:03 -0700
Subject: mm: allow arch to supply p??_free_tlb functions

The mmu_gather APIs keep track of the invalidated address range
including the span covered by invalidated page table pages.  Ranges
covered by page tables but not ptes (and therefore no TLBs) still need
to be invalidated because some architectures (x86) can cache
intermediate page table entries, and invalidate those with normal TLB
invalidation instructions to be almost-backward-compatible.

Architectures which don't cache intermediate page table entries, or
which invalidate these caches separately from TLB invalidation, do not
require TLB invalidation range expanded over page tables.

Allow architectures to supply their own p??_free_tlb functions, which
can avoid the __tlb_adjust_range.

Link: http://lkml.kernel.org/r/20180703013131.2807-1-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/tlb.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index faddde44de8c..3063125197ad 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -265,33 +265,41 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
  * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
  */
 
+#ifndef pte_free_tlb
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
+#endif
 
+#ifndef pmd_free_tlb
 #define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
 		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)
+#endif
 
 #ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef pud_free_tlb
 #define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__pud_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
+#endif
 
 #ifndef __ARCH_HAS_5LEVEL_HACK
+#ifndef p4d_free_tlb
 #define p4d_free_tlb(tlb, pudp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
 		__p4d_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
+#endif
 
 #define tlb_migrate_finish(mm) do {} while (0)
 
-- 
cgit v1.2.3


From f333ee0cdb27ba201e6cc0c99c76b1364aa29b86 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 11 Jul 2018 17:33:32 -0700
Subject: bpf: Add BPF_SOCK_OPS_TCP_LISTEN_CB

Add new TCP-BPF callback that is called on listen(2) right after socket
transition to TCP_LISTEN state.

It fills the gap for listening sockets in TCP-BPF. For example BPF
program can set BPF_SOCK_OPS_STATE_CB_FLAG when socket becomes listening
and track later transition from TCP_LISTEN to TCP_CLOSE with
BPF_SOCK_OPS_STATE_CB callback.

Before there was no way to do it with TCP-BPF and other options were
much harder to work with. E.g. socket state tracking can be done with
tracepoints (either raw or regular) but they can't be attached to cgroup
and their lifetime has to be managed separately.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 3 +++
 net/ipv4/af_inet.c       | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6bcb287a888d..870113916cac 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2555,6 +2555,9 @@ enum {
 					 * Arg1: old_state
 					 * Arg2: new_state
 					 */
+	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
+					 * socket transition to LISTEN state.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c716be13d58c..f2a0a3bab6b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog)
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
 			goto out;
+		tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
 	}
 	sk->sk_max_ack_backlog = backlog;
 	err = 0;
-- 
cgit v1.2.3


From a7fe68a1e8e4bce007505f729bc33e427c540386 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Sat, 14 Jul 2018 19:05:58 -0400
Subject: drm/amd: Add CU-masking ioctl definition to kfd_ioctl.h

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 88d17c39dbf9..01674b56e14f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -76,6 +76,12 @@ struct kfd_ioctl_update_queue_args {
 	__u32 queue_priority;	/* to KFD */
 };
 
+struct kfd_ioctl_set_cu_mask_args {
+	__u32 queue_id;		/* to KFD */
+	__u32 num_cu_mask;		/* to KFD */
+	__u64 cu_mask_ptr;		/* to KFD */
+};
+
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
 #define KFD_IOC_CACHE_POLICY_COHERENT 0
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -466,7 +472,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 #define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU	\
 		AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
 
+#define AMDKFD_IOC_SET_CU_MASK		\
+		AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1A
+#define AMDKFD_COMMAND_END		0x1B
 
 #endif
-- 
cgit v1.2.3


From 6c26fcd2abfe0a56bbd95271fce02df2896cfd24 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sat, 14 Jul 2018 21:56:13 +0200
Subject: x86/speculation/l1tf: Unbreak !__HAVE_ARCH_PFN_MODIFY_ALLOWED
 architectures

pfn_modify_allowed() and arch_has_pfn_modify_check() are outside of the
!__ASSEMBLY__ section in include/asm-generic/pgtable.h, which confuses
assembler on archs that don't have __HAVE_ARCH_PFN_MODIFY_ALLOWED (e.g.
ia64) and breaks build:

    include/asm-generic/pgtable.h: Assembler messages:
    include/asm-generic/pgtable.h:538: Error: Unknown opcode `static inline bool pfn_modify_allowed(unsigned long pfn,pgprot_t prot)'
    include/asm-generic/pgtable.h:540: Error: Unknown opcode `return true'
    include/asm-generic/pgtable.h:543: Error: Unknown opcode `static inline bool arch_has_pfn_modify_check(void)'
    include/asm-generic/pgtable.h:545: Error: Unknown opcode `return false'
    arch/ia64/kernel/entry.S:69: Error: `mov' does not fit into bundle

Move those two static inlines into the !__ASSEMBLY__ section so that they
don't confuse the asm build pass.

Fixes: 42e4089c7890 ("x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/pgtable.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 0ecc1197084b..26ca0276b503 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1083,6 +1083,18 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 static inline void init_espfix_bsp(void) { }
 #endif
 
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+	return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+	return false;
+}
+#endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
@@ -1097,16 +1109,4 @@ static inline void init_espfix_bsp(void) { }
 #endif
 #endif
 
-#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
-static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
-{
-	return true;
-}
-
-static inline bool arch_has_pfn_modify_check(void)
-{
-	return false;
-}
-#endif
-
 #endif /* _ASM_GENERIC_PGTABLE_H */
-- 
cgit v1.2.3


From 25b4a2b909dc9fdd141ee4239cf78d1617a9d91d Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Wed, 11 Jul 2018 13:40:13 -0600
Subject: coresight: Remove function coresight_vpid_to_pid()

Now that we prevent users from using contextID tracing when PID namespaces
are involved there is no client for function coresight_vpid_to_pid().  As
such simply remove it.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Kim Phillips <kim.phillips@arm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index c265e0468414..e5421b83e4e6 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -267,24 +267,4 @@ static inline struct coresight_platform_data *of_get_coresight_platform_data(
 	struct device *dev, const struct device_node *node) { return NULL; }
 #endif
 
-#ifdef CONFIG_PID_NS
-static inline unsigned long
-coresight_vpid_to_pid(unsigned long vpid)
-{
-	struct task_struct *task = NULL;
-	unsigned long pid = 0;
-
-	rcu_read_lock();
-	task = find_task_by_vpid(vpid);
-	if (task)
-		pid = task_pid_nr(task);
-	rcu_read_unlock();
-
-	return pid;
-}
-#else
-static inline unsigned long
-coresight_vpid_to_pid(unsigned long vpid) { return vpid; }
-#endif
-
 #endif
-- 
cgit v1.2.3


From 575694978247b7cc1588ec233bde3fbf66db9e50 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:27 -0600
Subject: coresight: Cleanup platform description data

Nobody uses the "clk" field in struct coresight_platform_data.
Remove it.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index e5421b83e4e6..69a5c9f82469 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -87,7 +87,6 @@ struct coresight_dev_subtype {
  * @child_ports:child component port number the current component is
 		connected  to.
  * @nr_outport:	number of output ports for this component.
- * @clk:	The clock this component is associated to.
  */
 struct coresight_platform_data {
 	int cpu;
@@ -97,7 +96,6 @@ struct coresight_platform_data {
 	const char **child_names;
 	int *child_ports;
 	int nr_outport;
-	struct clk *clk;
 };
 
 /**
-- 
cgit v1.2.3


From 00b78e8b7b003c686a8570df7d231787f514226c Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:29 -0600
Subject: coresight: Cleanup device subtype struct

Clean up our struct a little bit by using a union instead of
a struct for tracking the subtype of a device.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/coresight.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 69a5c9f82469..7c188c2a5b3c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -63,17 +63,20 @@ enum coresight_dev_subtype_source {
 };
 
 /**
- * struct coresight_dev_subtype - further characterisation of a type
+ * union coresight_dev_subtype - further characterisation of a type
  * @sink_subtype:	type of sink this component is, as defined
-			by @coresight_dev_subtype_sink.
+ *			by @coresight_dev_subtype_sink.
  * @link_subtype:	type of link this component is, as defined
-			by @coresight_dev_subtype_link.
+ *			by @coresight_dev_subtype_link.
  * @source_subtype:	type of source this component is, as defined
-			by @coresight_dev_subtype_source.
+ *			by @coresight_dev_subtype_source.
  */
-struct coresight_dev_subtype {
-	enum coresight_dev_subtype_sink sink_subtype;
-	enum coresight_dev_subtype_link link_subtype;
+union coresight_dev_subtype {
+	/* We have some devices which acts as LINK and SINK */
+	struct {
+		enum coresight_dev_subtype_sink sink_subtype;
+		enum coresight_dev_subtype_link link_subtype;
+	};
 	enum coresight_dev_subtype_source source_subtype;
 };
 
@@ -111,7 +114,7 @@ struct coresight_platform_data {
  */
 struct coresight_desc {
 	enum coresight_dev_type type;
-	struct coresight_dev_subtype subtype;
+	union coresight_dev_subtype subtype;
 	const struct coresight_ops *ops;
 	struct coresight_platform_data *pdata;
 	struct device *dev;
@@ -155,7 +158,7 @@ struct coresight_device {
 	int nr_inport;
 	int nr_outport;
 	enum coresight_dev_type type;
-	struct coresight_dev_subtype subtype;
+	union coresight_dev_subtype subtype;
 	const struct coresight_ops *ops;
 	struct device dev;
 	atomic_t *refcnt;
-- 
cgit v1.2.3


From 8a091d847cecd1a3656172cb79b53a19e90b8c4f Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:30 -0600
Subject: coresight: Add helper device type

Add a new coresight device type, which do not belong to any
of the existing types, i.e, source, sink, link etc. A helper
device could be connected to a coresight device, which could
augment the functionality of the coresight device.

This is intended to cover Coresight Address Translation Unit (CATU)
devices, which provide improved Scatter Gather mechanism for TMC
ETR. The idea is that the helper device could be controlled by
the driver of the device it is attached to (in this case ETR),
transparent to the generic coresight driver (and paths).

The operations include enable(), disable(), both of which could
accept a device specific "data" which the driving device and
the helper device could share. Since they don't appear in the
coresight "path" tracked by software, we have to ensure that
they are powered up/down whenever the master device is turned
on.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight.c | 43 +++++++++++++++++++++++++++++++--
 include/linux/coresight.h               | 24 ++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 7f2b478a048f..3e07fd335f8c 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -425,6 +425,42 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate)
 	return dev ? to_coresight_device(dev) : NULL;
 }
 
+/*
+ * coresight_grab_device - Power up this device and any of the helper
+ * devices connected to it for trace operation. Since the helper devices
+ * don't appear on the trace path, they should be handled along with the
+ * the master device.
+ */
+static void coresight_grab_device(struct coresight_device *csdev)
+{
+	int i;
+
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_get_sync(child->dev.parent);
+	}
+	pm_runtime_get_sync(csdev->dev.parent);
+}
+
+/*
+ * coresight_drop_device - Release this device and any of the helper
+ * devices connected to it.
+ */
+static void coresight_drop_device(struct coresight_device *csdev)
+{
+	int i;
+
+	pm_runtime_put(csdev->dev.parent);
+	for (i = 0; i < csdev->nr_outport; i++) {
+		struct coresight_device *child = csdev->conns[i].child_dev;
+
+		if (child && child->type == CORESIGHT_DEV_TYPE_HELPER)
+			pm_runtime_put(child->dev.parent);
+	}
+}
+
 /**
  * _coresight_build_path - recursively build a path from a @csdev to a sink.
  * @csdev:	The device to start from.
@@ -473,9 +509,9 @@ out:
 	if (!node)
 		return -ENOMEM;
 
+	coresight_grab_device(csdev);
 	node->csdev = csdev;
 	list_add(&node->link, path);
-	pm_runtime_get_sync(csdev->dev.parent);
 
 	return 0;
 }
@@ -519,7 +555,7 @@ void coresight_release_path(struct list_head *path)
 	list_for_each_entry_safe(nd, next, path, link) {
 		csdev = nd->csdev;
 
-		pm_runtime_put_sync(csdev->dev.parent);
+		coresight_drop_device(csdev);
 		list_del(&nd->link);
 		kfree(nd);
 	}
@@ -770,6 +806,9 @@ static struct device_type coresight_dev_type[] = {
 		.name = "source",
 		.groups = coresight_source_groups,
 	},
+	{
+		.name = "helper",
+	},
 };
 
 static void coresight_device_release(struct device *dev)
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 7c188c2a5b3c..3d40a2b135f1 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -40,6 +40,7 @@ enum coresight_dev_type {
 	CORESIGHT_DEV_TYPE_LINK,
 	CORESIGHT_DEV_TYPE_LINKSINK,
 	CORESIGHT_DEV_TYPE_SOURCE,
+	CORESIGHT_DEV_TYPE_HELPER,
 };
 
 enum coresight_dev_subtype_sink {
@@ -62,6 +63,10 @@ enum coresight_dev_subtype_source {
 	CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE,
 };
 
+enum coresight_dev_subtype_helper {
+	CORESIGHT_DEV_SUBTYPE_HELPER_NONE,
+};
+
 /**
  * union coresight_dev_subtype - further characterisation of a type
  * @sink_subtype:	type of sink this component is, as defined
@@ -70,6 +75,8 @@ enum coresight_dev_subtype_source {
  *			by @coresight_dev_subtype_link.
  * @source_subtype:	type of source this component is, as defined
  *			by @coresight_dev_subtype_source.
+ * @helper_subtype:	type of helper this component is, as defined
+ *			by @coresight_dev_subtype_helper.
  */
 union coresight_dev_subtype {
 	/* We have some devices which acts as LINK and SINK */
@@ -78,6 +85,7 @@ union coresight_dev_subtype {
 		enum coresight_dev_subtype_link link_subtype;
 	};
 	enum coresight_dev_subtype_source source_subtype;
+	enum coresight_dev_subtype_helper helper_subtype;
 };
 
 /**
@@ -172,6 +180,7 @@ struct coresight_device {
 #define source_ops(csdev)	csdev->ops->source_ops
 #define sink_ops(csdev)		csdev->ops->sink_ops
 #define link_ops(csdev)		csdev->ops->link_ops
+#define helper_ops(csdev)	csdev->ops->helper_ops
 
 /**
  * struct coresight_ops_sink - basic operations for a sink
@@ -231,10 +240,25 @@ struct coresight_ops_source {
 			struct perf_event *event);
 };
 
+/**
+ * struct coresight_ops_helper - Operations for a helper device.
+ *
+ * All operations could pass in a device specific data, which could
+ * help the helper device to determine what to do.
+ *
+ * @enable	: Enable the device
+ * @disable	: Disable the device
+ */
+struct coresight_ops_helper {
+	int (*enable)(struct coresight_device *csdev, void *data);
+	int (*disable)(struct coresight_device *csdev, void *data);
+};
+
 struct coresight_ops {
 	const struct coresight_ops_sink *sink_ops;
 	const struct coresight_ops_link *link_ops;
 	const struct coresight_ops_source *source_ops;
+	const struct coresight_ops_helper *helper_ops;
 };
 
 #ifdef CONFIG_CORESIGHT
-- 
cgit v1.2.3


From fcacb5c154baaeaee3d89b2a2b7cf6e4ce43f5f5 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Wed, 11 Jul 2018 13:40:31 -0600
Subject: coresight: Introduce support for Coresight Address Translation Unit

Add the initial support for Coresight Address Translation Unit, which
augments the TMC in Coresight SoC-600 by providing an improved Scatter
Gather mechanism. CATU is always connected to a single TMC-ETR and
converts the AXI address with a translated address (from a given SG
table with specific format). The CATU should be programmed in pass
through mode and enabled even if the ETR doesn't use the translation
by CATU.

This patch provides mechanism to enable/disable the CATU always in the
pass through mode.

We reuse the existing ports mechanism to link the TMC-ETR to the
connected CATU.

i.e, TMC-ETR:output_port0 -> CATU:input_port0

Reference manual for CATU component is avilable in version r2p0 of :
"Arm Coresight System-on-Chip SoC-600 Technical Reference Manual".

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/Kconfig             |  11 ++
 drivers/hwtracing/coresight/Makefile            |   1 +
 drivers/hwtracing/coresight/coresight-catu.c    | 214 ++++++++++++++++++++++++
 drivers/hwtracing/coresight/coresight-catu.h    |  84 ++++++++++
 drivers/hwtracing/coresight/coresight-tmc-etr.c |  52 ++++++
 include/linux/coresight.h                       |   1 +
 6 files changed, 363 insertions(+)
 create mode 100644 drivers/hwtracing/coresight/coresight-catu.c
 create mode 100644 drivers/hwtracing/coresight/coresight-catu.h

(limited to 'include')

diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index ef9cb3c164e1..ad34380cac49 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -31,6 +31,17 @@ config CORESIGHT_LINK_AND_SINK_TMC
 	  complies with the generic implementation of the component without
 	  special enhancement or added features.
 
+config CORESIGHT_CATU
+	bool "Coresight Address Translation Unit (CATU) driver"
+	depends on CORESIGHT_LINK_AND_SINK_TMC
+	help
+	   Enable support for the Coresight Address Translation Unit (CATU).
+	   CATU supports a scatter gather table of 4K pages, with forward/backward
+	   lookup. CATU helps TMC ETR to use a large physically non-contiguous trace
+	   buffer by translating the addresses used by ETR to the physical address
+	   by looking up the provided table. CATU can also be used in pass-through
+	   mode where the address is not translated.
+
 config CORESIGHT_SINK_TPIU
 	bool "Coresight generic TPIU driver"
 	depends on CORESIGHT_LINKS_AND_SINKS
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
index 61db9dd0d571..41870ded51a3 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
 obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o
 obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
 obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
+obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c
new file mode 100644
index 000000000000..9d0cb1f6f16b
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Coresight Address Translation Unit support
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "coresight-catu.h"
+#include "coresight-priv.h"
+
+#define csdev_to_catu_drvdata(csdev)	\
+	dev_get_drvdata(csdev->dev.parent)
+
+coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID);
+coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL);
+coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS);
+coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE);
+coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL);
+coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN);
+coresight_simple_reg64(struct catu_drvdata, sladdr,
+		       CATU_SLADDRLO, CATU_SLADDRHI);
+coresight_simple_reg64(struct catu_drvdata, inaddr,
+		       CATU_INADDRLO, CATU_INADDRHI);
+
+static struct attribute *catu_mgmt_attrs[] = {
+	&dev_attr_devid.attr,
+	&dev_attr_control.attr,
+	&dev_attr_status.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_axictrl.attr,
+	&dev_attr_irqen.attr,
+	&dev_attr_sladdr.attr,
+	&dev_attr_inaddr.attr,
+	NULL,
+};
+
+static const struct attribute_group catu_mgmt_group = {
+	.attrs = catu_mgmt_attrs,
+	.name = "mgmt",
+};
+
+static const struct attribute_group *catu_groups[] = {
+	&catu_mgmt_group,
+	NULL,
+};
+
+
+static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
+{
+	return coresight_timeout(drvdata->base,
+				 CATU_STATUS, CATU_STATUS_READY, 1);
+}
+
+static int catu_enable_hw(struct catu_drvdata *drvdata, void *__unused)
+{
+	u32 control;
+
+	if (catu_wait_for_ready(drvdata))
+		dev_warn(drvdata->dev, "Timeout while waiting for READY\n");
+
+	control = catu_read_control(drvdata);
+	if (control & BIT(CATU_CONTROL_ENABLE)) {
+		dev_warn(drvdata->dev, "CATU is already enabled\n");
+		return -EBUSY;
+	}
+
+	control |= BIT(CATU_CONTROL_ENABLE);
+	catu_write_mode(drvdata, CATU_MODE_PASS_THROUGH);
+	catu_write_control(drvdata, control);
+	dev_dbg(drvdata->dev, "Enabled in Pass through mode\n");
+	return 0;
+}
+
+static int catu_enable(struct coresight_device *csdev, void *data)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_enable_hw(catu_drvdata, data);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+static int catu_disable_hw(struct catu_drvdata *drvdata)
+{
+	int rc = 0;
+
+	catu_write_control(drvdata, 0);
+	if (catu_wait_for_ready(drvdata)) {
+		dev_info(drvdata->dev, "Timeout while waiting for READY\n");
+		rc = -EAGAIN;
+	}
+
+	dev_dbg(drvdata->dev, "Disabled\n");
+	return rc;
+}
+
+static int catu_disable(struct coresight_device *csdev, void *__unused)
+{
+	int rc;
+	struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev);
+
+	CS_UNLOCK(catu_drvdata->base);
+	rc = catu_disable_hw(catu_drvdata);
+	CS_LOCK(catu_drvdata->base);
+	return rc;
+}
+
+const struct coresight_ops_helper catu_helper_ops = {
+	.enable = catu_enable,
+	.disable = catu_disable,
+};
+
+const struct coresight_ops catu_ops = {
+	.helper_ops = &catu_helper_ops,
+};
+
+static int catu_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	int ret = 0;
+	u32 dma_mask;
+	struct catu_drvdata *drvdata;
+	struct coresight_desc catu_desc;
+	struct coresight_platform_data *pdata = NULL;
+	struct device *dev = &adev->dev;
+	struct device_node *np = dev->of_node;
+	void __iomem *base;
+
+	if (np) {
+		pdata = of_get_coresight_platform_data(dev, np);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out;
+		}
+		dev->platform_data = pdata;
+	}
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	drvdata->dev = dev;
+	dev_set_drvdata(dev, drvdata);
+	base = devm_ioremap_resource(dev, &adev->res);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
+
+	/* Setup dma mask for the device */
+	dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f;
+	switch (dma_mask) {
+	case 32:
+	case 40:
+	case 44:
+	case 48:
+	case 52:
+	case 56:
+	case 64:
+		break;
+	default:
+		/* Default to the 40bits as supported by TMC-ETR */
+		dma_mask = 40;
+	}
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask));
+	if (ret)
+		goto out;
+
+	drvdata->base = base;
+	catu_desc.pdata = pdata;
+	catu_desc.dev = dev;
+	catu_desc.groups = catu_groups;
+	catu_desc.type = CORESIGHT_DEV_TYPE_HELPER;
+	catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU;
+	catu_desc.ops = &catu_ops;
+	drvdata->csdev = coresight_register(&catu_desc);
+	if (IS_ERR(drvdata->csdev))
+		ret = PTR_ERR(drvdata->csdev);
+out:
+	pm_runtime_put(&adev->dev);
+	return ret;
+}
+
+static struct amba_id catu_ids[] = {
+	{
+		.id	= 0x000bb9ee,
+		.mask	= 0x000fffff,
+	},
+	{},
+};
+
+static struct amba_driver catu_driver = {
+	.drv = {
+		.name			= "coresight-catu",
+		.owner			= THIS_MODULE,
+		.suppress_bind_attrs	= true,
+	},
+	.probe				= catu_probe,
+	.id_table			= catu_ids,
+};
+
+builtin_amba_driver(catu_driver);
diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h
new file mode 100644
index 000000000000..4f221fccffca
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-catu.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Arm Limited. All rights reserved.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ */
+
+#ifndef _CORESIGHT_CATU_H
+#define _CORESIGHT_CATU_H
+
+#include "coresight-priv.h"
+
+/* Register offset from base */
+#define CATU_CONTROL		0x000
+#define CATU_MODE		0x004
+#define CATU_AXICTRL		0x008
+#define CATU_IRQEN		0x00c
+#define CATU_SLADDRLO		0x020
+#define CATU_SLADDRHI		0x024
+#define CATU_INADDRLO		0x028
+#define CATU_INADDRHI		0x02c
+#define CATU_STATUS		0x100
+#define CATU_DEVARCH		0xfbc
+
+#define CATU_CONTROL_ENABLE	0
+
+#define CATU_MODE_PASS_THROUGH	0U
+#define CATU_MODE_TRANSLATE	1U
+
+#define CATU_STATUS_READY	8
+#define CATU_STATUS_ADRERR	0
+#define CATU_STATUS_AXIERR	4
+
+#define CATU_IRQEN_ON		0x1
+#define CATU_IRQEN_OFF		0x0
+
+struct catu_drvdata {
+	struct device *dev;
+	void __iomem *base;
+	struct coresight_device *csdev;
+	int irq;
+};
+
+#define CATU_REG32(name, offset)					\
+static inline u32							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, offset, -1);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u32 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, offset, -1);	\
+}
+
+#define CATU_REG_PAIR(name, lo_off, hi_off)				\
+static inline u64							\
+catu_read_##name(struct catu_drvdata *drvdata)				\
+{									\
+	return coresight_read_reg_pair(drvdata->base, lo_off, hi_off);	\
+}									\
+static inline void							\
+catu_write_##name(struct catu_drvdata *drvdata, u64 val)		\
+{									\
+	coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off);	\
+}
+
+CATU_REG32(control, CATU_CONTROL);
+CATU_REG32(mode, CATU_MODE);
+CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI)
+CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI)
+
+static inline bool coresight_is_catu_device(struct coresight_device *csdev)
+{
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return false;
+	if (csdev->type != CORESIGHT_DEV_TYPE_HELPER)
+		return false;
+	if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU)
+		return false;
+	return true;
+}
+
+#endif
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 8af451230121..e37923acb725 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -9,6 +9,7 @@
 #include <linux/iommu.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include "coresight-catu.h"
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
@@ -701,6 +702,48 @@ static const struct etr_buf_operations etr_sg_buf_ops = {
 	.get_data = tmc_etr_get_data_sg_buf,
 };
 
+/*
+ * TMC ETR could be connected to a CATU device, which can provide address
+ * translation service. This is represented by the Output port of the TMC
+ * (ETR) connected to the input port of the CATU.
+ *
+ * Returns	: coresight_device ptr for the CATU device if a CATU is found.
+ *		: NULL otherwise.
+ */
+static inline struct coresight_device *
+tmc_etr_get_catu_device(struct tmc_drvdata *drvdata)
+{
+	int i;
+	struct coresight_device *tmp, *etr = drvdata->csdev;
+
+	if (!IS_ENABLED(CONFIG_CORESIGHT_CATU))
+		return NULL;
+
+	for (i = 0; i < etr->nr_outport; i++) {
+		tmp = etr->conns[i].child_dev;
+		if (tmp && coresight_is_catu_device(tmp))
+			return tmp;
+	}
+
+	return NULL;
+}
+
+static inline void tmc_etr_enable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->enable)
+		helper_ops(catu)->enable(catu, NULL);
+}
+
+static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata)
+{
+	struct coresight_device *catu = tmc_etr_get_catu_device(drvdata);
+
+	if (catu && helper_ops(catu)->disable)
+		helper_ops(catu)->disable(catu, NULL);
+}
+
 static const struct etr_buf_operations *etr_buf_ops[] = {
 	[ETR_MODE_FLAT] = &etr_flat_buf_ops,
 	[ETR_MODE_ETR_SG] = &etr_sg_buf_ops,
@@ -844,6 +887,12 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 	u32 axictl, sts;
 	struct etr_buf *etr_buf = drvdata->etr_buf;
 
+	/*
+	 * If this ETR is connected to a CATU, enable it before we turn
+	 * this on
+	 */
+	tmc_etr_enable_catu(drvdata);
+
 	CS_UNLOCK(drvdata->base);
 
 	/* Wait for TMCSReady bit to be set */
@@ -952,6 +1001,9 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
 	tmc_disable_hw(drvdata);
 
 	CS_LOCK(drvdata->base);
+
+	/* Disable CATU device if this ETR is connected to one */
+	tmc_etr_disable_catu(drvdata);
 }
 
 static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev)
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 3d40a2b135f1..d828a6efe0b1 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -65,6 +65,7 @@ enum coresight_dev_subtype_source {
 
 enum coresight_dev_subtype_helper {
 	CORESIGHT_DEV_SUBTYPE_HELPER_NONE,
+	CORESIGHT_DEV_SUBTYPE_HELPER_CATU,
 };
 
 /**
-- 
cgit v1.2.3


From 571d78bd458a831cf51dff2afa1dda3309bd82b2 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:09 +0800
Subject: fpga: mgr: add region_id to fpga_image_info

This patch adds region_id to fpga_image_info data structure, it
allows driver to pass region id information to fpga-mgr via
fpga_image_info for fpga reconfiguration function.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index eec7c2478b0d..3eb6b9d60d65 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -77,6 +77,7 @@ enum fpga_mgr_states {
  * @sgt: scatter/gather table containing FPGA image
  * @buf: contiguous buffer containing FPGA image
  * @count: size of buf
+ * @region_id: id of target region
  * @dev: device that owns this
  * @overlay: Device Tree overlay
  */
@@ -89,6 +90,7 @@ struct fpga_image_info {
 	struct sg_table *sgt;
 	const char *buf;
 	size_t count;
+	int region_id;
 	struct device *dev;
 #ifdef CONFIG_OF
 	struct device_node *overlay;
-- 
cgit v1.2.3


From ecb5fbe299dfaad778033259f35bc696fa1fb743 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:10 +0800
Subject: fpga: mgr: add status for fpga-manager

This patch adds status sysfs interface for fpga manager, it's a
read only interface which allows user to get fpga manager status,
including full/partial reconfiguration error and other status
information. It adds a status callback to fpga_manager_ops too,
allows each fpga_manager driver to define its own method to
collect latest status from hardware.

The following sysfs file is created:
* /sys/class/fpga_manager/<fpga>/status
  Return status of fpga manager, including reconfiguration errors.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-fpga-manager | 24 +++++++++++++++++++
 drivers/fpga/fpga-mgr.c                            | 28 ++++++++++++++++++++++
 include/linux/fpga/fpga-mgr.h                      |  9 +++++++
 3 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-fpga-manager b/Documentation/ABI/testing/sysfs-class-fpga-manager
index 23056c532fdd..5284fa33d4c5 100644
--- a/Documentation/ABI/testing/sysfs-class-fpga-manager
+++ b/Documentation/ABI/testing/sysfs-class-fpga-manager
@@ -35,3 +35,27 @@ Description:	Read fpga manager state as a string.
 		* write complete	= Doing post programming steps
 		* write complete error	= Error while doing post programming
 		* operating		= FPGA is programmed and operating
+
+What:		/sys/class/fpga_manager/<fpga>/status
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read fpga manager status as a string.
+		If FPGA programming operation fails, it could be caused by crc
+		error or incompatible bitstream image. The intent of this
+		interface is to provide more detailed information for FPGA
+		programming errors to userspace. This is a list of strings for
+		the supported status.
+
+		* reconfig operation error 	- invalid operations detected by
+						  reconfiguration hardware.
+						  e.g. start reconfiguration
+						  with errors not cleared
+		* reconfig CRC error		- CRC error detected by
+						  reconfiguration hardware.
+		* reconfig incompatible image	- reconfiguration image is
+						  incompatible with hardware
+		* reconfig IP protocol error	- protocol errors detected by
+						  reconfiguration hardware
+		* reconfig fifo overflow error	- FIFO overflow detected by
+						  reconfiguration hardware
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index c1564cf827fe..a41b07e37884 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -406,12 +406,40 @@ static ssize_t state_show(struct device *dev,
 	return sprintf(buf, "%s\n", state_str[mgr->state]);
 }
 
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct fpga_manager *mgr = to_fpga_manager(dev);
+	u64 status;
+	int len = 0;
+
+	if (!mgr->mops->status)
+		return -ENOENT;
+
+	status = mgr->mops->status(mgr);
+
+	if (status & FPGA_MGR_STATUS_OPERATION_ERR)
+		len += sprintf(buf + len, "reconfig operation error\n");
+	if (status & FPGA_MGR_STATUS_CRC_ERR)
+		len += sprintf(buf + len, "reconfig CRC error\n");
+	if (status & FPGA_MGR_STATUS_INCOMPATIBLE_IMAGE_ERR)
+		len += sprintf(buf + len, "reconfig incompatible image\n");
+	if (status & FPGA_MGR_STATUS_IP_PROTOCOL_ERR)
+		len += sprintf(buf + len, "reconfig IP protocol error\n");
+	if (status & FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR)
+		len += sprintf(buf + len, "reconfig fifo overflow error\n");
+
+	return len;
+}
+
 static DEVICE_ATTR_RO(name);
 static DEVICE_ATTR_RO(state);
+static DEVICE_ATTR_RO(status);
 
 static struct attribute *fpga_mgr_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_state.attr,
+	&dev_attr_status.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(fpga_mgr);
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 3eb6b9d60d65..e249b7250345 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -101,6 +101,7 @@ struct fpga_image_info {
  * struct fpga_manager_ops - ops for low level fpga manager drivers
  * @initial_header_size: Maximum number of bytes that should be passed into write_init
  * @state: returns an enum value of the FPGA's state
+ * @status: returns status of the FPGA, including reconfiguration error code
  * @write_init: prepare the FPGA to receive confuration data
  * @write: write count bytes of configuration data to the FPGA
  * @write_sg: write the scatter list of configuration data to the FPGA
@@ -115,6 +116,7 @@ struct fpga_image_info {
 struct fpga_manager_ops {
 	size_t initial_header_size;
 	enum fpga_mgr_states (*state)(struct fpga_manager *mgr);
+	u64 (*status)(struct fpga_manager *mgr);
 	int (*write_init)(struct fpga_manager *mgr,
 			  struct fpga_image_info *info,
 			  const char *buf, size_t count);
@@ -126,6 +128,13 @@ struct fpga_manager_ops {
 	const struct attribute_group **groups;
 };
 
+/* FPGA manager status: Partial/Full Reconfiguration errors */
+#define FPGA_MGR_STATUS_OPERATION_ERR		BIT(0)
+#define FPGA_MGR_STATUS_CRC_ERR			BIT(1)
+#define FPGA_MGR_STATUS_INCOMPATIBLE_IMAGE_ERR	BIT(2)
+#define FPGA_MGR_STATUS_IP_PROTOCOL_ERR		BIT(3)
+#define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR	BIT(4)
+
 /**
  * struct fpga_manager - fpga manager structure
  * @name: name of low level fpga manager
-- 
cgit v1.2.3


From 99a560bde313892f87ca81db568a829d3d205882 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:11 +0800
Subject: fpga: mgr: add compat_id support

This patch introduces compat_id support to fpga manager, it adds
a fpga_compat_id pointer to fpga manager data structure to allow
fpga manager drivers to save the compatibility id. This compat_id
could be used for compatibility checking before doing partial
reconfiguration to associated fpga regions.

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fpga/fpga-mgr.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index e249b7250345..8942e61f0028 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -135,12 +135,24 @@ struct fpga_manager_ops {
 #define FPGA_MGR_STATUS_IP_PROTOCOL_ERR		BIT(3)
 #define FPGA_MGR_STATUS_FIFO_OVERFLOW_ERR	BIT(4)
 
+/**
+ * struct fpga_compat_id - id for compatibility check
+ *
+ * @id_h: high 64bit of the compat_id
+ * @id_l: low 64bit of the compat_id
+ */
+struct fpga_compat_id {
+	u64 id_h;
+	u64 id_l;
+};
+
 /**
  * struct fpga_manager - fpga manager structure
  * @name: name of low level fpga manager
  * @dev: fpga manager device
  * @ref_mutex: only allows one reference to fpga manager
  * @state: state of fpga manager
+ * @compat_id: FPGA manager id for compatibility check.
  * @mops: pointer to struct of fpga manager ops
  * @priv: low level driver private date
  */
@@ -149,6 +161,7 @@ struct fpga_manager {
 	struct device dev;
 	struct mutex ref_mutex;
 	enum fpga_mgr_states state;
+	struct fpga_compat_id *compat_id;
 	const struct fpga_manager_ops *mops;
 	void *priv;
 };
-- 
cgit v1.2.3


From 41a8b2c56470b7e4e3e2db93324d50bbbf60cdc4 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:12 +0800
Subject: fpga: region: add compat_id support

This patch introduces a compat_id pointer member and sysfs interface
for each fpga region, similar as compat_id for fpga manager, it allows
applications to read the per region compat_id for compatibility
checking before other actions on this fpga-region (e.g. PR).

Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-class-fpga-region |  9 +++++++++
 drivers/fpga/fpga-region.c                        | 22 ++++++++++++++++++++++
 include/linux/fpga/fpga-region.h                  |  2 ++
 3 files changed, 33 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-fpga-region

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-fpga-region b/Documentation/ABI/testing/sysfs-class-fpga-region
new file mode 100644
index 000000000000..bc7ec644acc9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-fpga-region
@@ -0,0 +1,9 @@
+What:		/sys/class/fpga_region/<region>/compat_id
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	FPGA region id for compatibility check, e.g. compatibility
+		of the FPGA reconfiguration hardware and image. This value
+		is defined or calculated by the layer that is creating the
+		FPGA region. This interface returns the compat_id value or
+		just error code -ENOENT in case compat_id is not used.
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index 6d214d75c7be..0d65220d5ec5 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -158,6 +158,27 @@ err_put_region:
 }
 EXPORT_SYMBOL_GPL(fpga_region_program_fpga);
 
+static ssize_t compat_id_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct fpga_region *region = to_fpga_region(dev);
+
+	if (!region->compat_id)
+		return -ENOENT;
+
+	return sprintf(buf, "%016llx%016llx\n",
+		       (unsigned long long)region->compat_id->id_h,
+		       (unsigned long long)region->compat_id->id_l);
+}
+
+static DEVICE_ATTR_RO(compat_id);
+
+static struct attribute *fpga_region_attrs[] = {
+	&dev_attr_compat_id.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(fpga_region);
+
 /**
  * fpga_region_create - alloc and init a struct fpga_region
  * @dev: device parent
@@ -258,6 +279,7 @@ static int __init fpga_region_init(void)
 	if (IS_ERR(fpga_region_class))
 		return PTR_ERR(fpga_region_class);
 
+	fpga_region_class->dev_groups = fpga_region_groups;
 	fpga_region_class->dev_release = fpga_region_dev_release;
 
 	return 0;
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index d7071cddd727..0521b7f577a4 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -14,6 +14,7 @@
  * @bridge_list: list of FPGA bridges specified in region
  * @mgr: FPGA manager
  * @info: FPGA image info
+ * @compat_id: FPGA region id for compatibility check.
  * @priv: private data
  * @get_bridges: optional function to get bridges to a list
  */
@@ -23,6 +24,7 @@ struct fpga_region {
 	struct list_head bridge_list;
 	struct fpga_manager *mgr;
 	struct fpga_image_info *info;
+	struct fpga_compat_id *compat_id;
 	void *priv;
 	int (*get_bridges)(struct fpga_region *region);
 };
-- 
cgit v1.2.3


From 620e1902f6fe57ddacdabd9e33fadbd290be9652 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:23 +0800
Subject: fpga: dfl: fme: add DFL_FPGA_GET_API_VERSION/CHECK_EXTENSION ioctls
 support

DFL_FPGA_GET_API_VERSION and DFL_FPGA_CHECK_EXTENSION ioctls are common
ones which need to be supported by all feature devices drivers including
FME and AFU. Userspace application can use these ioctl interfaces to get
the API info and check if specific extension is supported or not in
current driver.

This patch implements above 2 ioctls in FPGA Management Engine (FME)
driver.

Signed-off-by: Tim Whisonant <tim.whisonant@intel.com>
Signed-off-by: Enno Luebbers <enno.luebbers@intel.com>
Signed-off-by: Shiva Rao <shiva.rao@intel.com>
Signed-off-by: Christopher Rauer <christopher.rauer@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ioctl/ioctl-number.txt |  1 +
 drivers/fpga/dfl-fme-main.c          | 12 +++++++++
 include/uapi/linux/fpga-dfl.h        | 50 ++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 include/uapi/linux/fpga-dfl.h

(limited to 'include')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 480c8609dc58..db9afea24299 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -322,6 +322,7 @@ Code  Seq#(hex)	Include File		Comments
 0xB3	00	linux/mmc/ioctl.h
 0xB4	00-0F	linux/gpio.h		<mailto:linux-gpio@vger.kernel.org>
 0xB5	00-0F	uapi/linux/rpmsg.h	<mailto:linux-remoteproc@vger.kernel.org>
+0xB6	all	linux/fpga-dfl.h
 0xC0	00-0F	linux/usb/iowarrior.h
 0xCA	00-0F	uapi/misc/cxl.h
 0xCA	10-2F	uapi/misc/ocxl.h
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index c23c56fe3f4b..c83ff88e3bbb 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -16,6 +16,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/fpga-dfl.h>
 
 #include "dfl.h"
 
@@ -116,6 +117,13 @@ static struct dfl_feature_driver fme_feature_drvs[] = {
 	},
 };
 
+static long fme_ioctl_check_extension(struct dfl_feature_platform_data *pdata,
+				      unsigned long arg)
+{
+	/* No extension support for now */
+	return 0;
+}
+
 static int fme_open(struct inode *inode, struct file *filp)
 {
 	struct platform_device *fdev = dfl_fpga_inode_to_feature_dev(inode);
@@ -156,6 +164,10 @@ static long fme_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	dev_dbg(&pdev->dev, "%s cmd 0x%x\n", __func__, cmd);
 
 	switch (cmd) {
+	case DFL_FPGA_GET_API_VERSION:
+		return DFL_FPGA_API_VERSION;
+	case DFL_FPGA_CHECK_EXTENSION:
+		return fme_ioctl_check_extension(pdata, arg);
 	default:
 		/*
 		 * Let sub-feature's ioctl function to handle the cmd.
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
new file mode 100644
index 000000000000..858e4437c31c
--- /dev/null
+++ b/include/uapi/linux/fpga-dfl.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Header File for FPGA DFL User API
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Kang Luwei <luwei.kang@intel.com>
+ *   Zhang Yi <yi.z.zhang@intel.com>
+ *   Wu Hao <hao.wu@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ */
+
+#ifndef _UAPI_LINUX_FPGA_DFL_H
+#define _UAPI_LINUX_FPGA_DFL_H
+
+#include <linux/ioctl.h>
+
+#define DFL_FPGA_API_VERSION 0
+
+/*
+ * The IOCTL interface for DFL based FPGA is designed for extensibility by
+ * embedding the structure length (argsz) and flags into structures passed
+ * between kernel and userspace. This design referenced the VFIO IOCTL
+ * interface (include/uapi/linux/vfio.h).
+ */
+
+#define DFL_FPGA_MAGIC 0xB6
+
+#define DFL_FPGA_BASE 0
+
+/**
+ * DFL_FPGA_GET_API_VERSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0)
+ *
+ * Report the version of the driver API.
+ * Return: Driver API Version.
+ */
+
+#define DFL_FPGA_GET_API_VERSION	_IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0)
+
+/**
+ * DFL_FPGA_CHECK_EXTENSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1)
+ *
+ * Check whether an extension is supported.
+ * Return: 0 if not supported, otherwise the extension is supported.
+ */
+
+#define DFL_FPGA_CHECK_EXTENSION	_IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1)
+
+#endif /* _UAPI_LINUX_FPGA_DFL_H */
-- 
cgit v1.2.3


From 29de76240e861d52b75405166337e94184f1875d Mon Sep 17 00:00:00 2001
From: Kang Luwei <luwei.kang@intel.com>
Date: Sat, 30 Jun 2018 08:53:24 +0800
Subject: fpga: dfl: fme: add partial reconfiguration sub feature support

Partial Reconfiguration (PR) is the most important function for FME. It
allows reconfiguration for given Port/Accelerated Function Unit (AFU).

It creates platform devices for fpga-mgr, fpga-regions and fpga-bridges,
and invokes fpga-region's interface (fpga_region_program_fpga) for PR
operation once PR request received via ioctl. Below user space interface
is exposed by this sub feature.

Ioctl interface:
* DFL_FPGA_FME_PORT_PR
  Do partial reconfiguration per information from userspace, including
  target port(AFU), buffer size and address info. It returns error code
  to userspace if failed. For detailed PR error information, user needs
  to read fpga-mgr's status sysfs interface.

Signed-off-by: Tim Whisonant <tim.whisonant@intel.com>
Signed-off-by: Enno Luebbers <enno.luebbers@intel.com>
Signed-off-by: Shiva Rao <shiva.rao@intel.com>
Signed-off-by: Christopher Rauer <christopher.rauer@intel.com>
Signed-off-by: Kang Luwei <luwei.kang@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/Makefile         |   2 +-
 drivers/fpga/dfl-fme-main.c   |  43 +++-
 drivers/fpga/dfl-fme-pr.c     | 479 ++++++++++++++++++++++++++++++++++++++++++
 drivers/fpga/dfl-fme-pr.h     |  84 ++++++++
 drivers/fpga/dfl-fme.h        |  38 ++++
 include/uapi/linux/fpga-dfl.h |  27 +++
 6 files changed, 671 insertions(+), 2 deletions(-)
 create mode 100644 drivers/fpga/dfl-fme-pr.c
 create mode 100644 drivers/fpga/dfl-fme-pr.h
 create mode 100644 drivers/fpga/dfl-fme.h

(limited to 'include')

diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index db11f340ba0f..fd334d40aa1c 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -33,7 +33,7 @@ obj-$(CONFIG_OF_FPGA_REGION)		+= of-fpga-region.o
 obj-$(CONFIG_FPGA_DFL)			+= dfl.o
 obj-$(CONFIG_FPGA_DFL_FME)		+= dfl-fme.o
 
-dfl-fme-objs := dfl-fme-main.o
+dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
 
 # Drivers for FPGAs which implement DFL
 obj-$(CONFIG_FPGA_DFL_PCI)		+= dfl-pci.o
diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index c83ff88e3bbb..086ad2420ade 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -19,6 +19,7 @@
 #include <linux/fpga-dfl.h>
 
 #include "dfl.h"
+#include "dfl-fme.h"
 
 static ssize_t ports_num_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
@@ -112,6 +113,10 @@ static struct dfl_feature_driver fme_feature_drvs[] = {
 		.id = FME_FEATURE_ID_HEADER,
 		.ops = &fme_hdr_ops,
 	},
+	{
+		.id = FME_FEATURE_ID_PR_MGMT,
+		.ops = &pr_mgmt_ops,
+	},
 	{
 		.ops = NULL,
 	},
@@ -187,6 +192,35 @@ static long fme_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	return -EINVAL;
 }
 
+static int fme_dev_init(struct platform_device *pdev)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_fme *fme;
+
+	fme = devm_kzalloc(&pdev->dev, sizeof(*fme), GFP_KERNEL);
+	if (!fme)
+		return -ENOMEM;
+
+	fme->pdata = pdata;
+
+	mutex_lock(&pdata->lock);
+	dfl_fpga_pdata_set_private(pdata, fme);
+	mutex_unlock(&pdata->lock);
+
+	return 0;
+}
+
+static void fme_dev_destroy(struct platform_device *pdev)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_fme *fme;
+
+	mutex_lock(&pdata->lock);
+	fme = dfl_fpga_pdata_get_private(pdata);
+	dfl_fpga_pdata_set_private(pdata, NULL);
+	mutex_unlock(&pdata->lock);
+}
+
 static const struct file_operations fme_fops = {
 	.owner		= THIS_MODULE,
 	.open		= fme_open,
@@ -198,10 +232,14 @@ static int fme_probe(struct platform_device *pdev)
 {
 	int ret;
 
-	ret = dfl_fpga_dev_feature_init(pdev, fme_feature_drvs);
+	ret = fme_dev_init(pdev);
 	if (ret)
 		goto exit;
 
+	ret = dfl_fpga_dev_feature_init(pdev, fme_feature_drvs);
+	if (ret)
+		goto dev_destroy;
+
 	ret = dfl_fpga_dev_ops_register(pdev, &fme_fops, THIS_MODULE);
 	if (ret)
 		goto feature_uinit;
@@ -210,6 +248,8 @@ static int fme_probe(struct platform_device *pdev)
 
 feature_uinit:
 	dfl_fpga_dev_feature_uinit(pdev);
+dev_destroy:
+	fme_dev_destroy(pdev);
 exit:
 	return ret;
 }
@@ -218,6 +258,7 @@ static int fme_remove(struct platform_device *pdev)
 {
 	dfl_fpga_dev_ops_unregister(pdev);
 	dfl_fpga_dev_feature_uinit(pdev);
+	fme_dev_destroy(pdev);
 
 	return 0;
 }
diff --git a/drivers/fpga/dfl-fme-pr.c b/drivers/fpga/dfl-fme-pr.c
new file mode 100644
index 000000000000..fc9fd2d0482f
--- /dev/null
+++ b/drivers/fpga/dfl-fme-pr.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Management Engine (FME) Partial Reconfiguration
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Kang Luwei <luwei.kang@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *   Wu Hao <hao.wu@intel.com>
+ *   Joseph Grecco <joe.grecco@intel.com>
+ *   Enno Luebbers <enno.luebbers@intel.com>
+ *   Tim Whisonant <tim.whisonant@intel.com>
+ *   Ananda Ravuri <ananda.ravuri@intel.com>
+ *   Christopher Rauer <christopher.rauer@intel.com>
+ *   Henry Mitchel <henry.mitchel@intel.com>
+ */
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+#include <linux/fpga/fpga-mgr.h>
+#include <linux/fpga/fpga-bridge.h>
+#include <linux/fpga/fpga-region.h>
+#include <linux/fpga-dfl.h>
+
+#include "dfl.h"
+#include "dfl-fme.h"
+#include "dfl-fme-pr.h"
+
+static struct dfl_fme_region *
+dfl_fme_region_find_by_port_id(struct dfl_fme *fme, int port_id)
+{
+	struct dfl_fme_region *fme_region;
+
+	list_for_each_entry(fme_region, &fme->region_list, node)
+		if (fme_region->port_id == port_id)
+			return fme_region;
+
+	return NULL;
+}
+
+static int dfl_fme_region_match(struct device *dev, const void *data)
+{
+	return dev->parent == data;
+}
+
+static struct fpga_region *dfl_fme_region_find(struct dfl_fme *fme, int port_id)
+{
+	struct dfl_fme_region *fme_region;
+	struct fpga_region *region;
+
+	fme_region = dfl_fme_region_find_by_port_id(fme, port_id);
+	if (!fme_region)
+		return NULL;
+
+	region = fpga_region_class_find(NULL, &fme_region->region->dev,
+					dfl_fme_region_match);
+	if (!region)
+		return NULL;
+
+	return region;
+}
+
+static int fme_pr(struct platform_device *pdev, unsigned long arg)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	void __user *argp = (void __user *)arg;
+	struct dfl_fpga_fme_port_pr port_pr;
+	struct fpga_image_info *info;
+	struct fpga_region *region;
+	void __iomem *fme_hdr;
+	struct dfl_fme *fme;
+	unsigned long minsz;
+	void *buf = NULL;
+	int ret = 0;
+	u64 v;
+
+	minsz = offsetofend(struct dfl_fpga_fme_port_pr, buffer_address);
+
+	if (copy_from_user(&port_pr, argp, minsz))
+		return -EFAULT;
+
+	if (port_pr.argsz < minsz || port_pr.flags)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(port_pr.buffer_size, 4))
+		return -EINVAL;
+
+	/* get fme header region */
+	fme_hdr = dfl_get_feature_ioaddr_by_id(&pdev->dev,
+					       FME_FEATURE_ID_HEADER);
+
+	/* check port id */
+	v = readq(fme_hdr + FME_HDR_CAP);
+	if (port_pr.port_id >= FIELD_GET(FME_CAP_NUM_PORTS, v)) {
+		dev_dbg(&pdev->dev, "port number more than maximum\n");
+		return -EINVAL;
+	}
+
+	if (!access_ok(VERIFY_READ,
+		       (void __user *)(unsigned long)port_pr.buffer_address,
+		       port_pr.buffer_size))
+		return -EFAULT;
+
+	buf = vmalloc(port_pr.buffer_size);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf,
+			   (void __user *)(unsigned long)port_pr.buffer_address,
+			   port_pr.buffer_size)) {
+		ret = -EFAULT;
+		goto free_exit;
+	}
+
+	/* prepare fpga_image_info for PR */
+	info = fpga_image_info_alloc(&pdev->dev);
+	if (!info) {
+		ret = -ENOMEM;
+		goto free_exit;
+	}
+
+	info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
+
+	mutex_lock(&pdata->lock);
+	fme = dfl_fpga_pdata_get_private(pdata);
+	/* fme device has been unregistered. */
+	if (!fme) {
+		ret = -EINVAL;
+		goto unlock_exit;
+	}
+
+	region = dfl_fme_region_find(fme, port_pr.port_id);
+	if (!region) {
+		ret = -EINVAL;
+		goto unlock_exit;
+	}
+
+	fpga_image_info_free(region->info);
+
+	info->buf = buf;
+	info->count = port_pr.buffer_size;
+	info->region_id = port_pr.port_id;
+	region->info = info;
+
+	ret = fpga_region_program_fpga(region);
+
+	/*
+	 * it allows userspace to reset the PR region's logic by disabling and
+	 * reenabling the bridge to clear things out between accleration runs.
+	 * so no need to hold the bridges after partial reconfiguration.
+	 */
+	if (region->get_bridges)
+		fpga_bridges_put(&region->bridge_list);
+
+	put_device(&region->dev);
+unlock_exit:
+	mutex_unlock(&pdata->lock);
+free_exit:
+	vfree(buf);
+	if (copy_to_user((void __user *)arg, &port_pr, minsz))
+		return -EFAULT;
+
+	return ret;
+}
+
+/**
+ * dfl_fme_create_mgr - create fpga mgr platform device as child device
+ *
+ * @pdata: fme platform_device's pdata
+ *
+ * Return: mgr platform device if successful, and error code otherwise.
+ */
+static struct platform_device *
+dfl_fme_create_mgr(struct dfl_feature_platform_data *pdata,
+		   struct dfl_feature *feature)
+{
+	struct platform_device *mgr, *fme = pdata->dev;
+	struct dfl_fme_mgr_pdata mgr_pdata;
+	int ret = -ENOMEM;
+
+	if (!feature->ioaddr)
+		return ERR_PTR(-ENODEV);
+
+	mgr_pdata.ioaddr = feature->ioaddr;
+
+	/*
+	 * Each FME has only one fpga-mgr, so allocate platform device using
+	 * the same FME platform device id.
+	 */
+	mgr = platform_device_alloc(DFL_FPGA_FME_MGR, fme->id);
+	if (!mgr)
+		return ERR_PTR(ret);
+
+	mgr->dev.parent = &fme->dev;
+
+	ret = platform_device_add_data(mgr, &mgr_pdata, sizeof(mgr_pdata));
+	if (ret)
+		goto create_mgr_err;
+
+	ret = platform_device_add(mgr);
+	if (ret)
+		goto create_mgr_err;
+
+	return mgr;
+
+create_mgr_err:
+	platform_device_put(mgr);
+	return ERR_PTR(ret);
+}
+
+/**
+ * dfl_fme_destroy_mgr - destroy fpga mgr platform device
+ * @pdata: fme platform device's pdata
+ */
+static void dfl_fme_destroy_mgr(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_fme *priv = dfl_fpga_pdata_get_private(pdata);
+
+	platform_device_unregister(priv->mgr);
+}
+
+/**
+ * dfl_fme_create_bridge - create fme fpga bridge platform device as child
+ *
+ * @pdata: fme platform device's pdata
+ * @port_id: port id for the bridge to be created.
+ *
+ * Return: bridge platform device if successful, and error code otherwise.
+ */
+static struct dfl_fme_bridge *
+dfl_fme_create_bridge(struct dfl_feature_platform_data *pdata, int port_id)
+{
+	struct device *dev = &pdata->dev->dev;
+	struct dfl_fme_br_pdata br_pdata;
+	struct dfl_fme_bridge *fme_br;
+	int ret = -ENOMEM;
+
+	fme_br = devm_kzalloc(dev, sizeof(*fme_br), GFP_KERNEL);
+	if (!fme_br)
+		return ERR_PTR(ret);
+
+	br_pdata.cdev = pdata->dfl_cdev;
+	br_pdata.port_id = port_id;
+
+	fme_br->br = platform_device_alloc(DFL_FPGA_FME_BRIDGE,
+					   PLATFORM_DEVID_AUTO);
+	if (!fme_br->br)
+		return ERR_PTR(ret);
+
+	fme_br->br->dev.parent = dev;
+
+	ret = platform_device_add_data(fme_br->br, &br_pdata, sizeof(br_pdata));
+	if (ret)
+		goto create_br_err;
+
+	ret = platform_device_add(fme_br->br);
+	if (ret)
+		goto create_br_err;
+
+	return fme_br;
+
+create_br_err:
+	platform_device_put(fme_br->br);
+	return ERR_PTR(ret);
+}
+
+/**
+ * dfl_fme_destroy_bridge - destroy fpga bridge platform device
+ * @fme_br: fme bridge to destroy
+ */
+static void dfl_fme_destroy_bridge(struct dfl_fme_bridge *fme_br)
+{
+	platform_device_unregister(fme_br->br);
+}
+
+/**
+ * dfl_fme_destroy_bridge - destroy all fpga bridge platform device
+ * @pdata: fme platform device's pdata
+ */
+static void dfl_fme_destroy_bridges(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_fme *priv = dfl_fpga_pdata_get_private(pdata);
+	struct dfl_fme_bridge *fbridge, *tmp;
+
+	list_for_each_entry_safe(fbridge, tmp, &priv->bridge_list, node) {
+		list_del(&fbridge->node);
+		dfl_fme_destroy_bridge(fbridge);
+	}
+}
+
+/**
+ * dfl_fme_create_region - create fpga region platform device as child
+ *
+ * @pdata: fme platform device's pdata
+ * @mgr: mgr platform device needed for region
+ * @br: br platform device needed for region
+ * @port_id: port id
+ *
+ * Return: fme region if successful, and error code otherwise.
+ */
+static struct dfl_fme_region *
+dfl_fme_create_region(struct dfl_feature_platform_data *pdata,
+		      struct platform_device *mgr,
+		      struct platform_device *br, int port_id)
+{
+	struct dfl_fme_region_pdata region_pdata;
+	struct device *dev = &pdata->dev->dev;
+	struct dfl_fme_region *fme_region;
+	int ret = -ENOMEM;
+
+	fme_region = devm_kzalloc(dev, sizeof(*fme_region), GFP_KERNEL);
+	if (!fme_region)
+		return ERR_PTR(ret);
+
+	region_pdata.mgr = mgr;
+	region_pdata.br = br;
+
+	/*
+	 * Each FPGA device may have more than one port, so allocate platform
+	 * device using the same port platform device id.
+	 */
+	fme_region->region = platform_device_alloc(DFL_FPGA_FME_REGION, br->id);
+	if (!fme_region->region)
+		return ERR_PTR(ret);
+
+	fme_region->region->dev.parent = dev;
+
+	ret = platform_device_add_data(fme_region->region, &region_pdata,
+				       sizeof(region_pdata));
+	if (ret)
+		goto create_region_err;
+
+	ret = platform_device_add(fme_region->region);
+	if (ret)
+		goto create_region_err;
+
+	fme_region->port_id = port_id;
+
+	return fme_region;
+
+create_region_err:
+	platform_device_put(fme_region->region);
+	return ERR_PTR(ret);
+}
+
+/**
+ * dfl_fme_destroy_region - destroy fme region
+ * @fme_region: fme region to destroy
+ */
+static void dfl_fme_destroy_region(struct dfl_fme_region *fme_region)
+{
+	platform_device_unregister(fme_region->region);
+}
+
+/**
+ * dfl_fme_destroy_regions - destroy all fme regions
+ * @pdata: fme platform device's pdata
+ */
+static void dfl_fme_destroy_regions(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_fme *priv = dfl_fpga_pdata_get_private(pdata);
+	struct dfl_fme_region *fme_region, *tmp;
+
+	list_for_each_entry_safe(fme_region, tmp, &priv->region_list, node) {
+		list_del(&fme_region->node);
+		dfl_fme_destroy_region(fme_region);
+	}
+}
+
+static int pr_mgmt_init(struct platform_device *pdev,
+			struct dfl_feature *feature)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_fme_region *fme_region;
+	struct dfl_fme_bridge *fme_br;
+	struct platform_device *mgr;
+	struct dfl_fme *priv;
+	void __iomem *fme_hdr;
+	int ret = -ENODEV, i = 0;
+	u64 fme_cap, port_offset;
+
+	fme_hdr = dfl_get_feature_ioaddr_by_id(&pdev->dev,
+					       FME_FEATURE_ID_HEADER);
+
+	mutex_lock(&pdata->lock);
+	priv = dfl_fpga_pdata_get_private(pdata);
+
+	/* Initialize the region and bridge sub device list */
+	INIT_LIST_HEAD(&priv->region_list);
+	INIT_LIST_HEAD(&priv->bridge_list);
+
+	/* Create fpga mgr platform device */
+	mgr = dfl_fme_create_mgr(pdata, feature);
+	if (IS_ERR(mgr)) {
+		dev_err(&pdev->dev, "fail to create fpga mgr pdev\n");
+		goto unlock;
+	}
+
+	priv->mgr = mgr;
+
+	/* Read capability register to check number of regions and bridges */
+	fme_cap = readq(fme_hdr + FME_HDR_CAP);
+	for (; i < FIELD_GET(FME_CAP_NUM_PORTS, fme_cap); i++) {
+		port_offset = readq(fme_hdr + FME_HDR_PORT_OFST(i));
+		if (!(port_offset & FME_PORT_OFST_IMP))
+			continue;
+
+		/* Create bridge for each port */
+		fme_br = dfl_fme_create_bridge(pdata, i);
+		if (IS_ERR(fme_br)) {
+			ret = PTR_ERR(fme_br);
+			goto destroy_region;
+		}
+
+		list_add(&fme_br->node, &priv->bridge_list);
+
+		/* Create region for each port */
+		fme_region = dfl_fme_create_region(pdata, mgr,
+						   fme_br->br, i);
+		if (!fme_region) {
+			ret = PTR_ERR(fme_region);
+			goto destroy_region;
+		}
+
+		list_add(&fme_region->node, &priv->region_list);
+	}
+	mutex_unlock(&pdata->lock);
+
+	return 0;
+
+destroy_region:
+	dfl_fme_destroy_regions(pdata);
+	dfl_fme_destroy_bridges(pdata);
+	dfl_fme_destroy_mgr(pdata);
+unlock:
+	mutex_unlock(&pdata->lock);
+	return ret;
+}
+
+static void pr_mgmt_uinit(struct platform_device *pdev,
+			  struct dfl_feature *feature)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_fme *priv;
+
+	mutex_lock(&pdata->lock);
+	priv = dfl_fpga_pdata_get_private(pdata);
+
+	dfl_fme_destroy_regions(pdata);
+	dfl_fme_destroy_bridges(pdata);
+	dfl_fme_destroy_mgr(pdata);
+	mutex_unlock(&pdata->lock);
+}
+
+static long fme_pr_ioctl(struct platform_device *pdev,
+			 struct dfl_feature *feature,
+			 unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	switch (cmd) {
+	case DFL_FPGA_FME_PORT_PR:
+		ret = fme_pr(pdev, arg);
+		break;
+	default:
+		ret = -ENODEV;
+	}
+
+	return ret;
+}
+
+const struct dfl_feature_ops pr_mgmt_ops = {
+	.init = pr_mgmt_init,
+	.uinit = pr_mgmt_uinit,
+	.ioctl = fme_pr_ioctl,
+};
diff --git a/drivers/fpga/dfl-fme-pr.h b/drivers/fpga/dfl-fme-pr.h
new file mode 100644
index 000000000000..096a699089d3
--- /dev/null
+++ b/drivers/fpga/dfl-fme-pr.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for FPGA Management Engine (FME) Partial Reconfiguration Driver
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Kang Luwei <luwei.kang@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *   Wu Hao <hao.wu@intel.com>
+ *   Joseph Grecco <joe.grecco@intel.com>
+ *   Enno Luebbers <enno.luebbers@intel.com>
+ *   Tim Whisonant <tim.whisonant@intel.com>
+ *   Ananda Ravuri <ananda.ravuri@intel.com>
+ *   Henry Mitchel <henry.mitchel@intel.com>
+ */
+
+#ifndef __DFL_FME_PR_H
+#define __DFL_FME_PR_H
+
+#include <linux/platform_device.h>
+
+/**
+ * struct dfl_fme_region - FME fpga region data structure
+ *
+ * @region: platform device of the FPGA region.
+ * @node: used to link fme_region to a list.
+ * @port_id: indicate which port this region connected to.
+ */
+struct dfl_fme_region {
+	struct platform_device *region;
+	struct list_head node;
+	int port_id;
+};
+
+/**
+ * struct dfl_fme_region_pdata - platform data for FME region platform device.
+ *
+ * @mgr: platform device of the FPGA manager.
+ * @br: platform device of the FPGA bridge.
+ * @region_id: region id (same as port_id).
+ */
+struct dfl_fme_region_pdata {
+	struct platform_device *mgr;
+	struct platform_device *br;
+	int region_id;
+};
+
+/**
+ * struct dfl_fme_bridge - FME fpga bridge data structure
+ *
+ * @br: platform device of the FPGA bridge.
+ * @node: used to link fme_bridge to a list.
+ */
+struct dfl_fme_bridge {
+	struct platform_device *br;
+	struct list_head node;
+};
+
+/**
+ * struct dfl_fme_bridge_pdata - platform data for FME bridge platform device.
+ *
+ * @cdev: container device.
+ * @port_id: port id.
+ */
+struct dfl_fme_br_pdata {
+	struct dfl_fpga_cdev *cdev;
+	int port_id;
+};
+
+/**
+ * struct dfl_fme_mgr_pdata - platform data for FME manager platform device.
+ *
+ * @ioaddr: mapped io address for FME manager platform device.
+ */
+struct dfl_fme_mgr_pdata {
+	void __iomem *ioaddr;
+};
+
+#define DFL_FPGA_FME_MGR	"dfl-fme-mgr"
+#define DFL_FPGA_FME_BRIDGE	"dfl-fme-bridge"
+#define DFL_FPGA_FME_REGION	"dfl-fme-region"
+
+#endif /* __DFL_FME_PR_H */
diff --git a/drivers/fpga/dfl-fme.h b/drivers/fpga/dfl-fme.h
new file mode 100644
index 000000000000..5394a216c5c0
--- /dev/null
+++ b/drivers/fpga/dfl-fme.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for FPGA Management Engine (FME) Driver
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Kang Luwei <luwei.kang@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *   Wu Hao <hao.wu@intel.com>
+ *   Joseph Grecco <joe.grecco@intel.com>
+ *   Enno Luebbers <enno.luebbers@intel.com>
+ *   Tim Whisonant <tim.whisonant@intel.com>
+ *   Ananda Ravuri <ananda.ravuri@intel.com>
+ *   Henry Mitchel <henry.mitchel@intel.com>
+ */
+
+#ifndef __DFL_FME_H
+#define __DFL_FME_H
+
+/**
+ * struct dfl_fme - dfl fme private data
+ *
+ * @mgr: FME's FPGA manager platform device.
+ * @region_list: linked list of FME's FPGA regions.
+ * @bridge_list: linked list of FME's FPGA bridges.
+ * @pdata: fme platform device's pdata.
+ */
+struct dfl_fme {
+	struct platform_device *mgr;
+	struct list_head region_list;
+	struct list_head bridge_list;
+	struct dfl_feature_platform_data *pdata;
+};
+
+extern const struct dfl_feature_ops pr_mgmt_ops;
+
+#endif /* __DFL_FME_H */
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
index 858e4437c31c..9666af85a8f5 100644
--- a/include/uapi/linux/fpga-dfl.h
+++ b/include/uapi/linux/fpga-dfl.h
@@ -14,6 +14,7 @@
 #ifndef _UAPI_LINUX_FPGA_DFL_H
 #define _UAPI_LINUX_FPGA_DFL_H
 
+#include <linux/types.h>
 #include <linux/ioctl.h>
 
 #define DFL_FPGA_API_VERSION 0
@@ -28,6 +29,7 @@
 #define DFL_FPGA_MAGIC 0xB6
 
 #define DFL_FPGA_BASE 0
+#define DFL_FME_BASE 0x80
 
 /**
  * DFL_FPGA_GET_API_VERSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0)
@@ -47,4 +49,29 @@
 
 #define DFL_FPGA_CHECK_EXTENSION	_IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1)
 
+/* IOCTLs for FME file descriptor */
+
+/**
+ * DFL_FPGA_FME_PORT_PR - _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 0,
+ *						struct dfl_fpga_fme_port_pr)
+ *
+ * Driver does Partial Reconfiguration based on Port ID and Buffer (Image)
+ * provided by caller.
+ * Return: 0 on success, -errno on failure.
+ * If DFL_FPGA_FME_PORT_PR returns -EIO, that indicates the HW has detected
+ * some errors during PR, under this case, the user can fetch HW error info
+ * from the status of FME's fpga manager.
+ */
+
+struct dfl_fpga_fme_port_pr {
+	/* Input */
+	__u32 argsz;		/* Structure length */
+	__u32 flags;		/* Zero for now */
+	__u32 port_id;
+	__u32 buffer_size;
+	__u64 buffer_address;	/* Userspace address to the buffer for PR */
+};
+
+#define DFL_FPGA_FME_PORT_PR	_IO(DFL_FPGA_MAGIC, DFL_FME_BASE + 0)
+
 #endif /* _UAPI_LINUX_FPGA_DFL_H */
-- 
cgit v1.2.3


From e4664c0ee4ac44993c62d10b048ab0a960691da5 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:32 +0800
Subject: fpga: dfl: afu: add header sub feature support

The port header register set is always present for port, it is mainly
for capability, control and status of the ports that AFU connected to.

This patch implements header sub feature support. Below user interfaces
are created by this patch.

Sysfs interface:
* /sys/class/fpga_region/<regionX>/<dfl-port.x>/id
  Read-only. Port ID.

Ioctl interface:
* DFL_FPGA_PORT_RESET
  Reset the FPGA Port and its AFU.

Signed-off-by: Tim Whisonant <tim.whisonant@intel.com>
Signed-off-by: Enno Luebbers <enno.luebbers@intel.com>
Signed-off-by: Shiva Rao <shiva.rao@intel.com>
Signed-off-by: Christopher Rauer <christopher.rauer@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-platform-dfl-port |  7 ++
 drivers/fpga/dfl-afu-main.c                       | 79 ++++++++++++++++++++++-
 include/uapi/linux/fpga-dfl.h                     | 17 +++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/testing/sysfs-platform-dfl-port

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port b/Documentation/ABI/testing/sysfs-platform-dfl-port
new file mode 100644
index 000000000000..cb91165f5397
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
@@ -0,0 +1,7 @@
+What:		/sys/bus/platform/devices/dfl-port.0/id
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. It returns id of this port. One DFL FPGA device
+		may have more than one port. Userspace could use this id to
+		distinguish different ports under same FPGA device.
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index a38d6a825e7e..d36b3e9f3984 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -16,6 +16,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/fpga-dfl.h>
 
 #include "dfl.h"
 
@@ -87,6 +88,41 @@ static int port_disable(struct platform_device *pdev)
 	return 0;
 }
 
+/*
+ * This function resets the FPGA Port and its accelerator (AFU) by function
+ * __port_disable and __port_enable (set port soft reset bit and then clear
+ * it). Userspace can do Port reset at any time, e.g. during DMA or Partial
+ * Reconfiguration. But it should never cause any system level issue, only
+ * functional failure (e.g. DMA or PR operation failure) and be recoverable
+ * from the failure.
+ *
+ * Note: the accelerator (AFU) is not accessible when its port is in reset
+ * (disabled). Any attempts on MMIO access to AFU while in reset, will
+ * result errors reported via port error reporting sub feature (if present).
+ */
+static int __port_reset(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = port_disable(pdev);
+	if (!ret)
+		port_enable(pdev);
+
+	return ret;
+}
+
+static int port_reset(struct platform_device *pdev)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	int ret;
+
+	mutex_lock(&pdata->lock);
+	ret = __port_reset(pdev);
+	mutex_unlock(&pdata->lock);
+
+	return ret;
+}
+
 static int port_get_id(struct platform_device *pdev)
 {
 	void __iomem *base;
@@ -96,23 +132,63 @@ static int port_get_id(struct platform_device *pdev)
 	return FIELD_GET(PORT_CAP_PORT_NUM, readq(base + PORT_HDR_CAP));
 }
 
+static ssize_t
+id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	int id = port_get_id(to_platform_device(dev));
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", id);
+}
+static DEVICE_ATTR_RO(id);
+
+static const struct attribute *port_hdr_attrs[] = {
+	&dev_attr_id.attr,
+	NULL,
+};
+
 static int port_hdr_init(struct platform_device *pdev,
 			 struct dfl_feature *feature)
 {
 	dev_dbg(&pdev->dev, "PORT HDR Init.\n");
 
-	return 0;
+	port_reset(pdev);
+
+	return sysfs_create_files(&pdev->dev.kobj, port_hdr_attrs);
 }
 
 static void port_hdr_uinit(struct platform_device *pdev,
 			   struct dfl_feature *feature)
 {
 	dev_dbg(&pdev->dev, "PORT HDR UInit.\n");
+
+	sysfs_remove_files(&pdev->dev.kobj, port_hdr_attrs);
+}
+
+static long
+port_hdr_ioctl(struct platform_device *pdev, struct dfl_feature *feature,
+	       unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	switch (cmd) {
+	case DFL_FPGA_PORT_RESET:
+		if (!arg)
+			ret = port_reset(pdev);
+		else
+			ret = -EINVAL;
+		break;
+	default:
+		dev_dbg(&pdev->dev, "%x cmd not handled", cmd);
+		ret = -ENODEV;
+	}
+
+	return ret;
 }
 
 static const struct dfl_feature_ops port_hdr_ops = {
 	.init = port_hdr_init,
 	.uinit = port_hdr_uinit,
+	.ioctl = port_hdr_ioctl,
 };
 
 static struct dfl_feature_driver port_feature_drvs[] = {
@@ -154,6 +230,7 @@ static int afu_release(struct inode *inode, struct file *filp)
 
 	pdata = dev_get_platdata(&pdev->dev);
 
+	port_reset(pdev);
 	dfl_feature_dev_use_end(pdata);
 
 	return 0;
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
index 9666af85a8f5..e6b4dd26cc68 100644
--- a/include/uapi/linux/fpga-dfl.h
+++ b/include/uapi/linux/fpga-dfl.h
@@ -29,8 +29,11 @@
 #define DFL_FPGA_MAGIC 0xB6
 
 #define DFL_FPGA_BASE 0
+#define DFL_PORT_BASE 0x40
 #define DFL_FME_BASE 0x80
 
+/* Common IOCTLs for both FME and AFU file descriptor */
+
 /**
  * DFL_FPGA_GET_API_VERSION - _IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 0)
  *
@@ -49,6 +52,20 @@
 
 #define DFL_FPGA_CHECK_EXTENSION	_IO(DFL_FPGA_MAGIC, DFL_FPGA_BASE + 1)
 
+/* IOCTLs for AFU file descriptor */
+
+/**
+ * DFL_FPGA_PORT_RESET - _IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 0)
+ *
+ * Reset the FPGA Port and its AFU. No parameters are supported.
+ * Userspace can do Port reset at any time, e.g. during DMA or PR. But
+ * it should never cause any system level issue, only functional failure
+ * (e.g. DMA or PR operation failure) and be recoverable from the failure.
+ * Return: 0 on success, -errno of failure
+ */
+
+#define DFL_FPGA_PORT_RESET		_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 0)
+
 /* IOCTLs for FME file descriptor */
 
 /**
-- 
cgit v1.2.3


From 857a26222ff75eecf7d701ef0e91e4fbf6efa663 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Date: Sat, 30 Jun 2018 08:53:34 +0800
Subject: fpga: dfl: afu: add afu sub feature support

User Accelerated Function Unit sub feature exposes the MMIO region of
the AFU. After valid PR bitstream is programmed and the port is enabled,
then this MMIO region could be accessed.

This patch adds support to enumerate the AFU MMIO region and expose it
to userspace via mmap file operation. Below interfaces are exposed to user:

Sysfs interface:
* /sys/class/fpga_region/<regionX>/<dfl-port.x>/afu_id
  Read-only. Indicate which PR bitstream is programmed to this AFU.

Ioctl interfaces:
* DFL_FPGA_PORT_GET_INFO
  Provide info to userspace on the number of supported region.
  Only UAFU region is supported now.

* DFL_FPGA_PORT_GET_REGION_INFO
  Provide region information, including access permission, region size,
  offset from the start of device fd.

Signed-off-by: Tim Whisonant <tim.whisonant@intel.com>
Signed-off-by: Enno Luebbers <enno.luebbers@intel.com>
Signed-off-by: Shiva Rao <shiva.rao@intel.com>
Signed-off-by: Christopher Rauer <christopher.rauer@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-platform-dfl-port |   9 +
 drivers/fpga/Makefile                             |   2 +-
 drivers/fpga/dfl-afu-main.c                       | 219 +++++++++++++++++++++-
 drivers/fpga/dfl-afu-region.c                     | 166 ++++++++++++++++
 drivers/fpga/dfl-afu.h                            |  71 +++++++
 include/uapi/linux/fpga-dfl.h                     |  48 +++++
 6 files changed, 508 insertions(+), 7 deletions(-)
 create mode 100644 drivers/fpga/dfl-afu-region.c
 create mode 100644 drivers/fpga/dfl-afu.h

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port b/Documentation/ABI/testing/sysfs-platform-dfl-port
index cb91165f5397..6a92dda517b0 100644
--- a/Documentation/ABI/testing/sysfs-platform-dfl-port
+++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
@@ -5,3 +5,12 @@ Contact:	Wu Hao <hao.wu@intel.com>
 Description:	Read-only. It returns id of this port. One DFL FPGA device
 		may have more than one port. Userspace could use this id to
 		distinguish different ports under same FPGA device.
+
+What:		/sys/bus/platform/devices/dfl-port.0/afu_id
+Date:		June 2018
+KernelVersion:	4.19
+Contact:	Wu Hao <hao.wu@intel.com>
+Description:	Read-only. User can program different PR bitstreams to FPGA
+		Accelerator Function Unit (AFU) for different functions. It
+		returns uuid which could be used to identify which PR bitstream
+		is programmed in this AFU.
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 1ac7749b2542..a44d50dd0b70 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_FPGA_DFL_FME_REGION)	+= dfl-fme-region.o
 obj-$(CONFIG_FPGA_DFL_AFU)		+= dfl-afu.o
 
 dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
-dfl-afu-objs := dfl-afu-main.o
+dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o
 
 # Drivers for FPGAs which implement DFL
 obj-$(CONFIG_FPGA_DFL_PCI)		+= dfl-pci.o
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index 4074b97122e2..f67a78d7e9ad 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -16,18 +16,18 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <linux/fpga-dfl.h>
 
-#include "dfl.h"
+#include "dfl-afu.h"
 
 /**
  * port_enable - enable a port
  * @pdev: port platform device.
  *
  * Enable Port by clear the port soft reset bit, which is set by default.
- * The User AFU is unable to respond to any MMIO access while in reset.
- * port_enable function should only be used after port_disable
- * function.
+ * The AFU is unable to respond to any MMIO access while in reset.
+ * port_enable function should only be used after port_disable function.
  */
 static void port_enable(struct platform_device *pdev)
 {
@@ -191,11 +191,74 @@ static const struct dfl_feature_ops port_hdr_ops = {
 	.ioctl = port_hdr_ioctl,
 };
 
+static ssize_t
+afu_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(dev);
+	void __iomem *base;
+	u64 guidl, guidh;
+
+	base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_AFU);
+
+	mutex_lock(&pdata->lock);
+	if (pdata->disable_count) {
+		mutex_unlock(&pdata->lock);
+		return -EBUSY;
+	}
+
+	guidl = readq(base + GUID_L);
+	guidh = readq(base + GUID_H);
+	mutex_unlock(&pdata->lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%016llx%016llx\n", guidh, guidl);
+}
+static DEVICE_ATTR_RO(afu_id);
+
+static const struct attribute *port_afu_attrs[] = {
+	&dev_attr_afu_id.attr,
+	NULL
+};
+
+static int port_afu_init(struct platform_device *pdev,
+			 struct dfl_feature *feature)
+{
+	struct resource *res = &pdev->resource[feature->resource_index];
+	int ret;
+
+	dev_dbg(&pdev->dev, "PORT AFU Init.\n");
+
+	ret = afu_mmio_region_add(dev_get_platdata(&pdev->dev),
+				  DFL_PORT_REGION_INDEX_AFU, resource_size(res),
+				  res->start, DFL_PORT_REGION_READ |
+				  DFL_PORT_REGION_WRITE | DFL_PORT_REGION_MMAP);
+	if (ret)
+		return ret;
+
+	return sysfs_create_files(&pdev->dev.kobj, port_afu_attrs);
+}
+
+static void port_afu_uinit(struct platform_device *pdev,
+			   struct dfl_feature *feature)
+{
+	dev_dbg(&pdev->dev, "PORT AFU UInit.\n");
+
+	sysfs_remove_files(&pdev->dev.kobj, port_afu_attrs);
+}
+
+static const struct dfl_feature_ops port_afu_ops = {
+	.init = port_afu_init,
+	.uinit = port_afu_uinit,
+};
+
 static struct dfl_feature_driver port_feature_drvs[] = {
 	{
 		.id = PORT_FEATURE_ID_HEADER,
 		.ops = &port_hdr_ops,
 	},
+	{
+		.id = PORT_FEATURE_ID_AFU,
+		.ops = &port_afu_ops,
+	},
 	{
 		.ops = NULL,
 	}
@@ -243,6 +306,64 @@ static long afu_ioctl_check_extension(struct dfl_feature_platform_data *pdata,
 	return 0;
 }
 
+static long
+afu_ioctl_get_info(struct dfl_feature_platform_data *pdata, void __user *arg)
+{
+	struct dfl_fpga_port_info info;
+	struct dfl_afu *afu;
+	unsigned long minsz;
+
+	minsz = offsetofend(struct dfl_fpga_port_info, num_umsgs);
+
+	if (copy_from_user(&info, arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	mutex_lock(&pdata->lock);
+	afu = dfl_fpga_pdata_get_private(pdata);
+	info.flags = 0;
+	info.num_regions = afu->num_regions;
+	info.num_umsgs = afu->num_umsgs;
+	mutex_unlock(&pdata->lock);
+
+	if (copy_to_user(arg, &info, sizeof(info)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long afu_ioctl_get_region_info(struct dfl_feature_platform_data *pdata,
+				      void __user *arg)
+{
+	struct dfl_fpga_port_region_info rinfo;
+	struct dfl_afu_mmio_region region;
+	unsigned long minsz;
+	long ret;
+
+	minsz = offsetofend(struct dfl_fpga_port_region_info, offset);
+
+	if (copy_from_user(&rinfo, arg, minsz))
+		return -EFAULT;
+
+	if (rinfo.argsz < minsz || rinfo.padding)
+		return -EINVAL;
+
+	ret = afu_mmio_region_get_by_index(pdata, rinfo.index, &region);
+	if (ret)
+		return ret;
+
+	rinfo.flags = region.flags;
+	rinfo.size = region.size;
+	rinfo.offset = region.offset;
+
+	if (copy_to_user(arg, &rinfo, sizeof(rinfo)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *pdev = filp->private_data;
@@ -259,6 +380,10 @@ static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return DFL_FPGA_API_VERSION;
 	case DFL_FPGA_CHECK_EXTENSION:
 		return afu_ioctl_check_extension(pdata, arg);
+	case DFL_FPGA_PORT_GET_INFO:
+		return afu_ioctl_get_info(pdata, (void __user *)arg);
+	case DFL_FPGA_PORT_GET_REGION_INFO:
+		return afu_ioctl_get_region_info(pdata, (void __user *)arg);
 	default:
 		/*
 		 * Let sub-feature's ioctl function to handle the cmd
@@ -277,13 +402,83 @@ static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	return -EINVAL;
 }
 
+static int afu_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct platform_device *pdev = filp->private_data;
+	struct dfl_feature_platform_data *pdata;
+	u64 size = vma->vm_end - vma->vm_start;
+	struct dfl_afu_mmio_region region;
+	u64 offset;
+	int ret;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+
+	pdata = dev_get_platdata(&pdev->dev);
+
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	ret = afu_mmio_region_get_by_offset(pdata, offset, size, &region);
+	if (ret)
+		return ret;
+
+	if (!(region.flags & DFL_PORT_REGION_MMAP))
+		return -EINVAL;
+
+	if ((vma->vm_flags & VM_READ) && !(region.flags & DFL_PORT_REGION_READ))
+		return -EPERM;
+
+	if ((vma->vm_flags & VM_WRITE) &&
+	    !(region.flags & DFL_PORT_REGION_WRITE))
+		return -EPERM;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return remap_pfn_range(vma, vma->vm_start,
+			(region.phys + (offset - region.offset)) >> PAGE_SHIFT,
+			size, vma->vm_page_prot);
+}
+
 static const struct file_operations afu_fops = {
 	.owner = THIS_MODULE,
 	.open = afu_open,
 	.release = afu_release,
 	.unlocked_ioctl = afu_ioctl,
+	.mmap = afu_mmap,
 };
 
+static int afu_dev_init(struct platform_device *pdev)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_afu *afu;
+
+	afu = devm_kzalloc(&pdev->dev, sizeof(*afu), GFP_KERNEL);
+	if (!afu)
+		return -ENOMEM;
+
+	afu->pdata = pdata;
+
+	mutex_lock(&pdata->lock);
+	dfl_fpga_pdata_set_private(pdata, afu);
+	afu_mmio_region_init(pdata);
+	mutex_unlock(&pdata->lock);
+
+	return 0;
+}
+
+static int afu_dev_destroy(struct platform_device *pdev)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_afu *afu;
+
+	mutex_lock(&pdata->lock);
+	afu = dfl_fpga_pdata_get_private(pdata);
+	afu_mmio_region_destroy(pdata);
+	dfl_fpga_pdata_set_private(pdata, NULL);
+	mutex_unlock(&pdata->lock);
+
+	return 0;
+}
+
 static int port_enable_set(struct platform_device *pdev, bool enable)
 {
 	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
@@ -312,14 +507,25 @@ static int afu_probe(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
+	ret = afu_dev_init(pdev);
+	if (ret)
+		goto exit;
+
 	ret = dfl_fpga_dev_feature_init(pdev, port_feature_drvs);
 	if (ret)
-		return ret;
+		goto dev_destroy;
 
 	ret = dfl_fpga_dev_ops_register(pdev, &afu_fops, THIS_MODULE);
-	if (ret)
+	if (ret) {
 		dfl_fpga_dev_feature_uinit(pdev);
+		goto dev_destroy;
+	}
+
+	return 0;
 
+dev_destroy:
+	afu_dev_destroy(pdev);
+exit:
 	return ret;
 }
 
@@ -329,6 +535,7 @@ static int afu_remove(struct platform_device *pdev)
 
 	dfl_fpga_dev_ops_unregister(pdev);
 	dfl_fpga_dev_feature_uinit(pdev);
+	afu_dev_destroy(pdev);
 
 	return 0;
 }
diff --git a/drivers/fpga/dfl-afu-region.c b/drivers/fpga/dfl-afu-region.c
new file mode 100644
index 000000000000..0804b7a0c298
--- /dev/null
+++ b/drivers/fpga/dfl-afu-region.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Accelerated Function Unit (AFU) MMIO Region Management
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Wu Hao <hao.wu@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ */
+#include "dfl-afu.h"
+
+/**
+ * afu_mmio_region_init - init function for afu mmio region support
+ * @pdata: afu platform device's pdata.
+ */
+void afu_mmio_region_init(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+
+	INIT_LIST_HEAD(&afu->regions);
+}
+
+#define for_each_region(region, afu)	\
+	list_for_each_entry((region), &(afu)->regions, node)
+
+static struct dfl_afu_mmio_region *get_region_by_index(struct dfl_afu *afu,
+						       u32 region_index)
+{
+	struct dfl_afu_mmio_region *region;
+
+	for_each_region(region, afu)
+		if (region->index == region_index)
+			return region;
+
+	return NULL;
+}
+
+/**
+ * afu_mmio_region_add - add a mmio region to given feature dev.
+ *
+ * @region_index: region index.
+ * @region_size: region size.
+ * @phys: region's physical address of this region.
+ * @flags: region flags (access permission).
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int afu_mmio_region_add(struct dfl_feature_platform_data *pdata,
+			u32 region_index, u64 region_size, u64 phys, u32 flags)
+{
+	struct dfl_afu_mmio_region *region;
+	struct dfl_afu *afu;
+	int ret = 0;
+
+	region = devm_kzalloc(&pdata->dev->dev, sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	region->index = region_index;
+	region->size = region_size;
+	region->phys = phys;
+	region->flags = flags;
+
+	mutex_lock(&pdata->lock);
+
+	afu = dfl_fpga_pdata_get_private(pdata);
+
+	/* check if @index already exists */
+	if (get_region_by_index(afu, region_index)) {
+		mutex_unlock(&pdata->lock);
+		ret = -EEXIST;
+		goto exit;
+	}
+
+	region_size = PAGE_ALIGN(region_size);
+	region->offset = afu->region_cur_offset;
+	list_add(&region->node, &afu->regions);
+
+	afu->region_cur_offset += region_size;
+	afu->num_regions++;
+	mutex_unlock(&pdata->lock);
+
+	return 0;
+
+exit:
+	devm_kfree(&pdata->dev->dev, region);
+	return ret;
+}
+
+/**
+ * afu_mmio_region_destroy - destroy all mmio regions under given feature dev.
+ * @pdata: afu platform device's pdata.
+ */
+void afu_mmio_region_destroy(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+	struct dfl_afu_mmio_region *tmp, *region;
+
+	list_for_each_entry_safe(region, tmp, &afu->regions, node)
+		devm_kfree(&pdata->dev->dev, region);
+}
+
+/**
+ * afu_mmio_region_get_by_index - find an afu region by index.
+ * @pdata: afu platform device's pdata.
+ * @region_index: region index.
+ * @pregion: ptr to region for result.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int afu_mmio_region_get_by_index(struct dfl_feature_platform_data *pdata,
+				 u32 region_index,
+				 struct dfl_afu_mmio_region *pregion)
+{
+	struct dfl_afu_mmio_region *region;
+	struct dfl_afu *afu;
+	int ret = 0;
+
+	mutex_lock(&pdata->lock);
+	afu = dfl_fpga_pdata_get_private(pdata);
+	region = get_region_by_index(afu, region_index);
+	if (!region) {
+		ret = -EINVAL;
+		goto exit;
+	}
+	*pregion = *region;
+exit:
+	mutex_unlock(&pdata->lock);
+	return ret;
+}
+
+/**
+ * afu_mmio_region_get_by_offset - find an afu mmio region by offset and size
+ *
+ * @pdata: afu platform device's pdata.
+ * @offset: region offset from start of the device fd.
+ * @size: region size.
+ * @pregion: ptr to region for result.
+ *
+ * Find the region which fully contains the region described by input
+ * parameters (offset and size) from the feature dev's region linked list.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int afu_mmio_region_get_by_offset(struct dfl_feature_platform_data *pdata,
+				  u64 offset, u64 size,
+				  struct dfl_afu_mmio_region *pregion)
+{
+	struct dfl_afu_mmio_region *region;
+	struct dfl_afu *afu;
+	int ret = 0;
+
+	mutex_lock(&pdata->lock);
+	afu = dfl_fpga_pdata_get_private(pdata);
+	for_each_region(region, afu)
+		if (region->offset <= offset &&
+		    region->offset + region->size >= offset + size) {
+			*pregion = *region;
+			goto exit;
+		}
+	ret = -EINVAL;
+exit:
+	mutex_unlock(&pdata->lock);
+	return ret;
+}
diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
new file mode 100644
index 000000000000..11ce2cf99759
--- /dev/null
+++ b/drivers/fpga/dfl-afu.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for FPGA Accelerated Function Unit (AFU) Driver
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *     Wu Hao <hao.wu@intel.com>
+ *     Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *     Joseph Grecco <joe.grecco@intel.com>
+ *     Enno Luebbers <enno.luebbers@intel.com>
+ *     Tim Whisonant <tim.whisonant@intel.com>
+ *     Ananda Ravuri <ananda.ravuri@intel.com>
+ *     Henry Mitchel <henry.mitchel@intel.com>
+ */
+
+#ifndef __DFL_AFU_H
+#define __DFL_AFU_H
+
+#include <linux/mm.h>
+
+#include "dfl.h"
+
+/**
+ * struct dfl_afu_mmio_region - afu mmio region data structure
+ *
+ * @index: region index.
+ * @flags: region flags (access permission).
+ * @size: region size.
+ * @offset: region offset from start of the device fd.
+ * @phys: region's physical address.
+ * @node: node to add to afu feature dev's region list.
+ */
+struct dfl_afu_mmio_region {
+	u32 index;
+	u32 flags;
+	u64 size;
+	u64 offset;
+	u64 phys;
+	struct list_head node;
+};
+
+/**
+ * struct dfl_afu - afu device data structure
+ *
+ * @region_cur_offset: current region offset from start to the device fd.
+ * @num_regions: num of mmio regions.
+ * @regions: the mmio region linked list of this afu feature device.
+ * @num_umsgs: num of umsgs.
+ * @pdata: afu platform device's pdata.
+ */
+struct dfl_afu {
+	u64 region_cur_offset;
+	int num_regions;
+	u8 num_umsgs;
+	struct list_head regions;
+
+	struct dfl_feature_platform_data *pdata;
+};
+
+void afu_mmio_region_init(struct dfl_feature_platform_data *pdata);
+int afu_mmio_region_add(struct dfl_feature_platform_data *pdata,
+			u32 region_index, u64 region_size, u64 phys, u32 flags);
+void afu_mmio_region_destroy(struct dfl_feature_platform_data *pdata);
+int afu_mmio_region_get_by_index(struct dfl_feature_platform_data *pdata,
+				 u32 region_index,
+				 struct dfl_afu_mmio_region *pregion);
+int afu_mmio_region_get_by_offset(struct dfl_feature_platform_data *pdata,
+				  u64 offset, u64 size,
+				  struct dfl_afu_mmio_region *pregion);
+#endif
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
index e6b4dd26cc68..a3ccdfb115a5 100644
--- a/include/uapi/linux/fpga-dfl.h
+++ b/include/uapi/linux/fpga-dfl.h
@@ -66,6 +66,54 @@
 
 #define DFL_FPGA_PORT_RESET		_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 0)
 
+/**
+ * DFL_FPGA_PORT_GET_INFO - _IOR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 1,
+ *						struct dfl_fpga_port_info)
+ *
+ * Retrieve information about the fpga port.
+ * Driver fills the info in provided struct dfl_fpga_port_info.
+ * Return: 0 on success, -errno on failure.
+ */
+struct dfl_fpga_port_info {
+	/* Input */
+	__u32 argsz;		/* Structure length */
+	/* Output */
+	__u32 flags;		/* Zero for now */
+	__u32 num_regions;	/* The number of supported regions */
+	__u32 num_umsgs;	/* The number of allocated umsgs */
+};
+
+#define DFL_FPGA_PORT_GET_INFO		_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 1)
+
+/**
+ * FPGA_PORT_GET_REGION_INFO - _IOWR(FPGA_MAGIC, PORT_BASE + 2,
+ *					struct dfl_fpga_port_region_info)
+ *
+ * Retrieve information about a device memory region.
+ * Caller provides struct dfl_fpga_port_region_info with index value set.
+ * Driver returns the region info in other fields.
+ * Return: 0 on success, -errno on failure.
+ */
+struct dfl_fpga_port_region_info {
+	/* input */
+	__u32 argsz;		/* Structure length */
+	/* Output */
+	__u32 flags;		/* Access permission */
+#define DFL_PORT_REGION_READ	(1 << 0)	/* Region is readable */
+#define DFL_PORT_REGION_WRITE	(1 << 1)	/* Region is writable */
+#define DFL_PORT_REGION_MMAP	(1 << 2)	/* Can be mmaped to userspace */
+	/* Input */
+	__u32 index;		/* Region index */
+#define DFL_PORT_REGION_INDEX_AFU	0	/* AFU */
+#define DFL_PORT_REGION_INDEX_STP	1	/* Signal Tap */
+	__u32 padding;
+	/* Output */
+	__u64 size;		/* Region size (bytes) */
+	__u64 offset;		/* Region offset from start of device fd */
+};
+
+#define DFL_FPGA_PORT_GET_REGION_INFO	_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 2)
+
 /* IOCTLs for FME file descriptor */
 
 /**
-- 
cgit v1.2.3


From fa8dda1edef9ebc3af467c644c5533ac97171e12 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sat, 30 Jun 2018 08:53:35 +0800
Subject: fpga: dfl: afu: add DFL_FPGA_PORT_DMA_MAP/UNMAP ioctls support

DMA memory regions are required for Accelerated Function Unit (AFU) usage.
These two ioctls allow user space applications to map user memory regions
for dma, and unmap them after use. Iova is returned from driver to user
space application via DFL_FPGA_PORT_DMA_MAP ioctl. Application needs to
unmap it after use, otherwise, driver will unmap them in device file
release operation.

Each AFU has its own rb tree to keep track of its mapped DMA regions.

Ioctl interfaces:
* DFL_FPGA_PORT_DMA_MAP
  Do the dma mapping per user_addr and length provided by user.
  Return iova in provided struct dfl_fpga_port_dma_map.

* DFL_FPGA_PORT_DMA_UNMAP
  Unmap the dma region per iova provided by user.

Signed-off-by: Tim Whisonant <tim.whisonant@intel.com>
Signed-off-by: Enno Luebbers <enno.luebbers@intel.com>
Signed-off-by: Shiva Rao <shiva.rao@intel.com>
Signed-off-by: Christopher Rauer <christopher.rauer@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/Makefile             |   2 +-
 drivers/fpga/dfl-afu-dma-region.c | 463 ++++++++++++++++++++++++++++++++++++++
 drivers/fpga/dfl-afu-main.c       |  61 ++++-
 drivers/fpga/dfl-afu.h            |  31 ++-
 include/uapi/linux/fpga-dfl.h     |  37 +++
 5 files changed, 591 insertions(+), 3 deletions(-)
 create mode 100644 drivers/fpga/dfl-afu-dma-region.c

(limited to 'include')

diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index a44d50dd0b70..7a2d73ba7122 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -38,7 +38,7 @@ obj-$(CONFIG_FPGA_DFL_FME_REGION)	+= dfl-fme-region.o
 obj-$(CONFIG_FPGA_DFL_AFU)		+= dfl-afu.o
 
 dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
-dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o
+dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
 
 # Drivers for FPGAs which implement DFL
 obj-$(CONFIG_FPGA_DFL_PCI)		+= dfl-pci.o
diff --git a/drivers/fpga/dfl-afu-dma-region.c b/drivers/fpga/dfl-afu-dma-region.c
new file mode 100644
index 000000000000..0e81d33af856
--- /dev/null
+++ b/drivers/fpga/dfl-afu-dma-region.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for FPGA Accelerated Function Unit (AFU) DMA Region Management
+ *
+ * Copyright (C) 2017-2018 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Wu Hao <hao.wu@intel.com>
+ *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+#include "dfl-afu.h"
+
+static void put_all_pages(struct page **pages, int npages)
+{
+	int i;
+
+	for (i = 0; i < npages; i++)
+		if (pages[i])
+			put_page(pages[i]);
+}
+
+void afu_dma_region_init(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+
+	afu->dma_regions = RB_ROOT;
+}
+
+/**
+ * afu_dma_adjust_locked_vm - adjust locked memory
+ * @dev: port device
+ * @npages: number of pages
+ * @incr: increase or decrease locked memory
+ *
+ * Increase or decrease the locked memory size with npages input.
+ *
+ * Return 0 on success.
+ * Return -ENOMEM if locked memory size is over the limit and no CAP_IPC_LOCK.
+ */
+static int afu_dma_adjust_locked_vm(struct device *dev, long npages, bool incr)
+{
+	unsigned long locked, lock_limit;
+	int ret = 0;
+
+	/* the task is exiting. */
+	if (!current->mm)
+		return 0;
+
+	down_write(&current->mm->mmap_sem);
+
+	if (incr) {
+		locked = current->mm->locked_vm + npages;
+		lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			ret = -ENOMEM;
+		else
+			current->mm->locked_vm += npages;
+	} else {
+		if (WARN_ON_ONCE(npages > current->mm->locked_vm))
+			npages = current->mm->locked_vm;
+		current->mm->locked_vm -= npages;
+	}
+
+	dev_dbg(dev, "[%d] RLIMIT_MEMLOCK %c%ld %ld/%ld%s\n", current->pid,
+		incr ? '+' : '-', npages << PAGE_SHIFT,
+		current->mm->locked_vm << PAGE_SHIFT, rlimit(RLIMIT_MEMLOCK),
+		ret ? "- execeeded" : "");
+
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+/**
+ * afu_dma_pin_pages - pin pages of given dma memory region
+ * @pdata: feature device platform data
+ * @region: dma memory region to be pinned
+ *
+ * Pin all the pages of given dfl_afu_dma_region.
+ * Return 0 for success or negative error code.
+ */
+static int afu_dma_pin_pages(struct dfl_feature_platform_data *pdata,
+			     struct dfl_afu_dma_region *region)
+{
+	int npages = region->length >> PAGE_SHIFT;
+	struct device *dev = &pdata->dev->dev;
+	int ret, pinned;
+
+	ret = afu_dma_adjust_locked_vm(dev, npages, true);
+	if (ret)
+		return ret;
+
+	region->pages = kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
+	if (!region->pages) {
+		ret = -ENOMEM;
+		goto unlock_vm;
+	}
+
+	pinned = get_user_pages_fast(region->user_addr, npages, 1,
+				     region->pages);
+	if (pinned < 0) {
+		ret = pinned;
+		goto put_pages;
+	} else if (pinned != npages) {
+		ret = -EFAULT;
+		goto free_pages;
+	}
+
+	dev_dbg(dev, "%d pages pinned\n", pinned);
+
+	return 0;
+
+put_pages:
+	put_all_pages(region->pages, pinned);
+free_pages:
+	kfree(region->pages);
+unlock_vm:
+	afu_dma_adjust_locked_vm(dev, npages, false);
+	return ret;
+}
+
+/**
+ * afu_dma_unpin_pages - unpin pages of given dma memory region
+ * @pdata: feature device platform data
+ * @region: dma memory region to be unpinned
+ *
+ * Unpin all the pages of given dfl_afu_dma_region.
+ * Return 0 for success or negative error code.
+ */
+static void afu_dma_unpin_pages(struct dfl_feature_platform_data *pdata,
+				struct dfl_afu_dma_region *region)
+{
+	long npages = region->length >> PAGE_SHIFT;
+	struct device *dev = &pdata->dev->dev;
+
+	put_all_pages(region->pages, npages);
+	kfree(region->pages);
+	afu_dma_adjust_locked_vm(dev, npages, false);
+
+	dev_dbg(dev, "%ld pages unpinned\n", npages);
+}
+
+/**
+ * afu_dma_check_continuous_pages - check if pages are continuous
+ * @region: dma memory region
+ *
+ * Return true if pages of given dma memory region have continuous physical
+ * address, otherwise return false.
+ */
+static bool afu_dma_check_continuous_pages(struct dfl_afu_dma_region *region)
+{
+	int npages = region->length >> PAGE_SHIFT;
+	int i;
+
+	for (i = 0; i < npages - 1; i++)
+		if (page_to_pfn(region->pages[i]) + 1 !=
+				page_to_pfn(region->pages[i + 1]))
+			return false;
+
+	return true;
+}
+
+/**
+ * dma_region_check_iova - check if memory area is fully contained in the region
+ * @region: dma memory region
+ * @iova: address of the dma memory area
+ * @size: size of the dma memory area
+ *
+ * Compare the dma memory area defined by @iova and @size with given dma region.
+ * Return true if memory area is fully contained in the region, otherwise false.
+ */
+static bool dma_region_check_iova(struct dfl_afu_dma_region *region,
+				  u64 iova, u64 size)
+{
+	if (!size && region->iova != iova)
+		return false;
+
+	return (region->iova <= iova) &&
+		(region->length + region->iova >= iova + size);
+}
+
+/**
+ * afu_dma_region_add - add given dma region to rbtree
+ * @pdata: feature device platform data
+ * @region: dma region to be added
+ *
+ * Return 0 for success, -EEXIST if dma region has already been added.
+ *
+ * Needs to be called with pdata->lock heold.
+ */
+static int afu_dma_region_add(struct dfl_feature_platform_data *pdata,
+			      struct dfl_afu_dma_region *region)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+	struct rb_node **new, *parent = NULL;
+
+	dev_dbg(&pdata->dev->dev, "add region (iova = %llx)\n",
+		(unsigned long long)region->iova);
+
+	new = &afu->dma_regions.rb_node;
+
+	while (*new) {
+		struct dfl_afu_dma_region *this;
+
+		this = container_of(*new, struct dfl_afu_dma_region, node);
+
+		parent = *new;
+
+		if (dma_region_check_iova(this, region->iova, region->length))
+			return -EEXIST;
+
+		if (region->iova < this->iova)
+			new = &((*new)->rb_left);
+		else if (region->iova > this->iova)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	rb_link_node(&region->node, parent, new);
+	rb_insert_color(&region->node, &afu->dma_regions);
+
+	return 0;
+}
+
+/**
+ * afu_dma_region_remove - remove given dma region from rbtree
+ * @pdata: feature device platform data
+ * @region: dma region to be removed
+ *
+ * Needs to be called with pdata->lock heold.
+ */
+static void afu_dma_region_remove(struct dfl_feature_platform_data *pdata,
+				  struct dfl_afu_dma_region *region)
+{
+	struct dfl_afu *afu;
+
+	dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
+		(unsigned long long)region->iova);
+
+	afu = dfl_fpga_pdata_get_private(pdata);
+	rb_erase(&region->node, &afu->dma_regions);
+}
+
+/**
+ * afu_dma_region_destroy - destroy all regions in rbtree
+ * @pdata: feature device platform data
+ *
+ * Needs to be called with pdata->lock heold.
+ */
+void afu_dma_region_destroy(struct dfl_feature_platform_data *pdata)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+	struct rb_node *node = rb_first(&afu->dma_regions);
+	struct dfl_afu_dma_region *region;
+
+	while (node) {
+		region = container_of(node, struct dfl_afu_dma_region, node);
+
+		dev_dbg(&pdata->dev->dev, "del region (iova = %llx)\n",
+			(unsigned long long)region->iova);
+
+		rb_erase(node, &afu->dma_regions);
+
+		if (region->iova)
+			dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
+				       region->iova, region->length,
+				       DMA_BIDIRECTIONAL);
+
+		if (region->pages)
+			afu_dma_unpin_pages(pdata, region);
+
+		node = rb_next(node);
+		kfree(region);
+	}
+}
+
+/**
+ * afu_dma_region_find - find the dma region from rbtree based on iova and size
+ * @pdata: feature device platform data
+ * @iova: address of the dma memory area
+ * @size: size of the dma memory area
+ *
+ * It finds the dma region from the rbtree based on @iova and @size:
+ * - if @size == 0, it finds the dma region which starts from @iova
+ * - otherwise, it finds the dma region which fully contains
+ *   [@iova, @iova+size)
+ * If nothing is matched returns NULL.
+ *
+ * Needs to be called with pdata->lock held.
+ */
+struct dfl_afu_dma_region *
+afu_dma_region_find(struct dfl_feature_platform_data *pdata, u64 iova, u64 size)
+{
+	struct dfl_afu *afu = dfl_fpga_pdata_get_private(pdata);
+	struct rb_node *node = afu->dma_regions.rb_node;
+	struct device *dev = &pdata->dev->dev;
+
+	while (node) {
+		struct dfl_afu_dma_region *region;
+
+		region = container_of(node, struct dfl_afu_dma_region, node);
+
+		if (dma_region_check_iova(region, iova, size)) {
+			dev_dbg(dev, "find region (iova = %llx)\n",
+				(unsigned long long)region->iova);
+			return region;
+		}
+
+		if (iova < region->iova)
+			node = node->rb_left;
+		else if (iova > region->iova)
+			node = node->rb_right;
+		else
+			/* the iova region is not fully covered. */
+			break;
+	}
+
+	dev_dbg(dev, "region with iova %llx and size %llx is not found\n",
+		(unsigned long long)iova, (unsigned long long)size);
+
+	return NULL;
+}
+
+/**
+ * afu_dma_region_find_iova - find the dma region from rbtree by iova
+ * @pdata: feature device platform data
+ * @iova: address of the dma region
+ *
+ * Needs to be called with pdata->lock held.
+ */
+static struct dfl_afu_dma_region *
+afu_dma_region_find_iova(struct dfl_feature_platform_data *pdata, u64 iova)
+{
+	return afu_dma_region_find(pdata, iova, 0);
+}
+
+/**
+ * afu_dma_map_region - map memory region for dma
+ * @pdata: feature device platform data
+ * @user_addr: address of the memory region
+ * @length: size of the memory region
+ * @iova: pointer of iova address
+ *
+ * Map memory region defined by @user_addr and @length, and return dma address
+ * of the memory region via @iova.
+ * Return 0 for success, otherwise error code.
+ */
+int afu_dma_map_region(struct dfl_feature_platform_data *pdata,
+		       u64 user_addr, u64 length, u64 *iova)
+{
+	struct dfl_afu_dma_region *region;
+	int ret;
+
+	/*
+	 * Check Inputs, only accept page-aligned user memory region with
+	 * valid length.
+	 */
+	if (!PAGE_ALIGNED(user_addr) || !PAGE_ALIGNED(length) || !length)
+		return -EINVAL;
+
+	/* Check overflow */
+	if (user_addr + length < user_addr)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_WRITE, (void __user *)(unsigned long)user_addr,
+		       length))
+		return -EINVAL;
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	region->user_addr = user_addr;
+	region->length = length;
+
+	/* Pin the user memory region */
+	ret = afu_dma_pin_pages(pdata, region);
+	if (ret) {
+		dev_err(&pdata->dev->dev, "failed to pin memory region\n");
+		goto free_region;
+	}
+
+	/* Only accept continuous pages, return error else */
+	if (!afu_dma_check_continuous_pages(region)) {
+		dev_err(&pdata->dev->dev, "pages are not continuous\n");
+		ret = -EINVAL;
+		goto unpin_pages;
+	}
+
+	/* As pages are continuous then start to do DMA mapping */
+	region->iova = dma_map_page(dfl_fpga_pdata_to_parent(pdata),
+				    region->pages[0], 0,
+				    region->length,
+				    DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&pdata->dev->dev, region->iova)) {
+		dev_err(&pdata->dev->dev, "failed to map for dma\n");
+		ret = -EFAULT;
+		goto unpin_pages;
+	}
+
+	*iova = region->iova;
+
+	mutex_lock(&pdata->lock);
+	ret = afu_dma_region_add(pdata, region);
+	mutex_unlock(&pdata->lock);
+	if (ret) {
+		dev_err(&pdata->dev->dev, "failed to add dma region\n");
+		goto unmap_dma;
+	}
+
+	return 0;
+
+unmap_dma:
+	dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
+		       region->iova, region->length, DMA_BIDIRECTIONAL);
+unpin_pages:
+	afu_dma_unpin_pages(pdata, region);
+free_region:
+	kfree(region);
+	return ret;
+}
+
+/**
+ * afu_dma_unmap_region - unmap dma memory region
+ * @pdata: feature device platform data
+ * @iova: dma address of the region
+ *
+ * Unmap dma memory region based on @iova.
+ * Return 0 for success, otherwise error code.
+ */
+int afu_dma_unmap_region(struct dfl_feature_platform_data *pdata, u64 iova)
+{
+	struct dfl_afu_dma_region *region;
+
+	mutex_lock(&pdata->lock);
+	region = afu_dma_region_find_iova(pdata, iova);
+	if (!region) {
+		mutex_unlock(&pdata->lock);
+		return -EINVAL;
+	}
+
+	if (region->in_use) {
+		mutex_unlock(&pdata->lock);
+		return -EBUSY;
+	}
+
+	afu_dma_region_remove(pdata, region);
+	mutex_unlock(&pdata->lock);
+
+	dma_unmap_page(dfl_fpga_pdata_to_parent(pdata),
+		       region->iova, region->length, DMA_BIDIRECTIONAL);
+	afu_dma_unpin_pages(pdata, region);
+	kfree(region);
+
+	return 0;
+}
diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c
index f67a78d7e9ad..02baa6a227c0 100644
--- a/drivers/fpga/dfl-afu-main.c
+++ b/drivers/fpga/dfl-afu-main.c
@@ -293,7 +293,11 @@ static int afu_release(struct inode *inode, struct file *filp)
 
 	pdata = dev_get_platdata(&pdev->dev);
 
-	port_reset(pdev);
+	mutex_lock(&pdata->lock);
+	__port_reset(pdev);
+	afu_dma_region_destroy(pdata);
+	mutex_unlock(&pdata->lock);
+
 	dfl_feature_dev_use_end(pdata);
 
 	return 0;
@@ -364,6 +368,55 @@ static long afu_ioctl_get_region_info(struct dfl_feature_platform_data *pdata,
 	return 0;
 }
 
+static long
+afu_ioctl_dma_map(struct dfl_feature_platform_data *pdata, void __user *arg)
+{
+	struct dfl_fpga_port_dma_map map;
+	unsigned long minsz;
+	long ret;
+
+	minsz = offsetofend(struct dfl_fpga_port_dma_map, iova);
+
+	if (copy_from_user(&map, arg, minsz))
+		return -EFAULT;
+
+	if (map.argsz < minsz || map.flags)
+		return -EINVAL;
+
+	ret = afu_dma_map_region(pdata, map.user_addr, map.length, &map.iova);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(arg, &map, sizeof(map))) {
+		afu_dma_unmap_region(pdata, map.iova);
+		return -EFAULT;
+	}
+
+	dev_dbg(&pdata->dev->dev, "dma map: ua=%llx, len=%llx, iova=%llx\n",
+		(unsigned long long)map.user_addr,
+		(unsigned long long)map.length,
+		(unsigned long long)map.iova);
+
+	return 0;
+}
+
+static long
+afu_ioctl_dma_unmap(struct dfl_feature_platform_data *pdata, void __user *arg)
+{
+	struct dfl_fpga_port_dma_unmap unmap;
+	unsigned long minsz;
+
+	minsz = offsetofend(struct dfl_fpga_port_dma_unmap, iova);
+
+	if (copy_from_user(&unmap, arg, minsz))
+		return -EFAULT;
+
+	if (unmap.argsz < minsz || unmap.flags)
+		return -EINVAL;
+
+	return afu_dma_unmap_region(pdata, unmap.iova);
+}
+
 static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *pdev = filp->private_data;
@@ -384,6 +437,10 @@ static long afu_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return afu_ioctl_get_info(pdata, (void __user *)arg);
 	case DFL_FPGA_PORT_GET_REGION_INFO:
 		return afu_ioctl_get_region_info(pdata, (void __user *)arg);
+	case DFL_FPGA_PORT_DMA_MAP:
+		return afu_ioctl_dma_map(pdata, (void __user *)arg);
+	case DFL_FPGA_PORT_DMA_UNMAP:
+		return afu_ioctl_dma_unmap(pdata, (void __user *)arg);
 	default:
 		/*
 		 * Let sub-feature's ioctl function to handle the cmd
@@ -460,6 +517,7 @@ static int afu_dev_init(struct platform_device *pdev)
 	mutex_lock(&pdata->lock);
 	dfl_fpga_pdata_set_private(pdata, afu);
 	afu_mmio_region_init(pdata);
+	afu_dma_region_init(pdata);
 	mutex_unlock(&pdata->lock);
 
 	return 0;
@@ -473,6 +531,7 @@ static int afu_dev_destroy(struct platform_device *pdev)
 	mutex_lock(&pdata->lock);
 	afu = dfl_fpga_pdata_get_private(pdata);
 	afu_mmio_region_destroy(pdata);
+	afu_dma_region_destroy(pdata);
 	dfl_fpga_pdata_set_private(pdata, NULL);
 	mutex_unlock(&pdata->lock);
 
diff --git a/drivers/fpga/dfl-afu.h b/drivers/fpga/dfl-afu.h
index 11ce2cf99759..0c7630ae3cda 100644
--- a/drivers/fpga/dfl-afu.h
+++ b/drivers/fpga/dfl-afu.h
@@ -40,12 +40,32 @@ struct dfl_afu_mmio_region {
 	struct list_head node;
 };
 
+/**
+ * struct fpga_afu_dma_region - afu DMA region data structure
+ *
+ * @user_addr: region userspace virtual address.
+ * @length: region length.
+ * @iova: region IO virtual address.
+ * @pages: ptr to pages of this region.
+ * @node: rb tree node.
+ * @in_use: flag to indicate if this region is in_use.
+ */
+struct dfl_afu_dma_region {
+	u64 user_addr;
+	u64 length;
+	u64 iova;
+	struct page **pages;
+	struct rb_node node;
+	bool in_use;
+};
+
 /**
  * struct dfl_afu - afu device data structure
  *
  * @region_cur_offset: current region offset from start to the device fd.
  * @num_regions: num of mmio regions.
  * @regions: the mmio region linked list of this afu feature device.
+ * @dma_regions: root of dma regions rb tree.
  * @num_umsgs: num of umsgs.
  * @pdata: afu platform device's pdata.
  */
@@ -54,6 +74,7 @@ struct dfl_afu {
 	int num_regions;
 	u8 num_umsgs;
 	struct list_head regions;
+	struct rb_root dma_regions;
 
 	struct dfl_feature_platform_data *pdata;
 };
@@ -68,4 +89,12 @@ int afu_mmio_region_get_by_index(struct dfl_feature_platform_data *pdata,
 int afu_mmio_region_get_by_offset(struct dfl_feature_platform_data *pdata,
 				  u64 offset, u64 size,
 				  struct dfl_afu_mmio_region *pregion);
-#endif
+void afu_dma_region_init(struct dfl_feature_platform_data *pdata);
+void afu_dma_region_destroy(struct dfl_feature_platform_data *pdata);
+int afu_dma_map_region(struct dfl_feature_platform_data *pdata,
+		       u64 user_addr, u64 length, u64 *iova);
+int afu_dma_unmap_region(struct dfl_feature_platform_data *pdata, u64 iova);
+struct dfl_afu_dma_region *
+afu_dma_region_find(struct dfl_feature_platform_data *pdata,
+		    u64 iova, u64 size);
+#endif /* __DFL_AFU_H */
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
index a3ccdfb115a5..2e324e515c41 100644
--- a/include/uapi/linux/fpga-dfl.h
+++ b/include/uapi/linux/fpga-dfl.h
@@ -114,6 +114,43 @@ struct dfl_fpga_port_region_info {
 
 #define DFL_FPGA_PORT_GET_REGION_INFO	_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 2)
 
+/**
+ * DFL_FPGA_PORT_DMA_MAP - _IOWR(DFL_FPGA_MAGIC, DFL_PORT_BASE + 3,
+ *						struct dfl_fpga_port_dma_map)
+ *
+ * Map the dma memory per user_addr and length which are provided by caller.
+ * Driver fills the iova in provided struct afu_port_dma_map.
+ * This interface only accepts page-size aligned user memory for dma mapping.
+ * Return: 0 on success, -errno on failure.
+ */
+struct dfl_fpga_port_dma_map {
+	/* Input */
+	__u32 argsz;		/* Structure length */
+	__u32 flags;		/* Zero for now */
+	__u64 user_addr;        /* Process virtual address */
+	__u64 length;           /* Length of mapping (bytes)*/
+	/* Output */
+	__u64 iova;             /* IO virtual address */
+};
+
+#define DFL_FPGA_PORT_DMA_MAP		_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 3)
+
+/**
+ * DFL_FPGA_PORT_DMA_UNMAP - _IOW(FPGA_MAGIC, PORT_BASE + 4,
+ *						struct dfl_fpga_port_dma_unmap)
+ *
+ * Unmap the dma memory per iova provided by caller.
+ * Return: 0 on success, -errno on failure.
+ */
+struct dfl_fpga_port_dma_unmap {
+	/* Input */
+	__u32 argsz;		/* Structure length */
+	__u32 flags;		/* Zero for now */
+	__u64 iova;		/* IO virtual address */
+};
+
+#define DFL_FPGA_PORT_DMA_UNMAP		_IO(DFL_FPGA_MAGIC, DFL_PORT_BASE + 4)
+
 /* IOCTLs for FME file descriptor */
 
 /**
-- 
cgit v1.2.3


From 45cd74cb5061781e793a098c420a7f548fdc9e7d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 10 Jul 2018 17:15:38 +0200
Subject: eventpoll.h: wrap casts in () properly

When importing the latest copy of the kernel headers into Bionic,
Christpher and Elliott noticed that the eventpoll.h casts were not
wrapped in ().  As it is, clang complains about macros without
surrounding (), so this makes it a pain for userspace tools.

So fix it up by adding another () pair, and make them line up purty by
using tabs.

Fixes: 65aaf87b3aa2 ("add EPOLLNVAL, annotate EPOLL... and event_poll->event")
Reported-by: Christopher Ferris <cferris@google.com>
Reported-by: Elliott Hughes <enh@google.com>
Cc: stable <stable@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/eventpoll.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index bf48e71f2634..8a3432d0f0dc 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -42,7 +42,7 @@
 #define EPOLLRDHUP	(__force __poll_t)0x00002000
 
 /* Set exclusive wakeup mode for the target file descriptor */
-#define EPOLLEXCLUSIVE (__force __poll_t)(1U << 28)
+#define EPOLLEXCLUSIVE	((__force __poll_t)(1U << 28))
 
 /*
  * Request the handling of system wakeup events so as to prevent system suspends
@@ -54,13 +54,13 @@
  *
  * Requires CAP_BLOCK_SUSPEND
  */
-#define EPOLLWAKEUP (__force __poll_t)(1U << 29)
+#define EPOLLWAKEUP	((__force __poll_t)(1U << 29))
 
 /* Set the One Shot behaviour for the target file descriptor */
-#define EPOLLONESHOT (__force __poll_t)(1U << 30)
+#define EPOLLONESHOT	((__force __poll_t)(1U << 30))
 
 /* Set the Edge Triggered behaviour for the target file descriptor */
-#define EPOLLET (__force __poll_t)(1U << 31)
+#define EPOLLET		((__force __poll_t)(1U << 31))
 
 /* 
  * On x86-64 make the 64bit structure have the same alignment as the
-- 
cgit v1.2.3


From 5fd778915ad29184a5ff8eb82d1118f6916b79e4 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 28 Jun 2018 17:45:14 +0200
Subject: sched/sysctl: Remove unused sched_time_avg_ms sysctl

/proc/sys/kernel/sched_time_avg_ms entry is not used anywhere,
remove it.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten.Rasmussen@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: claudio@evidence.eu.com
Cc: daniel.lezcano@linaro.org
Cc: dietmar.eggemann@arm.com
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: patrick.bellasi@arm.com
Cc: quentin.perret@arm.com
Cc: rjw@rjwysocki.net
Cc: valentin.schneider@arm.com
Cc: viresh.kumar@linaro.org
Link: http://lkml.kernel.org/r/1530200714-4504-12-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/sysctl.h | 1 -
 kernel/sched/core.c          | 8 --------
 kernel/sched/sched.h         | 1 -
 kernel/sysctl.c              | 8 --------
 4 files changed, 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 1c1a1512ec55..913488d828cb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -40,7 +40,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-extern __read_mostly unsigned int sysctl_sched_time_avg;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a691b07390ab..ba6bb805693a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -46,14 +46,6 @@ const_debug unsigned int sysctl_sched_features =
  */
 const_debug unsigned int sysctl_sched_nr_migrate = 32;
 
-/*
- * period over which we average the RT time consumption, measured
- * in ms.
- *
- * default: 1s
- */
-const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
-
 /*
  * period over which we measure -rt task CPU usage in us.
  * default: 1s
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14aac2d2de80..ebb4b3c3ece7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1713,7 +1713,6 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
 
 extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 
-extern const_debug unsigned int sysctl_sched_time_avg;
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2d9837c0aff4..f22f76b7a138 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -368,14 +368,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_time_avg_ms",
-		.data		= &sysctl_sched_time_avg,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-	},
 #ifdef CONFIG_SCHEDSTATS
 	{
 		.procname	= "sched_schedstats",
-- 
cgit v1.2.3


From 788faab70d5a882693286b8d5022779559c79904 Mon Sep 17 00:00:00 2001
From: Tobias Tefke <tobias.tefke@gmail.com>
Date: Mon, 9 Jul 2018 12:57:15 +0200
Subject: perf, tools: Use correct articles in comments

Some of the comments in the perf events code use articles incorrectly,
using 'a' for words beginning with a vowel sound, where 'an' should be
used.

Signed-off-by: Tobias Tefke <tobias.tefke@tutanota.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: jolsa@redhat.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/20180709105715.22938-1-tobias.tefke@tutanota.com
[ Fix a few more perf related 'a event' typo fixes from all around the kernel and tooling tree. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/perf/core-book3s.c          |  6 +++---
 include/linux/perf_event.h               |  2 +-
 kernel/events/core.c                     | 16 ++++++++--------
 kernel/events/uprobes.c                  |  6 +++---
 tools/perf/Documentation/perf-list.txt   |  2 +-
 tools/perf/Documentation/perf-record.txt |  4 ++--
 6 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3f66fcf8ad99..19d8ab49d1bd 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1469,7 +1469,7 @@ static int collect_events(struct perf_event *group, int max_count,
 }
 
 /*
- * Add a event to the PMU.
+ * Add an event to the PMU.
  * If all events are not already frozen, then we disable and
  * re-enable the PMU in order to get hw_perf_enable to do the
  * actual work of reconfiguring the PMU.
@@ -1548,7 +1548,7 @@ nocheck:
 }
 
 /*
- * Remove a event from the PMU.
+ * Remove an event from the PMU.
  */
 static void power_pmu_del(struct perf_event *event, int ef_flags)
 {
@@ -1742,7 +1742,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 /*
  * Return 1 if we might be able to put event on a limited PMC,
  * or 0 if not.
- * A event can only go on a limited PMC if it counts something
+ * An event can only go on a limited PMC if it counts something
  * that a limited PMC can count, doesn't require interrupts, and
  * doesn't exclude any processor mode.
  */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1fa12887ec02..e6dd3a2f8ec4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -490,7 +490,7 @@ struct perf_addr_filters_head {
 };
 
 /**
- * enum perf_event_state - the states of a event
+ * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
 	PERF_EVENT_STATE_DEAD		= -4,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a4d5ac6d74af..86b31e5a5a9a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1656,7 +1656,7 @@ perf_event_groups_next(struct perf_event *event)
 				typeof(*event), group_node))
 
 /*
- * Add a event from the lists for its context.
+ * Add an event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
@@ -1844,7 +1844,7 @@ static void perf_group_attach(struct perf_event *event)
 }
 
 /*
- * Remove a event from the lists for its context.
+ * Remove an event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
  */
 static void
@@ -2148,7 +2148,7 @@ static void __perf_event_disable(struct perf_event *event,
 }
 
 /*
- * Disable a event.
+ * Disable an event.
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
@@ -2677,7 +2677,7 @@ static void __perf_event_enable(struct perf_event *event,
 }
 
 /*
- * Enable a event.
+ * Enable an event.
  *
  * If event->ctx is a cloned context, callers must make sure that
  * every task struct that event->ctx->task could possibly point to
@@ -2755,7 +2755,7 @@ static int __perf_event_stop(void *info)
 	 * events will refuse to restart because of rb::aux_mmap_count==0,
 	 * see comments in perf_aux_output_begin().
 	 *
-	 * Since this is happening on a event-local CPU, no trace is lost
+	 * Since this is happening on an event-local CPU, no trace is lost
 	 * while restarting.
 	 */
 	if (sd->restart)
@@ -4827,7 +4827,7 @@ __perf_read(struct perf_event *event, char __user *buf, size_t count)
 	int ret;
 
 	/*
-	 * Return end-of-file for a read on a event that is in
+	 * Return end-of-file for a read on an event that is in
 	 * error state (i.e. because it was pinned but it couldn't be
 	 * scheduled on to the CPU at some point).
 	 */
@@ -9898,7 +9898,7 @@ enabled:
 }
 
 /*
- * Allocate and initialize a event structure
+ * Allocate and initialize an event structure
  */
 static struct perf_event *
 perf_event_alloc(struct perf_event_attr *attr, int cpu,
@@ -11229,7 +11229,7 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 }
 
 /*
- * Inherit a event from parent task to child task.
+ * Inherit an event from parent task to child task.
  *
  * Returns:
  *  - valid pointer on success
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ccc579a7d32e..aed1ba569954 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -918,7 +918,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 EXPORT_SYMBOL_GPL(uprobe_register);
 
 /*
- * uprobe_apply - unregister a already registered probe.
+ * uprobe_apply - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
  * @offset: offset from the start of the file.
  * @uc: consumer which wants to add more or remove some breakpoints
@@ -947,7 +947,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
 }
 
 /*
- * uprobe_unregister - unregister a already registered probe.
+ * uprobe_unregister - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
  * @offset: offset from the start of the file.
  * @uc: identify which probe if multiple probes are colocated.
@@ -1403,7 +1403,7 @@ static struct return_instance *free_ret_instance(struct return_instance *ri)
 
 /*
  * Called with no locks held.
- * Called in context of a exiting or a exec-ing thread.
+ * Called in context of an exiting or an exec-ing thread.
  */
 void uprobe_free_utask(struct task_struct *t)
 {
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 11300dbe35c5..14e13512c05f 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -234,7 +234,7 @@ perf also supports group leader sampling using the :S specifier.
   perf record -e '{cycles,instructions}:S' ...
   perf report --group
 
-Normally all events in a event group sample, but with :S only
+Normally all events in an event group sample, but with :S only
 the first event (the leader) samples, and it only reads the values of the
 other events in the group.
 
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 04168da4268e..246dee081efd 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -94,7 +94,7 @@ OPTIONS
 	  "perf report" to view group events together.
 
 --filter=<filter>::
-        Event filter. This option should follow a event selector (-e) which
+        Event filter. This option should follow an event selector (-e) which
 	selects either tracepoint event(s) or a hardware trace PMU
 	(e.g. Intel PT or CoreSight).
 
@@ -153,7 +153,7 @@ OPTIONS
 
 --exclude-perf::
 	Don't record events issued by perf itself. This option should follow
-	a event selector (-e) which selects tracepoint event(s). It adds a
+	an event selector (-e) which selects tracepoint event(s). It adds a
 	filter expression 'common_pid != $PERFPID' to filters. If other
 	'--filter' exists, the new filter expression will be combined with
 	them by '&&'.
-- 
cgit v1.2.3


From 3eb420e70d879ce0e6bf752accf5cdedb0a59de8 Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Wed, 11 Jul 2018 11:40:35 +0200
Subject: efi: Use a work queue to invoke EFI Runtime Services

Presently, when a user process requests the kernel to execute any
UEFI runtime service, the kernel temporarily switches to a separate
set of page tables that describe the virtual mapping of the UEFI
runtime services regions in memory. Since UEFI runtime services are
typically invoked with interrupts enabled, any code that may be called
during this time, will have an incorrect view of the process's address
space. Although it is unusual for code running in interrupt context to
make assumptions about the process context it runs in, there are cases
(such as the perf subsystem taking samples) where this causes problems.

So let's set up a work queue for calling UEFI runtime services, so that
the actual calls are made when the work queue items are dispatched by a
work queue worker running in a separate kernel thread. Such threads are
not expected to have userland mappings in the first place, and so the
additional mappings created for the UEFI runtime services can never
clash with any.

The ResetSystem() runtime service is not covered by the work queue
handling, since it is not expected to return, and may be called at a
time when the kernel is torn down to the point where we cannot expect
work queues to still be operational.

The non-blocking variants of SetVariable() and QueryVariableInfo()
are also excluded: these are intended to be used from atomic context,
which obviously rules out waiting for a completion to be signalled by
another thread. Note that these variants are currently only used for
UEFI runtime services calls that occur very early in the boot, and
for ones that occur in critical conditions, e.g., to flush kernel logs
to UEFI variables via efi-pstore.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
[ardb: exclude ResetSystem() from the workqueue treatment
       merge from 2 separate patches and rewrite commit log]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711094040.12506-4-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c              |  14 +++
 drivers/firmware/efi/runtime-wrappers.c | 202 +++++++++++++++++++++++++++++---
 include/linux/efi.h                     |   3 +
 3 files changed, 204 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 232f4915223b..1379a375dfa8 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -84,6 +84,8 @@ struct mm_struct efi_mm = {
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
 };
 
+struct workqueue_struct *efi_rts_wq;
+
 static bool disable_runtime;
 static int __init setup_noefi(char *arg)
 {
@@ -337,6 +339,18 @@ static int __init efisubsys_init(void)
 	if (!efi_enabled(EFI_BOOT))
 		return 0;
 
+	/*
+	 * Since we process only one efi_runtime_service() at a time, an
+	 * ordered workqueue (which creates only one execution context)
+	 * should suffice all our needs.
+	 */
+	efi_rts_wq = alloc_ordered_workqueue("efi_rts_wq", 0);
+	if (!efi_rts_wq) {
+		pr_err("Creating efi_rts_wq failed, EFI runtime services disabled.\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return 0;
+	}
+
 	/* We register the efi directory at /sys/firmware/efi */
 	efi_kobj = kobject_create_and_add("efi", firmware_kobj);
 	if (!efi_kobj) {
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index ae54870b2788..aa66cbf23512 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -1,6 +1,15 @@
 /*
  * runtime-wrappers.c - Runtime Services function call wrappers
  *
+ * Implementation summary:
+ * -----------------------
+ * 1. When user/kernel thread requests to execute efi_runtime_service(),
+ * enqueue work to efi_rts_wq.
+ * 2. Caller thread waits for completion until the work is finished
+ * because it's dependent on the return status and execution of
+ * efi_runtime_service().
+ * For instance, get_variable() and get_next_variable().
+ *
  * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * Split off from arch/x86/platform/efi/efi.c
@@ -22,6 +31,9 @@
 #include <linux/mutex.h>
 #include <linux/semaphore.h>
 #include <linux/stringify.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
 #include <asm/efi.h>
 
 /*
@@ -33,6 +45,76 @@
 #define __efi_call_virt(f, args...) \
 	__efi_call_virt_pointer(efi.systab->runtime, f, args)
 
+/* efi_runtime_service() function identifiers */
+enum efi_rts_ids {
+	GET_TIME,
+	SET_TIME,
+	GET_WAKEUP_TIME,
+	SET_WAKEUP_TIME,
+	GET_VARIABLE,
+	GET_NEXT_VARIABLE,
+	SET_VARIABLE,
+	QUERY_VARIABLE_INFO,
+	GET_NEXT_HIGH_MONO_COUNT,
+	UPDATE_CAPSULE,
+	QUERY_CAPSULE_CAPS,
+};
+
+/*
+ * efi_runtime_work:	Details of EFI Runtime Service work
+ * @arg<1-5>:		EFI Runtime Service function arguments
+ * @status:		Status of executing EFI Runtime Service
+ * @efi_rts_id:		EFI Runtime Service function identifier
+ * @efi_rts_comp:	Struct used for handling completions
+ */
+struct efi_runtime_work {
+	void *arg1;
+	void *arg2;
+	void *arg3;
+	void *arg4;
+	void *arg5;
+	efi_status_t status;
+	struct work_struct work;
+	enum efi_rts_ids efi_rts_id;
+	struct completion efi_rts_comp;
+};
+
+/*
+ * efi_queue_work:	Queue efi_runtime_service() and wait until it's done
+ * @rts:		efi_runtime_service() function identifier
+ * @rts_arg<1-5>:	efi_runtime_service() function arguments
+ *
+ * Accesses to efi_runtime_services() are serialized by a binary
+ * semaphore (efi_runtime_lock) and caller waits until the work is
+ * finished, hence _only_ one work is queued at a time and the caller
+ * thread waits for completion.
+ */
+#define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5)		\
+({									\
+	struct efi_runtime_work efi_rts_work;				\
+	efi_rts_work.status = EFI_ABORTED;				\
+									\
+	init_completion(&efi_rts_work.efi_rts_comp);			\
+	INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts);		\
+	efi_rts_work.arg1 = _arg1;					\
+	efi_rts_work.arg2 = _arg2;					\
+	efi_rts_work.arg3 = _arg3;					\
+	efi_rts_work.arg4 = _arg4;					\
+	efi_rts_work.arg5 = _arg5;					\
+	efi_rts_work.efi_rts_id = _rts;					\
+									\
+	/*								\
+	 * queue_work() returns 0 if work was already on queue,         \
+	 * _ideally_ this should never happen.                          \
+	 */								\
+	if (queue_work(efi_rts_wq, &efi_rts_work.work))			\
+		wait_for_completion(&efi_rts_work.efi_rts_comp);	\
+	else								\
+		pr_err("Failed to queue work to efi_rts_wq.\n");	\
+									\
+	efi_rts_work.status;						\
+})
+
 void efi_call_virt_check_flags(unsigned long flags, const char *call)
 {
 	unsigned long cur_flags, mismatch;
@@ -90,13 +172,98 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
  */
 static DEFINE_SEMAPHORE(efi_runtime_lock);
 
+/*
+ * Calls the appropriate efi_runtime_service() with the appropriate
+ * arguments.
+ *
+ * Semantics followed by efi_call_rts() to understand efi_runtime_work:
+ * 1. If argument was a pointer, recast it from void pointer to original
+ * pointer type.
+ * 2. If argument was a value, recast it from void pointer to original
+ * pointer type and dereference it.
+ */
+static void efi_call_rts(struct work_struct *work)
+{
+	struct efi_runtime_work *efi_rts_work;
+	void *arg1, *arg2, *arg3, *arg4, *arg5;
+	efi_status_t status = EFI_NOT_FOUND;
+
+	efi_rts_work = container_of(work, struct efi_runtime_work, work);
+	arg1 = efi_rts_work->arg1;
+	arg2 = efi_rts_work->arg2;
+	arg3 = efi_rts_work->arg3;
+	arg4 = efi_rts_work->arg4;
+	arg5 = efi_rts_work->arg5;
+
+	switch (efi_rts_work->efi_rts_id) {
+	case GET_TIME:
+		status = efi_call_virt(get_time, (efi_time_t *)arg1,
+				       (efi_time_cap_t *)arg2);
+		break;
+	case SET_TIME:
+		status = efi_call_virt(set_time, (efi_time_t *)arg1);
+		break;
+	case GET_WAKEUP_TIME:
+		status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
+				       (efi_bool_t *)arg2, (efi_time_t *)arg3);
+		break;
+	case SET_WAKEUP_TIME:
+		status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
+				       (efi_time_t *)arg2);
+		break;
+	case GET_VARIABLE:
+		status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
+				       (efi_guid_t *)arg2, (u32 *)arg3,
+				       (unsigned long *)arg4, (void *)arg5);
+		break;
+	case GET_NEXT_VARIABLE:
+		status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
+				       (efi_char16_t *)arg2,
+				       (efi_guid_t *)arg3);
+		break;
+	case SET_VARIABLE:
+		status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
+				       (efi_guid_t *)arg2, *(u32 *)arg3,
+				       *(unsigned long *)arg4, (void *)arg5);
+		break;
+	case QUERY_VARIABLE_INFO:
+		status = efi_call_virt(query_variable_info, *(u32 *)arg1,
+				       (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
+		break;
+	case GET_NEXT_HIGH_MONO_COUNT:
+		status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
+		break;
+	case UPDATE_CAPSULE:
+		status = efi_call_virt(update_capsule,
+				       (efi_capsule_header_t **)arg1,
+				       *(unsigned long *)arg2,
+				       *(unsigned long *)arg3);
+		break;
+	case QUERY_CAPSULE_CAPS:
+		status = efi_call_virt(query_capsule_caps,
+				       (efi_capsule_header_t **)arg1,
+				       *(unsigned long *)arg2, (u64 *)arg3,
+				       (int *)arg4);
+		break;
+	default:
+		/*
+		 * Ideally, we should never reach here because a caller of this
+		 * function should have put the right efi_runtime_service()
+		 * function identifier into efi_rts_work->efi_rts_id
+		 */
+		pr_err("Requested executing invalid EFI Runtime Service.\n");
+	}
+	efi_rts_work->status = status;
+	complete(&efi_rts_work->efi_rts_comp);
+}
+
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	efi_status_t status;
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_time, tm, tc);
+	status = efi_queue_work(GET_TIME, tm, tc, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -107,7 +274,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_time, tm);
+	status = efi_queue_work(SET_TIME, tm, NULL, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -120,7 +287,8 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
+	status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm, NULL,
+				NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -131,7 +299,8 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_wakeup_time, enabled, tm);
+	status = efi_queue_work(SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
+				NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -146,8 +315,8 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_variable, name, vendor, attr, data_size,
-			       data);
+	status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
+				data);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -160,7 +329,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_next_variable, name_size, name, vendor);
+	status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -175,8 +345,8 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
-			       data);
+	status = efi_queue_work(SET_VARIABLE, name, vendor, &attr, &data_size,
+				data);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -210,8 +380,8 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(query_variable_info, attr, storage_space,
-			       remaining_space, max_variable_size);
+	status = efi_queue_work(QUERY_VARIABLE_INFO, &attr, storage_space,
+				remaining_space, max_variable_size, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -242,7 +412,8 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(get_next_high_mono_count, count);
+	status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -272,7 +443,8 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(update_capsule, capsules, count, sg_list);
+	status = efi_queue_work(UPDATE_CAPSULE, capsules, &count, &sg_list,
+				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -289,8 +461,8 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
-			       reset_type);
+	status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, &count,
+				max_size, reset_type, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 56add823f190..8ba0cdd244b2 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1651,4 +1651,7 @@ struct linux_efi_tpm_eventlog {
 
 extern int efi_tpm_eventlog_init(void);
 
+/* Workqueue to queue EFI Runtime Services */
+extern struct workqueue_struct *efi_rts_wq;
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From f5dcc214aae29a68b37b2b4183f7171724e7b02d Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Wed, 11 Jul 2018 11:40:37 +0200
Subject: efi: Remove the declaration of efi_late_init() as the function is
 unused

The following commit:

  7b0a911478c74 ("efi/x86: Move the EFI BGRT init code to early init code")

... removed the implementation and all the references to
efi_late_init() but the function is still declared at
include/linux/efi.h.

Hence, remove the unnecessary declaration.

Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711094040.12506-6-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 8ba0cdd244b2..e190652f5ef9 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -988,14 +988,12 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec64 *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
 #ifdef CONFIG_X86
-extern void efi_late_init(void);
 extern void efi_free_boot_services(void);
 extern efi_status_t efi_query_variable_store(u32 attributes,
 					     unsigned long size,
 					     bool nonblocking);
 extern void efi_find_mirror(void);
 #else
-static inline void efi_late_init(void) {}
 static inline void efi_free_boot_services(void) {}
 
 static inline efi_status_t efi_query_variable_store(u32 attributes,
-- 
cgit v1.2.3


From 784abe24c903b093af04cf1a043140faa556cad7 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:35 +0300
Subject: net: Add decrypted field to skb

The decrypted bit is propogated to cloned/copied skbs.
This will be used later by the inline crypto receive side offload
of tls.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 ++++++-
 net/core/skbuff.c      | 6 ++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7601838c2513..3ceb8dcc54da 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,6 +630,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
+ *	@decrypted: Decrypted SKB
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -736,7 +737,11 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-				__unused:1; /* one bit hole */
+#ifdef CONFIG_TLS_DEVICE
+				decrypted:1;
+#else
+				__unused:1;
+#endif
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c4e24ac27464..cfd6c6f35f9c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -805,6 +805,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 * It is not yet because we do not want to have a 16 bit hole
 	 */
 	new->queue_mapping = old->queue_mapping;
+#ifdef CONFIG_TLS_DEVICE
+	new->decrypted = old->decrypted;
+#endif
 
 	memcpy(&new->headers_start, &old->headers_start,
 	       offsetof(struct sk_buff, headers_end) -
@@ -865,6 +868,9 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
+#ifdef CONFIG_TLS_DEVICE
+	C(decrypted);
+#endif
 	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
-- 
cgit v1.2.3


From 14136564c8ee94566945e85014019cbdb1716dca Mon Sep 17 00:00:00 2001
From: Ilya Lesokhin <ilyal@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:36 +0300
Subject: net: Add TLS RX offload feature

This patch adds a netdev feature to configure TLS RX inline crypto offload.

Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 ++
 net/core/ethtool.c              | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 623bb8ced060..2b2a6dce1630 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -79,6 +79,7 @@ enum {
 	NETIF_F_HW_ESP_TX_CSUM_BIT,	/* ESP with TX checksum offload */
 	NETIF_F_RX_UDP_TUNNEL_PORT_BIT, /* Offload of RX port for UDP tunnels */
 	NETIF_F_HW_TLS_TX_BIT,		/* Hardware TLS TX offload */
+	NETIF_F_HW_TLS_RX_BIT,		/* Hardware TLS RX offload */
 
 	NETIF_F_GRO_HW_BIT,		/* Hardware Generic receive offload */
 	NETIF_F_HW_TLS_RECORD_BIT,	/* Offload TLS record */
@@ -151,6 +152,7 @@ enum {
 #define NETIF_F_HW_TLS_RECORD	__NETIF_F(HW_TLS_RECORD)
 #define NETIF_F_GSO_UDP_L4	__NETIF_F(GSO_UDP_L4)
 #define NETIF_F_HW_TLS_TX	__NETIF_F(HW_TLS_TX)
+#define NETIF_F_HW_TLS_RX	__NETIF_F(HW_TLS_RX)
 
 #define for_each_netdev_feature(mask_addr, bit)	\
 	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e677a20180cf..c9993c6c2fd4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
 	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
 	[NETIF_F_HW_TLS_TX_BIT] =	 "tls-hw-tx-offload",
+	[NETIF_F_HW_TLS_RX_BIT] =	 "tls-hw-rx-offload",
 };
 
 static const char
-- 
cgit v1.2.3


From 16e4edc297ffc9b643b8dd3da6b0d579753ea2b3 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:37 +0300
Subject: net: Add TLS rx resync NDO

Add new netdev tls op for resynchronizing HW tls context

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fa7f7a3f8b3..3514d67112b3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -903,6 +903,8 @@ struct tlsdev_ops {
 	void (*tls_dev_del)(struct net_device *netdev,
 			    struct tls_context *ctx,
 			    enum tls_offload_ctx_dir direction);
+	void (*tls_dev_resync_rx)(struct net_device *netdev,
+				  struct sock *sk, u32 seq, u64 rcd_sn);
 };
 #endif
 
-- 
cgit v1.2.3


From d80a1b9d186057ddb0d384ba601cf2b7d214539c Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:39 +0300
Subject: tls: Refactor tls_offload variable names

For symmetry, we rename tls_offload_context to
tls_offload_context_tx before we add tls_offload_context_rx.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls.h |  6 +++---
 include/net/tls.h                                  | 16 +++++++-------
 net/tls/tls_device.c                               | 25 +++++++++++-----------
 net/tls/tls_device_fallback.c                      |  8 +++----
 4 files changed, 27 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index b6162178f621..b82f4deaa398 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -50,7 +50,7 @@ struct mlx5e_tls {
 };
 
 struct mlx5e_tls_offload_context {
-	struct tls_offload_context base;
+	struct tls_offload_context_tx base;
 	u32 expected_seq;
 	__be32 swid;
 };
@@ -59,8 +59,8 @@ static inline struct mlx5e_tls_offload_context *
 mlx5e_get_tls_tx_context(struct tls_context *tls_ctx)
 {
 	BUILD_BUG_ON(sizeof(struct mlx5e_tls_offload_context) >
-		     TLS_OFFLOAD_CONTEXT_SIZE);
-	return container_of(tls_offload_ctx(tls_ctx),
+		     TLS_OFFLOAD_CONTEXT_SIZE_TX);
+	return container_of(tls_offload_ctx_tx(tls_ctx),
 			    struct mlx5e_tls_offload_context,
 			    base);
 }
diff --git a/include/net/tls.h b/include/net/tls.h
index 70c273777fe9..5dcd808236a7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -128,7 +128,7 @@ struct tls_record_info {
 	skb_frag_t frags[MAX_SKB_FRAGS];
 };
 
-struct tls_offload_context {
+struct tls_offload_context_tx {
 	struct crypto_aead *aead_send;
 	spinlock_t lock;	/* protects records list */
 	struct list_head records_list;
@@ -147,8 +147,8 @@ struct tls_offload_context {
 #define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *)))
 };
 
-#define TLS_OFFLOAD_CONTEXT_SIZE                                               \
-	(ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) +           \
+#define TLS_OFFLOAD_CONTEXT_SIZE_TX                                            \
+	(ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) +        \
 	 TLS_DRIVER_STATE_SIZE)
 
 enum {
@@ -239,7 +239,7 @@ void tls_device_sk_destruct(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn);
 
 static inline bool tls_record_is_start_marker(struct tls_record_info *rec)
@@ -380,10 +380,10 @@ static inline struct tls_sw_context_tx *tls_sw_ctx_tx(
 	return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
-static inline struct tls_offload_context *tls_offload_ctx(
-		const struct tls_context *tls_ctx)
+static inline struct tls_offload_context_tx *
+tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 {
-	return (struct tls_offload_context *)tls_ctx->priv_ctx_tx;
+	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
@@ -396,7 +396,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct sk_buff *skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-			 struct tls_offload_context *offload_ctx,
+			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
 #endif /* _TLS_OFFLOAD_H */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a7a8f8e20ff3..332a5d1459b6 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,9 +52,8 @@ static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
-	struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx);
+	kfree(tls_offload_ctx_tx(ctx));
 
-	kfree(offload_ctx);
 	kfree(ctx);
 }
 
@@ -125,7 +124,7 @@ static void destroy_record(struct tls_record_info *record)
 	kfree(record);
 }
 
-static void delete_all_records(struct tls_offload_context *offload_ctx)
+static void delete_all_records(struct tls_offload_context_tx *offload_ctx)
 {
 	struct tls_record_info *info, *temp;
 
@@ -141,14 +140,14 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_record_info *info, *temp;
-	struct tls_offload_context *ctx;
+	struct tls_offload_context_tx *ctx;
 	u64 deleted_records = 0;
 	unsigned long flags;
 
 	if (!tls_ctx)
 		return;
 
-	ctx = tls_offload_ctx(tls_ctx);
+	ctx = tls_offload_ctx_tx(tls_ctx);
 
 	spin_lock_irqsave(&ctx->lock, flags);
 	info = ctx->retransmit_hint;
@@ -179,7 +178,7 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 void tls_device_sk_destruct(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 
 	if (ctx->open_record)
 		destroy_record(ctx->open_record);
@@ -219,7 +218,7 @@ static void tls_append_frag(struct tls_record_info *record,
 
 static int tls_push_record(struct sock *sk,
 			   struct tls_context *ctx,
-			   struct tls_offload_context *offload_ctx,
+			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
 			   struct page_frag *pfrag,
 			   int flags,
@@ -264,7 +263,7 @@ static int tls_push_record(struct sock *sk,
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_create_new_record(struct tls_offload_context *offload_ctx,
+static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
 {
@@ -290,7 +289,7 @@ static int tls_create_new_record(struct tls_offload_context *offload_ctx,
 }
 
 static int tls_do_allocation(struct sock *sk,
-			     struct tls_offload_context *offload_ctx,
+			     struct tls_offload_context_tx *offload_ctx,
 			     struct page_frag *pfrag,
 			     size_t prepend_size)
 {
@@ -324,7 +323,7 @@ static int tls_push_data(struct sock *sk,
 			 unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
 	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
 	struct tls_record_info *record = ctx->open_record;
@@ -477,7 +476,7 @@ out:
 	return rc;
 }
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn)
 {
 	u64 record_sn = context->hint_record_sn;
@@ -524,7 +523,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
 	struct tls_record_info *start_marker_record;
-	struct tls_offload_context *offload_ctx;
+	struct tls_offload_context_tx *offload_ctx;
 	struct tls_crypto_info *crypto_info;
 	struct net_device *netdev;
 	char *iv, *rec_seq;
@@ -546,7 +545,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		goto out;
 	}
 
-	offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL);
+	offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
 	if (!offload_ctx) {
 		rc = -ENOMEM;
 		goto free_marker_record;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 748914abdb60..d1d7dce38e0b 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -214,7 +214,7 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
 
 static int fill_sg_in(struct scatterlist *sg_in,
 		      struct sk_buff *skb,
-		      struct tls_offload_context *ctx,
+		      struct tls_offload_context_tx *ctx,
 		      u64 *rcd_sn,
 		      s32 *sync_size,
 		      int *resync_sgs)
@@ -299,7 +299,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 				   s32 sync_size, u64 rcd_sn)
 {
 	int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int payload_len = skb->len - tcp_payload_offset;
 	void *buf, *iv, *aad, *dummy_buf;
 	struct aead_request *aead_req;
@@ -361,7 +361,7 @@ static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
 {
 	int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int payload_len = skb->len - tcp_payload_offset;
 	struct scatterlist *sg_in, sg_out[3];
 	struct sk_buff *nskb = NULL;
@@ -415,7 +415,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 }
 
 int tls_sw_fallback_init(struct sock *sk,
-			 struct tls_offload_context *offload_ctx,
+			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info)
 {
 	const u8 *key;
-- 
cgit v1.2.3


From dafb67f3bb4a58a45fe92c1e362ea6429831688a Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:40 +0300
Subject: tls: Split decrypt_skb to two functions

Previously, decrypt_skb also updated the TLS context.
Now, decrypt_skb only decrypts the payload using the current context,
while decrypt_skb_update also updates the state.

Later, in the tls_device Rx flow, we will use decrypt_skb directly.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h |  2 ++
 net/tls/tls_sw.c  | 44 ++++++++++++++++++++++++++------------------
 2 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 5dcd808236a7..49b89221db43 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -390,6 +390,8 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		struct scatterlist *sgout);
 
 struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct net_device *dev,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7453f5ae0819..1d2271736717 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -53,7 +53,6 @@ static int tls_do_decryption(struct sock *sk,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct strp_msg *rxm = strp_msg(skb);
 	struct aead_request *aead_req;
 
 	int ret;
@@ -71,18 +70,6 @@ static int tls_do_decryption(struct sock *sk,
 
 	ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
 
-	if (ret < 0)
-		goto out;
-
-	rxm->offset += tls_ctx->rx.prepend_size;
-	rxm->full_len -= tls_ctx->rx.overhead_size;
-	tls_advance_record_sn(sk, &tls_ctx->rx);
-
-	ctx->decrypted = true;
-
-	ctx->saved_data_ready(sk);
-
-out:
 	aead_request_free(aead_req);
 	return ret;
 }
@@ -666,8 +653,29 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 	return skb;
 }
 
-static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
-		       struct scatterlist *sgout)
+static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+			      struct scatterlist *sgout)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
+	int err = 0;
+
+	err = decrypt_skb(sk, skb, sgout);
+	if (err < 0)
+		return err;
+
+	rxm->offset += tls_ctx->rx.prepend_size;
+	rxm->full_len -= tls_ctx->rx.overhead_size;
+	tls_advance_record_sn(sk, &tls_ctx->rx);
+	ctx->decrypted = true;
+	ctx->saved_data_ready(sk);
+
+	return err;
+}
+
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		struct scatterlist *sgout)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -812,7 +820,7 @@ int tls_sw_recvmsg(struct sock *sk,
 				if (err < 0)
 					goto fallback_to_reg_recv;
 
-				err = decrypt_skb(sk, skb, sgin);
+				err = decrypt_skb_update(sk, skb, sgin);
 				for (; pages > 0; pages--)
 					put_page(sg_page(&sgin[pages]));
 				if (err < 0) {
@@ -821,7 +829,7 @@ int tls_sw_recvmsg(struct sock *sk,
 				}
 			} else {
 fallback_to_reg_recv:
-				err = decrypt_skb(sk, skb, NULL);
+				err = decrypt_skb_update(sk, skb, NULL);
 				if (err < 0) {
 					tls_err_abort(sk, EBADMSG);
 					goto recv_end;
@@ -892,7 +900,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 	if (!ctx->decrypted) {
-		err = decrypt_skb(sk, skb, NULL);
+		err = decrypt_skb_update(sk, skb, NULL);
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
-- 
cgit v1.2.3


From 39f56e1a78d647316db330c3b6f4c5637a895e3b Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:41 +0300
Subject: tls: Split tls_sw_release_resources_rx

This patch splits tls_sw_release_resources_rx into two functions one
which releases all inner software tls structures and another that also
frees the containing structure.

In TLS_DEVICE we will need to release the software structures without
freeeing the containing structure, which contains other information.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h |  1 +
 net/tls/tls_sw.c  | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 49b89221db43..7a485de25646 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -223,6 +223,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
+void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 unsigned int tls_sw_poll(struct file *file, struct socket *sock,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1d2271736717..694d26589dcc 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1030,7 +1030,7 @@ void tls_sw_free_resources_tx(struct sock *sk)
 	kfree(ctx);
 }
 
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_release_resources_rx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1049,6 +1049,14 @@ void tls_sw_free_resources_rx(struct sock *sk)
 		strp_done(&ctx->strp);
 		lock_sock(sk);
 	}
+}
+
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
+	tls_sw_release_resources_rx(sk);
 
 	kfree(ctx);
 }
-- 
cgit v1.2.3


From 4799ac81e52a72a6404827bf2738337bb581a174 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:43 +0300
Subject: tls: Add rx inline crypto offload

This patch completes the generic infrastructure to offload TLS crypto to a
network device. It enables the kernel to skip decryption and
authentication of some skbs marked as decrypted by the NIC. In the fast
path, all packets received are decrypted by the NIC and the performance
is comparable to plain TCP.

This infrastructure doesn't require a TCP offload engine. Instead, the
NIC only decrypts packets that contain the expected TCP sequence number.
Out-Of-Order TCP packets are provided unmodified. As a result, at the
worst case a received TLS record consists of both plaintext and ciphertext
packets. These partially decrypted records must be reencrypted,
only to be decrypted.

The notable differences between SW KTLS Rx and this offload are as
follows:
1. Partial decryption - Software must handle the case of a TLS record
that was only partially decrypted by HW. This can happen due to packet
reordering.
2. Resynchronization - tls_read_size calls the device driver to
resynchronize HW after HW lost track of TLS record framing in
the TCP stream.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h             |  63 +++++++++-
 net/tls/tls_device.c          | 278 ++++++++++++++++++++++++++++++++++++++----
 net/tls/tls_device_fallback.c |   1 +
 net/tls/tls_main.c            |  32 +++--
 net/tls/tls_sw.c              |  24 +++-
 5 files changed, 355 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 7a485de25646..d8b3b6578c01 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -83,6 +83,16 @@ struct tls_device {
 	void (*unhash)(struct tls_device *device, struct sock *sk);
 };
 
+enum {
+	TLS_BASE,
+	TLS_SW,
+#ifdef CONFIG_TLS_DEVICE
+	TLS_HW,
+#endif
+	TLS_HW_RECORD,
+	TLS_NUM_CONFIG,
+};
+
 struct tls_sw_context_tx {
 	struct crypto_aead *aead_send;
 	struct crypto_wait async_wait;
@@ -197,6 +207,7 @@ struct tls_context {
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
+	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
 	int  (*setsockopt)(struct sock *sk, int level,
@@ -209,13 +220,27 @@ struct tls_context {
 	void (*unhash)(struct sock *sk);
 };
 
+struct tls_offload_context_rx {
+	/* sw must be the first member of tls_offload_context_rx */
+	struct tls_sw_context_rx sw;
+	atomic64_t resync_req;
+	u8 driver_state[];
+	/* The TLS layer reserves room for driver specific state
+	 * Currently the belief is that there is not enough
+	 * driver specific state to justify another layer of indirection
+	 */
+};
+
+#define TLS_OFFLOAD_CONTEXT_SIZE_RX					\
+	(ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \
+	 TLS_DRIVER_STATE_SIZE)
+
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
 		int __user *optlen);
 int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
-
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
@@ -290,11 +315,19 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
+struct sk_buff *
+tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
+		      struct sk_buff *skb);
+
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
-	return sk_fullsock(sk) &&
-	       /* matches smp_store_release in tls_set_device_offload */
-	       smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+	return sk_fullsock(sk) &
+	       (smp_load_acquire(&sk->sk_validate_xmit_skb) ==
+	       &tls_validate_xmit_skb);
+#else
+	return false;
+#endif
 }
 
 static inline void tls_err_abort(struct sock *sk, int err)
@@ -387,10 +420,27 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline struct tls_offload_context_rx *
+tls_offload_ctx_rx(const struct tls_context *tls_ctx)
+{
+	return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx;
+}
+
+/* The TLS context is valid until sk_destruct is called */
+static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+	atomic64_set(&rx_ctx->resync_req, ((((uint64_t)seq) << 32) | 1));
+}
+
+
 int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 
@@ -402,4 +452,9 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
+
+void tls_device_offload_cleanup_rx(struct sock *sk);
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn);
+
 #endif /* _TLS_OFFLOAD_H */
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 332a5d1459b6..4995d84d228d 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,7 +52,11 @@ static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
-	kfree(tls_offload_ctx_tx(ctx));
+	if (ctx->tx_conf == TLS_HW)
+		kfree(tls_offload_ctx_tx(ctx));
+
+	if (ctx->rx_conf == TLS_HW)
+		kfree(tls_offload_ctx_rx(ctx));
 
 	kfree(ctx);
 }
@@ -70,10 +74,11 @@ static void tls_device_gc_task(struct work_struct *work)
 	list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
 		struct net_device *netdev = ctx->netdev;
 
-		if (netdev) {
+		if (netdev && ctx->tx_conf == TLS_HW) {
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_TX);
 			dev_put(netdev);
+			ctx->netdev = NULL;
 		}
 
 		list_del(&ctx->list);
@@ -81,6 +86,22 @@ static void tls_device_gc_task(struct work_struct *work)
 	}
 }
 
+static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
+			      struct net_device *netdev)
+{
+	if (sk->sk_destruct != tls_device_sk_destruct) {
+		refcount_set(&ctx->refcount, 1);
+		dev_hold(netdev);
+		ctx->netdev = netdev;
+		spin_lock_irq(&tls_device_lock);
+		list_add_tail(&ctx->list, &tls_device_list);
+		spin_unlock_irq(&tls_device_lock);
+
+		ctx->sk_destruct = sk->sk_destruct;
+		sk->sk_destruct = tls_device_sk_destruct;
+	}
+}
+
 static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
 {
 	unsigned long flags;
@@ -180,13 +201,15 @@ void tls_device_sk_destruct(struct sock *sk)
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 
-	if (ctx->open_record)
-		destroy_record(ctx->open_record);
+	tls_ctx->sk_destruct(sk);
 
-	delete_all_records(ctx);
-	crypto_free_aead(ctx->aead_send);
-	ctx->sk_destruct(sk);
-	clean_acked_data_disable(inet_csk(sk));
+	if (tls_ctx->tx_conf == TLS_HW) {
+		if (ctx->open_record)
+			destroy_record(ctx->open_record);
+		delete_all_records(ctx);
+		crypto_free_aead(ctx->aead_send);
+		clean_acked_data_disable(inet_csk(sk));
+	}
 
 	if (refcount_dec_and_test(&tls_ctx->refcount))
 		tls_device_queue_ctx_destruction(tls_ctx);
@@ -519,6 +542,118 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct net_device *netdev = tls_ctx->netdev;
+	struct tls_offload_context_rx *rx_ctx;
+	u32 is_req_pending;
+	s64 resync_req;
+	u32 req_seq;
+
+	if (tls_ctx->rx_conf != TLS_HW)
+		return;
+
+	rx_ctx = tls_offload_ctx_rx(tls_ctx);
+	resync_req = atomic64_read(&rx_ctx->resync_req);
+	req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
+	is_req_pending = resync_req;
+
+	if (unlikely(is_req_pending) && req_seq == seq &&
+	    atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
+		netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk,
+						      seq + TLS_HEADER_SIZE - 1,
+						      rcd_sn);
+}
+
+static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
+{
+	struct strp_msg *rxm = strp_msg(skb);
+	int err = 0, offset = rxm->offset, copy, nsg;
+	struct sk_buff *skb_iter, *unused;
+	struct scatterlist sg[1];
+	char *orig_buf, *buf;
+
+	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
+			   TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
+	if (!orig_buf)
+		return -ENOMEM;
+	buf = orig_buf;
+
+	nsg = skb_cow_data(skb, 0, &unused);
+	if (unlikely(nsg < 0)) {
+		err = nsg;
+		goto free_buf;
+	}
+
+	sg_init_table(sg, 1);
+	sg_set_buf(&sg[0], buf,
+		   rxm->full_len + TLS_HEADER_SIZE +
+		   TLS_CIPHER_AES_GCM_128_IV_SIZE);
+	skb_copy_bits(skb, offset, buf,
+		      TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+
+	/* We are interested only in the decrypted data not the auth */
+	err = decrypt_skb(sk, skb, sg);
+	if (err != -EBADMSG)
+		goto free_buf;
+	else
+		err = 0;
+
+	copy = min_t(int, skb_pagelen(skb) - offset,
+		     rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+	if (skb->decrypted)
+		skb_store_bits(skb, offset, buf, copy);
+
+	offset += copy;
+	buf += copy;
+
+	skb_walk_frags(skb, skb_iter) {
+		copy = min_t(int, skb_iter->len,
+			     rxm->full_len - offset + rxm->offset -
+			     TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+		if (skb_iter->decrypted)
+			skb_store_bits(skb, offset, buf, copy);
+
+		offset += copy;
+		buf += copy;
+	}
+
+free_buf:
+	kfree(orig_buf);
+	return err;
+}
+
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+	int is_decrypted = skb->decrypted;
+	int is_encrypted = !is_decrypted;
+	struct sk_buff *skb_iter;
+
+	/* Skip if it is already decrypted */
+	if (ctx->sw.decrypted)
+		return 0;
+
+	/* Check if all the data is decrypted already */
+	skb_walk_frags(skb, skb_iter) {
+		is_decrypted &= skb_iter->decrypted;
+		is_encrypted &= !skb_iter->decrypted;
+	}
+
+	ctx->sw.decrypted |= is_decrypted;
+
+	/* Return immedeatly if the record is either entirely plaintext or
+	 * entirely ciphertext. Otherwise handle reencrypt partially decrypted
+	 * record.
+	 */
+	return (is_encrypted || is_decrypted) ? 0 :
+		tls_device_reencrypt(sk, skb);
+}
+
 int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
@@ -608,7 +743,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 
 	clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
 	ctx->push_pending_record = tls_device_push_pending_record;
-	offload_ctx->sk_destruct = sk->sk_destruct;
 
 	/* TLS offload is greatly simplified if we don't send
 	 * SKBs where only part of the payload needs to be encrypted.
@@ -618,8 +752,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 	if (skb)
 		TCP_SKB_CB(skb)->eor = 1;
 
-	refcount_set(&ctx->refcount, 1);
-
 	/* We support starting offload on multiple sockets
 	 * concurrently, so we only need a read lock here.
 	 * This lock must precede get_netdev_for_sock to prevent races between
@@ -654,19 +786,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 	if (rc)
 		goto release_netdev;
 
-	ctx->netdev = netdev;
+	tls_device_attach(ctx, sk, netdev);
 
-	spin_lock_irq(&tls_device_lock);
-	list_add_tail(&ctx->list, &tls_device_list);
-	spin_unlock_irq(&tls_device_lock);
-
-	sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
 	/* following this assignment tls_is_sk_tx_device_offloaded
 	 * will return true and the context might be accessed
 	 * by the netdev's xmit function.
 	 */
-	smp_store_release(&sk->sk_destruct,
-			  &tls_device_sk_destruct);
+	smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
+	dev_put(netdev);
 	up_read(&device_offload_lock);
 	goto out;
 
@@ -689,6 +816,105 @@ out:
 	return rc;
 }
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+	struct tls_offload_context_rx *context;
+	struct net_device *netdev;
+	int rc = 0;
+
+	/* We support starting offload on multiple sockets
+	 * concurrently, so we only need a read lock here.
+	 * This lock must precede get_netdev_for_sock to prevent races between
+	 * NETDEV_DOWN and setsockopt.
+	 */
+	down_read(&device_offload_lock);
+	netdev = get_netdev_for_sock(sk);
+	if (!netdev) {
+		pr_err_ratelimited("%s: netdev not found\n", __func__);
+		rc = -EINVAL;
+		goto release_lock;
+	}
+
+	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+		pr_err_ratelimited("%s: netdev %s with no TLS offload\n",
+				   __func__, netdev->name);
+		rc = -ENOTSUPP;
+		goto release_netdev;
+	}
+
+	/* Avoid offloading if the device is down
+	 * We don't want to offload new flows after
+	 * the NETDEV_DOWN event
+	 */
+	if (!(netdev->flags & IFF_UP)) {
+		rc = -EINVAL;
+		goto release_netdev;
+	}
+
+	context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
+	if (!context) {
+		rc = -ENOMEM;
+		goto release_netdev;
+	}
+
+	ctx->priv_ctx_rx = context;
+	rc = tls_set_sw_offload(sk, ctx, 0);
+	if (rc)
+		goto release_ctx;
+
+	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
+					     &ctx->crypto_recv,
+					     tcp_sk(sk)->copied_seq);
+	if (rc) {
+		pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
+				   __func__);
+		goto free_sw_resources;
+	}
+
+	tls_device_attach(ctx, sk, netdev);
+	goto release_netdev;
+
+free_sw_resources:
+	tls_sw_free_resources_rx(sk);
+release_ctx:
+	ctx->priv_ctx_rx = NULL;
+release_netdev:
+	dev_put(netdev);
+release_lock:
+	up_read(&device_offload_lock);
+	return rc;
+}
+
+void tls_device_offload_cleanup_rx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct net_device *netdev;
+
+	down_read(&device_offload_lock);
+	netdev = tls_ctx->netdev;
+	if (!netdev)
+		goto out;
+
+	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+		pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n",
+				   __func__);
+		goto out;
+	}
+
+	netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx,
+					TLS_OFFLOAD_CTX_DIR_RX);
+
+	if (tls_ctx->tx_conf != TLS_HW) {
+		dev_put(netdev);
+		tls_ctx->netdev = NULL;
+	}
+out:
+	up_read(&device_offload_lock);
+	kfree(tls_ctx->rx.rec_seq);
+	kfree(tls_ctx->rx.iv);
+	tls_sw_release_resources_rx(sk);
+}
+
 static int tls_device_down(struct net_device *netdev)
 {
 	struct tls_context *ctx, *tmp;
@@ -709,8 +935,12 @@ static int tls_device_down(struct net_device *netdev)
 	spin_unlock_irqrestore(&tls_device_lock, flags);
 
 	list_for_each_entry_safe(ctx, tmp, &list, list)	{
-		netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
-						TLS_OFFLOAD_CTX_DIR_TX);
+		if (ctx->tx_conf == TLS_HW)
+			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+							TLS_OFFLOAD_CTX_DIR_TX);
+		if (ctx->rx_conf == TLS_HW)
+			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+							TLS_OFFLOAD_CTX_DIR_RX);
 		ctx->netdev = NULL;
 		dev_put(netdev);
 		list_del_init(&ctx->list);
@@ -731,12 +961,16 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (!(dev->features & NETIF_F_HW_TLS_TX))
+	if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
 		return NOTIFY_DONE;
 
 	switch (event) {
 	case NETDEV_REGISTER:
 	case NETDEV_FEAT_CHANGE:
+		if ((dev->features & NETIF_F_HW_TLS_RX) &&
+		    !dev->tlsdev_ops->tls_dev_resync_rx)
+			return NOTIFY_BAD;
+
 		if  (dev->tlsdev_ops &&
 		     dev->tlsdev_ops->tls_dev_add &&
 		     dev->tlsdev_ops->tls_dev_del)
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index d1d7dce38e0b..e3313c45663f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -413,6 +413,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 
 	return tls_sw_fallback(sk, skb);
 }
+EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
 int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 301f22430469..b09867c8b817 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -51,15 +51,6 @@ enum {
 	TLSV6,
 	TLS_NUM_PROTS,
 };
-enum {
-	TLS_BASE,
-	TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
-	TLS_HW,
-#endif
-	TLS_HW_RECORD,
-	TLS_NUM_CONFIG,
-};
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
@@ -290,7 +281,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	}
 
 #ifdef CONFIG_TLS_DEVICE
-	if (ctx->tx_conf != TLS_HW) {
+	if (ctx->rx_conf == TLS_HW)
+		tls_device_offload_cleanup_rx(sk);
+
+	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
 #else
 	{
 #endif
@@ -470,8 +464,16 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 			conf = TLS_SW;
 		}
 	} else {
-		rc = tls_set_sw_offload(sk, ctx, 0);
-		conf = TLS_SW;
+#ifdef CONFIG_TLS_DEVICE
+		rc = tls_set_device_offload_rx(sk, ctx);
+		conf = TLS_HW;
+		if (rc) {
+#else
+		{
+#endif
+			rc = tls_set_sw_offload(sk, ctx, 0);
+			conf = TLS_SW;
+		}
 	}
 
 	if (rc)
@@ -629,6 +631,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
 	prot[TLS_HW][TLS_SW].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_SW].sendpage		= tls_device_sendpage;
+
+	prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
+
+	prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
+
+	prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
 
 	prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5f7d70b24be6..fe5735c57774 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -654,16 +654,25 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 }
 
 static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
-			      struct scatterlist *sgout)
+			      struct scatterlist *sgout, bool *zc)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	struct strp_msg *rxm = strp_msg(skb);
 	int err = 0;
 
-	err = decrypt_skb(sk, skb, sgout);
+#ifdef CONFIG_TLS_DEVICE
+	err = tls_device_decrypted(sk, skb);
 	if (err < 0)
 		return err;
+#endif
+	if (!ctx->decrypted) {
+		err = decrypt_skb(sk, skb, sgout);
+		if (err < 0)
+			return err;
+	} else {
+		*zc = false;
+	}
 
 	rxm->offset += tls_ctx->rx.prepend_size;
 	rxm->full_len -= tls_ctx->rx.overhead_size;
@@ -820,7 +829,7 @@ int tls_sw_recvmsg(struct sock *sk,
 				if (err < 0)
 					goto fallback_to_reg_recv;
 
-				err = decrypt_skb_update(sk, skb, sgin);
+				err = decrypt_skb_update(sk, skb, sgin, &zc);
 				for (; pages > 0; pages--)
 					put_page(sg_page(&sgin[pages]));
 				if (err < 0) {
@@ -829,7 +838,7 @@ int tls_sw_recvmsg(struct sock *sk,
 				}
 			} else {
 fallback_to_reg_recv:
-				err = decrypt_skb_update(sk, skb, NULL);
+				err = decrypt_skb_update(sk, skb, NULL, &zc);
 				if (err < 0) {
 					tls_err_abort(sk, EBADMSG);
 					goto recv_end;
@@ -884,6 +893,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	int err = 0;
 	long timeo;
 	int chunk;
+	bool zc;
 
 	lock_sock(sk);
 
@@ -900,7 +910,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 	if (!ctx->decrypted) {
-		err = decrypt_skb_update(sk, skb, NULL);
+		err = decrypt_skb_update(sk, skb, NULL, &zc);
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
@@ -989,6 +999,10 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 		goto read_failure;
 	}
 
+#ifdef CONFIG_TLS_DEVICE
+	handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
+			     *(u64*)tls_ctx->rx.rec_seq);
+#endif
 	return data_len + TLS_HEADER_SIZE;
 
 read_failure:
-- 
cgit v1.2.3


From ab412e1dd7db132c2abeb9385b4bf0dc8e6c5a65 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Fri, 13 Jul 2018 14:33:46 +0300
Subject: net/mlx5: Accel, add TLS rx offload routines

In Innova TLS, TLS contexts are added or deleted
via a command message over the SBU connection.
The HW then sends a response message over the same connection.

Complete the implementation for Innova TLS (FPGA-based) hardware by
adding support for rx inline crypto offload.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ilya Lesokhin <ilyal@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/accel/tls.c    |  23 +++--
 .../net/ethernet/mellanox/mlx5/core/accel/tls.h    |  26 +++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c | 113 ++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h |  18 ++--
 include/linux/mlx5/mlx5_ifc_fpga.h                 |   1 +
 5 files changed, 135 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index 77ac19f38cbe..da7bd26368f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -37,17 +37,26 @@
 #include "mlx5_core.h"
 #include "fpga/tls.h"
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			       struct tls_crypto_info *crypto_info,
-			       u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			    struct tls_crypto_info *crypto_info,
+			    u32 start_offload_tcp_sn, u32 *p_swid,
+			    bool direction_sx)
 {
-	return mlx5_fpga_tls_add_tx_flow(mdev, flow, crypto_info,
-					 start_offload_tcp_sn, p_swid);
+	return mlx5_fpga_tls_add_flow(mdev, flow, crypto_info,
+				      start_offload_tcp_sn, p_swid,
+				      direction_sx);
 }
 
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid)
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			     bool direction_sx)
 {
-	mlx5_fpga_tls_del_tx_flow(mdev, swid, GFP_KERNEL);
+	mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
+}
+
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			     u64 rcd_sn)
+{
+	return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
 }
 
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index 6f9c9f446ecc..2228c1083528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -60,10 +60,14 @@ struct mlx5_ifc_tls_flow_bits {
 	u8         reserved_at_2[0x1e];
 };
 
-int mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			       struct tls_crypto_info *crypto_info,
-			       u32 start_offload_tcp_sn, u32 *p_swid);
-void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid);
+int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			    struct tls_crypto_info *crypto_info,
+			    u32 start_offload_tcp_sn, u32 *p_swid,
+			    bool direction_sx);
+void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			     bool direction_sx);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			     u64 rcd_sn);
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
 u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
 int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
@@ -71,11 +75,15 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev);
 
 #else
 
-static inline int
-mlx5_accel_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			   struct tls_crypto_info *crypto_info,
-			   u32 start_offload_tcp_sn, u32 *p_swid) { return 0; }
-static inline void mlx5_accel_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid) { }
+static int
+mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			struct tls_crypto_info *crypto_info,
+			u32 start_offload_tcp_sn, u32 *p_swid,
+			bool direction_sx) { return -ENOTSUPP; }
+static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+					   bool direction_sx) { }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
+					   u32 seq, u64 rcd_sn) { return 0; }
 static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; }
 static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; }
 static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index c9736238604a..5cf5f2a9d51f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -129,6 +129,7 @@ static void mlx5_fpga_tls_cmd_send(struct mlx5_fpga_device *fdev,
 static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
 				    void *ptr)
 {
+	unsigned long flags;
 	int ret;
 
 	/* TLS metadata format is 1 byte for syndrome followed
@@ -139,9 +140,9 @@ static int mlx5_fpga_tls_alloc_swid(struct idr *idr, spinlock_t *idr_spinlock,
 	BUILD_BUG_ON((SWID_END - 1) & 0xFF000000);
 
 	idr_preload(GFP_KERNEL);
-	spin_lock_irq(idr_spinlock);
+	spin_lock_irqsave(idr_spinlock, flags);
 	ret = idr_alloc(idr, ptr, SWID_START, SWID_END, GFP_ATOMIC);
-	spin_unlock_irq(idr_spinlock);
+	spin_unlock_irqrestore(idr_spinlock, flags);
 	idr_preload_end();
 
 	return ret;
@@ -157,6 +158,13 @@ static void mlx5_fpga_tls_release_swid(struct idr *idr,
 	spin_unlock_irqrestore(idr_spinlock, flags);
 }
 
+static void mlx_tls_kfree_complete(struct mlx5_fpga_conn *conn,
+				   struct mlx5_fpga_device *fdev,
+				   struct mlx5_fpga_dma_buf *buf, u8 status)
+{
+	kfree(buf);
+}
+
 struct mlx5_teardown_stream_context {
 	struct mlx5_fpga_tls_command_context cmd;
 	u32 swid;
@@ -178,9 +186,13 @@ mlx5_fpga_tls_teardown_completion(struct mlx5_fpga_conn *conn,
 			mlx5_fpga_err(fdev,
 				      "Teardown stream failed with syndrome = %d",
 				      syndrome);
-		else
+		else if (MLX5_GET(tls_cmd, cmd->buf.sg[0].data, direction_sx))
 			mlx5_fpga_tls_release_swid(&fdev->tls->tx_idr,
-						   &fdev->tls->idr_spinlock,
+						   &fdev->tls->tx_idr_spinlock,
+						   ctx->swid);
+		else
+			mlx5_fpga_tls_release_swid(&fdev->tls->rx_idr,
+						   &fdev->tls->rx_idr_spinlock,
 						   ctx->swid);
 	}
 	mlx5_fpga_tls_put_command_ctx(cmd);
@@ -196,6 +208,40 @@ static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
 		 MLX5_GET(tls_flow, flow, direction_sx));
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			    u64 rcd_sn)
+{
+	struct mlx5_fpga_dma_buf *buf;
+	int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
+	void *flow;
+	void *cmd;
+	int ret;
+
+	buf = kzalloc(size, GFP_ATOMIC);
+	if (!buf)
+		return -ENOMEM;
+
+	cmd = (buf + 1);
+
+	rcu_read_lock();
+	flow = idr_find(&mdev->fpga->tls->rx_idr, ntohl(handle));
+	rcu_read_unlock();
+	mlx5_fpga_tls_flow_to_cmd(flow, cmd);
+
+	MLX5_SET(tls_cmd, cmd, swid, ntohl(handle));
+	MLX5_SET64(tls_cmd, cmd, tls_rcd_sn, be64_to_cpu(rcd_sn));
+	MLX5_SET(tls_cmd, cmd, tcp_sn, seq);
+	MLX5_SET(tls_cmd, cmd, command_type, CMD_RESYNC_RX);
+
+	buf->sg[0].data = cmd;
+	buf->sg[0].size = MLX5_TLS_COMMAND_SIZE;
+	buf->complete = mlx_tls_kfree_complete;
+
+	ret = mlx5_fpga_sbu_conn_sendmsg(mdev->fpga->tls->conn, buf);
+
+	return ret;
+}
+
 static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
 					    void *flow, u32 swid, gfp_t flags)
 {
@@ -223,14 +269,18 @@ static void mlx5_fpga_tls_send_teardown_cmd(struct mlx5_core_dev *mdev,
 			       mlx5_fpga_tls_teardown_completion);
 }
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-			       gfp_t flags)
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			    gfp_t flags, bool direction_sx)
 {
 	struct mlx5_fpga_tls *tls = mdev->fpga->tls;
 	void *flow;
 
 	rcu_read_lock();
-	flow = idr_find(&tls->tx_idr, swid);
+	if (direction_sx)
+		flow = idr_find(&tls->tx_idr, swid);
+	else
+		flow = idr_find(&tls->rx_idr, swid);
+
 	rcu_read_unlock();
 
 	if (!flow) {
@@ -289,9 +339,11 @@ mlx5_fpga_tls_setup_completion(struct mlx5_fpga_conn *conn,
 		 * the command context because we might not have received
 		 * the tx completion yet.
 		 */
-		mlx5_fpga_tls_del_tx_flow(fdev->mdev,
-					  MLX5_GET(tls_cmd, tls_cmd, swid),
-					  GFP_ATOMIC);
+		mlx5_fpga_tls_del_flow(fdev->mdev,
+				       MLX5_GET(tls_cmd, tls_cmd, swid),
+				       GFP_ATOMIC,
+				       MLX5_GET(tls_cmd, tls_cmd,
+						direction_sx));
 	}
 
 	mlx5_fpga_tls_put_command_ctx(cmd);
@@ -415,8 +467,7 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
 	if (err)
 		goto error;
 
-	if (!(tls->caps & (MLX5_ACCEL_TLS_TX | MLX5_ACCEL_TLS_V12 |
-				 MLX5_ACCEL_TLS_AES_GCM128))) {
+	if (!(tls->caps & (MLX5_ACCEL_TLS_V12 | MLX5_ACCEL_TLS_AES_GCM128))) {
 		err = -ENOTSUPP;
 		goto error;
 	}
@@ -438,7 +489,9 @@ int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev)
 	INIT_LIST_HEAD(&tls->pending_cmds);
 
 	idr_init(&tls->tx_idr);
-	spin_lock_init(&tls->idr_spinlock);
+	idr_init(&tls->rx_idr);
+	spin_lock_init(&tls->tx_idr_spinlock);
+	spin_lock_init(&tls->rx_idr_spinlock);
 	fdev->tls = tls;
 	return 0;
 
@@ -500,9 +553,9 @@ static int mlx5_fpga_tls_set_key_material(void *cmd, u32 caps,
 	return 0;
 }
 
-static int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
-				  struct tls_crypto_info *crypto_info, u32 swid,
-				  u32 tcp_sn)
+static int _mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+				   struct tls_crypto_info *crypto_info,
+				   u32 swid, u32 tcp_sn)
 {
 	u32 caps = mlx5_fpga_tls_device_caps(mdev);
 	struct mlx5_setup_stream_context *ctx;
@@ -533,30 +586,42 @@ out:
 	return ret;
 }
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			      struct tls_crypto_info *crypto_info,
-			      u32 start_offload_tcp_sn, u32 *p_swid)
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			   struct tls_crypto_info *crypto_info,
+			   u32 start_offload_tcp_sn, u32 *p_swid,
+			   bool direction_sx)
 {
 	struct mlx5_fpga_tls *tls = mdev->fpga->tls;
 	int ret = -ENOMEM;
 	u32 swid;
 
-	ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr, &tls->idr_spinlock, flow);
+	if (direction_sx)
+		ret = mlx5_fpga_tls_alloc_swid(&tls->tx_idr,
+					       &tls->tx_idr_spinlock, flow);
+	else
+		ret = mlx5_fpga_tls_alloc_swid(&tls->rx_idr,
+					       &tls->rx_idr_spinlock, flow);
+
 	if (ret < 0)
 		return ret;
 
 	swid = ret;
-	MLX5_SET(tls_flow, flow, direction_sx, 1);
+	MLX5_SET(tls_flow, flow, direction_sx, direction_sx ? 1 : 0);
 
-	ret = mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
-				     start_offload_tcp_sn);
+	ret = _mlx5_fpga_tls_add_flow(mdev, flow, crypto_info, swid,
+				      start_offload_tcp_sn);
 	if (ret && ret != -EINTR)
 		goto free_swid;
 
 	*p_swid = swid;
 	return 0;
 free_swid:
-	mlx5_fpga_tls_release_swid(&tls->tx_idr, &tls->idr_spinlock, swid);
+	if (direction_sx)
+		mlx5_fpga_tls_release_swid(&tls->tx_idr,
+					   &tls->tx_idr_spinlock, swid);
+	else
+		mlx5_fpga_tls_release_swid(&tls->rx_idr,
+					   &tls->rx_idr_spinlock, swid);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
index 800a214e4e49..3b2e37bf76fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -46,15 +46,18 @@ struct mlx5_fpga_tls {
 	struct mlx5_fpga_conn *conn;
 
 	struct idr tx_idr;
-	spinlock_t idr_spinlock; /* protects the IDR */
+	struct idr rx_idr;
+	spinlock_t tx_idr_spinlock; /* protects the IDR */
+	spinlock_t rx_idr_spinlock; /* protects the IDR */
 };
 
-int mlx5_fpga_tls_add_tx_flow(struct mlx5_core_dev *mdev, void *flow,
-			      struct tls_crypto_info *crypto_info,
-			      u32 start_offload_tcp_sn, u32 *p_swid);
+int mlx5_fpga_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
+			   struct tls_crypto_info *crypto_info,
+			   u32 start_offload_tcp_sn, u32 *p_swid,
+			   bool direction_sx);
 
-void mlx5_fpga_tls_del_tx_flow(struct mlx5_core_dev *mdev, u32 swid,
-			       gfp_t flags);
+void mlx5_fpga_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
+			    gfp_t flags, bool direction_sx);
 
 bool mlx5_fpga_is_tls_device(struct mlx5_core_dev *mdev);
 int mlx5_fpga_tls_init(struct mlx5_core_dev *mdev);
@@ -65,4 +68,7 @@ static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
 	return mdev->fpga->tls->caps;
 }
 
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
+			    u64 rcd_sn);
+
 #endif /* __MLX5_FPGA_TLS_H__ */
diff --git a/include/linux/mlx5/mlx5_ifc_fpga.h b/include/linux/mlx5/mlx5_ifc_fpga.h
index 64d0f40d4cc3..37e065a80a43 100644
--- a/include/linux/mlx5/mlx5_ifc_fpga.h
+++ b/include/linux/mlx5/mlx5_ifc_fpga.h
@@ -576,6 +576,7 @@ struct mlx5_ifc_fpga_ipsec_sa {
 enum fpga_tls_cmds {
 	CMD_SETUP_STREAM		= 0x1001,
 	CMD_TEARDOWN_STREAM		= 0x1002,
+	CMD_RESYNC_RX			= 0x1003,
 };
 
 #define MLX5_TLS_1_2 (0)
-- 
cgit v1.2.3


From a94a7483a91cb6a0d15a4413e8fc853f33a4e1b9 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Wed, 30 May 2018 10:11:43 +0800
Subject: mmc: core: Adjust and reuse the macro of R1_STATUS(x)

R1_STATUS(x) now is only used by ioctl_rpmb_card_status_poll(),
which checks all bits as possible. But according to the spec,
bit 17 and bit 18 should be ignored, as well bit 14 which is
reserved(must be set to 0) quoting from the spec and these rule
apply to all places checking the device status. So change
its checking from 0xFFFFE000 to 0xFFF9A000.

As a bonus, we reuse it for mmc_do_erase() as well as
mmc_switch_status_error().
(1) Currently mmc_switch_status_error() doesn't check bit 25, but
it means device is locked but not unlocked by CMD42 prior to any
operations which need check busy, which is also not allowed.
(2) mmc_do_erase() also forgot to to check bit 15, WP_ERASE_SKIP.
The spec says "Only partial address space was erased due to existing
write protected blocks.", which obviously means we should fail this I/O.
Otherwise, the partial erased data stored in nonvalatile flash violates
the data integrity from the view of I/O owner, which probably confuse
it when further used.

So reusing R1_STATUS for them not only improve the readability but also
slove real problems.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c    | 2 +-
 drivers/mmc/core/mmc_ops.c | 2 +-
 include/linux/mmc/mmc.h    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 281826d1fcca..6780c2b81050 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2078,7 +2078,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 		/* Do not retry else we can't see errors */
 		err = mmc_wait_for_cmd(card->host, &cmd, 0);
-		if (err || (cmd.resp[0] & 0xFDF92000)) {
+		if (err || R1_STATUS(cmd.resp[0])) {
 			pr_err("error %d requesting status %#x\n",
 				err, cmd.resp[0]);
 			err = -EIO;
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 42d6aa89a48a..873b2aa0c155 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -417,7 +417,7 @@ static int mmc_switch_status_error(struct mmc_host *host, u32 status)
 		if (status & R1_SPI_ILLEGAL_COMMAND)
 			return -EBADMSG;
 	} else {
-		if (status & 0xFDFFA000)
+		if (R1_STATUS(status))
 			pr_warn("%s: unexpected status %#x after switch\n",
 				mmc_hostname(host), status);
 		if (status & R1_SWITCH_ERROR)
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 3ffc27aaeeaf..897a87c4c827 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -144,7 +144,7 @@ static inline bool mmc_op_multi(u32 opcode)
 #define R1_WP_ERASE_SKIP	(1 << 15)	/* sx, c */
 #define R1_CARD_ECC_DISABLED	(1 << 14)	/* sx, a */
 #define R1_ERASE_RESET		(1 << 13)	/* sr, c */
-#define R1_STATUS(x)            (x & 0xFFFFE000)
+#define R1_STATUS(x)            (x & 0xFFF9A000)
 #define R1_CURRENT_STATE(x)	((x & 0x00001E00) >> 9)	/* sx, b (4 bits) */
 #define R1_READY_FOR_DATA	(1 << 8)	/* sx, a */
 #define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
-- 
cgit v1.2.3


From ba6c7ac3a2f421635ae4446269526359d8bff721 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Mon, 18 Jun 2018 14:57:49 +0200
Subject: mmc: core: more fine-grained hooks for HS400 tuning

This adds two new HS400 tuning operations:
* hs400_downgrade
* hs400_complete

These supplement the existing HS400 operation:
* prepare_hs400_tuning

This is motivated by a requirement of Renesas SDHI for the following:
1. Disabling SCC before selecting to HS if selection of HS400 has occurred.
   This can be done in an implementation of prepare_hs400_tuning_downgrade
2. Updating registers after switching to HS400
   This can be done in an implementation of complete_hs400_tuning

If hs400_downgrade or hs400_complete are not implemented then they are not
called. Thus means there should be no affect for existing drivers as none
implemt these ops.

Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c   | 10 ++++++++++
 include/linux/mmc/host.h |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4466f5de54d4..63a52379e8ab 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1169,6 +1169,10 @@ static int mmc_select_hs400(struct mmc_card *card)
 	/* Set host controller to HS timing */
 	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
 
+	/* Prepare host to downgrade to HS timing */
+	if (host->ops->hs400_downgrade)
+		host->ops->hs400_downgrade(host);
+
 	/* Reduce frequency to HS frequency */
 	max_dtr = card->ext_csd.hs_max_dtr;
 	mmc_set_clock(host, max_dtr);
@@ -1209,6 +1213,9 @@ static int mmc_select_hs400(struct mmc_card *card)
 	if (err)
 		goto out_err;
 
+	if (host->ops->hs400_complete)
+		host->ops->hs400_complete(host);
+
 	return 0;
 
 out_err:
@@ -1256,6 +1263,9 @@ int mmc_hs400_to_hs200(struct mmc_card *card)
 
 	mmc_set_timing(host, MMC_TIMING_MMC_HS);
 
+	if (host->ops->hs400_downgrade)
+		host->ops->hs400_downgrade(host);
+
 	err = mmc_switch_status(card);
 	if (err)
 		goto out_err;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 64300a48dcce..a39e2925c84c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -146,6 +146,13 @@ struct mmc_host_ops {
 
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
+
+	/* Prepare for switching from HS400 to HS200 */
+	void	(*hs400_downgrade)(struct mmc_host *host);
+
+	/* Complete selection of HS400 */
+	void	(*hs400_complete)(struct mmc_host *host);
+
 	/* Prepare enhanced strobe depending host driver */
 	void	(*hs400_enhanced_strobe)(struct mmc_host *host,
 					 struct mmc_ios *ios);
-- 
cgit v1.2.3


From db924bba47c8031188545c84bf943f4058b659d0 Mon Sep 17 00:00:00 2001
From: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>
Date: Mon, 18 Jun 2018 14:57:50 +0200
Subject: mmc: tmio: add eMMC HS400 mode support

This patch adds processing for selecting HS400 mode.

Signed-off-by: Masaharu Hayakawa <masaharu.hayakawa.ry@renesas.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc.h      |  6 +++++
 drivers/mmc/host/tmio_mmc_core.c | 47 ++++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/tmio.h         |  3 +++
 3 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index e7d651352dc9..5d141f79e175 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -46,6 +46,7 @@
 #define CTL_DMA_ENABLE 0xd8
 #define CTL_RESET_SD 0xe0
 #define CTL_VERSION 0xe2
+#define CTL_SDIF_MODE 0xe6
 #define CTL_SDIO_REGS 0x100
 #define CTL_CLK_AND_WAIT_CTL 0x138
 #define CTL_RESET_SDIO 0x1e0
@@ -191,6 +192,11 @@ struct tmio_mmc_host {
 	/* Tuning values: 1 for success, 0 for failure */
 	DECLARE_BITMAP(taps, BITS_PER_BYTE * sizeof(long));
 	unsigned int tap_num;
+	unsigned long tap_set;
+
+	void (*prepare_hs400_tuning)(struct tmio_mmc_host *host);
+	void (*hs400_downgrade)(struct tmio_mmc_host *host);
+	void (*hs400_complete)(struct tmio_mmc_host *host);
 
 	const struct tmio_mmc_dma_ops *dma_ops;
 };
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index 308029930304..416f9e078fda 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -199,6 +199,14 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 		tmio_mmc_clk_stop(host);
 		return;
 	}
+	/*
+	 * Both HS400 and HS200/SD104 set 200MHz, but some devices need to
+	 * set 400MHz to distinguish the CPG settings in HS400.
+	 */
+	if (host->mmc->ios.timing == MMC_TIMING_MMC_HS400 &&
+	    host->pdata->flags & TMIO_MMC_HAVE_4TAP_HS400 &&
+	    new_clock == 200000000)
+		new_clock = 400000000;
 
 	if (host->clk_update)
 		clock = host->clk_update(host, new_clock) / 512;
@@ -209,8 +217,13 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host,
 		clock <<= 1;
 
 	/* 1/1 clock is option */
-	if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && ((clk >> 22) & 0x1))
-		clk |= 0xff;
+	if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) &&
+	    ((clk >> 22) & 0x1)) {
+		if (!(host->mmc->ios.timing == MMC_TIMING_MMC_HS400))
+			clk |= 0xff;
+		else
+			clk &= ~0xff;
+	}
 
 	if (host->set_clk_div)
 		host->set_clk_div(host->pdev, (clk >> 22) & 1);
@@ -1087,6 +1100,33 @@ static int tmio_multi_io_quirk(struct mmc_card *card,
 	return blk_size;
 }
 
+static int tmio_mmc_prepare_hs400_tuning(struct mmc_host *mmc,
+					 struct mmc_ios *ios)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->prepare_hs400_tuning)
+		host->prepare_hs400_tuning(host);
+
+	return 0;
+}
+
+static void tmio_mmc_hs400_downgrade(struct mmc_host *mmc)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->hs400_downgrade)
+		host->hs400_downgrade(host);
+}
+
+static void tmio_mmc_hs400_complete(struct mmc_host *mmc)
+{
+	struct tmio_mmc_host *host = mmc_priv(mmc);
+
+	if (host->hs400_complete)
+		host->hs400_complete(host);
+}
+
 static const struct mmc_host_ops tmio_mmc_ops = {
 	.request	= tmio_mmc_request,
 	.set_ios	= tmio_mmc_set_ios,
@@ -1096,6 +1136,9 @@ static const struct mmc_host_ops tmio_mmc_ops = {
 	.multi_io_quirk	= tmio_multi_io_quirk,
 	.hw_reset	= tmio_mmc_hw_reset,
 	.execute_tuning = tmio_mmc_execute_tuning,
+	.prepare_hs400_tuning = tmio_mmc_prepare_hs400_tuning,
+	.hs400_downgrade = tmio_mmc_hs400_downgrade,
+	.hs400_complete	= tmio_mmc_hs400_complete,
 };
 
 static int tmio_mmc_init_ocr(struct tmio_mmc_host *host)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 91f92215ca74..77866214ab51 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -90,6 +90,9 @@
 /* Some controllers have a CBSY bit */
 #define TMIO_MMC_HAVE_CBSY		BIT(11)
 
+/* Some controllers that support HS400 use use 4 taps while others use 8. */
+#define TMIO_MMC_HAVE_4TAP_HS400	BIT(13)
+
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
 void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state);
-- 
cgit v1.2.3


From 29772f8a73d88a5de648177d9d822055ab7d1ba6 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 26 Jun 2018 16:51:31 +0200
Subject: mmc: core: Drop the unused mmc_power_save|restore_host()

The last user of mmc_power_save|restore_host() APIs is gone, hence let's
drop them. Drop also the corresponding bus_ops callback,
->power_save|restore() as those becomes redundant.

Cc: Tony Lindgren <tony@atomide.com>
Cc: Eyal Reizer <eyalreizer@gmail.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c  | 46 ----------------------------------------------
 drivers/mmc/core/core.h  |  2 --
 drivers/mmc/core/sdio.c  |  1 -
 include/linux/mmc/host.h |  3 ---
 4 files changed, 52 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6780c2b81050..50a5c340307b 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2716,52 +2716,6 @@ void mmc_stop_host(struct mmc_host *host)
 	mmc_release_host(host);
 }
 
-int mmc_power_save_host(struct mmc_host *host)
-{
-	int ret = 0;
-
-	pr_debug("%s: %s: powering down\n", mmc_hostname(host), __func__);
-
-	mmc_bus_get(host);
-
-	if (!host->bus_ops || host->bus_dead) {
-		mmc_bus_put(host);
-		return -EINVAL;
-	}
-
-	if (host->bus_ops->power_save)
-		ret = host->bus_ops->power_save(host);
-
-	mmc_bus_put(host);
-
-	mmc_power_off(host);
-
-	return ret;
-}
-EXPORT_SYMBOL(mmc_power_save_host);
-
-int mmc_power_restore_host(struct mmc_host *host)
-{
-	int ret;
-
-	pr_debug("%s: %s: powering up\n", mmc_hostname(host), __func__);
-
-	mmc_bus_get(host);
-
-	if (!host->bus_ops || host->bus_dead) {
-		mmc_bus_put(host);
-		return -EINVAL;
-	}
-
-	mmc_power_up(host, host->card->ocr);
-	ret = host->bus_ops->power_restore(host);
-
-	mmc_bus_put(host);
-
-	return ret;
-}
-EXPORT_SYMBOL(mmc_power_restore_host);
-
 #ifdef CONFIG_PM_SLEEP
 /* Do the card removal on suspend if card is assumed removeable
  * Do that in pm notifier while userspace isn't yet frozen, so we will be able
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 9d8f09ac0821..087ba68b2920 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -28,8 +28,6 @@ struct mmc_bus_ops {
 	int (*resume)(struct mmc_host *);
 	int (*runtime_suspend)(struct mmc_host *);
 	int (*runtime_resume)(struct mmc_host *);
-	int (*power_save)(struct mmc_host *);
-	int (*power_restore)(struct mmc_host *);
 	int (*alive)(struct mmc_host *);
 	int (*shutdown)(struct mmc_host *);
 	int (*hw_reset)(struct mmc_host *);
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index a86490dbca70..d8e17ea6126d 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -1076,7 +1076,6 @@ static const struct mmc_bus_ops mmc_sdio_ops = {
 	.resume = mmc_sdio_resume,
 	.runtime_suspend = mmc_sdio_runtime_suspend,
 	.runtime_resume = mmc_sdio_runtime_resume,
-	.power_restore = mmc_sdio_power_restore,
 	.alive = mmc_sdio_alive,
 	.hw_reset = mmc_sdio_hw_reset,
 	.sw_reset = mmc_sdio_sw_reset,
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a39e2925c84c..beed7121c781 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -481,9 +481,6 @@ static inline void *mmc_priv(struct mmc_host *host)
 #define mmc_classdev(x)	(&(x)->class_dev)
 #define mmc_hostname(x)	(dev_name(&(x)->class_dev))
 
-int mmc_power_save_host(struct mmc_host *host);
-int mmc_power_restore_host(struct mmc_host *host);
-
 void mmc_detect_change(struct mmc_host *, unsigned long delay);
 void mmc_request_done(struct mmc_host *, struct mmc_request *);
 void mmc_command_done(struct mmc_host *host, struct mmc_request *mrq);
-- 
cgit v1.2.3


From 86f495c57f7146bc5d363d27483957928bb9d1d8 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Thu, 28 Jun 2018 10:13:29 +0200
Subject: mmc: sdhci-esdhc-imx: get rid of support_vsel

The field support_vsel is currently only used in the device tree
case. Get rid of it. No change in behavior.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c          | 8 ++------
 include/linux/platform_data/mmc-esdhc-imx.h | 2 --
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index f9aa832735e4..85fd5a8b0b6d 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1151,18 +1151,14 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 			     &boarddata->tuning_start_tap);
 
 	if (of_find_property(np, "no-1-8-v", NULL))
-		boarddata->support_vsel = false;
-	else
-		boarddata->support_vsel = true;
+		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 
 	if (of_property_read_u32(np, "fsl,delay-line", &boarddata->delay_line))
 		boarddata->delay_line = 0;
 
 	mmc_of_parse_voltage(np, &host->ocr_mask);
 
-	/* sdr50 and sdr104 need work on 1.8v signal voltage */
-	if ((boarddata->support_vsel) && esdhc_is_usdhc(imx_data) &&
-	    !IS_ERR(imx_data->pins_default)) {
+	if (esdhc_is_usdhc(imx_data) && !IS_ERR(imx_data->pins_default)) {
 		imx_data->pins_100mhz = pinctrl_lookup_state(imx_data->pinctrl,
 						ESDHC_PINCTRL_STATE_100MHZ);
 		imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl,
diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h
index 7daa78a2f342..640dec8b5b0c 100644
--- a/include/linux/platform_data/mmc-esdhc-imx.h
+++ b/include/linux/platform_data/mmc-esdhc-imx.h
@@ -34,7 +34,6 @@ enum cd_types {
  * @cd_gpio:	gpio for card_detect interrupt
  * @wp_type:	type of write_protect method (see wp_types enum above)
  * @cd_type:	type of card_detect method (see cd_types enum above)
- * @support_vsel:  indicate it supports 1.8v switching
  */
 
 struct esdhc_platform_data {
@@ -43,7 +42,6 @@ struct esdhc_platform_data {
 	enum wp_types wp_type;
 	enum cd_types cd_type;
 	int max_bus_width;
-	bool support_vsel;
 	unsigned int delay_line;
 	unsigned int tuning_step;       /* The delay cell steps in tuning procedure */
 	unsigned int tuning_start_tap;	/* The start delay cell point in tuning procedure */
-- 
cgit v1.2.3


From 8b0bbd956228ae87139673e5611c4c880ddb9529 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Wed, 11 Jul 2018 09:41:30 -0500
Subject: serial: sh-sci: Add support for R7S9210

Add support for a "RZ_SCIFA" which is different than a traditional
SCIFA. It looks like a normal SCIF with FIFO data, but with a
compressed address space. Also, the break out of interrupts
are different then traditinal SCIF: ERI/BRI, RXI, TXI, TEI, DRI.
The R7S9210 (RZ/A2) contains this type of SCIF.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 77 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/serial_sci.h  |  1 +
 2 files changed, 76 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 6eb65160e015..5808edfe3f7b 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -65,6 +65,7 @@ enum {
 	SCIx_RXI_IRQ,
 	SCIx_TXI_IRQ,
 	SCIx_BRI_IRQ,
+	SCIx_TEIDRI_IRQ,
 	SCIx_NR_IRQS,
 
 	SCIx_MUX_IRQ = SCIx_NR_IRQS,	/* special case */
@@ -76,6 +77,9 @@ enum {
 	((port)->irqs[SCIx_ERI_IRQ] &&	\
 	 ((port)->irqs[SCIx_RXI_IRQ] < 0))
 
+#define SCIx_TEIDRI_IRQ_EXISTS(port)		\
+	((port)->irqs[SCIx_TEIDRI_IRQ] > 0)
+
 enum SCI_CLKS {
 	SCI_FCK,		/* Functional Clock */
 	SCI_SCK,		/* Optional External Clock */
@@ -289,6 +293,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
+	/*
+	 * The "SCIFA" that is in RZ/T and RZ/A2.
+	 * It looks like a normal SCIF with FIFO data, but with a
+	 * compressed address space. Also, the break out of interrupts
+	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
+	 */
+	[SCIx_RZ_SCIFA_REGTYPE] = {
+		.regs = {
+			[SCSMR]		= { 0x00, 16 },
+			[SCBRR]		= { 0x02,  8 },
+			[SCSCR]		= { 0x04, 16 },
+			[SCxTDR]	= { 0x06,  8 },
+			[SCxSR]		= { 0x08, 16 },
+			[SCxRDR]	= { 0x0A,  8 },
+			[SCFCR]		= { 0x0C, 16 },
+			[SCFDR]		= { 0x0E, 16 },
+			[SCSPTR]	= { 0x10, 16 },
+			[SCLSR]		= { 0x12, 16 },
+		},
+		.fifosize = 16,
+		.overrun_reg = SCLSR,
+		.overrun_mask = SCLSR_ORER,
+		.sampling_rate_mask = SCI_SR(32),
+		.error_mask = SCIF_DEFAULT_ERROR_MASK,
+		.error_clear = SCIF_ERROR_CLEAR,
+	},
+
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -1682,11 +1713,26 @@ static irqreturn_t sci_tx_interrupt(int irq, void *ptr)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t sci_br_interrupt(int irq, void *ptr);
+
 static irqreturn_t sci_er_interrupt(int irq, void *ptr)
 {
 	struct uart_port *port = ptr;
 	struct sci_port *s = to_sci_port(port);
 
+	if (SCIx_TEIDRI_IRQ_EXISTS(s)) {
+		/* Break and Error interrupts are muxed */
+		unsigned short ssr_status = serial_port_in(port, SCxSR);
+
+		/* Break Interrupt */
+		if (ssr_status & SCxSR_BRK(port))
+			sci_br_interrupt(irq, ptr);
+
+		/* Break only? */
+		if (!(ssr_status & SCxSR_ERRORS(port)))
+			return IRQ_HANDLED;
+	}
+
 	/* Handle errors */
 	if (port->type == PORT_SCI) {
 		if (sci_handle_errors(port)) {
@@ -1826,8 +1872,31 @@ static int sci_request_irq(struct sci_port *port)
 		}
 
 		desc = sci_irq_desc + i;
-		port->irqstr[j] = kasprintf(GFP_KERNEL, "%s:%s",
-					    dev_name(up->dev), desc->desc);
+		if (SCIx_TEIDRI_IRQ_EXISTS(port)) {
+			/*
+			 * ERI and BRI are muxed, just register ERI and
+			 * ignore BRI.
+			 * TEI and DRI are muxed, but only DRI
+			 * is enabled, so use RXI handler
+			 */
+			if (i == SCIx_ERI_IRQ)
+				port->irqstr[j] = kasprintf(GFP_KERNEL,
+							    "%s:err + break",
+							    dev_name(up->dev));
+			if (i == SCIx_BRI_IRQ)
+				continue;
+			if (i == SCIx_TEIDRI_IRQ) {
+				port->irqstr[j] = kasprintf(GFP_KERNEL,
+							    "%s:tx end + rx ready",
+							    dev_name(up->dev));
+				desc = sci_irq_desc + SCIx_RXI_IRQ;
+			}
+		}
+
+		if (!port->irqstr[j])
+			port->irqstr[j] = kasprintf(GFP_KERNEL, "%s:%s",
+						    dev_name(up->dev),
+						    desc->desc);
 		if (!port->irqstr[j]) {
 			ret = -ENOMEM;
 			goto out_nomem;
@@ -3074,6 +3143,10 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
+	{
+		.compatible = "renesas,scif-r7s9210",
+		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
+	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index c0e795d95477..1c89611e0e06 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,6 +36,7 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
+	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From b79df1ec22a3924a09028b98807fa71ea612af9d Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Wed, 27 Jun 2018 19:54:42 +0530
Subject: dt-bindings: reset: Add AOSS reset bindings for SDM845 SoCs

Add SDM845 AOSS (always on subsystem) reset controller binding

Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../devicetree/bindings/reset/qcom,aoss-reset.txt  | 52 ++++++++++++++++++++++
 include/dt-bindings/reset/qcom,sdm845-aoss.h       | 17 +++++++
 2 files changed, 69 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt
 create mode 100644 include/dt-bindings/reset/qcom,sdm845-aoss.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt b/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt
new file mode 100644
index 000000000000..510c748656ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/qcom,aoss-reset.txt
@@ -0,0 +1,52 @@
+Qualcomm AOSS Reset Controller
+======================================
+
+This binding describes a reset-controller found on AOSS-CC (always on subsystem)
+for Qualcomm SDM845 SoCs.
+
+Required properties:
+- compatible:
+	Usage: required
+	Value type: <string>
+	Definition: must be:
+		    "qcom,sdm845-aoss-cc"
+
+- reg:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: must specify the base address and size of the register
+	            space.
+
+- #reset-cells:
+	Usage: required
+	Value type: <uint>
+	Definition: must be 1; cell entry represents the reset index.
+
+Example:
+
+aoss_reset: reset-controller@c2a0000 {
+	compatible = "qcom,sdm845-aoss-cc";
+	reg = <0xc2a0000 0x31000>;
+	#reset-cells = <1>;
+};
+
+Specifying reset lines connected to IP modules
+==============================================
+
+Device nodes that need access to reset lines should
+specify them as a reset phandle in their corresponding node as
+specified in reset.txt.
+
+For list of all valid reset indicies see
+<dt-bindings/reset/qcom,sdm845-aoss.h>
+
+Example:
+
+modem-pil@4080000 {
+	...
+
+	resets = <&aoss_reset AOSS_CC_MSS_RESTART>;
+	reset-names = "mss_restart";
+
+	...
+};
diff --git a/include/dt-bindings/reset/qcom,sdm845-aoss.h b/include/dt-bindings/reset/qcom,sdm845-aoss.h
new file mode 100644
index 000000000000..476c5fc873b6
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,sdm845-aoss.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_RESET_AOSS_SDM_845_H
+#define _DT_BINDINGS_RESET_AOSS_SDM_845_H
+
+#define AOSS_CC_MSS_RESTART	0
+#define AOSS_CC_CAMSS_RESTART	1
+#define AOSS_CC_VENUS_RESTART	2
+#define AOSS_CC_GPU_RESTART	3
+#define AOSS_CC_DISPSS_RESTART	4
+#define AOSS_CC_WCSS_RESTART	5
+#define AOSS_CC_LPASS_RESTART	6
+
+#endif
-- 
cgit v1.2.3


From 3ec78790ec9a1e23fd9b6c8681a7cd3b64d3ad23 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 15 Jul 2018 10:31:28 -0700
Subject: sysfs.h: fix non-kernel-doc comment

Don't use "/**" to begin this comment block since it is not a
kernel-doc comment block.

Also adjust comment line to fit in 80 characters.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..8e08a90b1d15 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -91,9 +91,9 @@ struct attribute_group {
 	struct bin_attribute	**bin_attrs;
 };
 
-/**
- * Use these macros to make defining attributes easier. See include/linux/device.h
- * for examples..
+/*
+ * Use these macros to make defining attributes easier.
+ * See include/linux/device.h for examples..
  */
 
 #define SYSFS_PREALLOC 010000
-- 
cgit v1.2.3


From 726e41097920a73e4c7c33385dcc0debb1281e18 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 10 Jul 2018 10:29:10 +1000
Subject: drivers: core: Remove glue dirs from sysfs earlier

For devices with a class, we create a "glue" directory between
the parent device and the new device with the class name.

This directory is never "explicitely" removed when empty however,
this is left to the implicit sysfs removal done by kobject_release()
when the object loses its last reference via kobject_put().

This is problematic because as long as it's not been removed from
sysfs, it is still present in the class kset and in sysfs directory
structure.

The presence in the class kset exposes a use after free bug fixed
by the previous patch, but the presence in sysfs means that until
the kobject is released, which can take a while (especially with
kobject debugging), any attempt at re-creating such as binding a
new device for that class/parent pair, will result in a sysfs
duplicate file name error.

This fixes it by instead doing an explicit kobject_del() when
the glue dir is empty, by keeping track of the number of
child devices of the gluedir.

This is made easy by the fact that all glue dir operations are
done with a global mutex, and there's already a function
(cleanup_glue_dir) called in all the right places taking that
mutex that can be enhanced for this. It appears that this was
in fact the intent of the function, but the implementation was
wrong.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c     |  2 ++
 include/linux/kobject.h | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9a6c71038616..cf8c605ec32e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1597,6 +1597,8 @@ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 		return;
 
 	mutex_lock(&gdp_mutex);
+	if (!kobject_has_children(glue_dir))
+		kobject_del(glue_dir);
 	kobject_put(glue_dir);
 	mutex_unlock(&gdp_mutex);
 }
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f6f93c3df9c..270b40515e79 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -116,6 +116,23 @@ extern void kobject_put(struct kobject *kobj);
 extern const void *kobject_namespace(struct kobject *kobj);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
+/**
+ * kobject_has_children - Returns whether a kobject has children.
+ * @kobj: the object to test
+ *
+ * This will return whether a kobject has other kobjects as children.
+ *
+ * It does NOT account for the presence of attribute files, only sub
+ * directories. It also assumes there is no concurrent addition or
+ * removal of such children, and thus relies on external locking.
+ */
+static inline bool kobject_has_children(struct kobject *kobj)
+{
+	WARN_ON_ONCE(kref_read(&kobj->kref) == 0);
+
+	return kobj->sd && kobj->sd->dir.subdirs;
+}
+
 struct kobj_type {
 	void (*release)(struct kobject *kobj);
 	const struct sysfs_ops *sysfs_ops;
-- 
cgit v1.2.3


From ff10e353a4c0c83a28c89c182462b3c8dc7f70cc Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@microchip.com>
Date: Fri, 29 Jun 2018 10:15:33 +0200
Subject: pinctrl: at91-pio4: add support for drive strength

Add support for the drive strength configuration. Usually, this value is
expressed in mA. Since the numeric value depends on VDDIOP voltage, a
value we can't retrieve at runtime, the controller uses low, medium and
high to define the drive strength.

The PIO controller accepts two values for the low drive configuration: 0
and 1. Most of the time, we don't care about the drive strength. So we
keep the default value which is 0. The drive strength is advertised
through the sysfs only when it has been explicitly set in the device
tree i.e. if its value is different from 0.

Signed-off-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 .../bindings/pinctrl/atmel,at91-pio4-pinctrl.txt   |  3 ++
 drivers/pinctrl/pinctrl-at91-pio4.c                | 42 ++++++++++++++++++++++
 include/dt-bindings/pinctrl/at91.h                 |  4 +++
 3 files changed, 49 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt
index 61ac75706cc9..04d16fb69eb7 100644
--- a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pio4-pinctrl.txt
@@ -36,6 +36,8 @@ Optional properties:
 - GENERIC_PINCONFIG: generic pinconfig options to use, bias-disable,
 bias-pull-down, bias-pull-up, drive-open-drain, input-schmitt-enable,
 input-debounce, output-low, output-high.
+- atmel,drive-strength: 0 or 1 for low drive, 2 for medium drive and 3 for
+high drive. The default value is low drive.
 
 Example:
 
@@ -66,6 +68,7 @@ Example:
 			pinmux = <PIN_PB0>,
 				 <PIN_PB5>;
 			bias-pull-up;
+			atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>;
 		};
 
 		pinctrl_sdmmc1_default: sdmmc1_default {
diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 67e4d9ffa6b1..8f061971a7d1 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -14,6 +14,7 @@
  * GNU General Public License for more details.
  */
 
+#include <dt-bindings/pinctrl/at91.h>
 #include <linux/clk.h>
 #include <linux/gpio/driver.h>
 /* FIXME: needed for gpio_to_irq(), get rid of this */
@@ -49,6 +50,8 @@
 #define		ATMEL_PIO_IFSCEN_MASK		BIT(13)
 #define		ATMEL_PIO_OPD_MASK		BIT(14)
 #define		ATMEL_PIO_SCHMITT_MASK		BIT(15)
+#define		ATMEL_PIO_DRVSTR_MASK		GENMASK(17, 16)
+#define		ATMEL_PIO_DRVSTR_OFFSET		16
 #define		ATMEL_PIO_CFGR_EVTSEL_MASK	GENMASK(26, 24)
 #define		ATMEL_PIO_CFGR_EVTSEL_FALLING	(0 << 24)
 #define		ATMEL_PIO_CFGR_EVTSEL_RISING	(1 << 24)
@@ -75,6 +78,9 @@
 #define ATMEL_GET_PIN_FUNC(pinfunc)	((pinfunc >> 16) & 0xf)
 #define ATMEL_GET_PIN_IOSET(pinfunc)	((pinfunc >> 20) & 0xf)
 
+/* Custom pinconf parameters */
+#define ATMEL_PIN_CONFIG_DRIVE_STRENGTH	(PIN_CONFIG_END + 1)
+
 struct atmel_pioctrl_data {
 	unsigned nbanks;
 };
@@ -139,6 +145,10 @@ static const char * const atmel_functions[] = {
 	"GPIO", "A", "B", "C", "D", "E", "F", "G"
 };
 
+static const struct pinconf_generic_params atmel_custom_bindings[] = {
+	{"atmel,drive-strength", ATMEL_PIN_CONFIG_DRIVE_STRENGTH, 0},
+};
+
 /* --- GPIO --- */
 static unsigned int atmel_gpio_read(struct atmel_pioctrl *atmel_pioctrl,
 				    unsigned int bank, unsigned int reg)
@@ -692,6 +702,11 @@ static int atmel_conf_pin_config_group_get(struct pinctrl_dev *pctldev,
 			return -EINVAL;
 		arg = 1;
 		break;
+	case ATMEL_PIN_CONFIG_DRIVE_STRENGTH:
+		if (!(res & ATMEL_PIO_DRVSTR_MASK))
+			return -EINVAL;
+		arg = (res & ATMEL_PIO_DRVSTR_MASK) >> ATMEL_PIO_DRVSTR_OFFSET;
+		break;
 	default:
 		return -ENOTSUPP;
 	}
@@ -777,6 +792,18 @@ static int atmel_conf_pin_config_group_set(struct pinctrl_dev *pctldev,
 					ATMEL_PIO_SODR);
 			}
 			break;
+		case ATMEL_PIN_CONFIG_DRIVE_STRENGTH:
+			switch (arg) {
+			case ATMEL_PIO_DRVSTR_LO:
+			case ATMEL_PIO_DRVSTR_ME:
+			case ATMEL_PIO_DRVSTR_HI:
+				conf &= (~ATMEL_PIO_DRVSTR_MASK);
+				conf |= arg << ATMEL_PIO_DRVSTR_OFFSET;
+				break;
+			default:
+				dev_warn(pctldev->dev, "drive strength not updated (incorrect value)\n");
+			}
+			break;
 		default:
 			dev_warn(pctldev->dev,
 				 "unsupported configuration parameter: %u\n",
@@ -816,6 +843,19 @@ static void atmel_conf_pin_config_dbg_show(struct pinctrl_dev *pctldev,
 		seq_printf(s, "%s ", "open-drain");
 	if (conf & ATMEL_PIO_SCHMITT_MASK)
 		seq_printf(s, "%s ", "schmitt");
+	if (conf & ATMEL_PIO_DRVSTR_MASK) {
+		switch ((conf & ATMEL_PIO_DRVSTR_MASK) >> ATMEL_PIO_DRVSTR_OFFSET) {
+		case ATMEL_PIO_DRVSTR_ME:
+			seq_printf(s, "%s ", "medium-drive");
+			break;
+		case ATMEL_PIO_DRVSTR_HI:
+			seq_printf(s, "%s ", "high-drive");
+			break;
+		/* ATMEL_PIO_DRVSTR_LO and 0 which is the default value at reset */
+		default:
+			seq_printf(s, "%s ", "low-drive");
+		}
+	}
 }
 
 static const struct pinconf_ops atmel_confops = {
@@ -958,6 +998,8 @@ static int atmel_pinctrl_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	atmel_pinctrl_desc.pins = pin_desc;
 	atmel_pinctrl_desc.npins = atmel_pioctrl->npins;
+	atmel_pinctrl_desc.num_custom_params = ARRAY_SIZE(atmel_custom_bindings);
+	atmel_pinctrl_desc.custom_params = atmel_custom_bindings;
 
 	/* One pin is one group since a pin can achieve all functions. */
 	group_names = devm_kcalloc(dev,
diff --git a/include/dt-bindings/pinctrl/at91.h b/include/dt-bindings/pinctrl/at91.h
index 2732d6c0fb39..eb81867eac77 100644
--- a/include/dt-bindings/pinctrl/at91.h
+++ b/include/dt-bindings/pinctrl/at91.h
@@ -39,4 +39,8 @@
 #define AT91_PERIPH_C		3
 #define AT91_PERIPH_D		4
 
+#define ATMEL_PIO_DRVSTR_LO	1
+#define ATMEL_PIO_DRVSTR_ME	2
+#define ATMEL_PIO_DRVSTR_HI	3
+
 #endif /* __DT_BINDINGS_AT91_PINCTRL_H__ */
-- 
cgit v1.2.3


From a4f9edb29d9c19f9f8dcd2df7ddfe4eb7ad58996 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 30 May 2018 17:29:52 +0100
Subject: irqchip/gic-v3: Expose GICD_TYPER in the rdist structure

Instead of exposing the GIC distributor IntID field in the rdist
structure that is passed to the ITS, let's replace it with a
copy of the whole GICD_TYPER register. We are going to need
some of this information at a later time.

No functionnal change.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 2 +-
 drivers/irqchip/irq-gic-v3.c       | 4 ++--
 include/linux/irqchip/arm-gic-v3.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 9c5b85577053..efe6d1a6c32e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1616,7 +1616,7 @@ static int __init its_alloc_lpi_tables(void)
 {
 	phys_addr_t paddr;
 
-	lpi_id_bits = gic_rdists->id_bits;
+	lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer);
 	gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
 	if (!gic_rdists->prop_page) {
 		pr_err("Failed to allocate PROPBASE\n");
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 76ea56d779a1..e214181b77b7 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -877,7 +877,7 @@ static struct irq_chip gic_eoimode1_chip = {
 	.flags			= IRQCHIP_SET_TYPE_MASKED,
 };
 
-#define GIC_ID_NR		(1U << gic_data.rdists.id_bits)
+#define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
 
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
@@ -1091,7 +1091,7 @@ static int __init gic_init_bases(void __iomem *dist_base,
 	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
 	 */
 	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
-	gic_data.rdists.id_bits = GICD_TYPER_ID_BITS(typer);
+	gic_data.rdists.gicd_typer = typer;
 	gic_irqs = GICD_TYPER_IRQS(typer);
 	if (gic_irqs > 1020)
 		gic_irqs = 1020;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..396cd99af02f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -576,8 +576,8 @@ struct rdists {
 		phys_addr_t	phys_base;
 	} __percpu		*rdist;
 	struct page		*prop_page;
-	int			id_bits;
 	u64			flags;
+	u32			gicd_typer;
 	bool			has_vlpis;
 	bool			has_direct_lpi;
 };
-- 
cgit v1.2.3


From 12b2905af183c931bedcab4292c81d3a415e080f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 May 2018 09:01:59 +0100
Subject: irqchip/gic-v3-its: Honor hypervisor enforced LPI range

A recent extension to the GIC architecture allows a hypervisor to
arbitrarily reduce the number of LPIs available to a guest, no
matter what the GIC says about the valid range of IntIDs.

Let's factor in this information when computing the number of
available LPIs

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 9 +++++++++
 include/linux/irqchip/arm-gic-v3.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index efe6d1a6c32e..f56c84977241 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1541,8 +1541,17 @@ out:
 static int __init its_lpi_init(u32 id_bits)
 {
 	u32 lpis = (1UL << id_bits) - 8192;
+	u32 numlpis;
 	int err;
 
+	numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer);
+
+	if (numlpis > 2 && !WARN_ON(numlpis > lpis)) {
+		lpis = numlpis;
+		pr_info("ITS: Using hypervisor restricted LPI range [%u]\n",
+			lpis);
+	}
+
 	/*
 	 * Initializing the allocator is just the same as freeing the
 	 * full range of LPIs.
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 396cd99af02f..9d2ea3e907d0 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -73,6 +73,7 @@
 #define GICD_TYPER_MBIS			(1U << 16)
 
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer)	((((typer) >> 11) & 0x1f) + 1)
 #define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
 
 #define GICD_IROUTER_SPI_MODE_ONE	(0U << 31)
-- 
cgit v1.2.3


From 0f292f023ffcc67ec49d63dcb7fe388711cbb83a Mon Sep 17 00:00:00 2001
From: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Date: Wed, 11 Jul 2018 13:37:53 +0100
Subject: ALSA: usb-audio: Add support for Processing Units in UAC3

This patch adds support for the Processig Units defined in
the UAC3 spec. The main difference with the previous specs
is the lack of on/off switches in the controls for these
units and the addiction of the new Multi Function Processing
Unit.

The current version of the UAC3 spec doesn't define any
useful controls for the new Multi Function Processing Unit
so no control will get created once this unit is parsed.

Signed-off-by: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v3.h   | 15 +++++++++++++
 include/uapi/linux/usb/audio.h | 49 ++++++++++++++++++++++++++++++++--------
 sound/usb/mixer.c              | 51 ++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 104 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h
index a710e28b5215..334bfa6dfb47 100644
--- a/include/linux/usb/audio-v3.h
+++ b/include/linux/usb/audio-v3.h
@@ -387,6 +387,12 @@ struct uac3_interrupt_data_msg {
 #define UAC3_CONNECTORS			0x0f
 #define UAC3_POWER_DOMAIN		0x10
 
+/* A.20 PROCESSING UNIT PROCESS TYPES */
+#define UAC3_PROCESS_UNDEFINED		0x00
+#define UAC3_PROCESS_UP_DOWNMIX		0x01
+#define UAC3_PROCESS_STEREO_EXTENDER	0x02
+#define UAC3_PROCESS_MULTI_FUNCTION	0x03
+
 /* A.22 AUDIO CLASS-SPECIFIC REQUEST CODES */
 /* see audio-v2.h for the rest, which is identical to v2 */
 #define UAC3_CS_REQ_INTEN			0x04
@@ -406,6 +412,15 @@ struct uac3_interrupt_data_msg {
 #define UAC3_TE_OVERFLOW			0x04
 #define UAC3_TE_LATENCY 			0x05
 
+/* A.23.10 PROCESSING UNITS CONTROL SELECTROS */
+
+/* Up/Down Mixer */
+#define UAC3_UD_MODE_SELECT			0x01
+
+/* Stereo Extender */
+#define UAC3_EXT_WIDTH_CONTROL			0x01
+
+
 /* BADD predefined Unit/Terminal values */
 #define UAC3_BADD_IT_ID1	1  /* Input Terminal ID1: bTerminalID = 1 */
 #define UAC3_BADD_FU_ID2	2  /* Feature Unit ID2: bUnitID = 2 */
diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index 74e520fb944f..ddc5396800aa 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -390,33 +390,64 @@ static inline __u8 uac_processing_unit_iChannelNames(struct uac_processing_unit_
 static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_descriptor *desc,
 						    int protocol)
 {
-	return (protocol == UAC_VERSION_1) ?
-		desc->baSourceID[desc->bNrInPins + 4] :
-		2; /* in UAC2, this value is constant */
+	switch (protocol) {
+	case UAC_VERSION_1:
+		return desc->baSourceID[desc->bNrInPins + 4];
+	case UAC_VERSION_2:
+		return 2; /* in UAC2, this value is constant */
+	case UAC_VERSION_3:
+		return 4; /* in UAC3, this value is constant */
+	default:
+		return 1;
+	}
 }
 
 static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc,
 						   int protocol)
 {
-	return (protocol == UAC_VERSION_1) ?
-		&desc->baSourceID[desc->bNrInPins + 5] :
-		&desc->baSourceID[desc->bNrInPins + 6];
+	switch (protocol) {
+	case UAC_VERSION_1:
+		return &desc->baSourceID[desc->bNrInPins + 5];
+	case UAC_VERSION_2:
+		return &desc->baSourceID[desc->bNrInPins + 6];
+	case UAC_VERSION_3:
+		return &desc->baSourceID[desc->bNrInPins + 2];
+	default:
+		return NULL;
+	}
 }
 
 static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc,
 						   int protocol)
 {
 	__u8 control_size = uac_processing_unit_bControlSize(desc, protocol);
-	return *(uac_processing_unit_bmControls(desc, protocol)
-			+ control_size);
+
+	switch (protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return *(uac_processing_unit_bmControls(desc, protocol)
+			 + control_size);
+	case UAC_VERSION_3:
+		return 0; /* UAC3 does not have this field */
+	}
 }
 
 static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc,
 						 int protocol)
 {
 	__u8 control_size = uac_processing_unit_bControlSize(desc, protocol);
-	return uac_processing_unit_bmControls(desc, protocol)
+
+	switch (protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return uac_processing_unit_bmControls(desc, protocol)
 			+ control_size + 1;
+	case UAC_VERSION_3:
+		return uac_processing_unit_bmControls(desc, protocol)
+			+ control_size;
+	}
 }
 
 /* 4.5.2 Class-Specific AS Interface Descriptor */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index bfb3484096a6..39fde49e8749 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -953,6 +953,23 @@ static int check_input_term(struct mixer_build *state, int id,
 
 				return 0;
 			}
+			case UAC3_PROCESSING_UNIT: {
+				struct uac_processing_unit_descriptor *d = p1;
+
+				if (!d->bNrInPins)
+					return -EINVAL;
+
+				/* call recursively to retrieve the channel info */
+				err = check_input_term(state, d->baSourceID[0], term);
+				if (err < 0)
+					return err;
+
+				term->type = d->bDescriptorSubtype << 16; /* virtual type */
+				term->id = id;
+				term->name = 0; /* TODO: UAC3 Class-specific strings */
+
+				return 0;
+			}
 			default:
 				return -ENODEV;
 			}
@@ -2180,6 +2197,11 @@ struct procunit_info {
 	struct procunit_value_info *values;
 };
 
+static struct procunit_value_info undefined_proc_info[] = {
+	{ 0x00, "Control Undefined", 0 },
+	{ 0 }
+};
+
 static struct procunit_value_info updown_proc_info[] = {
 	{ UAC_UD_ENABLE, "Switch", USB_MIXER_BOOLEAN },
 	{ UAC_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
@@ -2228,6 +2250,23 @@ static struct procunit_info procunits[] = {
 	{ UAC_PROCESS_DYN_RANGE_COMP, "DCR", dcr_proc_info },
 	{ 0 },
 };
+
+static struct procunit_value_info uac3_updown_proc_info[] = {
+	{ UAC3_UD_MODE_SELECT, "Mode Select", USB_MIXER_U8, 1 },
+	{ 0 }
+};
+static struct procunit_value_info uac3_stereo_ext_proc_info[] = {
+	{ UAC3_EXT_WIDTH_CONTROL, "Width Control", USB_MIXER_U8 },
+	{ 0 }
+};
+
+static struct procunit_info uac3_procunits[] = {
+	{ UAC3_PROCESS_UP_DOWNMIX, "Up Down", uac3_updown_proc_info },
+	{ UAC3_PROCESS_STEREO_EXTENDER, "3D Stereo Extender", uac3_stereo_ext_proc_info },
+	{ UAC3_PROCESS_MULTI_FUNCTION, "Multi-Function", undefined_proc_info },
+	{ 0 },
+};
+
 /*
  * predefined data for extension units
  */
@@ -2388,8 +2427,16 @@ static int build_audio_procunit(struct mixer_build *state, int unitid,
 static int parse_audio_processing_unit(struct mixer_build *state, int unitid,
 				       void *raw_desc)
 {
-	return build_audio_procunit(state, unitid, raw_desc,
-				    procunits, "Processing Unit");
+	switch (state->mixer->protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		return build_audio_procunit(state, unitid, raw_desc,
+				procunits, "Processing Unit");
+	case UAC_VERSION_3:
+		return build_audio_procunit(state, unitid, raw_desc,
+				uac3_procunits, "Processing Unit");
+	}
 }
 
 static int parse_audio_extension_unit(struct mixer_build *state, int unitid,
-- 
cgit v1.2.3


From f286586df68e7733a8e651098401f139dc2e17f4 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Mon, 18 Jun 2018 15:12:52 +0200
Subject: netfilter: nft_tproxy: Move nf_tproxy_assign_sock() to nf_tproxy.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function is also necessary to implement nft tproxy support

Fixes: 45ca4e0cf273 ("netfilter: Libify xt_TPROXY")
Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tproxy.h | 8 ++++++++
 net/netfilter/xt_TPROXY.c         | 9 ---------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
index 9754a50ecde9..d5a80888cbe4 100644
--- a/include/net/netfilter/nf_tproxy.h
+++ b/include/net/netfilter/nf_tproxy.h
@@ -17,6 +17,14 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
 	return false;
 }
 
+/* assign a socket to the skb -- consumes sk */
+static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_edemux;
+}
+
 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr);
 
 /**
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 58fce4e749a9..35df0827e2ca 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -36,15 +36,6 @@
 #include <net/netfilter/nf_tproxy.h>
 #include <linux/netfilter/xt_TPROXY.h>
 
-/* assign a socket to the skb -- consumes sk */
-static void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = sock_edemux;
-}
-
 static unsigned int
 tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
 	   u_int32_t mark_mask, u_int32_t mark_value)
-- 
cgit v1.2.3


From d7e5a9a50245b91f016c814b0f076f7e55cbb980 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:49:43 +0200
Subject: netfilter: utils: move nf_ip_checksum* from ipv4 to utils

allows to make nf_ip_checksum_partial static, it no longer
has an external caller.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4.h | 11 ---------
 net/ipv4/netfilter.c           | 53 ----------------------------------------
 net/netfilter/utils.c          | 55 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index b31dabfdb453..95ab5cc64422 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -23,9 +23,6 @@ struct nf_queue_entry;
 #ifdef CONFIG_INET
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 		       unsigned int dataoff, u_int8_t protocol);
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-			       unsigned int dataoff, unsigned int len,
-			       u_int8_t protocol);
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict);
 int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
@@ -35,14 +32,6 @@ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 {
 	return 0;
 }
-static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
-					     unsigned int hook,
-					     unsigned int dataoff,
-					     unsigned int len,
-					     u_int8_t protocol)
-{
-	return 0;
-}
 static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
 			      struct flowi *fl, bool strict)
 {
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index e6774ccb7731..8d2e5dc9a827 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -98,59 +98,6 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
 }
 EXPORT_SYMBOL_GPL(nf_ip_reroute);
 
-__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
-			    unsigned int dataoff, u_int8_t protocol)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-			break;
-		if ((protocol == 0 && !csum_fold(skb->csum)) ||
-		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
-				       skb->len - dataoff, protocol,
-				       skb->csum)) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-		}
-		/* fall through */
-	case CHECKSUM_NONE:
-		if (protocol == 0)
-			skb->csum = 0;
-		else
-			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
-						       skb->len - dataoff,
-						       protocol, 0);
-		csum = __skb_checksum_complete(skb);
-	}
-	return csum;
-}
-EXPORT_SYMBOL(nf_ip_checksum);
-
-__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
-			       unsigned int dataoff, unsigned int len,
-			       u_int8_t protocol)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (len == skb->len - dataoff)
-			return nf_ip_checksum(skb, hook, dataoff, protocol);
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
-					       skb->len - dataoff, 0);
-		skb->ip_summed = CHECKSUM_NONE;
-		return __skb_checksum_complete_head(skb, dataoff + len);
-	}
-	return csum;
-}
-EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
-
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict __always_unused)
 {
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 0b660c568156..8980c8a0fe5c 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -1,9 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
 
+#ifdef CONFIG_INET
+__sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+		       unsigned int dataoff, u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if ((protocol == 0 && !csum_fold(skb->csum)) ||
+		    !csum_tcpudp_magic(iph->saddr, iph->daddr,
+				       skb->len - dataoff, protocol,
+				       skb->csum)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		if (protocol == 0)
+			skb->csum = 0;
+		else
+			skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+						       skb->len - dataoff,
+						       protocol, 0);
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip_checksum);
+#endif
+
+static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				      unsigned int dataoff, unsigned int len,
+				      u8 protocol)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
+					       skb->len - dataoff, 0);
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+}
+
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
 		    unsigned int dataoff, u_int8_t protocol,
 		    unsigned short family)
-- 
cgit v1.2.3


From ebee5a50d0b7cdc576aa8081f05b86971880054d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:49:59 +0200
Subject: netfilter: utils: move nf_ip6_checksum* from ipv6 to utils

similar to previous change, this also allows to remove it
from nf_ipv6_ops and avoid the indirection.

It also removes the bogus dependency of nf_conntrack_ipv6 on ipv6 module:
ipv6 checksum functions are built into kernel even if CONFIG_IPV6=m,
but ipv6/netfilter.o isn't.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h |  5 ---
 net/ipv6/netfilter.c           | 62 ----------------------------------
 net/netfilter/utils.c          | 76 ++++++++++++++++++++++++++++++++++++------
 3 files changed, 65 insertions(+), 78 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 288c597e75b3..c0dc4dd78887 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -30,11 +30,6 @@ struct nf_ipv6_ops {
 	void (*route_input)(struct sk_buff *skb);
 	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
 			int (*output)(struct net *, struct sock *, struct sk_buff *));
-	__sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
-			    unsigned int dataoff, u_int8_t protocol);
-	__sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
-				    unsigned int dataoff, unsigned int len,
-				    u_int8_t protocol);
 	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		     bool strict);
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 531d6957af36..5ae8e1c51079 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,7 +15,6 @@
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/xfrm.h>
-#include <net/ip6_checksum.h>
 #include <net/netfilter/nf_queue.h>
 
 int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
@@ -106,71 +105,10 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
 	return err;
 }
 
-__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-			     unsigned int dataoff, u_int8_t protocol)
-{
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
-			break;
-		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-				     skb->len - dataoff, protocol,
-				     csum_sub(skb->csum,
-					      skb_checksum(skb, 0,
-							   dataoff, 0)))) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			break;
-		}
-		/* fall through */
-	case CHECKSUM_NONE:
-		skb->csum = ~csum_unfold(
-				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-					     skb->len - dataoff,
-					     protocol,
-					     csum_sub(0,
-						      skb_checksum(skb, 0,
-								   dataoff, 0))));
-		csum = __skb_checksum_complete(skb);
-	}
-	return csum;
-}
-EXPORT_SYMBOL(nf_ip6_checksum);
-
-static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
-				       unsigned int dataoff, unsigned int len,
-				       u_int8_t protocol)
-{
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	__wsum hsum;
-	__sum16 csum = 0;
-
-	switch (skb->ip_summed) {
-	case CHECKSUM_COMPLETE:
-		if (len == skb->len - dataoff)
-			return nf_ip6_checksum(skb, hook, dataoff, protocol);
-		/* fall through */
-	case CHECKSUM_NONE:
-		hsum = skb_checksum(skb, 0, dataoff, 0);
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
-							 &ip6h->daddr,
-							 skb->len - dataoff,
-							 protocol,
-							 csum_sub(0, hsum)));
-		skb->ip_summed = CHECKSUM_NONE;
-		return __skb_checksum_complete_head(skb, dataoff + len);
-	}
-	return csum;
-};
-
 static const struct nf_ipv6_ops ipv6ops = {
 	.chk_addr		= ipv6_chk_addr,
 	.route_input    	= ip6_route_input,
 	.fragment		= ip6_fragment,
-	.checksum		= nf_ip6_checksum,
-	.checksum_partial	= nf_ip6_checksum_partial,
 	.route			= nf_ip6_route,
 	.reroute		= nf_ip6_reroute,
 };
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 8980c8a0fe5c..e8da9a9bba73 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -4,6 +4,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/ip6_checksum.h>
 
 #ifdef CONFIG_INET
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
@@ -59,11 +60,69 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 	return csum;
 }
 
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
+			break;
+		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+				     skb->len - dataoff, protocol,
+				     csum_sub(skb->csum,
+					      skb_checksum(skb, 0,
+							   dataoff, 0)))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			break;
+		}
+		/* fall through */
+	case CHECKSUM_NONE:
+		skb->csum = ~csum_unfold(
+				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					     skb->len - dataoff,
+					     protocol,
+					     csum_sub(0,
+						      skb_checksum(skb, 0,
+								   dataoff, 0))));
+		csum = __skb_checksum_complete(skb);
+	}
+	return csum;
+}
+EXPORT_SYMBOL(nf_ip6_checksum);
+
+static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
+				       unsigned int dataoff, unsigned int len,
+				       u8 protocol)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	__wsum hsum;
+	__sum16 csum = 0;
+
+	switch (skb->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (len == skb->len - dataoff)
+			return nf_ip6_checksum(skb, hook, dataoff, protocol);
+		/* fall through */
+	case CHECKSUM_NONE:
+		hsum = skb_checksum(skb, 0, dataoff, 0);
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
+							 &ip6h->daddr,
+							 skb->len - dataoff,
+							 protocol,
+							 csum_sub(0, hsum)));
+		skb->ip_summed = CHECKSUM_NONE;
+		return __skb_checksum_complete_head(skb, dataoff + len);
+	}
+	return csum;
+};
+
 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
-		    unsigned int dataoff, u_int8_t protocol,
+		    unsigned int dataoff, u8 protocol,
 		    unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -71,9 +130,7 @@ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
 		csum = nf_ip_checksum(skb, hook, dataoff, protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum(skb, hook, dataoff, protocol);
+		csum = nf_ip6_checksum(skb, hook, dataoff, protocol);
 		break;
 	}
 
@@ -83,9 +140,8 @@ EXPORT_SYMBOL_GPL(nf_checksum);
 
 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 			    unsigned int dataoff, unsigned int len,
-			    u_int8_t protocol, unsigned short family)
+			    u8 protocol, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
 	__sum16 csum = 0;
 
 	switch (family) {
@@ -94,10 +150,8 @@ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
 					      protocol);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			csum = v6ops->checksum_partial(skb, hook, dataoff, len,
-						       protocol);
+		csum = nf_ip6_checksum_partial(skb, hook, dataoff, len,
+					       protocol);
 		break;
 	}
 
-- 
cgit v1.2.3


From 60e3be94e6a1c5162a0763c9aafb5190b2b1fdce Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 Jun 2018 17:55:32 +0200
Subject: openvswitch: use nf_ct_get_tuplepr, invert_tuplepr

These versions deal with the l3proto/l4proto details internally.
It removes only caller of nf_ct_get_tuple, so make it static.

After this, l3proto->get_l4proto() can be removed in a followup patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h |  7 -------
 net/netfilter/nf_conntrack_core.c         |  3 +--
 net/openvswitch/conntrack.c               | 17 +++--------------
 3 files changed, 4 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 9b5e7634713e..90df45022c51 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -40,13 +40,6 @@ void nf_conntrack_cleanup_start(void);
 void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
-bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff,
-		     unsigned int dataoff, u_int16_t l3num, u_int8_t protonum,
-		     struct net *net,
-		     struct nf_conntrack_tuple *tuple,
-		     const struct nf_conntrack_l3proto *l3proto,
-		     const struct nf_conntrack_l4proto *l4proto);
-
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
 			const struct nf_conntrack_l3proto *l3proto,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 85ab2fd6a665..be0ab81e6b2c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -222,7 +222,7 @@ static u32 hash_conntrack(const struct net *net,
 	return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
-bool
+static bool
 nf_ct_get_tuple(const struct sk_buff *skb,
 		unsigned int nhoff,
 		unsigned int dataoff,
@@ -244,7 +244,6 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 
 	return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
 }
-EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
 
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 284aca2a252d..e05bd3e53f0f 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -607,23 +607,12 @@ static struct nf_conn *
 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 		     u8 l3num, struct sk_buff *skb, bool natted)
 {
-	const struct nf_conntrack_l3proto *l3proto;
-	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
-	unsigned int dataoff;
-	u8 protonum;
 
-	l3proto = __nf_ct_l3proto_find(l3num);
-	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-				 &protonum) <= 0) {
-		pr_debug("ovs_ct_find_existing: Can't get protonum\n");
-		return NULL;
-	}
-	l4proto = __nf_ct_l4proto_find(l3num, protonum);
-	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     protonum, net, &tuple, l3proto, l4proto)) {
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num,
+			       net, &tuple)) {
 		pr_debug("ovs_ct_find_existing: Can't get tuple\n");
 		return NULL;
 	}
@@ -632,7 +621,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
-		if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) {
+		if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
-- 
cgit v1.2.3


From f957be9d349a3800940f823b16e12b0405cc305b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:44 +0200
Subject: netfilter: conntrack: remove ctnetlink callbacks from l3 protocol
 trackers

handle everything from ctnetlink directly.

After all these years we still only support ipv4 and ipv6, so it
seems reasonable to remove l3 protocol tracker support and instead
handle ipv4/ipv6 from a common, always builtin inet tracker.

Step 1: Get rid of all the l3proto->func() calls.

Start with ctnetlink, then move on to packet-path ones.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h      |  6 +-
 include/net/netfilter/nf_conntrack_l3proto.h   |  8 ---
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 47 -------------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 48 -------------
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c      |  1 -
 net/netfilter/nf_conntrack_expect.c            |  1 -
 net/netfilter/nf_conntrack_helper.c            |  1 -
 net/netfilter/nf_conntrack_netlink.c           | 96 +++++++++++++++++++-------
 net/netfilter/nf_conntrack_proto.c             |  5 +-
 net/netfilter/nf_conntrack_standalone.c        | 14 ++--
 net/netfilter/nfnetlink_cttimeout.c            |  1 -
 11 files changed, 79 insertions(+), 149 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 90df45022c51..d454a53ba646 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -68,10 +68,8 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	return ret;
 }
 
-void
-print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
-            const struct nf_conntrack_l4proto *proto);
+void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_l4proto *proto);
 
 #define CONNTRACK_LOCKS 1024
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d5808f3e2715..d07b5216a925 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -46,14 +46,6 @@ struct nf_conntrack_l3proto {
 	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
 			   unsigned int *dataoff, u_int8_t *protonum);
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	int (*tuple_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_conntrack_tuple *t);
-	int (*nlattr_to_tuple)(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
-	const struct nla_policy *nla_policy;
-#endif
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 9db988f9a4d7..98ed12858c52 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -274,41 +274,6 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	return -ENOENT;
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
-				const struct nf_conntrack_tuple *tuple)
-{
-	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
-	    nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
-static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
-	[CTA_IP_V4_SRC]	= { .type = NLA_U32 },
-	[CTA_IP_V4_DST]	= { .type = NLA_U32 },
-};
-
-static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
-{
-	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
-		return -EINVAL;
-
-	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
-
-	return 0;
-}
-#endif
-
 static struct nf_sockopt_ops so_getorigdst = {
 	.pf		= PF_INET,
 	.get_optmin	= SO_ORIGINAL_DST,
@@ -360,13 +325,6 @@ const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
 	.get_l4proto	 = ipv4_get_l4proto,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr = ipv4_tuple_to_nlattr,
-	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
-	.nla_policy	 = ipv4_nla_policy,
-	.nla_size	 = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
-			   NLA_ALIGN(NLA_HDRLEN + sizeof(u32)),  /* CTA_IP_V4_DST */
-#endif
 	.net_ns_get	 = ipv4_hooks_register,
 	.net_ns_put	 = ipv4_hooks_unregister,
 	.me		 = THIS_MODULE,
@@ -419,11 +377,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 
 	need_conntrack();
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) !=
-	    nf_conntrack_l3proto_ipv4.nla_size))
-		return -EINVAL;
-#endif
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 663827ee3cf8..13a660ae5799 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -269,41 +269,6 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
-				const struct nf_conntrack_tuple *tuple)
-{
-	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
-	    nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
-static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = {
-	[CTA_IP_V6_SRC]	= { .len = sizeof(u_int32_t)*4 },
-	[CTA_IP_V6_DST]	= { .len = sizeof(u_int32_t)*4 },
-};
-
-static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
-{
-	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
-		return -EINVAL;
-
-	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
-	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
-
-	return 0;
-}
-#endif
-
 static int ipv6_hooks_register(struct net *net)
 {
 	struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
@@ -345,13 +310,6 @@ const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
 	.get_l4proto		= ipv6_get_l4proto,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr	= ipv6_tuple_to_nlattr,
-	.nlattr_to_tuple	= ipv6_nlattr_to_tuple,
-	.nla_policy		= ipv6_nla_policy,
-	.nla_size		= NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) +
-				  NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])),
-#endif
 	.net_ns_get		= ipv6_hooks_register,
 	.net_ns_put		= ipv6_hooks_unregister,
 	.me			= THIS_MODULE,
@@ -409,12 +367,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 
 	need_conntrack();
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) !=
-	    nf_conntrack_l3proto_ipv6.nla_size))
-		return -EINVAL;
-#endif
-
 	ret = nf_register_sockopt(&so_getorigdst6);
 	if (ret < 0) {
 		pr_err("Unable to register netfilter socket option\n");
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index c87b48359e8f..e631be25337e 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -23,7 +23,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 #endif
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 853b23206bb7..3f586ba23d92 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,7 +610,6 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
 		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
 				       expect->tuple.dst.protonum));
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a75b11c39312..a55a58c706a9 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -24,7 +24,6 @@
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 20a2e37c76d1..40152b9ad772 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -38,7 +38,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
@@ -81,9 +80,26 @@ nla_put_failure:
 	return -1;
 }
 
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+	    nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+		return -EMSGSIZE;
+	return 0;
+}
+
+static int ipv6_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) ||
+	    nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
-				    const struct nf_conntrack_tuple *tuple,
-				    const struct nf_conntrack_l3proto *l3proto)
+				    const struct nf_conntrack_tuple *tuple)
 {
 	int ret = 0;
 	struct nlattr *nest_parms;
@@ -92,8 +108,14 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb,
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	if (likely(l3proto->tuple_to_nlattr))
-		ret = l3proto->tuple_to_nlattr(skb, tuple);
+	switch (tuple->src.l3num) {
+	case NFPROTO_IPV4:
+		ret = ipv4_tuple_to_nlattr(skb, tuple);
+		break;
+	case NFPROTO_IPV6:
+		ret = ipv6_tuple_to_nlattr(skb, tuple);
+		break;
+	}
 
 	nla_nest_end(skb, nest_parms);
 
@@ -106,13 +128,11 @@ nla_put_failure:
 static int ctnetlink_dump_tuples(struct sk_buff *skb,
 				 const struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	int ret;
 
 	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-	ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto);
+	ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
 	if (ret >= 0) {
 		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
@@ -556,15 +576,20 @@ nla_put_failure:
 	return -1;
 }
 
+static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = {
+	[CTA_IP_V4_SRC]	= { .type = NLA_U32 },
+	[CTA_IP_V4_DST]	= { .type = NLA_U32 },
+	[CTA_IP_V6_SRC]	= { .len = sizeof(__be32) * 4 },
+	[CTA_IP_V6_DST]	= { .len = sizeof(__be32) * 4 },
+};
+
 #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS)
 static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	size_t len, len4 = 0;
 
-	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	len = l3proto->nla_size;
+	len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
 	len *= 3u; /* ORIG, REPLY, MASTER */
 
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -936,29 +961,54 @@ out:
 	return skb->len;
 }
 
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
+		return -EINVAL;
+
+	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+
+	return 0;
+}
+
+static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
+		return -EINVAL;
+
+	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+
+	return 0;
+}
+
 static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
 				    struct nf_conntrack_tuple *tuple)
 {
 	struct nlattr *tb[CTA_IP_MAX+1];
-	struct nf_conntrack_l3proto *l3proto;
 	int ret = 0;
 
 	ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL);
 	if (ret < 0)
 		return ret;
 
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
+	ret = nla_validate_nested(attr, CTA_IP_MAX,
+				  cta_ip_nla_policy, NULL);
+	if (ret)
+		return ret;
 
-	if (likely(l3proto->nlattr_to_tuple)) {
-		ret = nla_validate_nested(attr, CTA_IP_MAX,
-					  l3proto->nla_policy, NULL);
-		if (ret == 0)
-			ret = l3proto->nlattr_to_tuple(tb, tuple);
+	switch (tuple->src.l3num) {
+	case NFPROTO_IPV4:
+		ret = ipv4_nlattr_to_tuple(tb, tuple);
+		break;
+	case NFPROTO_IPV6:
+		ret = ipv6_nlattr_to_tuple(tb, tuple);
+		break;
 	}
 
-	rcu_read_unlock();
-
 	return ret;
 }
 
@@ -2581,7 +2631,6 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 				   const struct nf_conntrack_tuple *tuple,
 				   const struct nf_conntrack_tuple_mask *mask)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple m;
 	struct nlattr *nest_parms;
@@ -2597,8 +2646,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
-	ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto);
+	ret = ctnetlink_dump_tuples_ip(skb, &m);
 	if (ret >= 0) {
 		l4proto = __nf_ct_l4proto_find(tuple->src.l3num,
 					       tuple->dst.protonum);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index d88841fbc560..859cb303bb91 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -294,10 +294,7 @@ int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
 
 	if (proto->l3proto >= NFPROTO_NUMPROTO)
 		return -EBUSY;
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	if (proto->tuple_to_nlattr && proto->nla_size == 0)
-		return -EINVAL;
-#endif
+
 	mutex_lock(&nf_ct_proto_mutex);
 	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
 					lockdep_is_held(&nf_ct_proto_mutex));
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b642c0b2495c..47b80fd0d2c3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -24,7 +24,6 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -38,10 +37,9 @@ MODULE_LICENSE("GPL");
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 void
 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
-            const struct nf_conntrack_l3proto *l3proto,
             const struct nf_conntrack_l4proto *l4proto)
 {
-	switch (l3proto->l3proto) {
+	switch (tuple->src.l3num) {
 	case NFPROTO_IPV4:
 		seq_printf(s, "src=%pI4 dst=%pI4 ",
 			   &tuple->src.u3.ip, &tuple->dst.u3.ip);
@@ -282,7 +280,6 @@ static int ct_seq_show(struct seq_file *s, void *v)
 {
 	struct nf_conntrack_tuple_hash *hash = v;
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct net *net = seq_file_net(s);
 	int ret = 0;
@@ -303,14 +300,12 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (!net_eq(nf_ct_net(ct), net))
 		goto release;
 
-	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	WARN_ON(!l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
 	WARN_ON(!l4proto);
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u ",
-		   l3proto_name(l3proto->l3proto), nf_ct_l3num(ct),
+		   l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct),
 		   l4proto_name(l4proto->l4proto), nf_ct_protonum(ct));
 
 	if (!test_bit(IPS_OFFLOAD_BIT, &ct->status))
@@ -320,7 +315,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		l4proto->print_conntrack(s, ct);
 
 	print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-		    l3proto, l4proto);
+		    l4proto);
 
 	ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
 
@@ -333,8 +328,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
 		seq_puts(s, "[UNREPLIED] ");
 
-	print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-		    l3proto, l4proto);
+	print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto);
 
 	ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 9ee5fa551fa6..9da4b8462004 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -26,7 +26,6 @@
 #include <net/sock.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
-- 
cgit v1.2.3


From 47a91b14de62e35d1466820cbb4c024b6c02dff1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:45 +0200
Subject: netfilter: conntrack: remove pkt_to_tuple indirection from l3
 protocol trackers

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h   |  7 -----
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 17 -----------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 18 ------------
 net/netfilter/nf_conntrack_core.c              | 39 ++++++++++++++++++++++----
 net/netfilter/nf_conntrack_l3proto_generic.c   | 10 -------
 5 files changed, 33 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index d07b5216a925..ece231450f30 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Try to fill in the third arg: nhoff is offset of l3 proto
-         * hdr.  Return true if possible.
-	 */
-	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff,
-			     struct nf_conntrack_tuple *tuple);
-
 	/*
 	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
 	 * Some packets can't be inverted: return 0 in that case.
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 98ed12858c52..7ed56f61798b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -38,22 +38,6 @@ struct conntrack4_net {
 	unsigned int users;
 };
 
-static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-			      struct nf_conntrack_tuple *tuple)
-{
-	const __be32 *ap;
-	__be32 _addrs[2];
-	ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
-				sizeof(u_int32_t) * 2, _addrs);
-	if (ap == NULL)
-		return false;
-
-	tuple->src.u3.ip = ap[0];
-	tuple->dst.u3.ip = ap[1];
-
-	return true;
-}
-
 static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
 			      const struct nf_conntrack_tuple *orig)
 {
@@ -322,7 +306,6 @@ static void ipv4_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.l3proto	 = PF_INET,
-	.pkt_to_tuple	 = ipv4_pkt_to_tuple,
 	.invert_tuple	 = ipv4_invert_tuple,
 	.get_l4proto	 = ipv4_get_l4proto,
 	.net_ns_get	 = ipv4_hooks_register,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 13a660ae5799..bdb1709bb951 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -41,23 +41,6 @@ struct conntrack6_net {
 	unsigned int users;
 };
 
-static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-			      struct nf_conntrack_tuple *tuple)
-{
-	const u_int32_t *ap;
-	u_int32_t _addrs[8];
-
-	ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr),
-				sizeof(_addrs), _addrs);
-	if (ap == NULL)
-		return false;
-
-	memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
-	memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
-
-	return true;
-}
-
 static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 			      const struct nf_conntrack_tuple *orig)
 {
@@ -307,7 +290,6 @@ static void ipv6_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
-	.pkt_to_tuple		= ipv6_pkt_to_tuple,
 	.invert_tuple		= ipv6_invert_tuple,
 	.get_l4proto		= ipv6_get_l4proto,
 	.net_ns_get		= ipv6_hooks_register,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index be0ab81e6b2c..66b2ebae2747 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -230,15 +230,43 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 		u_int8_t protonum,
 		struct net *net,
 		struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_l3proto *l3proto,
 		const struct nf_conntrack_l4proto *l4proto)
 {
+	unsigned int size;
+	const __be32 *ap;
+	__be32 _addrs[8];
+
 	memset(tuple, 0, sizeof(*tuple));
 
 	tuple->src.l3num = l3num;
-	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
+	switch (l3num) {
+	case NFPROTO_IPV4:
+		nhoff += offsetof(struct iphdr, saddr);
+		size = 2 * sizeof(__be32);
+		break;
+	case NFPROTO_IPV6:
+		nhoff += offsetof(struct ipv6hdr, saddr);
+		size = sizeof(_addrs);
+		break;
+	default:
+		return true;
+	}
+
+	ap = skb_header_pointer(skb, nhoff, size, _addrs);
+	if (!ap)
 		return false;
 
+	switch (l3num) {
+	case NFPROTO_IPV4:
+		tuple->src.u3.ip = ap[0];
+		tuple->dst.u3.ip = ap[1];
+		break;
+	case NFPROTO_IPV6:
+		memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
+		memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
+		break;
+	}
+
 	tuple->dst.protonum = protonum;
 	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 
@@ -267,7 +295,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 	l4proto = __nf_ct_l4proto_find(l3num, protonum);
 
 	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
-			      l3proto, l4proto);
+			      l4proto);
 
 	rcu_read_unlock();
 	return ret;
@@ -1318,8 +1346,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	u32 hash;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
-			     dataoff, l3num, protonum, net, &tuple, l3proto,
-			     l4proto)) {
+			     dataoff, l3num, protonum, net, &tuple, l4proto)) {
 		pr_debug("Can't get tuple\n");
 		return 0;
 	}
@@ -1633,7 +1660,7 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     l4num, net, &tuple, l3proto, l4proto))
+			     l4num, net, &tuple, l4proto))
 		return -1;
 
 	if (ct->status & IPS_SRC_NAT) {
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 397e6911214f..0b01c9970e99 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -31,15 +31,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 
-static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
-				 struct nf_conntrack_tuple *tuple)
-{
-	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-	return true;
-}
-
 static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_tuple *orig)
 {
@@ -59,7 +50,6 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.l3proto	 = PF_UNSPEC,
-	.pkt_to_tuple	 = generic_pkt_to_tuple,
 	.invert_tuple	 = generic_invert_tuple,
 	.get_l4proto	 = generic_get_l4proto,
 };
-- 
cgit v1.2.3


From d1b6fe94941f43e4743d5fea953d16b0a001c2c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:46 +0200
Subject: netfilter: conntrack: remove invert_tuple indirection from l3
 protocol trackers

Its simpler to just handle it directly in nf_ct_invert_tuple().
Also gets rid of need to pass l3proto pointer to resolve_conntrack().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h      |  1 -
 include/net/netfilter/nf_conntrack_l3proto.h   |  7 -------
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 ----------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  3 +--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 10 ----------
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  3 +--
 net/netfilter/nf_conntrack_core.c              | 26 ++++++++++++++++----------
 net/netfilter/nf_conntrack_l3proto_generic.c   | 10 ----------
 8 files changed, 18 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index d454a53ba646..35461b2d3462 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -42,7 +42,6 @@ void nf_conntrack_cleanup_end(void);
 
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 			const struct nf_conntrack_tuple *orig,
-			const struct nf_conntrack_l3proto *l3proto,
 			const struct nf_conntrack_l4proto *l4proto);
 
 /* Find a connection corresponding to a tuple. */
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index ece231450f30..164641c743a5 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,13 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
-	 */
-	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
-			     const struct nf_conntrack_tuple *orig);
-
 	/*
 	 * Called before tracking. 
 	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7ed56f61798b..e10e38c443ab 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -38,15 +38,6 @@ struct conntrack4_net {
 	unsigned int users;
 };
 
-static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u3.ip = orig->dst.u3.ip;
-	tuple->dst.u3.ip = orig->src.u3.ip;
-
-	return true;
-}
-
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -306,7 +297,6 @@ static void ipv4_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.l3proto	 = PF_INET,
-	.invert_tuple	 = ipv4_invert_tuple,
 	.get_l4proto	 = ipv4_get_l4proto,
 	.net_ns_get	 = ipv4_hooks_register,
 	.net_ns_put	 = ipv4_hooks_unregister,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5c15beafa711..34095949a003 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -142,8 +142,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
 	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&innertuple, &origtuple,
-				&nf_conntrack_l3proto_ipv4, innerproto)) {
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
 		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
 	}
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index bdb1709bb951..f8051fe20489 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -41,15 +41,6 @@ struct conntrack6_net {
 	unsigned int users;
 };
 
-static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
-{
-	memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6));
-	memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6));
-
-	return true;
-}
-
 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -290,7 +281,6 @@ static void ipv6_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
-	.invert_tuple		= ipv6_invert_tuple,
 	.get_l4proto		= ipv6_get_l4proto,
 	.net_ns_get		= ipv6_hooks_register,
 	.net_ns_put		= ipv6_hooks_unregister,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 2548e2c8aedd..8bcbc2f15bd5 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -152,8 +152,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
 	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&intuple, &origtuple,
-				&nf_conntrack_l3proto_ipv6, inproto)) {
+	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
 		pr_debug("icmpv6_error: Can't invert tuple\n");
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 66b2ebae2747..14c040805b32 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -305,14 +305,24 @@ EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 bool
 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 		   const struct nf_conntrack_tuple *orig,
-		   const struct nf_conntrack_l3proto *l3proto,
 		   const struct nf_conntrack_l4proto *l4proto)
 {
 	memset(inverse, 0, sizeof(*inverse));
 
 	inverse->src.l3num = orig->src.l3num;
-	if (l3proto->invert_tuple(inverse, orig) == 0)
-		return false;
+
+	switch (orig->src.l3num) {
+	case NFPROTO_IPV4:
+		inverse->src.u3.ip = orig->dst.u3.ip;
+		inverse->dst.u3.ip = orig->src.u3.ip;
+		break;
+	case NFPROTO_IPV6:
+		inverse->src.u3.in6 = orig->dst.u3.in6;
+		inverse->dst.u3.in6 = orig->src.u3.in6;
+		break;
+	default:
+		break;
+	}
 
 	inverse->dst.dir = !orig->dst.dir;
 
@@ -1222,7 +1232,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       const struct nf_conntrack_l3proto *l3proto,
 	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
@@ -1237,7 +1246,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_zone tmp;
 	unsigned int *timeouts;
 
-	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
+	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
@@ -1334,7 +1343,6 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
 		  u_int8_t protonum,
-		  const struct nf_conntrack_l3proto *l3proto,
 		  const struct nf_conntrack_l4proto *l4proto)
 {
 	const struct nf_conntrack_zone *zone;
@@ -1356,7 +1364,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	hash = hash_conntrack_raw(&tuple, net);
 	h = __nf_conntrack_find_get(net, zone, &tuple, hash);
 	if (!h) {
-		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+		h = init_conntrack(net, tmpl, &tuple, l4proto,
 				   skb, dataoff, hash);
 		if (!h)
 			return 0;
@@ -1439,8 +1447,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 			goto out;
 	}
 repeat:
-	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
-				l3proto, l4proto);
+	ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto);
 	if (ret < 0) {
 		/* Too stressed to deal. */
 		NF_CT_STAT_INC_ATOMIC(net, drop);
@@ -1497,7 +1504,6 @@ bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 
 	rcu_read_lock();
 	ret = nf_ct_invert_tuple(inverse, orig,
-				 __nf_ct_l3proto_find(orig->src.l3num),
 				 __nf_ct_l4proto_find(orig->src.l3num,
 						      orig->dst.protonum));
 	rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
index 0b01c9970e99..d6a8fe591ccc 100644
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ b/net/netfilter/nf_conntrack_l3proto_generic.c
@@ -31,15 +31,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_tuple *orig)
-{
-	memset(&tuple->src.u3, 0, sizeof(tuple->src.u3));
-	memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3));
-
-	return true;
-}
-
 static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			       unsigned int *dataoff, u_int8_t *protonum)
 {
@@ -50,7 +41,6 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 
 struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
 	.l3proto	 = PF_UNSPEC,
-	.invert_tuple	 = generic_invert_tuple,
 	.get_l4proto	 = generic_get_l4proto,
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
-- 
cgit v1.2.3


From 6816d931cab009024b68c11c4cf752f8bf9a1e32 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:47 +0200
Subject: netfilter: conntrack: remove get_l4proto indirection from l3 protocol
 trackers

Handle it in the core instead.

ipv6_skip_exthdr() is built-in even if ipv6 is a module, i.e. this
doesn't create an ipv6 dependency.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l3proto.h   |   8 --
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  30 -------
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  29 -------
 net/netfilter/Makefile                         |   2 +-
 net/netfilter/nf_conntrack_core.c              | 108 ++++++++++++++++++++-----
 net/netfilter/nf_conntrack_l3proto_generic.c   |  46 -----------
 net/netfilter/nf_conntrack_proto.c             |   5 ++
 7 files changed, 94 insertions(+), 134 deletions(-)
 delete mode 100644 net/netfilter/nf_conntrack_l3proto_generic.c

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 164641c743a5..5f160375c93a 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -24,14 +24,6 @@ struct nf_conntrack_l3proto {
 	/* size of tuple nlattr, fills a hole */
 	u16 nla_size;
 
-	/*
-	 * Called before tracking. 
-	 *	*dataoff: offset of protocol header (TCP, UDP,...) in skb
-	 *	*protonum: protocol number
-	 */
-	int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff,
-			   unsigned int *dataoff, u_int8_t *protonum);
-
 	/* Called when netns wants to use connection tracking */
 	int (*net_ns_get)(struct net *);
 	void (*net_ns_put)(struct net *);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e10e38c443ab..9fbf6c7f8ece 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -38,35 +38,6 @@ struct conntrack4_net {
 	unsigned int users;
 };
 
-static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-			    unsigned int *dataoff, u_int8_t *protonum)
-{
-	const struct iphdr *iph;
-	struct iphdr _iph;
-
-	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
-	if (iph == NULL)
-		return -NF_ACCEPT;
-
-	/* Conntrack defragments packets, we might still see fragments
-	 * inside ICMP packets though. */
-	if (iph->frag_off & htons(IP_OFFSET))
-		return -NF_ACCEPT;
-
-	*dataoff = nhoff + (iph->ihl << 2);
-	*protonum = iph->protocol;
-
-	/* Check bogus IP headers */
-	if (*dataoff > skb->len) {
-		pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
-			 "nhoff %u, ihl %u, skblen %u\n",
-			 nhoff, iph->ihl << 2, skb->len);
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
 static unsigned int ipv4_helper(void *priv,
 				struct sk_buff *skb,
 				const struct nf_hook_state *state)
@@ -297,7 +268,6 @@ static void ipv4_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.l3proto	 = PF_INET,
-	.get_l4proto	 = ipv4_get_l4proto,
 	.net_ns_get	 = ipv4_hooks_register,
 	.net_ns_put	 = ipv4_hooks_unregister,
 	.me		 = THIS_MODULE,
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index f8051fe20489..37ab25645cf2 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -41,34 +41,6 @@ struct conntrack6_net {
 	unsigned int users;
 };
 
-static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-			    unsigned int *dataoff, u_int8_t *protonum)
-{
-	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
-	__be16 frag_off;
-	int protoff;
-	u8 nexthdr;
-
-	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
-			  &nexthdr, sizeof(nexthdr)) != 0) {
-		pr_debug("ip6_conntrack_core: can't get nexthdr\n");
-		return -NF_ACCEPT;
-	}
-	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
-	/*
-	 * (protoff == skb->len) means the packet has not data, just
-	 * IPv6 and possibly extensions headers, but it is tracked anyway
-	 */
-	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-		pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
-		return -NF_ACCEPT;
-	}
-
-	*dataoff = protoff;
-	*protonum = nexthdr;
-	return NF_ACCEPT;
-}
-
 static unsigned int ipv6_helper(void *priv,
 				struct sk_buff *skb,
 				const struct nf_hook_state *state)
@@ -281,7 +253,6 @@ static void ipv6_hooks_unregister(struct net *net)
 
 const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
-	.get_l4proto		= ipv6_get_l4proto,
 	.net_ns_get		= ipv6_hooks_register,
 	.net_ns_put		= ipv6_hooks_unregister,
 	.me			= THIS_MODULE,
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 44449389e527..f132ea850778 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 14c040805b32..0674c6e5bfed 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -37,7 +37,6 @@
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -55,6 +54,7 @@
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
+#include <net/ip.h>
 
 #include "nf_internals.h"
 
@@ -273,21 +273,94 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 	return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
 }
 
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u_int8_t *protonum)
+{
+	int dataoff = -1;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
+	const struct iphdr *iph;
+	struct iphdr _iph;
+
+	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+	if (!iph)
+		return -1;
+
+	/* Conntrack defragments packets, we might still see fragments
+	 * inside ICMP packets though.
+	 */
+	if (iph->frag_off & htons(IP_OFFSET))
+		return -1;
+
+	dataoff = nhoff + (iph->ihl << 2);
+	*protonum = iph->protocol;
+
+	/* Check bogus IP headers */
+	if (dataoff > skb->len) {
+		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
+			 nhoff, iph->ihl << 2, skb->len);
+		return -1;
+	}
+#endif
+	return dataoff;
+}
+
+static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+			    u8 *protonum)
+{
+	int protoff = -1;
+#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
+	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
+	__be16 frag_off;
+	u8 nexthdr;
+
+	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
+			  &nexthdr, sizeof(nexthdr)) != 0) {
+		pr_debug("can't get nexthdr\n");
+		return -1;
+	}
+	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
+	/*
+	 * (protoff == skb->len) means the packet has not data, just
+	 * IPv6 and possibly extensions headers, but it is tracked anyway
+	 */
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("can't find proto in pkt\n");
+		return -1;
+	}
+
+	*protonum = nexthdr;
+#endif
+	return protoff;
+}
+
+static int get_l4proto(const struct sk_buff *skb,
+		       unsigned int nhoff, u8 pf, u8 *l4num)
+{
+	switch (pf) {
+	case NFPROTO_IPV4:
+		return ipv4_get_l4proto(skb, nhoff, l4num);
+	case NFPROTO_IPV6:
+		return ipv6_get_l4proto(skb, nhoff, l4num);
+	default:
+		*l4num = 0;
+		break;
+	}
+	return -1;
+}
+
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
-	unsigned int protoff;
-	u_int8_t protonum;
+	u8 protonum;
+	int protoff;
 	int ret;
 
 	rcu_read_lock();
 
-	l3proto = __nf_ct_l3proto_find(l3num);
-	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
-	if (ret != NF_ACCEPT) {
+	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
+	if (protoff <= 0) {
 		rcu_read_unlock();
 		return false;
 	}
@@ -1397,14 +1470,12 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conn *ct, *tmpl;
 	enum ip_conntrack_info ctinfo;
 	unsigned int *timeouts;
-	unsigned int dataoff;
 	u_int8_t protonum;
-	int ret;
+	int dataoff, ret;
 
 	tmpl = nf_ct_get(skb, &ctinfo);
 	if (tmpl || ctinfo == IP_CT_UNTRACKED) {
@@ -1418,14 +1489,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	}
 
 	/* rcu_read_lock()ed by nf_hook_thresh */
-	l3proto = __nf_ct_l3proto_find(pf);
-	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
-				   &dataoff, &protonum);
-	if (ret <= 0) {
+	dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum);
+	if (dataoff <= 0) {
 		pr_debug("not prepared to track yet or error occurred\n");
 		NF_CT_STAT_INC_ATOMIC(net, error);
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		ret = -ret;
+		ret = NF_ACCEPT;
 		goto out;
 	}
 
@@ -1641,14 +1710,14 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 
 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 {
-	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	enum ip_conntrack_info ctinfo;
 	struct nf_nat_hook *nat_hook;
-	unsigned int dataoff, status;
+	unsigned int status;
 	struct nf_conn *ct;
+	int dataoff;
 	u16 l3num;
 	u8 l4num;
 
@@ -1657,10 +1726,9 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 		return 0;
 
 	l3num = nf_ct_l3num(ct);
-	l3proto = nf_ct_l3proto_find_get(l3num);
 
-	if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff,
-				 &l4num) <= 0)
+	dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num);
+	if (dataoff <= 0)
 		return -1;
 
 	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c
deleted file mode 100644
index d6a8fe591ccc..000000000000
--- a/net/netfilter/nf_conntrack_l3proto_generic.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- *
- * Based largely upon the original ip_conntrack code which
- * had the following copyright information:
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI	<yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-
-static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
-			       unsigned int *dataoff, u_int8_t *protonum)
-{
-	/* Never track !!! */
-	return -NF_ACCEPT;
-}
-
-
-struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
-	.l3proto	 = PF_UNSPEC,
-	.get_l4proto	 = generic_get_l4proto,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 859cb303bb91..39df72bb9d56 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -35,6 +35,11 @@ EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
+struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
+	.l3proto	 = PF_UNSPEC,
+};
+EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
+
 #ifdef CONFIG_SYSCTL
 static int
 nf_ct_register_sysctl(struct net *net,
-- 
cgit v1.2.3


From 8b3892ea8718920d29432328fe9544d89a429614 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:48 +0200
Subject: netfilter: conntrack: avoid calls to l4proto invert_tuple

Handle the common cases (tcp, udp, etc). in the core and only
do the indirect call for the protocols that need it (GRE for instance).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  2 +-
 net/netfilter/nf_conntrack_core.c            |  8 +++++++-
 net/netfilter/nf_conntrack_proto_dccp.c      | 10 ----------
 net/netfilter/nf_conntrack_proto_generic.c   | 10 ----------
 net/netfilter/nf_conntrack_proto_gre.c       | 10 ----------
 net/netfilter/nf_conntrack_proto_sctp.c      | 10 ----------
 net/netfilter/nf_conntrack_proto_tcp.c       | 10 ----------
 net/netfilter/nf_conntrack_proto_udp.c       | 12 ------------
 8 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index a7220eef9aee..6a55e337a161 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -36,7 +36,7 @@ struct nf_conntrack_l4proto {
 			     struct net *net, struct nf_conntrack_tuple *tuple);
 
 	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
+	 * Only used by icmp, most protocols use a generic version.
 	 */
 	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
 			     const struct nf_conntrack_tuple *orig);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0674c6e5bfed..92efce69b690 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -400,7 +400,13 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 	inverse->dst.dir = !orig->dst.dir;
 
 	inverse->dst.protonum = orig->dst.protonum;
-	return l4proto->invert_tuple(inverse, orig);
+
+	if (unlikely(l4proto->invert_tuple))
+		return l4proto->invert_tuple(inverse, orig);
+
+	inverse->src.u.all = orig->dst.u.all;
+	inverse->dst.u.all = orig->src.u.all;
+	return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
 
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abe647d5b8c6..05620c03f138 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -403,14 +403,6 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
-static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv,
-			      const struct nf_conntrack_tuple *tuple)
-{
-	inv->src.u.dccp.port = tuple->dst.u.dccp.port;
-	inv->dst.u.dccp.port = tuple->src.u.dccp.port;
-	return true;
-}
-
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 		     unsigned int dataoff, unsigned int *timeouts)
 {
@@ -865,7 +857,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 	.l3proto		= AF_INET,
 	.l4proto		= IPPROTO_DCCP,
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
-	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
 	.get_timeouts		= dccp_get_timeouts,
@@ -901,7 +892,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 	.l3proto		= AF_INET6,
 	.l4proto		= IPPROTO_DCCP,
 	.pkt_to_tuple		= dccp_pkt_to_tuple,
-	.invert_tuple		= dccp_invert_tuple,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
 	.get_timeouts		= dccp_get_timeouts,
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 6c6896d21cd7..4dfe40aa9446 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -41,15 +41,6 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
 	return true;
 }
 
-static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return true;
-}
-
 static unsigned int *generic_get_timeouts(struct net *net)
 {
 	return &(generic_pernet(net)->timeout);
@@ -168,7 +159,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 	.l3proto		= PF_UNSPEC,
 	.l4proto		= 255,
 	.pkt_to_tuple		= generic_pkt_to_tuple,
-	.invert_tuple		= generic_invert_tuple,
 	.packet			= generic_packet,
 	.get_timeouts		= generic_get_timeouts,
 	.new			= generic_new,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d049ea5a3770..0bd40eb06b55 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -179,15 +179,6 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
 
-/* invert gre part of tuple */
-static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->dst.u.gre.key = orig->src.u.gre.key;
-	tuple->src.u.gre.key = orig->dst.u.gre.key;
-	return true;
-}
-
 /* gre hdr info to tuple */
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct net *net, struct nf_conntrack_tuple *tuple)
@@ -356,7 +347,6 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 	.l3proto	 = AF_INET,
 	.l4proto	 = IPPROTO_GRE,
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
-	.invert_tuple	 = gre_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb9a35d16069..148957a5cf3e 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -166,14 +166,6 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
-static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
-	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
-	return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -781,7 +773,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
-	.invert_tuple 		= sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
@@ -818,7 +809,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_SCTP,
 	.pkt_to_tuple 		= sctp_pkt_to_tuple,
-	.invert_tuple 		= sctp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8e67910185a0..03cff1e3066a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -293,14 +293,6 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 	return true;
 }
 
-static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
-	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-	return true;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -1560,7 +1552,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 	.l3proto		= PF_INET,
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
@@ -1598,7 +1589,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 	.l3proto		= PF_INET6,
 	.l4proto 		= IPPROTO_TCP,
 	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index fe7243970aa4..6fe2233c323a 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -55,14 +55,6 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb,
 	return true;
 }
 
-static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			     const struct nf_conntrack_tuple *orig)
-{
-	tuple->src.u.udp.port = orig->dst.u.udp.port;
-	tuple->dst.u.udp.port = orig->src.u.udp.port;
-	return true;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
 	return udp_pernet(net)->timeouts;
@@ -302,7 +294,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -334,7 +325,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -366,7 +356,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
@@ -398,7 +387,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
 	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
 	.packet			= udp_packet,
 	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
-- 
cgit v1.2.3


From c779e849608a875448f6ffc2a5c2a15523bdcd00 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:50 +0200
Subject: netfilter: conntrack: remove get_timeout() indirection

Not needed, we can have the l4trackers fetch it themselvs.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h   |  8 ++------
 include/net/netfilter/nf_conntrack_timeout.h   | 18 ++++--------------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 16 +++++++++++-----
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 14 ++++++++++----
 net/netfilter/nf_conntrack_core.c              | 16 ++--------------
 net/netfilter/nf_conntrack_proto_dccp.c        | 17 +++++++----------
 net/netfilter/nf_conntrack_proto_generic.c     | 22 ++++++++++++----------
 net/netfilter/nf_conntrack_proto_gre.c         | 14 ++++++++++----
 net/netfilter/nf_conntrack_proto_sctp.c        | 18 ++++++++----------
 net/netfilter/nf_conntrack_proto_tcp.c         | 23 +++++++++++------------
 net/netfilter/nf_conntrack_proto_udp.c         | 20 +++++++++++++-------
 net/netfilter/nfnetlink_cttimeout.c            | 12 ++++--------
 12 files changed, 94 insertions(+), 104 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6a55e337a161..c7a0075d96df 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -45,13 +45,12 @@ struct nf_conntrack_l4proto {
 	int (*packet)(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts);
+		      enum ip_conntrack_info ctinfo);
 
 	/* Called when a new connection for this protocol found;
 	 * returns TRUE if it's OK.  If so, packet() called next. */
 	bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts);
+		    unsigned int dataoff);
 
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
@@ -63,9 +62,6 @@ struct nf_conntrack_l4proto {
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
-	/* Return the array of timeouts for this protocol. */
-	unsigned int *(*get_timeouts)(struct net *net);
-
 	/* convert protoinfo to nfnetink attributes */
 	int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla,
 			 struct nf_conn *ct);
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 9468ab4ad12d..80ceb3d0291d 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -67,27 +67,17 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 #endif
 };
 
-static inline unsigned int *
-nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
-		     const struct nf_conntrack_l4proto *l4proto)
+static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 {
+	unsigned int *timeouts = NULL;
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	struct nf_conn_timeout *timeout_ext;
-	unsigned int *timeouts;
 
 	timeout_ext = nf_ct_timeout_find(ct);
-	if (timeout_ext) {
+	if (timeout_ext)
 		timeouts = nf_ct_timeout_data(timeout_ext);
-		if (unlikely(!timeouts))
-			timeouts = l4proto->get_timeouts(net);
-	} else {
-		timeouts = l4proto->get_timeouts(net);
-	}
-
-	return timeouts;
-#else
-	return l4proto->get_timeouts(net);
 #endif
+	return timeouts;
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 34095949a003..036670b38282 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -19,6 +19,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
@@ -80,12 +81,16 @@ static unsigned int *icmp_get_timeouts(struct net *net)
 static int icmp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned int *timeout)
+		       enum ip_conntrack_info ctinfo)
 {
 	/* Do not immediately delete the connection after the first
 	   successful reply to avoid excessive conntrackd traffic
 	   and also to handle correctly ICMP echo reply duplicates. */
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmp_get_timeouts(nf_ct_net(ct));
+
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 
 	return NF_ACCEPT;
@@ -93,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff, unsigned int *timeouts)
+		     unsigned int dataoff)
 {
 	static const u_int8_t valid_new[] = {
 		[ICMP_ECHO] = 1,
@@ -280,9 +285,11 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
 	struct nf_icmp_net *in = icmp_pernet(net);
 
 	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+		if (!timeout)
+			timeout = &in->timeout;
 		*timeout =
 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
-	} else {
+	} else if (timeout) {
 		/* Set default ICMP timeout. */
 		*timeout = in->timeout;
 	}
@@ -357,7 +364,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
 	.packet			= icmp_packet,
-	.get_timeouts		= icmp_get_timeouts,
 	.new			= icmp_new,
 	.error			= icmp_error,
 	.destroy		= NULL,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 8bcbc2f15bd5..bed07b998a10 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 #include <net/netfilter/nf_log.h>
@@ -93,9 +94,13 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
 static int icmpv6_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned int *timeout)
+		       enum ip_conntrack_info ctinfo)
 {
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
 	/* Do not immediately delete the connection after the first
 	   successful reply to avoid excessive conntrackd traffic
 	   and also to handle correctly ICMP echo reply duplicates. */
@@ -106,7 +111,7 @@ static int icmpv6_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff, unsigned int *timeouts)
+		       unsigned int dataoff)
 {
 	static const u_int8_t valid_new[] = {
 		[ICMPV6_ECHO_REQUEST - 128] = 1,
@@ -280,6 +285,8 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
 	unsigned int *timeout = data;
 	struct nf_icmp_net *in = icmpv6_pernet(net);
 
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(net);
 	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
 		*timeout =
 		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
@@ -358,7 +365,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
 	.packet			= icmpv6_packet,
-	.get_timeouts		= icmpv6_get_timeouts,
 	.new			= icmpv6_new,
 	.error			= icmpv6_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 994591fd9b96..c069f2faff4c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1337,7 +1337,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	const struct nf_conntrack_zone *zone;
 	struct nf_conn_timeout *timeout_ext;
 	struct nf_conntrack_zone tmp;
-	unsigned int *timeouts;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
 		pr_debug("Can't invert tuple.\n");
@@ -1356,15 +1355,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	}
 
 	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
-	if (timeout_ext) {
-		timeouts = nf_ct_timeout_data(timeout_ext);
-		if (unlikely(!timeouts))
-			timeouts = l4proto->get_timeouts(net);
-	} else {
-		timeouts = l4proto->get_timeouts(net);
-	}
 
-	if (!l4proto->new(ct, skb, dataoff, timeouts)) {
+	if (!l4proto->new(ct, skb, dataoff)) {
 		nf_conntrack_free(ct);
 		pr_debug("can't track with proto module\n");
 		return NULL;
@@ -1493,7 +1485,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conn *ct, *tmpl;
 	enum ip_conntrack_info ctinfo;
-	unsigned int *timeouts;
 	u_int8_t protonum;
 	int dataoff, ret;
 
@@ -1552,10 +1543,7 @@ repeat:
 		goto out;
 	}
 
-	/* Decide what timeout policy we want to apply to this flow. */
-	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
-
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abfdce7baed5..f476d116c816 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 
 /* Timeouts are based on values from RFC4340:
@@ -389,7 +390,7 @@ static inline struct nf_dccp_net *dccp_pernet(struct net *net)
 }
 
 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff, unsigned int *timeouts)
+		     unsigned int dataoff)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_dccp_net *dn;
@@ -437,19 +438,14 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh)
 		     ntohl(dhack->dccph_ack_nr_low);
 }
 
-static unsigned int *dccp_get_timeouts(struct net *net)
-{
-	return dccp_pernet(net)->dccp_timeout;
-}
-
 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       unsigned int *timeouts)
+		       unsigned int dataoff, enum ip_conntrack_info ctinfo)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
 	u_int8_t type, old_state, new_state;
 	enum ct_dccp_roles role;
+	unsigned int *timeouts;
 
 	dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
 	BUG_ON(dh == NULL);
@@ -523,6 +519,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	if (new_state != old_state)
 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	return NF_ACCEPT;
@@ -843,7 +842,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 	.l4proto		= IPPROTO_DCCP,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
-	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -877,7 +875,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 	.l4proto		= IPPROTO_DCCP,
 	.new			= dccp_new,
 	.packet			= dccp_packet,
-	.get_timeouts		= dccp_get_timeouts,
 	.error			= dccp_error,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 4dfe40aa9446..ac4a0b296dcd 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
@@ -41,25 +42,24 @@ static bool generic_pkt_to_tuple(const struct sk_buff *skb,
 	return true;
 }
 
-static unsigned int *generic_get_timeouts(struct net *net)
-{
-	return &(generic_pernet(net)->timeout);
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int generic_packet(struct nf_conn *ct,
 			  const struct sk_buff *skb,
 			  unsigned int dataoff,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned int *timeout)
+			  enum ip_conntrack_info ctinfo)
 {
+	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 	return NF_ACCEPT;
 }
 
 /* Called when a new connection for this protocol found. */
 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
-			unsigned int dataoff, unsigned int *timeouts)
+			unsigned int dataoff)
 {
 	bool ret;
 
@@ -78,8 +78,11 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
 					 struct net *net, void *data)
 {
-	unsigned int *timeout = data;
 	struct nf_generic_net *gn = generic_pernet(net);
+	unsigned int *timeout = data;
+
+	if (!timeout)
+		timeout = &gn->timeout;
 
 	if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT])
 		*timeout =
@@ -160,7 +163,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 	.l4proto		= 255,
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.packet			= generic_packet,
-	.get_timeouts		= generic_get_timeouts,
 	.new			= generic_new,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
 	.ctnl_timeout		= {
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 0bd40eb06b55..d1632252bf5b 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -234,8 +235,7 @@ static unsigned int *gre_get_timeouts(struct net *net)
 static int gre_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
 	/* If we've seen traffic both ways, this is a GRE connection.
 	 * Extend timeout. */
@@ -254,8 +254,13 @@ static int gre_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
+	unsigned int *timeouts = nf_ct_timeout_lookup(ct);
+
+	if (!timeouts)
+		timeouts = gre_get_timeouts(nf_ct_net(ct));
+
 	pr_debug(": ");
 	nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 
@@ -291,6 +296,8 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
 	unsigned int *timeouts = data;
 	struct netns_proto_gre *net_gre = gre_pernet(net);
 
+	if (!timeouts)
+		timeouts = gre_get_timeouts(net);
 	/* set default timeouts for GRE. */
 	timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
 	timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
@@ -350,7 +357,6 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-	.get_timeouts    = gre_get_timeouts,
 	.packet		 = gre_packet,
 	.new		 = gre_new,
 	.destroy	 = gre_destroy,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b4126a842bfd..8d1e085fc14a 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 
 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
    closely.  They're more complex. --RR
@@ -272,17 +273,11 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
 	return sctp_conntracks[dir][i][cur_state];
 }
 
-static unsigned int *sctp_get_timeouts(struct net *net)
-{
-	return sctp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
 static int sctp_packet(struct nf_conn *ct,
 		       const struct sk_buff *skb,
 		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned int *timeouts)
+		       enum ip_conntrack_info ctinfo)
 {
 	enum sctp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -291,6 +286,7 @@ static int sctp_packet(struct nf_conn *ct,
 	const struct sctp_chunkhdr *sch;
 	struct sctp_chunkhdr _sch;
 	u_int32_t offset, count;
+	unsigned int *timeouts;
 	unsigned long map[256 / sizeof(unsigned long)] = { 0 };
 
 	sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph);
@@ -379,6 +375,10 @@ static int sctp_packet(struct nf_conn *ct,
 	}
 	spin_unlock_bh(&ct->lock);
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
@@ -399,7 +399,7 @@ out:
 
 /* Called when a new connection for this protocol found. */
 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff, unsigned int *timeouts)
+		     unsigned int dataoff)
 {
 	enum sctp_conntrack new_state;
 	const struct sctphdr *sh;
@@ -760,7 +760,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 	.print_conntrack	= sctp_print_conntrack,
 #endif
 	.packet 		= sctp_packet,
-	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
 	.error			= sctp_error,
 	.can_early_drop		= sctp_can_early_drop,
@@ -795,7 +794,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 	.print_conntrack	= sctp_print_conntrack,
 #endif
 	.packet 		= sctp_packet,
-	.get_timeouts		= sctp_get_timeouts,
 	.new 			= sctp_new,
 	.error			= sctp_error,
 	.can_early_drop		= sctp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 13c89fd107b2..d80d322b9d8b 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -768,27 +769,21 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	return NF_ACCEPT;
 }
 
-static unsigned int *tcp_get_timeouts(struct net *net)
-{
-	return tcp_pernet(net)->timeouts;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int tcp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_tcp_net *tn = tcp_pernet(net);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
+	unsigned int index, *timeouts;
 	enum ip_conntrack_dir dir;
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
 	unsigned long timeout;
-	unsigned int index;
 
 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
 	BUG_ON(th == NULL);
@@ -1021,6 +1016,10 @@ static int tcp_packet(struct nf_conn *ct,
 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
 
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = tn->timeouts;
+
 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
@@ -1070,7 +1069,7 @@ static int tcp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
 	enum tcp_conntrack new_state;
 	const struct tcphdr *th;
@@ -1288,10 +1287,12 @@ static unsigned int tcp_nlattr_tuple_size(void)
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
-	unsigned int *timeouts = data;
 	struct nf_tcp_net *tn = tcp_pernet(net);
+	unsigned int *timeouts = data;
 	int i;
 
+	if (!timeouts)
+		timeouts = tn->timeouts;
 	/* set default TCP timeouts. */
 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
 		timeouts[i] = tn->timeouts[i];
@@ -1538,7 +1539,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
 	.packet 		= tcp_packet,
-	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
 	.error			= tcp_error,
 	.can_early_drop		= tcp_can_early_drop,
@@ -1574,7 +1574,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
 	.packet 		= tcp_packet,
-	.get_timeouts		= tcp_get_timeouts,
 	.new 			= tcp_new,
 	.error			= tcp_error,
 	.can_early_drop		= tcp_can_early_drop,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8b435d70ffe3..7a1b8988a931 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -22,6 +22,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -45,9 +46,14 @@ static unsigned int *udp_get_timeouts(struct net *net)
 static int udp_packet(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned int *timeouts)
+		      enum ip_conntrack_info ctinfo)
 {
+	unsigned int *timeouts;
+
+	timeouts = nf_ct_timeout_lookup(ct);
+	if (!timeouts)
+		timeouts = udp_get_timeouts(nf_ct_net(ct));
+
 	/* If we've seen traffic both ways, this is some kind of UDP
 	   stream.  Extend timeout. */
 	if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
@@ -65,7 +71,7 @@ static int udp_packet(struct nf_conn *ct,
 
 /* Called when a new connection for this protocol found. */
 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff, unsigned int *timeouts)
+		    unsigned int dataoff)
 {
 	return true;
 }
@@ -176,6 +182,9 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
 	unsigned int *timeouts = data;
 	struct nf_udp_net *un = udp_pernet(net);
 
+	if (!timeouts)
+		timeouts = un->timeouts;
+
 	/* set default timeouts for UDP. */
 	timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED];
 	timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED];
@@ -275,7 +284,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -305,7 +313,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -335,7 +342,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udp_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -365,7 +371,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
 	.packet			= udp_packet,
-	.get_timeouts		= udp_get_timeouts,
 	.new			= udp_new,
 	.error			= udplite_error,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
@@ -388,3 +393,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
+#include <net/netfilter/nf_conntrack_timeout.h>
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 9da4b8462004..d9d952fad3e0 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -46,7 +46,7 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
 };
 
 static int
-ctnl_timeout_parse_policy(void *timeouts,
+ctnl_timeout_parse_policy(void *timeout,
 			  const struct nf_conntrack_l4proto *l4proto,
 			  struct net *net, const struct nlattr *attr)
 {
@@ -67,7 +67,7 @@ ctnl_timeout_parse_policy(void *timeouts,
 	if (ret < 0)
 		goto err;
 
-	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout);
 
 err:
 	kfree(tb);
@@ -372,7 +372,6 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 				 struct netlink_ext_ack *extack)
 {
 	const struct nf_conntrack_l4proto *l4proto;
-	unsigned int *timeouts;
 	__u16 l3num;
 	__u8 l4num;
 	int ret;
@@ -392,9 +391,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 		goto err;
 	}
 
-	timeouts = l4proto->get_timeouts(net);
-
-	ret = ctnl_timeout_parse_policy(timeouts, l4proto, net,
+	ret = ctnl_timeout_parse_policy(NULL, l4proto, net,
 					cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
@@ -431,7 +428,6 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 
 	if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
 		struct nlattr *nest_parms;
-		unsigned int *timeouts = l4proto->get_timeouts(net);
 		int ret;
 
 		nest_parms = nla_nest_start(skb,
@@ -439,7 +435,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 		if (!nest_parms)
 			goto nla_put_failure;
 
-		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
+		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
 		if (ret < 0)
 			goto nla_put_failure;
 
-- 
cgit v1.2.3


From d1b47a7c9efcf3c3384b70f6e3c8f1423b44d8c7 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Mon, 16 Jul 2018 11:16:30 -0400
Subject: mm: don't do zero_resv_unavail if memmap is not allocated

Moving zero_resv_unavail before memmap_init_zone(), caused a regression on
x86-32.

The cause is that we access struct pages before they are allocated when
CONFIG_FLAT_NODE_MEM_MAP is used.

free_area_init_nodes()
  zero_resv_unavail()
    mm_zero_struct_page(pfn_to_page(pfn)); <- struct page is not alloced
  free_area_init_node()
    if CONFIG_FLAT_NODE_MEM_MAP
      alloc_node_mem_map()
        memblock_virt_alloc_node_nopanic() <- struct page alloced here

On the other hand memblock_virt_alloc_node_nopanic() zeroes all the memory
that it returns, so we do not need to do zero_resv_unavail() here.

Fixes: e181ae0c5db9 ("mm: zero unavailable pages before memmap init")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Matt Hart <matt@mattface.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 mm/page_alloc.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..3982c83fdcbf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2132,7 +2132,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
 					struct mminit_pfnnid_cache *state);
 #endif
 
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 void zero_resv_unavail(void);
 #else
 static inline void zero_resv_unavail(void) {}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5d800d61ddb7..a790ef4be74e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6383,7 +6383,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	free_area_init_core(pgdat);
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK
+#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Only struct pages that are backed by physical memory are zeroed and
  * initialized by going through __init_single_page(). But, there are some
@@ -6421,7 +6421,7 @@ void __paginginit zero_resv_unavail(void)
 	if (pgcnt)
 		pr_info("Reserved but unavailable: %lld pages", pgcnt);
 }
-#endif /* CONFIG_HAVE_MEMBLOCK */
+#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
-- 
cgit v1.2.3


From 6e2059b53f9885f202b086d7b4ca10a98926e974 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Jul 2018 22:41:26 +0800
Subject: ipv4/igmp: init group mode as INCLUDE when join source group

Based on RFC3376 5.1
   If no interface
   state existed for that multicast address before the change (i.e., the
   change consisted of creating a new per-interface record), or if no
   state exists after the change (i.e., the change consisted of deleting
   a per-interface record), then the "non-existent" state is considered
   to have a filter mode of INCLUDE and an empty source list.

Which means a new multicast group should start with state IN().

Function ip_mc_join_group() works correctly for IGMP ASM(Any-Source Multicast)
mode. It adds a group with state EX() and inits crcount to mc_qrv,
so the kernel will send a TO_EX() report message after adding group.

But for IGMPv3 SSM(Source-specific multicast) JOIN_SOURCE_GROUP mode, we
split the group joining into two steps. First we join the group like ASM,
i.e. via ip_mc_join_group(). So the state changes from IN() to EX().

Then we add the source-specific address with INCLUDE mode. So the state
changes from EX() to IN(A).

Before the first step sends a group change record, we finished the second
step. So we will only send the second change record. i.e. TO_IN(A).

Regarding the RFC stands, we should actually send an ALLOW(A) message for
SSM JOIN_SOURCE_GROUP as the state should mimic the 'IN() to IN(A)'
transition.

The issue was exposed by commit a052517a8ff65 ("net/multicast: should not
send source list records when have filter mode change"). Before this change,
we used to send both ALLOW(A) and TO_IN(A). After this change we only send
TO_IN(A).

Fix it by adding a new parameter to init group mode. Also add new wrapper
functions so we don't need to change too much code.

v1 -> v2:
In my first version I only cleared the group change record. But this is not
enough. Because when a new group join, it will init as EXCLUDE and trigger
an filter mode change in ip/ip6_mc_add_src(), which will clear all source
addresses' sf_crcount. This will prevent early joined address sending state
change records if multi source addressed joined at the same time.

In v2 patch, I fixed it by directly initializing the mode to INCLUDE for SSM
JOIN_SOURCE_GROUP. I also split the original patch into two separated patches
for IPv4 and IPv6.

Fixes: a052517a8ff65 ("net/multicast: should not send source list records when have filter mode change")
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h   |  2 ++
 net/ipv4/igmp.c        | 58 ++++++++++++++++++++++++++++++++++++--------------
 net/ipv4/ip_sockglue.c |  4 ++--
 3 files changed, 46 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index f8231854b5d6..119f53941c12 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -109,6 +109,8 @@ struct ip_mc_list {
 extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto);
 extern int igmp_rcv(struct sk_buff *);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
+extern int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+				unsigned int mode);
 extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr);
 extern void ip_mc_drop_socket(struct sock *sk);
 extern int ip_mc_source(int add, int omode, struct sock *sk,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b655bc..b3c899a630a0 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1200,13 +1200,14 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	spin_lock_bh(&im->lock);
 	if (pmc) {
 		im->interface = pmc->interface;
-		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		im->sfmode = pmc->sfmode;
 		if (pmc->sfmode == MCAST_INCLUDE) {
 			im->tomb = pmc->tomb;
 			im->sources = pmc->sources;
 			for (psf = im->sources; psf; psf = psf->sf_next)
-				psf->sf_crcount = im->crcount;
+				psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+		} else {
+			im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
 		}
 		in_dev_put(pmc->interface);
 		kfree(pmc);
@@ -1288,7 +1289,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
 #endif
 }
 
-static void igmp_group_added(struct ip_mc_list *im)
+static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
 {
 	struct in_device *in_dev = im->interface;
 #ifdef CONFIG_IP_MULTICAST
@@ -1316,7 +1317,13 @@ static void igmp_group_added(struct ip_mc_list *im)
 	}
 	/* else, v3 */
 
-	im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+	/* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should
+	 * not send filter-mode change record as the mode should be from
+	 * IN() to IN(A).
+	 */
+	if (mode == MCAST_EXCLUDE)
+		im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+
 	igmp_ifc_event(in_dev);
 #endif
 }
@@ -1381,8 +1388,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
 /*
  *	A socket has joined a multicast group on device dev.
  */
-
-void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
 {
 	struct ip_mc_list *im;
 #ifdef CONFIG_IP_MULTICAST
@@ -1394,7 +1400,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	for_each_pmc_rtnl(in_dev, im) {
 		if (im->multiaddr == addr) {
 			im->users++;
-			ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+			ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
 			goto out;
 		}
 	}
@@ -1408,8 +1414,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 	in_dev_hold(in_dev);
 	im->multiaddr = addr;
 	/* initial mode is (EX, empty) */
-	im->sfmode = MCAST_EXCLUDE;
-	im->sfcount[MCAST_EXCLUDE] = 1;
+	im->sfmode = mode;
+	im->sfcount[mode] = 1;
 	refcount_set(&im->refcnt, 1);
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
@@ -1426,12 +1432,17 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im);
 #endif
-	igmp_group_added(im);
+	igmp_group_added(im, mode);
 	if (!in_dev->dead)
 		ip_rt_multicast_event(in_dev);
 out:
 	return;
 }
+
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+{
+	__ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+}
 EXPORT_SYMBOL(ip_mc_inc_group);
 
 static int ip_mc_check_iphdr(struct sk_buff *skb)
@@ -1688,7 +1699,7 @@ void ip_mc_remap(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
 		igmpv3_del_delrec(in_dev, pmc);
 #endif
-		igmp_group_added(pmc);
+		igmp_group_added(pmc, pmc->sfmode);
 	}
 }
 
@@ -1751,7 +1762,7 @@ void ip_mc_up(struct in_device *in_dev)
 #ifdef CONFIG_IP_MULTICAST
 		igmpv3_del_delrec(in_dev, pmc);
 #endif
-		igmp_group_added(pmc);
+		igmp_group_added(pmc, pmc->sfmode);
 	}
 }
 
@@ -2130,8 +2141,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
 
 /* Join a multicast group
  */
-
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+			      unsigned int mode)
 {
 	__be32 addr = imr->imr_multiaddr.s_addr;
 	struct ip_mc_socklist *iml, *i;
@@ -2172,15 +2183,30 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
 	memcpy(&iml->multi, imr, sizeof(*imr));
 	iml->next_rcu = inet->mc_list;
 	iml->sflist = NULL;
-	iml->sfmode = MCAST_EXCLUDE;
+	iml->sfmode = mode;
 	rcu_assign_pointer(inet->mc_list, iml);
-	ip_mc_inc_group(in_dev, addr);
+	__ip_mc_inc_group(in_dev, addr, mode);
 	err = 0;
 done:
 	return err;
 }
+
+/* Join ASM (Any-Source Multicast) group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+	return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE);
+}
 EXPORT_SYMBOL(ip_mc_join_group);
 
+/* Join SSM (Source-Specific Multicast) group
+ */
+int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+			 unsigned int mode)
+{
+	return __ip_mc_join_group(sk, imr, mode);
+}
+
 static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
 			   struct in_device *in_dev)
 {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc32fdbeefa6..64c76dcf7386 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -984,7 +984,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
 			mreq.imr_address.s_addr = mreqs.imr_interface;
 			mreq.imr_ifindex = 0;
-			err = ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 			if (err && err != -EADDRINUSE)
 				break;
 			omode = MCAST_INCLUDE;
@@ -1061,7 +1061,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			mreq.imr_multiaddr = psin->sin_addr;
 			mreq.imr_address.s_addr = 0;
 			mreq.imr_ifindex = greqs.gsr_interface;
-			err = ip_mc_join_group(sk, &mreq);
+			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
 			if (err && err != -EADDRINUSE)
 				break;
 			greqs.gsr_interface = mreq.imr_ifindex;
-- 
cgit v1.2.3


From c7ea20c9da5b94e400c8dcc0adb99411f2e430a6 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 10 Jul 2018 22:41:27 +0800
Subject: ipv6/mcast: init as INCLUDE when join SSM INCLUDE group

This an IPv6 version patch of "ipv4/igmp: init group mode as INCLUDE when
join source group". From RFC3810, part 6.1:

   If no per-interface state existed for that
   multicast address before the change (i.e., the change consisted of
   creating a new per-interface record), or if no state exists after the
   change (i.e., the change consisted of deleting a per-interface
   record), then the "non-existent" state is considered to have an
   INCLUDE filter mode and an empty source list.

Which means a new multicast group should start with state IN(). Currently,
for MLDv2 SSM JOIN_SOURCE_GROUP mode, we first call ipv6_sock_mc_join(),
then ip6_mc_source(), which will trigger a TO_IN() message instead of
ALLOW().

The issue was exposed by commit a052517a8ff65 ("net/multicast: should not
send source list records when have filter mode change"). Before this change,
we sent both ALLOW(A) and TO_IN(A). Now, we only send TO_IN(A).

Fix it by adding a new parameter to init group mode. Also add some wrapper
functions to avoid changing too much code.

v1 -> v2:
In the first version I only cleared the group change record. But this is not
enough. Because when a new group join, it will init as EXCLUDE and trigger
a filter mode change in ip/ip6_mc_add_src(), which will clear all source
addresses sf_crcount. This will prevent early joined address sending state
change records if multi source addressed joined at the same time.

In v2 patch, I fixed it by directly initializing the mode to INCLUDE for SSM
JOIN_SOURCE_GROUP. I also split the original patch into two separated patches
for IPv4 and IPv6.

There is also a difference between v4 and v6 version. For IPv6, when the
interface goes down and up, we will send correct state change record with
unspecified IPv6 address (::) with function ipv6_mc_up(). But after DAD is
completed, we resend the change record TO_IN() in mld_send_initial_cr().
Fix it by sending ALLOW() for INCLUDE mode in mld_send_initial_cr().

Fixes: a052517a8ff65 ("net/multicast: should not send source list records when have filter mode change")
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       |  2 ++
 net/ipv6/ipv6_sockglue.c |  5 ++--
 net/ipv6/mcast.c         | 64 ++++++++++++++++++++++++++++++++++--------------
 3 files changed, 50 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d02881e4ad1f..7528632bcf2a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1100,6 +1100,8 @@ void ipv6_sysctl_unregister(void);
 
 int ipv6_sock_mc_join(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+			  const struct in6_addr *addr, unsigned int mode);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c95c3486d904..568ca4187cd1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -729,8 +729,9 @@ done:
 			struct sockaddr_in6 *psin6;
 
 			psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
-			retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
-						 &psin6->sin6_addr);
+			retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
+						     &psin6->sin6_addr,
+						     MCAST_INCLUDE);
 			/* prior join w/ different source is ok */
 			if (retv && retv != -EADDRINUSE)
 				break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c0c74088f2af..2699be7202be 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -95,6 +95,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 			  int delta);
 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 			    struct inet6_dev *idev);
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+			     const struct in6_addr *addr, unsigned int mode);
 
 #define MLD_QRV_DEFAULT		2
 /* RFC3810, 9.2. Query Interval */
@@ -132,7 +134,8 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
 	return iv > 0 ? iv : 1;
 }
 
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
+			       const struct in6_addr *addr, unsigned int mode)
 {
 	struct net_device *dev = NULL;
 	struct ipv6_mc_socklist *mc_lst;
@@ -179,7 +182,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	}
 
 	mc_lst->ifindex = dev->ifindex;
-	mc_lst->sfmode = MCAST_EXCLUDE;
+	mc_lst->sfmode = mode;
 	rwlock_init(&mc_lst->sflock);
 	mc_lst->sflist = NULL;
 
@@ -187,7 +190,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	 *	now add/increase the group membership on the device
 	 */
 
-	err = ipv6_dev_mc_inc(dev, addr);
+	err = __ipv6_dev_mc_inc(dev, addr, mode);
 
 	if (err) {
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -199,8 +202,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	return 0;
 }
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+	return __ipv6_sock_mc_join(sk, ifindex, addr, MCAST_EXCLUDE);
+}
 EXPORT_SYMBOL(ipv6_sock_mc_join);
 
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+			  const struct in6_addr *addr, unsigned int mode)
+{
+	return __ipv6_sock_mc_join(sk, ifindex, addr, mode);
+}
+
 /*
  *	socket leave on multicast group
  */
@@ -646,7 +660,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	return rv;
 }
 
-static void igmp6_group_added(struct ifmcaddr6 *mc)
+static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
 {
 	struct net_device *dev = mc->idev->dev;
 	char buf[MAX_ADDR_LEN];
@@ -672,7 +686,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	}
 	/* else v2 */
 
-	mc->mca_crcount = mc->idev->mc_qrv;
+	/* Based on RFC3810 6.1, for newly added INCLUDE SSM, we
+	 * should not send filter-mode change record as the mode
+	 * should be from IN() to IN(A).
+	 */
+	if (mode == MCAST_EXCLUDE)
+		mc->mca_crcount = mc->idev->mc_qrv;
+
 	mld_ifc_event(mc->idev);
 }
 
@@ -770,13 +790,14 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	spin_lock_bh(&im->mca_lock);
 	if (pmc) {
 		im->idev = pmc->idev;
-		im->mca_crcount = idev->mc_qrv;
 		im->mca_sfmode = pmc->mca_sfmode;
 		if (pmc->mca_sfmode == MCAST_INCLUDE) {
 			im->mca_tomb = pmc->mca_tomb;
 			im->mca_sources = pmc->mca_sources;
 			for (psf = im->mca_sources; psf; psf = psf->sf_next)
-				psf->sf_crcount = im->mca_crcount;
+				psf->sf_crcount = idev->mc_qrv;
+		} else {
+			im->mca_crcount = idev->mc_qrv;
 		}
 		in6_dev_put(pmc->idev);
 		kfree(pmc);
@@ -831,7 +852,8 @@ static void ma_put(struct ifmcaddr6 *mc)
 }
 
 static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
-				   const struct in6_addr *addr)
+				   const struct in6_addr *addr,
+				   unsigned int mode)
 {
 	struct ifmcaddr6 *mc;
 
@@ -849,9 +871,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 	refcount_set(&mc->mca_refcnt, 1);
 	spin_lock_init(&mc->mca_lock);
 
-	/* initial mode is (EX, empty) */
-	mc->mca_sfmode = MCAST_EXCLUDE;
-	mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+	mc->mca_sfmode = mode;
+	mc->mca_sfcount[mode] = 1;
 
 	if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
 	    IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -863,7 +884,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 /*
  *	device multicast group inc (add if not found)
  */
-int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+			     const struct in6_addr *addr, unsigned int mode)
 {
 	struct ifmcaddr6 *mc;
 	struct inet6_dev *idev;
@@ -887,14 +909,13 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 		if (ipv6_addr_equal(&mc->mca_addr, addr)) {
 			mc->mca_users++;
 			write_unlock_bh(&idev->lock);
-			ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
-				NULL, 0);
+			ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
 			in6_dev_put(idev);
 			return 0;
 		}
 	}
 
-	mc = mca_alloc(idev, addr);
+	mc = mca_alloc(idev, addr, mode);
 	if (!mc) {
 		write_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
@@ -911,11 +932,16 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 
 	mld_del_delrec(idev, mc);
-	igmp6_group_added(mc);
+	igmp6_group_added(mc, mode);
 	ma_put(mc);
 	return 0;
 }
 
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+	return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
+}
+
 /*
  *	device multicast group del
  */
@@ -1751,7 +1777,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 		psf_next = psf->sf_next;
 
-		if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+		if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
 			psf_prev = psf;
 			continue;
 		}
@@ -2066,7 +2092,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_CHANGE_TO_EXCLUDE;
 		else
-			type = MLD2_CHANGE_TO_INCLUDE;
+			type = MLD2_ALLOW_NEW_SOURCES;
 		skb = add_grec(skb, pmc, type, 0, 0, 1);
 		spin_unlock_bh(&pmc->mca_lock);
 	}
@@ -2546,7 +2572,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
 	ipv6_mc_reset(idev);
 	for (i = idev->mc_list; i; i = i->next) {
 		mld_del_delrec(idev, i);
-		igmp6_group_added(i);
+		igmp6_group_added(i, i->mca_sfmode);
 	}
 	read_unlock_bh(&idev->lock);
 }
-- 
cgit v1.2.3


From 377179cd28cd417dcfb4396edb824533431e607e Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 13 Jul 2018 14:05:56 -0400
Subject: security: define new LSM hook named security_kernel_load_data

Differentiate between the kernel reading a file specified by userspace
from the kernel loading a buffer containing data provided by userspace.
This patch defines a new LSM hook named security_kernel_load_data().

Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/lsm_hooks.h |  6 ++++++
 include/linux/security.h  | 27 +++++++++++++++++++++++++++
 security/security.c       |  5 +++++
 3 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 8f1131c8dd54..a08bc2587b96 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -576,6 +576,10 @@
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
+ * @kernel_load_data:
+ *	Load data provided by userspace.
+ *	@id kernel load data identifier
+ *	Return 0 if permission is granted.
  * @kernel_read_file:
  *	Read a file specified by userspace.
  *	@file contains the file structure pointing to the file being read
@@ -1582,6 +1586,7 @@ union security_list_options {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
+	int (*kernel_load_data)(enum kernel_load_data_id id);
 	int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id);
 	int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size,
 				     enum kernel_read_file_id id);
@@ -1872,6 +1877,7 @@ struct security_hook_heads {
 	struct hlist_head cred_getsecid;
 	struct hlist_head kernel_act_as;
 	struct hlist_head kernel_create_files_as;
+	struct hlist_head kernel_load_data;
 	struct hlist_head kernel_read_file;
 	struct hlist_head kernel_post_read_file;
 	struct hlist_head kernel_module_request;
diff --git a/include/linux/security.h b/include/linux/security.h
index 63030c85ee19..3410acfe139c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -159,6 +159,27 @@ extern int mmap_min_addr_handler(struct ctl_table *table, int write,
 typedef int (*initxattrs) (struct inode *inode,
 			   const struct xattr *xattr_array, void *fs_data);
 
+
+/* Keep the kernel_load_data_id enum in sync with kernel_read_file_id */
+#define __data_id_enumify(ENUM, dummy) LOADING_ ## ENUM,
+#define __data_id_stringify(dummy, str) #str,
+
+enum kernel_load_data_id {
+	__kernel_read_file_id(__data_id_enumify)
+};
+
+static const char * const kernel_load_data_str[] = {
+	__kernel_read_file_id(__data_id_stringify)
+};
+
+static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
+{
+	if ((unsigned)id >= LOADING_MAX_ID)
+		return kernel_load_data_str[LOADING_UNKNOWN];
+
+	return kernel_load_data_str[id];
+}
+
 #ifdef CONFIG_SECURITY
 
 struct security_mnt_opts {
@@ -320,6 +341,7 @@ void security_cred_getsecid(const struct cred *c, u32 *secid);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
+int security_kernel_load_data(enum kernel_load_data_id id);
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id);
 int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 				   enum kernel_read_file_id id);
@@ -909,6 +931,11 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
+static inline int security_kernel_load_data(enum kernel_load_data_id id)
+{
+	return 0;
+}
+
 static inline int security_kernel_read_file(struct file *file,
 					    enum kernel_read_file_id id)
 {
diff --git a/security/security.c b/security/security.c
index 68f46d849abe..c2de2f134854 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1056,6 +1056,11 @@ int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 }
 EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
 
+int security_kernel_load_data(enum kernel_load_data_id id)
+{
+	return call_int_hook(kernel_load_data, 0, id);
+}
+
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
-- 
cgit v1.2.3


From 16c267aac86b463b1fcccd43c89f4c8e5c5c86fa Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.vnet.ibm.com>
Date: Fri, 13 Jul 2018 14:05:58 -0400
Subject: ima: based on policy require signed kexec kernel images

The original kexec_load syscall can not verify file signatures, nor can
the kexec image be measured.  Based on policy, deny the kexec_load
syscall.

Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/ima.h                 |  7 +++++++
 security/integrity/ima/ima.h        |  1 +
 security/integrity/ima/ima_main.c   | 27 +++++++++++++++++++++++++++
 security/integrity/ima/ima_policy.c |  2 ++
 security/security.c                 |  7 ++++++-
 5 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 0e4647e0eb60..84806b54b50a 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -11,6 +11,7 @@
 #define _LINUX_IMA_H
 
 #include <linux/fs.h>
+#include <linux/security.h>
 #include <linux/kexec.h>
 struct linux_binprm;
 
@@ -19,6 +20,7 @@ extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask, int opened);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
+extern int ima_load_data(enum kernel_load_data_id id);
 extern int ima_read_file(struct file *file, enum kernel_read_file_id id);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
 			      enum kernel_read_file_id id);
@@ -49,6 +51,11 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
 	return 0;
 }
 
+static inline int ima_load_data(enum kernel_load_data_id id)
+{
+	return 0;
+}
+
 static inline int ima_read_file(struct file *file, enum kernel_read_file_id id)
 {
 	return 0;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 354bb5716ce3..78c15264b17b 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -232,6 +232,7 @@ int ima_policy_show(struct seq_file *m, void *v);
 #define IMA_APPRAISE_MODULES	0x08
 #define IMA_APPRAISE_FIRMWARE	0x10
 #define IMA_APPRAISE_POLICY	0x20
+#define IMA_APPRAISE_KEXEC	0x40
 
 #ifdef CONFIG_IMA_APPRAISE
 int ima_appraise_measurement(enum ima_hooks func,
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index dca44cf7838e..71fecfef0939 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -496,6 +496,33 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 				   MAY_READ, func, 0);
 }
 
+/**
+ * ima_load_data - appraise decision based on policy
+ * @id: kernel load data caller identifier
+ *
+ * Callers of this LSM hook can not measure, appraise, or audit the
+ * data provided by userspace.  Enforce policy rules requring a file
+ * signature (eg. kexec'ed kernel image).
+ *
+ * For permission return 0, otherwise return -EACCES.
+ */
+int ima_load_data(enum kernel_load_data_id id)
+{
+	if ((ima_appraise & IMA_APPRAISE_ENFORCE) != IMA_APPRAISE_ENFORCE)
+		return 0;
+
+	switch (id) {
+	case LOADING_KEXEC_IMAGE:
+		if (ima_appraise & IMA_APPRAISE_KEXEC) {
+			pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
+			return -EACCES;	/* INTEGRITY_UNKNOWN */
+		}
+	default:
+		break;
+	}
+	return 0;
+}
+
 static int __init init_ima(void)
 {
 	int error;
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index cdcc9a7b4e24..d5b4958decc5 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -448,6 +448,8 @@ static int ima_appraise_flag(enum ima_hooks func)
 		return IMA_APPRAISE_FIRMWARE;
 	else if (func == POLICY_CHECK)
 		return IMA_APPRAISE_POLICY;
+	else if (func == KEXEC_KERNEL_CHECK)
+		return IMA_APPRAISE_KEXEC;
 	return 0;
 }
 
diff --git a/security/security.c b/security/security.c
index c2de2f134854..4927e7cc7d96 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1058,7 +1058,12 @@ EXPORT_SYMBOL_GPL(security_kernel_post_read_file);
 
 int security_kernel_load_data(enum kernel_load_data_id id)
 {
-	return call_int_hook(kernel_load_data, 0, id);
+	int ret;
+
+	ret = call_int_hook(kernel_load_data, 0, id);
+	if (ret)
+		return ret;
+	return ima_load_data(id);
 }
 
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
-- 
cgit v1.2.3


From 964d0fbf6301d3dc8dfad19ffab5a06d002d27f1 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Fri, 6 Jul 2018 14:16:54 -0400
Subject: drm/amdgpu: Allow to create BO lists in CS ioctl v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change is to support MESA performace optimization.
Modify CS IOCTL to allow its input as command buffer and an array of
buffer handles to create a temporay bo list and then destroy it
when IOCTL completes.
This saves on calling for BO_LIST create and destry IOCTLs in MESA
and by this improves performance.

v2: Avoid inserting the temp list into idr struct.

v3:
Remove idr alloation from amdgpu_bo_list_create.
Remove useless argument from amdgpu_cs_parser_fini
Minor cosmetic stuff.

v4: Revert amdgpu_bo_list_destroy back to static

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 88 +++++++++++++++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 48 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
 include/uapi/drm/amdgpu_drm.h               |  1 +
 5 files changed, 108 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53435da158c2..c6c1e8dc919f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,6 +732,14 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 			     struct list_head *validated);
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+				      struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 struct drm_amdgpu_bo_list_entry *info,
+				 unsigned num_entries,
+				 struct amdgpu_bo_list **list);
 
 /*
  * GFX stuff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6de197..7679c068c89a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -55,15 +55,15 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
 	kfree_rcu(list, rhead);
 }
 
-static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
 				 struct drm_file *filp,
 				 struct drm_amdgpu_bo_list_entry *info,
 				 unsigned num_entries,
-				 int *id)
+				 struct amdgpu_bo_list **list_out)
 {
-	int r;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	struct amdgpu_bo_list *list;
+	int r;
+
 
 	list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
 	if (!list)
@@ -78,16 +78,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 		return r;
 	}
 
-	/* idr alloc should be called only after initialization of bo list. */
-	mutex_lock(&fpriv->bo_list_lock);
-	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
-	mutex_unlock(&fpriv->bo_list_lock);
-	if (r < 0) {
-		amdgpu_bo_list_free(list);
-		return r;
-	}
-	*id = r;
-
+	*list_out = list;
 	return 0;
 }
 
@@ -263,55 +254,79 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
 	kfree(list);
 }
 
-int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+				      struct drm_amdgpu_bo_list_entry **info_param)
 {
+	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
 	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
-	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	union drm_amdgpu_bo_list *args = data;
-	uint32_t handle = args->in.list_handle;
-	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
 	struct drm_amdgpu_bo_list_entry *info;
-	struct amdgpu_bo_list *list;
-
 	int r;
 
-	info = kvmalloc_array(args->in.bo_number,
-			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
+	info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 
 	/* copy the handle array from userspace to a kernel buffer */
 	r = -EFAULT;
-	if (likely(info_size == args->in.bo_info_size)) {
-		unsigned long bytes = args->in.bo_number *
-			args->in.bo_info_size;
+	if (likely(info_size == in->bo_info_size)) {
+		unsigned long bytes = in->bo_number *
+			in->bo_info_size;
 
 		if (copy_from_user(info, uptr, bytes))
 			goto error_free;
 
 	} else {
-		unsigned long bytes = min(args->in.bo_info_size, info_size);
+		unsigned long bytes = min(in->bo_info_size, info_size);
 		unsigned i;
 
-		memset(info, 0, args->in.bo_number * info_size);
-		for (i = 0; i < args->in.bo_number; ++i) {
+		memset(info, 0, in->bo_number * info_size);
+		for (i = 0; i < in->bo_number; ++i) {
 			if (copy_from_user(&info[i], uptr, bytes))
 				goto error_free;
 
-			uptr += args->in.bo_info_size;
+			uptr += in->bo_info_size;
 		}
 	}
 
+	*info_param = info;
+	return 0;
+
+error_free:
+	kvfree(info);
+	return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	union drm_amdgpu_bo_list *args = data;
+	uint32_t handle = args->in.list_handle;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+	struct amdgpu_bo_list *list;
+	int r;
+
+	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+	if (r)
+		goto error_free;
+
 	switch (args->in.operation) {
 	case AMDGPU_BO_LIST_OP_CREATE:
 		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
-					  &handle);
+					  &list);
 		if (r)
 			goto error_free;
+
+		mutex_lock(&fpriv->bo_list_lock);
+		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+		mutex_unlock(&fpriv->bo_list_lock);
+		if (r < 0) {
+			amdgpu_bo_list_free(list);
+			return r;
+		}
+
+		handle = r;
 		break;
 
 	case AMDGPU_BO_LIST_OP_DESTROY:
@@ -345,6 +360,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 error_free:
-	kvfree(info);
+	if (info)
+		kvfree(info);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5a2a5ba29f9a..6d8df76b5a5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
+				  &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
+	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -164,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -534,7 +571,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (!p->bo_list)
+		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	else
+		mutex_lock(&p->bo_list->lock);
+
 	if (p->bo_list) {
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 		if (p->bo_list->first_userptr != p->bo_list->num_entries)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 06aede194bf8..529500c94675 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
  * - 3.24.0 - Add high priority compute support for gfx9
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	26
+#define KMS_DRIVER_MINOR	27
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 784b0fe470ee..1ceec56de015 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -516,6 +516,7 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
-- 
cgit v1.2.3


From 2b9672ddb6f347467d7b33b86c5dfc4d5c0501a8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 12 Jul 2018 21:32:53 +0200
Subject: net: phy: add phy_speed_down and phy_speed_up

Some network drivers include functionality to speed down the PHY when
suspending and just waiting for a WoL packet because this saves energy.
This functionality is quite generic, therefore let's factor it out to
phylib.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h   |  2 ++
 2 files changed, 80 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c4aa360dedff..d2baedc4ea91 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -551,6 +551,84 @@ int phy_start_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_start_aneg);
 
+static int phy_poll_aneg_done(struct phy_device *phydev)
+{
+	unsigned int retries = 100;
+	int ret;
+
+	do {
+		msleep(100);
+		ret = phy_aneg_done(phydev);
+	} while (!ret && --retries);
+
+	if (!ret)
+		return -ETIMEDOUT;
+
+	return ret < 0 ? ret : 0;
+}
+
+/**
+ * phy_speed_down - set speed to lowest speed supported by both link partners
+ * @phydev: the phy_device struct
+ * @sync: perform action synchronously
+ *
+ * Description: Typically used to save energy when waiting for a WoL packet
+ *
+ * WARNING: Setting sync to false may cause the system being unable to suspend
+ * in case the PHY generates an interrupt when finishing the autonegotiation.
+ * This interrupt may wake up the system immediately after suspend.
+ * Therefore use sync = false only if you're sure it's safe with the respective
+ * network chip.
+ */
+int phy_speed_down(struct phy_device *phydev, bool sync)
+{
+	u32 adv = phydev->lp_advertising & phydev->supported;
+	u32 adv_old = phydev->advertising;
+	int ret;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return 0;
+
+	if (adv & PHY_10BT_FEATURES)
+		phydev->advertising &= ~(PHY_100BT_FEATURES |
+					 PHY_1000BT_FEATURES);
+	else if (adv & PHY_100BT_FEATURES)
+		phydev->advertising &= ~PHY_1000BT_FEATURES;
+
+	if (phydev->advertising == adv_old)
+		return 0;
+
+	ret = phy_config_aneg(phydev);
+	if (ret)
+		return ret;
+
+	return sync ? phy_poll_aneg_done(phydev) : 0;
+}
+EXPORT_SYMBOL_GPL(phy_speed_down);
+
+/**
+ * phy_speed_up - (re)set advertised speeds to all supported speeds
+ * @phydev: the phy_device struct
+ *
+ * Description: Used to revert the effect of phy_speed_down
+ */
+int phy_speed_up(struct phy_device *phydev)
+{
+	u32 mask = PHY_10BT_FEATURES | PHY_100BT_FEATURES | PHY_1000BT_FEATURES;
+	u32 adv_old = phydev->advertising;
+
+	if (phydev->autoneg != AUTONEG_ENABLE)
+		return 0;
+
+	phydev->advertising = (adv_old & ~mask) | (phydev->supported & mask);
+
+	if (phydev->advertising == adv_old)
+		return 0;
+
+	return phy_config_aneg(phydev);
+}
+EXPORT_SYMBOL_GPL(phy_speed_up);
+
 /**
  * phy_start_machine - start PHY state machine tracking
  * @phydev: the phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6cd09098427c..075c2f770d3e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -942,6 +942,8 @@ void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
 int phy_aneg_done(struct phy_device *phydev);
+int phy_speed_down(struct phy_device *phydev, bool sync);
+int phy_speed_up(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
 int phy_restart_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From d9f37d01e294e5338aa3e9d3b2eda61b59b619df Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 13 Jul 2018 14:41:36 +0800
Subject: net: convert gro_count to bitmask

gro_hash size is 192 bytes, and uses 3 cache lines, if there is few
flows, gro_hash may be not fully used, so it is unnecessary to iterate
all gro_hash in napi_gro_flush(), to occupy unnecessary cacheline.

convert gro_count to a bitmask, and rename it as gro_bitmask, each bit
represents a element of gro_hash, only flush a gro_hash element if the
related bit is set, to speed up napi_gro_flush().

and update gro_bitmask only if it will be changed, to reduce cache
update

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Cc: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 +++++++--
 net/core/dev.c            | 36 ++++++++++++++++++++++++------------
 2 files changed, 31 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3514d67112b3..c1295c7a452e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -308,9 +308,14 @@ struct gro_list {
 };
 
 /*
- * Structure for NAPI scheduling similar to tasklet but with weighting
+ * size of gro hash buckets, must less than bit number of
+ * napi_struct::gro_bitmask
  */
 #define GRO_HASH_BUCKETS	8
+
+/*
+ * Structure for NAPI scheduling similar to tasklet but with weighting
+ */
 struct napi_struct {
 	/* The poll_list must only be managed by the entity which
 	 * changes the state of the NAPI_STATE_SCHED bit.  This means
@@ -322,7 +327,7 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
-	unsigned int		gro_count;
+	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	int			poll_owner;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0df1771a12f9..c883b17ee0fe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5282,9 +5282,11 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 		list_del(&skb->list);
 		skb->next = NULL;
 		napi_gro_complete(skb);
-		napi->gro_count--;
 		napi->gro_hash[index].count--;
 	}
+
+	if (!napi->gro_hash[index].count)
+		__clear_bit(index, &napi->gro_bitmask);
 }
 
 /* napi->gro_hash[].list contains packets ordered by age.
@@ -5295,8 +5297,10 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
 {
 	u32 i;
 
-	for (i = 0; i < GRO_HASH_BUCKETS; i++)
-		__napi_gro_flush_chain(napi, i, flush_old);
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		if (test_bit(i, &napi->gro_bitmask))
+			__napi_gro_flush_chain(napi, i, flush_old);
+	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
@@ -5388,8 +5392,8 @@ static void gro_flush_oldest(struct list_head *head)
 	if (WARN_ON_ONCE(!oldest))
 		return;
 
-	/* Do not adjust napi->gro_count, caller is adding a new SKB to
-	 * the chain.
+	/* Do not adjust napi->gro_hash[].count, caller is adding a new
+	 * SKB to the chain.
 	 */
 	list_del(&oldest->list);
 	napi_gro_complete(oldest);
@@ -5464,7 +5468,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 		list_del(&pp->list);
 		pp->next = NULL;
 		napi_gro_complete(pp);
-		napi->gro_count--;
 		napi->gro_hash[hash].count--;
 	}
 
@@ -5477,7 +5480,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
 		gro_flush_oldest(gro_head);
 	} else {
-		napi->gro_count++;
 		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
@@ -5492,6 +5494,13 @@ pull:
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
+	if (napi->gro_hash[hash].count) {
+		if (!test_bit(hash, &napi->gro_bitmask))
+			__set_bit(hash, &napi->gro_bitmask);
+	} else if (test_bit(hash, &napi->gro_bitmask)) {
+		__clear_bit(hash, &napi->gro_bitmask);
+	}
+
 	return ret;
 
 normal:
@@ -5890,7 +5899,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -6099,7 +6108,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_count && !napi_disable_pending(napi) &&
+	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -6114,7 +6123,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
 		INIT_LIST_HEAD(&napi->gro_hash[i].list);
 		napi->gro_hash[i].count = 0;
@@ -6174,7 +6183,7 @@ void netif_napi_del(struct napi_struct *napi)
 	napi_free_frags(napi);
 
 	flush_gro_hash(napi);
-	napi->gro_count = 0;
+	napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -6216,7 +6225,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_count) {
+	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
@@ -9272,6 +9281,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+			FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
 	if (net != &init_net)
 		INIT_LIST_HEAD(&net->dev_base_head);
 
-- 
cgit v1.2.3


From c133459765fae249ba482f62e12f987aec4376f0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 13 Jul 2018 21:25:19 -0700
Subject: net/ethernet/freescale/fman: fix cross-build error

  CC [M]  drivers/net/ethernet/freescale/fman/fman.o
In file included from ../drivers/net/ethernet/freescale/fman/fman.c:35:
../include/linux/fsl/guts.h: In function 'guts_set_dmacr':
../include/linux/fsl/guts.h:165:2: error: implicit declaration of function 'clrsetbits_be32' [-Werror=implicit-function-declaration]
  clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift);
  ^~~~~~~~~~~~~~~

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Madalin Bucur <madalin.bucur@nxp.com>
Cc: netdev@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fsl/guts.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h
index 3efa3b861d44..941b11811f85 100644
--- a/include/linux/fsl/guts.h
+++ b/include/linux/fsl/guts.h
@@ -16,6 +16,7 @@
 #define __FSL_GUTS_H__
 
 #include <linux/types.h>
+#include <linux/io.h>
 
 /**
  * Global Utility Registers.
-- 
cgit v1.2.3


From 31048d7aedf31bf0f69c54a662944632f29d82f2 Mon Sep 17 00:00:00 2001
From: Stefan Baranoff <sbaranoff@gmail.com>
Date: Sun, 15 Jul 2018 11:36:37 -0400
Subject: tcp: Fix broken repair socket window probe patch

Correct previous bad attempt at allowing sockets to come out of TCP
repair without sending window probes. To avoid changing size of
the repair variable in struct tcp_sock, this lets the decision for
sending probes or not to be made when coming out of repair by
introducing two ways to turn it off.

v2:
* Remove erroneous comment; defines now make behavior clear

Fixes: 70b7ff130224 ("tcp: allow user to create repair socket without window probes")
Signed-off-by: Stefan Baranoff <sbaranoff@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Andrei Vagin <avagin@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h |  4 ++++
 net/ipv4/tcp.c           | 13 +++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 29eb659aa77a..e3f6ed8a7064 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -127,6 +127,10 @@ enum {
 
 #define TCP_CM_INQ		TCP_INQ
 
+#define TCP_REPAIR_ON		1
+#define TCP_REPAIR_OFF		0
+#define TCP_REPAIR_OFF_NO_WP	-1	/* Turn off without window probes */
+
 struct tcp_repair_opt {
 	__u32	opt_code;
 	__u32	opt_val;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e5e2ca9ab1b..ec2186e3087f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2823,16 +2823,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_REPAIR:
 		if (!tcp_can_repair_sock(sk))
 			err = -EPERM;
-		/* 1 for normal repair, 2 for no window probes */
-		else if (val == 1 || val == 2) {
-			tp->repair = val;
+		else if (val == TCP_REPAIR_ON) {
+			tp->repair = 1;
 			sk->sk_reuse = SK_FORCE_REUSE;
 			tp->repair_queue = TCP_NO_QUEUE;
-		} else if (val == 0) {
+		} else if (val == TCP_REPAIR_OFF) {
+			tp->repair = 0;
+			sk->sk_reuse = SK_NO_REUSE;
+			tcp_send_window_probe(sk);
+		} else if (val == TCP_REPAIR_OFF_NO_WP) {
 			tp->repair = 0;
 			sk->sk_reuse = SK_NO_REUSE;
-			if (tp->repair == 1)
-				tcp_send_window_probe(sk);
 		} else
 			err = -EINVAL;
 
-- 
cgit v1.2.3


From 3d85b2703783636366560c94842affd8608ec9d1 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 16 Jul 2018 11:06:02 -0700
Subject: locking/spinlock, sched/core: Clarify requirements for
 smp_mb__after_spinlock()

There are 11 interpretations of the requirements described in the header
comment for smp_mb__after_spinlock(): one for each LKMM maintainer, and
one currently encoded in the Cat file. Stick to the latter (until a more
satisfactory solution is available).

This also reworks some snippets related to the barrier to illustrate the
requirements and to link them to the idioms which are relied upon at its
call sites.

Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: akiyks@gmail.com
Cc: dhowells@redhat.com
Cc: j.alglave@ucl.ac.uk
Cc: linux-arch@vger.kernel.org
Cc: luc.maranget@inria.fr
Cc: npiggin@gmail.com
Cc: parri.andrea@gmail.com
Cc: stern@rowland.harvard.edu
Link: http://lkml.kernel.org/r/20180716180605.16115-11-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/spinlock.h | 53 ++++++++++++++++++++++++++++++++----------------
 kernel/sched/core.c      | 41 +++++++++++++++++++------------------
 2 files changed, 57 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index fd57888d4942..3190997df9ca 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -114,29 +114,48 @@ do {								\
 #endif /*arch_spin_is_contended*/
 
 /*
- * This barrier must provide two things:
+ * smp_mb__after_spinlock() provides the equivalent of a full memory barrier
+ * between program-order earlier lock acquisitions and program-order later
+ * memory accesses.
  *
- *   - it must guarantee a STORE before the spin_lock() is ordered against a
- *     LOAD after it, see the comments at its two usage sites.
+ * This guarantees that the following two properties hold:
  *
- *   - it must ensure the critical section is RCsc.
+ *   1) Given the snippet:
  *
- * The latter is important for cases where we observe values written by other
- * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *	  { X = 0;  Y = 0; }
  *
- * CPU0			CPU1			CPU2
+ *	  CPU0				CPU1
  *
- *			for (;;) {
- *			  if (READ_ONCE(X))
- *			    break;
- *			}
- * X=1
- *			<sched-out>
- *						<sched-in>
- *						r = X;
+ *	  WRITE_ONCE(X, 1);		WRITE_ONCE(Y, 1);
+ *	  spin_lock(S);			smp_mb();
+ *	  smp_mb__after_spinlock();	r1 = READ_ONCE(X);
+ *	  r0 = READ_ONCE(Y);
+ *	  spin_unlock(S);
  *
- * without transitivity it could be that CPU1 observes X!=0 breaks the loop,
- * we get migrated and CPU2 sees X==0.
+ *      it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0)
+ *      and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments
+ *      preceding the call to smp_mb__after_spinlock() in __schedule() and in
+ *      try_to_wake_up().
+ *
+ *   2) Given the snippet:
+ *
+ *  { X = 0;  Y = 0; }
+ *
+ *  CPU0		CPU1				CPU2
+ *
+ *  spin_lock(S);	spin_lock(S);			r1 = READ_ONCE(Y);
+ *  WRITE_ONCE(X, 1);	smp_mb__after_spinlock();	smp_rmb();
+ *  spin_unlock(S);	r0 = READ_ONCE(X);		r2 = READ_ONCE(X);
+ *			WRITE_ONCE(Y, 1);
+ *			spin_unlock(S);
+ *
+ *      it is forbidden that CPU0's critical section executes before CPU1's
+ *      critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1)
+ *      and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments
+ *      preceding the calls to smp_rmb() in try_to_wake_up() for similar
+ *      snippets but "projected" onto two CPUs.
+ *
+ * Property (2) upgrades the lock to an RCsc lock.
  *
  * Since most load-store architectures implement ACQUIRE with an smp_mb() after
  * the LL/SC loop, they need no further barriers. Similarly all our TSO
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..0c5ec2abdf93 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1998,21 +1998,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
 	 * in smp_cond_load_acquire() below.
 	 *
-	 * sched_ttwu_pending()                 try_to_wake_up()
-	 *   [S] p->on_rq = 1;                  [L] P->state
-	 *       UNLOCK rq->lock  -----.
-	 *                              \
-	 *				 +---   RMB
-	 * schedule()                   /
-	 *       LOCK rq->lock    -----'
-	 *       UNLOCK rq->lock
+	 * sched_ttwu_pending()			try_to_wake_up()
+	 *   STORE p->on_rq = 1			  LOAD p->state
+	 *   UNLOCK rq->lock
+	 *
+	 * __schedule() (switch to task 'p')
+	 *   LOCK rq->lock			  smp_rmb();
+	 *   smp_mb__after_spinlock();
+	 *   UNLOCK rq->lock
 	 *
 	 * [task p]
-	 *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+	 *   STORE p->state = UNINTERRUPTIBLE	  LOAD p->on_rq
 	 *
-	 * Pairs with the UNLOCK+LOCK on rq->lock from the
-	 * last wakeup of our task and the schedule that got our task
-	 * current.
+	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+	 * __schedule().  See the comment for smp_mb__after_spinlock().
 	 */
 	smp_rmb();
 	if (p->on_rq && ttwu_remote(p, wake_flags))
@@ -2026,15 +2025,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * One must be running (->on_cpu == 1) in order to remove oneself
 	 * from the runqueue.
 	 *
-	 *  [S] ->on_cpu = 1;	[L] ->on_rq
-	 *      UNLOCK rq->lock
-	 *			RMB
-	 *      LOCK   rq->lock
-	 *  [S] ->on_rq = 0;    [L] ->on_cpu
+	 * __schedule() (switch to task 'p')	try_to_wake_up()
+	 *   STORE p->on_cpu = 1		  LOAD p->on_rq
+	 *   UNLOCK rq->lock
+	 *
+	 * __schedule() (put 'p' to sleep)
+	 *   LOCK rq->lock			  smp_rmb();
+	 *   smp_mb__after_spinlock();
+	 *   STORE p->on_rq = 0			  LOAD p->on_cpu
 	 *
-	 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
-	 * from the consecutive calls to schedule(); the first switching to our
-	 * task, the second putting it to sleep.
+	 * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+	 * __schedule().  See the comment for smp_mb__after_spinlock().
 	 */
 	smp_rmb();
 
-- 
cgit v1.2.3


From 7696f9910a9a40b8a952f57d3428515fabd2d889 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 16 Jul 2018 11:06:03 -0700
Subject: sched/Documentation: Update wake_up() & co. memory-barrier guarantees

Both the implementation and the users' expectation [1] for the various
wakeup primitives have evolved over time, but the documentation has not
kept up with these changes: brings it into 2018.

[1] http://lkml.kernel.org/r/20180424091510.GB4064@hirez.programming.kicks-ass.net

Also applied feedback from Alan Stern.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Akira Yokosawa <akiyks@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Daniel Lustig <dlustig@nvidia.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Jade Alglave <j.alglave@ucl.ac.uk>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luc Maranget <luc.maranget@inria.fr>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: parri.andrea@gmail.com
Link: http://lkml.kernel.org/r/20180716180605.16115-12-paulmck@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/memory-barriers.txt | 43 ++++++++++++++++++++++++---------------
 include/linux/sched.h             |  4 ++--
 kernel/sched/completion.c         |  8 ++++----
 kernel/sched/core.c               | 30 +++++++++++----------------
 kernel/sched/wait.c               |  8 ++++----
 5 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a02d6bbfc9d0..0d8d7ef131e9 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2179,32 +2179,41 @@ or:
 	event_indicated = 1;
 	wake_up_process(event_daemon);
 
-A write memory barrier is implied by wake_up() and co.  if and only if they
-wake something up.  The barrier occurs before the task state is cleared, and so
-sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A general memory barrier is executed by wake_up() if it wakes something up.
+If it doesn't wake anything up then a memory barrier may or may not be
+executed; you must not rely on it.  The barrier occurs before the task state
+is accessed, in particular, it sits between the STORE to indicate the event
+and the STORE to set TASK_RUNNING:
 
-	CPU 1				CPU 2
+	CPU 1 (Sleeper)			CPU 2 (Waker)
 	===============================	===============================
 	set_current_state();		STORE event_indicated
 	  smp_store_mb();		wake_up();
-	    STORE current->state	  <write barrier>
-	    <general barrier>		  STORE current->state
-	LOAD event_indicated
+	    STORE current->state	  ...
+	    <general barrier>		  <general barrier>
+	LOAD event_indicated		  if ((LOAD task->state) & TASK_NORMAL)
+					    STORE task->state
 
-To repeat, this write memory barrier is present if and only if something
-is actually awakened.  To see this, consider the following sequence of
-events, where X and Y are both initially zero:
+where "task" is the thread being woken up and it equals CPU 1's "current".
+
+To repeat, a general memory barrier is guaranteed to be executed by wake_up()
+if something is actually awakened, but otherwise there is no such guarantee.
+To see this, consider the following sequence of events, where X and Y are both
+initially zero:
 
 	CPU 1				CPU 2
 	===============================	===============================
-	X = 1;				STORE event_indicated
+	X = 1;				Y = 1;
 	smp_mb();			wake_up();
-	Y = 1;				wait_event(wq, Y == 1);
-	wake_up();			  load from Y sees 1, no memory barrier
-					load from X might see 0
+	LOAD Y				LOAD X
+
+If a wakeup does occur, one (at least) of the two loads must see 1.  If, on
+the other hand, a wakeup does not occur, both loads might see 0.
 
-In contrast, if a wakeup does occur, CPU 2's load from X would be guaranteed
-to see 1.
+wake_up_process() always executes a general memory barrier.  The barrier again
+occurs before the task state is accessed.  In particular, if the wake_up() in
+the previous snippet were replaced by a call to wake_up_process() then one of
+the two loads would be guaranteed to see 1.
 
 The available waker functions include:
 
@@ -2224,6 +2233,8 @@ The available waker functions include:
 	wake_up_poll();
 	wake_up_process();
 
+In terms of memory ordering, these functions all provide the same guarantees of
+a wake_up() (or stronger).
 
 [!] Note that the memory barriers implied by the sleeper and the waker do _not_
 order multiple stores before the wake-up with respect to loads of those stored
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..05cd419f962d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -167,8 +167,8 @@ struct task_group;
  *   need_sleep = false;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
- * Where wake_up_state() (and all other wakeup primitives) imply enough
- * barriers to order the store of the variable against wakeup.
+ * where wake_up_state() executes a full memory barrier before accessing the
+ * task state.
  *
  * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index e426b0cb9ac6..a1ad5b7d5521 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -22,8 +22,8 @@
  *
  * See also complete_all(), wait_for_completion() and related routines.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void complete(struct completion *x)
 {
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(complete);
  *
  * This will wake up all threads waiting on this particular completion event.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  *
  * Since complete_all() sets the completion of @x permanently to done
  * to allow multiple waiters to finish, a call to reinit_completion()
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0c5ec2abdf93..a0065c84e73f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -412,8 +412,8 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	 * its already queued (either by us or someone else) and will get the
 	 * wakeup due to that.
 	 *
-	 * This cmpxchg() implies a full barrier, which pairs with the write
-	 * barrier implied by the wakeup in wake_up_q().
+	 * This cmpxchg() executes a full barrier, which pairs with the full
+	 * barrier executed by the wakeup in wake_up_q().
 	 */
 	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
 		return;
@@ -441,8 +441,8 @@ void wake_up_q(struct wake_q_head *head)
 		task->wake_q.next = NULL;
 
 		/*
-		 * wake_up_process() implies a wmb() to pair with the queueing
-		 * in wake_q_add() so as not to miss wakeups.
+		 * wake_up_process() executes a full barrier, which pairs with
+		 * the queueing in wake_q_add() so as not to miss wakeups.
 		 */
 		wake_up_process(task);
 		put_task_struct(task);
@@ -1879,8 +1879,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *     rq(c1)->lock (if not at the same time, then in that order).
  *  C) LOCK of the rq(c1)->lock scheduling in task
  *
- * Transitivity guarantees that B happens after A and C after B.
- * Note: we only require RCpc transitivity.
+ * Release/acquire chaining guarantees that B happens after A and C after B.
  * Note: the CPU doing B need not be c0 or c1
  *
  * Example:
@@ -1942,16 +1941,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  *   UNLOCK rq(0)->lock
  *
  *
- * However; for wakeups there is a second guarantee we must provide, namely we
- * must observe the state that lead to our wakeup. That is, not only must our
- * task observe its own prior state, it must also observe the stores prior to
- * its wakeup.
- *
- * This means that any means of doing remote wakeups must order the CPU doing
- * the wakeup against the CPU the task is going to end up running on. This,
- * however, is already required for the regular Program-Order guarantee above,
- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
- *
+ * However, for wakeups there is a second guarantee we must provide, namely we
+ * must ensure that CONDITION=1 done by the caller can not be reordered with
+ * accesses to the task state; see try_to_wake_up() and set_current_state().
  */
 
 /**
@@ -1967,6 +1959,9 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
  * Atomic against schedule() which would dequeue a task, also see
  * set_current_state().
  *
+ * This function executes a full memory barrier before accessing the task
+ * state; see set_current_state().
+ *
  * Return: %true if @p->state changes (an actual wakeup was done),
  *	   %false otherwise.
  */
@@ -2141,8 +2136,7 @@ out:
  *
  * Return: 1 if the process was woken up, 0 if it was already running.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * This function executes a full memory barrier before accessing the task state.
  */
 int wake_up_process(struct task_struct *p)
 {
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index a7a2aaa3026a..870f97b313e3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -134,8 +134,8 @@ static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -180,8 +180,8 @@ EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
  *
  * On UP it can prevent extra preemption.
  *
- * It may be assumed that this function implies a write memory barrier before
- * changing the task state if and only if any tasks are woken up.
+ * If this function wakes up a task, it executes a full memory barrier before
+ * accessing the task state.
  */
 void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
 			int nr_exclusive, void *key)
-- 
cgit v1.2.3


From c1a2f7f0c06454387c2cd7b93ff1491c715a8c69 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 16 Jul 2018 15:03:31 -0400
Subject: mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on
 nr_cpu_ids

The mm_struct always contains a cpumask bitmap, regardless of
CONFIG_CPUMASK_OFFSTACK. That means the first step can be to
simplify things, and simply have one bitmask at the end of the
mm_struct for the mm_cpumask.

This does necessitate moving everything else in mm_struct into
an anonymous sub-structure, which can be randomized when struct
randomization is enabled.

The second step is to determine the correct size for the
mm_struct slab object from the size of the mm_struct
(excluding the CPU bitmap) and the size the cpumask.

For init_mm we can simply allocate the maximum size this
kernel is compiled for, since we only have one init_mm
in the system, anyway.

Pointer magic by Mike Galbraith, to evade -Wstringop-overflow
getting confused by the dynamically sized array.

Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Link: http://lkml.kernel.org/r/20180716190337.26133-2-riel@surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c |   1 +
 include/linux/mm_types.h   | 241 +++++++++++++++++++++++----------------------
 kernel/fork.c              |  15 +--
 mm/init-mm.c               |  11 +++
 4 files changed, 145 insertions(+), 123 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 232f4915223b..7f0b19410a95 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -82,6 +82,7 @@ struct mm_struct efi_mm = {
 	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
+	.cpu_bitmap		= { [BITS_TO_LONGS(NR_CPUS)] = 0},
 };
 
 static bool disable_runtime;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..efdc24dd9e97 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -335,176 +335,183 @@ struct core_state {
 
 struct kioctx_table;
 struct mm_struct {
-	struct vm_area_struct *mmap;		/* list of VMAs */
-	struct rb_root mm_rb;
-	u32 vmacache_seqnum;                   /* per-thread vmacache */
+	struct {
+		struct vm_area_struct *mmap;		/* list of VMAs */
+		struct rb_root mm_rb;
+		u32 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
-	unsigned long (*get_unmapped_area) (struct file *filp,
+		unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
 				unsigned long pgoff, unsigned long flags);
 #endif
-	unsigned long mmap_base;		/* base of mmap area */
-	unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
+		unsigned long mmap_base;	/* base of mmap area */
+		unsigned long mmap_legacy_base;	/* base of mmap area in bottom-up allocations */
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
-	/* Base adresses for compatible mmap() */
-	unsigned long mmap_compat_base;
-	unsigned long mmap_compat_legacy_base;
+		/* Base adresses for compatible mmap() */
+		unsigned long mmap_compat_base;
+		unsigned long mmap_compat_legacy_base;
 #endif
-	unsigned long task_size;		/* size of task vm space */
-	unsigned long highest_vm_end;		/* highest vma end address */
-	pgd_t * pgd;
-
-	/**
-	 * @mm_users: The number of users including userspace.
-	 *
-	 * Use mmget()/mmget_not_zero()/mmput() to modify. When this drops
-	 * to 0 (i.e. when the task exits and there are no other temporary
-	 * reference holders), we also release a reference on @mm_count
-	 * (which may then free the &struct mm_struct if @mm_count also
-	 * drops to 0).
-	 */
-	atomic_t mm_users;
-
-	/**
-	 * @mm_count: The number of references to &struct mm_struct
-	 * (@mm_users count as 1).
-	 *
-	 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
-	 * &struct mm_struct is freed.
-	 */
-	atomic_t mm_count;
+		unsigned long task_size;	/* size of task vm space */
+		unsigned long highest_vm_end;	/* highest vma end address */
+		pgd_t * pgd;
+
+		/**
+		 * @mm_users: The number of users including userspace.
+		 *
+		 * Use mmget()/mmget_not_zero()/mmput() to modify. When this
+		 * drops to 0 (i.e. when the task exits and there are no other
+		 * temporary reference holders), we also release a reference on
+		 * @mm_count (which may then free the &struct mm_struct if
+		 * @mm_count also drops to 0).
+		 */
+		atomic_t mm_users;
+
+		/**
+		 * @mm_count: The number of references to &struct mm_struct
+		 * (@mm_users count as 1).
+		 *
+		 * Use mmgrab()/mmdrop() to modify. When this drops to 0, the
+		 * &struct mm_struct is freed.
+		 */
+		atomic_t mm_count;
 
 #ifdef CONFIG_MMU
-	atomic_long_t pgtables_bytes;		/* PTE page table pages */
+		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
-	int map_count;				/* number of VMAs */
+		int map_count;			/* number of VMAs */
 
-	spinlock_t page_table_lock;		/* Protects page tables and some counters */
-	struct rw_semaphore mmap_sem;
+		spinlock_t page_table_lock; /* Protects page tables and some
+					     * counters
+					     */
+		struct rw_semaphore mmap_sem;
 
-	struct list_head mmlist;		/* List of maybe swapped mm's.	These are globally strung
-						 * together off init_mm.mmlist, and are protected
-						 * by mmlist_lock
-						 */
+		struct list_head mmlist; /* List of maybe swapped mm's.	These
+					  * are globally strung together off
+					  * init_mm.mmlist, and are protected
+					  * by mmlist_lock
+					  */
 
 
-	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
-	unsigned long hiwater_vm;	/* High-water virtual memory usage */
+		unsigned long hiwater_rss; /* High-watermark of RSS usage */
+		unsigned long hiwater_vm;  /* High-water virtual memory usage */
 
-	unsigned long total_vm;		/* Total pages mapped */
-	unsigned long locked_vm;	/* Pages that have PG_mlocked set */
-	unsigned long pinned_vm;	/* Refcount permanently increased */
-	unsigned long data_vm;		/* VM_WRITE & ~VM_SHARED & ~VM_STACK */
-	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE & ~VM_STACK */
-	unsigned long stack_vm;		/* VM_STACK */
-	unsigned long def_flags;
+		unsigned long total_vm;	   /* Total pages mapped */
+		unsigned long locked_vm;   /* Pages that have PG_mlocked set */
+		unsigned long pinned_vm;   /* Refcount permanently increased */
+		unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+		unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+		unsigned long stack_vm;	   /* VM_STACK */
+		unsigned long def_flags;
 
-	spinlock_t arg_lock; /* protect the below fields */
-	unsigned long start_code, end_code, start_data, end_data;
-	unsigned long start_brk, brk, start_stack;
-	unsigned long arg_start, arg_end, env_start, env_end;
+		spinlock_t arg_lock; /* protect the below fields */
+		unsigned long start_code, end_code, start_data, end_data;
+		unsigned long start_brk, brk, start_stack;
+		unsigned long arg_start, arg_end, env_start, env_end;
 
-	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+		unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
-	/*
-	 * Special counters, in some configurations protected by the
-	 * page_table_lock, in other configurations by being atomic.
-	 */
-	struct mm_rss_stat rss_stat;
-
-	struct linux_binfmt *binfmt;
+		/*
+		 * Special counters, in some configurations protected by the
+		 * page_table_lock, in other configurations by being atomic.
+		 */
+		struct mm_rss_stat rss_stat;
 
-	cpumask_var_t cpu_vm_mask_var;
+		struct linux_binfmt *binfmt;
 
-	/* Architecture-specific MM context */
-	mm_context_t context;
+		/* Architecture-specific MM context */
+		mm_context_t context;
 
-	unsigned long flags; /* Must use atomic bitops to access the bits */
+		unsigned long flags; /* Must use atomic bitops to access */
 
-	struct core_state *core_state; /* coredumping support */
+		struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_MEMBARRIER
-	atomic_t membarrier_state;
+		atomic_t membarrier_state;
 #endif
 #ifdef CONFIG_AIO
-	spinlock_t			ioctx_lock;
-	struct kioctx_table __rcu	*ioctx_table;
+		spinlock_t			ioctx_lock;
+		struct kioctx_table __rcu	*ioctx_table;
 #endif
 #ifdef CONFIG_MEMCG
-	/*
-	 * "owner" points to a task that is regarded as the canonical
-	 * user/owner of this mm. All of the following must be true in
-	 * order for it to be changed:
-	 *
-	 * current == mm->owner
-	 * current->mm != mm
-	 * new_owner->mm == mm
-	 * new_owner->alloc_lock is held
-	 */
-	struct task_struct __rcu *owner;
+		/*
+		 * "owner" points to a task that is regarded as the canonical
+		 * user/owner of this mm. All of the following must be true in
+		 * order for it to be changed:
+		 *
+		 * current == mm->owner
+		 * current->mm != mm
+		 * new_owner->mm == mm
+		 * new_owner->alloc_lock is held
+		 */
+		struct task_struct __rcu *owner;
 #endif
-	struct user_namespace *user_ns;
+		struct user_namespace *user_ns;
 
-	/* store ref to file /proc/<pid>/exe symlink points to */
-	struct file __rcu *exe_file;
+		/* store ref to file /proc/<pid>/exe symlink points to */
+		struct file __rcu *exe_file;
 #ifdef CONFIG_MMU_NOTIFIER
-	struct mmu_notifier_mm *mmu_notifier_mm;
+		struct mmu_notifier_mm *mmu_notifier_mm;
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	pgtable_t pmd_huge_pte; /* protected by page_table_lock */
-#endif
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	struct cpumask cpumask_allocation;
+		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
 #endif
 #ifdef CONFIG_NUMA_BALANCING
-	/*
-	 * numa_next_scan is the next time that the PTEs will be marked
-	 * pte_numa. NUMA hinting faults will gather statistics and migrate
-	 * pages to new nodes if necessary.
-	 */
-	unsigned long numa_next_scan;
+		/*
+		 * numa_next_scan is the next time that the PTEs will be marked
+		 * pte_numa. NUMA hinting faults will gather statistics and
+		 * migrate pages to new nodes if necessary.
+		 */
+		unsigned long numa_next_scan;
 
-	/* Restart point for scanning and setting pte_numa */
-	unsigned long numa_scan_offset;
+		/* Restart point for scanning and setting pte_numa */
+		unsigned long numa_scan_offset;
 
-	/* numa_scan_seq prevents two threads setting pte_numa */
-	int numa_scan_seq;
+		/* numa_scan_seq prevents two threads setting pte_numa */
+		int numa_scan_seq;
 #endif
-	/*
-	 * An operation with batched TLB flushing is going on. Anything that
-	 * can move process memory needs to flush the TLB when moving a
-	 * PROT_NONE or PROT_NUMA mapped page.
-	 */
-	atomic_t tlb_flush_pending;
+		/*
+		 * An operation with batched TLB flushing is going on. Anything
+		 * that can move process memory needs to flush the TLB when
+		 * moving a PROT_NONE or PROT_NUMA mapped page.
+		 */
+		atomic_t tlb_flush_pending;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-	/* See flush_tlb_batched_pending() */
-	bool tlb_flush_batched;
+		/* See flush_tlb_batched_pending() */
+		bool tlb_flush_batched;
 #endif
-	struct uprobes_state uprobes_state;
+		struct uprobes_state uprobes_state;
 #ifdef CONFIG_HUGETLB_PAGE
-	atomic_long_t hugetlb_usage;
+		atomic_long_t hugetlb_usage;
 #endif
-	struct work_struct async_put_work;
+		struct work_struct async_put_work;
 
 #if IS_ENABLED(CONFIG_HMM)
-	/* HMM needs to track a few things per mm */
-	struct hmm *hmm;
+		/* HMM needs to track a few things per mm */
+		struct hmm *hmm;
 #endif
-} __randomize_layout;
+	} __randomize_layout;
+
+	/*
+	 * The mm_cpumask needs to be at the end of mm_struct, because it
+	 * is dynamically sized based on nr_cpu_ids.
+	 */
+	unsigned long cpu_bitmap[];
+};
 
 extern struct mm_struct init_mm;
 
+/* Pointer magic because the dynamic array size confuses some compilers. */
 static inline void mm_init_cpumask(struct mm_struct *mm)
 {
-#ifdef CONFIG_CPUMASK_OFFSTACK
-	mm->cpu_vm_mask_var = &mm->cpumask_allocation;
-#endif
-	cpumask_clear(mm->cpu_vm_mask_var);
+	unsigned long cpu_bitmap = (unsigned long)mm;
+
+	cpu_bitmap += offsetof(struct mm_struct, cpu_bitmap);
+	cpumask_clear((struct cpumask *)cpu_bitmap);
 }
 
 /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
 static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 {
-	return mm->cpu_vm_mask_var;
+	return (struct cpumask *)&mm->cpu_bitmap;
 }
 
 struct mmu_gather;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..5b64c1b8461e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2253,6 +2253,8 @@ static void sighand_ctor(void *data)
 
 void __init proc_caches_init(void)
 {
+	unsigned int mm_size;
+
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -2269,15 +2271,16 @@ void __init proc_caches_init(void)
 			sizeof(struct fs_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
+
 	/*
-	 * FIXME! The "sizeof(struct mm_struct)" currently includes the
-	 * whole struct cpumask for the OFFSTACK case. We could change
-	 * this to *only* allocate as much of it as required by the
-	 * maximum number of CPU's we can ever have.  The cpumask_allocation
-	 * is at the end of the structure, exactly for that reason.
+	 * The mm_cpumask is located at the end of mm_struct, and is
+	 * dynamically sized based on the maximum CPU number this system
+	 * can have, taking hotplug into account (nr_cpu_ids).
 	 */
+	mm_size = sizeof(struct mm_struct) + cpumask_size();
+
 	mm_cachep = kmem_cache_create_usercopy("mm_struct",
-			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
+			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			offsetof(struct mm_struct, saved_auxv),
 			sizeof_field(struct mm_struct, saved_auxv),
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
 #define INIT_MM_CONTEXT(name)
 #endif
 
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
 	.pgd		= swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
 	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 	.user_ns	= &init_user_ns,
+	.cpu_bitmap	= { [BITS_TO_LONGS(NR_CPUS)] = 0},
 	INIT_MM_CONTEXT(init_mm)
 };
-- 
cgit v1.2.3


From 2ff6ddf19c0ec40633bd14d8fe28a289816bd98d Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 16 Jul 2018 15:03:32 -0400
Subject: x86/mm/tlb: Leave lazy TLB mode at page table free time

Andy discovered that speculative memory accesses while in lazy
TLB mode can crash a system, when a CPU tries to dereference a
speculative access using memory contents that used to be valid
page table memory, but have since been reused for something else
and point into la-la land.

The latter problem can be prevented in two ways. The first is to
always send a TLB shootdown IPI to CPUs in lazy TLB mode, while
the second one is to only send the TLB shootdown at page table
freeing time.

The second should result in fewer IPIs, since operationgs like
mprotect and madvise are very common with some workloads, but
do not involve page table freeing. Also, on munmap, batching
of page table freeing covers much larger ranges of virtual
memory than the batching of unmapped user pages.

Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: efault@gmx.de
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Link: http://lkml.kernel.org/r/20180716190337.26133-3-riel@surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/tlbflush.h |  5 +++++
 arch/x86/mm/tlb.c               | 27 +++++++++++++++++++++++++++
 include/asm-generic/tlb.h       | 10 ++++++++++
 mm/memory.c                     | 22 ++++++++++++++--------
 4 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 6690cd3fc8b1..3aa3204b5dc0 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -554,4 +554,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 	native_flush_tlb_others(mask, info)
 #endif
 
+extern void tlb_flush_remove_tables(struct mm_struct *mm);
+extern void tlb_flush_remove_tables_local(void *arg);
+
+#define HAVE_TLB_FLUSH_REMOVE_TABLES
+
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6eb1f34c3c85..9a893673c56b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -646,6 +646,33 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	put_cpu();
 }
 
+void tlb_flush_remove_tables_local(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm &&
+			this_cpu_read(cpu_tlbstate.is_lazy)) {
+		/*
+		 * We're in lazy mode.  We need to at least flush our
+		 * paging-structure cache to avoid speculatively reading
+		 * garbage into our TLB.  Since switching to init_mm is barely
+		 * slower than a minimal flush, just switch to init_mm.
+		 */
+		switch_mm_irqs_off(NULL, &init_mm, NULL);
+	}
+}
+
+void tlb_flush_remove_tables(struct mm_struct *mm)
+{
+	int cpu = get_cpu();
+	/*
+	 * XXX: this really only needs to be called for CPUs in lazy TLB mode.
+	 */
+	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
+		smp_call_function_many(mm_cpumask(mm), tlb_flush_remove_tables_local, (void *)mm, 1);
+
+	put_cpu();
+}
 
 static void do_flush_tlb_all(void *info)
 {
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 3063125197ad..e811ef7b8350 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -303,4 +303,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 
 #define tlb_migrate_finish(mm) do {} while (0)
 
+/*
+ * Used to flush the TLB when page tables are removed, when lazy
+ * TLB mode may cause a CPU to retain intermediate translations
+ * pointing to about-to-be-freed page table memory.
+ */
+#ifndef HAVE_TLB_FLUSH_REMOVE_TABLES
+#define tlb_flush_remove_tables(mm) do {} while (0)
+#define tlb_flush_remove_tables_local(mm) do {} while (0)
+#endif
+
 #endif /* _ASM_GENERIC__TLB_H */
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..18355e0b971a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 
-/*
- * See the comment near struct mmu_table_batch.
- */
-
 static void tlb_remove_table_smp_sync(void *arg)
 {
-	/* Simply deliver the interrupt */
+	struct mm_struct __maybe_unused *mm = arg;
+	/*
+	 * On most architectures this does nothing. Simply delivering the
+	 * interrupt is enough to prevent races with software page table
+	 * walking like that done in get_user_pages_fast.
+	 *
+	 * See the comment near struct mmu_table_batch.
+	 */
+	tlb_flush_remove_tables_local(mm);
 }
 
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
 {
 	/*
 	 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
 	 * It is however sufficient for software page-table walkers that rely on
 	 * IRQ disabling. See the comment near struct mmu_table_batch.
 	 */
-	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
 	__tlb_remove_table(table);
 }
 
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
+	tlb_flush_remove_tables(tlb->mm);
+
 	if (*batch) {
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		*batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		if (*batch == NULL) {
-			tlb_remove_table_one(table);
+			tlb_remove_table_one(table, tlb);
 			return;
 		}
 		(*batch)->nr = 0;
-- 
cgit v1.2.3


From ffc59c496bf8498657321c59433f55bbcf2d9c38 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 10 Jul 2018 23:42:16 +0200
Subject: i2c: recovery: require either get_sda or set_sda

For bus recovery, we either need to bail out early if we can read SDA or
we need to send STOP after every pulse. Otherwise recovery might be
misinterpreted as an unwanted write. So, require one of those SDA
handling functions to avoid this problem.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c |  7 ++++++-
 include/linux/i2c.h         | 12 ++++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 301285c54603..871a9731894f 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -202,7 +202,8 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 		/*
 		 * If we can set SDA, we will always create STOP here to ensure
 		 * the additional pulses will do no harm. This is achieved by
-		 * letting SDA follow SCL half a cycle later.
+		 * letting SDA follow SCL half a cycle later. Check the
+		 * 'incomplete_write_byte' fault injector for details.
 		 */
 		ndelay(RECOVERY_NDELAY / 2);
 		if (bri->set_sda)
@@ -274,6 +275,10 @@ static void i2c_init_recovery(struct i2c_adapter *adap)
 			err_str = "no {get|set}_scl() found";
 			goto err;
 		}
+		if (!bri->set_sda && !bri->get_sda) {
+			err_str = "either get_sda() or set_sda() needed";
+			goto err;
+		}
 	}
 
 	return;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 24acd69c8874..4c4360f94786 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -581,12 +581,12 @@ struct i2c_timings {
  *      recovery. Populated internally for generic GPIO recovery.
  * @set_scl: This sets/clears the SCL line. Mandatory for generic SCL recovery.
  *      Populated internally for generic GPIO recovery.
- * @get_sda: This gets current value of SDA line. Optional for generic SCL
- *      recovery. Populated internally, if sda_gpio is a valid GPIO, for generic
- *      GPIO recovery.
- * @set_sda: This sets/clears the SDA line. Optional for generic SCL recovery.
- *	Populated internally, if sda_gpio is a valid GPIO, for generic GPIO
- *	recovery.
+ * @get_sda: This gets current value of SDA line. This or set_sda() is mandatory
+ *	for generic SCL recovery. Populated internally, if sda_gpio is a valid
+ *	GPIO, for generic GPIO recovery.
+ * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
+ *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
+ *	for generic GPIO recovery.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
-- 
cgit v1.2.3


From 7ca5f6be7900ca753ed01c0202dc5f998a41f4ee Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 11 Jul 2018 00:24:22 +0200
Subject: i2c: recovery: add get_bus_free callback

Some IP cores have an internal 'bus free' logic which may be more
advanced than just checking if SDA is high. Add a separate callback to
get this status. Filling it is optional.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 27 +++++++++++++++++++++++----
 include/linux/i2c.h         |  3 +++
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index c7995efd58ea..59f8dfc5be36 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -158,6 +158,22 @@ static void set_sda_gpio_value(struct i2c_adapter *adap, int val)
 	gpiod_set_value_cansleep(adap->bus_recovery_info->sda_gpiod, val);
 }
 
+static int i2c_generic_bus_free(struct i2c_adapter *adap)
+{
+	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
+	int ret = -EOPNOTSUPP;
+
+	if (bri->get_bus_free)
+		ret = bri->get_bus_free(adap);
+	else if (bri->get_sda)
+		ret = bri->get_sda(adap);
+
+	if (ret < 0)
+		return ret;
+
+	return ret ? 0 : -EBUSY;
+}
+
 /*
  * We are generating clock pulses. ndelay() determines durating of clk pulses.
  * We will generate clock with rate 100 KHz and so duration of both clock levels
@@ -169,7 +185,7 @@ static void set_sda_gpio_value(struct i2c_adapter *adap, int val)
 int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 {
 	struct i2c_bus_recovery_info *bri = adap->bus_recovery_info;
-	int i = 0, val = 1, ret = 0;
+	int i = 0, val = 1, ret;
 
 	if (bri->prepare_recovery)
 		bri->prepare_recovery(adap);
@@ -207,14 +223,17 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 			bri->set_sda(adap, val);
 		ndelay(RECOVERY_NDELAY / 2);
 
-		/* Break if SDA is high */
-		if (val && bri->get_sda) {
-			ret = bri->get_sda(adap) ? 0 : -EBUSY;
+		if (val) {
+			ret = i2c_generic_bus_free(adap);
 			if (ret == 0)
 				break;
 		}
 	}
 
+	/* If we can't check bus status, assume recovery worked */
+	if (ret == -EOPNOTSUPP)
+		ret = 0;
+
 	if (bri->unprepare_recovery)
 		bri->unprepare_recovery(adap);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4c4360f94786..bc8d42f8544f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -587,6 +587,8 @@ struct i2c_timings {
  * @set_sda: This sets/clears the SDA line. This or get_sda() is mandatory for
  *	generic SCL recovery. Populated internally, if sda_gpio is a valid GPIO,
  *	for generic GPIO recovery.
+ * @get_bus_free: Returns the bus free state as seen from the IP core in case it
+ *	has a more complex internal logic than just reading SDA. Optional.
  * @prepare_recovery: This will be called before starting recovery. Platform may
  *	configure padmux here for SDA/SCL line or something else they want.
  * @unprepare_recovery: This will be called after completing recovery. Platform
@@ -601,6 +603,7 @@ struct i2c_bus_recovery_info {
 	void (*set_scl)(struct i2c_adapter *adap, int val);
 	int (*get_sda)(struct i2c_adapter *adap);
 	void (*set_sda)(struct i2c_adapter *adap, int val);
+	int (*get_bus_free)(struct i2c_adapter *adap);
 
 	void (*prepare_recovery)(struct i2c_adapter *adap);
 	void (*unprepare_recovery)(struct i2c_adapter *adap);
-- 
cgit v1.2.3


From 4aaf448fa9754e2d5ee188d32327b24ffc15ca4d Mon Sep 17 00:00:00 2001
From: Jim Qu <Jim.Qu@amd.com>
Date: Tue, 17 Jul 2018 16:20:50 +0800
Subject: vga_switcheroo: set audio client id according to bound GPU id

On modern laptop, there are more and more platforms
have two GPUs, and each of them maybe have audio codec
for HDMP/DP output. For some dGPU which is no output,
audio codec usually is disabled.

In currect HDA audio driver, it will set all codec as
VGA_SWITCHEROO_DIS, the audio which is binded to UMA
will be suspended if user use debugfs to contorl power

In HDA driver side, it is difficult to know which GPU
the audio has binded to. So set the bound gpu pci dev
to vga_switcheroo.

if the audio client is not the third registration, audio
id will set in vga_switcheroo enable function. if the
audio client is the last registration when vga_switcheroo
_ready() get true, we should get audio client id from bound
GPU directly.

Signed-off-by: Jim Qu <Jim.Qu@amd.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c | 63 +++++++++++++++++++++++++++++++++-------
 include/linux/vga_switcheroo.h   |  8 ++---
 sound/pci/hda/hda_intel.c        | 11 +++----
 3 files changed, 62 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index fc4adf3d34e8..a96bf46bc483 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -103,9 +103,11 @@
  *	runtime pm. If true, writing ON and OFF to the vga_switcheroo debugfs
  *	interface is a no-op so as not to interfere with runtime pm
  * @list: client list
+ * @vga_dev: pci device, indicate which GPU is bound to current audio client
  *
  * Registered client. A client can be either a GPU or an audio device on a GPU.
- * For audio clients, the @fb_info and @active members are bogus.
+ * For audio clients, the @fb_info and @active members are bogus. For GPU
+ * clients, the @vga_dev is bogus.
  */
 struct vga_switcheroo_client {
 	struct pci_dev *pdev;
@@ -116,6 +118,7 @@ struct vga_switcheroo_client {
 	bool active;
 	bool driver_power_control;
 	struct list_head list;
+	struct pci_dev *vga_dev;
 };
 
 /*
@@ -161,9 +164,8 @@ struct vgasr_priv {
 };
 
 #define ID_BIT_AUDIO		0x100
-#define client_is_audio(c)	((c)->id & ID_BIT_AUDIO)
-#define client_is_vga(c)	((c)->id == VGA_SWITCHEROO_UNKNOWN_ID || \
-				 !client_is_audio(c))
+#define client_is_audio(c)		((c)->id & ID_BIT_AUDIO)
+#define client_is_vga(c)		(!client_is_audio(c))
 #define client_id(c)		((c)->id & ~ID_BIT_AUDIO)
 
 static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
@@ -192,14 +194,29 @@ static void vga_switcheroo_enable(void)
 		vgasr_priv.handler->init();
 
 	list_for_each_entry(client, &vgasr_priv.clients, list) {
-		if (client->id != VGA_SWITCHEROO_UNKNOWN_ID)
+		if (!client_is_vga(client) ||
+		     client_id(client) != VGA_SWITCHEROO_UNKNOWN_ID)
 			continue;
+
 		ret = vgasr_priv.handler->get_client_id(client->pdev);
 		if (ret < 0)
 			return;
 
 		client->id = ret;
 	}
+
+	list_for_each_entry(client, &vgasr_priv.clients, list) {
+		if (!client_is_audio(client) ||
+		     client_id(client) != VGA_SWITCHEROO_UNKNOWN_ID)
+			continue;
+
+		ret = vgasr_priv.handler->get_client_id(client->vga_dev);
+		if (ret < 0)
+			return;
+
+		client->id = ret | ID_BIT_AUDIO;
+	}
+
 	vga_switcheroo_debugfs_init(&vgasr_priv);
 	vgasr_priv.active = true;
 }
@@ -272,7 +289,9 @@ EXPORT_SYMBOL(vga_switcheroo_handler_flags);
 
 static int register_client(struct pci_dev *pdev,
 			   const struct vga_switcheroo_client_ops *ops,
-			   enum vga_switcheroo_client_id id, bool active,
+			   enum vga_switcheroo_client_id id,
+			   struct pci_dev *vga_dev,
+			   bool active,
 			   bool driver_power_control)
 {
 	struct vga_switcheroo_client *client;
@@ -287,6 +306,7 @@ static int register_client(struct pci_dev *pdev,
 	client->id = id;
 	client->active = active;
 	client->driver_power_control = driver_power_control;
+	client->vga_dev = vga_dev;
 
 	mutex_lock(&vgasr_mutex);
 	list_add_tail(&client->list, &vgasr_priv.clients);
@@ -319,7 +339,7 @@ int vga_switcheroo_register_client(struct pci_dev *pdev,
 				   const struct vga_switcheroo_client_ops *ops,
 				   bool driver_power_control)
 {
-	return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID,
+	return register_client(pdev, ops, VGA_SWITCHEROO_UNKNOWN_ID, NULL,
 			       pdev == vga_default_device(),
 			       driver_power_control);
 }
@@ -329,19 +349,40 @@ EXPORT_SYMBOL(vga_switcheroo_register_client);
  * vga_switcheroo_register_audio_client - register audio client
  * @pdev: client pci device
  * @ops: client callbacks
- * @id: client identifier
+ * @vga_dev:  pci device which is bound to current audio client
  *
  * Register audio client (audio device on a GPU). The client is assumed
  * to use runtime PM. Beforehand, vga_switcheroo_client_probe_defer()
  * shall be called to ensure that all prerequisites are met.
  *
- * Return: 0 on success, -ENOMEM on memory allocation error.
+ * Return: 0 on success, -ENOMEM on memory allocation error, -EINVAL on getting
+ * client id error.
  */
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 			const struct vga_switcheroo_client_ops *ops,
-			enum vga_switcheroo_client_id id)
+			struct pci_dev *vga_dev)
 {
-	return register_client(pdev, ops, id | ID_BIT_AUDIO, false, true);
+	enum vga_switcheroo_client_id id = VGA_SWITCHEROO_UNKNOWN_ID;
+
+	/*
+	 * if vga_switcheroo has enabled, that mean two GPU clients and also
+	 * handler are registered. Get audio client id from bound GPU client
+	 * id directly, otherwise, set it as VGA_SWITCHEROO_UNKNOWN_ID,
+	 * it will set to correct id in later when vga_switcheroo_enable()
+	 * is called.
+	 */
+	mutex_lock(&vgasr_mutex);
+	if (vgasr_priv.active) {
+		id = vgasr_priv.handler->get_client_id(vga_dev);
+		if (id < 0) {
+			mutex_unlock(&vgasr_mutex);
+			return -EINVAL;
+		}
+	}
+	mutex_unlock(&vgasr_mutex);
+
+	return register_client(pdev, ops, id | ID_BIT_AUDIO, vga_dev,
+			       false, true);
 }
 EXPORT_SYMBOL(vga_switcheroo_register_audio_client);
 
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 77f0f0af3a71..a34539b7f750 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -84,8 +84,8 @@ enum vga_switcheroo_state {
  * Client identifier. Audio clients use the same identifier & 0x100.
  */
 enum vga_switcheroo_client_id {
-	VGA_SWITCHEROO_UNKNOWN_ID = -1,
-	VGA_SWITCHEROO_IGD,
+	VGA_SWITCHEROO_UNKNOWN_ID = 0x1000,
+	VGA_SWITCHEROO_IGD = 0,
 	VGA_SWITCHEROO_DIS,
 	VGA_SWITCHEROO_MAX_CLIENTS,
 };
@@ -151,7 +151,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev,
 				   bool driver_power_control);
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
-					 enum vga_switcheroo_client_id id);
+					 struct pci_dev *vga_dev);
 
 void vga_switcheroo_client_fb_set(struct pci_dev *dev,
 				  struct fb_info *info);
@@ -180,7 +180,7 @@ static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_ha
 		enum vga_switcheroo_handler_flags_t handler_flags) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	const struct vga_switcheroo_client_ops *ops,
-	enum vga_switcheroo_client_id id) { return 0; }
+	struct pci_dev *vga_dev) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline enum vga_switcheroo_handler_flags_t vga_switcheroo_handler_flags(void) { return 0; }
 static inline int vga_switcheroo_lock_ddc(struct pci_dev *pdev) { return -ENODEV; }
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1ae1850b3bfd..1967a5537d68 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1319,15 +1319,16 @@ static const struct vga_switcheroo_client_ops azx_vs_ops = {
 static int register_vga_switcheroo(struct azx *chip)
 {
 	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
+	struct pci_dev *p;
 	int err;
 
 	if (!hda->use_vga_switcheroo)
 		return 0;
-	/* FIXME: currently only handling DIS controller
-	 * is there any machine with two switchable HDMI audio controllers?
-	 */
-	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops,
-						   VGA_SWITCHEROO_DIS);
+
+	p = get_bound_vga(chip->pci);
+	err = vga_switcheroo_register_audio_client(chip->pci, &azx_vs_ops, p);
+	pci_dev_put(p);
+
 	if (err < 0)
 		return err;
 	hda->vga_switcheroo_registered = 1;
-- 
cgit v1.2.3


From 5b56c182edb1224bc1a97a1c74003eaa0eb59daf Mon Sep 17 00:00:00 2001
From: Wenyou Yang <wenyou.yang@atmel.com>
Date: Tue, 17 Jul 2018 11:26:55 +0300
Subject: ARM: at91: pm: Add ULP1 mode support

In the ULP1 mode, in order to achieve the lowest power consumption
with the system in retention mode and be able to resume on the wake
up events, all the clocks are shut off, inclusive the embedded 12MHz
RC oscillator, and the number of wake up sources is limited as well.
When the wake up event is asserted, the embedded 12MHz RC oscillator
restarts automatically.

The ULP1 (Ultra Low-power mode 1) is introduced by SAMA5D2.

The previous size of pm_suspend.o was 2148 bytes. With the addition of
ULP1 mode the new size of pm_suspend.o raised at 2456 bytes.

Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com>
Signed-off-by: Ludovic Desroches <ludovic.desroches@microchip.com>
[claudiu.beznea@microchip.com: aligned with 4.18-rc1]
Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/arm/mach-at91/pm.c         |   1 +
 arch/arm/mach-at91/pm.h         |   3 +-
 arch/arm/mach-at91/pm_suspend.S | 142 ++++++++++++++++++++++++++++++++++------
 include/linux/clk/at91_pmc.h    |   2 +
 4 files changed, 127 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index d43f00a715d7..099d8094018c 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -42,6 +42,7 @@ extern void at91_pinctrl_gpio_resume(void);
 static const match_table_t pm_modes __initconst = {
 	{ AT91_PM_STANDBY, "standby" },
 	{ AT91_PM_ULP0, "ulp0" },
+	{ AT91_PM_ULP1, "ulp1" },
 	{ AT91_PM_BACKUP, "backup" },
 	{ -1, NULL },
 };
diff --git a/arch/arm/mach-at91/pm.h b/arch/arm/mach-at91/pm.h
index c44eaf17db86..9bd4e6ca672a 100644
--- a/arch/arm/mach-at91/pm.h
+++ b/arch/arm/mach-at91/pm.h
@@ -23,7 +23,8 @@
 
 #define	AT91_PM_STANDBY		0x00
 #define AT91_PM_ULP0		0x01
-#define	AT91_PM_BACKUP		0x02
+#define AT91_PM_ULP1		0x02
+#define	AT91_PM_BACKUP		0x03
 
 #ifndef __ASSEMBLY__
 struct at91_pm_data {
diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
index 821322d1a64d..a7c6ae13c945 100644
--- a/arch/arm/mach-at91/pm_suspend.S
+++ b/arch/arm/mach-at91/pm_suspend.S
@@ -41,6 +41,15 @@ tmp2	.req	r5
 	beq	1b
 	.endm
 
+/*
+ * Wait for main oscillator selection is done
+ */
+	.macro wait_moscsels
+1:	ldr	tmp1, [pmc, #AT91_PMC_SR]
+	tst	tmp1, #AT91_PMC_MOSCSELS
+	beq	1b
+	.endm
+
 /*
  * Wait until PLLA has locked.
  */
@@ -112,19 +121,20 @@ ENTRY(at91_pm_suspend_in_sram)
 	bl	at91_sramc_self_refresh
 
 	ldr	r0, .pm_mode
-	cmp	r0, #AT91_PM_ULP0
-	beq	ulp0_mode
+	cmp	r0, #AT91_PM_STANDBY
+	beq	standby
 	cmp	r0, #AT91_PM_BACKUP
 	beq	backup_mode
 
+	bl	at91_ulp_mode
+	b	exit_suspend
+
+standby:
 	/* Wait for interrupt */
 	ldr	pmc, .pmc_base
 	at91_cpu_idle
 	b	exit_suspend
 
-ulp0_mode:
-	bl	at91_ulp0_mode
-	b	exit_suspend
 backup_mode:
 	bl	at91_backup_mode
 	b	exit_suspend
@@ -151,7 +161,102 @@ ENTRY(at91_backup_mode)
 	str	tmp1, [r0, #0]
 ENDPROC(at91_backup_mode)
 
-ENTRY(at91_ulp0_mode)
+.macro at91_pm_ulp0_mode
+	ldr	pmc, .pmc_base
+
+	/* Turn off the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	/* Wait for interrupt */
+	at91_cpu_idle
+
+	/* Turn on the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscrdy
+.endm
+
+/**
+ * Note: This procedure only applies on the platform which uses
+ * the external crystal oscillator as a main clock source.
+ */
+.macro at91_pm_ulp1_mode
+	ldr	pmc, .pmc_base
+
+	/* Switch the main clock source to 12-MHz RC oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCSEL
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscsels
+
+	/* Disable the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	/* Switch the master clock source to main clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	orr	tmp1, tmp1, #AT91_PMC_CSS_MAIN
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+
+	/* Enter the ULP1 mode by set WAITMODE bit in CKGR_MOR */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_WAITMODE
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_mckrdy
+
+	/* Enable the crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscrdy
+
+	/* Switch the master clock source to slow clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+
+	/* Switch main clock source to crystal oscillator */
+	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
+	orr	tmp1, tmp1, #AT91_PMC_MOSCSEL
+	bic	tmp1, tmp1, #AT91_PMC_KEY_MASK
+	orr	tmp1, tmp1, #AT91_PMC_KEY
+	str	tmp1, [pmc, #AT91_CKGR_MOR]
+
+	wait_moscsels
+
+	/* Switch the master clock source to main clock */
+	ldr	tmp1, [pmc, #AT91_PMC_MCKR]
+	bic	tmp1, tmp1, #AT91_PMC_CSS
+	orr	tmp1, tmp1, #AT91_PMC_CSS_MAIN
+	str	tmp1, [pmc, #AT91_PMC_MCKR]
+
+	wait_mckrdy
+.endm
+
+ENTRY(at91_ulp_mode)
 	ldr	pmc, .pmc_base
 
 	/* Save Master clock setting */
@@ -174,22 +279,19 @@ ENTRY(at91_ulp0_mode)
 	orr	tmp1, tmp1, #(1 << 29)		/* bit 29 always set */
 	str	tmp1, [pmc, #AT91_CKGR_PLLAR]
 
-	/* Turn off the main oscillator */
-	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
-	bic	tmp1, tmp1, #AT91_PMC_MOSCEN
-	orr	tmp1, tmp1, #AT91_PMC_KEY
-	str	tmp1, [pmc, #AT91_CKGR_MOR]
+	ldr	r0, .pm_mode
+	cmp	r0, #AT91_PM_ULP1
+	beq	ulp1_mode
 
-	/* Wait for interrupt */
-	at91_cpu_idle
+	at91_pm_ulp0_mode
+	b	ulp_exit
 
-	/* Turn on the main oscillator */
-	ldr	tmp1, [pmc, #AT91_CKGR_MOR]
-	orr	tmp1, tmp1, #AT91_PMC_MOSCEN
-	orr	tmp1, tmp1, #AT91_PMC_KEY
-	str	tmp1, [pmc, #AT91_CKGR_MOR]
+ulp1_mode:
+	at91_pm_ulp1_mode
+	b	ulp_exit
 
-	wait_moscrdy
+ulp_exit:
+	ldr	pmc, .pmc_base
 
 	/* Restore PLLA setting */
 	ldr	tmp1, .saved_pllar
@@ -212,7 +314,7 @@ ENTRY(at91_ulp0_mode)
 	wait_mckrdy
 
 	mov	pc, lr
-ENDPROC(at91_ulp0_mode)
+ENDPROC(at91_ulp_mode)
 
 /*
  * void at91_sramc_self_refresh(unsigned int is_active)
diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 6aca5ce8a99a..4ea2cbf9b50d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -47,8 +47,10 @@
 #define	AT91_CKGR_MOR		0x20			/* Main Oscillator Register [not on SAM9RL] */
 #define		AT91_PMC_MOSCEN		(1    <<  0)		/* Main Oscillator Enable */
 #define		AT91_PMC_OSCBYPASS	(1    <<  1)		/* Oscillator Bypass */
+#define		AT91_PMC_WAITMODE	(1    <<  2)		/* Wait Mode Command */
 #define		AT91_PMC_MOSCRCEN	(1    <<  3)		/* Main On-Chip RC Oscillator Enable [some SAM9] */
 #define		AT91_PMC_OSCOUNT	(0xff <<  8)		/* Main Oscillator Start-up Time */
+#define		AT91_PMC_KEY_MASK	(0xff << 16)
 #define		AT91_PMC_KEY		(0x37 << 16)		/* MOR Writing Key */
 #define		AT91_PMC_MOSCSEL	(1    << 24)		/* Main Oscillator Selection [some SAM9] */
 #define		AT91_PMC_CFDEN		(1    << 25)		/* Clock Failure Detector Enable [some SAM9] */
-- 
cgit v1.2.3


From 3abd729aa468d7346f12d7dfc8f81aba653f6c88 Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Tue, 17 Jul 2018 11:26:56 +0300
Subject: ARM: at91: pm: add PMC fast startup registers defines

Add PMC fast startup registers defines.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/clk/at91_pmc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index 4ea2cbf9b50d..931ab05f771d 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -157,6 +157,19 @@
 #define		AT91_PMC_GCKRDY		(1 << 24)		/* Generated Clocks */
 #define	AT91_PMC_IMR		0x6c			/* Interrupt Mask Register */
 
+#define AT91_PMC_FSMR		0x70		/* Fast Startup Mode Register */
+#define AT91_PMC_FSTT(n)	BIT(n)
+#define AT91_PMC_RTCAL		BIT(17)		/* RTC Alarm Enable */
+#define AT91_PMC_USBAL		BIT(18)		/* USB Resume Enable */
+#define AT91_PMC_SDMMC_CD	BIT(19)		/* SDMMC Card Detect Enable */
+#define AT91_PMC_LPM		BIT(20)		/* Low-power Mode */
+#define AT91_PMC_RXLP_MCE	BIT(24)		/* Backup UART Receive Enable */
+#define AT91_PMC_ACC_CE		BIT(25)		/* ACC Enable */
+
+#define AT91_PMC_FSPR		0x74		/* Fast Startup Polarity Reg */
+
+#define AT91_PMC_FS_INPUT_MASK  0x7ff
+
 #define AT91_PMC_PLLICPR	0x80			/* PLL Charge Pump Current Register */
 
 #define AT91_PMC_PROT		0xe4			/* Write Protect Mode Register [some SAM9] */
-- 
cgit v1.2.3


From a0ae2562c6c4b2721d9fddba63b7286c13517d9f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 Jun 2018 07:46:51 +0200
Subject: netfilter: conntrack: remove l3proto abstraction

This unifies ipv4 and ipv6 protocol trackers and removes the l3proto
abstraction.

This gets rid of all l3proto indirect calls and the need to do
a lookup on the function to call for l3 demux.

It increases module size by only a small amount (12kbyte), so this reduces
size because nf_conntrack.ko is useless without either nf_conntrack_ipv4
or nf_conntrack_ipv6 module.

before:
   text    data     bss     dec     hex filename
   7357    1088       0    8445    20fd nf_conntrack_ipv4.ko
   7405    1084       4    8493    212d nf_conntrack_ipv6.ko
  72614   13689     236   86539   1520b nf_conntrack.ko
 19K nf_conntrack_ipv4.ko
 19K nf_conntrack_ipv6.ko
179K nf_conntrack.ko

after:
   text    data     bss     dec     hex filename
  79277   13937     236   93450   16d0a nf_conntrack.ko
  191K nf_conntrack.ko

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h |   3 -
 include/net/netfilter/nf_conntrack.h           |   5 +
 include/net/netfilter/nf_conntrack_core.h      |   1 -
 include/net/netfilter/nf_conntrack_l3proto.h   |  54 --
 include/net/netfilter/nf_conntrack_l4proto.h   |   4 -
 net/ipv4/netfilter/Kconfig                     |  22 +-
 net/ipv4/netfilter/Makefile                    |   6 -
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 368 -----------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   | 388 -----------
 net/ipv6/netfilter/Kconfig                     |  27 +-
 net/ipv6/netfilter/Makefile                    |   6 -
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 355 -----------
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 387 -----------
 net/netfilter/Kconfig                          |   2 +
 net/netfilter/Makefile                         |   7 +-
 net/netfilter/nf_conntrack_core.c              |  11 +-
 net/netfilter/nf_conntrack_proto.c             | 847 ++++++++++++++++++-------
 net/netfilter/nf_conntrack_proto_icmp.c        | 388 +++++++++++
 net/netfilter/nf_conntrack_proto_icmpv6.c      | 387 +++++++++++
 net/netfilter/nf_conntrack_standalone.c        |  14 +-
 net/netfilter/nf_nat_core.c                    |   8 -
 21 files changed, 1420 insertions(+), 1870 deletions(-)
 delete mode 100644 include/net/netfilter/nf_conntrack_l3proto.h
 delete mode 100644 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
 delete mode 100644 net/ipv4/netfilter/nf_conntrack_proto_icmp.c
 delete mode 100644 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
 delete mode 100644 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
 create mode 100644 net/netfilter/nf_conntrack_proto_icmp.c
 create mode 100644 net/netfilter/nf_conntrack_proto_icmpv6.c

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 73f825732326..c84b51682f08 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -10,9 +10,6 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-
-const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
-
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 062dc19b5840..a2b0ed025908 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -41,6 +41,11 @@ union nf_conntrack_expect_proto {
 	/* insert expect proto private data here */
 };
 
+struct nf_conntrack_net {
+	unsigned int users4;
+	unsigned int users6;
+};
+
 #include <linux/types.h>
 #include <linux/skbuff.h>
 
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 35461b2d3462..2a3e0974a6af 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -14,7 +14,6 @@
 #define _NF_CONNTRACK_CORE_H
 
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
deleted file mode 100644
index 5f160375c93a..000000000000
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * Header for use in defining a given L3 protocol for connection tracking.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI	<yasuyuki.kozakai@toshiba.co.jp>
- *
- * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h
- */
-
-#ifndef _NF_CONNTRACK_L3PROTO_H
-#define _NF_CONNTRACK_L3PROTO_H
-#include <linux/netlink.h>
-#include <net/netlink.h>
-#include <linux/seq_file.h>
-#include <net/netfilter/nf_conntrack.h>
-
-struct nf_conntrack_l3proto {
-	/* L3 Protocol Family number. ex) PF_INET */
-	u_int16_t l3proto;
-
-	/* size of tuple nlattr, fills a hole */
-	u16 nla_size;
-
-	/* Called when netns wants to use connection tracking */
-	int (*net_ns_get)(struct net *);
-	void (*net_ns_put)(struct net *);
-
-	/* Module (if any) which this is connected to. */
-	struct module *me;
-};
-
-extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO];
-
-/* Protocol global registration. */
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto);
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto);
-
-const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
-
-/* Existing built-in protocols */
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic;
-
-static inline struct nf_conntrack_l3proto *
-__nf_ct_l3proto_find(u_int16_t l3proto)
-{
-	if (unlikely(l3proto >= NFPROTO_NUMPROTO))
-		return &nf_conntrack_l3proto_generic;
-	return rcu_dereference(nf_ct_l3protos[l3proto]);
-}
-
-#endif /*_NF_CONNTRACK_L3PROTO_H*/
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index c7a0075d96df..6068c6da3eac 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -130,10 +130,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[],
-			   unsigned int num_proto);
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[],
-			      unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index bbfc356cb1b5..d9504adc47b3 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -9,22 +9,6 @@ config NF_DEFRAG_IPV4
 	tristate
 	default n
 
-config NF_CONNTRACK_IPV4
-	tristate "IPv4 connection tracking support (required for NAT)"
-	depends on NF_CONNTRACK
-	default m if NETFILTER_ADVANCED=n
-	select NF_DEFRAG_IPV4
-	---help---
-	  Connection tracking keeps a record of what packets have passed
-	  through your machine, in order to figure out how they are related
-	  into connections.
-
-	  This is IPv4 support on Layer 3 independent connection tracking.
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_SOCKET_IPV4
 	tristate "IPv4 socket lookup support"
 	help
@@ -112,7 +96,7 @@ config NF_REJECT_IPV4
 
 config NF_NAT_IPV4
 	tristate "IPv4 NAT"
-	depends on NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	select NF_NAT
 	help
@@ -279,7 +263,7 @@ config IP_NF_TARGET_SYNPROXY
 # NAT + specific targets: nf_conntrack
 config IP_NF_NAT
 	tristate "iptables NAT support"
-	depends on NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK
 	default m if NETFILTER_ADVANCED=n
 	select NF_NAT
 	select NF_NAT_IPV4
@@ -340,7 +324,7 @@ config IP_NF_MANGLE
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support"
 	depends on IP_NF_MANGLE
-	depends on NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_CONNTRACK_MARK
 	select NETFILTER_FAMILY_ARP
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 8394c17c269f..367993adf4d3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,12 +3,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-# objects for l3 independent conntrack
-nf_conntrack_ipv4-y	:=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-
-# connection tracking
-obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-
 nf_nat_ipv4-y		:= nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
 nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
deleted file mode 100644
index 9fbf6c7f8ece..000000000000
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ /dev/null
@@ -1,368 +0,0 @@
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <linux/sysctl.h>
-#include <net/route.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-#include <net/netfilter/nf_log.h>
-
-static int conntrack4_net_id __read_mostly;
-static DEFINE_MUTEX(register_ipv4_hooks);
-
-struct conntrack4_net {
-	unsigned int users;
-};
-
-static unsigned int ipv4_helper(void *priv,
-				struct sk_buff *skb,
-				const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct nf_conn_help *help;
-	const struct nf_conntrack_helper *helper;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		return NF_ACCEPT;
-
-	help = nfct_help(ct);
-	if (!help)
-		return NF_ACCEPT;
-
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	helper = rcu_dereference(help->helper);
-	if (!helper)
-		return NF_ACCEPT;
-
-	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
-			    ct, ctinfo);
-}
-
-static unsigned int ipv4_confirm(void *priv,
-				 struct sk_buff *skb,
-				 const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		goto out;
-
-	/* adjust seqs for loopback traffic only in outgoing direction */
-	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-	    !nf_is_loopback_packet(skb)) {
-		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
-			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-			return NF_DROP;
-		}
-	}
-out:
-	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(skb);
-}
-
-static unsigned int ipv4_conntrack_in(void *priv,
-				      struct sk_buff *skb,
-				      const struct nf_hook_state *state)
-{
-	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
-}
-
-static unsigned int ipv4_conntrack_local(void *priv,
-					 struct sk_buff *skb,
-					 const struct nf_hook_state *state)
-{
-	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
-		enum ip_conntrack_info ctinfo;
-		struct nf_conn *tmpl;
-
-		tmpl = nf_ct_get(skb, &ctinfo);
-		if (tmpl && nf_ct_is_template(tmpl)) {
-			/* when skipping ct, clear templates to avoid fooling
-			 * later targets/matches
-			 */
-			skb->_nfct = 0;
-			nf_ct_put(tmpl);
-		}
-		return NF_ACCEPT;
-	}
-
-	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
-   make it the first hook. */
-static const struct nf_hook_ops ipv4_conntrack_ops[] = {
-	{
-		.hook		= ipv4_conntrack_in,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ipv4_conntrack_local,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ipv4_helper,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ipv4_confirm,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-	{
-		.hook		= ipv4_helper,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ipv4_confirm,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-};
-
-/* Fast function for those who don't want to parse /proc (and I don't
-   blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
-   mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct nf_conntrack_tuple_hash *h;
-	struct nf_conntrack_tuple tuple;
-
-	memset(&tuple, 0, sizeof(tuple));
-
-	lock_sock(sk);
-	tuple.src.u3.ip = inet->inet_rcv_saddr;
-	tuple.src.u.tcp.port = inet->inet_sport;
-	tuple.dst.u3.ip = inet->inet_daddr;
-	tuple.dst.u.tcp.port = inet->inet_dport;
-	tuple.src.l3num = PF_INET;
-	tuple.dst.protonum = sk->sk_protocol;
-	release_sock(sk);
-
-	/* We only do TCP and SCTP at the moment: is there a better way? */
-	if (tuple.dst.protonum != IPPROTO_TCP &&
-	    tuple.dst.protonum != IPPROTO_SCTP) {
-		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
-		return -ENOPROTOOPT;
-	}
-
-	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
-			 *len, sizeof(struct sockaddr_in));
-		return -EINVAL;
-	}
-
-	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
-	if (h) {
-		struct sockaddr_in sin;
-		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
-		sin.sin_family = AF_INET;
-		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.u.tcp.port;
-		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.u3.ip;
-		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
-		pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
-			 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
-		nf_ct_put(ct);
-		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
-			return -EFAULT;
-		else
-			return 0;
-	}
-	pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
-		 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
-		 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
-	return -ENOENT;
-}
-
-static struct nf_sockopt_ops so_getorigdst = {
-	.pf		= PF_INET,
-	.get_optmin	= SO_ORIGINAL_DST,
-	.get_optmax	= SO_ORIGINAL_DST+1,
-	.get		= getorigdst,
-	.owner		= THIS_MODULE,
-};
-
-static int ipv4_hooks_register(struct net *net)
-{
-	struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
-	int err = 0;
-
-	mutex_lock(&register_ipv4_hooks);
-
-	cnet->users++;
-	if (cnet->users > 1)
-		goto out_unlock;
-
-	err = nf_defrag_ipv4_enable(net);
-	if (err) {
-		cnet->users = 0;
-		goto out_unlock;
-	}
-
-	err = nf_register_net_hooks(net, ipv4_conntrack_ops,
-				    ARRAY_SIZE(ipv4_conntrack_ops));
-
-	if (err)
-		cnet->users = 0;
- out_unlock:
-	mutex_unlock(&register_ipv4_hooks);
-	return err;
-}
-
-static void ipv4_hooks_unregister(struct net *net)
-{
-	struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
-
-	mutex_lock(&register_ipv4_hooks);
-	if (cnet->users && (--cnet->users == 0))
-		nf_unregister_net_hooks(net, ipv4_conntrack_ops,
-					ARRAY_SIZE(ipv4_conntrack_ops));
-	mutex_unlock(&register_ipv4_hooks);
-}
-
-const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
-	.l3proto	 = PF_INET,
-	.net_ns_get	 = ipv4_hooks_register,
-	.net_ns_put	 = ipv4_hooks_unregister,
-	.me		 = THIS_MODULE,
-};
-
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
-		  &nf_conntrack_htable_size, 0600);
-
-MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
-MODULE_ALIAS("ip_conntrack");
-MODULE_LICENSE("GPL");
-
-static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = {
-	&nf_conntrack_l4proto_tcp4,
-	&nf_conntrack_l4proto_udp4,
-	&nf_conntrack_l4proto_icmp,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-	&nf_conntrack_l4proto_dccp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-	&nf_conntrack_l4proto_sctp4,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-	&nf_conntrack_l4proto_udplite4,
-#endif
-};
-
-static int ipv4_net_init(struct net *net)
-{
-	return nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
-					     ARRAY_SIZE(builtin_l4proto4));
-}
-
-static void ipv4_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
-					ARRAY_SIZE(builtin_l4proto4));
-}
-
-static struct pernet_operations ipv4_net_ops = {
-	.init = ipv4_net_init,
-	.exit = ipv4_net_exit,
-	.id = &conntrack4_net_id,
-	.size = sizeof(struct conntrack4_net),
-};
-
-static int __init nf_conntrack_l3proto_ipv4_init(void)
-{
-	int ret = 0;
-
-	need_conntrack();
-
-	ret = nf_register_sockopt(&so_getorigdst);
-	if (ret < 0) {
-		pr_err("Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ret = register_pernet_subsys(&ipv4_net_ops);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
-		goto cleanup_sockopt;
-	}
-
-	ret = nf_ct_l4proto_register(builtin_l4proto4,
-				     ARRAY_SIZE(builtin_l4proto4));
-	if (ret < 0)
-		goto cleanup_pernet;
-
-	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
-		goto cleanup_l4proto;
-	}
-
-	return ret;
-cleanup_l4proto:
-	nf_ct_l4proto_unregister(builtin_l4proto4,
-				 ARRAY_SIZE(builtin_l4proto4));
- cleanup_pernet:
-	unregister_pernet_subsys(&ipv4_net_ops);
- cleanup_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-	return ret;
-}
-
-static void __exit nf_conntrack_l3proto_ipv4_fini(void)
-{
-	synchronize_net();
-	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-	nf_ct_l4proto_unregister(builtin_l4proto4,
-				 ARRAY_SIZE(builtin_l4proto4));
-	unregister_pernet_subsys(&ipv4_net_ops);
-	nf_unregister_sockopt(&so_getorigdst);
-}
-
-module_init(nf_conntrack_l3proto_ipv4_init);
-module_exit(nf_conntrack_l3proto_ipv4_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
deleted file mode 100644
index 036670b38282..000000000000
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_log.h>
-
-static const unsigned int nf_ct_icmp_timeout = 30*HZ;
-
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp;
-}
-
-static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			      struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	const struct icmphdr *hp;
-	struct icmphdr _hdr;
-
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return false;
-
-	tuple->dst.u.icmp.type = hp->type;
-	tuple->src.u.icmp.id = hp->un.echo.id;
-	tuple->dst.u.icmp.code = hp->code;
-
-	return true;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
-{
-	if (orig->dst.u.icmp.type >= sizeof(invmap) ||
-	    !invmap[orig->dst.u.icmp.type])
-		return false;
-
-	tuple->src.u.icmp.id = orig->src.u.icmp.id;
-	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
-	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-	return true;
-}
-
-static unsigned int *icmp_get_timeouts(struct net *net)
-{
-	return &icmp_pernet(net)->timeout;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct nf_conn *ct,
-		       const struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo)
-{
-	/* Do not immediately delete the connection after the first
-	   successful reply to avoid excessive conntrackd traffic
-	   and also to handle correctly ICMP echo reply duplicates. */
-	unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-	if (!timeout)
-		timeout = icmp_get_timeouts(nf_ct_net(ct));
-
-	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
-		     unsigned int dataoff)
-{
-	static const u_int8_t valid_new[] = {
-		[ICMP_ECHO] = 1,
-		[ICMP_TIMESTAMP] = 1,
-		[ICMP_INFO_REQUEST] = 1,
-		[ICMP_ADDRESS] = 1
-	};
-
-	if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
-	    !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
-		/* Can't create a new ICMP `conn' with this. */
-		pr_debug("icmp: can't create new conn with type %u\n",
-			 ct->tuplehash[0].tuple.dst.u.icmp.type);
-		nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
-		return false;
-	}
-	return true;
-}
-
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
-		 unsigned int hooknum)
-{
-	struct nf_conntrack_tuple innertuple, origtuple;
-	const struct nf_conntrack_l4proto *innerproto;
-	const struct nf_conntrack_tuple_hash *h;
-	const struct nf_conntrack_zone *zone;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conntrack_zone tmp;
-
-	WARN_ON(skb_nfct(skb));
-	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-
-	/* Are they talking about one of our connections? */
-	if (!nf_ct_get_tuplepr(skb,
-			       skb_network_offset(skb) + ip_hdrlen(skb)
-						       + sizeof(struct icmphdr),
-			       PF_INET, net, &origtuple)) {
-		pr_debug("icmp_error_message: failed to get tuple\n");
-		return -NF_ACCEPT;
-	}
-
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
-
-	/* Ordinarily, we'd expect the inverted tupleproto, but it's
-	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
-		pr_debug("icmp_error_message: no match\n");
-		return -NF_ACCEPT;
-	}
-
-	ctinfo = IP_CT_RELATED;
-
-	h = nf_conntrack_find_get(net, zone, &innertuple);
-	if (!h) {
-		pr_debug("icmp_error_message: no match\n");
-		return -NF_ACCEPT;
-	}
-
-	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-		ctinfo += IP_CT_IS_REPLY;
-
-	/* Update skb to refer to this connection */
-	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-	return NF_ACCEPT;
-}
-
-static void icmp_error_log(const struct sk_buff *skb, struct net *net,
-			   u8 pf, const char *msg)
-{
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct net *net, struct nf_conn *tmpl,
-	   struct sk_buff *skb, unsigned int dataoff,
-	   u8 pf, unsigned int hooknum)
-{
-	const struct icmphdr *icmph;
-	struct icmphdr _ih;
-
-	/* Not enough header? */
-	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
-	if (icmph == NULL) {
-		icmp_error_log(skb, net, pf, "short packet");
-		return -NF_ACCEPT;
-	}
-
-	/* See ip_conntrack_proto_tcp.c */
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
-		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
-		return -NF_ACCEPT;
-	}
-
-	/*
-	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
-	 *
-	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
-	 *		  discarded.
-	 */
-	if (icmph->type > NR_ICMP_TYPES) {
-		icmp_error_log(skb, net, pf, "invalid icmp type");
-		return -NF_ACCEPT;
-	}
-
-	/* Need to track icmp error message? */
-	if (icmph->type != ICMP_DEST_UNREACH &&
-	    icmph->type != ICMP_SOURCE_QUENCH &&
-	    icmph->type != ICMP_TIME_EXCEEDED &&
-	    icmph->type != ICMP_PARAMETERPROB &&
-	    icmph->type != ICMP_REDIRECT)
-		return NF_ACCEPT;
-
-	return icmp_error_message(net, tmpl, skb, hooknum);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static int icmp_tuple_to_nlattr(struct sk_buff *skb,
-				const struct nf_conntrack_tuple *t)
-{
-	if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
-	    nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
-	    nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
-static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
-	[CTA_PROTO_ICMP_TYPE]	= { .type = NLA_U8 },
-	[CTA_PROTO_ICMP_CODE]	= { .type = NLA_U8 },
-	[CTA_PROTO_ICMP_ID]	= { .type = NLA_U16 },
-};
-
-static int icmp_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
-{
-	if (!tb[CTA_PROTO_ICMP_TYPE] ||
-	    !tb[CTA_PROTO_ICMP_CODE] ||
-	    !tb[CTA_PROTO_ICMP_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
-
-	if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type])
-		return -EINVAL;
-
-	return 0;
-}
-
-static unsigned int icmp_nlattr_tuple_size(void)
-{
-	static unsigned int size __read_mostly;
-
-	if (!size)
-		size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
-
-	return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
-				      struct net *net, void *data)
-{
-	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmp_pernet(net);
-
-	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
-		if (!timeout)
-			timeout = &in->timeout;
-		*timeout =
-			ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
-	} else if (timeout) {
-		/* Set default ICMP timeout. */
-		*timeout = in->timeout;
-	}
-	return 0;
-}
-
-static int
-icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
-	const unsigned int *timeout = data;
-
-	if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -ENOSPC;
-}
-
-static const struct nla_policy
-icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
-	[CTA_TIMEOUT_ICMP_TIMEOUT]	= { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmp_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_icmp_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				     struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-	pn->ctl_table = kmemdup(icmp_sysctl_table,
-				sizeof(icmp_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-
-	pn->ctl_table[0].data = &in->timeout;
-#endif
-	return 0;
-}
-
-static int icmp_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_icmp_net *in = icmp_pernet(net);
-	struct nf_proto_net *pn = &in->pn;
-
-	in->timeout = nf_ct_icmp_timeout;
-
-	return icmp_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
-{
-	.l3proto		= PF_INET,
-	.l4proto		= IPPROTO_ICMP,
-	.pkt_to_tuple		= icmp_pkt_to_tuple,
-	.invert_tuple		= icmp_invert_tuple,
-	.packet			= icmp_packet,
-	.new			= icmp_new,
-	.error			= icmp_error,
-	.destroy		= NULL,
-	.me			= NULL,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
-	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
-	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
-	.nla_policy		= icmp_nla_policy,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= icmp_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= icmp_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
-		.obj_size	= sizeof(unsigned int),
-		.nla_policy	= icmp_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.init_net		= icmp_init_net,
-	.get_net_proto		= icmp_get_net_proto,
-};
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 37b14dc9d863..339d0762b027 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -5,26 +5,6 @@
 menu "IPv6: Netfilter Configuration"
 	depends on INET && IPV6 && NETFILTER
 
-config NF_DEFRAG_IPV6
-	tristate
-	default n
-
-config NF_CONNTRACK_IPV6
-	tristate "IPv6 connection tracking support"
-	depends on INET && IPV6 && NF_CONNTRACK
-	default m if NETFILTER_ADVANCED=n
-	select NF_DEFRAG_IPV6
-	---help---
-	  Connection tracking keeps a record of what packets have passed
-	  through your machine, in order to figure out how they are related
-	  into connections.
-
-	  This is IPv6 support on Layer 3 independent connection tracking.
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_SOCKET_IPV6
 	tristate "IPv6 socket lookup support"
 	help
@@ -128,7 +108,7 @@ config NF_LOG_IPV6
 
 config NF_NAT_IPV6
 	tristate "IPv6 NAT"
-	depends on NF_CONNTRACK_IPV6
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_NAT
 	help
@@ -328,7 +308,7 @@ config IP6_NF_SECURITY
 
 config IP6_NF_NAT
 	tristate "ip6tables NAT support"
-	depends on NF_CONNTRACK_IPV6
+	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
 	select NF_NAT
 	select NF_NAT_IPV6
@@ -365,6 +345,7 @@ config IP6_NF_TARGET_NPT
 endif # IP6_NF_NAT
 
 endif # IP6_NF_IPTABLES
-
 endmenu
 
+config NF_DEFRAG_IPV6
+	tristate
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 10a5a1c87320..200c0c235565 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,12 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
-# objects for l3 independent conntrack
-nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
-
-# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
-
 nf_nat_ipv6-y		:= nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
 nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
deleted file mode 100644
index 37ab25645cf2..000000000000
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (C)2004 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/ipv6.h>
-#include <linux/in6.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/icmp.h>
-#include <net/ipv6.h>
-#include <net/inet_frag.h>
-
-#include <linux/netfilter_bridge.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-#include <net/netfilter/nf_log.h>
-
-static int conntrack6_net_id;
-static DEFINE_MUTEX(register_ipv6_hooks);
-
-struct conntrack6_net {
-	unsigned int users;
-};
-
-static unsigned int ipv6_helper(void *priv,
-				struct sk_buff *skb,
-				const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	const struct nf_conn_help *help;
-	const struct nf_conntrack_helper *helper;
-	enum ip_conntrack_info ctinfo;
-	__be16 frag_off;
-	int protoff;
-	u8 nexthdr;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		return NF_ACCEPT;
-
-	help = nfct_help(ct);
-	if (!help)
-		return NF_ACCEPT;
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	helper = rcu_dereference(help->helper);
-	if (!helper)
-		return NF_ACCEPT;
-
-	nexthdr = ipv6_hdr(skb)->nexthdr;
-	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
-				   &frag_off);
-	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-		pr_debug("proto header not found\n");
-		return NF_ACCEPT;
-	}
-
-	return helper->help(skb, protoff, ct, ctinfo);
-}
-
-static unsigned int ipv6_confirm(void *priv,
-				 struct sk_buff *skb,
-				 const struct nf_hook_state *state)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-	int protoff;
-	__be16 frag_off;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
-		goto out;
-
-	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
-				   &frag_off);
-	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
-		pr_debug("proto header not found\n");
-		goto out;
-	}
-
-	/* adjust seqs for loopback traffic only in outgoing direction */
-	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
-	    !nf_is_loopback_packet(skb)) {
-		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
-			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
-			return NF_DROP;
-		}
-	}
-out:
-	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(skb);
-}
-
-static unsigned int ipv6_conntrack_in(void *priv,
-				      struct sk_buff *skb,
-				      const struct nf_hook_state *state)
-{
-	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
-}
-
-static unsigned int ipv6_conntrack_local(void *priv,
-					 struct sk_buff *skb,
-					 const struct nf_hook_state *state)
-{
-	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
-}
-
-static const struct nf_hook_ops ipv6_conntrack_ops[] = {
-	{
-		.hook		= ipv6_conntrack_in,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP6_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ipv6_conntrack_local,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP6_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ipv6_helper,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ipv6_confirm,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP6_PRI_LAST,
-	},
-	{
-		.hook		= ipv6_helper,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ipv6_confirm,
-		.pf		= NFPROTO_IPV6,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP6_PRI_LAST-1,
-	},
-};
-
-static int
-ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
-	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct nf_conntrack_tuple_hash *h;
-	struct sockaddr_in6 sin6;
-	struct nf_conn *ct;
-	__be32 flow_label;
-	int bound_dev_if;
-
-	lock_sock(sk);
-	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
-	tuple.src.u.tcp.port = inet->inet_sport;
-	tuple.dst.u3.in6 = sk->sk_v6_daddr;
-	tuple.dst.u.tcp.port = inet->inet_dport;
-	tuple.dst.protonum = sk->sk_protocol;
-	bound_dev_if = sk->sk_bound_dev_if;
-	flow_label = inet6->flow_label;
-	release_sock(sk);
-
-	if (tuple.dst.protonum != IPPROTO_TCP &&
-	    tuple.dst.protonum != IPPROTO_SCTP)
-		return -ENOPROTOOPT;
-
-	if (*len < 0 || (unsigned int) *len < sizeof(sin6))
-		return -EINVAL;
-
-	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
-	if (!h) {
-		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
-			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
-			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
-		return -ENOENT;
-	}
-
-	ct = nf_ct_tuplehash_to_ctrack(h);
-
-	sin6.sin6_family = AF_INET6;
-	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
-	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
-	memcpy(&sin6.sin6_addr,
-		&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
-					sizeof(sin6.sin6_addr));
-
-	nf_ct_put(ct);
-	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
-	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
-}
-
-static int ipv6_hooks_register(struct net *net)
-{
-	struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
-	int err = 0;
-
-	mutex_lock(&register_ipv6_hooks);
-	cnet->users++;
-	if (cnet->users > 1)
-		goto out_unlock;
-
-	err = nf_defrag_ipv6_enable(net);
-	if (err < 0) {
-		cnet->users = 0;
-		goto out_unlock;
-	}
-
-	err = nf_register_net_hooks(net, ipv6_conntrack_ops,
-				    ARRAY_SIZE(ipv6_conntrack_ops));
-	if (err)
-		cnet->users = 0;
- out_unlock:
-	mutex_unlock(&register_ipv6_hooks);
-	return err;
-}
-
-static void ipv6_hooks_unregister(struct net *net)
-{
-	struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id);
-
-	mutex_lock(&register_ipv6_hooks);
-	if (cnet->users && (--cnet->users == 0))
-		nf_unregister_net_hooks(net, ipv6_conntrack_ops,
-					ARRAY_SIZE(ipv6_conntrack_ops));
-	mutex_unlock(&register_ipv6_hooks);
-}
-
-const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
-	.l3proto		= PF_INET6,
-	.net_ns_get		= ipv6_hooks_register,
-	.net_ns_put		= ipv6_hooks_unregister,
-	.me			= THIS_MODULE,
-};
-
-MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>");
-
-static struct nf_sockopt_ops so_getorigdst6 = {
-	.pf		= NFPROTO_IPV6,
-	.get_optmin	= IP6T_SO_ORIGINAL_DST,
-	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
-	.get		= ipv6_getorigdst,
-	.owner		= THIS_MODULE,
-};
-
-static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
-	&nf_conntrack_l4proto_tcp6,
-	&nf_conntrack_l4proto_udp6,
-	&nf_conntrack_l4proto_icmpv6,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-	&nf_conntrack_l4proto_dccp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-	&nf_conntrack_l4proto_sctp6,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-	&nf_conntrack_l4proto_udplite6,
-#endif
-};
-
-static int ipv6_net_init(struct net *net)
-{
-	return nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
-					     ARRAY_SIZE(builtin_l4proto6));
-}
-
-static void ipv6_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
-					ARRAY_SIZE(builtin_l4proto6));
-}
-
-static struct pernet_operations ipv6_net_ops = {
-	.init = ipv6_net_init,
-	.exit = ipv6_net_exit,
-	.id = &conntrack6_net_id,
-	.size = sizeof(struct conntrack6_net),
-};
-
-static int __init nf_conntrack_l3proto_ipv6_init(void)
-{
-	int ret = 0;
-
-	need_conntrack();
-
-	ret = nf_register_sockopt(&so_getorigdst6);
-	if (ret < 0) {
-		pr_err("Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ret = register_pernet_subsys(&ipv6_net_ops);
-	if (ret < 0)
-		goto cleanup_sockopt;
-
-	ret = nf_ct_l4proto_register(builtin_l4proto6,
-				     ARRAY_SIZE(builtin_l4proto6));
-	if (ret < 0)
-		goto cleanup_pernet;
-
-	ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
-	if (ret < 0) {
-		pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
-		goto cleanup_l4proto;
-	}
-	return ret;
-cleanup_l4proto:
-	nf_ct_l4proto_unregister(builtin_l4proto6,
-				 ARRAY_SIZE(builtin_l4proto6));
- cleanup_pernet:
-	unregister_pernet_subsys(&ipv6_net_ops);
- cleanup_sockopt:
-	nf_unregister_sockopt(&so_getorigdst6);
-	return ret;
-}
-
-static void __exit nf_conntrack_l3proto_ipv6_fini(void)
-{
-	synchronize_net();
-	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
-	nf_ct_l4proto_unregister(builtin_l4proto6,
-				 ARRAY_SIZE(builtin_l4proto6));
-	unregister_pernet_subsys(&ipv6_net_ops);
-	nf_unregister_sockopt(&so_getorigdst6);
-}
-
-module_init(nf_conntrack_l3proto_ipv6_init);
-module_exit(nf_conntrack_l3proto_ipv6_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
deleted file mode 100644
index bed07b998a10..000000000000
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Copyright (C)2003,2004 USAGI/WIDE Project
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Author:
- *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/in6.h>
-#include <linux/icmpv6.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
-#include <net/ip6_checksum.h>
-#include <linux/seq_file.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_timeout.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
-#include <net/netfilter/nf_log.h>
-
-static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6;
-}
-
-static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
-				unsigned int dataoff,
-				struct net *net,
-				struct nf_conntrack_tuple *tuple)
-{
-	const struct icmp6hdr *hp;
-	struct icmp6hdr _hdr;
-
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return false;
-	tuple->dst.u.icmp.type = hp->icmp6_type;
-	tuple->src.u.icmp.id = hp->icmp6_identifier;
-	tuple->dst.u.icmp.code = hp->icmp6_code;
-
-	return true;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
-	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
-	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
-	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY + 1
-};
-
-static const u_int8_t noct_valid_new[] = {
-	[ICMPV6_MGM_QUERY - 130] = 1,
-	[ICMPV6_MGM_REPORT - 130] = 1,
-	[ICMPV6_MGM_REDUCTION - 130] = 1,
-	[NDISC_ROUTER_SOLICITATION - 130] = 1,
-	[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
-	[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
-	[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
-	[ICMPV6_MLD2_REPORT - 130] = 1
-};
-
-static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-				const struct nf_conntrack_tuple *orig)
-{
-	int type = orig->dst.u.icmp.type - 128;
-	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
-		return false;
-
-	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
-	tuple->dst.u.icmp.type = invmap[type] - 1;
-	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-	return true;
-}
-
-static unsigned int *icmpv6_get_timeouts(struct net *net)
-{
-	return &icmpv6_pernet(net)->timeout;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmpv6_packet(struct nf_conn *ct,
-		       const struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo)
-{
-	unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-	if (!timeout)
-		timeout = icmpv6_get_timeouts(nf_ct_net(ct));
-
-	/* Do not immediately delete the connection after the first
-	   successful reply to avoid excessive conntrackd traffic
-	   and also to handle correctly ICMP echo reply duplicates. */
-	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
-		       unsigned int dataoff)
-{
-	static const u_int8_t valid_new[] = {
-		[ICMPV6_ECHO_REQUEST - 128] = 1,
-		[ICMPV6_NI_QUERY - 128] = 1
-	};
-	int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
-
-	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
-		/* Can't create a new ICMPv6 `conn' with this. */
-		pr_debug("icmpv6: can't create new conn with type %u\n",
-			 type + 128);
-		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
-		return false;
-	}
-	return true;
-}
-
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
-		     struct sk_buff *skb,
-		     unsigned int icmp6off)
-{
-	struct nf_conntrack_tuple intuple, origtuple;
-	const struct nf_conntrack_tuple_hash *h;
-	const struct nf_conntrack_l4proto *inproto;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conntrack_zone tmp;
-
-	WARN_ON(skb_nfct(skb));
-
-	/* Are they talking about one of our connections? */
-	if (!nf_ct_get_tuplepr(skb,
-			       skb_network_offset(skb)
-				+ sizeof(struct ipv6hdr)
-				+ sizeof(struct icmp6hdr),
-			       PF_INET6, net, &origtuple)) {
-		pr_debug("icmpv6_error: Can't get tuple\n");
-		return -NF_ACCEPT;
-	}
-
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
-
-	/* Ordinarily, we'd expect the inverted tupleproto, but it's
-	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
-		pr_debug("icmpv6_error: Can't invert tuple\n");
-		return -NF_ACCEPT;
-	}
-
-	ctinfo = IP_CT_RELATED;
-
-	h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
-				  &intuple);
-	if (!h) {
-		pr_debug("icmpv6_error: no match\n");
-		return -NF_ACCEPT;
-	} else {
-		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-			ctinfo += IP_CT_IS_REPLY;
-	}
-
-	/* Update skb to refer to this connection */
-	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-	return NF_ACCEPT;
-}
-
-static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
-			     u8 pf, const char *msg)
-{
-	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
-}
-
-static int
-icmpv6_error(struct net *net, struct nf_conn *tmpl,
-	     struct sk_buff *skb, unsigned int dataoff,
-	     u8 pf, unsigned int hooknum)
-{
-	const struct icmp6hdr *icmp6h;
-	struct icmp6hdr _ih;
-	int type;
-
-	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
-	if (icmp6h == NULL) {
-		icmpv6_error_log(skb, net, pf, "short packet");
-		return -NF_ACCEPT;
-	}
-
-	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
-	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-		icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
-		return -NF_ACCEPT;
-	}
-
-	type = icmp6h->icmp6_type - 130;
-	if (type >= 0 && type < sizeof(noct_valid_new) &&
-	    noct_valid_new[type]) {
-		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-		return NF_ACCEPT;
-	}
-
-	/* is not error message ? */
-	if (icmp6h->icmp6_type >= 128)
-		return NF_ACCEPT;
-
-	return icmpv6_error_message(net, tmpl, skb, dataoff);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
-				  const struct nf_conntrack_tuple *t)
-{
-	if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
-	    nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
-	    nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
-static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
-	[CTA_PROTO_ICMPV6_TYPE]	= { .type = NLA_U8 },
-	[CTA_PROTO_ICMPV6_CODE]	= { .type = NLA_U8 },
-	[CTA_PROTO_ICMPV6_ID]	= { .type = NLA_U16 },
-};
-
-static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
-{
-	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
-	    !tb[CTA_PROTO_ICMPV6_CODE] ||
-	    !tb[CTA_PROTO_ICMPV6_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
-
-	if (tuple->dst.u.icmp.type < 128 ||
-	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type - 128])
-		return -EINVAL;
-
-	return 0;
-}
-
-static unsigned int icmpv6_nlattr_tuple_size(void)
-{
-	static unsigned int size __read_mostly;
-
-	if (!size)
-		size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
-
-	return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_cttimeout.h>
-
-static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
-					struct net *net, void *data)
-{
-	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmpv6_pernet(net);
-
-	if (!timeout)
-		timeout = icmpv6_get_timeouts(net);
-	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
-		*timeout =
-		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
-	} else {
-		/* Set default ICMPv6 timeout. */
-		*timeout = in->timeout;
-	}
-	return 0;
-}
-
-static int
-icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
-{
-	const unsigned int *timeout = data;
-
-	if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -ENOSPC;
-}
-
-static const struct nla_policy
-icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
-	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
-};
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmpv6_sysctl_table[] = {
-	{
-		.procname	= "nf_conntrack_icmpv6_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
-				       struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-	pn->ctl_table = kmemdup(icmpv6_sysctl_table,
-				sizeof(icmpv6_sysctl_table),
-				GFP_KERNEL);
-	if (!pn->ctl_table)
-		return -ENOMEM;
-
-	pn->ctl_table[0].data = &in->timeout;
-#endif
-	return 0;
-}
-
-static int icmpv6_init_net(struct net *net, u_int16_t proto)
-{
-	struct nf_icmp_net *in = icmpv6_pernet(net);
-	struct nf_proto_net *pn = &in->pn;
-
-	in->timeout = nf_ct_icmpv6_timeout;
-
-	return icmpv6_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6.pn;
-}
-
-const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
-{
-	.l3proto		= PF_INET6,
-	.l4proto		= IPPROTO_ICMPV6,
-	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
-	.invert_tuple		= icmpv6_invert_tuple,
-	.packet			= icmpv6_packet,
-	.new			= icmpv6_new,
-	.error			= icmpv6_error,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
-	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
-	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
-	.nla_policy		= icmpv6_nla_policy,
-#endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
-	.ctnl_timeout		= {
-		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
-		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
-		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
-		.obj_size	= sizeof(unsigned int),
-		.nla_policy	= icmpv6_timeout_nla_policy,
-	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-	.init_net		= icmpv6_init_net,
-	.get_net_proto		= icmpv6_get_net_proto,
-};
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3ce657fbca67..9eab519b403a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -49,6 +49,8 @@ config NETFILTER_NETLINK_LOG
 config NF_CONNTRACK
 	tristate "Netfilter connection tracking support"
 	default m if NETFILTER_ADVANCED=n
+	select NF_DEFRAG_IPV4
+	select NF_DEFRAG_IPV6 if IPV6 != n
 	help
 	  Connection tracking keeps a record of what packets have passed
 	  through your machine, in order to figure out how they are related
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f132ea850778..53bd1ed1228a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
 
-nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+nf_conntrack-y	:= nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \
+		   nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \
+		   nf_conntrack_proto_icmp.o \
+		   nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+
+nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c069f2faff4c..5123e91b1982 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -291,7 +291,6 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    u_int8_t *protonum)
 {
 	int dataoff = -1;
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4)
 	const struct iphdr *iph;
 	struct iphdr _iph;
 
@@ -314,15 +313,14 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			 nhoff, iph->ihl << 2, skb->len);
 		return -1;
 	}
-#endif
 	return dataoff;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 			    u8 *protonum)
 {
 	int protoff = -1;
-#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
 	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
 	__be16 frag_off;
 	u8 nexthdr;
@@ -343,9 +341,9 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 	}
 
 	*protonum = nexthdr;
-#endif
 	return protoff;
 }
+#endif
 
 static int get_l4proto(const struct sk_buff *skb,
 		       unsigned int nhoff, u8 pf, u8 *l4num)
@@ -353,8 +351,10 @@ static int get_l4proto(const struct sk_buff *skb,
 	switch (pf) {
 	case NFPROTO_IPV4:
 		return ipv4_get_l4proto(skb, nhoff, l4num);
+#if IS_ENABLED(CONFIG_IPV6)
 	case NFPROTO_IPV6:
 		return ipv6_get_l4proto(skb, nhoff, l4num);
+#endif
 	default:
 		*l4num = 0;
 		break;
@@ -2197,9 +2197,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 
-module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
-		  &nf_conntrack_htable_size, 0600);
-
 static __always_inline unsigned int total_extension_size(void)
 {
 	/* remember to add new extensions below */
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 39df72bb9d56..803607a90102 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -1,14 +1,4 @@
-/* L3/L4 protocol support for nf_conntrack. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
 
 #include <linux/types.h>
 #include <linux/netfilter.h>
@@ -24,22 +14,39 @@
 #include <linux/netdevice.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_log.h>
 
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+extern unsigned int nf_conntrack_net_id;
+
 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly;
-struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly;
-EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
-struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = {
-	.l3proto	 = PF_UNSPEC,
-};
-EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
-
 #ifdef CONFIG_SYSCTL
 static int
 nf_ct_register_sysctl(struct net *net,
@@ -127,137 +134,6 @@ __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto)
 }
 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
 
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-const struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto)
-{
-	struct nf_conntrack_l3proto *p;
-
-	rcu_read_lock();
-	p = __nf_ct_l3proto_find(l3proto);
-	if (!try_module_get(p->me))
-		p = &nf_conntrack_l3proto_generic;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get);
-
-int
-nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
-	const struct nf_conntrack_l3proto *p;
-	int ret;
-
-retry:	p = nf_ct_l3proto_find_get(l3proto);
-	if (p == &nf_conntrack_l3proto_generic) {
-		ret = request_module("nf_conntrack-%d", l3proto);
-		if (!ret)
-			goto retry;
-
-		return -EPROTOTYPE;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get);
-
-void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
-	struct nf_conntrack_l3proto *p;
-
-	/* rcu_read_lock not necessary since the caller holds a reference, but
-	 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
-	 */
-	rcu_read_lock();
-	p = __nf_ct_l3proto_find(l3proto);
-	module_put(p->me);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
-
-static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
-{
-	const struct nf_conntrack_l3proto *l3proto;
-	int ret;
-
-	might_sleep();
-
-	ret = nf_ct_l3proto_try_module_get(nfproto);
-	if (ret < 0)
-		return ret;
-
-	/* we already have a reference, can't fail */
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(nfproto);
-	rcu_read_unlock();
-
-	if (!l3proto->net_ns_get)
-		return 0;
-
-	ret = l3proto->net_ns_get(net);
-	if (ret < 0)
-		nf_ct_l3proto_module_put(nfproto);
-
-	return ret;
-}
-
-int nf_ct_netns_get(struct net *net, u8 nfproto)
-{
-	int err;
-
-	if (nfproto == NFPROTO_INET) {
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
-		if (err < 0)
-			goto err1;
-		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
-		if (err < 0)
-			goto err2;
-	} else {
-		err = nf_ct_netns_do_get(net, nfproto);
-		if (err < 0)
-			goto err1;
-	}
-	return 0;
-
-err2:
-	nf_ct_netns_put(net, NFPROTO_IPV4);
-err1:
-	return err;
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_get);
-
-static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
-{
-	const struct nf_conntrack_l3proto *l3proto;
-
-	might_sleep();
-
-	/* same as nf_conntrack_netns_get(), reference assumed */
-	rcu_read_lock();
-	l3proto = __nf_ct_l3proto_find(nfproto);
-	rcu_read_unlock();
-
-	if (WARN_ON(!l3proto))
-		return;
-
-	if (l3proto->net_ns_put)
-		l3proto->net_ns_put(net);
-
-	nf_ct_l3proto_module_put(nfproto);
-}
-
-void nf_ct_netns_put(struct net *net, uint8_t nfproto)
-{
-	if (nfproto == NFPROTO_INET) {
-		nf_ct_netns_do_put(net, NFPROTO_IPV4);
-		nf_ct_netns_do_put(net, NFPROTO_IPV6);
-	} else
-		nf_ct_netns_do_put(net, nfproto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-
 const struct nf_conntrack_l4proto *
 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num)
 {
@@ -279,11 +155,6 @@ void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
 
-static int kill_l3proto(struct nf_conn *i, void *data)
-{
-	return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto;
-}
-
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
 	const struct nf_conntrack_l4proto *l4proto;
@@ -292,49 +163,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
 	       nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto)
-{
-	int ret = 0;
-	struct nf_conntrack_l3proto *old;
-
-	if (proto->l3proto >= NFPROTO_NUMPROTO)
-		return -EBUSY;
-
-	mutex_lock(&nf_ct_proto_mutex);
-	old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-					lockdep_is_held(&nf_ct_proto_mutex));
-	if (old != &nf_conntrack_l3proto_generic) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-
-out_unlock:
-	mutex_unlock(&nf_ct_proto_mutex);
-	return ret;
-
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_register);
-
-void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto)
-{
-	BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO);
-
-	mutex_lock(&nf_ct_proto_mutex);
-	BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
-					 lockdep_is_held(&nf_ct_proto_mutex)
-					 ) != proto);
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
-			   &nf_conntrack_l3proto_generic);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_rcu();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l3proto, (void*)proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister);
-
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 				const struct nf_conntrack_l4proto *l4proto)
 {
@@ -501,8 +329,23 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
-int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
-			   unsigned int num_proto)
+static void
+nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
+			 unsigned int num_proto)
+{
+	mutex_lock(&nf_ct_proto_mutex);
+	while (num_proto-- != 0)
+		__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+	mutex_unlock(&nf_ct_proto_mutex);
+
+	synchronize_net();
+	/* Remove all contrack entries for this protocol */
+	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
+}
+
+static int
+nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
+		       unsigned int num_proto)
 {
 	int ret = -EINVAL, ver;
 	unsigned int i;
@@ -520,7 +363,6 @@ int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
 
 int nf_ct_l4proto_pernet_register(struct net *net,
 				  const struct nf_conntrack_l4proto *const l4proto[],
@@ -544,20 +386,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
 
-void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
-			      unsigned int num_proto)
-{
-	mutex_lock(&nf_ct_proto_mutex);
-	while (num_proto-- != 0)
-		__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
-
 void nf_ct_l4proto_pernet_unregister(struct net *net,
 				const struct nf_conntrack_l4proto *const l4proto[],
 				unsigned int num_proto)
@@ -567,6 +395,563 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
+static unsigned int ipv4_helper(void *priv,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_helper *helper;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+			    ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(void *priv,
+				 struct sk_buff *skb,
+				 const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv4_conntrack_in(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+static unsigned int ipv4_conntrack_local(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *tmpl;
+
+		tmpl = nf_ct_get(skb, &ctinfo);
+		if (tmpl && nf_ct_is_template(tmpl)) {
+			/* when skipping ct, clear templates to avoid fooling
+			 * later targets/matches
+			 */
+			skb->_nfct = 0;
+			nf_ct_put(tmpl);
+		}
+		return NF_ACCEPT;
+	}
+
+	return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ * make it the first hook.
+ */
+static const struct nf_hook_ops ipv4_conntrack_ops[] = {
+	{
+		.hook		= ipv4_conntrack_in,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv4_conntrack_local,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv4_helper,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv4_confirm,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+	{
+		.hook		= ipv4_helper,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv4_confirm,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
+	},
+};
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ * blame them).
+ * Reversing the socket's dst/src point of view gives us the reply
+ * mapping.
+ */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+
+	memset(&tuple, 0, sizeof(tuple));
+
+	lock_sock(sk);
+	tuple.src.u3.ip = inet->inet_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.ip = inet->inet_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.src.l3num = PF_INET;
+	tuple.dst.protonum = sk->sk_protocol;
+	release_sock(sk);
+
+	/* We only do TCP and SCTP at the moment: is there a better way? */
+	if (tuple.dst.protonum != IPPROTO_TCP &&
+	    tuple.dst.protonum != IPPROTO_SCTP) {
+		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+		return -ENOPROTOOPT;
+	}
+
+	if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
+		pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
+			 *len, sizeof(struct sockaddr_in));
+		return -EINVAL;
+	}
+
+	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+	if (h) {
+		struct sockaddr_in sin;
+		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+		sin.sin_family = AF_INET;
+		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u.tcp.port;
+		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+			.tuple.dst.u3.ip;
+		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+		pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+			 &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+		nf_ct_put(ct);
+		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+			return -EFAULT;
+		else
+			return 0;
+	}
+	pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+		 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+		 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+	return -ENOENT;
+}
+
+static struct nf_sockopt_ops so_getorigdst = {
+	.pf		= PF_INET,
+	.get_optmin	= SO_ORIGINAL_DST,
+	.get_optmax	= SO_ORIGINAL_DST + 1,
+	.get		= getorigdst,
+	.owner		= THIS_MODULE,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
+	const struct ipv6_pinfo *inet6 = inet6_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct nf_conntrack_tuple_hash *h;
+	struct sockaddr_in6 sin6;
+	struct nf_conn *ct;
+	__be32 flow_label;
+	int bound_dev_if;
+
+	lock_sock(sk);
+	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
+	tuple.src.u.tcp.port = inet->inet_sport;
+	tuple.dst.u3.in6 = sk->sk_v6_daddr;
+	tuple.dst.u.tcp.port = inet->inet_dport;
+	tuple.dst.protonum = sk->sk_protocol;
+	bound_dev_if = sk->sk_bound_dev_if;
+	flow_label = inet6->flow_label;
+	release_sock(sk);
+
+	if (tuple.dst.protonum != IPPROTO_TCP &&
+	    tuple.dst.protonum != IPPROTO_SCTP)
+		return -ENOPROTOOPT;
+
+	if (*len < 0 || (unsigned int)*len < sizeof(sin6))
+		return -EINVAL;
+
+	h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
+	if (!h) {
+		pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
+			 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
+			 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
+	sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
+	memcpy(&sin6.sin6_addr,
+	       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
+	       sizeof(sin6.sin6_addr));
+
+	nf_ct_put(ct);
+	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
+	return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
+}
+
+static struct nf_sockopt_ops so_getorigdst6 = {
+	.pf		= NFPROTO_IPV6,
+	.get_optmin	= IP6T_SO_ORIGINAL_DST,
+	.get_optmax	= IP6T_SO_ORIGINAL_DST + 1,
+	.get		= ipv6_getorigdst,
+	.owner		= THIS_MODULE,
+};
+
+static unsigned int ipv6_confirm(void *priv,
+				 struct sk_buff *skb,
+				 const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+	int protoff;
+	__be16 frag_off;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		goto out;
+	}
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
+	/* We've seen it coming out the other side: confirm it */
+	return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv6_conntrack_in(void *priv,
+				      struct sk_buff *skb,
+				      const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_conntrack_local(void *priv,
+					 struct sk_buff *skb,
+					 const struct nf_hook_state *state)
+{
+	return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+}
+
+static unsigned int ipv6_helper(void *priv,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+	struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_helper *helper;
+	enum ip_conntrack_info ctinfo;
+	__be16 frag_off;
+	int protoff;
+	u8 nexthdr;
+
+	/* This is where we call the helper: as the packet goes out. */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		return NF_ACCEPT;
+
+	help = nfct_help(ct);
+	if (!help)
+		return NF_ACCEPT;
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	helper = rcu_dereference(help->helper);
+	if (!helper)
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		return NF_ACCEPT;
+	}
+
+	return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static const struct nf_hook_ops ipv6_conntrack_ops[] = {
+	{
+		.hook		= ipv6_conntrack_in,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_conntrack_local,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_CONNTRACK,
+	},
+	{
+		.hook		= ipv6_helper,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_LAST,
+	},
+	{
+		.hook		= ipv6_helper,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_CONNTRACK_HELPER,
+	},
+	{
+		.hook		= ipv6_confirm,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_LAST - 1,
+	},
+};
+#endif
+
+static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	int err = 0;
+
+	mutex_lock(&nf_ct_proto_mutex);
+
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		cnet->users4++;
+		if (cnet->users4 > 1)
+			goto out_unlock;
+		err = nf_defrag_ipv4_enable(net);
+		if (err) {
+			cnet->users4 = 0;
+			goto out_unlock;
+		}
+
+		err = nf_register_net_hooks(net, ipv4_conntrack_ops,
+					    ARRAY_SIZE(ipv4_conntrack_ops));
+		if (err)
+			cnet->users4 = 0;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		cnet->users6++;
+		if (cnet->users6 > 1)
+			goto out_unlock;
+		err = nf_defrag_ipv6_enable(net);
+		if (err < 0) {
+			cnet->users6 = 0;
+			goto out_unlock;
+		}
+
+		err = nf_register_net_hooks(net, ipv6_conntrack_ops,
+					    ARRAY_SIZE(ipv6_conntrack_ops));
+		if (err)
+			cnet->users6 = 0;
+		break;
+#endif
+	default:
+		err = -EPROTO;
+		break;
+	}
+ out_unlock:
+	mutex_unlock(&nf_ct_proto_mutex);
+	return err;
+}
+
+static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
+{
+	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+	mutex_lock(&nf_ct_proto_mutex);
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		if (cnet->users4 && (--cnet->users4 == 0))
+			nf_unregister_net_hooks(net, ipv4_conntrack_ops,
+						ARRAY_SIZE(ipv4_conntrack_ops));
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		if (cnet->users6 && (--cnet->users6 == 0))
+			nf_unregister_net_hooks(net, ipv6_conntrack_ops,
+						ARRAY_SIZE(ipv6_conntrack_ops));
+		break;
+#endif
+	}
+
+	mutex_unlock(&nf_ct_proto_mutex);
+}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+	int err;
+
+	if (nfproto == NFPROTO_INET) {
+		err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+		if (err < 0)
+			goto err1;
+		err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+		if (err < 0)
+			goto err2;
+	} else {
+		err = nf_ct_netns_do_get(net, nfproto);
+		if (err < 0)
+			goto err1;
+	}
+	return 0;
+
+err2:
+	nf_ct_netns_put(net, NFPROTO_IPV4);
+err1:
+	return err;
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_get);
+
+void nf_ct_netns_put(struct net *net, uint8_t nfproto)
+{
+	if (nfproto == NFPROTO_INET) {
+		nf_ct_netns_do_put(net, NFPROTO_IPV4);
+		nf_ct_netns_do_put(net, NFPROTO_IPV6);
+	} else {
+		nf_ct_netns_do_put(net, nfproto);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+
+static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
+	&nf_conntrack_l4proto_tcp4,
+	&nf_conntrack_l4proto_udp4,
+	&nf_conntrack_l4proto_icmp,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp4,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	&nf_conntrack_l4proto_udplite4,
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+	&nf_conntrack_l4proto_tcp6,
+	&nf_conntrack_l4proto_udp6,
+	&nf_conntrack_l4proto_icmpv6,
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	&nf_conntrack_l4proto_dccp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	&nf_conntrack_l4proto_sctp6,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	&nf_conntrack_l4proto_udplite6,
+#endif
+#endif /* CONFIG_IPV6 */
+};
+
+int nf_conntrack_proto_init(void)
+{
+	int ret = 0;
+
+	ret = nf_register_sockopt(&so_getorigdst);
+	if (ret < 0)
+		return ret;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	ret = nf_register_sockopt(&so_getorigdst6);
+	if (ret < 0)
+		goto cleanup_sockopt;
+#endif
+	ret = nf_ct_l4proto_register(builtin_l4proto,
+				     ARRAY_SIZE(builtin_l4proto));
+	if (ret < 0)
+		goto cleanup_sockopt2;
+
+	return ret;
+cleanup_sockopt2:
+	nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+cleanup_sockopt:
+	nf_unregister_sockopt(&so_getorigdst6);
+#endif
+	return ret;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+
+	nf_ct_l4proto_unregister(builtin_l4proto,
+				 ARRAY_SIZE(builtin_l4proto));
+	nf_unregister_sockopt(&so_getorigdst);
+#if IS_ENABLED(CONFIG_IPV6)
+	nf_unregister_sockopt(&so_getorigdst6);
+#endif
+
+	/* free l3proto protocol tables */
+	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
+		kfree(nf_ct_protos[i]);
+}
+
 int nf_conntrack_proto_pernet_init(struct net *net)
 {
 	int err;
@@ -583,6 +968,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
 	if (err < 0)
 		return err;
 
+	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
+					    ARRAY_SIZE(builtin_l4proto));
+	if (err < 0) {
+		nf_ct_l4proto_unregister_sysctl(net, pn,
+						&nf_conntrack_l4proto_generic);
+		return err;
+	}
+
 	pn->users++;
 	return 0;
 }
@@ -592,25 +985,19 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
 					&nf_conntrack_l4proto_generic);
 
+	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
+					ARRAY_SIZE(builtin_l4proto));
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(net,
 					pn,
 					&nf_conntrack_l4proto_generic);
 }
 
-int nf_conntrack_proto_init(void)
-{
-	unsigned int i;
-	for (i = 0; i < NFPROTO_NUMPROTO; i++)
-		rcu_assign_pointer(nf_ct_l3protos[i],
-				   &nf_conntrack_l3proto_generic);
-	return 0;
-}
 
-void nf_conntrack_proto_fini(void)
-{
-	unsigned int i;
-	/* free l3proto protocol tables */
-	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-		kfree(nf_ct_protos[i]);
-}
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+		  &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("ip_conntrack");
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000000..036670b38282
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,388 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmp_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmp;
+}
+
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+			      struct net *net, struct nf_conntrack_tuple *tuple)
+{
+	const struct icmphdr *hp;
+	struct icmphdr _hdr;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return false;
+
+	tuple->dst.u.icmp.type = hp->type;
+	tuple->src.u.icmp.id = hp->un.echo.id;
+	tuple->dst.u.icmp.code = hp->code;
+
+	return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+			      const struct nf_conntrack_tuple *orig)
+{
+	if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+	    !invmap[orig->dst.u.icmp.type])
+		return false;
+
+	tuple->src.u.icmp.id = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return true;
+}
+
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+	return &icmp_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo)
+{
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmp_get_timeouts(nf_ct_net(ct));
+
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+		     unsigned int dataoff)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1
+	};
+
+	if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+	    !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+		/* Can't create a new ICMP `conn' with this. */
+		pr_debug("icmp: can't create new conn with type %u\n",
+			 ct->tuplehash[0].tuple.dst.u.icmp.type);
+		nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+		return false;
+	}
+	return true;
+}
+
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+		 unsigned int hooknum)
+{
+	struct nf_conntrack_tuple innertuple, origtuple;
+	const struct nf_conntrack_l4proto *innerproto;
+	const struct nf_conntrack_tuple_hash *h;
+	const struct nf_conntrack_zone *zone;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conntrack_zone tmp;
+
+	WARN_ON(skb_nfct(skb));
+	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb,
+			       skb_network_offset(skb) + ip_hdrlen(skb)
+						       + sizeof(struct icmphdr),
+			       PF_INET, net, &origtuple)) {
+		pr_debug("icmp_error_message: failed to get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, zone, &innertuple);
+	if (!h) {
+		pr_debug("icmp_error_message: no match\n");
+		return -NF_ACCEPT;
+	}
+
+	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+		ctinfo += IP_CT_IS_REPLY;
+
+	/* Update skb to refer to this connection */
+	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+	return NF_ACCEPT;
+}
+
+static void icmp_error_log(const struct sk_buff *skb, struct net *net,
+			   u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg);
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct net *net, struct nf_conn *tmpl,
+	   struct sk_buff *skb, unsigned int dataoff,
+	   u8 pf, unsigned int hooknum)
+{
+	const struct icmphdr *icmph;
+	struct icmphdr _ih;
+
+	/* Not enough header? */
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+	if (icmph == NULL) {
+		icmp_error_log(skb, net, pf, "short packet");
+		return -NF_ACCEPT;
+	}
+
+	/* See ip_conntrack_proto_tcp.c */
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+		icmp_error_log(skb, net, pf, "bad hw icmp checksum");
+		return -NF_ACCEPT;
+	}
+
+	/*
+	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
+	 *
+	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
+	 *		  discarded.
+	 */
+	if (icmph->type > NR_ICMP_TYPES) {
+		icmp_error_log(skb, net, pf, "invalid icmp type");
+		return -NF_ACCEPT;
+	}
+
+	/* Need to track icmp error message? */
+	if (icmph->type != ICMP_DEST_UNREACH &&
+	    icmph->type != ICMP_SOURCE_QUENCH &&
+	    icmph->type != ICMP_TIME_EXCEEDED &&
+	    icmph->type != ICMP_PARAMETERPROB &&
+	    icmph->type != ICMP_REDIRECT)
+		return NF_ACCEPT;
+
+	return icmp_error_message(net, tmpl, skb, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMP_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMP_ID]	= { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMP_TYPE] ||
+	    !tb[CTA_PROTO_ICMP_CODE] ||
+	    !tb[CTA_PROTO_ICMP_ID])
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+	if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int icmp_nlattr_tuple_size(void)
+{
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+				      struct net *net, void *data)
+{
+	unsigned int *timeout = data;
+	struct nf_icmp_net *in = icmp_pernet(net);
+
+	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+		if (!timeout)
+			timeout = &in->timeout;
+		*timeout =
+			ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+	} else if (timeout) {
+		/* Set default ICMP timeout. */
+		*timeout = in->timeout;
+	}
+	return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+	[CTA_TIMEOUT_ICMP_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmp_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				     struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmp_sysctl_table,
+				sizeof(icmp_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &in->timeout;
+#endif
+	return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+	struct nf_icmp_net *in = icmp_pernet(net);
+	struct nf_proto_net *pn = &in->pn;
+
+	in->timeout = nf_ct_icmp_timeout;
+
+	return icmp_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
+{
+	.l3proto		= PF_INET,
+	.l4proto		= IPPROTO_ICMP,
+	.pkt_to_tuple		= icmp_pkt_to_tuple,
+	.invert_tuple		= icmp_invert_tuple,
+	.packet			= icmp_packet,
+	.new			= icmp_new,
+	.error			= icmp_error,
+	.destroy		= NULL,
+	.me			= NULL,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
+	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
+	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
+	.nla_policy		= icmp_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmp_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmp_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmp_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+	.init_net		= icmp_init_net,
+	.get_net_proto		= icmp_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
new file mode 100644
index 000000000000..bed07b998a10
--- /dev/null
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (C)2003,2004 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Author:
+ *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
+#include <net/netfilter/nf_log.h>
+
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
+
+static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6;
+}
+
+static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+				unsigned int dataoff,
+				struct net *net,
+				struct nf_conntrack_tuple *tuple)
+{
+	const struct icmp6hdr *hp;
+	struct icmp6hdr _hdr;
+
+	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL)
+		return false;
+	tuple->dst.u.icmp.type = hp->icmp6_type;
+	tuple->src.u.icmp.id = hp->icmp6_identifier;
+	tuple->dst.u.icmp.code = hp->icmp6_code;
+
+	return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY + 1
+};
+
+static const u_int8_t noct_valid_new[] = {
+	[ICMPV6_MGM_QUERY - 130] = 1,
+	[ICMPV6_MGM_REPORT - 130] = 1,
+	[ICMPV6_MGM_REDUCTION - 130] = 1,
+	[NDISC_ROUTER_SOLICITATION - 130] = 1,
+	[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+	[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+	[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+	[ICMPV6_MLD2_REPORT - 130] = 1
+};
+
+static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
+				const struct nf_conntrack_tuple *orig)
+{
+	int type = orig->dst.u.icmp.type - 128;
+	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
+		return false;
+
+	tuple->src.u.icmp.id   = orig->src.u.icmp.id;
+	tuple->dst.u.icmp.type = invmap[type] - 1;
+	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+	return true;
+}
+
+static unsigned int *icmpv6_get_timeouts(struct net *net)
+{
+	return &icmpv6_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmpv6_packet(struct nf_conn *ct,
+		       const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       enum ip_conntrack_info ctinfo)
+{
+	unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(nf_ct_net(ct));
+
+	/* Do not immediately delete the connection after the first
+	   successful reply to avoid excessive conntrackd traffic
+	   and also to handle correctly ICMP echo reply duplicates. */
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
+		       unsigned int dataoff)
+{
+	static const u_int8_t valid_new[] = {
+		[ICMPV6_ECHO_REQUEST - 128] = 1,
+		[ICMPV6_NI_QUERY - 128] = 1
+	};
+	int type = ct->tuplehash[0].tuple.dst.u.icmp.type - 128;
+
+	if (type < 0 || type >= sizeof(valid_new) || !valid_new[type]) {
+		/* Can't create a new ICMPv6 `conn' with this. */
+		pr_debug("icmpv6: can't create new conn with type %u\n",
+			 type + 128);
+		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
+		return false;
+	}
+	return true;
+}
+
+static int
+icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
+		     struct sk_buff *skb,
+		     unsigned int icmp6off)
+{
+	struct nf_conntrack_tuple intuple, origtuple;
+	const struct nf_conntrack_tuple_hash *h;
+	const struct nf_conntrack_l4proto *inproto;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conntrack_zone tmp;
+
+	WARN_ON(skb_nfct(skb));
+
+	/* Are they talking about one of our connections? */
+	if (!nf_ct_get_tuplepr(skb,
+			       skb_network_offset(skb)
+				+ sizeof(struct ipv6hdr)
+				+ sizeof(struct icmp6hdr),
+			       PF_INET6, net, &origtuple)) {
+		pr_debug("icmpv6_error: Can't get tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	/* rcu_read_lock()ed by nf_hook_thresh */
+	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
+
+	/* Ordinarily, we'd expect the inverted tupleproto, but it's
+	   been preserved inside the ICMP. */
+	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+		pr_debug("icmpv6_error: Can't invert tuple\n");
+		return -NF_ACCEPT;
+	}
+
+	ctinfo = IP_CT_RELATED;
+
+	h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+				  &intuple);
+	if (!h) {
+		pr_debug("icmpv6_error: no match\n");
+		return -NF_ACCEPT;
+	} else {
+		if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+			ctinfo += IP_CT_IS_REPLY;
+	}
+
+	/* Update skb to refer to this connection */
+	nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+	return NF_ACCEPT;
+}
+
+static void icmpv6_error_log(const struct sk_buff *skb, struct net *net,
+			     u8 pf, const char *msg)
+{
+	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg);
+}
+
+static int
+icmpv6_error(struct net *net, struct nf_conn *tmpl,
+	     struct sk_buff *skb, unsigned int dataoff,
+	     u8 pf, unsigned int hooknum)
+{
+	const struct icmp6hdr *icmp6h;
+	struct icmp6hdr _ih;
+	int type;
+
+	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
+	if (icmp6h == NULL) {
+		icmpv6_error_log(skb, net, pf, "short packet");
+		return -NF_ACCEPT;
+	}
+
+	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
+		icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed");
+		return -NF_ACCEPT;
+	}
+
+	type = icmp6h->icmp6_type - 130;
+	if (type >= 0 && type < sizeof(noct_valid_new) &&
+	    noct_valid_new[type]) {
+		nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+		return NF_ACCEPT;
+	}
+
+	/* is not error message ? */
+	if (icmp6h->icmp6_type >= 128)
+		return NF_ACCEPT;
+
+	return icmpv6_error_message(net, tmpl, skb, dataoff);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nlattr(struct sk_buff *skb,
+				  const struct nf_conntrack_tuple *t)
+{
+	if (nla_put_be16(skb, CTA_PROTO_ICMPV6_ID, t->src.u.icmp.id) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_TYPE, t->dst.u.icmp.type) ||
+	    nla_put_u8(skb, CTA_PROTO_ICMPV6_CODE, t->dst.u.icmp.code))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
+	[CTA_PROTO_ICMPV6_TYPE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_CODE]	= { .type = NLA_U8 },
+	[CTA_PROTO_ICMPV6_ID]	= { .type = NLA_U16 },
+};
+
+static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
+	    !tb[CTA_PROTO_ICMPV6_CODE] ||
+	    !tb[CTA_PROTO_ICMPV6_ID])
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+
+	if (tuple->dst.u.icmp.type < 128 ||
+	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+	    !invmap[tuple->dst.u.icmp.type - 128])
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned int icmpv6_nlattr_tuple_size(void)
+{
+	static unsigned int size __read_mostly;
+
+	if (!size)
+		size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
+
+	return size;
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
+					struct net *net, void *data)
+{
+	unsigned int *timeout = data;
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+
+	if (!timeout)
+		timeout = icmpv6_get_timeouts(net);
+	if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) {
+		*timeout =
+		    ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ;
+	} else {
+		/* Set default ICMPv6 timeout. */
+		*timeout = in->timeout;
+	}
+	return 0;
+}
+
+static int
+icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+	const unsigned int *timeout = data;
+
+	if (nla_put_be32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -ENOSPC;
+}
+
+static const struct nla_policy
+icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
+	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmpv6_sysctl_table[] = {
+	{
+		.procname	= "nf_conntrack_icmpv6_timeout",
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{ }
+};
+#endif /* CONFIG_SYSCTL */
+
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+				       struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+	pn->ctl_table = kmemdup(icmpv6_sysctl_table,
+				sizeof(icmpv6_sysctl_table),
+				GFP_KERNEL);
+	if (!pn->ctl_table)
+		return -ENOMEM;
+
+	pn->ctl_table[0].data = &in->timeout;
+#endif
+	return 0;
+}
+
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_proto_net *pn = &in->pn;
+
+	in->timeout = nf_ct_icmpv6_timeout;
+
+	return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+	return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
+{
+	.l3proto		= PF_INET6,
+	.l4proto		= IPPROTO_ICMPV6,
+	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
+	.invert_tuple		= icmpv6_invert_tuple,
+	.packet			= icmpv6_packet,
+	.new			= icmpv6_new,
+	.error			= icmpv6_error,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
+	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
+	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
+	.nla_policy		= icmpv6_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+	.ctnl_timeout		= {
+		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
+		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
+		.nlattr_max	= CTA_TIMEOUT_ICMP_MAX,
+		.obj_size	= sizeof(unsigned int),
+		.nla_policy	= icmpv6_timeout_nla_policy,
+	},
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+	.init_net		= icmpv6_init_net,
+	.get_net_proto		= icmpv6_get_net_proto,
+};
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 47b80fd0d2c3..13279f683da9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,12 +1,4 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/slab.h>
@@ -32,7 +24,7 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #include <linux/rculist_nulls.h>
 
-MODULE_LICENSE("GPL");
+unsigned int nf_conntrack_net_id __read_mostly;
 
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 void
@@ -674,6 +666,8 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
 static struct pernet_operations nf_conntrack_net_ops = {
 	.init		= nf_conntrack_pernet_init,
 	.exit_batch	= nf_conntrack_pernet_exit,
+	.id		= &nf_conntrack_net_id,
+	.size = sizeof(struct nf_conntrack_net),
 };
 
 static int __init nf_conntrack_standalone_init(void)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 86df2a1666fd..6366f0c0b8c1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -28,7 +28,6 @@
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_nat.h>
 
@@ -743,12 +742,6 @@ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
 {
-	int err;
-
-	err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
-	if (err < 0)
-		return err;
-
 	mutex_lock(&nf_nat_proto_mutex);
 	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
 			 &nf_nat_l4proto_tcp);
@@ -781,7 +774,6 @@ void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
 	synchronize_rcu();
 
 	nf_nat_l3proto_clean(l3proto->l3proto);
-	nf_ct_l3proto_module_put(l3proto->l3proto);
 }
 EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
 
-- 
cgit v1.2.3


From b875a5a529bedf73532000f4e2496c00a00f4765 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 13 Jul 2018 16:13:43 +0200
Subject: input: add MT_TOOL_DIAL

A dial is a tool you place on a multitouch surface which reports its
orientation or a relative angle of rotation when rotating its knob.

Some examples are the Dell Totem (on the Canvas 27"), the Microsoft Dial,
or the Griffin Powermate, though the later can't be put on a touch surface.

We give some extra space to account for other types of fingers if we need
(MT_TOOL_THUMB)

Slightly change the documentation to not make it mandatory to update each
MT_TOOL we add.

Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/input/multi-touch-protocol.rst | 12 ++++++------
 include/uapi/linux/input.h                   |  9 +++++----
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/Documentation/input/multi-touch-protocol.rst b/Documentation/input/multi-touch-protocol.rst
index b51751a0cd5d..6be70342e709 100644
--- a/Documentation/input/multi-touch-protocol.rst
+++ b/Documentation/input/multi-touch-protocol.rst
@@ -310,12 +310,12 @@ ABS_MT_TOOL_Y
 ABS_MT_TOOL_TYPE
     The type of approaching tool. A lot of kernel drivers cannot distinguish
     between different tool types, such as a finger or a pen. In such cases, the
-    event should be omitted. The protocol currently supports MT_TOOL_FINGER,
-    MT_TOOL_PEN, and MT_TOOL_PALM [#f2]_. For type B devices, this event is
-    handled by input core; drivers should instead use
-    input_mt_report_slot_state(). A contact's ABS_MT_TOOL_TYPE may change over
-    time while still touching the device, because the firmware may not be able
-    to determine which tool is being used when it first appears.
+    event should be omitted. The protocol currently mainly supports
+    MT_TOOL_FINGER, MT_TOOL_PEN, and MT_TOOL_PALM [#f2]_.
+    For type B devices, this event is handled by input core; drivers should
+    instead use input_mt_report_slot_state(). A contact's ABS_MT_TOOL_TYPE may
+    change over time while still touching the device, because the firmware may
+    not be able to determine which tool is being used when it first appears.
 
 ABS_MT_BLOB_ID
     The BLOB_ID groups several packets together into one arbitrarily shaped
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 7288a7c573cc..fb78f6f500f3 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -270,10 +270,11 @@ struct input_mask {
 /*
  * MT_TOOL types
  */
-#define MT_TOOL_FINGER		0
-#define MT_TOOL_PEN		1
-#define MT_TOOL_PALM		2
-#define MT_TOOL_MAX		2
+#define MT_TOOL_FINGER		0x00
+#define MT_TOOL_PEN		0x01
+#define MT_TOOL_PALM		0x02
+#define MT_TOOL_DIAL		0x0a
+#define MT_TOOL_MAX		0x0f
 
 /*
  * Values describing the status of a force-feedback effect
-- 
cgit v1.2.3


From ba6b055e0f3b4ff4942e4ab273260affcfad9bff Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 13 Jul 2018 16:13:49 +0200
Subject: HID: input: enable Totem on the Dell Canvas 27

The Dell Canvas 27 has a tool that can be put on the surface and acts
as a dial. The firmware processes the detection of the tool and forward
regular HID reports with X, Y, Azimuth, rotation, width/height.

The firmware also exports Contact ID, Countact Count which may hint that
several totems can be used at the same time (the FW only supports one).

We can tell that MT_TOOL_DIAL will be reported by setting the min/max
of ABS_MT_TOOL_TYPE to MT_TOOL_DIAL.

This tool is aimed at being used by the system and not the applications,
so the user space processing should not go through the regular touch
inputs.
We set INPUT_PROP_DIRECT which applies ID_INPUT_TOUCHSCREEN to this new
type of devices, but we will counter this for the time being with the
special udev hwdb entry mentioned above.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1511846

Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c      |  3 +++
 drivers/hid/hid-multitouch.c | 48 +++++++++++++++++++++++++++++++-------------
 include/linux/hid.h          |  6 ++++++
 3 files changed, 43 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index ab93dd5927c3..4e94ea3e280a 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1550,6 +1550,9 @@ static struct hid_input *hidinput_allocate(struct hid_device *hid,
 		case HID_GD_WIRELESS_RADIO_CTLS:
 			suffix = "Wireless Radio Control";
 			break;
+		case HID_GD_SYSTEM_MULTIAXIS:
+			suffix = "System Multi Axis";
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 346e9caef6f3..a4a274ebfbef 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -665,7 +665,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 	/*
 	 * Model touchscreens providing buttons as touchpads.
 	 */
-	if ((usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) {
+	if (field->application == HID_DG_TOUCHSCREEN &&
+	    (usage->hid & HID_USAGE_PAGE) == HID_UP_BUTTON) {
 		app->mt_flags |= INPUT_MT_POINTER;
 		td->inputmode_value = MT_INPUTMODE_TOUCHPAD;
 	}
@@ -692,6 +693,19 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 
 			set_abs(hi->input, code, field, cls->sn_move);
 
+			/*
+			 * A system multi-axis that exports X and Y has a high
+			 * chance of being used directly on a surface
+			 */
+			if (field->application == HID_GD_SYSTEM_MULTIAXIS) {
+				__set_bit(INPUT_PROP_DIRECT,
+					  hi->input->propbit);
+				input_set_abs_params(hi->input,
+						     ABS_MT_TOOL_TYPE,
+						     MT_TOOL_DIAL,
+						     MT_TOOL_DIAL, 0, 0);
+			}
+
 			return 1;
 		case HID_GD_Y:
 			if (prev_usage && (prev_usage->hid == usage->hid)) {
@@ -725,7 +739,9 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			MT_STORE_FIELD(confidence_state);
 			return 1;
 		case HID_DG_TIPSWITCH:
-			input_set_capability(hi->input, EV_KEY, BTN_TOUCH);
+			if (field->application != HID_GD_SYSTEM_MULTIAXIS)
+				input_set_capability(hi->input,
+						     EV_KEY, BTN_TOUCH);
 			MT_STORE_FIELD(tip_state);
 			return 1;
 		case HID_DG_CONTACTID:
@@ -802,6 +818,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		    field->application == HID_DG_TOUCHPAD &&
 		    (usage->hid & HID_USAGE) > 1)
 			code--;
+
+		if (field->application == HID_GD_SYSTEM_MULTIAXIS)
+			code = BTN_0  + ((usage->hid - 1) & HID_USAGE);
+
 		hid_map_usage(hi, usage, bit, max, EV_KEY, code);
 		input_set_capability(hi->input, EV_KEY, code);
 		return 1;
@@ -899,6 +919,7 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
 	bool inrange_state = false;
 	int active;
 	int slotnum;
+	int tool = MT_TOOL_FINGER;
 
 	if (!slot)
 		return -EINVAL;
@@ -939,8 +960,11 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
 
 	active = (*slot->tip_state || inrange_state) && confidence_state;
 
+	if (app->application == HID_GD_SYSTEM_MULTIAXIS)
+		tool = MT_TOOL_DIAL;
+
 	input_mt_slot(input, slotnum);
-	input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
+	input_mt_report_slot_state(input, tool, active);
 	if (active) {
 		/* this finger is in proximity of the sensor */
 		int wide = (*slot->w > *slot->h);
@@ -1203,6 +1227,7 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 	    field->application != HID_GD_SYSTEM_CONTROL &&
 	    field->application != HID_CP_CONSUMER_CONTROL &&
 	    field->application != HID_GD_WIRELESS_RADIO_CTLS &&
+	    field->application != HID_GD_SYSTEM_MULTIAXIS &&
 	    !(field->application == HID_VD_ASUS_CUSTOM_MEDIA_KEYS &&
 	      application->quirks & MT_QUIRK_ASUS_CUSTOM_UP))
 		return -1;
@@ -1230,9 +1255,7 @@ static int mt_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 		return 1;
 	}
 
-	if (rdata->is_mt_collection &&
-	    (field->application == HID_DG_TOUCHSCREEN ||
-	     field->application == HID_DG_TOUCHPAD))
+	if (rdata->is_mt_collection)
 		return mt_touch_input_mapping(hdev, hi, field, usage, bit, max,
 					      application);
 
@@ -1244,15 +1267,11 @@ static int mt_input_mapped(struct hid_device *hdev, struct hid_input *hi,
 		struct hid_field *field, struct hid_usage *usage,
 		unsigned long **bit, int *max)
 {
-	/*
-	 * some egalax touchscreens have "application == HID_DG_TOUCHSCREEN"
-	 * for the stylus.
-	 */
-	if (field->physical == HID_DG_STYLUS)
-		return 0;
+	struct mt_device *td = hid_get_drvdata(hdev);
+	struct mt_report_data *rdata;
 
-	if (field->application == HID_DG_TOUCHSCREEN ||
-	    field->application == HID_DG_TOUCHPAD) {
+	rdata = mt_find_report_data(td, field->report);
+	if (rdata && rdata->is_mt_collection) {
 		/* We own these mappings, tell hid-input to ignore them */
 		return -1;
 	}
@@ -1460,6 +1479,7 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
 		case HID_GD_SYSTEM_CONTROL:
 		case HID_CP_CONSUMER_CONTROL:
 		case HID_GD_WIRELESS_RADIO_CTLS:
+		case HID_GD_SYSTEM_MULTIAXIS:
 			/* already handled by hid core */
 			break;
 		case HID_DG_TOUCHSCREEN:
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 773bcb1d4044..2e4498d52a2f 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -190,6 +190,12 @@ struct hid_item {
  * http://www.usb.org/developers/hidpage/HUTRR40RadioHIDUsagesFinal.pdf
  */
 #define HID_GD_WIRELESS_RADIO_CTLS	0x0001000c
+/*
+ * System Multi-Axis, see:
+ * http://www.usb.org/developers/hidpage/HUTRR62_-_Generic_Desktop_CA_for_System_Multi-Axis_Controllers.txt
+ */
+#define HID_GD_SYSTEM_MULTIAXIS	0x0001000e
+
 #define HID_GD_X		0x00010030
 #define HID_GD_Y		0x00010031
 #define HID_GD_Z		0x00010032
-- 
cgit v1.2.3


From 08a8a7cf14595f95d5cbb28ef5c15c56a6255fb4 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 13 Jul 2018 16:13:50 +0200
Subject: HID: core: do not upper bound the collection stack

Looks like 4 was sufficient until now. However, the Surface Dial needs
a stack of 5 and simply fails at probing.
Dynamically add HID_COLLECTION_STACK_SIZE to the size of the stack if
we hit the upper bound.

Checkpatch complains about bare unsigned, so converting those to
'unsigned int' in struct hid_parser

Acked-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 17 ++++++++++++++---
 include/linux/hid.h    |  9 +++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3942ee61bd1c..5de6f18c9bf7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -128,9 +128,19 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 
 	usage = parser->local.usage[0];
 
-	if (parser->collection_stack_ptr == HID_COLLECTION_STACK_SIZE) {
-		hid_err(parser->device, "collection stack overflow\n");
-		return -EINVAL;
+	if (parser->collection_stack_ptr == parser->collection_stack_size) {
+		unsigned int *collection_stack;
+		unsigned int new_size = parser->collection_stack_size +
+					HID_COLLECTION_STACK_SIZE;
+
+		collection_stack = krealloc(parser->collection_stack,
+					    new_size * sizeof(unsigned int),
+					    GFP_KERNEL);
+		if (!collection_stack)
+			return -ENOMEM;
+
+		parser->collection_stack = collection_stack;
+		parser->collection_stack_size = new_size;
 	}
 
 	if (parser->device->maxcollection == parser->device->collection_size) {
@@ -840,6 +850,7 @@ static int hid_scan_report(struct hid_device *hid)
 		break;
 	}
 
+	kfree(parser->collection_stack);
 	vfree(parser);
 	return 0;
 }
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2e4498d52a2f..aee281522c6d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -644,12 +644,13 @@ static inline void hid_set_drvdata(struct hid_device *hdev, void *data)
 struct hid_parser {
 	struct hid_global     global;
 	struct hid_global     global_stack[HID_GLOBAL_STACK_SIZE];
-	unsigned              global_stack_ptr;
+	unsigned int          global_stack_ptr;
 	struct hid_local      local;
-	unsigned              collection_stack[HID_COLLECTION_STACK_SIZE];
-	unsigned              collection_stack_ptr;
+	unsigned int         *collection_stack;
+	unsigned int          collection_stack_ptr;
+	unsigned int          collection_stack_size;
 	struct hid_device    *device;
-	unsigned              scan_flags;
+	unsigned int          scan_flags;
 };
 
 struct hid_class_descriptor {
-- 
cgit v1.2.3


From 9afb719e7046c4f2462278862ab3db2961cc141c Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Thu, 5 Jul 2018 17:49:37 -0700
Subject: kbuild: Add build salt to the kernel and modules

In Fedora, the debug information is packaged separately (foo-debuginfo) and
can be installed separately. There's been a long standing issue where only
one version of a debuginfo info package can be installed at a time. There's
been an effort for Fedora for parallel debuginfo to rectify this problem.

Part of the requirement to allow parallel debuginfo to work is that build ids
are unique between builds. The existing upstream rpm implementation ensures
this by re-calculating the build-id using the version and release as a
seed. This doesn't work 100% for the kernel because of the vDSO which is
its own binary and doesn't get updated when embedded.

Fix this by adding some data in an ELF note for both the kernel and modules.
The data is controlled via a Kconfig option so distributions can set it
to an appropriate value to ensure uniqueness between builds.

Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/linux/build-salt.h | 20 ++++++++++++++++++++
 init/Kconfig               |  9 +++++++++
 init/version.c             |  3 +++
 scripts/mod/modpost.c      |  3 +++
 4 files changed, 35 insertions(+)
 create mode 100644 include/linux/build-salt.h

(limited to 'include')

diff --git a/include/linux/build-salt.h b/include/linux/build-salt.h
new file mode 100644
index 000000000000..bb007bd05e7a
--- /dev/null
+++ b/include/linux/build-salt.h
@@ -0,0 +1,20 @@
+#ifndef __BUILD_SALT_H
+#define __BUILD_SALT_H
+
+#include <linux/elfnote.h>
+
+#define LINUX_ELFNOTE_BUILD_SALT       0x100
+
+#ifdef __ASSEMBLER__
+
+#define BUILD_SALT \
+       ELFNOTE(Linux, LINUX_ELFNOTE_BUILD_SALT, .asciz CONFIG_BUILD_SALT)
+
+#else
+
+#define BUILD_SALT \
+       ELFNOTE32("Linux", LINUX_ELFNOTE_BUILD_SALT, CONFIG_BUILD_SALT)
+
+#endif
+
+#endif /* __BUILD_SALT_H */
diff --git a/init/Kconfig b/init/Kconfig
index 041f3a022122..d39b31484c52 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -107,6 +107,15 @@ config LOCALVERSION_AUTO
 
 	  which is done within the script "scripts/setlocalversion".)
 
+config BUILD_SALT
+       string "Build ID Salt"
+       default ""
+       help
+          The build ID is used to link binaries and their debug info. Setting
+          this option will use the value in the calculation of the build id.
+          This is mostly useful for distributions which want to ensure the
+          build is unique between builds. It's safe to leave the default.
+
 config HAVE_KERNEL_GZIP
 	bool
 
diff --git a/init/version.c b/init/version.c
index bfb4e3f4955e..ef4012ec4375 100644
--- a/init/version.c
+++ b/init/version.c
@@ -7,6 +7,7 @@
  */
 
 #include <generated/compile.h>
+#include <linux/build-salt.h>
 #include <linux/export.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
@@ -49,3 +50,5 @@ const char linux_proc_banner[] =
 	"%s version %s"
 	" (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ")"
 	" (" LINUX_COMPILER ") %s\n";
+
+BUILD_SALT;
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 1663fb19343a..dc6d714e4dcb 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2125,10 +2125,13 @@ static int check_modname_len(struct module *mod)
  **/
 static void add_header(struct buffer *b, struct module *mod)
 {
+	buf_printf(b, "#include <linux/build-salt.h>\n");
 	buf_printf(b, "#include <linux/module.h>\n");
 	buf_printf(b, "#include <linux/vermagic.h>\n");
 	buf_printf(b, "#include <linux/compiler.h>\n");
 	buf_printf(b, "\n");
+	buf_printf(b, "BUILD_SALT;\n");
+	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "MODULE_INFO(name, KBUILD_MODNAME);\n");
 	buf_printf(b, "\n");
-- 
cgit v1.2.3


From 26b2f552525cf98fad08515bd6faa427f2f22038 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 13 Jul 2018 01:38:08 +0900
Subject: netfilter: nf_tables: fix jumpstack depth validation

The level of struct nft_ctx is updated by nf_tables_check_loops().  That
is used to validate jumpstack depth. But jumpstack validation routine
doesn't update and validate recursively.  So, in some cases, chain depth
can be bigger than the NFT_JUMP_STACK_SIZE.

After this patch, The jumpstack validation routine is located in the
nft_chain_validate(). When new rules or new set elements are added, the
nft_table_validate() is called by the nf_tables_newrule and the
nf_tables_newsetelem. The nft_table_validate() calls the
nft_chain_validate() that visit all their children chains recursively.
So it can update depth of chain certainly.

Reproducer:
   %cat ./test.sh
   #!/bin/bash
   nft add table ip filter
   nft add chain ip filter input { type filter hook input priority 0\; }
   for ((i=0;i<20;i++)); do
	nft add chain ip filter a$i
   done

   nft add rule ip filter input jump a1

   for ((i=0;i<10;i++)); do
	nft add rule ip filter a$i jump a$((i+1))
   done

   for ((i=11;i<19;i++)); do
	nft add rule ip filter a$i jump a$((i+1))
   done

   nft add rule ip filter a10 jump a11

Result:
[  253.931782] WARNING: CPU: 1 PID: 0 at net/netfilter/nf_tables_core.c:186 nft_do_chain+0xacc/0xdf0 [nf_tables]
[  253.931915] Modules linked in: nf_tables nfnetlink ip_tables x_tables
[  253.932153] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.18.0-rc3+ #48
[  253.932153] RIP: 0010:nft_do_chain+0xacc/0xdf0 [nf_tables]
[  253.932153] Code: 83 f8 fb 0f 84 c7 00 00 00 e9 d0 00 00 00 83 f8 fd 74 0e 83 f8 ff 0f 84 b4 00 00 00 e9 bd 00 00 00 83 bd 64 fd ff ff 0f 76 09 <0f> 0b 31 c0 e9 bc 02 00 00 44 8b ad 64 fd
[  253.933807] RSP: 0018:ffff88011b807570 EFLAGS: 00010212
[  253.933807] RAX: 00000000fffffffd RBX: ffff88011b807660 RCX: 0000000000000000
[  253.933807] RDX: 0000000000000010 RSI: ffff880112b39d78 RDI: ffff88011b807670
[  253.933807] RBP: ffff88011b807850 R08: ffffed0023700ece R09: ffffed0023700ecd
[  253.933807] R10: ffff88011b80766f R11: ffffed0023700ece R12: ffff88011b807898
[  253.933807] R13: ffff880112b39d80 R14: ffff880112b39d60 R15: dffffc0000000000
[  253.933807] FS:  0000000000000000(0000) GS:ffff88011b800000(0000) knlGS:0000000000000000
[  253.933807] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  253.933807] CR2: 00000000014f1008 CR3: 000000006b216000 CR4: 00000000001006e0
[  253.933807] Call Trace:
[  253.933807]  <IRQ>
[  253.933807]  ? sched_clock_cpu+0x132/0x170
[  253.933807]  ? __nft_trace_packet+0x180/0x180 [nf_tables]
[  253.933807]  ? sched_clock_cpu+0x132/0x170
[  253.933807]  ? debug_show_all_locks+0x290/0x290
[  253.933807]  ? __lock_acquire+0x4835/0x4af0
[  253.933807]  ? inet_ehash_locks_alloc+0x1a0/0x1a0
[  253.933807]  ? unwind_next_frame+0x159e/0x1840
[  253.933807]  ? __read_once_size_nocheck.constprop.4+0x5/0x10
[  253.933807]  ? nft_do_chain_ipv4+0x197/0x1e0 [nf_tables]
[  253.933807]  ? nft_do_chain+0x5/0xdf0 [nf_tables]
[  253.933807]  nft_do_chain_ipv4+0x197/0x1e0 [nf_tables]
[  253.933807]  ? nft_do_chain_arp+0xb0/0xb0 [nf_tables]
[  253.933807]  ? __lock_is_held+0x9d/0x130
[  253.933807]  nf_hook_slow+0xc4/0x150
[  253.933807]  ip_local_deliver+0x28b/0x380
[  253.933807]  ? ip_call_ra_chain+0x3e0/0x3e0
[  253.933807]  ? ip_rcv_finish+0x1610/0x1610
[  253.933807]  ip_rcv+0xbcc/0xcc0
[  253.933807]  ? debug_show_all_locks+0x290/0x290
[  253.933807]  ? ip_local_deliver+0x380/0x380
[  253.933807]  ? __lock_is_held+0x9d/0x130
[  253.933807]  ? ip_local_deliver+0x380/0x380
[  253.933807]  __netif_receive_skb_core+0x1c9c/0x2240

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++--
 net/netfilter/nf_tables_api.c     | 11 ++++-------
 net/netfilter/nft_immediate.c     |  3 +++
 net/netfilter/nft_lookup.c        | 13 +++++++++++--
 4 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 08c005ce56e9..4e82a4c49912 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -150,6 +150,7 @@ static inline void nft_data_debug(const struct nft_data *data)
  *	@portid: netlink portID of the original message
  *	@seq: netlink sequence number
  *	@family: protocol family
+ *	@level: depth of the chains
  *	@report: notify via unicast netlink message
  */
 struct nft_ctx {
@@ -160,6 +161,7 @@ struct nft_ctx {
 	u32				portid;
 	u32				seq;
 	u8				family;
+	u8				level;
 	bool				report;
 };
 
@@ -865,7 +867,6 @@ enum nft_chain_flags {
  *	@table: table that this chain belongs to
  *	@handle: chain handle
  *	@use: number of jump references to this chain
- *	@level: length of longest path to this chain
  *	@flags: bitmask of enum nft_chain_flags
  *	@name: name of the chain
  */
@@ -878,7 +879,6 @@ struct nft_chain {
 	struct nft_table		*table;
 	u64				handle;
 	u32				use;
-	u16				level;
 	u8				flags:6,
 					genmask:2;
 	char				*name;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..d41fa2c82f14 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -75,6 +75,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
 {
 	ctx->net	= net;
 	ctx->family	= family;
+	ctx->level	= 0;
 	ctx->table	= table;
 	ctx->chain	= chain;
 	ctx->nla   	= nla;
@@ -2384,6 +2385,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
 	struct nft_rule *rule;
 	int err;
 
+	if (ctx->level == NFT_JUMP_STACK_SIZE)
+		return -EMLINK;
+
 	list_for_each_entry(rule, &chain->rules, list) {
 		if (!nft_is_active_next(ctx->net, rule))
 			continue;
@@ -6837,13 +6841,6 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
 			err = nf_tables_check_loops(ctx, data->verdict.chain);
 			if (err < 0)
 				return err;
-
-			if (ctx->chain->level + 1 >
-			    data->verdict.chain->level) {
-				if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
-					return -EMLINK;
-				data->verdict.chain->level = ctx->chain->level + 1;
-			}
 		}
 
 		return 0;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 15adf8ca82c3..0777a93211e2 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -98,6 +98,7 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
 				  const struct nft_data **d)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_ctx *pctx = (struct nft_ctx *)ctx;
 	const struct nft_data *data;
 	int err;
 
@@ -109,9 +110,11 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
 	switch (data->verdict.code) {
 	case NFT_JUMP:
 	case NFT_GOTO:
+		pctx->level++;
 		err = nft_chain_validate(ctx, data->verdict.chain);
 		if (err < 0)
 			return err;
+		pctx->level--;
 		break;
 	default:
 		break;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 42e6fadf1417..c2a1d84cdfc4 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -155,7 +155,9 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
 				       struct nft_set_elem *elem)
 {
 	const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+	struct nft_ctx *pctx = (struct nft_ctx *)ctx;
 	const struct nft_data *data;
+	int err;
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
 	    *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
@@ -165,10 +167,17 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
 	switch (data->verdict.code) {
 	case NFT_JUMP:
 	case NFT_GOTO:
-		return nft_chain_validate(ctx, data->verdict.chain);
+		pctx->level++;
+		err = nft_chain_validate(ctx, data->verdict.chain);
+		if (err < 0)
+			return err;
+		pctx->level--;
+		break;
 	default:
-		return 0;
+		break;
 	}
+
+	return 0;
 }
 
 static int nft_lookup_validate(const struct nft_ctx *ctx,
-- 
cgit v1.2.3


From ae891abe7c2ccf75b69ca8330225e37ecc06924e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 11 Jul 2018 15:17:22 +0200
Subject: drm/i915: Split audio component to a generic type

For allowing other drivers to use the DRM audio component, rename the
i915_audio_component_* with drm_audio_component_*, and split the
generic part into drm_audio_component.h.  The i915 specific stuff
remains in struct i915_audio_component, which contains
drm_audio_component as the base.

The license of drm_audio_component.h is kept to MIT as same as the the
original i915_component.h.

This is a preliminary change for further development, and no
functional changes by this patch itself, merely code-split and
renames.

v1->v2: Use SPDX for drm_audio_component.h, fix remaining i915
        argument in drm_audio_component.h

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/drm/i915/intel_audio.c | 22 +++++----
 include/drm/drm_audio_component.h  | 95 ++++++++++++++++++++++++++++++++++++++
 include/drm/i915_component.h       | 85 ++--------------------------------
 include/sound/hda_i915.h           |  6 +--
 include/sound/hdaudio.h            |  6 +--
 sound/hda/hdac_i915.c              | 40 ++++++++--------
 sound/pci/hda/patch_hdmi.c         |  8 ++--
 sound/soc/codecs/hdac_hdmi.c       |  2 +-
 8 files changed, 144 insertions(+), 120 deletions(-)
 create mode 100644 include/drm/drm_audio_component.h

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 3ea566f99450..7dd5605d94ae 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -639,11 +639,12 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 	dev_priv->av_enc_map[pipe] = encoder;
 	mutex_unlock(&dev_priv->av_mutex);
 
-	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) {
+	if (acomp && acomp->base.audio_ops &&
+	    acomp->base.audio_ops->pin_eld_notify) {
 		/* audio drivers expect pipe = -1 to indicate Non-MST cases */
 		if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
 			pipe = -1;
-		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
+		acomp->base.audio_ops->pin_eld_notify(acomp->base.audio_ops->audio_ptr,
 						 (int) port, (int) pipe);
 	}
 
@@ -681,11 +682,12 @@ void intel_audio_codec_disable(struct intel_encoder *encoder,
 	dev_priv->av_enc_map[pipe] = NULL;
 	mutex_unlock(&dev_priv->av_mutex);
 
-	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) {
+	if (acomp && acomp->base.audio_ops &&
+	    acomp->base.audio_ops->pin_eld_notify) {
 		/* audio drivers expect pipe = -1 to indicate Non-MST cases */
 		if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST))
 			pipe = -1;
-		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
+		acomp->base.audio_ops->pin_eld_notify(acomp->base.audio_ops->audio_ptr,
 						 (int) port, (int) pipe);
 	}
 
@@ -880,7 +882,7 @@ static int i915_audio_component_get_eld(struct device *kdev, int port,
 	return ret;
 }
 
-static const struct i915_audio_component_ops i915_audio_component_ops = {
+static const struct drm_audio_component_ops i915_audio_component_ops = {
 	.owner		= THIS_MODULE,
 	.get_power	= i915_audio_component_get_power,
 	.put_power	= i915_audio_component_put_power,
@@ -897,12 +899,12 @@ static int i915_audio_component_bind(struct device *i915_kdev,
 	struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev);
 	int i;
 
-	if (WARN_ON(acomp->ops || acomp->dev))
+	if (WARN_ON(acomp->base.ops || acomp->base.dev))
 		return -EEXIST;
 
 	drm_modeset_lock_all(&dev_priv->drm);
-	acomp->ops = &i915_audio_component_ops;
-	acomp->dev = i915_kdev;
+	acomp->base.ops = &i915_audio_component_ops;
+	acomp->base.dev = i915_kdev;
 	BUILD_BUG_ON(MAX_PORTS != I915_MAX_PORTS);
 	for (i = 0; i < ARRAY_SIZE(acomp->aud_sample_rate); i++)
 		acomp->aud_sample_rate[i] = 0;
@@ -919,8 +921,8 @@ static void i915_audio_component_unbind(struct device *i915_kdev,
 	struct drm_i915_private *dev_priv = kdev_to_i915(i915_kdev);
 
 	drm_modeset_lock_all(&dev_priv->drm);
-	acomp->ops = NULL;
-	acomp->dev = NULL;
+	acomp->base.ops = NULL;
+	acomp->base.dev = NULL;
 	dev_priv->audio_component = NULL;
 	drm_modeset_unlock_all(&dev_priv->drm);
 }
diff --git a/include/drm/drm_audio_component.h b/include/drm/drm_audio_component.h
new file mode 100644
index 000000000000..e85689f212c2
--- /dev/null
+++ b/include/drm/drm_audio_component.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+// Copyright © 2014 Intel Corporation
+
+#ifndef _DRM_AUDIO_COMPONENT_H_
+#define _DRM_AUDIO_COMPONENT_H_
+
+/**
+ * struct drm_audio_component_ops - Ops implemented by DRM driver, called by hda driver
+ */
+struct drm_audio_component_ops {
+	/**
+	 * @owner: drm module to pin down
+	 */
+	struct module *owner;
+	/**
+	 * @get_power: get the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Request the power well to be turned on.
+	 */
+	void (*get_power)(struct device *);
+	/**
+	 * @put_power: put the POWER_DOMAIN_AUDIO power well
+	 *
+	 * Allow the power well to be turned off.
+	 */
+	void (*put_power)(struct device *);
+	/**
+	 * @codec_wake_override: Enable/disable codec wake signal
+	 */
+	void (*codec_wake_override)(struct device *, bool enable);
+	/**
+	 * @get_cdclk_freq: Get the Core Display Clock in kHz
+	 */
+	int (*get_cdclk_freq)(struct device *);
+	/**
+	 * @sync_audio_rate: set n/cts based on the sample rate
+	 *
+	 * Called from audio driver. After audio driver sets the
+	 * sample rate, it will call this function to set n/cts
+	 */
+	int (*sync_audio_rate)(struct device *, int port, int pipe, int rate);
+	/**
+	 * @get_eld: fill the audio state and ELD bytes for the given port
+	 *
+	 * Called from audio driver to get the HDMI/DP audio state of the given
+	 * digital port, and also fetch ELD bytes to the given pointer.
+	 *
+	 * It returns the byte size of the original ELD (not the actually
+	 * copied size), zero for an invalid ELD, or a negative error code.
+	 *
+	 * Note that the returned size may be over @max_bytes.  Then it
+	 * implies that only a part of ELD has been copied to the buffer.
+	 */
+	int (*get_eld)(struct device *, int port, int pipe, bool *enabled,
+		       unsigned char *buf, int max_bytes);
+};
+
+/**
+ * struct drm_audio_component_audio_ops - Ops implemented by hda driver, called by DRM driver
+ */
+struct drm_audio_component_audio_ops {
+	/**
+	 * @audio_ptr: Pointer to be used in call to pin_eld_notify
+	 */
+	void *audio_ptr;
+	/**
+	 * @pin_eld_notify: Notify the HDA driver that pin sense and/or ELD information has changed
+	 *
+	 * Called when the DRM driver has set up audio pipeline or has just
+	 * begun to tear it down. This allows the HDA driver to update its
+	 * status accordingly (even when the HDA controller is in power save
+	 * mode).
+	 */
+	void (*pin_eld_notify)(void *audio_ptr, int port, int pipe);
+};
+
+/**
+ * struct drm_audio_component - Used for direct communication between DRM and hda drivers
+ */
+struct drm_audio_component {
+	/**
+	 * @dev: DRM device, used as parameter for ops
+	 */
+	struct device *dev;
+	/**
+	 * @ops: Ops implemented by DRM driver, called by hda driver
+	 */
+	const struct drm_audio_component_ops *ops;
+	/**
+	 * @audio_ops: Ops implemented by hda driver, called by DRM driver
+	 */
+	const struct drm_audio_component_audio_ops *audio_ops;
+};
+
+#endif /* _DRM_AUDIO_COMPONENT_H_ */
diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h
index 346b1f5cb180..fca22d463e1b 100644
--- a/include/drm/i915_component.h
+++ b/include/drm/i915_component.h
@@ -24,101 +24,26 @@
 #ifndef _I915_COMPONENT_H_
 #define _I915_COMPONENT_H_
 
+#include "drm_audio_component.h"
+
 /* MAX_PORT is the number of port
  * It must be sync with I915_MAX_PORTS defined i915_drv.h
  */
 #define MAX_PORTS 6
 
-/**
- * struct i915_audio_component_ops - Ops implemented by i915 driver, called by hda driver
- */
-struct i915_audio_component_ops {
-	/**
-	 * @owner: i915 module
-	 */
-	struct module *owner;
-	/**
-	 * @get_power: get the POWER_DOMAIN_AUDIO power well
-	 *
-	 * Request the power well to be turned on.
-	 */
-	void (*get_power)(struct device *);
-	/**
-	 * @put_power: put the POWER_DOMAIN_AUDIO power well
-	 *
-	 * Allow the power well to be turned off.
-	 */
-	void (*put_power)(struct device *);
-	/**
-	 * @codec_wake_override: Enable/disable codec wake signal
-	 */
-	void (*codec_wake_override)(struct device *, bool enable);
-	/**
-	 * @get_cdclk_freq: Get the Core Display Clock in kHz
-	 */
-	int (*get_cdclk_freq)(struct device *);
-	/**
-	 * @sync_audio_rate: set n/cts based on the sample rate
-	 *
-	 * Called from audio driver. After audio driver sets the
-	 * sample rate, it will call this function to set n/cts
-	 */
-	int (*sync_audio_rate)(struct device *, int port, int pipe, int rate);
-	/**
-	 * @get_eld: fill the audio state and ELD bytes for the given port
-	 *
-	 * Called from audio driver to get the HDMI/DP audio state of the given
-	 * digital port, and also fetch ELD bytes to the given pointer.
-	 *
-	 * It returns the byte size of the original ELD (not the actually
-	 * copied size), zero for an invalid ELD, or a negative error code.
-	 *
-	 * Note that the returned size may be over @max_bytes.  Then it
-	 * implies that only a part of ELD has been copied to the buffer.
-	 */
-	int (*get_eld)(struct device *, int port, int pipe, bool *enabled,
-		       unsigned char *buf, int max_bytes);
-};
-
-/**
- * struct i915_audio_component_audio_ops - Ops implemented by hda driver, called by i915 driver
- */
-struct i915_audio_component_audio_ops {
-	/**
-	 * @audio_ptr: Pointer to be used in call to pin_eld_notify
-	 */
-	void *audio_ptr;
-	/**
-	 * @pin_eld_notify: Notify the HDA driver that pin sense and/or ELD information has changed
-	 *
-	 * Called when the i915 driver has set up audio pipeline or has just
-	 * begun to tear it down. This allows the HDA driver to update its
-	 * status accordingly (even when the HDA controller is in power save
-	 * mode).
-	 */
-	void (*pin_eld_notify)(void *audio_ptr, int port, int pipe);
-};
-
 /**
  * struct i915_audio_component - Used for direct communication between i915 and hda drivers
  */
 struct i915_audio_component {
 	/**
-	 * @dev: i915 device, used as parameter for ops
+	 * @base: the drm_audio_component base class
 	 */
-	struct device *dev;
+	struct drm_audio_component	base;
+
 	/**
 	 * @aud_sample_rate: the array of audio sample rate per port
 	 */
 	int aud_sample_rate[MAX_PORTS];
-	/**
-	 * @ops: Ops implemented by i915 driver, called by hda driver
-	 */
-	const struct i915_audio_component_ops *ops;
-	/**
-	 * @audio_ops: Ops implemented by hda driver, called by i915 driver
-	 */
-	const struct i915_audio_component_audio_ops *audio_ops;
 };
 
 #endif /* _I915_COMPONENT_H_ */
diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h
index a94f5b6f92ac..f69ea84e7b65 100644
--- a/include/sound/hda_i915.h
+++ b/include/sound/hda_i915.h
@@ -5,7 +5,7 @@
 #ifndef __SOUND_HDA_I915_H
 #define __SOUND_HDA_I915_H
 
-#include <drm/i915_component.h>
+#include <drm/drm_audio_component.h>
 
 #ifdef CONFIG_SND_HDA_I915
 int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable);
@@ -17,7 +17,7 @@ int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
 			   bool *audio_enabled, char *buffer, int max_bytes);
 int snd_hdac_i915_init(struct hdac_bus *bus);
 int snd_hdac_i915_exit(struct hdac_bus *bus);
-int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *);
+int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *);
 #else
 static inline int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
 {
@@ -49,7 +49,7 @@ static inline int snd_hdac_i915_exit(struct hdac_bus *bus)
 {
 	return 0;
 }
-static inline int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *ops)
+static inline int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *ops)
 {
 	return -ENODEV;
 }
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index f1baaa88e766..ab5ee3ef2198 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -333,9 +333,9 @@ struct hdac_bus {
 	spinlock_t reg_lock;
 	struct mutex cmd_mutex;
 
-	/* i915 component interface */
-	struct i915_audio_component *audio_component;
-	int i915_power_refcount;
+	/* DRM component interface */
+	struct drm_audio_component *audio_component;
+	int drm_power_refcount;
 
 	/* parameters required for enhanced capabilities */
 	int num_streams;
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index cbe818eda336..1a88c1aaf9bb 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -16,13 +16,13 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/component.h>
-#include <drm/i915_component.h>
+#include <drm/drm_audio_component.h>
 #include <sound/core.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_i915.h>
 #include <sound/hda_register.h>
 
-static struct i915_audio_component *hdac_acomp;
+static struct drm_audio_component *hdac_acomp;
 
 /**
  * snd_hdac_set_codec_wakeup - Enable / disable HDMI/DP codec wakeup
@@ -39,7 +39,7 @@ static struct i915_audio_component *hdac_acomp;
  */
 int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
 {
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 
 	if (!acomp || !acomp->ops)
 		return -ENODEV;
@@ -74,7 +74,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_set_codec_wakeup);
  */
 int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
 {
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 
 	if (!acomp || !acomp->ops)
 		return -ENODEV;
@@ -83,14 +83,14 @@ int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
 		enable ? "enable" : "disable");
 
 	if (enable) {
-		if (!bus->i915_power_refcount++) {
+		if (!bus->drm_power_refcount++) {
 			acomp->ops->get_power(acomp->dev);
 			snd_hdac_set_codec_wakeup(bus, true);
 			snd_hdac_set_codec_wakeup(bus, false);
 		}
 	} else {
-		WARN_ON(!bus->i915_power_refcount);
-		if (!--bus->i915_power_refcount)
+		WARN_ON(!bus->drm_power_refcount);
+		if (!--bus->drm_power_refcount)
 			acomp->ops->put_power(acomp->dev);
 	}
 
@@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_display_power);
  */
 void snd_hdac_i915_set_bclk(struct hdac_bus *bus)
 {
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 	struct pci_dev *pci = to_pci_dev(bus->dev);
 	int cdclk_freq;
 	unsigned int bclk_m, bclk_n;
@@ -206,7 +206,7 @@ int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid,
 			     int dev_id, int rate)
 {
 	struct hdac_bus *bus = codec->bus;
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 	int port, pipe;
 
 	if (!acomp || !acomp->ops || !acomp->ops->sync_audio_rate)
@@ -244,7 +244,7 @@ int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
 			   bool *audio_enabled, char *buffer, int max_bytes)
 {
 	struct hdac_bus *bus = codec->bus;
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 	int port, pipe;
 
 	if (!acomp || !acomp->ops || !acomp->ops->get_eld)
@@ -262,7 +262,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_acomp_get_eld);
 
 static int hdac_component_master_bind(struct device *dev)
 {
-	struct i915_audio_component *acomp = hdac_acomp;
+	struct drm_audio_component *acomp = hdac_acomp;
 	int ret;
 
 	ret = component_bind_all(dev, acomp);
@@ -294,7 +294,7 @@ out_unbind:
 
 static void hdac_component_master_unbind(struct device *dev)
 {
-	struct i915_audio_component *acomp = hdac_acomp;
+	struct drm_audio_component *acomp = hdac_acomp;
 
 	module_put(acomp->ops->owner);
 	component_unbind_all(dev, acomp);
@@ -323,7 +323,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  *
  * Returns zero for success or a negative error code.
  */
-int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
+int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *aops)
 {
 	if (!hdac_acomp)
 		return -ENODEV;
@@ -361,7 +361,8 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
 {
 	struct component_match *match = NULL;
 	struct device *dev = bus->dev;
-	struct i915_audio_component *acomp;
+	struct i915_audio_component *i915_acomp;
+	struct drm_audio_component *acomp;
 	int ret;
 
 	if (WARN_ON(hdac_acomp))
@@ -370,9 +371,10 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
 	if (!i915_gfx_present())
 		return -ENODEV;
 
-	acomp = kzalloc(sizeof(*acomp), GFP_KERNEL);
-	if (!acomp)
+	i915_acomp = kzalloc(sizeof(*i915_acomp), GFP_KERNEL);
+	if (!i915_acomp)
 		return -ENOMEM;
+	acomp = &i915_acomp->base;
 	bus->audio_component = acomp;
 	hdac_acomp = acomp;
 
@@ -421,13 +423,13 @@ EXPORT_SYMBOL_GPL(snd_hdac_i915_init);
 int snd_hdac_i915_exit(struct hdac_bus *bus)
 {
 	struct device *dev = bus->dev;
-	struct i915_audio_component *acomp = bus->audio_component;
+	struct drm_audio_component *acomp = bus->audio_component;
 
 	if (!acomp)
 		return 0;
 
-	WARN_ON(bus->i915_power_refcount);
-	if (bus->i915_power_refcount > 0 && acomp->ops)
+	WARN_ON(bus->drm_power_refcount);
+	if (bus->drm_power_refcount > 0 && acomp->ops)
 		acomp->ops->put_power(acomp->dev);
 
 	component_master_del(dev, &hdac_component_master_ops);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 8a49415aebac..c0847017114c 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -177,7 +177,7 @@ struct hdmi_spec {
 
 	/* i915/powerwell (Haswell+/Valleyview+) specific */
 	bool use_acomp_notifier; /* use i915 eld_notify callback for hotplug */
-	struct i915_audio_component_audio_ops i915_audio_ops;
+	struct drm_audio_component_audio_ops drm_audio_ops;
 
 	struct hdac_chmap chmap;
 	hda_nid_t vendor_nid;
@@ -2511,14 +2511,14 @@ static void register_i915_notifier(struct hda_codec *codec)
 	struct hdmi_spec *spec = codec->spec;
 
 	spec->use_acomp_notifier = true;
-	spec->i915_audio_ops.audio_ptr = codec;
+	spec->drm_audio_ops.audio_ptr = codec;
 	/* intel_audio_codec_enable() or intel_audio_codec_disable()
 	 * will call pin_eld_notify with using audio_ptr pointer
 	 * We need make sure audio_ptr is really setup
 	 */
 	wmb();
-	spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-	snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
+	spec->drm_audio_ops.pin_eld_notify = intel_pin_eld_notify;
+	snd_hdac_i915_register_notifier(&spec->drm_audio_ops);
 }
 
 /* setup_stream ops override for HSW+ */
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 3e3a2a9ef310..460075475f20 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1583,7 +1583,7 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
 
 }
 
-static struct i915_audio_component_audio_ops aops = {
+static struct drm_audio_component_audio_ops aops = {
 	.pin_eld_notify	= hdac_hdmi_eld_notify_cb,
 };
 
-- 
cgit v1.2.3


From 82887c0beb1ee6b33eed8318d8e8d41c5b3eddae Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 11 Jul 2018 15:48:18 +0200
Subject: ALSA: hda/i915: Associate audio component with devres

The HD-audio i915 binding code contains a single pointer, hdac_acomp,
for allowing the access to audio component from the master bind/unbind
callbacks.  This was needed because the callbacks pass only the device
pointer and we can't guarantee the object type assigned to the drvdata
(which is free for each controller driver implementation).
And this implementation will be a problem if we support multiple
components for different DRM drivers, not only i915.

As a solution, allocate the audio component object via devres and
associate it with the given device, so that the component callbacks
can refer to it via devres_find().

The removal of the object is still done half-manually via
devres_destroy() to make the code consistent (although it may work
without the explicit call).

Also, the snd_hda_i915_register_notifier() had the reference to
hdac_acomp as well.  In this patch, the corresponding code is removed
by passing hdac_bus object to the function, too.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_i915.h     |  6 ++++--
 sound/hda/hdac_i915.c        | 34 +++++++++++++++++++++-------------
 sound/pci/hda/patch_hdmi.c   |  5 +++--
 sound/soc/codecs/hdac_hdmi.c |  2 +-
 4 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h
index f69ea84e7b65..648263791559 100644
--- a/include/sound/hda_i915.h
+++ b/include/sound/hda_i915.h
@@ -17,7 +17,8 @@ int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
 			   bool *audio_enabled, char *buffer, int max_bytes);
 int snd_hdac_i915_init(struct hdac_bus *bus);
 int snd_hdac_i915_exit(struct hdac_bus *bus);
-int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *);
+int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
+				    const struct drm_audio_component_audio_ops *ops);
 #else
 static inline int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
 {
@@ -49,7 +50,8 @@ static inline int snd_hdac_i915_exit(struct hdac_bus *bus)
 {
 	return 0;
 }
-static inline int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *ops)
+static inline int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
+						  const struct drm_audio_component_audio_ops *ops)
 {
 	return -ENODEV;
 }
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 1a88c1aaf9bb..861b77bbc7df 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -22,7 +22,14 @@
 #include <sound/hda_i915.h>
 #include <sound/hda_register.h>
 
-static struct drm_audio_component *hdac_acomp;
+static void hdac_acomp_release(struct device *dev, void *res)
+{
+}
+
+static struct drm_audio_component *hdac_get_acomp(struct device *dev)
+{
+	return devres_find(dev, hdac_acomp_release, NULL, NULL);
+}
 
 /**
  * snd_hdac_set_codec_wakeup - Enable / disable HDMI/DP codec wakeup
@@ -262,7 +269,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_acomp_get_eld);
 
 static int hdac_component_master_bind(struct device *dev)
 {
-	struct drm_audio_component *acomp = hdac_acomp;
+	struct drm_audio_component *acomp = hdac_get_acomp(dev);
 	int ret;
 
 	ret = component_bind_all(dev, acomp);
@@ -294,7 +301,7 @@ out_unbind:
 
 static void hdac_component_master_unbind(struct device *dev)
 {
-	struct drm_audio_component *acomp = hdac_acomp;
+	struct drm_audio_component *acomp = hdac_get_acomp(dev);
 
 	module_put(acomp->ops->owner);
 	component_unbind_all(dev, acomp);
@@ -314,6 +321,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
 
 /**
  * snd_hdac_i915_register_notifier - Register i915 audio component ops
+ * @bus: HDA core bus
  * @aops: i915 audio component ops
  *
  * This function is supposed to be used only by a HD-audio controller
@@ -323,12 +331,13 @@ static int hdac_component_master_match(struct device *dev, void *data)
  *
  * Returns zero for success or a negative error code.
  */
-int snd_hdac_i915_register_notifier(const struct drm_audio_component_audio_ops *aops)
+int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
+				    const struct drm_audio_component_audio_ops *aops)
 {
-	if (!hdac_acomp)
+	if (!bus->audio_component)
 		return -ENODEV;
 
-	hdac_acomp->audio_ops = aops;
+	bus->audio_component->audio_ops = aops;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_i915_register_notifier);
@@ -365,18 +374,19 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
 	struct drm_audio_component *acomp;
 	int ret;
 
-	if (WARN_ON(hdac_acomp))
+	if (WARN_ON(hdac_get_acomp(dev)))
 		return -EBUSY;
 
 	if (!i915_gfx_present())
 		return -ENODEV;
 
-	i915_acomp = kzalloc(sizeof(*i915_acomp), GFP_KERNEL);
+	i915_acomp = devres_alloc(hdac_acomp_release, sizeof(*i915_acomp),
+				  GFP_KERNEL);
 	if (!i915_acomp)
 		return -ENOMEM;
 	acomp = &i915_acomp->base;
 	bus->audio_component = acomp;
-	hdac_acomp = acomp;
+	devres_add(dev, acomp);
 
 	component_match_add(dev, &match, hdac_component_master_match, bus);
 	ret = component_master_add_with_match(dev, &hdac_component_master_ops,
@@ -400,9 +410,8 @@ int snd_hdac_i915_init(struct hdac_bus *bus)
 out_master_del:
 	component_master_del(dev, &hdac_component_master_ops);
 out_err:
-	kfree(acomp);
 	bus->audio_component = NULL;
-	hdac_acomp = NULL;
+	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
 	dev_info(dev, "failed to add i915 component master (%d)\n", ret);
 
 	return ret;
@@ -434,9 +443,8 @@ int snd_hdac_i915_exit(struct hdac_bus *bus)
 
 	component_master_del(dev, &hdac_component_master_ops);
 
-	kfree(acomp);
 	bus->audio_component = NULL;
-	hdac_acomp = NULL;
+	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
 
 	return 0;
 }
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index c0847017114c..bf174a013f2d 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2288,7 +2288,7 @@ static void generic_hdmi_free(struct hda_codec *codec)
 	int pin_idx, pcm_idx;
 
 	if (codec_has_acomp(codec))
-		snd_hdac_i915_register_notifier(NULL);
+		snd_hdac_i915_register_notifier(&codec->bus->core, NULL);
 
 	for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
 		struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx);
@@ -2518,7 +2518,8 @@ static void register_i915_notifier(struct hda_codec *codec)
 	 */
 	wmb();
 	spec->drm_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-	snd_hdac_i915_register_notifier(&spec->drm_audio_ops);
+	snd_hdac_i915_register_notifier(&codec->bus->core,
+					&spec->drm_audio_ops);
 }
 
 /* setup_stream ops override for HSW+ */
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 460075475f20..2b7c33db4ded 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1812,7 +1812,7 @@ static int hdmi_codec_probe(struct snd_soc_component *component)
 		return ret;
 
 	aops.audio_ptr = hdev;
-	ret = snd_hdac_i915_register_notifier(&aops);
+	ret = snd_hdac_i915_register_notifier(hdev->bus, &aops);
 	if (ret < 0) {
 		dev_err(&hdev->dev, "notifier register failed: err: %d\n", ret);
 		return ret;
-- 
cgit v1.2.3


From a57942bfdd61b46df94021c9c33b8faaae5b65e1 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 11 Jul 2018 16:23:16 +0200
Subject: ALSA: hda: Make audio component support more generic

This is the final step for more generic support of DRM audio
component.  The generic audio component code is now moved to its own
file, and the symbols are renamed from snd_hac_i915_* to
snd_hdac_acomp_*, respectively.  The generic code is enabled via the
new kconfig, CONFIG_SND_HDA_COMPONENT, while CONFIG_SND_HDA_I915 is
kept as the super-class.

Along with the split, three new callbacks are added to audio_ops:
pin2port is for providing the conversion between the pin number and
the widget id, and master_bind/master_unbin are called at binding /
unbinding the master component, respectively.  All these are optional,
but used in i915 implementation and also other later implementations.

A note about the new snd_hdac_acomp_init() function: there is a slight
difference between this and the old snd_hdac_i915_init().  The latter
(still) synchronizes with the master component binding, i.e. it
assures that the relevant DRM component gets bound when it returns, or
gives a negative error.  Meanwhile the new function doesn't
synchronize but just leaves as is.  It's the responsibility by the
caller's side to synchronize, or the caller may accept the
asynchronous binding on the fly.

v1->v2: Fix missing NULL check in master_bind/unbind

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/drm/i915/Kconfig      |   1 +
 include/drm/drm_audio_component.h |  23 +++
 include/sound/hda_component.h     |  61 +++++++
 include/sound/hda_i915.h          |  39 +----
 sound/hda/Kconfig                 |   7 +-
 sound/hda/Makefile                |   1 +
 sound/hda/hdac_component.c        | 335 +++++++++++++++++++++++++++++++++++++
 sound/hda/hdac_i915.c             | 343 ++------------------------------------
 sound/pci/hda/patch_hdmi.c        |  50 ++++--
 sound/soc/codecs/hdac_hdmi.c      |   8 +-
 10 files changed, 486 insertions(+), 382 deletions(-)
 create mode 100644 include/sound/hda_component.h
 create mode 100644 sound/hda/hdac_component.c

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index dfd95889f4b7..5c607f2c707b 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -23,6 +23,7 @@ config DRM_I915
 	select SYNC_FILE
 	select IOSF_MBI
 	select CRC32
+	select SND_HDA_I915 if SND_HDA_CORE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/include/drm/drm_audio_component.h b/include/drm/drm_audio_component.h
index e85689f212c2..4923b00328c1 100644
--- a/include/drm/drm_audio_component.h
+++ b/include/drm/drm_audio_component.h
@@ -4,6 +4,8 @@
 #ifndef _DRM_AUDIO_COMPONENT_H_
 #define _DRM_AUDIO_COMPONENT_H_
 
+struct drm_audio_component;
+
 /**
  * struct drm_audio_component_ops - Ops implemented by DRM driver, called by hda driver
  */
@@ -72,6 +74,27 @@ struct drm_audio_component_audio_ops {
 	 * mode).
 	 */
 	void (*pin_eld_notify)(void *audio_ptr, int port, int pipe);
+	/**
+	 * @pin2port: Check and convert from pin node to port number
+	 *
+	 * Called by HDA driver to check and convert from the pin widget node
+	 * number to a port number in the graphics side.
+	 */
+	int (*pin2port)(void *audio_ptr, int pin);
+	/**
+	 * @master_bind: (Optional) component master bind callback
+	 *
+	 * Called at binding master component, for HDA codec-specific
+	 * handling of dynamic binding.
+	 */
+	int (*master_bind)(struct device *dev, struct drm_audio_component *);
+	/**
+	 * @master_unbind: (Optional) component master unbind callback
+	 *
+	 * Called at unbinding master component, for HDA codec-specific
+	 * handling of dynamic unbinding.
+	 */
+	void (*master_unbind)(struct device *dev, struct drm_audio_component *);
 };
 
 /**
diff --git a/include/sound/hda_component.h b/include/sound/hda_component.h
new file mode 100644
index 000000000000..78626cde7081
--- /dev/null
+++ b/include/sound/hda_component.h
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// HD-Audio helpers to sync with DRM driver
+
+#ifndef __SOUND_HDA_COMPONENT_H
+#define __SOUND_HDA_COMPONENT_H
+
+#include <drm/drm_audio_component.h>
+
+#ifdef CONFIG_SND_HDA_COMPONENT
+int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable);
+int snd_hdac_display_power(struct hdac_bus *bus, bool enable);
+int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid,
+			     int dev_id, int rate);
+int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
+			   bool *audio_enabled, char *buffer, int max_bytes);
+int snd_hdac_acomp_init(struct hdac_bus *bus,
+			const struct drm_audio_component_audio_ops *aops,
+			int (*match_master)(struct device *, void *),
+			size_t extra_size);
+int snd_hdac_acomp_exit(struct hdac_bus *bus);
+int snd_hdac_acomp_register_notifier(struct hdac_bus *bus,
+				    const struct drm_audio_component_audio_ops *ops);
+#else
+static inline int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
+{
+	return 0;
+}
+static inline int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
+{
+	return 0;
+}
+static inline int snd_hdac_sync_audio_rate(struct hdac_device *codec,
+					   hda_nid_t nid, int dev_id, int rate)
+{
+	return 0;
+}
+static inline int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid,
+					 int dev_id, bool *audio_enabled,
+					 char *buffer, int max_bytes)
+{
+	return -ENODEV;
+}
+static inline int snd_hdac_acomp_init(struct hdac_bus *bus,
+				      const struct drm_audio_component_audio_ops *aops,
+				      int (*match_master)(struct device *, void *),
+				      size_t extra_size)
+{
+	return -ENODEV;
+}
+static inline int snd_hdac_acomp_exit(struct hdac_bus *bus)
+{
+	return 0;
+}
+static inline int snd_hdac_acomp_register_notifier(struct hdac_bus *bus,
+						  const struct drm_audio_component_audio_ops *ops)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif /* __SOUND_HDA_COMPONENT_H */
diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h
index 648263791559..6b79614a893b 100644
--- a/include/sound/hda_i915.h
+++ b/include/sound/hda_i915.h
@@ -5,56 +5,23 @@
 #ifndef __SOUND_HDA_I915_H
 #define __SOUND_HDA_I915_H
 
-#include <drm/drm_audio_component.h>
+#include "hda_component.h"
 
 #ifdef CONFIG_SND_HDA_I915
-int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable);
-int snd_hdac_display_power(struct hdac_bus *bus, bool enable);
 void snd_hdac_i915_set_bclk(struct hdac_bus *bus);
-int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid,
-			     int dev_id, int rate);
-int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
-			   bool *audio_enabled, char *buffer, int max_bytes);
 int snd_hdac_i915_init(struct hdac_bus *bus);
-int snd_hdac_i915_exit(struct hdac_bus *bus);
-int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
-				    const struct drm_audio_component_audio_ops *ops);
 #else
-static inline int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
-{
-	return 0;
-}
-static inline int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
-{
-	return 0;
-}
 static inline void snd_hdac_i915_set_bclk(struct hdac_bus *bus)
 {
 }
-static inline int snd_hdac_sync_audio_rate(struct hdac_device *codec,
-					   hda_nid_t nid, int dev_id, int rate)
-{
-	return 0;
-}
-static inline int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid,
-					 int dev_id, bool *audio_enabled,
-					 char *buffer, int max_bytes)
-{
-	return -ENODEV;
-}
 static inline int snd_hdac_i915_init(struct hdac_bus *bus)
 {
 	return -ENODEV;
 }
+#endif
 static inline int snd_hdac_i915_exit(struct hdac_bus *bus)
 {
-	return 0;
+	return snd_hdac_acomp_exit(bus);
 }
-static inline int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
-						  const struct drm_audio_component_audio_ops *ops)
-{
-	return -ENODEV;
-}
-#endif
 
 #endif /* __SOUND_HDA_I915_H */
diff --git a/sound/hda/Kconfig b/sound/hda/Kconfig
index 3129546398d0..2d90e11b3eaa 100644
--- a/sound/hda/Kconfig
+++ b/sound/hda/Kconfig
@@ -5,11 +5,12 @@ config SND_HDA_CORE
 config SND_HDA_DSP_LOADER
 	bool
 
+config SND_HDA_COMPONENT
+	bool
+
 config SND_HDA_I915
 	bool
-	default y
-	depends on DRM_I915
-	depends on SND_HDA_CORE
+	select SND_HDA_COMPONENT
 
 config SND_HDA_EXT_CORE
        tristate
diff --git a/sound/hda/Makefile b/sound/hda/Makefile
index e4e726f2ce98..2160202e2dc1 100644
--- a/sound/hda/Makefile
+++ b/sound/hda/Makefile
@@ -6,6 +6,7 @@ snd-hda-core-objs += trace.o
 CFLAGS_trace.o := -I$(src)
 
 # for sync with i915 gfx driver
+snd-hda-core-$(CONFIG_SND_HDA_COMPONENT) += hdac_component.o
 snd-hda-core-$(CONFIG_SND_HDA_I915) += hdac_i915.o
 
 obj-$(CONFIG_SND_HDA_CORE) += snd-hda-core.o
diff --git a/sound/hda/hdac_component.c b/sound/hda/hdac_component.c
new file mode 100644
index 000000000000..6e46a9c73aed
--- /dev/null
+++ b/sound/hda/hdac_component.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+// hdac_component.c - routines for sync between HD-A core and DRM driver
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/component.h>
+#include <sound/core.h>
+#include <sound/hdaudio.h>
+#include <sound/hda_component.h>
+#include <sound/hda_register.h>
+
+static void hdac_acomp_release(struct device *dev, void *res)
+{
+}
+
+static struct drm_audio_component *hdac_get_acomp(struct device *dev)
+{
+	return devres_find(dev, hdac_acomp_release, NULL, NULL);
+}
+
+/**
+ * snd_hdac_set_codec_wakeup - Enable / disable HDMI/DP codec wakeup
+ * @bus: HDA core bus
+ * @enable: enable or disable the wakeup
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function should be called during the chip reset, also called at
+ * resume for updating STATESTS register read.
+ *
+ * Returns zero for success or a negative error code.
+ */
+int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
+{
+	struct drm_audio_component *acomp = bus->audio_component;
+
+	if (!acomp || !acomp->ops)
+		return -ENODEV;
+
+	if (!acomp->ops->codec_wake_override)
+		return 0;
+
+	dev_dbg(bus->dev, "%s codec wakeup\n",
+		enable ? "enable" : "disable");
+
+	acomp->ops->codec_wake_override(acomp->dev, enable);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_set_codec_wakeup);
+
+/**
+ * snd_hdac_display_power - Power up / down the power refcount
+ * @bus: HDA core bus
+ * @enable: power up or down
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function manages a refcount and calls the get_power() and
+ * put_power() ops accordingly, toggling the codec wakeup, too.
+ *
+ * Returns zero for success or a negative error code.
+ */
+int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
+{
+	struct drm_audio_component *acomp = bus->audio_component;
+
+	if (!acomp || !acomp->ops)
+		return -ENODEV;
+
+	dev_dbg(bus->dev, "display power %s\n",
+		enable ? "enable" : "disable");
+
+	if (enable) {
+		if (!bus->drm_power_refcount++) {
+			if (acomp->ops->get_power)
+				acomp->ops->get_power(acomp->dev);
+			snd_hdac_set_codec_wakeup(bus, true);
+			snd_hdac_set_codec_wakeup(bus, false);
+		}
+	} else {
+		WARN_ON(!bus->drm_power_refcount);
+		if (!--bus->drm_power_refcount)
+			if (acomp->ops->put_power)
+				acomp->ops->put_power(acomp->dev);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_display_power);
+
+/**
+ * snd_hdac_sync_audio_rate - Set N/CTS based on the sample rate
+ * @codec: HDA codec
+ * @nid: the pin widget NID
+ * @dev_id: device identifier
+ * @rate: the sample rate to set
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function sets N/CTS value based on the given sample rate.
+ * Returns zero for success, or a negative error code.
+ */
+int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid,
+			     int dev_id, int rate)
+{
+	struct hdac_bus *bus = codec->bus;
+	struct drm_audio_component *acomp = bus->audio_component;
+	int port, pipe;
+
+	if (!acomp || !acomp->ops || !acomp->ops->sync_audio_rate)
+		return -ENODEV;
+	port = nid;
+	if (acomp->audio_ops && acomp->audio_ops->pin2port) {
+		port = acomp->audio_ops->pin2port(codec, nid);
+		if (port < 0)
+			return -EINVAL;
+	}
+	pipe = dev_id;
+	return acomp->ops->sync_audio_rate(acomp->dev, port, pipe, rate);
+}
+EXPORT_SYMBOL_GPL(snd_hdac_sync_audio_rate);
+
+/**
+ * snd_hdac_acomp_get_eld - Get the audio state and ELD via component
+ * @codec: HDA codec
+ * @nid: the pin widget NID
+ * @dev_id: device identifier
+ * @audio_enabled: the pointer to store the current audio state
+ * @buffer: the buffer pointer to store ELD bytes
+ * @max_bytes: the max bytes to be stored on @buffer
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function queries the current state of the audio on the given
+ * digital port and fetches the ELD bytes onto the given buffer.
+ * It returns the number of bytes for the total ELD data, zero for
+ * invalid ELD, or a negative error code.
+ *
+ * The return size is the total bytes required for the whole ELD bytes,
+ * thus it may be over @max_bytes.  If it's over @max_bytes, it implies
+ * that only a part of ELD bytes have been fetched.
+ */
+int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
+			   bool *audio_enabled, char *buffer, int max_bytes)
+{
+	struct hdac_bus *bus = codec->bus;
+	struct drm_audio_component *acomp = bus->audio_component;
+	int port, pipe;
+
+	if (!acomp || !acomp->ops || !acomp->ops->get_eld)
+		return -ENODEV;
+
+	port = nid;
+	if (acomp->audio_ops && acomp->audio_ops->pin2port) {
+		port = acomp->audio_ops->pin2port(codec, nid);
+		if (port < 0)
+			return -EINVAL;
+	}
+	pipe = dev_id;
+	return acomp->ops->get_eld(acomp->dev, port, pipe, audio_enabled,
+				   buffer, max_bytes);
+}
+EXPORT_SYMBOL_GPL(snd_hdac_acomp_get_eld);
+
+static int hdac_component_master_bind(struct device *dev)
+{
+	struct drm_audio_component *acomp = hdac_get_acomp(dev);
+	int ret;
+
+	if (WARN_ON(!acomp))
+		return -EINVAL;
+
+	ret = component_bind_all(dev, acomp);
+	if (ret < 0)
+		return ret;
+
+	if (WARN_ON(!(acomp->dev && acomp->ops))) {
+		ret = -EINVAL;
+		goto out_unbind;
+	}
+
+	/* pin the module to avoid dynamic unbinding, but only if given */
+	if (!try_module_get(acomp->ops->owner)) {
+		ret = -ENODEV;
+		goto out_unbind;
+	}
+
+	if (acomp->audio_ops && acomp->audio_ops->master_bind) {
+		ret = acomp->audio_ops->master_bind(dev, acomp);
+		if (ret < 0)
+			goto module_put;
+	}
+
+	return 0;
+
+ module_put:
+	module_put(acomp->ops->owner);
+out_unbind:
+	component_unbind_all(dev, acomp);
+
+	return ret;
+}
+
+static void hdac_component_master_unbind(struct device *dev)
+{
+	struct drm_audio_component *acomp = hdac_get_acomp(dev);
+
+	if (acomp->audio_ops && acomp->audio_ops->master_unbind)
+		acomp->audio_ops->master_unbind(dev, acomp);
+	module_put(acomp->ops->owner);
+	component_unbind_all(dev, acomp);
+	WARN_ON(acomp->ops || acomp->dev);
+}
+
+static const struct component_master_ops hdac_component_master_ops = {
+	.bind = hdac_component_master_bind,
+	.unbind = hdac_component_master_unbind,
+};
+
+/**
+ * snd_hdac_acomp_register_notifier - Register audio component ops
+ * @bus: HDA core bus
+ * @aops: audio component ops
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function sets the given ops to be called by the graphics driver.
+ *
+ * Returns zero for success or a negative error code.
+ */
+int snd_hdac_acomp_register_notifier(struct hdac_bus *bus,
+				    const struct drm_audio_component_audio_ops *aops)
+{
+	if (!bus->audio_component)
+		return -ENODEV;
+
+	bus->audio_component->audio_ops = aops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_acomp_register_notifier);
+
+/**
+ * snd_hdac_acomp_init - Initialize audio component
+ * @bus: HDA core bus
+ * @match_master: match function for finding components
+ * @extra_size: Extra bytes to allocate
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function initializes and sets up the audio component to communicate
+ * with graphics driver.
+ *
+ * Unlike snd_hdac_i915_init(), this function doesn't synchronize with the
+ * binding with the DRM component.  Each caller needs to sync via master_bind
+ * audio_ops.
+ *
+ * Returns zero for success or a negative error code.
+ */
+int snd_hdac_acomp_init(struct hdac_bus *bus,
+			const struct drm_audio_component_audio_ops *aops,
+			int (*match_master)(struct device *, void *),
+			size_t extra_size)
+{
+	struct component_match *match = NULL;
+	struct device *dev = bus->dev;
+	struct drm_audio_component *acomp;
+	int ret;
+
+	if (WARN_ON(hdac_get_acomp(dev)))
+		return -EBUSY;
+
+	acomp = devres_alloc(hdac_acomp_release, sizeof(*acomp) + extra_size,
+			     GFP_KERNEL);
+	if (!acomp)
+		return -ENOMEM;
+	acomp->audio_ops = aops;
+	bus->audio_component = acomp;
+	devres_add(dev, acomp);
+
+	component_match_add(dev, &match, match_master, bus);
+	ret = component_master_add_with_match(dev, &hdac_component_master_ops,
+					      match);
+	if (ret < 0)
+		goto out_err;
+
+	return 0;
+
+out_err:
+	bus->audio_component = NULL;
+	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
+	dev_info(dev, "failed to add audio component master (%d)\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_acomp_init);
+
+/**
+ * snd_hdac_acomp_exit - Finalize audio component
+ * @bus: HDA core bus
+ *
+ * This function is supposed to be used only by a HD-audio controller
+ * driver that needs the interaction with graphics driver.
+ *
+ * This function releases the audio component that has been used.
+ *
+ * Returns zero for success or a negative error code.
+ */
+int snd_hdac_acomp_exit(struct hdac_bus *bus)
+{
+	struct device *dev = bus->dev;
+	struct drm_audio_component *acomp = bus->audio_component;
+
+	if (!acomp)
+		return 0;
+
+	WARN_ON(bus->drm_power_refcount);
+	if (bus->drm_power_refcount > 0 && acomp->ops)
+		acomp->ops->put_power(acomp->dev);
+
+	component_master_del(dev, &hdac_component_master_ops);
+
+	bus->audio_component = NULL;
+	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_acomp_exit);
diff --git a/sound/hda/hdac_i915.c b/sound/hda/hdac_i915.c
index 861b77bbc7df..8f2aa8bc1185 100644
--- a/sound/hda/hdac_i915.c
+++ b/sound/hda/hdac_i915.c
@@ -15,96 +15,11 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/component.h>
-#include <drm/drm_audio_component.h>
 #include <sound/core.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_i915.h>
 #include <sound/hda_register.h>
 
-static void hdac_acomp_release(struct device *dev, void *res)
-{
-}
-
-static struct drm_audio_component *hdac_get_acomp(struct device *dev)
-{
-	return devres_find(dev, hdac_acomp_release, NULL, NULL);
-}
-
-/**
- * snd_hdac_set_codec_wakeup - Enable / disable HDMI/DP codec wakeup
- * @bus: HDA core bus
- * @enable: enable or disable the wakeup
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function should be called during the chip reset, also called at
- * resume for updating STATESTS register read.
- *
- * Returns zero for success or a negative error code.
- */
-int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable)
-{
-	struct drm_audio_component *acomp = bus->audio_component;
-
-	if (!acomp || !acomp->ops)
-		return -ENODEV;
-
-	if (!acomp->ops->codec_wake_override) {
-		dev_warn(bus->dev,
-			"Invalid codec wake callback\n");
-		return 0;
-	}
-
-	dev_dbg(bus->dev, "%s codec wakeup\n",
-		enable ? "enable" : "disable");
-
-	acomp->ops->codec_wake_override(acomp->dev, enable);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snd_hdac_set_codec_wakeup);
-
-/**
- * snd_hdac_display_power - Power up / down the power refcount
- * @bus: HDA core bus
- * @enable: power up or down
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function manages a refcount and calls the i915 get_power() and
- * put_power() ops accordingly, toggling the codec wakeup, too.
- *
- * Returns zero for success or a negative error code.
- */
-int snd_hdac_display_power(struct hdac_bus *bus, bool enable)
-{
-	struct drm_audio_component *acomp = bus->audio_component;
-
-	if (!acomp || !acomp->ops)
-		return -ENODEV;
-
-	dev_dbg(bus->dev, "display power %s\n",
-		enable ? "enable" : "disable");
-
-	if (enable) {
-		if (!bus->drm_power_refcount++) {
-			acomp->ops->get_power(acomp->dev);
-			snd_hdac_set_codec_wakeup(bus, true);
-			snd_hdac_set_codec_wakeup(bus, false);
-		}
-	} else {
-		WARN_ON(!bus->drm_power_refcount);
-		if (!--bus->drm_power_refcount)
-			acomp->ops->put_power(acomp->dev);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snd_hdac_display_power);
-
 #define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
 				((pci)->device == 0x0c0c) || \
 				((pci)->device == 0x0d0c) || \
@@ -165,183 +80,11 @@ void snd_hdac_i915_set_bclk(struct hdac_bus *bus)
 }
 EXPORT_SYMBOL_GPL(snd_hdac_i915_set_bclk);
 
-/* There is a fixed mapping between audio pin node and display port.
- * on SNB, IVY, HSW, BSW, SKL, BXT, KBL:
- * Pin Widget 5 - PORT B (port = 1 in i915 driver)
- * Pin Widget 6 - PORT C (port = 2 in i915 driver)
- * Pin Widget 7 - PORT D (port = 3 in i915 driver)
- *
- * on VLV, ILK:
- * Pin Widget 4 - PORT B (port = 1 in i915 driver)
- * Pin Widget 5 - PORT C (port = 2 in i915 driver)
- * Pin Widget 6 - PORT D (port = 3 in i915 driver)
- */
-static int pin2port(struct hdac_device *codec, hda_nid_t pin_nid)
-{
-	int base_nid;
-
-	switch (codec->vendor_id) {
-	case 0x80860054: /* ILK */
-	case 0x80862804: /* ILK */
-	case 0x80862882: /* VLV */
-		base_nid = 3;
-		break;
-	default:
-		base_nid = 4;
-		break;
-	}
-
-	if (WARN_ON(pin_nid <= base_nid || pin_nid > base_nid + 3))
-		return -1;
-	return pin_nid - base_nid;
-}
-
-/**
- * snd_hdac_sync_audio_rate - Set N/CTS based on the sample rate
- * @codec: HDA codec
- * @nid: the pin widget NID
- * @dev_id: device identifier
- * @rate: the sample rate to set
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function sets N/CTS value based on the given sample rate.
- * Returns zero for success, or a negative error code.
- */
-int snd_hdac_sync_audio_rate(struct hdac_device *codec, hda_nid_t nid,
-			     int dev_id, int rate)
+static int i915_component_master_match(struct device *dev, void *data)
 {
-	struct hdac_bus *bus = codec->bus;
-	struct drm_audio_component *acomp = bus->audio_component;
-	int port, pipe;
-
-	if (!acomp || !acomp->ops || !acomp->ops->sync_audio_rate)
-		return -ENODEV;
-	port = pin2port(codec, nid);
-	if (port < 0)
-		return -EINVAL;
-	pipe = dev_id;
-	return acomp->ops->sync_audio_rate(acomp->dev, port, pipe, rate);
-}
-EXPORT_SYMBOL_GPL(snd_hdac_sync_audio_rate);
-
-/**
- * snd_hdac_acomp_get_eld - Get the audio state and ELD via component
- * @codec: HDA codec
- * @nid: the pin widget NID
- * @dev_id: device identifier
- * @audio_enabled: the pointer to store the current audio state
- * @buffer: the buffer pointer to store ELD bytes
- * @max_bytes: the max bytes to be stored on @buffer
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function queries the current state of the audio on the given
- * digital port and fetches the ELD bytes onto the given buffer.
- * It returns the number of bytes for the total ELD data, zero for
- * invalid ELD, or a negative error code.
- *
- * The return size is the total bytes required for the whole ELD bytes,
- * thus it may be over @max_bytes.  If it's over @max_bytes, it implies
- * that only a part of ELD bytes have been fetched.
- */
-int snd_hdac_acomp_get_eld(struct hdac_device *codec, hda_nid_t nid, int dev_id,
-			   bool *audio_enabled, char *buffer, int max_bytes)
-{
-	struct hdac_bus *bus = codec->bus;
-	struct drm_audio_component *acomp = bus->audio_component;
-	int port, pipe;
-
-	if (!acomp || !acomp->ops || !acomp->ops->get_eld)
-		return -ENODEV;
-
-	port = pin2port(codec, nid);
-	if (port < 0)
-		return -EINVAL;
-
-	pipe = dev_id;
-	return acomp->ops->get_eld(acomp->dev, port, pipe, audio_enabled,
-				   buffer, max_bytes);
-}
-EXPORT_SYMBOL_GPL(snd_hdac_acomp_get_eld);
-
-static int hdac_component_master_bind(struct device *dev)
-{
-	struct drm_audio_component *acomp = hdac_get_acomp(dev);
-	int ret;
-
-	ret = component_bind_all(dev, acomp);
-	if (ret < 0)
-		return ret;
-
-	if (WARN_ON(!(acomp->dev && acomp->ops && acomp->ops->get_power &&
-		      acomp->ops->put_power && acomp->ops->get_cdclk_freq))) {
-		ret = -EINVAL;
-		goto out_unbind;
-	}
-
-	/*
-	 * Atm, we don't support dynamic unbinding initiated by the child
-	 * component, so pin its containing module until we unbind.
-	 */
-	if (!try_module_get(acomp->ops->owner)) {
-		ret = -ENODEV;
-		goto out_unbind;
-	}
-
-	return 0;
-
-out_unbind:
-	component_unbind_all(dev, acomp);
-
-	return ret;
-}
-
-static void hdac_component_master_unbind(struct device *dev)
-{
-	struct drm_audio_component *acomp = hdac_get_acomp(dev);
-
-	module_put(acomp->ops->owner);
-	component_unbind_all(dev, acomp);
-	WARN_ON(acomp->ops || acomp->dev);
-}
-
-static const struct component_master_ops hdac_component_master_ops = {
-	.bind = hdac_component_master_bind,
-	.unbind = hdac_component_master_unbind,
-};
-
-static int hdac_component_master_match(struct device *dev, void *data)
-{
-	/* i915 is the only supported component */
 	return !strcmp(dev->driver->name, "i915");
 }
 
-/**
- * snd_hdac_i915_register_notifier - Register i915 audio component ops
- * @bus: HDA core bus
- * @aops: i915 audio component ops
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function sets the given ops to be called by the i915 graphics driver.
- *
- * Returns zero for success or a negative error code.
- */
-int snd_hdac_i915_register_notifier(struct hdac_bus *bus,
-				    const struct drm_audio_component_audio_ops *aops)
-{
-	if (!bus->audio_component)
-		return -ENODEV;
-
-	bus->audio_component->audio_ops = aops;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snd_hdac_i915_register_notifier);
-
 /* check whether intel graphics is present */
 static bool i915_gfx_present(void)
 {
@@ -368,84 +111,26 @@ static bool i915_gfx_present(void)
  */
 int snd_hdac_i915_init(struct hdac_bus *bus)
 {
-	struct component_match *match = NULL;
-	struct device *dev = bus->dev;
-	struct i915_audio_component *i915_acomp;
 	struct drm_audio_component *acomp;
-	int ret;
-
-	if (WARN_ON(hdac_get_acomp(dev)))
-		return -EBUSY;
+	int err;
 
 	if (!i915_gfx_present())
 		return -ENODEV;
 
-	i915_acomp = devres_alloc(hdac_acomp_release, sizeof(*i915_acomp),
-				  GFP_KERNEL);
-	if (!i915_acomp)
-		return -ENOMEM;
-	acomp = &i915_acomp->base;
-	bus->audio_component = acomp;
-	devres_add(dev, acomp);
-
-	component_match_add(dev, &match, hdac_component_master_match, bus);
-	ret = component_master_add_with_match(dev, &hdac_component_master_ops,
-					      match);
-	if (ret < 0)
-		goto out_err;
-
-	/*
-	 * Atm, we don't support deferring the component binding, so make sure
-	 * i915 is loaded and that the binding successfully completes.
-	 */
-	request_module("i915");
-
+	err = snd_hdac_acomp_init(bus, NULL,
+				  i915_component_master_match,
+				  sizeof(struct i915_audio_component) - sizeof(*acomp));
+	if (err < 0)
+		return err;
+	acomp = bus->audio_component;
+	if (!acomp)
+		return -ENODEV;
+	if (!acomp->ops)
+		request_module("i915");
 	if (!acomp->ops) {
-		ret = -ENODEV;
-		goto out_master_del;
+		snd_hdac_acomp_exit(bus);
+		return -ENODEV;
 	}
-	dev_dbg(dev, "bound to i915 component master\n");
-
 	return 0;
-out_master_del:
-	component_master_del(dev, &hdac_component_master_ops);
-out_err:
-	bus->audio_component = NULL;
-	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
-	dev_info(dev, "failed to add i915 component master (%d)\n", ret);
-
-	return ret;
 }
 EXPORT_SYMBOL_GPL(snd_hdac_i915_init);
-
-/**
- * snd_hdac_i915_exit - Finalize i915 audio component
- * @bus: HDA core bus
- *
- * This function is supposed to be used only by a HD-audio controller
- * driver that needs the interaction with i915 graphics.
- *
- * This function releases the i915 audio component that has been used.
- *
- * Returns zero for success or a negative error code.
- */
-int snd_hdac_i915_exit(struct hdac_bus *bus)
-{
-	struct device *dev = bus->dev;
-	struct drm_audio_component *acomp = bus->audio_component;
-
-	if (!acomp)
-		return 0;
-
-	WARN_ON(bus->drm_power_refcount);
-	if (bus->drm_power_refcount > 0 && acomp->ops)
-		acomp->ops->put_power(acomp->dev);
-
-	component_master_del(dev, &hdac_component_master_ops);
-
-	bus->audio_component = NULL;
-	devres_destroy(dev, hdac_acomp_release, NULL, NULL);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snd_hdac_i915_exit);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index bf174a013f2d..1de5491fb9bf 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -183,7 +183,7 @@ struct hdmi_spec {
 	hda_nid_t vendor_nid;
 };
 
-#ifdef CONFIG_SND_HDA_I915
+#ifdef CONFIG_SND_HDA_COMPONENT
 static inline bool codec_has_acomp(struct hda_codec *codec)
 {
 	struct hdmi_spec *spec = codec->spec;
@@ -2288,7 +2288,7 @@ static void generic_hdmi_free(struct hda_codec *codec)
 	int pin_idx, pcm_idx;
 
 	if (codec_has_acomp(codec))
-		snd_hdac_i915_register_notifier(&codec->bus->core, NULL);
+		snd_hdac_acomp_register_notifier(&codec->bus->core, NULL);
 
 	for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
 		struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx);
@@ -2471,6 +2471,38 @@ static void haswell_set_power_state(struct hda_codec *codec, hda_nid_t fg,
 	snd_hda_codec_set_power_to_all(codec, fg, power_state);
 }
 
+/* There is a fixed mapping between audio pin node and display port.
+ * on SNB, IVY, HSW, BSW, SKL, BXT, KBL:
+ * Pin Widget 5 - PORT B (port = 1 in i915 driver)
+ * Pin Widget 6 - PORT C (port = 2 in i915 driver)
+ * Pin Widget 7 - PORT D (port = 3 in i915 driver)
+ *
+ * on VLV, ILK:
+ * Pin Widget 4 - PORT B (port = 1 in i915 driver)
+ * Pin Widget 5 - PORT C (port = 2 in i915 driver)
+ * Pin Widget 6 - PORT D (port = 3 in i915 driver)
+ */
+static int intel_base_nid(struct hda_codec *codec)
+{
+	switch (codec->core.vendor_id) {
+	case 0x80860054: /* ILK */
+	case 0x80862804: /* ILK */
+	case 0x80862882: /* VLV */
+		return 4;
+	default:
+		return 5;
+	}
+}
+
+static int intel_pin2port(void *audio_ptr, int pin_nid)
+{
+	int base_nid = intel_base_nid(audio_ptr);
+
+	if (WARN_ON(pin_nid < base_nid || pin_nid >= base_nid + 3))
+		return -1;
+	return pin_nid - base_nid + 1; /* intel port is 1-based */
+}
+
 static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe)
 {
 	struct hda_codec *codec = audio_ptr;
@@ -2481,16 +2513,7 @@ static void intel_pin_eld_notify(void *audio_ptr, int port, int pipe)
 	if (port < 1 || port > 3)
 		return;
 
-	switch (codec->core.vendor_id) {
-	case 0x80860054: /* ILK */
-	case 0x80862804: /* ILK */
-	case 0x80862882: /* VLV */
-		pin_nid = port + 0x03;
-		break;
-	default:
-		pin_nid = port + 0x04;
-		break;
-	}
+	pin_nid = port + intel_base_nid(codec) - 1; /* intel port is 1-based */
 
 	/* skip notification during system suspend (but not in runtime PM);
 	 * the state will be updated at resume
@@ -2517,8 +2540,9 @@ static void register_i915_notifier(struct hda_codec *codec)
 	 * We need make sure audio_ptr is really setup
 	 */
 	wmb();
+	spec->drm_audio_ops.pin2port = intel_pin2port;
 	spec->drm_audio_ops.pin_eld_notify = intel_pin_eld_notify;
-	snd_hdac_i915_register_notifier(&codec->bus->core,
+	snd_hdac_acomp_register_notifier(&codec->bus->core,
 					&spec->drm_audio_ops);
 }
 
diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 2b7c33db4ded..4748a9d5de3b 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -1530,6 +1530,11 @@ free_widgets:
 	return ret;
 }
 
+static int hdac_hdmi_pin2port(void *aptr, int pin)
+{
+	return pin - 4; /* map NID 0x05 -> port #1 */
+}
+
 static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
 {
 	struct hdac_device *hdev = aptr;
@@ -1584,6 +1589,7 @@ static void hdac_hdmi_eld_notify_cb(void *aptr, int port, int pipe)
 }
 
 static struct drm_audio_component_audio_ops aops = {
+	.pin2port	= hdac_hdmi_pin2port,
 	.pin_eld_notify	= hdac_hdmi_eld_notify_cb,
 };
 
@@ -1812,7 +1818,7 @@ static int hdmi_codec_probe(struct snd_soc_component *component)
 		return ret;
 
 	aops.audio_ptr = hdev;
-	ret = snd_hdac_i915_register_notifier(hdev->bus, &aops);
+	ret = snd_hdac_acomp_register_notifier(hdev->bus, &aops);
 	if (ret < 0) {
 		dev_err(&hdev->dev, "notifier register failed: err: %d\n", ret);
 		return ret;
-- 
cgit v1.2.3


From 2fe31e43124040a67495fa81f7ac67bc4c09ebc8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:10 +0200
Subject: hwmon: Add missing HWMON_T_LCRIT_ALARM define

The enum hwmon_temp_lcrit_alarm exists, but the BIT definition is
missing.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hwmon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index e5fd2707b6df..1b74ad11a5a4 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -93,6 +93,7 @@ enum hwmon_temp_attributes {
 #define HWMON_T_MIN_ALARM	BIT(hwmon_temp_min_alarm)
 #define HWMON_T_MAX_ALARM	BIT(hwmon_temp_max_alarm)
 #define HWMON_T_CRIT_ALARM	BIT(hwmon_temp_crit_alarm)
+#define HWMON_T_LCRIT_ALARM	BIT(hwmon_temp_lcrit_alarm)
 #define HWMON_T_EMERGENCY_ALARM	BIT(hwmon_temp_emergency_alarm)
 #define HWMON_T_FAULT		BIT(hwmon_temp_fault)
 #define HWMON_T_OFFSET		BIT(hwmon_temp_offset)
-- 
cgit v1.2.3


From aa7f29b07c8702127124d0522e3cd46850cdbc41 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:11 +0200
Subject: hwmon: Add support for power min, lcrit, min_alarm and lcrit_alarm

Some sensors support reporting minimal and lower critical power, as
well as alarms when these thresholds are reached. Add support for
these attributes to the hwmon core.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/hwmon/hwmon.c | 4 ++++
 include/linux/hwmon.h | 8 ++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index e88c01961948..33d51281272b 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -394,12 +394,16 @@ static const char * const hwmon_power_attr_templates[] = {
 	[hwmon_power_cap_hyst] = "power%d_cap_hyst",
 	[hwmon_power_cap_max] = "power%d_cap_max",
 	[hwmon_power_cap_min] = "power%d_cap_min",
+	[hwmon_power_min] = "power%d_min",
 	[hwmon_power_max] = "power%d_max",
+	[hwmon_power_lcrit] = "power%d_lcrit",
 	[hwmon_power_crit] = "power%d_crit",
 	[hwmon_power_label] = "power%d_label",
 	[hwmon_power_alarm] = "power%d_alarm",
 	[hwmon_power_cap_alarm] = "power%d_cap_alarm",
+	[hwmon_power_min_alarm] = "power%d_min_alarm",
 	[hwmon_power_max_alarm] = "power%d_max_alarm",
+	[hwmon_power_lcrit_alarm] = "power%d_lcrit_alarm",
 	[hwmon_power_crit_alarm] = "power%d_crit_alarm",
 };
 
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 1b74ad11a5a4..b217101ca76e 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -188,12 +188,16 @@ enum hwmon_power_attributes {
 	hwmon_power_cap_hyst,
 	hwmon_power_cap_max,
 	hwmon_power_cap_min,
+	hwmon_power_min,
 	hwmon_power_max,
 	hwmon_power_crit,
+	hwmon_power_lcrit,
 	hwmon_power_label,
 	hwmon_power_alarm,
 	hwmon_power_cap_alarm,
+	hwmon_power_min_alarm,
 	hwmon_power_max_alarm,
+	hwmon_power_lcrit_alarm,
 	hwmon_power_crit_alarm,
 };
 
@@ -214,12 +218,16 @@ enum hwmon_power_attributes {
 #define HWMON_P_CAP_HYST		BIT(hwmon_power_cap_hyst)
 #define HWMON_P_CAP_MAX			BIT(hwmon_power_cap_max)
 #define HWMON_P_CAP_MIN			BIT(hwmon_power_cap_min)
+#define HWMON_P_MIN			BIT(hwmon_power_min)
 #define HWMON_P_MAX			BIT(hwmon_power_max)
+#define HWMON_P_LCRIT			BIT(hwmon_power_lcrit)
 #define HWMON_P_CRIT			BIT(hwmon_power_crit)
 #define HWMON_P_LABEL			BIT(hwmon_power_label)
 #define HWMON_P_ALARM			BIT(hwmon_power_alarm)
 #define HWMON_P_CAP_ALARM		BIT(hwmon_power_cap_alarm)
+#define HWMON_P_MIN_ALARM		BIT(hwmon_power_max_alarm)
 #define HWMON_P_MAX_ALARM		BIT(hwmon_power_max_alarm)
+#define HWMON_P_LCRIT_ALARM		BIT(hwmon_power_lcrit_alarm)
 #define HWMON_P_CRIT_ALARM		BIT(hwmon_power_crit_alarm)
 
 enum hwmon_energy_attributes {
-- 
cgit v1.2.3


From dcb5d0fcaa1ddd0af9c18ef51e6b05e38a6cec71 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:12 +0200
Subject: hwmon: Add helper to tell if a char is invalid in a name

HWMON device names are not allowed to contain "-* \t\n". Add a helper
which will return true if passed an invalid character. It can be used
to massage a string into a hwmon compatible name by replacing invalid
characters with '_'.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hwmon.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index b217101ca76e..9493d4a388db 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -398,4 +398,27 @@ devm_hwmon_device_register_with_info(struct device *dev,
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
 
+/**
+ * hwmon_is_bad_char - Is the char invalid in a hwmon name
+ * @ch: the char to be considered
+ *
+ * hwmon_is_bad_char() can be used to determine if the given character
+ * may not be used in a hwmon name.
+ *
+ * Returns true if the char is invalid, false otherwise.
+ */
+static inline bool hwmon_is_bad_char(const char ch)
+{
+	switch (ch) {
+	case '-':
+	case '*':
+	case ' ':
+	case '\t':
+	case '\n':
+		return true;
+	default:
+		return false;
+	}
+}
+
 #endif
-- 
cgit v1.2.3


From 1323061a018a7514287894a552c4ec2a5f0cb0cd Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 17 Jul 2018 21:48:13 +0200
Subject: net: phy: sfp: Add HWMON support for module sensors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SFP modules can contain a number of sensors. The EEPROM also contains
recommended alarm and critical values for each sensor, and indications
of if these have been exceeded. Export this information via
HWMON. Currently temperature, VCC, bias current, transmit power, and
possibly receiver power is supported.

The sensors in the modules can either return calibrate or uncalibrated
values. Uncalibrated values need to be manipulated, using coefficients
provided in the SFP EEPROM. Uncalibrated receive power values require
floating point maths in order to calibrate them. Performing this in
the kernel is hard. So if the SFP module indicates it uses
uncalibrated values, RX power is not made available.

With this hwmon device, it is possible to view the sensor values using
lm-sensors programs:

in0:          +3.29 V  (crit min =  +2.90 V, min =  +3.00 V)
                       (max =  +3.60 V, crit max =  +3.70 V)
temp1:        +33.0°C  (low  =  -5.0°C, high = +80.0°C)
                       (crit low = -10.0°C, crit = +85.0°C)
power1:      1000.00 nW (max = 794.00 uW, min =  50.00 uW)  ALARM (LCRIT)
                       (lcrit =  40.00 uW, crit = 1000.00 uW)
curr1:        +0.00 A  (crit min =  +0.00 A, min =  +0.00 A)  ALARM (LCRIT, MIN)
                       (max =  +0.01 A, crit max =  +0.01 A)

The scaling sensors performs on the bias current is not particularly
good. The raw values are more useful:

curr1:
  curr1_input: 0.000
  curr1_min: 0.002
  curr1_max: 0.010
  curr1_lcrit: 0.000
  curr1_crit: 0.011
  curr1_min_alarm: 1.000
  curr1_max_alarm: 0.000
  curr1_lcrit_alarm: 1.000
  curr1_crit_alarm: 0.000

In order to keep the I2C overhead to a minimum, the constant values,
such as limits and calibration coefficients are read once at module
insertion time. Thus only reading *_input and *_alarm properties
requires i2c read operations.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig |   1 +
 drivers/net/phy/sfp.c   | 727 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sfp.h     |  72 ++++-
 3 files changed, 799 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index ceede09a2845..9beac427f9e8 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -215,6 +215,7 @@ config SFP
 	tristate "SFP cage support"
 	depends on I2C && PHYLINK
 	select MDIO_I2C
+	imply HWMON
 
 config AMD_PHY
 	tristate "AMD PHYs"
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index c4c92db86dfa..5661226cf75b 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1,5 +1,7 @@
+#include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/hwmon.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
@@ -131,6 +133,12 @@ struct sfp {
 	unsigned int sm_retries;
 
 	struct sfp_eeprom_id id;
+#if IS_ENABLED(CONFIG_HWMON)
+	struct sfp_diag diag;
+	struct device *hwmon_dev;
+	char *hwmon_name;
+#endif
+
 };
 
 static bool sff_module_supported(const struct sfp_eeprom_id *id)
@@ -316,6 +324,719 @@ static unsigned int sfp_check(void *buf, size_t len)
 	return check;
 }
 
+/* hwmon */
+#if IS_ENABLED(CONFIG_HWMON)
+static umode_t sfp_hwmon_is_visible(const void *data,
+				    enum hwmon_sensor_types type,
+				    u32 attr, int channel)
+{
+	const struct sfp *sfp = data;
+
+	switch (type) {
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+		case hwmon_temp_min_alarm:
+		case hwmon_temp_max_alarm:
+		case hwmon_temp_lcrit_alarm:
+		case hwmon_temp_crit_alarm:
+		case hwmon_temp_min:
+		case hwmon_temp_max:
+		case hwmon_temp_lcrit:
+		case hwmon_temp_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_input:
+		case hwmon_in_min_alarm:
+		case hwmon_in_max_alarm:
+		case hwmon_in_lcrit_alarm:
+		case hwmon_in_crit_alarm:
+		case hwmon_in_min:
+		case hwmon_in_max:
+		case hwmon_in_lcrit:
+		case hwmon_in_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_curr:
+		switch (attr) {
+		case hwmon_curr_input:
+		case hwmon_curr_min_alarm:
+		case hwmon_curr_max_alarm:
+		case hwmon_curr_lcrit_alarm:
+		case hwmon_curr_crit_alarm:
+		case hwmon_curr_min:
+		case hwmon_curr_max:
+		case hwmon_curr_lcrit:
+		case hwmon_curr_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_power:
+		/* External calibration of receive power requires
+		 * floating point arithmetic. Doing that in the kernel
+		 * is not easy, so just skip it. If the module does
+		 * not require external calibration, we can however
+		 * show receiver power, since FP is then not needed.
+		 */
+		if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL &&
+		    channel == 1)
+			return 0;
+		switch (attr) {
+		case hwmon_power_input:
+		case hwmon_power_min_alarm:
+		case hwmon_power_max_alarm:
+		case hwmon_power_lcrit_alarm:
+		case hwmon_power_crit_alarm:
+		case hwmon_power_min:
+		case hwmon_power_max:
+		case hwmon_power_lcrit:
+		case hwmon_power_crit:
+			return 0444;
+		default:
+			return 0;
+		}
+	default:
+		return 0;
+	}
+}
+
+static int sfp_hwmon_read_sensor(struct sfp *sfp, int reg, long *value)
+{
+	__be16 val;
+	int err;
+
+	err = sfp_read(sfp, true, reg, &val, sizeof(val));
+	if (err < 0)
+		return err;
+
+	*value = be16_to_cpu(val);
+
+	return 0;
+}
+
+static void sfp_hwmon_to_rx_power(long *value)
+{
+	*value = DIV_ROUND_CLOSEST(*value, 100);
+}
+
+static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset,
+				long *value)
+{
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_EXT_CAL)
+		*value = DIV_ROUND_CLOSEST(*value * slope, 256) + offset;
+}
+
+static void sfp_hwmon_calibrate_temp(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_t_slope),
+			    be16_to_cpu(sfp->diag.cal_t_offset), value);
+
+	if (*value >= 0x8000)
+		*value -= 0x10000;
+
+	*value = DIV_ROUND_CLOSEST(*value * 1000, 256);
+}
+
+static void sfp_hwmon_calibrate_vcc(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_v_slope),
+			    be16_to_cpu(sfp->diag.cal_v_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static void sfp_hwmon_calibrate_bias(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txi_slope),
+			    be16_to_cpu(sfp->diag.cal_txi_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 500);
+}
+
+static void sfp_hwmon_calibrate_tx_power(struct sfp *sfp, long *value)
+{
+	sfp_hwmon_calibrate(sfp, be16_to_cpu(sfp->diag.cal_txpwr_slope),
+			    be16_to_cpu(sfp->diag.cal_txpwr_offset), value);
+
+	*value = DIV_ROUND_CLOSEST(*value, 10);
+}
+
+static int sfp_hwmon_read_temp(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_temp(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_vcc(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_vcc(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_bias(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_bias(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_tx_power(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_calibrate_tx_power(sfp, value);
+
+	return 0;
+}
+
+static int sfp_hwmon_read_rx_power(struct sfp *sfp, int reg, long *value)
+{
+	int err;
+
+	err = sfp_hwmon_read_sensor(sfp, reg, value);
+	if (err < 0)
+		return err;
+
+	sfp_hwmon_to_rx_power(value);
+
+	return 0;
+}
+
+static int sfp_hwmon_temp(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		return sfp_hwmon_read_temp(sfp, SFP_TEMP, value);
+
+	case hwmon_temp_lcrit:
+		*value = be16_to_cpu(sfp->diag.temp_low_alarm);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_min:
+		*value = be16_to_cpu(sfp->diag.temp_low_warn);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+	case hwmon_temp_max:
+		*value = be16_to_cpu(sfp->diag.temp_high_warn);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_crit:
+		*value = be16_to_cpu(sfp->diag.temp_high_alarm);
+		sfp_hwmon_calibrate_temp(sfp, value);
+		return 0;
+
+	case hwmon_temp_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TEMP_LOW);
+		return 0;
+
+	case hwmon_temp_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TEMP_LOW);
+		return 0;
+
+	case hwmon_temp_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TEMP_HIGH);
+		return 0;
+
+	case hwmon_temp_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TEMP_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_vcc(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_in_input:
+		return sfp_hwmon_read_vcc(sfp, SFP_VCC, value);
+
+	case hwmon_in_lcrit:
+		*value = be16_to_cpu(sfp->diag.volt_low_alarm);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_min:
+		*value = be16_to_cpu(sfp->diag.volt_low_warn);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_max:
+		*value = be16_to_cpu(sfp->diag.volt_high_warn);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_crit:
+		*value = be16_to_cpu(sfp->diag.volt_high_alarm);
+		sfp_hwmon_calibrate_vcc(sfp, value);
+		return 0;
+
+	case hwmon_in_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_VCC_LOW);
+		return 0;
+
+	case hwmon_in_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_VCC_LOW);
+		return 0;
+
+	case hwmon_in_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_VCC_HIGH);
+		return 0;
+
+	case hwmon_in_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_VCC_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_bias(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_curr_input:
+		return sfp_hwmon_read_bias(sfp, SFP_TX_BIAS, value);
+
+	case hwmon_curr_lcrit:
+		*value = be16_to_cpu(sfp->diag.bias_low_alarm);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_min:
+		*value = be16_to_cpu(sfp->diag.bias_low_warn);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_max:
+		*value = be16_to_cpu(sfp->diag.bias_high_warn);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_crit:
+		*value = be16_to_cpu(sfp->diag.bias_high_alarm);
+		sfp_hwmon_calibrate_bias(sfp, value);
+		return 0;
+
+	case hwmon_curr_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TX_BIAS_LOW);
+		return 0;
+
+	case hwmon_curr_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TX_BIAS_LOW);
+		return 0;
+
+	case hwmon_curr_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TX_BIAS_HIGH);
+		return 0;
+
+	case hwmon_curr_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TX_BIAS_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_tx_power(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_power_input:
+		return sfp_hwmon_read_tx_power(sfp, SFP_TX_POWER, value);
+
+	case hwmon_power_lcrit:
+		*value = be16_to_cpu(sfp->diag.txpwr_low_alarm);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_min:
+		*value = be16_to_cpu(sfp->diag.txpwr_low_warn);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_max:
+		*value = be16_to_cpu(sfp->diag.txpwr_high_warn);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_crit:
+		*value = be16_to_cpu(sfp->diag.txpwr_high_alarm);
+		sfp_hwmon_calibrate_tx_power(sfp, value);
+		return 0;
+
+	case hwmon_power_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TXPWR_LOW);
+		return 0;
+
+	case hwmon_power_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TXPWR_LOW);
+		return 0;
+
+	case hwmon_power_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN0_TXPWR_HIGH);
+		return 0;
+
+	case hwmon_power_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM0, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM0_TXPWR_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_rx_power(struct sfp *sfp, u32 attr, long *value)
+{
+	u8 status;
+	int err;
+
+	switch (attr) {
+	case hwmon_power_input:
+		return sfp_hwmon_read_rx_power(sfp, SFP_RX_POWER, value);
+
+	case hwmon_power_lcrit:
+		*value = be16_to_cpu(sfp->diag.rxpwr_low_alarm);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_min:
+		*value = be16_to_cpu(sfp->diag.rxpwr_low_warn);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_max:
+		*value = be16_to_cpu(sfp->diag.rxpwr_high_warn);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_crit:
+		*value = be16_to_cpu(sfp->diag.rxpwr_high_alarm);
+		sfp_hwmon_to_rx_power(value);
+		return 0;
+
+	case hwmon_power_lcrit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM1_RXPWR_LOW);
+		return 0;
+
+	case hwmon_power_min_alarm:
+		err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN1_RXPWR_LOW);
+		return 0;
+
+	case hwmon_power_max_alarm:
+		err = sfp_read(sfp, true, SFP_WARN1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_WARN1_RXPWR_HIGH);
+		return 0;
+
+	case hwmon_power_crit_alarm:
+		err = sfp_read(sfp, true, SFP_ALARM1, &status, sizeof(status));
+		if (err < 0)
+			return err;
+
+		*value = !!(status & SFP_ALARM1_RXPWR_HIGH);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int sfp_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			  u32 attr, int channel, long *value)
+{
+	struct sfp *sfp = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_temp:
+		return sfp_hwmon_temp(sfp, attr, value);
+	case hwmon_in:
+		return sfp_hwmon_vcc(sfp, attr, value);
+	case hwmon_curr:
+		return sfp_hwmon_bias(sfp, attr, value);
+	case hwmon_power:
+		switch (channel) {
+		case 0:
+			return sfp_hwmon_tx_power(sfp, attr, value);
+		case 1:
+			return sfp_hwmon_rx_power(sfp, attr, value);
+		default:
+			return -EOPNOTSUPP;
+		}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_ops sfp_hwmon_ops = {
+	.is_visible = sfp_hwmon_is_visible,
+	.read = sfp_hwmon_read,
+};
+
+static u32 sfp_hwmon_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_chip = {
+	.type = hwmon_chip,
+	.config = sfp_hwmon_chip_config,
+};
+
+static u32 sfp_hwmon_temp_config[] = {
+	HWMON_T_INPUT |
+	HWMON_T_MAX | HWMON_T_MIN |
+	HWMON_T_MAX_ALARM | HWMON_T_MIN_ALARM |
+	HWMON_T_CRIT | HWMON_T_LCRIT |
+	HWMON_T_CRIT_ALARM | HWMON_T_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_temp_channel_info = {
+	.type = hwmon_temp,
+	.config = sfp_hwmon_temp_config,
+};
+
+static u32 sfp_hwmon_vcc_config[] = {
+	HWMON_I_INPUT |
+	HWMON_I_MAX | HWMON_I_MIN |
+	HWMON_I_MAX_ALARM | HWMON_I_MIN_ALARM |
+	HWMON_I_CRIT | HWMON_I_LCRIT |
+	HWMON_I_CRIT_ALARM | HWMON_I_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_vcc_channel_info = {
+	.type = hwmon_in,
+	.config = sfp_hwmon_vcc_config,
+};
+
+static u32 sfp_hwmon_bias_config[] = {
+	HWMON_C_INPUT |
+	HWMON_C_MAX | HWMON_C_MIN |
+	HWMON_C_MAX_ALARM | HWMON_C_MIN_ALARM |
+	HWMON_C_CRIT | HWMON_C_LCRIT |
+	HWMON_C_CRIT_ALARM | HWMON_C_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_bias_channel_info = {
+	.type = hwmon_curr,
+	.config = sfp_hwmon_bias_config,
+};
+
+static u32 sfp_hwmon_power_config[] = {
+	/* Transmit power */
+	HWMON_P_INPUT |
+	HWMON_P_MAX | HWMON_P_MIN |
+	HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+	HWMON_P_CRIT | HWMON_P_LCRIT |
+	HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+	/* Receive power */
+	HWMON_P_INPUT |
+	HWMON_P_MAX | HWMON_P_MIN |
+	HWMON_P_MAX_ALARM | HWMON_P_MIN_ALARM |
+	HWMON_P_CRIT | HWMON_P_LCRIT |
+	HWMON_P_CRIT_ALARM | HWMON_P_LCRIT_ALARM,
+	0,
+};
+
+static const struct hwmon_channel_info sfp_hwmon_power_channel_info = {
+	.type = hwmon_power,
+	.config = sfp_hwmon_power_config,
+};
+
+static const struct hwmon_channel_info *sfp_hwmon_info[] = {
+	&sfp_hwmon_chip,
+	&sfp_hwmon_vcc_channel_info,
+	&sfp_hwmon_temp_channel_info,
+	&sfp_hwmon_bias_channel_info,
+	&sfp_hwmon_power_channel_info,
+	NULL,
+};
+
+static const struct hwmon_chip_info sfp_hwmon_chip_info = {
+	.ops = &sfp_hwmon_ops,
+	.info = sfp_hwmon_info,
+};
+
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+	int err, i;
+
+	if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE)
+		return 0;
+
+	if (!(sfp->id.ext.diagmon & SFP_DIAGMON_DDM))
+		return 0;
+
+	if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE)
+		/* This driver in general does not support address
+		 * change.
+		 */
+		return 0;
+
+	err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag));
+	if (err < 0)
+		return err;
+
+	sfp->hwmon_name = kstrdup(dev_name(sfp->dev), GFP_KERNEL);
+	if (!sfp->hwmon_name)
+		return -ENODEV;
+
+	for (i = 0; sfp->hwmon_name[i]; i++)
+		if (hwmon_is_bad_char(sfp->hwmon_name[i]))
+			sfp->hwmon_name[i] = '_';
+
+	sfp->hwmon_dev = hwmon_device_register_with_info(sfp->dev,
+							 sfp->hwmon_name, sfp,
+							 &sfp_hwmon_chip_info,
+							 NULL);
+
+	return PTR_ERR_OR_ZERO(sfp->hwmon_dev);
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+	hwmon_device_unregister(sfp->hwmon_dev);
+	kfree(sfp->hwmon_name);
+}
+#else
+static int sfp_hwmon_insert(struct sfp *sfp)
+{
+	return 0;
+}
+
+static void sfp_hwmon_remove(struct sfp *sfp)
+{
+}
+#endif
+
 /* Helpers */
 static void sfp_module_tx_disable(struct sfp *sfp)
 {
@@ -636,6 +1357,10 @@ static int sfp_sm_mod_probe(struct sfp *sfp)
 		dev_warn(sfp->dev,
 			 "module address swap to access page 0xA2 is not supported.\n");
 
+	ret = sfp_hwmon_insert(sfp);
+	if (ret < 0)
+		return ret;
+
 	ret = sfp_module_insert(sfp->sfp_bus, &sfp->id);
 	if (ret < 0)
 		return ret;
@@ -647,6 +1372,8 @@ static void sfp_sm_mod_remove(struct sfp *sfp)
 {
 	sfp_module_remove(sfp->sfp_bus);
 
+	sfp_hwmon_remove(sfp);
+
 	if (sfp->mod_phy)
 		sfp_sm_phy_detach(sfp);
 
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index ebce9e24906a..d37518e89db2 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -231,6 +231,50 @@ struct sfp_eeprom_id {
 	struct sfp_eeprom_ext ext;
 } __packed;
 
+struct sfp_diag {
+	__be16 temp_high_alarm;
+	__be16 temp_low_alarm;
+	__be16 temp_high_warn;
+	__be16 temp_low_warn;
+	__be16 volt_high_alarm;
+	__be16 volt_low_alarm;
+	__be16 volt_high_warn;
+	__be16 volt_low_warn;
+	__be16 bias_high_alarm;
+	__be16 bias_low_alarm;
+	__be16 bias_high_warn;
+	__be16 bias_low_warn;
+	__be16 txpwr_high_alarm;
+	__be16 txpwr_low_alarm;
+	__be16 txpwr_high_warn;
+	__be16 txpwr_low_warn;
+	__be16 rxpwr_high_alarm;
+	__be16 rxpwr_low_alarm;
+	__be16 rxpwr_high_warn;
+	__be16 rxpwr_low_warn;
+	__be16 laser_temp_high_alarm;
+	__be16 laser_temp_low_alarm;
+	__be16 laser_temp_high_warn;
+	__be16 laser_temp_low_warn;
+	__be16 tec_cur_high_alarm;
+	__be16 tec_cur_low_alarm;
+	__be16 tec_cur_high_warn;
+	__be16 tec_cur_low_warn;
+	__be32 cal_rxpwr4;
+	__be32 cal_rxpwr3;
+	__be32 cal_rxpwr2;
+	__be32 cal_rxpwr1;
+	__be32 cal_rxpwr0;
+	__be16 cal_txi_slope;
+	__be16 cal_txi_offset;
+	__be16 cal_txpwr_slope;
+	__be16 cal_txpwr_offset;
+	__be16 cal_t_slope;
+	__be16 cal_t_offset;
+	__be16 cal_v_slope;
+	__be16 cal_v_offset;
+} __packed;
+
 /* SFP EEPROM registers */
 enum {
 	SFP_PHYS_ID			= 0x00,
@@ -384,7 +428,33 @@ enum {
 	SFP_TEC_CUR			= 0x6c,
 
 	SFP_STATUS			= 0x6e,
-	SFP_ALARM			= 0x70,
+	SFP_ALARM0			= 0x70,
+	SFP_ALARM0_TEMP_HIGH		= BIT(7),
+	SFP_ALARM0_TEMP_LOW		= BIT(6),
+	SFP_ALARM0_VCC_HIGH		= BIT(5),
+	SFP_ALARM0_VCC_LOW		= BIT(4),
+	SFP_ALARM0_TX_BIAS_HIGH		= BIT(3),
+	SFP_ALARM0_TX_BIAS_LOW		= BIT(2),
+	SFP_ALARM0_TXPWR_HIGH		= BIT(1),
+	SFP_ALARM0_TXPWR_LOW		= BIT(0),
+
+	SFP_ALARM1			= 0x71,
+	SFP_ALARM1_RXPWR_HIGH		= BIT(7),
+	SFP_ALARM1_RXPWR_LOW		= BIT(6),
+
+	SFP_WARN0			= 0x74,
+	SFP_WARN0_TEMP_HIGH		= BIT(7),
+	SFP_WARN0_TEMP_LOW		= BIT(6),
+	SFP_WARN0_VCC_HIGH		= BIT(5),
+	SFP_WARN0_VCC_LOW		= BIT(4),
+	SFP_WARN0_TX_BIAS_HIGH		= BIT(3),
+	SFP_WARN0_TX_BIAS_LOW		= BIT(2),
+	SFP_WARN0_TXPWR_HIGH		= BIT(1),
+	SFP_WARN0_TXPWR_LOW		= BIT(0),
+
+	SFP_WARN1			= 0x75,
+	SFP_WARN1_RXPWR_HIGH		= BIT(7),
+	SFP_WARN1_RXPWR_LOW		= BIT(6),
 
 	SFP_EXT_STATUS			= 0x76,
 	SFP_VSL				= 0x78,
-- 
cgit v1.2.3


From 753d433b586d1d43c487e3d660f5778c7c8d58ea Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 09:15:32 +1000
Subject: random: Return nbytes filled from hw RNG

Currently the function get_random_bytes_arch() has return value 'void'.
If the hw RNG fails we currently fall back to using get_random_bytes().
This defeats the purpose of requesting random material from the hw RNG
in the first place.

There are currently no intree users of get_random_bytes_arch().

Only get random bytes from the hw RNG, make function return the number
of bytes retrieved from the hw RNG.

Acked-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 16 +++++++++-------
 include/linux/random.h |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 8e53e9515a1d..34ddfd57419b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1725,26 +1725,28 @@ EXPORT_SYMBOL(del_random_ready_callback);
  * key known by the NSA).  So it's useful if we need the speed, but
  * only if we're willing to trust the hardware manufacturer not to
  * have put in a back door.
+ *
+ * Return number of bytes filled in.
  */
-void get_random_bytes_arch(void *buf, int nbytes)
+int __must_check get_random_bytes_arch(void *buf, int nbytes)
 {
+	int left = nbytes;
 	char *p = buf;
 
-	trace_get_random_bytes_arch(nbytes, _RET_IP_);
-	while (nbytes) {
+	trace_get_random_bytes_arch(left, _RET_IP_);
+	while (left) {
 		unsigned long v;
-		int chunk = min(nbytes, (int)sizeof(unsigned long));
+		int chunk = min_t(int, left, sizeof(unsigned long));
 
 		if (!arch_get_random_long(&v))
 			break;
 
 		memcpy(p, &v, chunk);
 		p += chunk;
-		nbytes -= chunk;
+		left -= chunk;
 	}
 
-	if (nbytes)
-		get_random_bytes(p, nbytes);
+	return nbytes - left;
 }
 EXPORT_SYMBOL(get_random_bytes_arch);
 
diff --git a/include/linux/random.h b/include/linux/random.h
index 2ddf13b4281e..f1c9bc5cd231 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -38,7 +38,7 @@ extern void get_random_bytes(void *buf, int nbytes);
 extern int wait_for_random_bytes(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
-extern void get_random_bytes_arch(void *buf, int nbytes);
+extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
 
 #ifndef MODULE
 extern const struct file_operations random_fops, urandom_fops;
-- 
cgit v1.2.3


From 9ba546c01976a426292af99e682a557075d6c010 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jul 2018 15:48:46 +0200
Subject: aio: don't expose __aio_sigset in uapi

glibc uses a different defintion of sigset_t than the kernel does,
and the current version would pull in both.  To fix this just do not
expose the type at all - this somewhat mirrors pselect() where we
do not even have a type for the magic sigmask argument, but just
use pointer arithmetics.

Fixes: 7a074e96 ("aio: implement io_pgetevents")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Adrian Reber <adrian@lisas.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c                     | 5 +++++
 include/linux/syscalls.h     | 1 +
 include/uapi/linux/aio_abi.h | 6 ------
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index e1d20124ec0e..b1a42e45698b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2042,6 +2042,11 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
 	return ret;
 }
 
+struct __aio_sigset {
+	const sigset_t __user	*sigmask;
+	size_t		sigsetsize;
+};
+
 SYSCALL_DEFINE6(io_pgetevents,
 		aio_context_t, ctx_id,
 		long, min_nr,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 73810808cdf2..b06b5eeda8e8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -11,6 +11,7 @@
 #ifndef _LINUX_SYSCALLS_H
 #define _LINUX_SYSCALLS_H
 
+struct __aio_sigset;
 struct epoll_event;
 struct iattr;
 struct inode;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index d00221345c19..ce43d340f010 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -29,7 +29,6 @@
 
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/signal.h>
 #include <asm/byteorder.h>
 
 typedef __kernel_ulong_t aio_context_t;
@@ -108,10 +107,5 @@ struct iocb {
 #undef IFBIG
 #undef IFLITTLE
 
-struct __aio_sigset {
-	const sigset_t __user	*sigmask;
-	size_t		sigsetsize;
-};
-
 #endif /* __LINUX__AIO_ABI_H */
 
-- 
cgit v1.2.3


From 49502b23e0e45b57b776490cfdfd95ea7fecd7c7 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 9 May 2018 14:57:44 +0200
Subject: dt-bindings: clock: add rk3399 DDR3 standard speed bins.

DDR3 SDRAM Standard (JESD79-3F) defines some standard speed bins for
DDR3 memories. The rk3399_dmc driver allows you to pass these values via
the device tree. For that purpose the devfreq/rk3399_dmc.txt binding
refers to a ddr.h file which does not exist. This patch adds the missing
defines in a include file called rk3399-ddr.h with the definition of
standard speed bins according to the ARM Trusted Firmware (ATF).

Fixes: c1ceb8f7c167 (Documentation: bindings: add dt documentation for rk3399 dmc)
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 .../devicetree/bindings/devfreq/rk3399_dmc.txt     |  2 +-
 include/dt-bindings/clock/rk3399-ddr.h             | 56 ++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 include/dt-bindings/clock/rk3399-ddr.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
index 1b93b33c7d38..e0030a139383 100644
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
@@ -19,7 +19,7 @@ Required properties:
 
 Following properties relate to DDR timing:
 
-- rockchip,dram_speed_bin :	  Value reference include/dt-bindings/clock/ddr.h,
+- rockchip,dram_speed_bin :	  Value reference include/dt-bindings/clock/rk3399-ddr.h,
 				  it selects the DDR3 cl-trp-trcd type. It must be
 				  set according to "Speed Bin" in DDR3 datasheet,
 				  DO NOT use a smaller "Speed Bin" than specified
diff --git a/include/dt-bindings/clock/rk3399-ddr.h b/include/dt-bindings/clock/rk3399-ddr.h
new file mode 100644
index 000000000000..ed2280844963
--- /dev/null
+++ b/include/dt-bindings/clock/rk3399-ddr.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+
+#ifndef DT_BINDINGS_DDR_H
+#define DT_BINDINGS_DDR_H
+
+/*
+ * DDR3 SDRAM Standard Speed Bins include tCK, tRCD, tRP, tRAS and tRC for
+ * each corresponding bin.
+ */
+
+/* DDR3-800 (5-5-5) */
+#define DDR3_800D	0
+/* DDR3-800 (6-6-6) */
+#define DDR3_800E	1
+/* DDR3-1066 (6-6-6) */
+#define DDR3_1066E	2
+/* DDR3-1066 (7-7-7) */
+#define DDR3_1066F	3
+/* DDR3-1066 (8-8-8) */
+#define DDR3_1066G	4
+/* DDR3-1333 (7-7-7) */
+#define DDR3_1333F	5
+/* DDR3-1333 (8-8-8) */
+#define DDR3_1333G	6
+/* DDR3-1333 (9-9-9) */
+#define DDR3_1333H	7
+/* DDR3-1333 (10-10-10) */
+#define DDR3_1333J 	8
+/* DDR3-1600 (8-8-8) */
+#define DDR3_1600G	9
+/* DDR3-1600 (9-9-9) */
+#define DDR3_1600H	10
+/* DDR3-1600 (10-10-10) */
+#define DDR3_1600J	11
+/* DDR3-1600 (11-11-11) */
+#define DDR3_1600K	12
+/* DDR3-1600 (10-10-10) */
+#define DDR3_1866J	13
+/* DDR3-1866 (11-11-11) */
+#define DDR3_1866K	14
+/* DDR3-1866 (12-12-12) */
+#define DDR3_1866L	15
+/* DDR3-1866 (13-13-13) */
+#define DDR3_1866M	16
+/* DDR3-2133 (11-11-11) */
+#define DDR3_2133K	17
+/* DDR3-2133 (12-12-12) */
+#define DDR3_2133L	18
+/* DDR3-2133 (13-13-13) */
+#define DDR3_2133M	19
+/* DDR3-2133 (14-14-14) */
+#define DDR3_2133N	20
+/* DDR3 ATF default */
+#define DDR3_DEFAULT	21
+
+#endif
-- 
cgit v1.2.3


From fa84cf094ef9667e2b91c104b0a788fd1896f482 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 18 Jul 2018 07:40:53 +0200
Subject: ALSA: pcm: Nuke snd_pcm_lib_mmap_vmalloc()

snd_pcm_lib_mmap_vmalloc() was supposed to be implemented with
somewhat special for vmalloc handling, but in the end, this turned to
just the default handler, i.e. NULL.  As the situation has never
changed over decades, let's rip it off.

Reviewed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/staging/most/sound/sound.c       | 1 -
 include/sound/pcm.h                      | 2 --
 sound/drivers/aloop.c                    | 1 -
 sound/drivers/vx/vx_pcm.c                | 2 --
 sound/firewire/bebob/bebob_pcm.c         | 1 -
 sound/firewire/dice/dice-pcm.c           | 2 --
 sound/firewire/digi00x/digi00x-pcm.c     | 1 -
 sound/firewire/fireface/ff-pcm.c         | 1 -
 sound/firewire/fireworks/fireworks_pcm.c | 1 -
 sound/firewire/isight.c                  | 1 -
 sound/firewire/motu/motu-pcm.c           | 2 --
 sound/firewire/oxfw/oxfw-pcm.c           | 2 --
 sound/firewire/tascam/tascam-pcm.c       | 1 -
 sound/mips/sgio2audio.c                  | 3 ---
 sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c   | 1 -
 sound/soc/codecs/rt5514-spi.c            | 1 -
 sound/usb/6fire/pcm.c                    | 1 -
 sound/usb/caiaq/audio.c                  | 1 -
 sound/usb/hiface/pcm.c                   | 1 -
 sound/usb/misc/ua101.c                   | 2 --
 sound/usb/pcm.c                          | 2 --
 21 files changed, 30 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/most/sound/sound.c b/drivers/staging/most/sound/sound.c
index 04c18323c2ea..89b02fc305b8 100644
--- a/drivers/staging/most/sound/sound.c
+++ b/drivers/staging/most/sound/sound.c
@@ -457,7 +457,6 @@ static const struct snd_pcm_ops pcm_ops = {
 	.trigger    = pcm_trigger,
 	.pointer    = pcm_pointer,
 	.page       = snd_pcm_lib_get_vmalloc_page,
-	.mmap       = snd_pcm_lib_mmap_vmalloc,
 };
 
 static int split_arg_list(char *buf, char **card_name, u16 *ch_num,
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index fcdf358a25f0..1206045ccf03 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1342,8 +1342,6 @@ int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_s
 #define snd_pcm_lib_mmap_iomem	NULL
 #endif
 
-#define snd_pcm_lib_mmap_vmalloc NULL
-
 /**
  * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer
  * @dma: DMA number
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index 78a2fdc38531..1e34e6381baa 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -778,7 +778,6 @@ static const struct snd_pcm_ops loopback_pcm_ops = {
 	.trigger =	loopback_trigger,
 	.pointer =	loopback_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 static int loopback_pcm_new(struct loopback *loopback,
diff --git a/sound/drivers/vx/vx_pcm.c b/sound/drivers/vx/vx_pcm.c
index 380a028469c4..ba80f459bdc5 100644
--- a/sound/drivers/vx/vx_pcm.c
+++ b/sound/drivers/vx/vx_pcm.c
@@ -883,7 +883,6 @@ static const struct snd_pcm_ops vx_pcm_playback_ops = {
 	.trigger =	vx_pcm_trigger,
 	.pointer =	vx_pcm_playback_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 
@@ -1105,7 +1104,6 @@ static const struct snd_pcm_ops vx_pcm_capture_ops = {
 	.trigger =	vx_pcm_trigger,
 	.pointer =	vx_pcm_capture_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 
diff --git a/sound/firewire/bebob/bebob_pcm.c b/sound/firewire/bebob/bebob_pcm.c
index e6adab3ef42e..ea9b86450580 100644
--- a/sound/firewire/bebob/bebob_pcm.c
+++ b/sound/firewire/bebob/bebob_pcm.c
@@ -373,7 +373,6 @@ int snd_bebob_create_pcm_devices(struct snd_bebob *bebob)
 		.pointer	= pcm_playback_pointer,
 		.ack		= pcm_playback_ack,
 		.page		= snd_pcm_lib_get_vmalloc_page,
-		.mmap		= snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/dice/dice-pcm.c b/sound/firewire/dice/dice-pcm.c
index 80351b29fe0d..bb3ef5ff3488 100644
--- a/sound/firewire/dice/dice-pcm.c
+++ b/sound/firewire/dice/dice-pcm.c
@@ -412,7 +412,6 @@ int snd_dice_create_pcm(struct snd_dice *dice)
 		.pointer   = capture_pointer,
 		.ack       = capture_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	static const struct snd_pcm_ops playback_ops = {
 		.open      = pcm_open,
@@ -425,7 +424,6 @@ int snd_dice_create_pcm(struct snd_dice *dice)
 		.pointer   = playback_pointer,
 		.ack       = playback_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	unsigned int capture, playback;
diff --git a/sound/firewire/digi00x/digi00x-pcm.c b/sound/firewire/digi00x/digi00x-pcm.c
index 796f4b4645f5..fdcff0460c53 100644
--- a/sound/firewire/digi00x/digi00x-pcm.c
+++ b/sound/firewire/digi00x/digi00x-pcm.c
@@ -352,7 +352,6 @@ int snd_dg00x_create_pcm_devices(struct snd_dg00x *dg00x)
 		.pointer	= pcm_playback_pointer,
 		.ack		= pcm_playback_ack,
 		.page		= snd_pcm_lib_get_vmalloc_page,
-		.mmap		= snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/fireface/ff-pcm.c b/sound/firewire/fireface/ff-pcm.c
index e3c16308363d..bf47f9ec8703 100644
--- a/sound/firewire/fireface/ff-pcm.c
+++ b/sound/firewire/fireface/ff-pcm.c
@@ -383,7 +383,6 @@ int snd_ff_create_pcm_devices(struct snd_ff *ff)
 		.pointer	= pcm_playback_pointer,
 		.ack		= pcm_playback_ack,
 		.page		= snd_pcm_lib_get_vmalloc_page,
-		.mmap		= snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/fireworks/fireworks_pcm.c b/sound/firewire/fireworks/fireworks_pcm.c
index 40faed5e6968..aed566d82726 100644
--- a/sound/firewire/fireworks/fireworks_pcm.c
+++ b/sound/firewire/fireworks/fireworks_pcm.c
@@ -397,7 +397,6 @@ int snd_efw_create_pcm_devices(struct snd_efw *efw)
 		.pointer	= pcm_playback_pointer,
 		.ack		= pcm_playback_ack,
 		.page		= snd_pcm_lib_get_vmalloc_page,
-		.mmap		= snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c
index 3919e186a30b..30957477e005 100644
--- a/sound/firewire/isight.c
+++ b/sound/firewire/isight.c
@@ -454,7 +454,6 @@ static int isight_create_pcm(struct isight *isight)
 		.trigger   = isight_trigger,
 		.pointer   = isight_pointer,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/motu/motu-pcm.c b/sound/firewire/motu/motu-pcm.c
index 4330220890e8..ab69d7e6ac05 100644
--- a/sound/firewire/motu/motu-pcm.c
+++ b/sound/firewire/motu/motu-pcm.c
@@ -363,7 +363,6 @@ int snd_motu_create_pcm_devices(struct snd_motu *motu)
 		.pointer   = capture_pointer,
 		.ack       = capture_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	static const struct snd_pcm_ops playback_ops = {
 		.open      = pcm_open,
@@ -376,7 +375,6 @@ int snd_motu_create_pcm_devices(struct snd_motu *motu)
 		.pointer   = playback_pointer,
 		.ack       = playback_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c
index 3dd46285c0e2..b3f6503dd34d 100644
--- a/sound/firewire/oxfw/oxfw-pcm.c
+++ b/sound/firewire/oxfw/oxfw-pcm.c
@@ -389,7 +389,6 @@ int snd_oxfw_create_pcm(struct snd_oxfw *oxfw)
 		.pointer   = pcm_capture_pointer,
 		.ack       = pcm_capture_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	static const struct snd_pcm_ops playback_ops = {
 		.open      = pcm_open,
@@ -402,7 +401,6 @@ int snd_oxfw_create_pcm(struct snd_oxfw *oxfw)
 		.pointer   = pcm_playback_pointer,
 		.ack       = pcm_playback_ack,
 		.page      = snd_pcm_lib_get_vmalloc_page,
-		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	unsigned int cap = 0;
diff --git a/sound/firewire/tascam/tascam-pcm.c b/sound/firewire/tascam/tascam-pcm.c
index 6ec8ec634d4d..e4cc8990e195 100644
--- a/sound/firewire/tascam/tascam-pcm.c
+++ b/sound/firewire/tascam/tascam-pcm.c
@@ -279,7 +279,6 @@ int snd_tscm_create_pcm_devices(struct snd_tscm *tscm)
 		.pointer	= pcm_playback_pointer,
 		.ack		= pcm_playback_ack,
 		.page		= snd_pcm_lib_get_vmalloc_page,
-		.mmap		= snd_pcm_lib_mmap_vmalloc,
 	};
 	struct snd_pcm *pcm;
 	int err;
diff --git a/sound/mips/sgio2audio.c b/sound/mips/sgio2audio.c
index 9fb68b35de5a..3ec9391a4736 100644
--- a/sound/mips/sgio2audio.c
+++ b/sound/mips/sgio2audio.c
@@ -685,7 +685,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback1_ops = {
 	.trigger =     snd_sgio2audio_pcm_trigger,
 	.pointer =     snd_sgio2audio_pcm_pointer,
 	.page =        snd_pcm_lib_get_vmalloc_page,
-	.mmap =        snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = {
@@ -698,7 +697,6 @@ static const struct snd_pcm_ops snd_sgio2audio_playback2_ops = {
 	.trigger =     snd_sgio2audio_pcm_trigger,
 	.pointer =     snd_sgio2audio_pcm_pointer,
 	.page =        snd_pcm_lib_get_vmalloc_page,
-	.mmap =        snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct snd_pcm_ops snd_sgio2audio_capture_ops = {
@@ -711,7 +709,6 @@ static const struct snd_pcm_ops snd_sgio2audio_capture_ops = {
 	.trigger =     snd_sgio2audio_pcm_trigger,
 	.pointer =     snd_sgio2audio_pcm_pointer,
 	.page =        snd_pcm_lib_get_vmalloc_page,
-	.mmap =        snd_pcm_lib_mmap_vmalloc,
 };
 
 /*
diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c b/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c
index 4a2354b5ae00..98a6863e933c 100644
--- a/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c
+++ b/sound/pcmcia/pdaudiocf/pdaudiocf_pcm.c
@@ -276,7 +276,6 @@ static const struct snd_pcm_ops pdacf_pcm_capture_ops = {
 	.trigger =	pdacf_pcm_trigger,
 	.pointer =	pdacf_pcm_capture_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 
diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c
index 18686ffb0cd5..6478d10c4f4a 100644
--- a/sound/soc/codecs/rt5514-spi.c
+++ b/sound/soc/codecs/rt5514-spi.c
@@ -268,7 +268,6 @@ static const struct snd_pcm_ops rt5514_spi_pcm_ops = {
 	.hw_params	= rt5514_spi_hw_params,
 	.hw_free	= rt5514_spi_hw_free,
 	.pointer	= rt5514_spi_pcm_pointer,
-	.mmap		= snd_pcm_lib_mmap_vmalloc,
 	.page		= snd_pcm_lib_get_vmalloc_page,
 };
 
diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c
index 2dd2518a71d3..f8ef3e2a8ca0 100644
--- a/sound/usb/6fire/pcm.c
+++ b/sound/usb/6fire/pcm.c
@@ -565,7 +565,6 @@ static const struct snd_pcm_ops pcm_ops = {
 	.trigger = usb6fire_pcm_trigger,
 	.pointer = usb6fire_pcm_pointer,
 	.page = snd_pcm_lib_get_vmalloc_page,
-	.mmap = snd_pcm_lib_mmap_vmalloc,
 };
 
 static void usb6fire_pcm_init_urb(struct pcm_urb *urb,
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index 15344d39a6cd..c6108a3d7f8f 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -348,7 +348,6 @@ static const struct snd_pcm_ops snd_usb_caiaq_ops = {
 	.trigger =	snd_usb_caiaq_pcm_trigger,
 	.pointer =	snd_usb_caiaq_pcm_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 static void check_for_elapsed_periods(struct snd_usb_caiaqdev *cdev,
diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
index 396c317115b1..e1fbb9cc9ea7 100644
--- a/sound/usb/hiface/pcm.c
+++ b/sound/usb/hiface/pcm.c
@@ -523,7 +523,6 @@ static const struct snd_pcm_ops pcm_ops = {
 	.trigger = hiface_pcm_trigger,
 	.pointer = hiface_pcm_pointer,
 	.page = snd_pcm_lib_get_vmalloc_page,
-	.mmap = snd_pcm_lib_mmap_vmalloc,
 };
 
 static int hiface_pcm_init_urb(struct pcm_urb *urb,
diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c
index 386fbfd5c617..a0b6d039017f 100644
--- a/sound/usb/misc/ua101.c
+++ b/sound/usb/misc/ua101.c
@@ -900,7 +900,6 @@ static const struct snd_pcm_ops capture_pcm_ops = {
 	.trigger = capture_pcm_trigger,
 	.pointer = capture_pcm_pointer,
 	.page = snd_pcm_lib_get_vmalloc_page,
-	.mmap = snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct snd_pcm_ops playback_pcm_ops = {
@@ -913,7 +912,6 @@ static const struct snd_pcm_ops playback_pcm_ops = {
 	.trigger = playback_pcm_trigger,
 	.pointer = playback_pcm_pointer,
 	.page = snd_pcm_lib_get_vmalloc_page,
-	.mmap = snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct uac_format_type_i_discrete_descriptor *
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 160f52c4871b..4b930fa47277 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -1694,7 +1694,6 @@ static const struct snd_pcm_ops snd_usb_playback_ops = {
 	.trigger =	snd_usb_substream_playback_trigger,
 	.pointer =	snd_usb_pcm_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct snd_pcm_ops snd_usb_capture_ops = {
@@ -1707,7 +1706,6 @@ static const struct snd_pcm_ops snd_usb_capture_ops = {
 	.trigger =	snd_usb_substream_capture_trigger,
 	.pointer =	snd_usb_pcm_pointer,
 	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
 static const struct snd_pcm_ops snd_usb_playback_dev_ops = {
-- 
cgit v1.2.3


From 181ace9e1b2e1c5c2be590bc9603baa65f3a16d8 Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 20 Jun 2018 12:57:28 +0530
Subject: mtd: rawnand: helper function for setting up ECC configuration

commit 2c8f8afa7f92 ("mtd: nand: add generic helpers to check,
match, maximize ECC settings") provides generic helpers which
drivers can use for setting up ECC parameters.

Since same board can have different ECC strength nand chips so
following is the logic for setting up ECC strength and ECC step
size, which can be used by most of the drivers.

1. If both ECC step size and ECC strength are already set
   (usually by DT) then just check whether this setting
   is supported by NAND controller.
2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength
   supported by NAND controller.
3. Otherwise, try to match the ECC step size and ECC strength closest
   to the chip's requirement. If available OOB size can't fit the chip
   requirement then select maximum ECC strength which can be fit with
   available OOB size.

This patch introduces nand_ecc_choose_conf function which calls the
required helper functions for the above logic. The drivers can use
this single function instead of calling the 3 helper functions
individually.

Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h      |  3 +++
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index dbf6e80e9ab5..2858d0b09c1d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6295,6 +6295,39 @@ int nand_maximize_ecc(struct nand_chip *chip,
 }
 EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 
+/**
+ * nand_ecc_choose_conf - Set the ECC strength and ECC step size
+ * @chip: nand chip info structure
+ * @caps: ECC engine caps info structure
+ * @oobavail: OOB size that the ECC engine can use
+ *
+ * Choose the ECC configuration according to following logic
+ *
+ * 1. If both ECC step size and ECC strength are already set (usually by DT)
+ *    then check if it is supported by this controller.
+ * 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength.
+ * 3. Otherwise, try to match the ECC step size and ECC strength closest
+ *    to the chip's requirement. If available OOB size can't fit the chip
+ *    requirement then fallback to the maximum ECC step size and ECC strength.
+ *
+ * On success, the chosen ECC settings are set.
+ */
+int nand_ecc_choose_conf(struct nand_chip *chip,
+			 const struct nand_ecc_caps *caps, int oobavail)
+{
+	if (chip->ecc.size && chip->ecc.strength)
+		return nand_check_ecc_caps(chip, caps, oobavail);
+
+	if (chip->ecc.options & NAND_ECC_MAXIMIZE)
+		return nand_maximize_ecc(chip, caps, oobavail);
+
+	if (!nand_match_ecc_req(chip, caps, oobavail))
+		return 0;
+
+	return nand_maximize_ecc(chip, caps, oobavail);
+}
+EXPORT_SYMBOL_GPL(nand_ecc_choose_conf);
+
 /*
  * Check if the chip configuration meet the datasheet requirements.
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index fbac4741da52..2ce305388471 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1672,6 +1672,9 @@ int nand_match_ecc_req(struct nand_chip *chip,
 int nand_maximize_ecc(struct nand_chip *chip,
 		      const struct nand_ecc_caps *caps, int oobavail);
 
+int nand_ecc_choose_conf(struct nand_chip *chip,
+			 const struct nand_ecc_caps *caps, int oobavail);
+
 /* Default write_oob implementation */
 int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
 
-- 
cgit v1.2.3


From 0cf5c7dbaa392d11cfae532cedeaad0ac48b2165 Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 20 Jun 2018 12:57:42 +0530
Subject: mtd: rawnand: provide only single helper function for ECC conf

Function nand_ecc_choose_conf() will be help for all the cases, so
other helper functions can be made static.

nand_check_ecc_caps(): Invoke nand_ecc_choose_conf() with
                       both chip->ecc.size and chip->ecc.strength
                       value set.

nand_maximize_ecc(): Invoke nand_ecc_choose_conf() with
                     NAND_ECC_MAXIMIZE flag.

nand_match_ecc_req(): Invoke nand_ecc_choose_conf() with either
                      chip->ecc.size or chip->ecc.strength value
                      set and without NAND_ECC_MAXIMIZE flag.

CC: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 39 +++++++++++++++------------------------
 include/linux/mtd/rawnand.h      |  9 ---------
 2 files changed, 15 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 2858d0b09c1d..faac82b1e058 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6085,24 +6085,17 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
  * by the controller and the calculated ECC bytes fit within the chip's OOB.
  * On success, the calculated ECC bytes is set.
  */
-int nand_check_ecc_caps(struct nand_chip *chip,
-			const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_check_ecc_caps(struct nand_chip *chip,
+		    const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
 	int preset_step = chip->ecc.size;
 	int preset_strength = chip->ecc.strength;
-	int nsteps, ecc_bytes;
+	int ecc_bytes, nsteps = mtd->writesize / preset_step;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
-	if (!preset_step || !preset_strength)
-		return -ENODATA;
-
-	nsteps = mtd->writesize / preset_step;
-
 	for (i = 0; i < caps->nstepinfos; i++) {
 		stepinfo = &caps->stepinfos[i];
 
@@ -6135,7 +6128,6 @@ int nand_check_ecc_caps(struct nand_chip *chip,
 
 	return -ENOTSUPP;
 }
-EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
 
 /**
  * nand_match_ecc_req - meet the chip's requirement with least ECC bytes
@@ -6147,8 +6139,9 @@ EXPORT_SYMBOL_GPL(nand_check_ecc_caps);
  * number of ECC bytes (i.e. with the largest number of OOB-free bytes).
  * On success, the chosen ECC settings are set.
  */
-int nand_match_ecc_req(struct nand_chip *chip,
-		       const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_match_ecc_req(struct nand_chip *chip,
+		   const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
@@ -6159,9 +6152,6 @@ int nand_match_ecc_req(struct nand_chip *chip,
 	int best_ecc_bytes_total = INT_MAX;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
 	/* No information provided by the NAND chip */
 	if (!req_step || !req_strength)
 		return -ENOTSUPP;
@@ -6220,7 +6210,6 @@ int nand_match_ecc_req(struct nand_chip *chip,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nand_match_ecc_req);
 
 /**
  * nand_maximize_ecc - choose the max ECC strength available
@@ -6231,8 +6220,9 @@ EXPORT_SYMBOL_GPL(nand_match_ecc_req);
  * Choose the max ECC strength that is supported on the controller, and can fit
  * within the chip's OOB.  On success, the chosen ECC settings are set.
  */
-int nand_maximize_ecc(struct nand_chip *chip,
-		      const struct nand_ecc_caps *caps, int oobavail)
+static int
+nand_maximize_ecc(struct nand_chip *chip,
+		  const struct nand_ecc_caps *caps, int oobavail)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct nand_ecc_step_info *stepinfo;
@@ -6242,9 +6232,6 @@ int nand_maximize_ecc(struct nand_chip *chip,
 	int best_strength, best_ecc_bytes;
 	int i, j;
 
-	if (WARN_ON(oobavail < 0))
-		return -EINVAL;
-
 	for (i = 0; i < caps->nstepinfos; i++) {
 		stepinfo = &caps->stepinfos[i];
 		step_size = stepinfo->stepsize;
@@ -6293,7 +6280,6 @@ int nand_maximize_ecc(struct nand_chip *chip,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 
 /**
  * nand_ecc_choose_conf - Set the ECC strength and ECC step size
@@ -6315,6 +6301,11 @@ EXPORT_SYMBOL_GPL(nand_maximize_ecc);
 int nand_ecc_choose_conf(struct nand_chip *chip,
 			 const struct nand_ecc_caps *caps, int oobavail)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
+	if (WARN_ON(oobavail < 0 || oobavail > mtd->oobsize))
+		return -EINVAL;
+
 	if (chip->ecc.size && chip->ecc.strength)
 		return nand_check_ecc_caps(chip, caps, oobavail);
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 2ce305388471..0c6fb316b409 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1663,15 +1663,6 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *extraoob, int extraooblen,
 				int threshold);
 
-int nand_check_ecc_caps(struct nand_chip *chip,
-			const struct nand_ecc_caps *caps, int oobavail);
-
-int nand_match_ecc_req(struct nand_chip *chip,
-		       const struct nand_ecc_caps *caps,  int oobavail);
-
-int nand_maximize_ecc(struct nand_chip *chip,
-		      const struct nand_ecc_caps *caps, int oobavail);
-
 int nand_ecc_choose_conf(struct nand_chip *chip,
 			 const struct nand_ecc_caps *caps, int oobavail);
 
-- 
cgit v1.2.3


From 7529df4652482c33ae1a99ee8189401146f13cb7 Mon Sep 17 00:00:00 2001
From: Peter Pan <peterpandong@micron.com>
Date: Fri, 22 Jun 2018 14:28:23 +0200
Subject: mtd: nand: Add core infrastructure to support SPI NANDs

Add a SPI NAND framework based on the generic NAND framework and the
spi-mem infrastructure.

In its current state, this framework supports the following features:

- single/dual/quad IO modes
- on-die ECC

Signed-off-by: Peter Pan <peterpandong@micron.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/Kconfig      |    1 +
 drivers/mtd/nand/Makefile     |    1 +
 drivers/mtd/nand/spi/Kconfig  |    7 +
 drivers/mtd/nand/spi/Makefile |    3 +
 drivers/mtd/nand/spi/core.c   | 1136 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h   |  416 +++++++++++++++
 include/linux/spi/spi-mem.h   |    4 +-
 7 files changed, 1567 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/Kconfig
 create mode 100644 drivers/mtd/nand/spi/Makefile
 create mode 100644 drivers/mtd/nand/spi/core.c
 create mode 100644 include/linux/mtd/spinand.h

(limited to 'include')

diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 88c7d3b4ff8b..9033215e62ea 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -4,3 +4,4 @@ config MTD_NAND_CORE
 source "drivers/mtd/nand/onenand/Kconfig"
 
 source "drivers/mtd/nand/raw/Kconfig"
+source "drivers/mtd/nand/spi/Kconfig"
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index 3f0cb87f1a57..7ecd80c0a66e 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o
 
 obj-y	+= onenand/
 obj-y	+= raw/
+obj-y	+= spi/
diff --git a/drivers/mtd/nand/spi/Kconfig b/drivers/mtd/nand/spi/Kconfig
new file mode 100644
index 000000000000..7c37d2929b68
--- /dev/null
+++ b/drivers/mtd/nand/spi/Kconfig
@@ -0,0 +1,7 @@
+menuconfig MTD_SPI_NAND
+	tristate "SPI NAND device Support"
+	select MTD_NAND_CORE
+	depends on SPI_MASTER
+	select SPI_MEM
+	help
+	  This is the framework for the SPI NAND device drivers.
diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
new file mode 100644
index 000000000000..2c473b765027
--- /dev/null
+++ b/drivers/mtd/nand/spi/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+spinand-objs := core.o
+obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
new file mode 100644
index 000000000000..4efbeb0d85b4
--- /dev/null
+++ b/drivers/mtd/nand/spi/core.c
@@ -0,0 +1,1136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016-2017 Micron Technology, Inc.
+ *
+ * Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#define pr_fmt(fmt)	"spi-nand: " fmt
+
+#include <linux/device.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/spinand.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+static void spinand_cache_op_adjust_colum(struct spinand_device *spinand,
+					  const struct nand_page_io_req *req,
+					  u16 *column)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int shift;
+
+	if (nand->memorg.planes_per_lun < 2)
+		return;
+
+	/* The plane number is passed in MSB just above the column address */
+	shift = fls(nand->memorg.pagesize);
+	*column |= req->pos.plane << shift;
+}
+
+static int spinand_read_reg_op(struct spinand_device *spinand, u8 reg, u8 *val)
+{
+	struct spi_mem_op op = SPINAND_GET_FEATURE_OP(reg,
+						      spinand->scratchbuf);
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (ret)
+		return ret;
+
+	*val = *spinand->scratchbuf;
+	return 0;
+}
+
+static int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val)
+{
+	struct spi_mem_op op = SPINAND_SET_FEATURE_OP(reg,
+						      spinand->scratchbuf);
+
+	*spinand->scratchbuf = val;
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_read_status(struct spinand_device *spinand, u8 *status)
+{
+	return spinand_read_reg_op(spinand, REG_STATUS, status);
+}
+
+static int spinand_get_cfg(struct spinand_device *spinand, u8 *cfg)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	if (WARN_ON(spinand->cur_target < 0 ||
+		    spinand->cur_target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	*cfg = spinand->cfg_cache[spinand->cur_target];
+	return 0;
+}
+
+static int spinand_set_cfg(struct spinand_device *spinand, u8 cfg)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	if (WARN_ON(spinand->cur_target < 0 ||
+		    spinand->cur_target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	if (spinand->cfg_cache[spinand->cur_target] == cfg)
+		return 0;
+
+	ret = spinand_write_reg_op(spinand, REG_CFG, cfg);
+	if (ret)
+		return ret;
+
+	spinand->cfg_cache[spinand->cur_target] = cfg;
+	return 0;
+}
+
+/**
+ * spinand_upd_cfg() - Update the configuration register
+ * @spinand: the spinand device
+ * @mask: the mask encoding the bits to update in the config reg
+ * @val: the new value to apply
+ *
+ * Update the configuration register.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val)
+{
+	int ret;
+	u8 cfg;
+
+	ret = spinand_get_cfg(spinand, &cfg);
+	if (ret)
+		return ret;
+
+	cfg &= ~mask;
+	cfg |= val;
+
+	return spinand_set_cfg(spinand, cfg);
+}
+
+/**
+ * spinand_select_target() - Select a specific NAND target/die
+ * @spinand: the spinand device
+ * @target: the target/die to select
+ *
+ * Select a new target/die. If chip only has one die, this function is a NOOP.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_select_target(struct spinand_device *spinand, unsigned int target)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	if (WARN_ON(target >= nand->memorg.ntargets))
+		return -EINVAL;
+
+	if (spinand->cur_target == target)
+		return 0;
+
+	if (nand->memorg.ntargets == 1) {
+		spinand->cur_target = target;
+		return 0;
+	}
+
+	ret = spinand->select_target(spinand, target);
+	if (ret)
+		return ret;
+
+	spinand->cur_target = target;
+	return 0;
+}
+
+static int spinand_init_cfg_cache(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct device *dev = &spinand->spimem->spi->dev;
+	unsigned int target;
+	int ret;
+
+	spinand->cfg_cache = devm_kcalloc(dev,
+					  nand->memorg.ntargets,
+					  sizeof(*spinand->cfg_cache),
+					  GFP_KERNEL);
+	if (!spinand->cfg_cache)
+		return -ENOMEM;
+
+	for (target = 0; target < nand->memorg.ntargets; target++) {
+		ret = spinand_select_target(spinand, target);
+		if (ret)
+			return ret;
+
+		/*
+		 * We use spinand_read_reg_op() instead of spinand_get_cfg()
+		 * here to bypass the config cache.
+		 */
+		ret = spinand_read_reg_op(spinand, REG_CFG,
+					  &spinand->cfg_cache[target]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int spinand_init_quad_enable(struct spinand_device *spinand)
+{
+	bool enable = false;
+
+	if (!(spinand->flags & SPINAND_HAS_QE_BIT))
+		return 0;
+
+	if (spinand->op_templates.read_cache->data.buswidth == 4 ||
+	    spinand->op_templates.write_cache->data.buswidth == 4 ||
+	    spinand->op_templates.update_cache->data.buswidth == 4)
+		enable = true;
+
+	return spinand_upd_cfg(spinand, CFG_QUAD_ENABLE,
+			       enable ? CFG_QUAD_ENABLE : 0);
+}
+
+static int spinand_ecc_enable(struct spinand_device *spinand,
+			      bool enable)
+{
+	return spinand_upd_cfg(spinand, CFG_ECC_ENABLE,
+			       enable ? CFG_ECC_ENABLE : 0);
+}
+
+static int spinand_write_enable_op(struct spinand_device *spinand)
+{
+	struct spi_mem_op op = SPINAND_WR_EN_DIS_OP(true);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_load_page_op(struct spinand_device *spinand,
+				const struct nand_page_io_req *req)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, &req->pos);
+	struct spi_mem_op op = SPINAND_PAGE_READ_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_read_from_cache_op(struct spinand_device *spinand,
+				      const struct nand_page_io_req *req)
+{
+	struct spi_mem_op op = *spinand->op_templates.read_cache;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct mtd_info *mtd = nanddev_to_mtd(nand);
+	struct nand_page_io_req adjreq = *req;
+	unsigned int nbytes = 0;
+	void *buf = NULL;
+	u16 column = 0;
+	int ret;
+
+	if (req->datalen) {
+		adjreq.datalen = nanddev_page_size(nand);
+		adjreq.dataoffs = 0;
+		adjreq.databuf.in = spinand->databuf;
+		buf = spinand->databuf;
+		nbytes = adjreq.datalen;
+	}
+
+	if (req->ooblen) {
+		adjreq.ooblen = nanddev_per_page_oobsize(nand);
+		adjreq.ooboffs = 0;
+		adjreq.oobbuf.in = spinand->oobbuf;
+		nbytes += nanddev_per_page_oobsize(nand);
+		if (!buf) {
+			buf = spinand->oobbuf;
+			column = nanddev_page_size(nand);
+		}
+	}
+
+	spinand_cache_op_adjust_colum(spinand, &adjreq, &column);
+	op.addr.val = column;
+
+	/*
+	 * Some controllers are limited in term of max RX data size. In this
+	 * case, just repeat the READ_CACHE operation after updating the
+	 * column.
+	 */
+	while (nbytes) {
+		op.data.buf.in = buf;
+		op.data.nbytes = nbytes;
+		ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		ret = spi_mem_exec_op(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		buf += op.data.nbytes;
+		nbytes -= op.data.nbytes;
+		op.addr.val += op.data.nbytes;
+	}
+
+	if (req->datalen)
+		memcpy(req->databuf.in, spinand->databuf + req->dataoffs,
+		       req->datalen);
+
+	if (req->ooblen) {
+		if (req->mode == MTD_OPS_AUTO_OOB)
+			mtd_ooblayout_get_databytes(mtd, req->oobbuf.in,
+						    spinand->oobbuf,
+						    req->ooboffs,
+						    req->ooblen);
+		else
+			memcpy(req->oobbuf.in, spinand->oobbuf + req->ooboffs,
+			       req->ooblen);
+	}
+
+	return 0;
+}
+
+static int spinand_write_to_cache_op(struct spinand_device *spinand,
+				     const struct nand_page_io_req *req)
+{
+	struct spi_mem_op op = *spinand->op_templates.write_cache;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	struct mtd_info *mtd = nanddev_to_mtd(nand);
+	struct nand_page_io_req adjreq = *req;
+	unsigned int nbytes = 0;
+	void *buf = NULL;
+	u16 column = 0;
+	int ret;
+
+	memset(spinand->databuf, 0xff,
+	       nanddev_page_size(nand) +
+	       nanddev_per_page_oobsize(nand));
+
+	if (req->datalen) {
+		memcpy(spinand->databuf + req->dataoffs, req->databuf.out,
+		       req->datalen);
+		adjreq.dataoffs = 0;
+		adjreq.datalen = nanddev_page_size(nand);
+		adjreq.databuf.out = spinand->databuf;
+		nbytes = adjreq.datalen;
+		buf = spinand->databuf;
+	}
+
+	if (req->ooblen) {
+		if (req->mode == MTD_OPS_AUTO_OOB)
+			mtd_ooblayout_set_databytes(mtd, req->oobbuf.out,
+						    spinand->oobbuf,
+						    req->ooboffs,
+						    req->ooblen);
+		else
+			memcpy(spinand->oobbuf + req->ooboffs, req->oobbuf.out,
+			       req->ooblen);
+
+		adjreq.ooblen = nanddev_per_page_oobsize(nand);
+		adjreq.ooboffs = 0;
+		nbytes += nanddev_per_page_oobsize(nand);
+		if (!buf) {
+			buf = spinand->oobbuf;
+			column = nanddev_page_size(nand);
+		}
+	}
+
+	spinand_cache_op_adjust_colum(spinand, &adjreq, &column);
+
+	op = *spinand->op_templates.write_cache;
+	op.addr.val = column;
+
+	/*
+	 * Some controllers are limited in term of max TX data size. In this
+	 * case, split the operation into one LOAD CACHE and one or more
+	 * LOAD RANDOM CACHE.
+	 */
+	while (nbytes) {
+		op.data.buf.out = buf;
+		op.data.nbytes = nbytes;
+
+		ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		ret = spi_mem_exec_op(spinand->spimem, &op);
+		if (ret)
+			return ret;
+
+		buf += op.data.nbytes;
+		nbytes -= op.data.nbytes;
+		op.addr.val += op.data.nbytes;
+
+		/*
+		 * We need to use the RANDOM LOAD CACHE operation if there's
+		 * more than one iteration, because the LOAD operation resets
+		 * the cache to 0xff.
+		 */
+		if (nbytes) {
+			column = op.addr.val;
+			op = *spinand->op_templates.update_cache;
+			op.addr.val = column;
+		}
+	}
+
+	return 0;
+}
+
+static int spinand_program_op(struct spinand_device *spinand,
+			      const struct nand_page_io_req *req)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, &req->pos);
+	struct spi_mem_op op = SPINAND_PROG_EXEC_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_erase_op(struct spinand_device *spinand,
+			    const struct nand_pos *pos)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int row = nanddev_pos_to_row(nand, pos);
+	struct spi_mem_op op = SPINAND_BLK_ERASE_OP(row);
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int spinand_wait(struct spinand_device *spinand, u8 *s)
+{
+	unsigned long timeo =  jiffies + msecs_to_jiffies(400);
+	u8 status;
+	int ret;
+
+	do {
+		ret = spinand_read_status(spinand, &status);
+		if (ret)
+			return ret;
+
+		if (!(status & STATUS_BUSY))
+			goto out;
+	} while (time_before(jiffies, timeo));
+
+	/*
+	 * Extra read, just in case the STATUS_READY bit has changed
+	 * since our last check
+	 */
+	ret = spinand_read_status(spinand, &status);
+	if (ret)
+		return ret;
+
+out:
+	if (s)
+		*s = status;
+
+	return status & STATUS_BUSY ? -ETIMEDOUT : 0;
+}
+
+static int spinand_read_id_op(struct spinand_device *spinand, u8 *buf)
+{
+	struct spi_mem_op op = SPINAND_READID_OP(0, spinand->scratchbuf,
+						 SPINAND_MAX_ID_LEN);
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (!ret)
+		memcpy(buf, spinand->scratchbuf, SPINAND_MAX_ID_LEN);
+
+	return ret;
+}
+
+static int spinand_reset_op(struct spinand_device *spinand)
+{
+	struct spi_mem_op op = SPINAND_RESET_OP;
+	int ret;
+
+	ret = spi_mem_exec_op(spinand->spimem, &op);
+	if (ret)
+		return ret;
+
+	return spinand_wait(spinand, NULL);
+}
+
+static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
+{
+	return spinand_write_reg_op(spinand, REG_BLOCK_LOCK, lock);
+}
+
+static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	if (spinand->eccinfo.get_status)
+		return spinand->eccinfo.get_status(spinand, status);
+
+	switch (status & STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_HAS_BITFLIPS:
+		/*
+		 * We have no way to know exactly how many bitflips have been
+		 * fixed, so let's return the maximum possible value so that
+		 * wear-leveling layers move the data immediately.
+		 */
+		return nand->eccreq.strength;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int spinand_read_page(struct spinand_device *spinand,
+			     const struct nand_page_io_req *req,
+			     bool ecc_enabled)
+{
+	u8 status;
+	int ret;
+
+	ret = spinand_load_page_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (ret < 0)
+		return ret;
+
+	ret = spinand_read_from_cache_op(spinand, req);
+	if (ret)
+		return ret;
+
+	if (!ecc_enabled)
+		return 0;
+
+	return spinand_check_ecc_status(spinand, status);
+}
+
+static int spinand_write_page(struct spinand_device *spinand,
+			      const struct nand_page_io_req *req)
+{
+	u8 status;
+	int ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_to_cache_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_program_op(spinand, req);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (!ret && (status & STATUS_PROG_FAILED))
+		ret = -EIO;
+
+	return ret;
+}
+
+static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,
+			    struct mtd_oob_ops *ops)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	unsigned int max_bitflips = 0;
+	struct nand_io_iter iter;
+	bool enable_ecc = false;
+	bool ecc_failed = false;
+	int ret = 0;
+
+	if (ops->mode != MTD_OPS_RAW && spinand->eccinfo.ooblayout)
+		enable_ecc = true;
+
+	mutex_lock(&spinand->lock);
+
+	nanddev_io_for_each_page(nand, from, ops, &iter) {
+		ret = spinand_select_target(spinand, iter.req.pos.target);
+		if (ret)
+			break;
+
+		ret = spinand_ecc_enable(spinand, enable_ecc);
+		if (ret)
+			break;
+
+		ret = spinand_read_page(spinand, &iter.req, enable_ecc);
+		if (ret < 0 && ret != -EBADMSG)
+			break;
+
+		if (ret == -EBADMSG) {
+			ecc_failed = true;
+			mtd->ecc_stats.failed++;
+			ret = 0;
+		} else {
+			mtd->ecc_stats.corrected += ret;
+			max_bitflips = max_t(unsigned int, max_bitflips, ret);
+		}
+
+		ops->retlen += iter.req.datalen;
+		ops->oobretlen += iter.req.ooblen;
+	}
+
+	mutex_unlock(&spinand->lock);
+
+	if (ecc_failed && !ret)
+		ret = -EBADMSG;
+
+	return ret ? ret : max_bitflips;
+}
+
+static int spinand_mtd_write(struct mtd_info *mtd, loff_t to,
+			     struct mtd_oob_ops *ops)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct nand_io_iter iter;
+	bool enable_ecc = false;
+	int ret = 0;
+
+	if (ops->mode != MTD_OPS_RAW && mtd->ooblayout)
+		enable_ecc = true;
+
+	mutex_lock(&spinand->lock);
+
+	nanddev_io_for_each_page(nand, to, ops, &iter) {
+		ret = spinand_select_target(spinand, iter.req.pos.target);
+		if (ret)
+			break;
+
+		ret = spinand_ecc_enable(spinand, enable_ecc);
+		if (ret)
+			break;
+
+		ret = spinand_write_page(spinand, &iter.req);
+		if (ret)
+			break;
+
+		ops->retlen += iter.req.datalen;
+		ops->oobretlen += iter.req.ooblen;
+	}
+
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_page_io_req req = {
+		.pos = *pos,
+		.ooblen = 2,
+		.ooboffs = 0,
+		.oobbuf.in = spinand->oobbuf,
+		.mode = MTD_OPS_RAW,
+	};
+
+	memset(spinand->oobbuf, 0, 2);
+	spinand_select_target(spinand, pos->target);
+	spinand_read_page(spinand, &req, false);
+	if (spinand->oobbuf[0] != 0xff || spinand->oobbuf[1] != 0xff)
+		return true;
+
+	return false;
+}
+
+static int spinand_mtd_block_isbad(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_isbad(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_page_io_req req = {
+		.pos = *pos,
+		.ooboffs = 0,
+		.ooblen = 2,
+		.oobbuf.out = spinand->oobbuf,
+	};
+	int ret;
+
+	/* Erase block before marking it bad. */
+	ret = spinand_select_target(spinand, pos->target);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	spinand_erase_op(spinand, pos);
+
+	memset(spinand->oobbuf, 0, 2);
+	return spinand_write_page(spinand, &req);
+}
+
+static int spinand_mtd_block_markbad(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_markbad(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
+{
+	struct spinand_device *spinand = nand_to_spinand(nand);
+	u8 status;
+	int ret;
+
+	ret = spinand_select_target(spinand, pos->target);
+	if (ret)
+		return ret;
+
+	ret = spinand_write_enable_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_erase_op(spinand, pos);
+	if (ret)
+		return ret;
+
+	ret = spinand_wait(spinand, &status);
+	if (!ret && (status & STATUS_ERASE_FAILED))
+		ret = -EIO;
+
+	return ret;
+}
+
+static int spinand_mtd_erase(struct mtd_info *mtd,
+			     struct erase_info *einfo)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	int ret;
+
+	mutex_lock(&spinand->lock);
+	ret = nanddev_mtd_erase(mtd, einfo);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static int spinand_mtd_block_isreserved(struct mtd_info *mtd, loff_t offs)
+{
+	struct spinand_device *spinand = mtd_to_spinand(mtd);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	struct nand_pos pos;
+	int ret;
+
+	nanddev_offs_to_pos(nand, offs, &pos);
+	mutex_lock(&spinand->lock);
+	ret = nanddev_isreserved(nand, &pos);
+	mutex_unlock(&spinand->lock);
+
+	return ret;
+}
+
+static const struct nand_ops spinand_ops = {
+	.erase = spinand_erase,
+	.markbad = spinand_markbad,
+	.isbad = spinand_isbad,
+};
+
+static int spinand_manufacturer_detect(struct spinand_device *spinand)
+{
+	return -ENOTSUPP;
+}
+
+static int spinand_manufacturer_init(struct spinand_device *spinand)
+{
+	if (spinand->manufacturer->ops->init)
+		return spinand->manufacturer->ops->init(spinand);
+
+	return 0;
+}
+
+static void spinand_manufacturer_cleanup(struct spinand_device *spinand)
+{
+	/* Release manufacturer private data */
+	if (spinand->manufacturer->ops->cleanup)
+		return spinand->manufacturer->ops->cleanup(spinand);
+}
+
+static const struct spi_mem_op *
+spinand_select_op_variant(struct spinand_device *spinand,
+			  const struct spinand_op_variants *variants)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	for (i = 0; i < variants->nops; i++) {
+		struct spi_mem_op op = variants->ops[i];
+		unsigned int nbytes;
+		int ret;
+
+		nbytes = nanddev_per_page_oobsize(nand) +
+			 nanddev_page_size(nand);
+
+		while (nbytes) {
+			op.data.nbytes = nbytes;
+			ret = spi_mem_adjust_op_size(spinand->spimem, &op);
+			if (ret)
+				break;
+
+			if (!spi_mem_supports_op(spinand->spimem, &op))
+				break;
+
+			nbytes -= op.data.nbytes;
+		}
+
+		if (!nbytes)
+			return &variants->ops[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * spinand_match_and_init() - Try to find a match between a device ID and an
+ *			      entry in a spinand_info table
+ * @spinand: SPI NAND object
+ * @table: SPI NAND device description table
+ * @table_size: size of the device description table
+ *
+ * Should be used by SPI NAND manufacturer drivers when they want to find a
+ * match between a device ID retrieved through the READ_ID command and an
+ * entry in the SPI NAND description table. If a match is found, the spinand
+ * object will be initialized with information provided by the matching
+ * spinand_info entry.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int spinand_match_and_init(struct spinand_device *spinand,
+			   const struct spinand_info *table,
+			   unsigned int table_size, u8 devid)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	for (i = 0; i < table_size; i++) {
+		const struct spinand_info *info = &table[i];
+		const struct spi_mem_op *op;
+
+		if (devid != info->devid)
+			continue;
+
+		nand->memorg = table[i].memorg;
+		nand->eccreq = table[i].eccreq;
+		spinand->eccinfo = table[i].eccinfo;
+		spinand->flags = table[i].flags;
+		spinand->select_target = table[i].select_target;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.read_cache);
+		if (!op)
+			return -ENOTSUPP;
+
+		spinand->op_templates.read_cache = op;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.write_cache);
+		if (!op)
+			return -ENOTSUPP;
+
+		spinand->op_templates.write_cache = op;
+
+		op = spinand_select_op_variant(spinand,
+					       info->op_variants.update_cache);
+		spinand->op_templates.update_cache = op;
+
+		return 0;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int spinand_detect(struct spinand_device *spinand)
+{
+	struct device *dev = &spinand->spimem->spi->dev;
+	struct nand_device *nand = spinand_to_nand(spinand);
+	int ret;
+
+	ret = spinand_reset_op(spinand);
+	if (ret)
+		return ret;
+
+	ret = spinand_read_id_op(spinand, spinand->id.data);
+	if (ret)
+		return ret;
+
+	spinand->id.len = SPINAND_MAX_ID_LEN;
+
+	ret = spinand_manufacturer_detect(spinand);
+	if (ret) {
+		dev_err(dev, "unknown raw ID %*phN\n", SPINAND_MAX_ID_LEN,
+			spinand->id.data);
+		return ret;
+	}
+
+	if (nand->memorg.ntargets > 1 && !spinand->select_target) {
+		dev_err(dev,
+			"SPI NANDs with more than one die must implement ->select_target()\n");
+		return -EINVAL;
+	}
+
+	dev_info(&spinand->spimem->spi->dev,
+		 "%s SPI NAND was found.\n", spinand->manufacturer->name);
+	dev_info(&spinand->spimem->spi->dev,
+		 "%llu MiB, block size: %zu KiB, page size: %zu, OOB size: %u\n",
+		 nanddev_size(nand) >> 20, nanddev_eraseblock_size(nand) >> 10,
+		 nanddev_page_size(nand), nanddev_per_page_oobsize(nand));
+
+	return 0;
+}
+
+static int spinand_noecc_ooblayout_ecc(struct mtd_info *mtd, int section,
+				       struct mtd_oob_region *region)
+{
+	return -ERANGE;
+}
+
+static int spinand_noecc_ooblayout_free(struct mtd_info *mtd, int section,
+					struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	/* Reserve 2 bytes for the BBM. */
+	region->offset = 2;
+	region->length = 62;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops spinand_noecc_ooblayout = {
+	.ecc = spinand_noecc_ooblayout_ecc,
+	.free = spinand_noecc_ooblayout_free,
+};
+
+static int spinand_init(struct spinand_device *spinand)
+{
+	struct device *dev = &spinand->spimem->spi->dev;
+	struct mtd_info *mtd = spinand_to_mtd(spinand);
+	struct nand_device *nand = mtd_to_nanddev(mtd);
+	int ret, i;
+
+	/*
+	 * We need a scratch buffer because the spi_mem interface requires that
+	 * buf passed in spi_mem_op->data.buf be DMA-able.
+	 */
+	spinand->scratchbuf = kzalloc(SPINAND_MAX_ID_LEN, GFP_KERNEL);
+	if (!spinand->scratchbuf)
+		return -ENOMEM;
+
+	ret = spinand_detect(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	/*
+	 * Use kzalloc() instead of devm_kzalloc() here, because some drivers
+	 * may use this buffer for DMA access.
+	 * Memory allocated by devm_ does not guarantee DMA-safe alignment.
+	 */
+	spinand->databuf = kzalloc(nanddev_page_size(nand) +
+			       nanddev_per_page_oobsize(nand),
+			       GFP_KERNEL);
+	if (!spinand->databuf) {
+		ret = -ENOMEM;
+		goto err_free_bufs;
+	}
+
+	spinand->oobbuf = spinand->databuf + nanddev_page_size(nand);
+
+	ret = spinand_init_cfg_cache(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_init_quad_enable(spinand);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_upd_cfg(spinand, CFG_OTP_ENABLE, 0);
+	if (ret)
+		goto err_free_bufs;
+
+	ret = spinand_manufacturer_init(spinand);
+	if (ret) {
+		dev_err(dev,
+			"Failed to initialize the SPI NAND chip (err = %d)\n",
+			ret);
+		goto err_free_bufs;
+	}
+
+	/* After power up, all blocks are locked, so unlock them here. */
+	for (i = 0; i < nand->memorg.ntargets; i++) {
+		ret = spinand_select_target(spinand, i);
+		if (ret)
+			goto err_free_bufs;
+
+		ret = spinand_lock_block(spinand, BL_ALL_UNLOCKED);
+		if (ret)
+			goto err_free_bufs;
+	}
+
+	ret = nanddev_init(nand, &spinand_ops, THIS_MODULE);
+	if (ret)
+		goto err_manuf_cleanup;
+
+	/*
+	 * Right now, we don't support ECC, so let the whole oob
+	 * area is available for user.
+	 */
+	mtd->_read_oob = spinand_mtd_read;
+	mtd->_write_oob = spinand_mtd_write;
+	mtd->_block_isbad = spinand_mtd_block_isbad;
+	mtd->_block_markbad = spinand_mtd_block_markbad;
+	mtd->_block_isreserved = spinand_mtd_block_isreserved;
+	mtd->_erase = spinand_mtd_erase;
+
+	if (spinand->eccinfo.ooblayout)
+		mtd_set_ooblayout(mtd, spinand->eccinfo.ooblayout);
+	else
+		mtd_set_ooblayout(mtd, &spinand_noecc_ooblayout);
+
+	ret = mtd_ooblayout_count_freebytes(mtd);
+	if (ret < 0)
+		goto err_cleanup_nanddev;
+
+	mtd->oobavail = ret;
+
+	return 0;
+
+err_cleanup_nanddev:
+	nanddev_cleanup(nand);
+
+err_manuf_cleanup:
+	spinand_manufacturer_cleanup(spinand);
+
+err_free_bufs:
+	kfree(spinand->databuf);
+	kfree(spinand->scratchbuf);
+	return ret;
+}
+
+static void spinand_cleanup(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+
+	nanddev_cleanup(nand);
+	spinand_manufacturer_cleanup(spinand);
+	kfree(spinand->databuf);
+	kfree(spinand->scratchbuf);
+}
+
+static int spinand_probe(struct spi_mem *mem)
+{
+	struct spinand_device *spinand;
+	struct mtd_info *mtd;
+	int ret;
+
+	spinand = devm_kzalloc(&mem->spi->dev, sizeof(*spinand),
+			       GFP_KERNEL);
+	if (!spinand)
+		return -ENOMEM;
+
+	spinand->spimem = mem;
+	spi_mem_set_drvdata(mem, spinand);
+	spinand_set_of_node(spinand, mem->spi->dev.of_node);
+	mutex_init(&spinand->lock);
+	mtd = spinand_to_mtd(spinand);
+	mtd->dev.parent = &mem->spi->dev;
+
+	ret = spinand_init(spinand);
+	if (ret)
+		return ret;
+
+	ret = mtd_device_register(mtd, NULL, 0);
+	if (ret)
+		goto err_spinand_cleanup;
+
+	return 0;
+
+err_spinand_cleanup:
+	spinand_cleanup(spinand);
+
+	return ret;
+}
+
+static int spinand_remove(struct spi_mem *mem)
+{
+	struct spinand_device *spinand;
+	struct mtd_info *mtd;
+	int ret;
+
+	spinand = spi_mem_get_drvdata(mem);
+	mtd = spinand_to_mtd(spinand);
+
+	ret = mtd_device_unregister(mtd);
+	if (ret)
+		return ret;
+
+	spinand_cleanup(spinand);
+
+	return 0;
+}
+
+static const struct spi_device_id spinand_ids[] = {
+	{ .name = "spi-nand" },
+	{ /* sentinel */ },
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id spinand_of_ids[] = {
+	{ .compatible = "spi-nand" },
+	{ /* sentinel */ },
+};
+#endif
+
+static struct spi_mem_driver spinand_drv = {
+	.spidrv = {
+		.id_table = spinand_ids,
+		.driver = {
+			.name = "spi-nand",
+			.of_match_table = of_match_ptr(spinand_of_ids),
+		},
+	},
+	.probe = spinand_probe,
+	.remove = spinand_remove,
+};
+module_spi_mem_driver(spinand_drv);
+
+MODULE_DESCRIPTION("SPI NAND framework");
+MODULE_AUTHOR("Peter Pan<peterpandong@micron.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
new file mode 100644
index 000000000000..d3efe62dc9de
--- /dev/null
+++ b/include/linux/mtd/spinand.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2017 Micron Technology, Inc.
+ *
+ *  Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ */
+#ifndef __LINUX_MTD_SPINAND_H
+#define __LINUX_MTD_SPINAND_H
+
+#include <linux/mutex.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/nand.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+/**
+ * Standard SPI NAND flash operations
+ */
+
+#define SPINAND_RESET_OP						\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1),				\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_WR_EN_DIS_OP(enable)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1),		\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_READID_OP(ndummy, buf, len)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),				\
+		   SPI_MEM_OP_NO_ADDR,					\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 1))
+
+#define SPINAND_SET_FEATURE_OP(reg, valptr)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1),				\
+		   SPI_MEM_OP_ADDR(1, reg, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(1, valptr, 1))
+
+#define SPINAND_GET_FEATURE_OP(reg, valptr)				\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1),				\
+		   SPI_MEM_OP_ADDR(1, reg, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_IN(1, valptr, 1))
+
+#define SPINAND_BLK_ERASE_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PAGE_READ_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 1))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 1),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 4))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 2),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 2),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 2))
+
+#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len)	\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1),				\
+		   SPI_MEM_OP_ADDR(2, addr, 4),				\
+		   SPI_MEM_OP_DUMMY(ndummy, 4),				\
+		   SPI_MEM_OP_DATA_IN(len, buf, 4))
+
+#define SPINAND_PROG_EXEC_OP(addr)					\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1),				\
+		   SPI_MEM_OP_ADDR(3, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_NO_DATA)
+
+#define SPINAND_PROG_LOAD(reset, addr, buf, len)			\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 1))
+
+#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len)			\
+	SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1),		\
+		   SPI_MEM_OP_ADDR(2, addr, 1),				\
+		   SPI_MEM_OP_NO_DUMMY,					\
+		   SPI_MEM_OP_DATA_OUT(len, buf, 4))
+
+/**
+ * Standard SPI NAND flash commands
+ */
+#define SPINAND_CMD_PROG_LOAD_X4		0x32
+#define SPINAND_CMD_PROG_LOAD_RDM_DATA_X4	0x34
+
+/* feature register */
+#define REG_BLOCK_LOCK		0xa0
+#define BL_ALL_UNLOCKED		0x00
+
+/* configuration register */
+#define REG_CFG			0xb0
+#define CFG_OTP_ENABLE		BIT(6)
+#define CFG_ECC_ENABLE		BIT(4)
+#define CFG_QUAD_ENABLE		BIT(0)
+
+/* status register */
+#define REG_STATUS		0xc0
+#define STATUS_BUSY		BIT(0)
+#define STATUS_ERASE_FAILED	BIT(2)
+#define STATUS_PROG_FAILED	BIT(3)
+#define STATUS_ECC_MASK		GENMASK(5, 4)
+#define STATUS_ECC_NO_BITFLIPS	(0 << 4)
+#define STATUS_ECC_HAS_BITFLIPS	(1 << 4)
+#define STATUS_ECC_UNCOR_ERROR	(2 << 4)
+
+struct spinand_op;
+struct spinand_device;
+
+#define SPINAND_MAX_ID_LEN	4
+
+/**
+ * struct spinand_id - SPI NAND id structure
+ * @data: buffer containing the id bytes. Currently 4 bytes large, but can
+ *	  be extended if required
+ * @len: ID length
+ *
+ * struct_spinand_id->data contains all bytes returned after a READ_ID command,
+ * including dummy bytes if the chip does not emit ID bytes right after the
+ * READ_ID command. The responsibility to extract real ID bytes is left to
+ * struct_manufacurer_ops->detect().
+ */
+struct spinand_id {
+	u8 data[SPINAND_MAX_ID_LEN];
+	int len;
+};
+
+/**
+ * struct manufacurer_ops - SPI NAND manufacturer specific operations
+ * @detect: detect a SPI NAND device. Every time a SPI NAND device is probed
+ *	    the core calls the struct_manufacurer_ops->detect() hook of each
+ *	    registered manufacturer until one of them return 1. Note that
+ *	    the first thing to check in this hook is that the manufacturer ID
+ *	    in struct_spinand_device->id matches the manufacturer whose
+ *	    ->detect() hook has been called. Should return 1 if there's a
+ *	    match, 0 if the manufacturer ID does not match and a negative
+ *	    error code otherwise. When true is returned, the core assumes
+ *	    that properties of the NAND chip (spinand->base.memorg and
+ *	    spinand->base.eccreq) have been filled
+ * @init: initialize a SPI NAND device
+ * @cleanup: cleanup a SPI NAND device
+ *
+ * Each SPI NAND manufacturer driver should implement this interface so that
+ * NAND chips coming from this vendor can be detected and initialized properly.
+ */
+struct spinand_manufacturer_ops {
+	int (*detect)(struct spinand_device *spinand);
+	int (*init)(struct spinand_device *spinand);
+	void (*cleanup)(struct spinand_device *spinand);
+};
+
+/**
+ * struct spinand_manufacturer - SPI NAND manufacturer instance
+ * @id: manufacturer ID
+ * @name: manufacturer name
+ * @ops: manufacturer operations
+ */
+struct spinand_manufacturer {
+	u8 id;
+	char *name;
+	const struct spinand_manufacturer_ops *ops;
+};
+
+/**
+ * struct spinand_op_variants - SPI NAND operation variants
+ * @ops: the list of variants for a given operation
+ * @nops: the number of variants
+ *
+ * Some operations like read-from-cache/write-to-cache have several variants
+ * depending on the number of IO lines you use to transfer data or address
+ * cycles. This structure is a way to describe the different variants supported
+ * by a chip and let the core pick the best one based on the SPI mem controller
+ * capabilities.
+ */
+struct spinand_op_variants {
+	const struct spi_mem_op *ops;
+	unsigned int nops;
+};
+
+#define SPINAND_OP_VARIANTS(name, ...)					\
+	const struct spinand_op_variants name = {			\
+		.ops = (struct spi_mem_op[]) { __VA_ARGS__ },		\
+		.nops = sizeof((struct spi_mem_op[]){ __VA_ARGS__ }) /	\
+			sizeof(struct spi_mem_op),			\
+	}
+
+/**
+ * spinand_ecc_info - description of the on-die ECC implemented by a SPI NAND
+ *		      chip
+ * @get_status: get the ECC status. Should return a positive number encoding
+ *		the number of corrected bitflips if correction was possible or
+ *		-EBADMSG if there are uncorrectable errors. I can also return
+ *		other negative error codes if the error is not caused by
+ *		uncorrectable bitflips
+ * @ooblayout: the OOB layout used by the on-die ECC implementation
+ */
+struct spinand_ecc_info {
+	int (*get_status)(struct spinand_device *spinand, u8 status);
+	const struct mtd_ooblayout_ops *ooblayout;
+};
+
+#define SPINAND_HAS_QE_BIT		BIT(0)
+
+/**
+ * struct spinand_info - Structure used to describe SPI NAND chips
+ * @model: model name
+ * @devid: device ID
+ * @flags: OR-ing of the SPINAND_XXX flags
+ * @memorg: memory organization
+ * @eccreq: ECC requirements
+ * @eccinfo: on-die ECC info
+ * @op_variants: operations variants
+ * @op_variants.read_cache: variants of the read-cache operation
+ * @op_variants.write_cache: variants of the write-cache operation
+ * @op_variants.update_cache: variants of the update-cache operation
+ * @select_target: function used to select a target/die. Required only for
+ *		   multi-die chips
+ *
+ * Each SPI NAND manufacturer driver should have a spinand_info table
+ * describing all the chips supported by the driver.
+ */
+struct spinand_info {
+	const char *model;
+	u8 devid;
+	u32 flags;
+	struct nand_memory_organization memorg;
+	struct nand_ecc_req eccreq;
+	struct spinand_ecc_info eccinfo;
+	struct {
+		const struct spinand_op_variants *read_cache;
+		const struct spinand_op_variants *write_cache;
+		const struct spinand_op_variants *update_cache;
+	} op_variants;
+	int (*select_target)(struct spinand_device *spinand,
+			     unsigned int target);
+};
+
+#define SPINAND_INFO_OP_VARIANTS(__read, __write, __update)		\
+	{								\
+		.read_cache = __read,					\
+		.write_cache = __write,					\
+		.update_cache = __update,				\
+	}
+
+#define SPINAND_ECCINFO(__ooblayout, __get_status)			\
+	.eccinfo = {							\
+		.ooblayout = __ooblayout,				\
+		.get_status = __get_status,				\
+	}
+
+#define SPINAND_SELECT_TARGET(__func)					\
+	.select_target = __func,
+
+#define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants,	\
+		     __flags, ...)					\
+	{								\
+		.model = __model,					\
+		.devid = __id,						\
+		.memorg = __memorg,					\
+		.eccreq = __eccreq,					\
+		.op_variants = __op_variants,				\
+		.flags = __flags,					\
+		__VA_ARGS__						\
+	}
+
+/**
+ * struct spinand_device - SPI NAND device instance
+ * @base: NAND device instance
+ * @spimem: pointer to the SPI mem object
+ * @lock: lock used to serialize accesses to the NAND
+ * @id: NAND ID as returned by READ_ID
+ * @flags: NAND flags
+ * @op_templates: various SPI mem op templates
+ * @op_templates.read_cache: read cache op template
+ * @op_templates.write_cache: write cache op template
+ * @op_templates.update_cache: update cache op template
+ * @select_target: select a specific target/die. Usually called before sending
+ *		   a command addressing a page or an eraseblock embedded in
+ *		   this die. Only required if your chip exposes several dies
+ * @cur_target: currently selected target/die
+ * @eccinfo: on-die ECC information
+ * @cfg_cache: config register cache. One entry per die
+ * @databuf: bounce buffer for data
+ * @oobbuf: bounce buffer for OOB data
+ * @scratchbuf: buffer used for everything but page accesses. This is needed
+ *		because the spi-mem interface explicitly requests that buffers
+ *		passed in spi_mem_op be DMA-able, so we can't based the bufs on
+ *		the stack
+ * @manufacturer: SPI NAND manufacturer information
+ * @priv: manufacturer private data
+ */
+struct spinand_device {
+	struct nand_device base;
+	struct spi_mem *spimem;
+	struct mutex lock;
+	struct spinand_id id;
+	u32 flags;
+
+	struct {
+		const struct spi_mem_op *read_cache;
+		const struct spi_mem_op *write_cache;
+		const struct spi_mem_op *update_cache;
+	} op_templates;
+
+	int (*select_target)(struct spinand_device *spinand,
+			     unsigned int target);
+	unsigned int cur_target;
+
+	struct spinand_ecc_info eccinfo;
+
+	u8 *cfg_cache;
+	u8 *databuf;
+	u8 *oobbuf;
+	u8 *scratchbuf;
+	const struct spinand_manufacturer *manufacturer;
+	void *priv;
+};
+
+/**
+ * mtd_to_spinand() - Get the SPI NAND device attached to an MTD instance
+ * @mtd: MTD instance
+ *
+ * Return: the SPI NAND device attached to @mtd.
+ */
+static inline struct spinand_device *mtd_to_spinand(struct mtd_info *mtd)
+{
+	return container_of(mtd_to_nanddev(mtd), struct spinand_device, base);
+}
+
+/**
+ * spinand_to_mtd() - Get the MTD device embedded in a SPI NAND device
+ * @spinand: SPI NAND device
+ *
+ * Return: the MTD device embedded in @spinand.
+ */
+static inline struct mtd_info *spinand_to_mtd(struct spinand_device *spinand)
+{
+	return nanddev_to_mtd(&spinand->base);
+}
+
+/**
+ * nand_to_spinand() - Get the SPI NAND device embedding an NAND object
+ * @nand: NAND object
+ *
+ * Return: the SPI NAND device embedding @nand.
+ */
+static inline struct spinand_device *nand_to_spinand(struct nand_device *nand)
+{
+	return container_of(nand, struct spinand_device, base);
+}
+
+/**
+ * spinand_to_nand() - Get the NAND device embedded in a SPI NAND object
+ * @spinand: SPI NAND device
+ *
+ * Return: the NAND device embedded in @spinand.
+ */
+static inline struct nand_device *
+spinand_to_nand(struct spinand_device *spinand)
+{
+	return &spinand->base;
+}
+
+/**
+ * spinand_set_of_node - Attach a DT node to a SPI NAND device
+ * @spinand: SPI NAND device
+ * @np: DT node
+ *
+ * Attach a DT node to a SPI NAND device.
+ */
+static inline void spinand_set_of_node(struct spinand_device *spinand,
+				       struct device_node *np)
+{
+	nanddev_set_of_node(&spinand->base, np);
+}
+
+int spinand_match_and_init(struct spinand_device *dev,
+			   const struct spinand_info *table,
+			   unsigned int table_size, u8 devid);
+
+int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val);
+int spinand_select_target(struct spinand_device *spinand, unsigned int target);
+
+#endif /* __LINUX_MTD_SPINAND_H */
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index bb4bd15ae1f6..4fa34a227a0f 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -3,7 +3,9 @@
  * Copyright (C) 2018 Exceet Electronics GmbH
  * Copyright (C) 2018 Bootlin
  *
- * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ * Author:
+ *	Peter Pan <peterpandong@micron.com>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
  */
 
 #ifndef __LINUX_SPI_MEM_H
-- 
cgit v1.2.3


From a508e8875e135d7a1df26d8131b5443cb07005ff Mon Sep 17 00:00:00 2001
From: Peter Pan <peterpandong@micron.com>
Date: Fri, 22 Jun 2018 14:28:25 +0200
Subject: mtd: spinand: Add initial support for Micron MT29F2G01ABAGD

Add a basic driver for Micron SPI NANDs. Only one device is supported
right now, but the driver will be extended to support more devices
afterwards.

Signed-off-by: Peter Pan <peterpandong@micron.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile |   2 +-
 drivers/mtd/nand/spi/core.c   |  17 ++++++
 drivers/mtd/nand/spi/micron.c | 133 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h   |   3 +
 4 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/micron.c

(limited to 'include')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index 2c473b765027..a1df25398f20 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o
+spinand-objs := core.o micron.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 4efbeb0d85b4..9cc502d7e745 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -763,8 +763,25 @@ static const struct nand_ops spinand_ops = {
 	.isbad = spinand_isbad,
 };
 
+static const struct spinand_manufacturer *spinand_manufacturers[] = {
+	&micron_spinand_manufacturer,
+};
+
 static int spinand_manufacturer_detect(struct spinand_device *spinand)
 {
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < ARRAY_SIZE(spinand_manufacturers); i++) {
+		ret = spinand_manufacturers[i]->ops->detect(spinand);
+		if (ret > 0) {
+			spinand->manufacturer = spinand_manufacturers[i];
+			return 0;
+		} else if (ret < 0) {
+			return ret;
+		}
+	}
+
 	return -ENOTSUPP;
 }
 
diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c
new file mode 100644
index 000000000000..9c4381d6847b
--- /dev/null
+++ b/drivers/mtd/nand/spi/micron.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016-2017 Micron Technology, Inc.
+ *
+ * Authors:
+ *	Peter Pan <peterpandong@micron.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_MICRON		0x2c
+
+#define MICRON_STATUS_ECC_MASK		GENMASK(7, 4)
+#define MICRON_STATUS_ECC_NO_BITFLIPS	(0 << 4)
+#define MICRON_STATUS_ECC_1TO3_BITFLIPS	(1 << 4)
+#define MICRON_STATUS_ECC_4TO6_BITFLIPS	(3 << 4)
+#define MICRON_STATUS_ECC_7TO8_BITFLIPS	(5 << 4)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int mt29f2g01abagd_ooblayout_ecc(struct mtd_info *mtd, int section,
+					struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	region->offset = 64;
+	region->length = 64;
+
+	return 0;
+}
+
+static int mt29f2g01abagd_ooblayout_free(struct mtd_info *mtd, int section,
+					 struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	/* Reserve 2 bytes for the BBM. */
+	region->offset = 2;
+	region->length = 62;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops mt29f2g01abagd_ooblayout = {
+	.ecc = mt29f2g01abagd_ooblayout_ecc,
+	.free = mt29f2g01abagd_ooblayout_free,
+};
+
+static int mt29f2g01abagd_ecc_get_status(struct spinand_device *spinand,
+					 u8 status)
+{
+	switch (status & MICRON_STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	case MICRON_STATUS_ECC_1TO3_BITFLIPS:
+		return 3;
+
+	case MICRON_STATUS_ECC_4TO6_BITFLIPS:
+		return 6;
+
+	case MICRON_STATUS_ECC_7TO8_BITFLIPS:
+		return 8;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static const struct spinand_info micron_spinand_table[] = {
+	SPINAND_INFO("MT29F2G01ABAGD", 0x24,
+		     NAND_MEMORG(1, 2048, 128, 64, 2048, 2, 1, 1),
+		     NAND_ECCREQ(8, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     0,
+		     SPINAND_ECCINFO(&mt29f2g01abagd_ooblayout,
+				     mt29f2g01abagd_ecc_get_status)),
+};
+
+static int micron_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Micron SPI NAND read ID need a dummy byte,
+	 * so the first byte in raw_id is dummy.
+	 */
+	if (id[1] != SPINAND_MFR_MICRON)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, micron_spinand_table,
+				     ARRAY_SIZE(micron_spinand_table), id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static const struct spinand_manufacturer_ops micron_spinand_manuf_ops = {
+	.detect = micron_spinand_detect,
+};
+
+const struct spinand_manufacturer micron_spinand_manufacturer = {
+	.id = SPINAND_MFR_MICRON,
+	.name = "Micron",
+	.ops = &micron_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index d3efe62dc9de..717b272940f1 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -193,6 +193,9 @@ struct spinand_manufacturer {
 	const struct spinand_manufacturer_ops *ops;
 };
 
+/* SPI NAND manufacturers */
+extern const struct spinand_manufacturer micron_spinand_manufacturer;
+
 /**
  * struct spinand_op_variants - SPI NAND operation variants
  * @ops: the list of variants for a given operation
-- 
cgit v1.2.3


From 1075492bb9e26312bc8ddeec1a93e2de5f9c76b4 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Fri, 22 Jun 2018 14:28:26 +0200
Subject: mtd: spinand: Add initial support for Winbond W25M02GV

Add support for the W25M02GV chip.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile  |   2 +-
 drivers/mtd/nand/spi/core.c    |   1 +
 drivers/mtd/nand/spi/winbond.c | 141 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h    |   1 +
 4 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/winbond.c

(limited to 'include')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index a1df25398f20..100008d202ed 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o micron.o
+spinand-objs := core.o micron.o winbond.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 9cc502d7e745..4803a6bfe8ec 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -765,6 +765,7 @@ static const struct nand_ops spinand_ops = {
 
 static const struct spinand_manufacturer *spinand_manufacturers[] = {
 	&micron_spinand_manufacturer,
+	&winbond_spinand_manufacturer,
 };
 
 static int spinand_manufacturer_detect(struct spinand_device *spinand)
diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c
new file mode 100644
index 000000000000..67baa1b32c00
--- /dev/null
+++ b/drivers/mtd/nand/spi/winbond.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 exceet electronics GmbH
+ *
+ * Authors:
+ *	Frieder Schrempf <frieder.schrempf@exceet.de>
+ *	Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_WINBOND		0xEF
+
+#define WINBOND_CFG_BUF_READ		BIT(3)
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int w25m02gv_ooblayout_ecc(struct mtd_info *mtd, int section,
+				  struct mtd_oob_region *region)
+{
+	if (section > 3)
+		return -ERANGE;
+
+	region->offset = (16 * section) + 8;
+	region->length = 8;
+
+	return 0;
+}
+
+static int w25m02gv_ooblayout_free(struct mtd_info *mtd, int section,
+				   struct mtd_oob_region *region)
+{
+	if (section > 3)
+		return -ERANGE;
+
+	region->offset = (16 * section) + 2;
+	region->length = 6;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops w25m02gv_ooblayout = {
+	.ecc = w25m02gv_ooblayout_ecc,
+	.free = w25m02gv_ooblayout_free,
+};
+
+static int w25m02gv_select_target(struct spinand_device *spinand,
+				  unsigned int target)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0xc2, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_OUT(1,
+							spinand->scratchbuf,
+							1));
+
+	*spinand->scratchbuf = target;
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static const struct spinand_info winbond_spinand_table[] = {
+	SPINAND_INFO("W25M02GV", 0xAB,
+		     NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 2),
+		     NAND_ECCREQ(1, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     0,
+		     SPINAND_ECCINFO(&w25m02gv_ooblayout, NULL),
+		     SPINAND_SELECT_TARGET(w25m02gv_select_target)),
+};
+
+/**
+ * winbond_spinand_detect - initialize device related part in spinand_device
+ * struct if it is a Winbond device.
+ * @spinand: SPI NAND device structure
+ */
+static int winbond_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Winbond SPI NAND read ID need a dummy byte,
+	 * so the first byte in raw_id is dummy.
+	 */
+	if (id[1] != SPINAND_MFR_WINBOND)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, winbond_spinand_table,
+				     ARRAY_SIZE(winbond_spinand_table), id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static int winbond_spinand_init(struct spinand_device *spinand)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	unsigned int i;
+
+	/*
+	 * Make sure all dies are in buffer read mode and not continuous read
+	 * mode.
+	 */
+	for (i = 0; i < nand->memorg.ntargets; i++) {
+		spinand_select_target(spinand, i);
+		spinand_upd_cfg(spinand, WINBOND_CFG_BUF_READ,
+				WINBOND_CFG_BUF_READ);
+	}
+
+	return 0;
+}
+
+static const struct spinand_manufacturer_ops winbond_spinand_manuf_ops = {
+	.detect = winbond_spinand_detect,
+	.init = winbond_spinand_init,
+};
+
+const struct spinand_manufacturer winbond_spinand_manufacturer = {
+	.id = SPINAND_MFR_WINBOND,
+	.name = "Winbond",
+	.ops = &winbond_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 717b272940f1..f0f16b9029e7 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -195,6 +195,7 @@ struct spinand_manufacturer {
 
 /* SPI NAND manufacturers */
 extern const struct spinand_manufacturer micron_spinand_manufacturer;
+extern const struct spinand_manufacturer winbond_spinand_manufacturer;
 
 /**
  * struct spinand_op_variants - SPI NAND operation variants
-- 
cgit v1.2.3


From b02308af05e62c7d995f4fc75b0bc2ae3c3026f7 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 22 Jun 2018 14:28:27 +0200
Subject: mtd: spinand: Add initial support for the MX35LF1GE4AB chip

Add minimal support for the MX35LF1GE4AB SPI NAND chip.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/spi/Makefile   |   2 +-
 drivers/mtd/nand/spi/core.c     |   1 +
 drivers/mtd/nand/spi/macronix.c | 136 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spinand.h     |   1 +
 4 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/nand/spi/macronix.c

(limited to 'include')

diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile
index 100008d202ed..b74e074b363a 100644
--- a/drivers/mtd/nand/spi/Makefile
+++ b/drivers/mtd/nand/spi/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-spinand-objs := core.o micron.o winbond.o
+spinand-objs := core.o macronix.o micron.o winbond.o
 obj-$(CONFIG_MTD_SPI_NAND) += spinand.o
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 4803a6bfe8ec..30f83649c481 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -764,6 +764,7 @@ static const struct nand_ops spinand_ops = {
 };
 
 static const struct spinand_manufacturer *spinand_manufacturers[] = {
+	&macronix_spinand_manufacturer,
 	&micron_spinand_manufacturer,
 	&winbond_spinand_manufacturer,
 };
diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c
new file mode 100644
index 000000000000..8ba3489d332a
--- /dev/null
+++ b/drivers/mtd/nand/spi/macronix.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Macronix
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mtd/spinand.h>
+
+#define SPINAND_MFR_MACRONIX		0xC2
+
+static SPINAND_OP_VARIANTS(read_cache_variants,
+		SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0),
+		SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0));
+
+static SPINAND_OP_VARIANTS(write_cache_variants,
+		SPINAND_PROG_LOAD_X4(true, 0, NULL, 0),
+		SPINAND_PROG_LOAD(true, 0, NULL, 0));
+
+static SPINAND_OP_VARIANTS(update_cache_variants,
+		SPINAND_PROG_LOAD_X4(false, 0, NULL, 0),
+		SPINAND_PROG_LOAD(false, 0, NULL, 0));
+
+static int mx35lfxge4ab_ooblayout_ecc(struct mtd_info *mtd, int section,
+				      struct mtd_oob_region *region)
+{
+	return -ERANGE;
+}
+
+static int mx35lfxge4ab_ooblayout_free(struct mtd_info *mtd, int section,
+				       struct mtd_oob_region *region)
+{
+	if (section)
+		return -ERANGE;
+
+	region->offset = 2;
+	region->length = mtd->oobsize - 2;
+
+	return 0;
+}
+
+static const struct mtd_ooblayout_ops mx35lfxge4ab_ooblayout = {
+	.ecc = mx35lfxge4ab_ooblayout_ecc,
+	.free = mx35lfxge4ab_ooblayout_free,
+};
+
+static int mx35lf1ge4ab_get_eccsr(struct spinand_device *spinand, u8 *eccsr)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0x7c, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_DUMMY(1, 1),
+					  SPI_MEM_OP_DATA_IN(1, eccsr, 1));
+
+	return spi_mem_exec_op(spinand->spimem, &op);
+}
+
+static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand,
+				       u8 status)
+{
+	struct nand_device *nand = spinand_to_nand(spinand);
+	u8 eccsr;
+
+	switch (status & STATUS_ECC_MASK) {
+	case STATUS_ECC_NO_BITFLIPS:
+		return 0;
+
+	case STATUS_ECC_UNCOR_ERROR:
+		return -EBADMSG;
+
+	case STATUS_ECC_HAS_BITFLIPS:
+		/*
+		 * Let's try to retrieve the real maximum number of bitflips
+		 * in order to avoid forcing the wear-leveling layer to move
+		 * data around if it's not necessary.
+		 */
+		if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr))
+			return nand->eccreq.strength;
+
+		if (WARN_ON(eccsr > nand->eccreq.strength || !eccsr))
+			return nand->eccreq.strength;
+
+		return eccsr;
+
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static const struct spinand_info macronix_spinand_table[] = {
+	SPINAND_INFO("MX35LF1GE4AB", 0x12,
+		     NAND_MEMORG(1, 2048, 64, 64, 1024, 1, 1, 1),
+		     NAND_ECCREQ(4, 512),
+		     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+					      &write_cache_variants,
+					      &update_cache_variants),
+		     SPINAND_HAS_QE_BIT,
+		     SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
+				     mx35lf1ge4ab_ecc_get_status)),
+};
+
+static int macronix_spinand_detect(struct spinand_device *spinand)
+{
+	u8 *id = spinand->id.data;
+	int ret;
+
+	/*
+	 * Macronix SPI NAND read ID needs a dummy byte, so the first byte in
+	 * raw_id is garbage.
+	 */
+	if (id[1] != SPINAND_MFR_MACRONIX)
+		return 0;
+
+	ret = spinand_match_and_init(spinand, macronix_spinand_table,
+				     ARRAY_SIZE(macronix_spinand_table),
+				     id[2]);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static const struct spinand_manufacturer_ops macronix_spinand_manuf_ops = {
+	.detect = macronix_spinand_detect,
+};
+
+const struct spinand_manufacturer macronix_spinand_manufacturer = {
+	.id = SPINAND_MFR_MACRONIX,
+	.name = "Macronix",
+	.ops = &macronix_spinand_manuf_ops,
+};
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index f0f16b9029e7..088ff96c3eb6 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -194,6 +194,7 @@ struct spinand_manufacturer {
 };
 
 /* SPI NAND manufacturers */
+extern const struct spinand_manufacturer macronix_spinand_manufacturer;
 extern const struct spinand_manufacturer micron_spinand_manufacturer;
 extern const struct spinand_manufacturer winbond_spinand_manufacturer;
 
-- 
cgit v1.2.3


From a4c025372d9d4756e7be98a98f5dde302c6df562 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:27 +0200
Subject: mtd: rawnand: Remove nand_do_read() prototype from rawnand.h

nand_do_read() is a static function implemented in nand_base.c. There's
no good reason to expose its prototype in rawnand.h.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 0c6fb316b409..4d7a46c42900 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1545,8 +1545,6 @@ int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
 int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    int allowbbt);
-int nand_do_read(struct mtd_info *mtd, loff_t from, size_t len,
-		 size_t *retlen, uint8_t *buf);
 
 /**
  * struct platform_nand_chip - chip level device structure
-- 
cgit v1.2.3


From 707329aca6e0cf5781ca7f62c39bdcb5f87e9c23 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:28 +0200
Subject: mtd: rawnand: Remove forward declaration of mtd_info

struct mtd_info is defined in linux/mtd/mtd.h which is included
at the beginning of nand_base.c, there's thus no need for the
forward declaration of mtd_info.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 4d7a46c42900..32145b302585 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -23,7 +23,6 @@
 #include <linux/mtd/bbm.h>
 #include <linux/types.h>
 
-struct mtd_info;
 struct nand_flash_dev;
 struct device_node;
 
-- 
cgit v1.2.3


From 1c3ab61ebcf8cfb5308d2e68e03fd4b4df484e62 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:29 +0200
Subject: mtd: rawnand: Remove forward declaration of device_node

struct device_node is defined in linux/of.h. Let's include this file
instead of having a forward declaration of this struct.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 32145b302585..83ab6779144e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -21,10 +21,10 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
+#include <linux/of.h>
 #include <linux/types.h>
 
 struct nand_flash_dev;
-struct device_node;
 
 /* Scan and identify a NAND device */
 int nand_scan_with_ids(struct mtd_info *mtd, int max_chips,
-- 
cgit v1.2.3


From 44b07b921dea5ec997fb929ceaa82582a7415816 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:30 +0200
Subject: mtd: rawnand: Rename nand_default_bbt() into nand_create_bbt()

Rename nand_default_bbt() into nand_create_bbt() and pass it a nand_chip
object to prepare removal of the chip->scan_bbt() hook.

We add a temporary nand_default_bbt() wrapper which will be dropped
after the removal of ->scan_bbt().

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c |  5 +++++
 drivers/mtd/nand/raw/nand_bbt.c  | 10 +++++-----
 include/linux/mtd/rawnand.h      |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index faac82b1e058..0476e13d47b1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4924,6 +4924,11 @@ static void nand_shutdown(struct mtd_info *mtd)
 	nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
+static int nand_default_bbt(struct mtd_info *mtd)
+{
+	return nand_create_bbt(mtd_to_nand(mtd));
+}
+
 /* Set default functions */
 static void nand_set_defaults(struct nand_chip *chip)
 {
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index d9f4ceff2568..39db352f8757 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -1349,15 +1349,14 @@ static int nand_create_badblock_pattern(struct nand_chip *this)
 }
 
 /**
- * nand_default_bbt - [NAND Interface] Select a default bad block table for the device
- * @mtd: MTD device structure
+ * nand_create_bbt - [NAND Interface] Select a default bad block table for the device
+ * @this: NAND chip object
  *
  * This function selects the default bad block table support for the device and
  * calls the nand_scan_bbt function.
  */
-int nand_default_bbt(struct mtd_info *mtd)
+int nand_create_bbt(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int ret;
 
 	/* Is a flash based bad block table requested? */
@@ -1383,8 +1382,9 @@ int nand_default_bbt(struct mtd_info *mtd)
 			return ret;
 	}
 
-	return nand_scan_bbt(mtd, this->badblock_pattern);
+	return nand_scan_bbt(nand_to_mtd(this), this->badblock_pattern);
 }
+EXPORT_SYMBOL(nand_create_bbt);
 
 /**
  * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 83ab6779144e..186f9fb1e7eb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1538,7 +1538,7 @@ extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
 extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
 extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
-int nand_default_bbt(struct mtd_info *mtd);
+int nand_create_bbt(struct nand_chip *chip);
 int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
 int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
-- 
cgit v1.2.3


From e80eba7581512625083ba540f120479b935425de Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:31 +0200
Subject: mtd: rawnand: Kill the chip->scan_bbt() hook

None of the existing drivers are overloading the ->scan_bbt() method,
let's get rid of it and replace calls to ->scan_bbt() by
nand_create_bbt() ones.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/diskonchip.c          | 4 ++--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +-
 drivers/mtd/nand/raw/nand_base.c           | 9 +--------
 drivers/mtd/nand/raw/nandsim.c             | 2 +-
 include/linux/mtd/rawnand.h                | 2 --
 5 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 8d10061abb4b..3c46188dd6d2 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1291,7 +1291,7 @@ static int __init nftl_scan_bbt(struct mtd_info *mtd)
 		this->bbt_md = NULL;
 	}
 
-	ret = this->scan_bbt(mtd);
+	ret = nand_create_bbt(this);
 	if (ret)
 		return ret;
 
@@ -1338,7 +1338,7 @@ static int __init inftl_scan_bbt(struct mtd_info *mtd)
 		this->bbt_md->pattern = "TBB_SYSM";
 	}
 
-	ret = this->scan_bbt(mtd);
+	ret = nand_create_bbt(this);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 58d4e4a93a93..599513321e71 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1944,7 +1944,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	ret = nand_boot_init(this);
 	if (ret)
 		goto err_nand_cleanup;
-	ret = chip->scan_bbt(mtd);
+	ret = nand_create_bbt(chip);
 	if (ret)
 		goto err_nand_cleanup;
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0476e13d47b1..4fa5e20d9690 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4924,11 +4924,6 @@ static void nand_shutdown(struct mtd_info *mtd)
 	nand_get_device(mtd, FL_PM_SUSPENDED);
 }
 
-static int nand_default_bbt(struct mtd_info *mtd)
-{
-	return nand_create_bbt(mtd_to_nand(mtd));
-}
-
 /* Set default functions */
 static void nand_set_defaults(struct nand_chip *chip)
 {
@@ -4970,8 +4965,6 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->write_byte = busw ? nand_write_byte16 : nand_write_byte;
 	if (!chip->read_buf || chip->read_buf == nand_read_buf)
 		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
-	if (!chip->scan_bbt)
-		chip->scan_bbt = nand_default_bbt;
 
 	if (!chip->controller) {
 		chip->controller = &chip->hwcontrol;
@@ -6673,7 +6666,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 		return 0;
 
 	/* Build bad block table */
-	ret = chip->scan_bbt(mtd);
+	ret = nand_create_bbt(chip);
 	if (ret)
 		goto err_nand_manuf_cleanup;
 
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index f8edacde49ab..8a3b36cfe5ea 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2337,7 +2337,7 @@ static int __init ns_init_module(void)
 	if ((retval = init_nandsim(nsmtd)) != 0)
 		goto err_exit;
 
-	if ((retval = chip->scan_bbt(nsmtd)) != 0)
+	if ((retval = nand_create_bbt(chip)) != 0)
 		goto err_exit;
 
 	if ((retval = parse_badblocks(nand, nsmtd)) != 0)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 186f9fb1e7eb..ac0007ddadf2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1199,7 +1199,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @buf_align:		minimum buffer alignment required by a platform
  * @hwcontrol:		platform-specific hardware control structure
  * @erase:		[REPLACEABLE] erase function
- * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
  * @state:		[INTERN] the current state of the NAND device
@@ -1292,7 +1291,6 @@ struct nand_chip {
 		       const struct nand_operation *op,
 		       bool check_only);
 	int (*erase)(struct mtd_info *mtd, int page);
-	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			    int feature_addr, uint8_t *subfeature_para);
 	int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From f0f01838f76420988b50b23f315704c9a5cd2fa9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:32 +0200
Subject: mtd: rawnand: orion_nand: Kill orion_nand_data.dev_ready()

None of the boards seem to overload the ->dev_ready() hook, just drop
this field from orion_nand_data.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/orion_nand.c            | 3 ---
 include/linux/platform_data/mtd-orion_nand.h | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 7825fd3ce66b..1a4828442675 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -153,9 +153,6 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	if (board->width == 16)
 		nc->options |= NAND_BUSWIDTH_16;
 
-	if (board->dev_ready)
-		nc->dev_ready = board->dev_ready;
-
 	platform_set_drvdata(pdev, info);
 
 	/* Not all platforms can gate the clock, so it is not
diff --git a/include/linux/platform_data/mtd-orion_nand.h b/include/linux/platform_data/mtd-orion_nand.h
index a7ce77c7c1a8..34828eb85982 100644
--- a/include/linux/platform_data/mtd-orion_nand.h
+++ b/include/linux/platform_data/mtd-orion_nand.h
@@ -12,7 +12,6 @@
  */
 struct orion_nand_data {
 	struct mtd_partition *parts;
-	int (*dev_ready)(struct mtd_info *mtd);
 	u32 nr_parts;
 	u8 ale;		/* address line number connected to ALE */
 	u8 cle;		/* address line number connected to CLE */
-- 
cgit v1.2.3


From dc2d8856a758627d4f126d17bc113eedd72b2d60 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 5 Jul 2018 12:27:33 +0200
Subject: mtd: rawnand: plat_nand: Kill pdata->ctrl.{hwcontrol, read_byte}()

None of the board files are overloading those hooks, so let's drop them
from struct platform_nand_ctrl.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/arm/mach-pxa/balloon3.c     | 1 -
 arch/arm/mach-pxa/em-x270.c      | 1 -
 drivers/mtd/nand/raw/plat_nand.c | 2 --
 include/linux/mtd/rawnand.h      | 4 ----
 4 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index f4f8f23bda8c..af46d2182533 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -688,7 +688,6 @@ struct platform_nand_data balloon3_nand_pdata = {
 		.chip_delay	= 50,
 	},
 	.ctrl = {
-		.hwcontrol	= 0,
 		.dev_ready	= balloon3_nand_dev_ready,
 		.select_chip	= balloon3_nand_select_chip,
 		.cmd_ctrl	= balloon3_nand_cmd_ctl,
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 49022ad338e9..29be04c6cc48 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -346,7 +346,6 @@ struct platform_nand_data em_x270_nand_platdata = {
 		.chip_delay = 20,
 	},
 	.ctrl = {
-		.hwcontrol = 0,
 		.dev_ready = em_x270_nand_device_ready,
 		.select_chip = 0,
 		.cmd_ctrl = em_x270_nand_cmd_ctl,
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 925a1323604d..222626df4b96 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -67,12 +67,10 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
-	data->chip.read_byte = pdata->ctrl.read_byte;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
 
-	data->chip.ecc.hwctl = pdata->ctrl.hwcontrol;
 	data->chip.ecc.mode = NAND_ECC_SOFT;
 	data->chip.ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index ac0007ddadf2..11c2426fc363 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1572,14 +1572,12 @@ struct platform_device;
  * struct platform_nand_ctrl - controller level device structure
  * @probe:		platform specific function to probe/setup hardware
  * @remove:		platform specific function to remove/teardown hardware
- * @hwcontrol:		platform specific hardware control structure
  * @dev_ready:		platform specific function to read ready/busy pin
  * @select_chip:	platform specific chip select function
  * @cmd_ctrl:		platform specific function for controlling
  *			ALE/CLE/nCE. Also used to write command and address
  * @write_buf:		platform specific function for write buffer
  * @read_buf:		platform specific function for read buffer
- * @read_byte:		platform specific function to read one byte from chip
  * @priv:		private data to transport driver specific settings
  *
  * All fields are optional and depend on the hardware driver requirements
@@ -1587,13 +1585,11 @@ struct platform_device;
 struct platform_nand_ctrl {
 	int (*probe)(struct platform_device *pdev);
 	void (*remove)(struct platform_device *pdev);
-	void (*hwcontrol)(struct mtd_info *mtd, int cmd);
 	int (*dev_ready)(struct mtd_info *mtd);
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
-	unsigned char (*read_byte)(struct mtd_info *mtd);
 	void *priv;
 };
 
-- 
cgit v1.2.3


From dc2865ac3527d76fe04ee1f1a3ffc101a60faba0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Mon, 9 Jul 2018 22:09:40 +0200
Subject: MIPS: txx9: Move the ndfc.h header to
 include/linux/platform_data/txx9

This way we will be able to compile the ndfmc driver when
COMPILE_TEST=y.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/mips/include/asm/txx9/ndfmc.h       | 30 ------------------------------
 arch/mips/txx9/generic/setup.c           |  2 +-
 arch/mips/txx9/generic/setup_tx4938.c    |  2 +-
 arch/mips/txx9/generic/setup_tx4939.c    |  2 +-
 drivers/mtd/nand/raw/txx9ndfmc.c         |  2 +-
 include/linux/platform_data/txx9/ndfmc.h | 30 ++++++++++++++++++++++++++++++
 6 files changed, 34 insertions(+), 34 deletions(-)
 delete mode 100644 arch/mips/include/asm/txx9/ndfmc.h
 create mode 100644 include/linux/platform_data/txx9/ndfmc.h

(limited to 'include')

diff --git a/arch/mips/include/asm/txx9/ndfmc.h b/arch/mips/include/asm/txx9/ndfmc.h
deleted file mode 100644
index fa67f3df78fc..000000000000
--- a/arch/mips/include/asm/txx9/ndfmc.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * (C) Copyright TOSHIBA CORPORATION 2007
- */
-#ifndef __ASM_TXX9_NDFMC_H
-#define __ASM_TXX9_NDFMC_H
-
-#define NDFMC_PLAT_FLAG_USE_BSPRT	0x01
-#define NDFMC_PLAT_FLAG_NO_RSTR		0x02
-#define NDFMC_PLAT_FLAG_HOLDADD		0x04
-#define NDFMC_PLAT_FLAG_DUMMYWRITE	0x08
-
-struct txx9ndfmc_platform_data {
-	unsigned int shift;
-	unsigned int gbus_clock;
-	unsigned int hold;		/* hold time in nanosecond */
-	unsigned int spw;		/* strobe pulse width in nanosecond */
-	unsigned int flags;
-	unsigned char ch_mask;		/* available channel bitmask */
-	unsigned char wp_mask;		/* write-protect bitmask */
-	unsigned char wide_mask;	/* 16bit-nand bitmask */
-};
-
-void txx9_ndfmc_init(unsigned long baseaddr,
-		     const struct txx9ndfmc_platform_data *plat_data);
-
-#endif /* __ASM_TXX9_NDFMC_H */
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 1791a44ee570..aa47932abd28 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <linux/serial_core.h>
 #include <linux/mtd/physmap.h>
 #include <linux/leds.h>
@@ -35,7 +36,6 @@
 #include <asm/txx9/generic.h>
 #include <asm/txx9/pci.h>
 #include <asm/txx9tmr.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #ifdef CONFIG_CPU_TX49XX
 #include <asm/txx9/tx4938.h>
diff --git a/arch/mips/txx9/generic/setup_tx4938.c b/arch/mips/txx9/generic/setup_tx4938.c
index 85d1795652da..17395d5d15ca 100644
--- a/arch/mips/txx9/generic/setup_tx4938.c
+++ b/arch/mips/txx9/generic/setup_tx4938.c
@@ -17,13 +17,13 @@
 #include <linux/ptrace.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <asm/reboot.h>
 #include <asm/traps.h>
 #include <asm/txx9irq.h>
 #include <asm/txx9tmr.h>
 #include <asm/txx9pio.h>
 #include <asm/txx9/generic.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #include <asm/txx9/tx4938.h>
 
diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c
index 274928987a21..360c388f4c82 100644
--- a/arch/mips/txx9/generic/setup_tx4939.c
+++ b/arch/mips/txx9/generic/setup_tx4939.c
@@ -21,13 +21,13 @@
 #include <linux/ptrace.h>
 #include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 #include <asm/bootinfo.h>
 #include <asm/reboot.h>
 #include <asm/traps.h>
 #include <asm/txx9irq.h>
 #include <asm/txx9tmr.h>
 #include <asm/txx9/generic.h>
-#include <asm/txx9/ndfmc.h>
 #include <asm/txx9/dmac.h>
 #include <asm/txx9/tx4939.h>
 
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index b567d212fe7d..04d57474ef97 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -20,7 +20,7 @@
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
-#include <asm/txx9/ndfmc.h>
+#include <linux/platform_data/txx9/ndfmc.h>
 
 /* TXX9 NDFMC Registers */
 #define TXX9_NDFDTR	0x00
diff --git a/include/linux/platform_data/txx9/ndfmc.h b/include/linux/platform_data/txx9/ndfmc.h
new file mode 100644
index 000000000000..fc172627d54e
--- /dev/null
+++ b/include/linux/platform_data/txx9/ndfmc.h
@@ -0,0 +1,30 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * (C) Copyright TOSHIBA CORPORATION 2007
+ */
+#ifndef __TXX9_NDFMC_H
+#define __TXX9_NDFMC_H
+
+#define NDFMC_PLAT_FLAG_USE_BSPRT	0x01
+#define NDFMC_PLAT_FLAG_NO_RSTR		0x02
+#define NDFMC_PLAT_FLAG_HOLDADD		0x04
+#define NDFMC_PLAT_FLAG_DUMMYWRITE	0x08
+
+struct txx9ndfmc_platform_data {
+	unsigned int shift;
+	unsigned int gbus_clock;
+	unsigned int hold;		/* hold time in nanosecond */
+	unsigned int spw;		/* strobe pulse width in nanosecond */
+	unsigned int flags;
+	unsigned char ch_mask;		/* available channel bitmask */
+	unsigned char wp_mask;		/* write-protect bitmask */
+	unsigned char wide_mask;	/* 16bit-nand bitmask */
+};
+
+void txx9_ndfmc_init(unsigned long baseaddr,
+		     const struct txx9ndfmc_platform_data *plat_data);
+
+#endif /* __TXX9_NDFMC_H */
-- 
cgit v1.2.3


From e65e3a50702f4e7a01c0b36ff08d6ed8dde7ee7b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Mon, 9 Jul 2018 22:09:42 +0200
Subject: MIPS: jz4740: Move jz4740_nand.h header to
 include/linux/platform_data/jz4740

This way we will be able to compile the jz4740_nand driver when
COMPILE_TEST=y.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/mips/include/asm/mach-jz4740/jz4740_nand.h  | 34 ------------------------
 arch/mips/jz4740/board-qi_lb60.c                 |  3 ++-
 drivers/mtd/nand/raw/jz4740_nand.c               |  2 +-
 include/linux/platform_data/jz4740/jz4740_nand.h | 34 ++++++++++++++++++++++++
 4 files changed, 37 insertions(+), 36 deletions(-)
 delete mode 100644 arch/mips/include/asm/mach-jz4740/jz4740_nand.h
 create mode 100644 include/linux/platform_data/jz4740/jz4740_nand.h

(limited to 'include')

diff --git a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h b/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
deleted file mode 100644
index f381d465e768..000000000000
--- a/arch/mips/include/asm/mach-jz4740/jz4740_nand.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
- *  JZ4740 SoC NAND controller driver
- *
- *  This program is free software; you can redistribute	 it and/or modify it
- *  under  the terms of	 the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the	License, or (at your
- *  option) any later version.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef __ASM_MACH_JZ4740_JZ4740_NAND_H__
-#define __ASM_MACH_JZ4740_JZ4740_NAND_H__
-
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
-
-#define JZ_NAND_NUM_BANKS 4
-
-struct jz_nand_platform_data {
-	int			num_partitions;
-	struct mtd_partition	*partitions;
-
-	unsigned char banks[JZ_NAND_NUM_BANKS];
-
-	void (*ident_callback)(struct platform_device *, struct mtd_info *,
-				struct mtd_partition **, int *num_partitions);
-};
-
-#endif
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 60f0767507c6..af0c8ace0141 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -29,10 +29,11 @@
 #include <linux/power/gpio-charger.h>
 #include <linux/pwm.h>
 
+#include <linux/platform_data/jz4740/jz4740_nand.h>
+
 #include <asm/mach-jz4740/gpio.h>
 #include <asm/mach-jz4740/jz4740_fb.h>
 #include <asm/mach-jz4740/jz4740_mmc.h>
-#include <asm/mach-jz4740/jz4740_nand.h>
 
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 613b00a9604b..a0254461812d 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -25,7 +25,7 @@
 
 #include <linux/gpio.h>
 
-#include <asm/mach-jz4740/jz4740_nand.h>
+#include <linux/platform_data/jz4740/jz4740_nand.h>
 
 #define JZ_REG_NAND_CTRL	0x50
 #define JZ_REG_NAND_ECC_CTRL	0x100
diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h
new file mode 100644
index 000000000000..bc571f6d5ced
--- /dev/null
+++ b/include/linux/platform_data/jz4740/jz4740_nand.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  JZ4740 SoC NAND controller driver
+ *
+ *  This program is free software; you can redistribute	 it and/or modify it
+ *  under  the terms of	 the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the	License, or (at your
+ *  option) any later version.
+ *
+ *  You should have received a copy of the  GNU General Public License along
+ *  with this program; if not, write  to the Free Software Foundation, Inc.,
+ *  675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __JZ4740_NAND_H__
+#define __JZ4740_NAND_H__
+
+#include <linux/mtd/rawnand.h>
+#include <linux/mtd/partitions.h>
+
+#define JZ_NAND_NUM_BANKS 4
+
+struct jz_nand_platform_data {
+	int			num_partitions;
+	struct mtd_partition	*partitions;
+
+	unsigned char banks[JZ_NAND_NUM_BANKS];
+
+	void (*ident_callback)(struct platform_device *, struct mtd_info *,
+				struct mtd_partition **, int *num_partitions);
+};
+
+#endif
-- 
cgit v1.2.3


From cb2b36f5a97df76f547fcc4ab444a02522fb6c96 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:40 -0700
Subject: netfilter: nf_conncount: Switch to plain list

Original patch is from Florian Westphal.

This patch switches from hlist to plain list to store the list of
connections with the same filtering key in nf_conncount. With the
plain list, we can insert new connections at the tail, so over time
the beginning of list holds long-running connections and those are
expired, while the newly creates ones are at the end.

Later on, we could probably move checked ones to the end of the list,
so the next run has higher chance to reclaim stale entries in the front.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 15 ++++--
 net/netfilter/nf_conncount.c               | 83 ++++++++++++++++++------------
 net/netfilter/nft_connlimit.c              | 24 ++++-----
 3 files changed, 75 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 3a188a0923a3..e4884e0e4f69 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -1,8 +1,15 @@
 #ifndef _NF_CONNTRACK_COUNT_H
 #define _NF_CONNTRACK_COUNT_H
 
+#include <linux/list.h>
+
 struct nf_conncount_data;
 
+struct nf_conncount_list {
+	struct list_head head;	/* connections with the same filtering key */
+	unsigned int count;	/* length of list */
+};
+
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
 					    unsigned int keylen);
 void nf_conncount_destroy(struct net *net, unsigned int family,
@@ -14,15 +21,17 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 				 const struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_zone *zone,
 				 bool *addit);
 
-bool nf_conncount_add(struct hlist_head *head,
+void nf_conncount_list_init(struct nf_conncount_list *list);
+
+bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
-void nf_conncount_cache_free(struct hlist_head *hhead);
+void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 81c02185b2e8..81b060adefef 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -44,7 +44,7 @@
 
 /* we will save the tuples of all connections we care about */
 struct nf_conncount_tuple {
-	struct hlist_node		node;
+	struct list_head		node;
 	struct nf_conntrack_tuple	tuple;
 	struct nf_conntrack_zone	zone;
 	int				cpu;
@@ -53,7 +53,7 @@ struct nf_conncount_tuple {
 
 struct nf_conncount_rb {
 	struct rb_node node;
-	struct hlist_head hhead; /* connections/hosts in same subnet */
+	struct nf_conncount_list list;
 	u32 key[MAX_KEYLEN];
 };
 
@@ -82,12 +82,15 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-bool nf_conncount_add(struct hlist_head *head,
+bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone)
 {
 	struct nf_conncount_tuple *conn;
 
+	if (WARN_ON_ONCE(list->count > INT_MAX))
+		return false;
+
 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
 	if (conn == NULL)
 		return false;
@@ -95,13 +98,26 @@ bool nf_conncount_add(struct hlist_head *head,
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
-	hlist_add_head(&conn->node, head);
+	list_add_tail(&conn->node, &list->head);
+	list->count++;
 	return true;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
+static void conn_free(struct nf_conncount_list *list,
+		      struct nf_conncount_tuple *conn)
+{
+	if (WARN_ON_ONCE(list->count == 0))
+		return;
+
+	list->count--;
+	list_del(&conn->node);
+	kmem_cache_free(conncount_conn_cachep, conn);
+}
+
 static const struct nf_conntrack_tuple_hash *
-find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+find_or_evict(struct net *net, struct nf_conncount_list *list,
+	      struct nf_conncount_tuple *conn)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
@@ -121,30 +137,29 @@ find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
 	 */
 	age = a - b;
 	if (conn->cpu == cpu || age >= 2) {
-		hlist_del(&conn->node);
-		kmem_cache_free(conncount_conn_cachep, conn);
+		conn_free(list, conn);
 		return ERR_PTR(-ENOENT);
 	}
 
 	return ERR_PTR(-EAGAIN);
 }
 
-unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+unsigned int nf_conncount_lookup(struct net *net,
+				 struct nf_conncount_list *list,
 				 const struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_zone *zone,
 				 bool *addit)
 {
 	const struct nf_conntrack_tuple_hash *found;
-	struct nf_conncount_tuple *conn;
+	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
-	struct hlist_node *n;
 	unsigned int length = 0;
 
 	*addit = tuple ? true : false;
 
 	/* check the saved connections */
-	hlist_for_each_entry_safe(conn, n, head, node) {
-		found = find_or_evict(net, conn);
+	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
@@ -157,6 +172,7 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 				    nf_ct_zone_id(zone, zone->dir))
 					*addit = false;
 			}
+
 			continue;
 		}
 
@@ -176,8 +192,7 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			hlist_del(&conn->node);
-			kmem_cache_free(conncount_conn_cachep, conn);
+			conn_free(list, conn);
 			continue;
 		}
 
@@ -189,17 +204,23 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 }
 EXPORT_SYMBOL_GPL(nf_conncount_lookup);
 
+void nf_conncount_list_init(struct nf_conncount_list *list)
+{
+	INIT_LIST_HEAD(&list->head);
+	list->count = 1;
+}
+EXPORT_SYMBOL_GPL(nf_conncount_list_init);
+
 static void nf_conncount_gc_list(struct net *net,
-				 struct nf_conncount_rb *rbconn)
+				 struct nf_conncount_list *list)
 {
 	const struct nf_conntrack_tuple_hash *found;
-	struct nf_conncount_tuple *conn;
-	struct hlist_node *n;
+	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
 
-	hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node) {
-		found = find_or_evict(net, conn);
+	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
 			if (PTR_ERR(found) == -ENOENT)
 				collected++;
@@ -213,8 +234,7 @@ static void nf_conncount_gc_list(struct net *net,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			hlist_del(&conn->node);
-			kmem_cache_free(conncount_conn_cachep, conn);
+			conn_free(list, conn);
 			collected++;
 			continue;
 		}
@@ -271,14 +291,14 @@ count_tree(struct net *net, struct rb_root *root,
 			/* same source network -> be counted! */
 			unsigned int count;
 
-			count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
+			count = nf_conncount_lookup(net, &rbconn->list, tuple,
 						    zone, &addit);
 
 			tree_nodes_free(root, gc_nodes, gc_count);
 			if (!addit)
 				return count;
 
-			if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
+			if (!nf_conncount_add(&rbconn->list, tuple, zone))
 				return 0; /* hotdrop */
 
 			return count + 1;
@@ -287,8 +307,8 @@ count_tree(struct net *net, struct rb_root *root,
 		if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
 			continue;
 
-		nf_conncount_gc_list(net, rbconn);
-		if (hlist_empty(&rbconn->hhead))
+		nf_conncount_gc_list(net, &rbconn->list);
+		if (list_empty(&rbconn->list.head))
 			gc_nodes[gc_count++] = rbconn;
 	}
 
@@ -322,8 +342,8 @@ count_tree(struct net *net, struct rb_root *root,
 	conn->zone = *zone;
 	memcpy(rbconn->key, key, sizeof(u32) * keylen);
 
-	INIT_HLIST_HEAD(&rbconn->hhead);
-	hlist_add_head(&conn->node, &rbconn->hhead);
+	nf_conncount_list_init(&rbconn->list);
+	list_add(&conn->node, &rbconn->list.head);
 
 	rb_link_node(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
@@ -388,12 +408,11 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
 }
 EXPORT_SYMBOL_GPL(nf_conncount_init);
 
-void nf_conncount_cache_free(struct hlist_head *hhead)
+void nf_conncount_cache_free(struct nf_conncount_list *list)
 {
-	struct nf_conncount_tuple *conn;
-	struct hlist_node *n;
+	struct nf_conncount_tuple *conn, *conn_n;
 
-	hlist_for_each_entry_safe(conn, n, hhead, node)
+	list_for_each_entry_safe(conn, conn_n, &list->head, node)
 		kmem_cache_free(conncount_conn_cachep, conn);
 }
 EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
@@ -408,7 +427,7 @@ static void destroy_tree(struct rb_root *r)
 
 		rb_erase(node, r);
 
-		nf_conncount_cache_free(&rbconn->hhead);
+		nf_conncount_cache_free(&rbconn->list);
 
 		kmem_cache_free(conncount_rb_cachep, rbconn);
 	}
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index a832c59f0a9c..4f0491a36a1d 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,10 +14,10 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 struct nft_connlimit {
-	spinlock_t		lock;
-	struct hlist_head	hhead;
-	u32			limit;
-	bool			invert;
+	spinlock_t			lock;
+	struct nf_conncount_list	list;
+	u32				limit;
+	bool				invert;
 };
 
 static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
@@ -46,13 +46,13 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 	}
 
 	spin_lock_bh(&priv->lock);
-	count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
+	count = nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
 				    &addit);
 
 	if (!addit)
 		goto out;
 
-	if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
+	if (!nf_conncount_add(&priv->list, tuple_ptr, zone)) {
 		regs->verdict.code = NF_DROP;
 		spin_unlock_bh(&priv->lock);
 		return;
@@ -88,7 +88,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 	}
 
 	spin_lock_init(&priv->lock);
-	INIT_HLIST_HEAD(&priv->hhead);
+	nf_conncount_list_init(&priv->list);
 	priv->limit	= limit;
 	priv->invert	= invert;
 
@@ -99,7 +99,7 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
 				     struct nft_connlimit *priv)
 {
 	nf_ct_netns_put(ctx->net, ctx->family);
-	nf_conncount_cache_free(&priv->hhead);
+	nf_conncount_cache_free(&priv->list);
 }
 
 static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -213,7 +213,7 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
 	struct nft_connlimit *priv_src = nft_expr_priv(src);
 
 	spin_lock_init(&priv_dst->lock);
-	INIT_HLIST_HEAD(&priv_dst->hhead);
+	nf_conncount_list_init(&priv_dst->list);
 	priv_dst->limit	 = priv_src->limit;
 	priv_dst->invert = priv_src->invert;
 
@@ -225,7 +225,7 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
 
-	nf_conncount_cache_free(&priv->hhead);
+	nf_conncount_cache_free(&priv->list);
 }
 
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
@@ -234,9 +234,9 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 	bool addit, ret;
 
 	spin_lock_bh(&priv->lock);
-	nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
+	nf_conncount_lookup(net, &priv->list, NULL, &nf_ct_zone_dflt, &addit);
 
-	ret = hlist_empty(&priv->hhead);
+	ret = list_empty(&priv->list.head);
 	spin_unlock_bh(&priv->lock);
 
 	return ret;
-- 
cgit v1.2.3


From 976afca1ceba53df6f4a543014e15d1c7a962571 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:41 -0700
Subject: netfilter: nf_conncount: Early exit in nf_conncount_lookup() and
 cleanup

This patch is originally from Florian Westphal.

This patch does the following three tasks.

It applies the same early exit technique for nf_conncount_lookup().

Since now we keep the number of connections in 'struct nf_conncount_list',
we no longer need to return the count in nf_conncount_lookup().

Moreover, we expose the garbage collection function nf_conncount_gc_list()
for nft_connlimit.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h | 11 +++++----
 net/netfilter/nf_conncount.c               | 38 +++++++++++++++++-------------
 net/netfilter/nft_connlimit.c              |  9 +++----
 3 files changed, 33 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index e4884e0e4f69..dbec17f674b7 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -21,10 +21,10 @@ unsigned int nf_conncount_count(struct net *net,
 				const struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_zone *zone);
 
-unsigned int nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
-				 const struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_zone *zone,
-				 bool *addit);
+void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
+			 const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conntrack_zone *zone,
+			 bool *addit);
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
@@ -32,6 +32,9 @@ bool nf_conncount_add(struct nf_conncount_list *list,
 		      const struct nf_conntrack_tuple *tuple,
 		      const struct nf_conntrack_zone *zone);
 
+void nf_conncount_gc_list(struct net *net,
+			  struct nf_conncount_list *list);
+
 void nf_conncount_cache_free(struct nf_conncount_list *list);
 
 #endif
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 81b060adefef..7dfd9d5e6a3e 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -144,26 +144,29 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
 	return ERR_PTR(-EAGAIN);
 }
 
-unsigned int nf_conncount_lookup(struct net *net,
-				 struct nf_conncount_list *list,
-				 const struct nf_conntrack_tuple *tuple,
-				 const struct nf_conntrack_zone *zone,
-				 bool *addit)
+void nf_conncount_lookup(struct net *net,
+			 struct nf_conncount_list *list,
+			 const struct nf_conntrack_tuple *tuple,
+			 const struct nf_conntrack_zone *zone,
+			 bool *addit)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
-	unsigned int length = 0;
+	unsigned int collect = 0;
 
+	/* best effort only */
 	*addit = tuple ? true : false;
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
+		if (collect > CONNCOUNT_GC_MAX_NODES)
+			break;
+
 		found = find_or_evict(net, list, conn);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
-				length++;
 				if (!tuple)
 					continue;
 
@@ -171,8 +174,8 @@ unsigned int nf_conncount_lookup(struct net *net,
 				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
 				    nf_ct_zone_id(zone, zone->dir))
 					*addit = false;
-			}
-
+			} else if (PTR_ERR(found) == -ENOENT)
+				collect++;
 			continue;
 		}
 
@@ -181,9 +184,10 @@ unsigned int nf_conncount_lookup(struct net *net,
 		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
 		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
 			/*
-			 * Just to be sure we have it only once in the list.
 			 * We should not see tuples twice unless someone hooks
 			 * this into a table without "-p tcp --syn".
+			 *
+			 * Attempt to avoid a re-add in this case.
 			 */
 			*addit = false;
 		} else if (already_closed(found_ct)) {
@@ -193,14 +197,12 @@ unsigned int nf_conncount_lookup(struct net *net,
 			 */
 			nf_ct_put(found_ct);
 			conn_free(list, conn);
+			collect++;
 			continue;
 		}
 
 		nf_ct_put(found_ct);
-		length++;
 	}
-
-	return length;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_lookup);
 
@@ -211,8 +213,8 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
-static void nf_conncount_gc_list(struct net *net,
-				 struct nf_conncount_list *list)
+void nf_conncount_gc_list(struct net *net,
+			  struct nf_conncount_list *list)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn, *conn_n;
@@ -244,6 +246,7 @@ static void nf_conncount_gc_list(struct net *net,
 			return;
 	}
 }
+EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
 
 static void tree_nodes_free(struct rb_root *root,
 			    struct nf_conncount_rb *gc_nodes[],
@@ -291,8 +294,9 @@ count_tree(struct net *net, struct rb_root *root,
 			/* same source network -> be counted! */
 			unsigned int count;
 
-			count = nf_conncount_lookup(net, &rbconn->list, tuple,
-						    zone, &addit);
+			nf_conncount_lookup(net, &rbconn->list, tuple, zone,
+					    &addit);
+			count = rbconn->list.count;
 
 			tree_nodes_free(root, gc_nodes, gc_count);
 			if (!addit)
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 4f0491a36a1d..37c52ae06741 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -46,8 +46,9 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 	}
 
 	spin_lock_bh(&priv->lock);
-	count = nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
-				    &addit);
+	nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
+			    &addit);
+	count = priv->list.count;
 
 	if (!addit)
 		goto out;
@@ -231,10 +232,10 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
-	bool addit, ret;
+	bool ret;
 
 	spin_lock_bh(&priv->lock);
-	nf_conncount_lookup(net, &priv->list, NULL, &nf_ct_zone_dflt, &addit);
+	nf_conncount_gc_list(net, &priv->list);
 
 	ret = list_empty(&priv->list.head);
 	spin_unlock_bh(&priv->lock);
-- 
cgit v1.2.3


From 5c789e131cbb997a528451564ea4613e812fc718 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Mon, 2 Jul 2018 17:33:44 -0700
Subject: netfilter: nf_conncount: Add list lock and gc worker, and RCU for
 init tree search

This patch is originally from Florian Westphal.

This patch does the following 3 main tasks.

1) Add list lock to 'struct nf_conncount_list' so that we can
alter the lists containing the individual connections without holding the
main tree lock.  It would be useful when we only need to add/remove to/from
a list without allocate/remove a node in the tree.  With this change, we
update nft_connlimit accordingly since we longer need to maintain
a list lock in nft_connlimit now.

2) Use RCU for the initial tree search to improve tree look up performance.

3) Add a garbage collection worker. This worker is schedule when there
are excessive tree node that needed to be recycled.

Moreover,the rbnode reclaim logic is moved from search tree to insert tree
to avoid race condition.

Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h |  17 +-
 net/netfilter/nf_conncount.c               | 253 +++++++++++++++++++++--------
 net/netfilter/nft_connlimit.c              |  17 +-
 3 files changed, 196 insertions(+), 91 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index dbec17f674b7..4b2b2baf8ab4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -5,9 +5,17 @@
 
 struct nf_conncount_data;
 
+enum nf_conncount_list_add {
+	NF_CONNCOUNT_ADDED, 	/* list add was ok */
+	NF_CONNCOUNT_ERR,	/* -ENOMEM, must drop skb */
+	NF_CONNCOUNT_SKIP,	/* list is already reclaimed by gc */
+};
+
 struct nf_conncount_list {
+	spinlock_t list_lock;
 	struct list_head head;	/* connections with the same filtering key */
 	unsigned int count;	/* length of list */
+	bool dead;
 };
 
 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
@@ -28,11 +36,12 @@ void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list,
 
 void nf_conncount_list_init(struct nf_conncount_list *list);
 
-bool nf_conncount_add(struct nf_conncount_list *list,
-		      const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conntrack_zone *zone);
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+		 const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_zone *zone);
 
-void nf_conncount_gc_list(struct net *net,
+bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list);
 
 void nf_conncount_cache_free(struct nf_conncount_list *list);
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 3f14806b7271..02ca7df793f5 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -49,12 +49,14 @@ struct nf_conncount_tuple {
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
+	struct rcu_head			rcu_head;
 };
 
 struct nf_conncount_rb {
 	struct rb_node node;
 	struct nf_conncount_list list;
 	u32 key[MAX_KEYLEN];
+	struct rcu_head rcu_head;
 };
 
 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp;
@@ -62,6 +64,10 @@ static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_i
 struct nf_conncount_data {
 	unsigned int keylen;
 	struct rb_root root[CONNCOUNT_SLOTS];
+	struct net *net;
+	struct work_struct gc_work;
+	unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)];
+	unsigned int gc_tree;
 };
 
 static u_int32_t conncount_rnd __read_mostly;
@@ -82,42 +88,70 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 	return memcmp(a, b, klen * sizeof(u32));
 }
 
-bool nf_conncount_add(struct nf_conncount_list *list,
-		      const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conntrack_zone *zone)
+enum nf_conncount_list_add
+nf_conncount_add(struct nf_conncount_list *list,
+		 const struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_zone *zone)
 {
 	struct nf_conncount_tuple *conn;
 
 	if (WARN_ON_ONCE(list->count > INT_MAX))
-		return false;
+		return NF_CONNCOUNT_ERR;
 
 	conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
 	if (conn == NULL)
-		return false;
+		return NF_CONNCOUNT_ERR;
+
 	conn->tuple = *tuple;
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
+	spin_lock(&list->list_lock);
+	if (list->dead == true) {
+		kmem_cache_free(conncount_conn_cachep, conn);
+		spin_unlock(&list->list_lock);
+		return NF_CONNCOUNT_SKIP;
+	}
 	list_add_tail(&conn->node, &list->head);
 	list->count++;
-	return true;
+	spin_unlock(&list->list_lock);
+	return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
-static void conn_free(struct nf_conncount_list *list,
+static void __conn_free(struct rcu_head *h)
+{
+	struct nf_conncount_tuple *conn;
+
+	conn = container_of(h, struct nf_conncount_tuple, rcu_head);
+	kmem_cache_free(conncount_conn_cachep, conn);
+}
+
+static bool conn_free(struct nf_conncount_list *list,
 		      struct nf_conncount_tuple *conn)
 {
-	if (WARN_ON_ONCE(list->count == 0))
-		return;
+	bool free_entry = false;
+
+	spin_lock(&list->list_lock);
+
+	if (list->count == 0) {
+		spin_unlock(&list->list_lock);
+                return free_entry;
+	}
 
 	list->count--;
-	list_del(&conn->node);
-	kmem_cache_free(conncount_conn_cachep, conn);
+	list_del_rcu(&conn->node);
+	if (list->count == 0)
+		free_entry = true;
+
+	spin_unlock(&list->list_lock);
+	call_rcu(&conn->rcu_head, __conn_free);
+	return free_entry;
 }
 
 static const struct nf_conntrack_tuple_hash *
 find_or_evict(struct net *net, struct nf_conncount_list *list,
-	      struct nf_conncount_tuple *conn)
+	      struct nf_conncount_tuple *conn, bool *free_entry)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	unsigned long a, b;
@@ -137,7 +171,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
 	 */
 	age = a - b;
 	if (conn->cpu == cpu || age >= 2) {
-		conn_free(list, conn);
+		*free_entry = conn_free(list, conn);
 		return ERR_PTR(-ENOENT);
 	}
 
@@ -154,6 +188,7 @@ void nf_conncount_lookup(struct net *net,
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collect = 0;
+	bool free_entry = false;
 
 	/* best effort only */
 	*addit = tuple ? true : false;
@@ -163,7 +198,7 @@ void nf_conncount_lookup(struct net *net,
 		if (collect > CONNCOUNT_GC_MAX_NODES)
 			break;
 
-		found = find_or_evict(net, list, conn);
+		found = find_or_evict(net, list, conn, &free_entry);
 		if (IS_ERR(found)) {
 			/* Not found, but might be about to be confirmed */
 			if (PTR_ERR(found) == -EAGAIN) {
@@ -208,24 +243,31 @@ EXPORT_SYMBOL_GPL(nf_conncount_lookup);
 
 void nf_conncount_list_init(struct nf_conncount_list *list)
 {
+	spin_lock_init(&list->list_lock);
 	INIT_LIST_HEAD(&list->head);
 	list->count = 1;
+	list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
 
-void nf_conncount_gc_list(struct net *net,
+/* Return true if the list is empty */
+bool nf_conncount_gc_list(struct net *net,
 			  struct nf_conncount_list *list)
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn, *conn_n;
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
+	bool free_entry = false;
 
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
-		found = find_or_evict(net, list, conn);
+		found = find_or_evict(net, list, conn, &free_entry);
 		if (IS_ERR(found)) {
-			if (PTR_ERR(found) == -ENOENT)
+			if (PTR_ERR(found) == -ENOENT)  {
+				if (free_entry)
+					return true;
 				collected++;
+			}
 			continue;
 		}
 
@@ -236,18 +278,28 @@ void nf_conncount_gc_list(struct net *net,
 			 * closed already -> ditch it
 			 */
 			nf_ct_put(found_ct);
-			conn_free(list, conn);
+			if (conn_free(list, conn))
+				return true;
 			collected++;
 			continue;
 		}
 
 		nf_ct_put(found_ct);
 		if (collected > CONNCOUNT_GC_MAX_NODES)
-			return;
+			return false;
 	}
+	return false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
 
+static void __tree_nodes_free(struct rcu_head *h)
+{
+	struct nf_conncount_rb *rbconn;
+
+	rbconn = container_of(h, struct nf_conncount_rb, rcu_head);
+	kmem_cache_free(conncount_rb_cachep, rbconn);
+}
+
 static void tree_nodes_free(struct rb_root *root,
 			    struct nf_conncount_rb *gc_nodes[],
 			    unsigned int gc_count)
@@ -256,23 +308,39 @@ static void tree_nodes_free(struct rb_root *root,
 
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
-		rb_erase(&rbconn->node, root);
-		kmem_cache_free(conncount_rb_cachep, rbconn);
+		spin_lock(&rbconn->list.list_lock);
+		if (rbconn->list.count == 0 && rbconn->list.dead == false) {
+			rbconn->list.dead = true;
+			rb_erase(&rbconn->node, root);
+			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
+		}
+		spin_unlock(&rbconn->list.list_lock);
 	}
 }
 
+static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
+{
+	set_bit(tree, data->pending_trees);
+	schedule_work(&data->gc_work);
+}
+
 static unsigned int
-insert_tree(struct rb_root *root,
+insert_tree(struct net *net,
+	    struct nf_conncount_data *data,
+	    struct rb_root *root,
 	    unsigned int hash,
 	    const u32 *key,
 	    u8 keylen,
 	    const struct nf_conntrack_tuple *tuple,
 	    const struct nf_conntrack_zone *zone)
 {
+	enum nf_conncount_list_add ret;
+	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
 	struct rb_node **rbnode, *parent;
 	struct nf_conncount_rb *rbconn;
 	struct nf_conncount_tuple *conn;
-	unsigned int count = 0;
+	unsigned int count = 0, gc_count = 0;
+	bool node_found = false;
 
 	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
 
@@ -290,16 +358,44 @@ insert_tree(struct rb_root *root,
 			rbnode = &((*rbnode)->rb_right);
 		} else {
 			/* unlikely: other cpu added node already */
-			if (!nf_conncount_add(&rbconn->list, tuple, zone)) {
+			node_found = true;
+			ret = nf_conncount_add(&rbconn->list, tuple, zone);
+			if (ret == NF_CONNCOUNT_ERR) {
 				count = 0; /* hotdrop */
-				goto out_unlock;
+			} else if (ret == NF_CONNCOUNT_ADDED) {
+				count = rbconn->list.count;
+			} else {
+				/* NF_CONNCOUNT_SKIP, rbconn is already
+				 * reclaimed by gc, insert a new tree node
+				 */
+				node_found = false;
 			}
-
-			count = rbconn->list.count;
-			goto out_unlock;
+			break;
 		}
+
+		if (gc_count >= ARRAY_SIZE(gc_nodes))
+			continue;
+
+		if (nf_conncount_gc_list(net, &rbconn->list))
+			gc_nodes[gc_count++] = rbconn;
+	}
+
+	if (gc_count) {
+		tree_nodes_free(root, gc_nodes, gc_count);
+		/* tree_node_free before new allocation permits
+		 * allocator to re-use newly free'd object.
+		 *
+		 * This is a rare event; in most cases we will find
+		 * existing node to re-use. (or gc_count is 0).
+		 */
+
+		if (gc_count >= ARRAY_SIZE(gc_nodes))
+			schedule_gc_worker(data, hash);
 	}
 
+	if (node_found)
+		goto out_unlock;
+
 	/* expected case: match, insert new node */
 	rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
 	if (rbconn == NULL)
@@ -333,87 +429,97 @@ count_tree(struct net *net,
 	   const struct nf_conntrack_tuple *tuple,
 	   const struct nf_conntrack_zone *zone)
 {
-	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
+	enum nf_conncount_list_add ret;
 	struct rb_root *root;
-	struct rb_node **rbnode, *parent;
+	struct rb_node *parent;
 	struct nf_conncount_rb *rbconn;
-	unsigned int gc_count, hash;
-	bool no_gc = false;
-	unsigned int count = 0;
+	unsigned int hash;
 	u8 keylen = data->keylen;
 
 	hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS;
 	root = &data->root[hash];
 
-	spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
- restart:
-	gc_count = 0;
-	parent = NULL;
-	rbnode = &(root->rb_node);
-	while (*rbnode) {
+	parent = rcu_dereference_raw(root->rb_node);
+	while (parent) {
 		int diff;
 		bool addit;
 
-		rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node);
+		rbconn = rb_entry(parent, struct nf_conncount_rb, node);
 
-		parent = *rbnode;
 		diff = key_diff(key, rbconn->key, keylen);
 		if (diff < 0) {
-			rbnode = &((*rbnode)->rb_left);
+			parent = rcu_dereference_raw(parent->rb_left);
 		} else if (diff > 0) {
-			rbnode = &((*rbnode)->rb_right);
+			parent = rcu_dereference_raw(parent->rb_right);
 		} else {
 			/* same source network -> be counted! */
 			nf_conncount_lookup(net, &rbconn->list, tuple, zone,
 					    &addit);
-			count = rbconn->list.count;
 
-			tree_nodes_free(root, gc_nodes, gc_count);
 			if (!addit)
-				goto out_unlock;
+				return rbconn->list.count;
+
+			ret = nf_conncount_add(&rbconn->list, tuple, zone);
+			if (ret == NF_CONNCOUNT_ERR) {
+				return 0; /* hotdrop */
+			} else if (ret == NF_CONNCOUNT_ADDED) {
+				return rbconn->list.count;
+			} else {
+				/* NF_CONNCOUNT_SKIP, rbconn is already
+				 * reclaimed by gc, insert a new tree node
+				 */
+				break;
+			}
+		}
+	}
 
-			if (!nf_conncount_add(&rbconn->list, tuple, zone))
-				count = 0; /* hotdrop */
-				goto out_unlock;
+	if (!tuple)
+		return 0;
 
-			count++;
-			goto out_unlock;
-		}
+	return insert_tree(net, data, root, hash, key, keylen, tuple, zone);
+}
 
-		if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
-			continue;
+static void tree_gc_worker(struct work_struct *work)
+{
+	struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work);
+	struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn;
+	struct rb_root *root;
+	struct rb_node *node;
+	unsigned int tree, next_tree, gc_count = 0;
+
+	tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS;
+	root = &data->root[tree];
 
-		nf_conncount_gc_list(net, &rbconn->list);
-		if (list_empty(&rbconn->list.head))
+	rcu_read_lock();
+	for (node = rb_first(root); node != NULL; node = rb_next(node)) {
+		rbconn = rb_entry(node, struct nf_conncount_rb, node);
+		if (nf_conncount_gc_list(data->net, &rbconn->list))
 			gc_nodes[gc_count++] = rbconn;
 	}
+	rcu_read_unlock();
+
+	spin_lock_bh(&nf_conncount_locks[tree]);
 
 	if (gc_count) {
-		no_gc = true;
 		tree_nodes_free(root, gc_nodes, gc_count);
-		/* tree_node_free before new allocation permits
-		 * allocator to re-use newly free'd object.
-		 *
-		 * This is a rare event; in most cases we will find
-		 * existing node to re-use. (or gc_count is 0).
-		 */
-		goto restart;
 	}
 
-	count = 0;
-	if (!tuple)
-		goto out_unlock;
+	clear_bit(tree, data->pending_trees);
 
-	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-	return insert_tree(root, hash, key, keylen, tuple, zone);
+	next_tree = (tree + 1) % CONNCOUNT_SLOTS;
+	next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS);
 
-out_unlock:
-	spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]);
-	return count;
+	if (next_tree < CONNCOUNT_SLOTS) {
+		data->gc_tree = next_tree;
+		schedule_work(work);
+	}
+
+	spin_unlock_bh(&nf_conncount_locks[tree]);
 }
 
 /* Count and return number of conntrack entries in 'net' with particular 'key'.
  * If 'tuple' is not null, insert it into the accounting data structure.
+ * Call with RCU read lock.
  */
 unsigned int nf_conncount_count(struct net *net,
 				struct nf_conncount_data *data,
@@ -452,6 +558,8 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
 		data->root[i] = RB_ROOT;
 
 	data->keylen = keylen / sizeof(u32);
+	data->net = net;
+	INIT_WORK(&data->gc_work, tree_gc_worker);
 
 	return data;
 }
@@ -487,6 +595,7 @@ void nf_conncount_destroy(struct net *net, unsigned int family,
 {
 	unsigned int i;
 
+	cancel_work_sync(&data->gc_work);
 	nf_ct_netns_put(net, family);
 
 	for (i = 0; i < ARRAY_SIZE(data->root); ++i)
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 37c52ae06741..b90d96ba4a12 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,7 +14,6 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 struct nft_connlimit {
-	spinlock_t			lock;
 	struct nf_conncount_list	list;
 	u32				limit;
 	bool				invert;
@@ -45,7 +44,6 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 		return;
 	}
 
-	spin_lock_bh(&priv->lock);
 	nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone,
 			    &addit);
 	count = priv->list.count;
@@ -53,14 +51,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 	if (!addit)
 		goto out;
 
-	if (!nf_conncount_add(&priv->list, tuple_ptr, zone)) {
+	if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) {
 		regs->verdict.code = NF_DROP;
-		spin_unlock_bh(&priv->lock);
 		return;
 	}
 	count++;
 out:
-	spin_unlock_bh(&priv->lock);
 
 	if ((count > priv->limit) ^ priv->invert) {
 		regs->verdict.code = NFT_BREAK;
@@ -88,7 +84,6 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
 			invert = true;
 	}
 
-	spin_lock_init(&priv->lock);
 	nf_conncount_list_init(&priv->list);
 	priv->limit	= limit;
 	priv->invert	= invert;
@@ -213,7 +208,6 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
 	struct nft_connlimit *priv_dst = nft_expr_priv(dst);
 	struct nft_connlimit *priv_src = nft_expr_priv(src);
 
-	spin_lock_init(&priv_dst->lock);
 	nf_conncount_list_init(&priv_dst->list);
 	priv_dst->limit	 = priv_src->limit;
 	priv_dst->invert = priv_src->invert;
@@ -232,15 +226,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
 {
 	struct nft_connlimit *priv = nft_expr_priv(expr);
-	bool ret;
 
-	spin_lock_bh(&priv->lock);
-	nf_conncount_gc_list(net, &priv->list);
-
-	ret = list_empty(&priv->list.head);
-	spin_unlock_bh(&priv->lock);
-
-	return ret;
+	return nf_conncount_gc_list(net, &priv->list);
 }
 
 static struct nft_expr_type nft_connlimit_type;
-- 
cgit v1.2.3


From ec1b28ca9674def4a158808a6493bdb87b993d81 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:52 +0300
Subject: ipvs: provide just conn to ip_vs_state_name

In preparation for followup patches, provide just the cp
ptr to ip_vs_state_name.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h              | 2 +-
 net/netfilter/ipvs/ip_vs_conn.c  | 8 ++++----
 net/netfilter/ipvs/ip_vs_proto.c | 8 ++++----
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a0bec23c6d5e..4d76abcf1c41 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1221,7 +1221,7 @@ struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
 				  struct ip_vs_dest *dest, __u32 fwmark);
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
-const char *ip_vs_state_name(__u16 proto, int state);
+const char *ip_vs_state_name(const struct ip_vs_conn *cp);
 
 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 99e0aa350dc5..de5a64e42ebd 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1107,7 +1107,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				(cp->timer.expires-jiffies)/HZ, pe_data);
 		else
 #endif
@@ -1118,7 +1118,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				(cp->timer.expires-jiffies)/HZ, pe_data);
 	}
 	return 0;
@@ -1169,7 +1169,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 				&cp->caddr.in6, ntohs(cp->cport),
 				&cp->vaddr.in6, ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				ip_vs_origin_name(cp->flags),
 				(cp->timer.expires-jiffies)/HZ);
 		else
@@ -1181,7 +1181,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
 				ntohl(cp->caddr.ip), ntohs(cp->cport),
 				ntohl(cp->vaddr.ip), ntohs(cp->vport),
 				dbuf, ntohs(cp->dport),
-				ip_vs_state_name(cp->protocol, cp->state),
+				ip_vs_state_name(cp),
 				ip_vs_origin_name(cp->flags),
 				(cp->timer.expires-jiffies)/HZ);
 	}
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index ca880a3ad033..85c446621758 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -193,13 +193,13 @@ ip_vs_create_timeout_table(int *table, int size)
 }
 
 
-const char * ip_vs_state_name(__u16 proto, int state)
+const char *ip_vs_state_name(const struct ip_vs_conn *cp)
 {
-	struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
+	struct ip_vs_protocol *pp = ip_vs_proto_get(cp->protocol);
 
 	if (pp == NULL || pp->state_name == NULL)
-		return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
-	return pp->state_name(state);
+		return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
+	return pp->state_name(cp->state);
 }
 
 
-- 
cgit v1.2.3


From 275411430f892407b885be1de2548b2e632892c3 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Jul 2018 08:25:53 +0300
Subject: ipvs: add assured state for conn templates

cp->state was not used for templates. Add support for state bits
and for the first "assured" bit which indicates that some
connection controlled by this template was established or assured
by the real server. In a followup patch we will use it to drop
templates under SYN attack.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h                   | 16 ++++++++++++++++
 net/netfilter/ipvs/ip_vs_proto.c      | 17 +++++++++++++++--
 net/netfilter/ipvs/ip_vs_proto_sctp.c |  2 ++
 net/netfilter/ipvs/ip_vs_proto_tcp.c  |  2 ++
 net/netfilter/ipvs/ip_vs_proto_udp.c  |  2 ++
 net/netfilter/ipvs/ip_vs_sync.c       | 18 ++++++------------
 6 files changed, 43 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4d76abcf1c41..a0d2e0bb9a94 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -335,6 +335,11 @@ enum ip_vs_sctp_states {
 	IP_VS_SCTP_S_LAST
 };
 
+/* Connection templates use bits from state */
+#define IP_VS_CTPL_S_NONE		0x0000
+#define IP_VS_CTPL_S_ASSURED		0x0001
+#define IP_VS_CTPL_S_LAST		0x0002
+
 /* Delta sequence info structure
  * Each ip_vs_conn has 2 (output AND input seq. changes).
  * Only used in the VS/NAT.
@@ -1289,6 +1294,17 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }
 
+/* Mark our template as assured */
+static inline void
+ip_vs_control_assure_ct(struct ip_vs_conn *cp)
+{
+	struct ip_vs_conn *ct = cp->control;
+
+	if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) &&
+	    (ct->flags & IP_VS_CONN_F_TEMPLATE))
+		ct->state |= IP_VS_CTPL_S_ASSURED;
+}
+
 /* IPVS netns init & cleanup functions */
 int ip_vs_estimator_net_init(struct netns_ipvs *ipvs);
 int ip_vs_control_net_init(struct netns_ipvs *ipvs);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 85c446621758..54ee84adf0bd 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -42,6 +42,11 @@
 
 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
 
+/* States for conn templates: NONE or words separated with ",", max 15 chars */
+static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = {
+	[IP_VS_CTPL_S_NONE]			= "NONE",
+	[IP_VS_CTPL_S_ASSURED]			= "ASSURED",
+};
 
 /*
  *	register an ipvs protocol
@@ -195,11 +200,19 @@ ip_vs_create_timeout_table(int *table, int size)
 
 const char *ip_vs_state_name(const struct ip_vs_conn *cp)
 {
-	struct ip_vs_protocol *pp = ip_vs_proto_get(cp->protocol);
+	unsigned int state = cp->state;
+	struct ip_vs_protocol *pp;
+
+	if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
 
+		if (state >= IP_VS_CTPL_S_LAST)
+			return "ERR!";
+		return ip_vs_ctpl_state_name_table[state] ? : "?";
+	}
+	pp = ip_vs_proto_get(cp->protocol);
 	if (pp == NULL || pp->state_name == NULL)
 		return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!";
-	return pp->state_name(cp->state);
+	return pp->state_name(state);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3250c4a1111e..b0cd7d08f2a7 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -461,6 +461,8 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 		}
+		if (next_state == IP_VS_SCTP_S_ESTABLISHED)
+			ip_vs_control_assure_ct(cp);
 	}
 	if (likely(pd))
 		cp->timeout = pd->timeout_table[cp->state = next_state];
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 80d10ad12a15..1770fc6ce960 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -569,6 +569,8 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 		}
+		if (new_state == IP_VS_TCP_S_ESTABLISHED)
+			ip_vs_control_assure_ct(cp);
 	}
 
 	if (likely(pd))
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e0ef11c3691e..0f53c49025f8 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -460,6 +460,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
 	}
 
 	cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
+	if (direction == IP_VS_DIR_OUTPUT)
+		ip_vs_control_assure_ct(cp);
 }
 
 static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 001501e25625..d4020c5e831d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1003,12 +1003,9 @@ static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer
 				continue;
 			}
 		} else {
-			/* protocol in templates is not used for state/timeout */
-			if (state > 0) {
-				IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
-					state);
-				state = 0;
-			}
+			if (state >= IP_VS_CTPL_S_LAST)
+				IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n",
+					  state);
 		}
 
 		ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
@@ -1166,12 +1163,9 @@ static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *m
 			goto out;
 		}
 	} else {
-		/* protocol in templates is not used for state/timeout */
-		if (state > 0) {
-			IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
-				state);
-			state = 0;
-		}
+		if (state >= IP_VS_CTPL_S_LAST)
+			IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n",
+				  state);
 	}
 	if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
 				       pe_data_len, pe_name, pe_name_len)) {
-- 
cgit v1.2.3


From 440534d3c56be04abfb26850ee882d19d223557a Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Mon, 9 Jul 2018 18:06:33 +0800
Subject: netfilter: Remove useless param helper of nf_ct_helper_ext_add

The param helper of nf_ct_helper_ext_add is useless now, then remove
it now.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_helper.h | 4 +---
 net/netfilter/nf_conntrack_core.c           | 3 +--
 net/netfilter/nf_conntrack_helper.c         | 5 ++---
 net/netfilter/nf_conntrack_netlink.c        | 2 +-
 net/netfilter/nft_ct.c                      | 2 +-
 net/netfilter/xt_CT.c                       | 2 +-
 net/openvswitch/conntrack.c                 | 2 +-
 7 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 32c2a94a219d..2492120b8097 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -103,9 +103,7 @@ int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
 				     unsigned int);
 
-struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
-					  struct nf_conntrack_helper *helper,
-					  gfp_t gfp);
+struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
 
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 			      gfp_t flags);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4ced7c7102b6..d97d7e9a9ee7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1401,8 +1401,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
 			ct->master = exp->master;
 			if (exp->helper) {
-				help = nf_ct_helper_ext_add(ct, exp->helper,
-							    GFP_ATOMIC);
+				help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 				if (help)
 					rcu_assign_pointer(help->helper, exp->helper);
 			}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a55a58c706a9..d557a425289d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -192,8 +192,7 @@ void nf_conntrack_helper_put(struct nf_conntrack_helper *helper)
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put);
 
 struct nf_conn_help *
-nf_ct_helper_ext_add(struct nf_conn *ct,
-		     struct nf_conntrack_helper *helper, gfp_t gfp)
+nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
 	struct nf_conn_help *help;
 
@@ -262,7 +261,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	}
 
 	if (help == NULL) {
-		help = nf_ct_helper_ext_add(ct, helper, flags);
+		help = nf_ct_helper_ext_add(ct, flags);
 		if (help == NULL)
 			return -ENOMEM;
 	} else {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 40152b9ad772..f981bfa8db72 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1947,7 +1947,7 @@ ctnetlink_create_conntrack(struct net *net,
 		} else {
 			struct nf_conn_help *help;
 
-			help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC);
+			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 			if (help == NULL) {
 				err = -ENOMEM;
 				goto err2;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 1435ffc5f57e..3bc82ee5464d 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -870,7 +870,7 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
 	if (test_bit(IPS_HELPER_BIT, &ct->status))
 		return;
 
-	help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC);
+	help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 	if (help) {
 		rcu_assign_pointer(help->helper, to_assign);
 		set_bit(IPS_HELPER_BIT, &ct->status);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 03b9a50ec93b..7ba454e9e3fa 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -93,7 +93,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 		return -ENOENT;
 	}
 
-	help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
+	help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
 	if (help == NULL) {
 		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e05bd3e53f0f..3e33c382367f 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1303,7 +1303,7 @@ static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
 		return -EINVAL;
 	}
 
-	help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+	help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL);
 	if (!help) {
 		nf_conntrack_helper_put(helper);
 		return -ENOMEM;
-- 
cgit v1.2.3


From be2ab5b4d5c0bf041a34ec2e1397d50afbfb095e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jul 2018 13:45:12 +0200
Subject: netfilter: nf_tables: take module reference when starting a batch

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h | 1 +
 net/netfilter/nf_tables_api.c       | 1 +
 net/netfilter/nfnetlink.c           | 9 +++++++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 3ecc3050be0e..4a520d3304a2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -29,6 +29,7 @@ struct nfnetlink_subsystem {
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
 	__u8 cb_count;			/* number of callbacks */
 	const struct nfnl_callback *cb;	/* callback for individual types */
+	struct module *owner;
 	int (*commit)(struct net *net, struct sk_buff *skb);
 	int (*abort)(struct net *net, struct sk_buff *skb);
 	void (*cleanup)(struct net *net);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 594b395442d6..c16c481fc52a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6603,6 +6603,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.abort		= nf_tables_abort,
 	.cleanup	= nf_tables_cleanup,
 	.valid_genid	= nf_tables_valid_genid,
+	.owner		= THIS_MODULE,
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 94f9bcaa0799..dd1d7bc23b03 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -337,7 +337,14 @@ replay:
 		return kfree_skb(skb);
 	}
 
+	if (!try_module_get(ss->owner)) {
+		nfnl_unlock(subsys_id);
+		netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
+		return kfree_skb(skb);
+	}
+
 	if (!ss->valid_genid(net, genid)) {
+		module_put(ss->owner);
 		nfnl_unlock(subsys_id);
 		netlink_ack(oskb, nlh, -ERESTART, NULL);
 		return kfree_skb(skb);
@@ -472,6 +479,7 @@ done:
 		nfnl_err_reset(&err_list);
 		nfnl_unlock(subsys_id);
 		kfree_skb(skb);
+		module_put(ss->owner);
 		goto replay;
 	} else if (status == NFNL_BATCH_DONE) {
 		err = ss->commit(net, oskb);
@@ -491,6 +499,7 @@ done:
 	nfnl_err_deliver(&err_list, oskb);
 	nfnl_unlock(subsys_id);
 	kfree_skb(skb);
+	module_put(ss->owner);
 }
 
 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
-- 
cgit v1.2.3


From f102d66b335a417d4848da9441f585695a838934 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 11 Jul 2018 13:45:14 +0200
Subject: netfilter: nf_tables: use dedicated mutex to guard transactions

Continue to use nftnl subsys mutex to protect (un)registration of hook types,
expressions and so on, but force batch operations to do their own
locking.

This allows distinct net namespaces to perform transactions in parallel.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/nftables.h     |  1 +
 net/netfilter/nf_tables_api.c    | 88 +++++++++++++++++++++++++++++++---------
 net/netfilter/nfnetlink.c        | 10 ++---
 net/netfilter/nft_chain_filter.c |  4 +-
 net/netfilter/nft_dynset.c       |  2 +
 5 files changed, 77 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 94767ea3a490..286fd960896f 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
 struct netns_nftables {
 	struct list_head	tables;
 	struct list_head	commit_list;
+	struct mutex		commit_mutex;
 	unsigned int		base_seq;
 	u8			gencursor;
 	u8			validate_state;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 68436edd9cdf..c0fb2bcd30fe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -480,12 +480,19 @@ static void nft_request_module(struct net *net, const char *fmt, ...)
 	if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret))
 		return;
 
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	mutex_unlock(&net->nft.commit_mutex);
 	request_module("%s", module_name);
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	mutex_lock(&net->nft.commit_mutex);
 }
 #endif
 
+static void lockdep_nfnl_nft_mutex_not_held(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+#endif
+}
+
 static const struct nft_chain_type *
 nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
 			    u8 family, bool autoload)
@@ -495,6 +502,8 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
 	type = __nf_tables_chain_type_lookup(nla, family);
 	if (type != NULL)
 		return type;
+
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (autoload) {
 		nft_request_module(net, "nft-chain-%u-%.*s", family,
@@ -802,6 +811,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 	struct nft_ctx ctx;
 	int err;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
 	attr = nla[NFTA_TABLE_NAME];
 	table = nft_table_lookup(net, attr, family, genmask);
 	if (IS_ERR(table)) {
@@ -1042,7 +1052,17 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 	return ERR_PTR(-ENOENT);
 }
 
-static struct nft_chain *nft_chain_lookup(struct nft_table *table,
+static bool lockdep_commit_lock_is_held(struct net *net)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	return lockdep_is_held(&net->nft.commit_mutex);
+#else
+	return true;
+#endif
+}
+
+static struct nft_chain *nft_chain_lookup(struct net *net,
+					  struct nft_table *table,
 					  const struct nlattr *nla, u8 genmask)
 {
 	char search[NFT_CHAIN_MAXNAMELEN + 1];
@@ -1055,7 +1075,7 @@ static struct nft_chain *nft_chain_lookup(struct nft_table *table,
 	nla_strlcpy(search, nla, sizeof(search));
 
 	WARN_ON(!rcu_read_lock_held() &&
-		!lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+		!lockdep_commit_lock_is_held(net));
 
 	chain = ERR_PTR(-ENOENT);
 	rcu_read_lock();
@@ -1295,7 +1315,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
 		return PTR_ERR(chain);
@@ -1428,6 +1448,9 @@ static int nft_chain_parse_hook(struct net *net,
 	struct net_device *dev;
 	int err;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+	lockdep_nfnl_nft_mutex_not_held();
+
 	err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
 			       nft_hook_policy, NULL);
 	if (err < 0)
@@ -1662,7 +1685,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 	    nla[NFTA_CHAIN_NAME]) {
 		struct nft_chain *chain2;
 
-		chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask);
+		chain2 = nft_chain_lookup(ctx->net, table,
+					  nla[NFTA_CHAIN_NAME], genmask);
 		if (!IS_ERR(chain2))
 			return -EEXIST;
 	}
@@ -1724,6 +1748,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+
 	table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
 	if (IS_ERR(table)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]);
@@ -1742,7 +1768,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 		}
 		attr = nla[NFTA_CHAIN_HANDLE];
 	} else {
-		chain = nft_chain_lookup(table, attr, genmask);
+		chain = nft_chain_lookup(net, table, attr, genmask);
 		if (IS_ERR(chain)) {
 			if (PTR_ERR(chain) != -ENOENT) {
 				NL_SET_BAD_ATTR(extack, attr);
@@ -1820,7 +1846,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk,
 		chain = nft_chain_lookup_byhandle(table, handle, genmask);
 	} else {
 		attr = nla[NFTA_CHAIN_NAME];
-		chain = nft_chain_lookup(table, attr, genmask);
+		chain = nft_chain_lookup(net, table, attr, genmask);
 	}
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, attr);
@@ -1918,6 +1944,7 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
 		nft_request_module(net, "nft-expr-%u-%.*s", family,
@@ -2352,7 +2379,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
@@ -2386,6 +2413,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 {
 	struct nft_expr *expr;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
 	/*
 	 * Careful: some expressions might not be initialized in case this
 	 * is called on error from nf_tables_newrule().
@@ -2476,6 +2504,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	bool create;
 	u64 handle, pos_handle;
 
+	lockdep_assert_held(&net->nft.commit_mutex);
+
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
 	table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -2484,7 +2514,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 		return PTR_ERR(table);
 	}
 
-	chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+	chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask);
 	if (IS_ERR(chain)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 		return PTR_ERR(chain);
@@ -2684,7 +2714,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
 	}
 
 	if (nla[NFTA_RULE_CHAIN]) {
-		chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask);
+		chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
+					 genmask);
 		if (IS_ERR(chain)) {
 			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 			return PTR_ERR(chain);
@@ -2776,6 +2807,8 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 	const struct nft_set_type *type;
 	u32 flags = 0;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (list_empty(&nf_tables_set_types)) {
 		nft_request_module(ctx->net, "nft-set");
@@ -4820,6 +4853,7 @@ nft_obj_type_get(struct net *net, u32 objtype)
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
 		nft_request_module(net, "nft-obj-%u", objtype);
@@ -5379,6 +5413,7 @@ nft_flowtable_type_get(struct net *net, u8 family)
 	if (type != NULL && try_module_get(type->owner))
 		return type;
 
+	lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
 	if (type == NULL) {
 		nft_request_module(net, "nf-flowtable-%u", family);
@@ -6232,9 +6267,9 @@ static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *cha
 	next_genbit = nft_gencursor_next(net);
 
 	g0 = rcu_dereference_protected(chain->rules_gen_0,
-				       lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+				       lockdep_commit_lock_is_held(net));
 	g1 = rcu_dereference_protected(chain->rules_gen_1,
-				       lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+				       lockdep_commit_lock_is_held(net));
 
 	/* No changes to this chain? */
 	if (chain->rules_next == NULL) {
@@ -6442,6 +6477,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 	nf_tables_commit_release(net);
 	nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+	mutex_unlock(&net->nft.commit_mutex);
 
 	return 0;
 }
@@ -6593,12 +6629,25 @@ static void nf_tables_cleanup(struct net *net)
 
 static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 {
-	return __nf_tables_abort(net);
+	int ret = __nf_tables_abort(net);
+
+	mutex_unlock(&net->nft.commit_mutex);
+
+	return ret;
 }
 
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
-	return genid == 0 || net->nft.base_seq == genid;
+	bool genid_ok;
+
+	mutex_lock(&net->nft.commit_mutex);
+
+	genid_ok = genid == 0 || net->nft.base_seq == genid;
+	if (!genid_ok)
+		mutex_unlock(&net->nft.commit_mutex);
+
+	/* else, commit mutex has to be released by commit or abort function */
+	return genid_ok;
 }
 
 static const struct nfnetlink_subsystem nf_tables_subsys = {
@@ -6937,8 +6986,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 	case NFT_GOTO:
 		if (!tb[NFTA_VERDICT_CHAIN])
 			return -EINVAL;
-		chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN],
-					 genmask);
+		chain = nft_chain_lookup(ctx->net, ctx->table,
+					 tb[NFTA_VERDICT_CHAIN], genmask);
 		if (IS_ERR(chain))
 			return PTR_ERR(chain);
 		if (nft_is_base_chain(chain))
@@ -7183,6 +7232,7 @@ static int __net_init nf_tables_init_net(struct net *net)
 {
 	INIT_LIST_HEAD(&net->nft.tables);
 	INIT_LIST_HEAD(&net->nft.commit_list);
+	mutex_init(&net->nft.commit_mutex);
 	net->nft.base_seq = 1;
 	net->nft.validate_state = NFT_VALIDATE_SKIP;
 
@@ -7191,11 +7241,11 @@ static int __net_init nf_tables_init_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	mutex_lock(&net->nft.commit_mutex);
 	if (!list_empty(&net->nft.commit_list))
 		__nf_tables_abort(net);
 	__nft_release_tables(net);
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	mutex_unlock(&net->nft.commit_mutex);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
 }
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index dd1d7bc23b03..916913454624 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -350,6 +350,8 @@ replay:
 		return kfree_skb(skb);
 	}
 
+	nfnl_unlock(subsys_id);
+
 	while (skb->len >= nlmsg_total_size(0)) {
 		int msglen, type;
 
@@ -471,13 +473,8 @@ ack:
 	}
 done:
 	if (status & NFNL_BATCH_REPLAY) {
-		const struct nfnetlink_subsystem *ss2;
-
-		ss2 = nfnl_dereference_protected(subsys_id);
-		if (ss2 == ss)
-			ss->abort(net, oskb);
+		ss->abort(net, oskb);
 		nfnl_err_reset(&err_list);
-		nfnl_unlock(subsys_id);
 		kfree_skb(skb);
 		module_put(ss->owner);
 		goto replay;
@@ -497,7 +494,6 @@ done:
 		ss->cleanup(net);
 
 	nfnl_err_deliver(&err_list, oskb);
-	nfnl_unlock(subsys_id);
 	kfree_skb(skb);
 	module_put(ss->owner);
 }
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index d21834bed805..ea5b7c4944f6 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -322,7 +322,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	if (!ctx.net)
 		return NOTIFY_DONE;
 
-	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	mutex_lock(&ctx.net->nft.commit_mutex);
 	list_for_each_entry(table, &ctx.net->nft.tables, list) {
 		if (table->family != NFPROTO_NETDEV)
 			continue;
@@ -337,7 +337,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 			nft_netdev_event(event, dev, &ctx);
 		}
 	}
-	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	mutex_unlock(&ctx.net->nft.commit_mutex);
 	put_net(ctx.net);
 
 	return NOTIFY_DONE;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 27d7e4598ab6..81184c244d1a 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -118,6 +118,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	u64 timeout;
 	int err;
 
+	lockdep_assert_held(&ctx->net->nft.commit_mutex);
+
 	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
 	    tb[NFTA_DYNSET_OP] == NULL ||
 	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
-- 
cgit v1.2.3


From 7d25f8851a2c03319bfa8e56bb40bde2c4621392 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Thu, 12 Jul 2018 17:48:06 +0200
Subject: netfilter: nft_socket: Expose socket mark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 +++-
 net/netfilter/nft_socket.c               | 11 +++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 89438e68dc03..f466860bcf75 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -921,10 +921,12 @@ enum nft_socket_attributes {
 /*
  * enum nft_socket_keys - nf_tables socket expression keys
  *
- * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_
+ * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option
+ * @NFT_SOCKET_MARK: Value of the socket mark
  */
 enum nft_socket_keys {
 	NFT_SOCKET_TRANSPARENT,
+	NFT_SOCKET_MARK,
 	__NFT_SOCKET_MAX
 };
 #define NFT_SOCKET_MAX	(__NFT_SOCKET_MAX - 1)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 622ac2012a40..d7f3776dfd71 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -54,6 +54,14 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	case NFT_SOCKET_TRANSPARENT:
 		nft_reg_store8(dest, inet_sk_transparent(sk));
 		break;
+	case NFT_SOCKET_MARK:
+		if (sk_fullsock(sk)) {
+			*dest = sk->sk_mark;
+		} else {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
+		break;
 	default:
 		WARN_ON(1);
 		regs->verdict.code = NFT_BREAK;
@@ -91,6 +99,9 @@ static int nft_socket_init(const struct nft_ctx *ctx,
 	case NFT_SOCKET_TRANSPARENT:
 		len = sizeof(u8);
 		break;
+	case NFT_SOCKET_MARK:
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 70b095c84326640eeacfd69a411db8fc36e8ab1a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 14 Jul 2018 01:14:01 +0200
Subject: ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module

IPV6=m
DEFRAG_IPV6=m
CONNTRACK=y yields:

net/netfilter/nf_conntrack_proto.o: In function `nf_ct_netns_do_get':
net/netfilter/nf_conntrack_proto.c:802: undefined reference to `nf_defrag_ipv6_enable'
net/netfilter/nf_conntrack_proto.o:(.rodata+0x640): undefined reference to `nf_conntrack_l4proto_icmpv6'

Setting DEFRAG_IPV6=y causes undefined references to ip6_rhash_params
ip6_frag_init and ip6_expire_frag_queue so it would be needed to force
IPV6=y too.

This patch gets rid of the 'followup linker error' by removing
the dependency of ipv6.ko symbols from netfilter ipv6 defrag.

Shared code is placed into a header, then used from both.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ipv6.h                        |  28 --------
 include/net/ipv6_frag.h                   | 104 ++++++++++++++++++++++++++++++
 net/ieee802154/6lowpan/reassembly.c       |   2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c   |  17 +++--
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c |   3 +-
 net/ipv6/reassembly.c                     |  92 ++------------------------
 net/openvswitch/conntrack.c               |   1 +
 7 files changed, 126 insertions(+), 121 deletions(-)
 create mode 100644 include/net/ipv6_frag.h

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index aa6fd11a887c..3720958cd4e1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -581,34 +581,6 @@ static inline bool ipv6_prefix_equal(const struct in6_addr *addr1,
 }
 #endif
 
-struct inet_frag_queue;
-
-enum ip6_defrag_users {
-	IP6_DEFRAG_LOCAL_DELIVER,
-	IP6_DEFRAG_CONNTRACK_IN,
-	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_OUT,
-	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
-	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
-};
-
-void ip6_frag_init(struct inet_frag_queue *q, const void *a);
-extern const struct rhashtable_params ip6_rhash_params;
-
-/*
- *	Equivalent of ipv4 struct ip
- */
-struct frag_queue {
-	struct inet_frag_queue	q;
-
-	int			iif;
-	__u16			nhoffset;
-	u8			ecn;
-};
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq);
-
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
new file mode 100644
index 000000000000..6ced1e6899b6
--- /dev/null
+++ b/include/net/ipv6_frag.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _IPV6_FRAG_H
+#define _IPV6_FRAG_H
+#include <linux/kernel.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#include <net/inet_frag.h>
+
+enum ip6_defrag_users {
+	IP6_DEFRAG_LOCAL_DELIVER,
+	IP6_DEFRAG_CONNTRACK_IN,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_OUT,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
+	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+};
+
+/*
+ *	Equivalent of ipv4 struct ip
+ */
+struct frag_queue {
+	struct inet_frag_queue	q;
+
+	int			iif;
+	__u16			nhoffset;
+	u8			ecn;
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline void ip6frag_init(struct inet_frag_queue *q, const void *a)
+{
+	struct frag_queue *fq = container_of(q, struct frag_queue, q);
+	const struct frag_v6_compare_key *key = a;
+
+	q->key.v6 = *key;
+	fq->ecn = 0;
+}
+
+static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed)
+{
+	return jhash2(data,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct inet_frag_queue *fq = data;
+
+	return jhash2((const u32 *)&fq->key.v6,
+		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static inline int
+ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+	const struct frag_v6_compare_key *key = arg->key;
+	const struct inet_frag_queue *fq = ptr;
+
+	return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static inline void
+ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
+{
+	struct net_device *dev = NULL;
+	struct sk_buff *head;
+
+	rcu_read_lock();
+	spin_lock(&fq->q.lock);
+
+	if (fq->q.flags & INET_FRAG_COMPLETE)
+		goto out;
+
+	inet_frag_kill(&fq->q);
+
+	dev = dev_get_by_index_rcu(net, fq->iif);
+	if (!dev)
+		goto out;
+
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+
+	/* Don't send error if the first segment did not arrive. */
+	head = fq->q.fragments;
+	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+		goto out;
+
+	head->dev = dev;
+	skb_get(head);
+	spin_unlock(&fq->q.lock);
+
+	icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+	kfree_skb(head);
+	goto out_rcu_unlock;
+
+out:
+	spin_unlock(&fq->q.lock);
+out_rcu_unlock:
+	rcu_read_unlock();
+	inet_frag_put(&fq->q);
+}
+#endif
+#endif
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 2cc224106b69..ec7a5da56129 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -25,7 +25,7 @@
 
 #include <net/ieee802154_netdev.h>
 #include <net/6lowpan.h>
-#include <net/ipv6.h>
+#include <net/ipv6_frag.h>
 #include <net/inet_frag.h>
 
 #include "6lowpan_i.h"
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a452d99c9f52..333ee3256964 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -33,9 +33,8 @@
 
 #include <net/sock.h>
 #include <net/snmp.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 
-#include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/transp_v6.h>
 #include <net/rawv6.h>
@@ -151,7 +150,7 @@ static void nf_ct_frag6_expire(struct timer_list *t)
 	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-	ip6_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(net, fq);
 }
 
 /* Creation primitives. */
@@ -622,16 +621,24 @@ static struct pernet_operations nf_ct_net_ops = {
 	.exit = nf_ct_net_exit,
 };
 
+static const struct rhashtable_params nfct_rhash_params = {
+	.head_offset		= offsetof(struct inet_frag_queue, node),
+	.hashfn			= ip6frag_key_hashfn,
+	.obj_hashfn		= ip6frag_obj_hashfn,
+	.obj_cmpfn		= ip6frag_obj_cmpfn,
+	.automatic_shrinking	= true,
+};
+
 int nf_ct_frag6_init(void)
 {
 	int ret = 0;
 
-	nf_frags.constructor = ip6_frag_init;
+	nf_frags.constructor = ip6frag_init;
 	nf_frags.destructor = NULL;
 	nf_frags.qsize = sizeof(struct frag_queue);
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	nf_frags.frags_cache_name = nf_frags_cache_name;
-	nf_frags.rhash_params = ip6_rhash_params;
+	nf_frags.rhash_params = nfct_rhash_params;
 	ret = inet_frags_init(&nf_frags);
 	if (ret)
 		goto out;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e631be25337e..72dd3e202375 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -14,8 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
-#include <net/ipv6.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_bridge.h>
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index b939b94e7e91..6edd2ac8ae4b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -57,7 +57,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
-#include <net/inet_frag.h>
+#include <net/ipv6_frag.h>
 #include <net/inet_ecn.h>
 
 static const char ip6_frag_cache_name[] = "ip6-frags";
@@ -72,61 +72,6 @@ static struct inet_frags ip6_frags;
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev);
 
-void ip6_frag_init(struct inet_frag_queue *q, const void *a)
-{
-	struct frag_queue *fq = container_of(q, struct frag_queue, q);
-	const struct frag_v6_compare_key *key = a;
-
-	q->key.v6 = *key;
-	fq->ecn = 0;
-}
-EXPORT_SYMBOL(ip6_frag_init);
-
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
-{
-	struct net_device *dev = NULL;
-	struct sk_buff *head;
-
-	rcu_read_lock();
-	spin_lock(&fq->q.lock);
-
-	if (fq->q.flags & INET_FRAG_COMPLETE)
-		goto out;
-
-	inet_frag_kill(&fq->q);
-
-	dev = dev_get_by_index_rcu(net, fq->iif);
-	if (!dev)
-		goto out;
-
-	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
-
-	/* Don't send error if the first segment did not arrive. */
-	head = fq->q.fragments;
-	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
-		goto out;
-
-	/* But use as source device on which LAST ARRIVED
-	 * segment was received. And do not use fq->dev
-	 * pointer directly, device might already disappeared.
-	 */
-	head->dev = dev;
-	skb_get(head);
-	spin_unlock(&fq->q.lock);
-
-	icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-	kfree_skb(head);
-	goto out_rcu_unlock;
-
-out:
-	spin_unlock(&fq->q.lock);
-out_rcu_unlock:
-	rcu_read_unlock();
-	inet_frag_put(&fq->q);
-}
-EXPORT_SYMBOL(ip6_expire_frag_queue);
-
 static void ip6_frag_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -136,7 +81,7 @@ static void ip6_frag_expire(struct timer_list *t)
 	fq = container_of(frag, struct frag_queue, q);
 	net = container_of(fq->q.net, struct net, ipv6.frags);
 
-	ip6_expire_frag_queue(net, fq);
+	ip6frag_expire_frag_queue(net, fq);
 }
 
 static struct frag_queue *
@@ -696,42 +641,19 @@ static struct pernet_operations ip6_frags_ops = {
 	.exit = ipv6_frags_exit_net,
 };
 
-static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
-{
-	return jhash2(data,
-		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
-{
-	const struct inet_frag_queue *fq = data;
-
-	return jhash2((const u32 *)&fq->key.v6,
-		      sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
-}
-
-static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
-{
-	const struct frag_v6_compare_key *key = arg->key;
-	const struct inet_frag_queue *fq = ptr;
-
-	return !!memcmp(&fq->key, key, sizeof(*key));
-}
-
-const struct rhashtable_params ip6_rhash_params = {
+static const struct rhashtable_params ip6_rhash_params = {
 	.head_offset		= offsetof(struct inet_frag_queue, node),
-	.hashfn			= ip6_key_hashfn,
-	.obj_hashfn		= ip6_obj_hashfn,
-	.obj_cmpfn		= ip6_obj_cmpfn,
+	.hashfn			= ip6frag_key_hashfn,
+	.obj_hashfn		= ip6frag_obj_hashfn,
+	.obj_cmpfn		= ip6frag_obj_cmpfn,
 	.automatic_shrinking	= true,
 };
-EXPORT_SYMBOL(ip6_rhash_params);
 
 int __init ipv6_frag_init(void)
 {
 	int ret;
 
-	ip6_frags.constructor = ip6_frag_init;
+	ip6_frags.constructor = ip6frag_init;
 	ip6_frags.destructor = NULL;
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.frag_expire = ip6_frag_expire;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3e33c382367f..86a75105af1a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -26,6 +26,7 @@
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/ipv6_frag.h>
 
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <linux/netfilter/nf_nat.h>
-- 
cgit v1.2.3


From 24c458c485c87eef97e91d2e180f222555528b11 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Sat, 14 Jul 2018 16:50:59 +0200
Subject: netfilter: nf_osf: add missing definitions to header file

Add missing definitions from nf_osf.h in order to extract Passive OS
fingerprint infrastructure from xt_osf.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_osf.h | 11 +++++++++++
 include/uapi/linux/netfilter/xt_osf.h | 10 ++--------
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
index 8f2f2f403183..3738116b2bbe 100644
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -16,9 +16,14 @@
 
 #define NF_OSF_TTL_TRUE			0	/* True ip and fingerprint TTL comparison */
 
+/* Check if ip TTL is less than fingerprint one */
+#define NF_OSF_TTL_LESS			1
+
 /* Do not compare ip and fingerprint TTL at all */
 #define NF_OSF_TTL_NOCHECK		2
 
+#define NF_OSF_FLAGMASK		(NF_OSF_GENRE | NF_OSF_TTL | \
+				 NF_OSF_LOG | NF_OSF_INVERT)
 /* Wildcard MSS (kind of).
  * It is used to implement a state machine for the different wildcard values
  * of the MSS and window sizes.
@@ -83,4 +88,10 @@ enum iana_options {
 	OSFOPT_EMPTY = 255,
 };
 
+enum nf_osf_attr_type {
+	OSF_ATTR_UNSPEC,
+	OSF_ATTR_FINGER,
+	OSF_ATTR_MAX,
+};
+
 #endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index 72956eceeb09..b189007f4f28 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -37,8 +37,7 @@
 
 #define XT_OSF_TTL_TRUE		NF_OSF_TTL_TRUE
 #define XT_OSF_TTL_NOCHECK	NF_OSF_TTL_NOCHECK
-
-#define XT_OSF_TTL_LESS	1	/* Check if ip TTL is less than fingerprint one */
+#define XT_OSF_TTL_LESS		NF_OSF_TTL_LESS
 
 #define xt_osf_wc		nf_osf_wc
 #define xt_osf_opt		nf_osf_opt
@@ -47,6 +46,7 @@
 #define xt_osf_finger		nf_osf_finger
 #define xt_osf_nlmsg		nf_osf_nlmsg
 
+#define xt_osf_attr_type	nf_osf_attr_type
 /*
  * Add/remove fingerprint from the kernel.
  */
@@ -56,10 +56,4 @@ enum xt_osf_msg_types {
 	OSF_MSG_MAX,
 };
 
-enum xt_osf_attr_type {
-	OSF_ATTR_UNSPEC,
-	OSF_ATTR_FINGER,
-	OSF_ATTR_MAX,
-};
-
 #endif				/* _XT_OSF_H */
-- 
cgit v1.2.3


From dba31ee759417ef1a952e929524b0cca1751c036 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Mon, 4 Jun 2018 16:54:55 -0400
Subject: ima: Differentiate auditing policy rules from "audit" actions

The AUDIT_INTEGRITY_RULE is used for auditing IMA policy rules and
the IMA "audit" policy action.  This patch defines
AUDIT_INTEGRITY_POLICY_RULE to reflect the IMA policy rules.

Since we defined a new message type we can now also pass the
audit_context and get an associated SYSCALL record. This now produces
the following records when parsing IMA policy's rules:

type=UNKNOWN[1807] msg=audit(1527888965.738:320): action=audit \
  func=MMAP_CHECK mask=MAY_EXEC res=1
type=UNKNOWN[1807] msg=audit(1527888965.738:320): action=audit \
  func=FILE_CHECK mask=MAY_READ res=1
type=SYSCALL msg=audit(1527888965.738:320): arch=c000003e syscall=1 \
  success=yes exit=17 a0=1 a1=55bcfcca9030 a2=11 a3=7fcc1b55fb38 \
  items=0 ppid=1567 pid=1601 auid=0 uid=0 gid=0 euid=0 suid=0 \
  fsuid=0 egid=0 sgid=0 fsgid=0 tty=tty2 ses=2 comm="echo" \
  exe="/usr/bin/echo" \
  subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null)

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
---
 include/uapi/linux/audit.h          | 1 +
 security/integrity/ima/ima_policy.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index c35aee9ad4a6..cf2bad8d7873 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -148,6 +148,7 @@
 #define AUDIT_INTEGRITY_PCR	    1804 /* PCR invalidation msgs */
 #define AUDIT_INTEGRITY_RULE	    1805 /* policy rule */
 #define AUDIT_INTEGRITY_EVM_XATTR   1806 /* New EVM-covered xattr */
+#define AUDIT_INTEGRITY_POLICY_RULE 1807 /* IMA policy rules */
 
 #define AUDIT_KERNEL		2000	/* Asynchronous audit record. NOT A REQUEST. */
 
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 0178bdaa40aa..8c9499867c91 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -681,8 +681,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 	bool uid_token;
 	int result = 0;
 
-	ab = integrity_audit_log_start(NULL, GFP_KERNEL,
-				       AUDIT_INTEGRITY_RULE);
+	ab = integrity_audit_log_start(audit_context(), GFP_KERNEL,
+				       AUDIT_INTEGRITY_POLICY_RULE);
 
 	entry->uid = INVALID_UID;
 	entry->fowner = INVALID_UID;
-- 
cgit v1.2.3


From e2861fa71641c6414831d628a1f4f793b6562580 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@google.com>
Date: Fri, 8 Jun 2018 14:57:42 -0700
Subject: evm: Don't deadlock if a crypto algorithm is unavailable

When EVM attempts to appraise a file signed with a crypto algorithm the
kernel doesn't have support for, it will cause the kernel to trigger a
module load. If the EVM policy includes appraisal of kernel modules this
will in turn call back into EVM - since EVM is holding a lock until the
crypto initialisation is complete, this triggers a deadlock. Add a
CRYPTO_NOLOAD flag and skip module loading if it's set, and add that flag
in the EVM case in order to fail gracefully with an error message
instead of deadlocking.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
---
 crypto/api.c                        | 2 +-
 include/linux/crypto.h              | 5 +++++
 security/integrity/evm/evm_crypto.c | 3 ++-
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/crypto/api.c b/crypto/api.c
index 0ee632bba064..7aca9f86c5f3 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -229,7 +229,7 @@ static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
 	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
 
 	alg = crypto_alg_lookup(name, type, mask);
-	if (!alg) {
+	if (!alg && !(mask & CRYPTO_NOLOAD)) {
 		request_module("crypto-%s", name);
 
 		if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask &
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 6eb06101089f..e8839d3a7559 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,11 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY		0x00004000
 
+/*
+ * Don't trigger module loading
+ */
+#define CRYPTO_NOLOAD			0x00008000
+
 /*
  * Transform masks and values (for crt_flags).
  */
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index b60524310855..c20e3142b541 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -97,7 +97,8 @@ static struct shash_desc *init_desc(char type)
 		mutex_lock(&mutex);
 		if (*tfm)
 			goto out;
-		*tfm = crypto_alloc_shash(algo, 0, CRYPTO_ALG_ASYNC);
+		*tfm = crypto_alloc_shash(algo, 0,
+					  CRYPTO_ALG_ASYNC | CRYPTO_NOLOAD);
 		if (IS_ERR(*tfm)) {
 			rc = PTR_ERR(*tfm);
 			pr_err("Can not allocate %s (reason: %ld)\n", algo, rc);
-- 
cgit v1.2.3


From 6eb864c1d9dd1ef32b88e03c3f49d8be0dab7dcf Mon Sep 17 00:00:00 2001
From: Mikhail Kurinnoi <viewizard@viewizard.com>
Date: Wed, 27 Jun 2018 16:33:42 +0300
Subject: integrity: prevent deadlock during digsig verification.

This patch aimed to prevent deadlock during digsig verification.The point
of issue - user space utility modprobe and/or it's dependencies (ld-*.so,
libz.so.*, libc-*.so and /lib/modules/ files) that could be used for
kernel modules load during digsig verification and could be signed by
digsig in the same time.

First at all, look at crypto_alloc_tfm() work algorithm:
crypto_alloc_tfm() will first attempt to locate an already loaded
algorithm. If that fails and the kernel supports dynamically loadable
modules, it will then attempt to load a module of the same name or alias.
If that fails it will send a query to any loaded crypto manager to
construct an algorithm on the fly.

We have situation, when public_key_verify_signature() in case of RSA
algorithm use alg_name to store internal information in order to construct
an algorithm on the fly, but crypto_larval_lookup() will try to use
alg_name in order to load kernel module with same name.

1) we can't do anything with crypto module work, since it designed to work
exactly in this way;
2) we can't globally filter module requests for modprobe, since it
designed to work with any requests.

In this patch, I propose add an exception for "crypto-pkcs1pad(rsa,*)"
module requests only in case of enabled integrity asymmetric keys support.
Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules for
sure, we are safe to fail such module request from crypto_larval_lookup().
In this way we prevent modprobe execution during digsig verification and
avoid possible deadlock if modprobe and/or it's dependencies also signed
with digsig.

Requested "crypto-pkcs1pad(rsa,*)" kernel module name formed by:
1) "pkcs1pad(rsa,%s)" in public_key_verify_signature();
2) "crypto-%s" / "crypto-%s-all" in crypto_larval_lookup().
"crypto-pkcs1pad(rsa," part of request is a constant and unique and could
be used as filter.

Signed-off-by: Mikhail Kurinnoi <viewizard@viewizard.com>
Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com>

 include/linux/integrity.h              | 13 +++++++++++++
 security/integrity/digsig_asymmetric.c | 23 +++++++++++++++++++++++
 security/security.c                    |  7 ++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)
---
 include/linux/integrity.h              | 13 +++++++++++++
 security/integrity/digsig_asymmetric.c | 23 +++++++++++++++++++++++
 security/security.c                    |  7 ++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/integrity.h b/include/linux/integrity.h
index 858d3f4a2241..54c853ec2fd1 100644
--- a/include/linux/integrity.h
+++ b/include/linux/integrity.h
@@ -44,4 +44,17 @@ static inline void integrity_load_keys(void)
 }
 #endif /* CONFIG_INTEGRITY */
 
+#ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS
+
+extern int integrity_kernel_module_request(char *kmod_name);
+
+#else
+
+static inline int integrity_kernel_module_request(char *kmod_name)
+{
+	return 0;
+}
+
+#endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */
+
 #endif /* _LINUX_INTEGRITY_H */
diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index ab6a029062a1..6dc075144508 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -115,3 +115,26 @@ int asymmetric_verify(struct key *keyring, const char *sig,
 	pr_debug("%s() = %d\n", __func__, ret);
 	return ret;
 }
+
+/**
+ * integrity_kernel_module_request - prevent crypto-pkcs1pad(rsa,*) requests
+ * @kmod_name: kernel module name
+ *
+ * We have situation, when public_key_verify_signature() in case of RSA
+ * algorithm use alg_name to store internal information in order to
+ * construct an algorithm on the fly, but crypto_larval_lookup() will try
+ * to use alg_name in order to load kernel module with same name.
+ * Since we don't have any real "crypto-pkcs1pad(rsa,*)" kernel modules,
+ * we are safe to fail such module request from crypto_larval_lookup().
+ *
+ * In this way we prevent modprobe execution during digsig verification
+ * and avoid possible deadlock if modprobe and/or it's dependencies
+ * also signed with digsig.
+ */
+int integrity_kernel_module_request(char *kmod_name)
+{
+	if (strncmp(kmod_name, "crypto-pkcs1pad(rsa,", 20) == 0)
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/security/security.c b/security/security.c
index b49ee810371b..dbca03d3629b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1032,7 +1032,12 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode)
 
 int security_kernel_module_request(char *kmod_name)
 {
-	return call_int_hook(kernel_module_request, 0, kmod_name);
+	int ret;
+
+	ret = call_int_hook(kernel_module_request, 0, kmod_name);
+	if (ret)
+		return ret;
+	return integrity_kernel_module_request(kmod_name);
 }
 
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id)
-- 
cgit v1.2.3


From bb4b894addb09a069c072a0a032f644cc470d17f Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 13 Jul 2018 16:36:28 +0100
Subject: ASoC: core: add support to card re-bind using component framework

This patch aims at achieving dynamic behaviour of audio card when
the dependent components disappear and reappear.

With this patch the card is removed if any of the dependent component
is removed and card is added back if the dependent component comes back.
All this is done using component framework and matching based on
component name.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  7 ++++++
 sound/soc/soc-core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index a4915148f739..a23ecdf3eff1 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
+#include <linux/component.h>
 #include <linux/regmap.h>
 #include <linux/log2.h>
 #include <sound/core.h>
@@ -1090,6 +1091,12 @@ struct snd_soc_card {
 
 	struct work_struct deferred_resume_work;
 
+	/* component framework related */
+	bool components_added;
+	/* set in machine driver to enable/disable auto re-binding */
+	bool auto_bind;
+	struct component_match *match;
+
 	/* lists of probed devices belonging to this card */
 	struct list_head component_dev_list;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 3be0310d5c81..08e189485009 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -279,11 +279,28 @@ static inline void snd_soc_debugfs_exit(void)
 
 #endif
 
+static int snd_soc_card_comp_compare(struct device *dev, void *data)
+{
+	struct snd_soc_component *component;
+
+	lockdep_assert_held(&client_mutex);
+	list_for_each_entry(component, &component_list, list) {
+		if (dev == component->dev) {
+			if (!strcmp(component->name, data))
+				return 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static int snd_soc_rtdcom_add(struct snd_soc_pcm_runtime *rtd,
 			      struct snd_soc_component *component)
 {
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_rtdcom_list *new_rtdcom;
+	char *cname;
 
 	for_each_rtdcom(rtd, rtdcom) {
 		/* already connected */
@@ -300,6 +317,13 @@ static int snd_soc_rtdcom_add(struct snd_soc_pcm_runtime *rtd,
 
 	list_add_tail(&new_rtdcom->list, &rtd->component_list);
 
+	if (rtd->card->auto_bind && !rtd->card->components_added) {
+		cname = devm_kasprintf(rtd->card->dev, GFP_KERNEL,
+				       "%s", component->name);
+		component_match_add(rtd->card->dev, &rtd->card->match,
+				    snd_soc_card_comp_compare, cname);
+	}
+
 	return 0;
 }
 
@@ -835,6 +859,25 @@ static bool soc_is_dai_link_bound(struct snd_soc_card *card,
 	return false;
 }
 
+static int snd_soc_card_comp_bind(struct device *dev)
+{
+	struct snd_soc_card *card = dev_get_drvdata(dev);
+
+	if (card->instantiated)
+		return 0;
+
+	return snd_soc_register_card(card);
+}
+
+static void snd_soc_card_comp_unbind(struct device *dev)
+{
+}
+
+static const struct component_master_ops snd_soc_card_comp_ops = {
+	.bind = snd_soc_card_comp_bind,
+	.unbind = snd_soc_card_comp_unbind,
+};
+
 static int soc_bind_dai_link(struct snd_soc_card *card,
 	struct snd_soc_dai_link *dai_link)
 {
@@ -2126,6 +2169,12 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 
 	card->instantiated = 1;
 	snd_soc_dapm_sync(&card->dapm);
+	if (card->auto_bind && !card->components_added) {
+		component_master_add_with_match(card->dev,
+						&snd_soc_card_comp_ops,
+						card->match);
+		card->components_added = true;
+	}
 	mutex_unlock(&card->mutex);
 	mutex_unlock(&client_mutex);
 
@@ -2749,6 +2798,9 @@ int snd_soc_unregister_card(struct snd_soc_card *card)
 		dev_dbg(card->dev, "ASoC: Unregistered card '%s'\n", card->name);
 	}
 
+	if (!card->auto_bind && card->components_added)
+		component_master_del(card->dev, &snd_soc_card_comp_ops);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_card);
@@ -3161,8 +3213,17 @@ int snd_soc_add_component(struct device *dev,
 
 	snd_soc_component_add(component);
 
+	ret = component_add(dev, NULL);
+	if (ret < 0) {
+		dev_err(dev, "ASoC: Failed to add Component: %d\n", ret);
+		goto err_comp;
+	}
+
 	return 0;
 
+err_comp:
+	soc_remove_component(component);
+	snd_soc_unregister_dais(component);
 err_cleanup:
 	snd_soc_component_cleanup(component);
 err_free:
@@ -3210,6 +3271,7 @@ static int __snd_soc_unregister_component(struct device *dev)
 	mutex_unlock(&client_mutex);
 
 	if (found) {
+		component_del(dev, NULL);
 		snd_soc_component_cleanup(component);
 	}
 
-- 
cgit v1.2.3


From d29ab6e1fa21ebc2a8a771015dd9e0e5d4e28dc1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 13 Jul 2018 12:41:10 -0700
Subject: bpf: bpf_prog_array_alloc() should return a generic non-rcu pointer

Currently the return type of the bpf_prog_array_alloc() is
struct bpf_prog_array __rcu *, which is not quite correct.
Obviously, the returned pointer is a generic pointer, which
is valid for an indefinite amount of time and it's not shared
with anyone else, so there is no sense in marking it as __rcu.

This change eliminate the following sparse warnings:
kernel/bpf/core.c:1544:31: warning: incorrect type in return expression (different address spaces)
kernel/bpf/core.c:1544:31:    expected struct bpf_prog_array [noderef] <asn:4>*
kernel/bpf/core.c:1544:31:    got void *
kernel/bpf/core.c:1548:17: warning: incorrect type in return expression (different address spaces)
kernel/bpf/core.c:1548:17:    expected struct bpf_prog_array [noderef] <asn:4>*
kernel/bpf/core.c:1548:17:    got struct bpf_prog_array *<noident>
kernel/bpf/core.c:1681:15: warning: incorrect type in assignment (different address spaces)
kernel/bpf/core.c:1681:15:    expected struct bpf_prog_array *array
kernel/bpf/core.c:1681:15:    got struct bpf_prog_array [noderef] <asn:4>*

Fixes: 324bda9e6c5a ("bpf: multi program support for cgroup+bpf")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h | 2 +-
 kernel/bpf/core.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8827e797ff97..943fb08d8287 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -352,7 +352,7 @@ struct bpf_prog_array {
 	struct bpf_prog *progs[0];
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
 void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
 int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1e5625d46414..253aa8e79c7b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1538,7 +1538,7 @@ static struct {
 	.null_prog = NULL,
 };
 
-struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
 	if (prog_cnt)
 		return kzalloc(sizeof(struct bpf_prog_array) +
-- 
cgit v1.2.3


From 09728266b6f99ab57cd4f84f3eead65b7b65dbf7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:23 -0700
Subject: bpf: offload: rename bpf_offload_dev_match() to
 bpf_offload_prog_map_match()

A set of new API functions exported for the drivers will soon use
'bpf_offload_dev_' as a prefix.  Rename the bpf_offload_dev_match()
which is internal to the core (used by the verifier) to avoid any
confusion.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h   | 2 +-
 kernel/bpf/offload.c  | 2 +-
 kernel/bpf/verifier.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 943fb08d8287..9b010d9129f3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -648,7 +648,7 @@ int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
 int bpf_map_offload_get_next_key(struct bpf_map *map,
 				 void *key, void *next_key);
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ac747d5cf7c6..6184e48703f4 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -468,7 +468,7 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
 	return 0;
 }
 
-bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 {
 	struct bpf_offloaded_map *offmap;
 	struct bpf_prog_offload *offload;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9e2bf834f13a..15d69b278277 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5054,7 +5054,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	}
 
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
-	    !bpf_offload_dev_match(prog, map)) {
+	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 9fd7c5559165f4c679b40c5e6ad442955832dfad Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:24 -0700
Subject: bpf: offload: aggregate offloads per-device

Currently we have two lists of offloaded objects - programs and maps.
Netdevice unregister notifier scans those lists to orphan objects
associated with device being unregistered.  This puts unnecessary
(even if negligible) burden on all netdev unregister calls in BPF-
-enabled kernel.  The lists of objects may potentially get long
making the linear scan even more problematic.  There haven't been
complaints about this mechanisms so far, but it is suboptimal.

Instead of relying on notifiers, make the few BPF-capable drivers
register explicitly for BPF offloads.  The programs and maps will
now be collected per-device not on a global list, and only scanned
for removal when driver unregisters from BPF offloads.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c |  13 +++
 drivers/net/netdevsim/bpf.c                   |   7 ++
 include/linux/bpf.h                           |   3 +
 kernel/bpf/offload.c                          | 142 +++++++++++++++++---------
 4 files changed, 119 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index b95b94d008cf..dee039ada75c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -404,6 +404,16 @@ err_release_free:
 	return -EINVAL;
 }
 
+static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
+{
+	return bpf_offload_dev_netdev_register(netdev);
+}
+
+static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
+{
+	bpf_offload_dev_netdev_unregister(netdev);
+}
+
 static int nfp_bpf_init(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf;
@@ -466,6 +476,9 @@ const struct nfp_app_type app_bpf = {
 
 	.extra_cap	= nfp_bpf_extra_cap,
 
+	.ndo_init	= nfp_bpf_ndo_init,
+	.ndo_uninit	= nfp_bpf_ndo_uninit,
+
 	.vnic_alloc	= nfp_bpf_vnic_alloc,
 	.vnic_free	= nfp_bpf_vnic_free,
 
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 357f9e62f306..c4a2829e0e1f 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -582,6 +582,8 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
 
 int nsim_bpf_init(struct netdevsim *ns)
 {
+	int err;
+
 	if (ns->sdev->refcnt == 1) {
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_progs);
 		INIT_LIST_HEAD(&ns->sdev->bpf_bound_maps);
@@ -592,6 +594,10 @@ int nsim_bpf_init(struct netdevsim *ns)
 			return -ENOMEM;
 	}
 
+	err = bpf_offload_dev_netdev_register(ns->netdev);
+	if (err)
+		return err;
+
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
 
@@ -625,6 +631,7 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
+	bpf_offload_dev_netdev_unregister(ns->netdev);
 
 	if (ns->sdev->refcnt == 1) {
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b010d9129f3..aa2e834a122b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -650,6 +650,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
+int bpf_offload_dev_netdev_register(struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev);
+
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
 
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 6184e48703f4..cd64a26807aa 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -18,19 +18,37 @@
 #include <linux/bug.h>
 #include <linux/kdev_t.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
 #include <linux/proc_ns.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 #include <linux/rwsem.h>
 
-/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
+/* Protects offdevs, members of bpf_offload_netdev and offload members
  * of all progs.
  * RTNL lock cannot be taken when holding this lock.
  */
 static DECLARE_RWSEM(bpf_devs_lock);
-static LIST_HEAD(bpf_prog_offload_devs);
-static LIST_HEAD(bpf_map_offload_devs);
+
+struct bpf_offload_netdev {
+	struct rhash_head l;
+	struct net_device *netdev;
+	struct list_head progs;
+	struct list_head maps;
+};
+
+static const struct rhashtable_params offdevs_params = {
+	.nelem_hint		= 4,
+	.key_len		= sizeof(struct net_device *),
+	.key_offset		= offsetof(struct bpf_offload_netdev, netdev),
+	.head_offset		= offsetof(struct bpf_offload_netdev, l),
+	.automatic_shrinking	= true,
+};
+
+static struct rhashtable offdevs;
+static bool offdevs_inited;
 
 static int bpf_dev_offload_check(struct net_device *netdev)
 {
@@ -41,8 +59,19 @@ static int bpf_dev_offload_check(struct net_device *netdev)
 	return 0;
 }
 
+static struct bpf_offload_netdev *
+bpf_offload_find_netdev(struct net_device *netdev)
+{
+	lockdep_assert_held(&bpf_devs_lock);
+
+	if (!offdevs_inited)
+		return NULL;
+	return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+}
+
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 {
+	struct bpf_offload_netdev *ondev;
 	struct bpf_prog_offload *offload;
 	int err;
 
@@ -66,12 +95,13 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 		goto err_maybe_put;
 
 	down_write(&bpf_devs_lock);
-	if (offload->netdev->reg_state != NETREG_REGISTERED) {
+	ondev = bpf_offload_find_netdev(offload->netdev);
+	if (!ondev) {
 		err = -EINVAL;
 		goto err_unlock;
 	}
 	prog->aux->offload = offload;
-	list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
+	list_add_tail(&offload->offloads, &ondev->progs);
 	dev_put(offload->netdev);
 	up_write(&bpf_devs_lock);
 
@@ -294,6 +324,7 @@ static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
 struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 {
 	struct net *net = current->nsproxy->net_ns;
+	struct bpf_offload_netdev *ondev;
 	struct bpf_offloaded_map *offmap;
 	int err;
 
@@ -316,11 +347,17 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 	if (err)
 		goto err_unlock;
 
+	ondev = bpf_offload_find_netdev(offmap->netdev);
+	if (!ondev) {
+		err = -EINVAL;
+		goto err_unlock;
+	}
+
 	err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
 	if (err)
 		goto err_unlock;
 
-	list_add_tail(&offmap->offloads, &bpf_map_offload_devs);
+	list_add_tail(&offmap->offloads, &ondev->maps);
 	up_write(&bpf_devs_lock);
 	rtnl_unlock();
 
@@ -489,56 +526,69 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 	return ret;
 }
 
-static void bpf_offload_orphan_all_progs(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct net_device *netdev)
 {
-	struct bpf_prog_offload *offload, *tmp;
+	struct bpf_offload_netdev *ondev;
+	int err;
 
-	list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
-		if (offload->netdev == netdev)
-			__bpf_prog_offload_destroy(offload->prog);
-}
+	down_write(&bpf_devs_lock);
+	if (!offdevs_inited) {
+		err = rhashtable_init(&offdevs, &offdevs_params);
+		if (err)
+			return err;
+		offdevs_inited = true;
+	}
+	up_write(&bpf_devs_lock);
 
-static void bpf_offload_orphan_all_maps(struct net_device *netdev)
-{
-	struct bpf_offloaded_map *offmap, *tmp;
+	ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
+	if (!ondev)
+		return -ENOMEM;
+
+	ondev->netdev = netdev;
+	INIT_LIST_HEAD(&ondev->progs);
+	INIT_LIST_HEAD(&ondev->maps);
+
+	down_write(&bpf_devs_lock);
+	err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
+	if (err) {
+		netdev_warn(netdev, "failed to register for BPF offload\n");
+		goto err_unlock_free;
+	}
 
-	list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
-		if (offmap->netdev == netdev)
-			__bpf_map_offload_destroy(offmap);
+	up_write(&bpf_devs_lock);
+	return 0;
+
+err_unlock_free:
+	up_write(&bpf_devs_lock);
+	kfree(ondev);
+	return err;
 }
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-static int bpf_offload_notification(struct notifier_block *notifier,
-				    ulong event, void *ptr)
+void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
 {
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct bpf_offloaded_map *offmap, *mtmp;
+	struct bpf_prog_offload *offload, *ptmp;
+	struct bpf_offload_netdev *ondev;
 
 	ASSERT_RTNL();
 
-	switch (event) {
-	case NETDEV_UNREGISTER:
-		/* ignore namespace changes */
-		if (netdev->reg_state != NETREG_UNREGISTERING)
-			break;
-
-		down_write(&bpf_devs_lock);
-		bpf_offload_orphan_all_progs(netdev);
-		bpf_offload_orphan_all_maps(netdev);
-		up_write(&bpf_devs_lock);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
+	down_write(&bpf_devs_lock);
+	ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
+	if (WARN_ON(!ondev))
+		goto unlock;
 
-static struct notifier_block bpf_offload_notifier = {
-	.notifier_call = bpf_offload_notification,
-};
+	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
 
-static int __init bpf_offload_init(void)
-{
-	register_netdevice_notifier(&bpf_offload_notifier);
-	return 0;
-}
+	list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+		__bpf_prog_offload_destroy(offload->prog);
+	list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+		__bpf_map_offload_destroy(offmap);
 
-subsys_initcall(bpf_offload_init);
+	WARN_ON(!list_empty(&ondev->progs));
+	WARN_ON(!list_empty(&ondev->maps));
+	kfree(ondev);
+unlock:
+	up_write(&bpf_devs_lock);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
-- 
cgit v1.2.3


From 602144c224604f1cbff02ee2d1cf46825269ecbd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:25 -0700
Subject: bpf: offload: keep the offload state per-ASIC

Create a higher-level entity to represent a device/ASIC to allow
programs and maps to be shared between device ports.  The extra
work is required to make sure we don't destroy BPF objects as
soon as the netdev for which they were loaded gets destroyed,
as other ports may still be using them.  When netdev goes away
all of its BPF objects will be moved to other netdevs of the
device, and only destroyed when last netdev is unregistered.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 14 ++++-
 drivers/net/ethernet/netronome/nfp/bpf/main.h |  4 ++
 drivers/net/netdevsim/bpf.c                   | 17 +++++-
 drivers/net/netdevsim/netdevsim.h             |  3 +
 include/linux/bpf.h                           |  9 ++-
 kernel/bpf/offload.c                          | 81 +++++++++++++++++++++------
 6 files changed, 104 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index dee039ada75c..458f49235d06 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -406,12 +406,16 @@ err_release_free:
 
 static int nfp_bpf_ndo_init(struct nfp_app *app, struct net_device *netdev)
 {
-	return bpf_offload_dev_netdev_register(netdev);
+	struct nfp_app_bpf *bpf = app->priv;
+
+	return bpf_offload_dev_netdev_register(bpf->bpf_dev, netdev);
 }
 
 static void nfp_bpf_ndo_uninit(struct nfp_app *app, struct net_device *netdev)
 {
-	bpf_offload_dev_netdev_unregister(netdev);
+	struct nfp_app_bpf *bpf = app->priv;
+
+	bpf_offload_dev_netdev_unregister(bpf->bpf_dev, netdev);
 }
 
 static int nfp_bpf_init(struct nfp_app *app)
@@ -437,6 +441,11 @@ static int nfp_bpf_init(struct nfp_app *app)
 	if (err)
 		goto err_free_neutral_maps;
 
+	bpf->bpf_dev = bpf_offload_dev_create();
+	err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
+	if (err)
+		goto err_free_neutral_maps;
+
 	return 0;
 
 err_free_neutral_maps:
@@ -455,6 +464,7 @@ static void nfp_bpf_clean(struct nfp_app *app)
 {
 	struct nfp_app_bpf *bpf = app->priv;
 
+	bpf_offload_dev_destroy(bpf->bpf_dev);
 	WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
 	WARN_ON(!list_empty(&bpf->map_list));
 	WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 9845c1a2d4c2..bec935468f90 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -110,6 +110,8 @@ enum pkt_vec {
  * struct nfp_app_bpf - bpf app priv structure
  * @app:		backpointer to the app
  *
+ * @bpf_dev:		BPF offload device handle
+ *
  * @tag_allocator:	bitmap of control message tags in use
  * @tag_alloc_next:	next tag bit to allocate
  * @tag_alloc_last:	next tag bit to be freed
@@ -150,6 +152,8 @@ enum pkt_vec {
 struct nfp_app_bpf {
 	struct nfp_app *app;
 
+	struct bpf_offload_dev *bpf_dev;
+
 	DECLARE_BITMAP(tag_allocator, U16_MAX + 1);
 	u16 tag_alloc_next;
 	u16 tag_alloc_last;
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index c4a2829e0e1f..9eab29f67a0e 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -592,11 +592,16 @@ int nsim_bpf_init(struct netdevsim *ns)
 			debugfs_create_dir("bpf_bound_progs", ns->sdev->ddir);
 		if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
 			return -ENOMEM;
+
+		ns->sdev->bpf_dev = bpf_offload_dev_create();
+		err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
+		if (err)
+			return err;
 	}
 
-	err = bpf_offload_dev_netdev_register(ns->netdev);
+	err = bpf_offload_dev_netdev_register(ns->sdev->bpf_dev, ns->netdev);
 	if (err)
-		return err;
+		goto err_destroy_bdev;
 
 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
 			   &ns->bpf_offloaded_id);
@@ -624,6 +629,11 @@ int nsim_bpf_init(struct netdevsim *ns)
 			    &ns->bpf_map_accept);
 
 	return 0;
+
+err_destroy_bdev:
+	if (ns->sdev->refcnt == 1)
+		bpf_offload_dev_destroy(ns->sdev->bpf_dev);
+	return err;
 }
 
 void nsim_bpf_uninit(struct netdevsim *ns)
@@ -631,10 +641,11 @@ void nsim_bpf_uninit(struct netdevsim *ns)
 	WARN_ON(ns->xdp.prog);
 	WARN_ON(ns->xdp_hw.prog);
 	WARN_ON(ns->bpf_offloaded);
-	bpf_offload_dev_netdev_unregister(ns->netdev);
+	bpf_offload_dev_netdev_unregister(ns->sdev->bpf_dev, ns->netdev);
 
 	if (ns->sdev->refcnt == 1) {
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_progs));
 		WARN_ON(!list_empty(&ns->sdev->bpf_bound_maps));
+		bpf_offload_dev_destroy(ns->sdev->bpf_dev);
 	}
 }
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 98f26fa1e671..02be199eb005 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -27,6 +27,7 @@
 #define NSIM_EA(extack, msg)	NL_SET_ERR_MSG_MOD((extack), msg)
 
 struct bpf_prog;
+struct bpf_offload_dev;
 struct dentry;
 struct nsim_vf_config;
 
@@ -36,6 +37,8 @@ struct netdevsim_shared_dev {
 
 	struct dentry *ddir;
 
+	struct bpf_offload_dev *bpf_dev;
+
 	struct dentry *ddir_bpf_bound_progs;
 	u32 prog_id_gen;
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aa2e834a122b..913465e45062 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -85,6 +85,7 @@ struct bpf_map {
 	char name[BPF_OBJ_NAME_LEN];
 };
 
+struct bpf_offload_dev;
 struct bpf_offloaded_map;
 
 struct bpf_map_dev_ops {
@@ -650,8 +651,12 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
-int bpf_offload_dev_netdev_register(struct net_device *netdev);
-void bpf_offload_dev_netdev_unregister(struct net_device *netdev);
+struct bpf_offload_dev *bpf_offload_dev_create(void);
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+				    struct net_device *netdev);
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+				       struct net_device *netdev);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index cd64a26807aa..925575f64ff1 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -32,11 +32,17 @@
  */
 static DECLARE_RWSEM(bpf_devs_lock);
 
+struct bpf_offload_dev {
+	struct list_head netdevs;
+};
+
 struct bpf_offload_netdev {
 	struct rhash_head l;
 	struct net_device *netdev;
+	struct bpf_offload_dev *offdev;
 	struct list_head progs;
 	struct list_head maps;
+	struct list_head offdev_netdevs;
 };
 
 static const struct rhashtable_params offdevs_params = {
@@ -526,25 +532,18 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
 	return ret;
 }
 
-int bpf_offload_dev_netdev_register(struct net_device *netdev)
+int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
+				    struct net_device *netdev)
 {
 	struct bpf_offload_netdev *ondev;
 	int err;
 
-	down_write(&bpf_devs_lock);
-	if (!offdevs_inited) {
-		err = rhashtable_init(&offdevs, &offdevs_params);
-		if (err)
-			return err;
-		offdevs_inited = true;
-	}
-	up_write(&bpf_devs_lock);
-
 	ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
 	if (!ondev)
 		return -ENOMEM;
 
 	ondev->netdev = netdev;
+	ondev->offdev = offdev;
 	INIT_LIST_HEAD(&ondev->progs);
 	INIT_LIST_HEAD(&ondev->maps);
 
@@ -555,6 +554,7 @@ int bpf_offload_dev_netdev_register(struct net_device *netdev)
 		goto err_unlock_free;
 	}
 
+	list_add(&ondev->offdev_netdevs, &offdev->netdevs);
 	up_write(&bpf_devs_lock);
 	return 0;
 
@@ -565,11 +565,12 @@ err_unlock_free:
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
 
-void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
+void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
+				       struct net_device *netdev)
 {
+	struct bpf_offload_netdev *ondev, *altdev;
 	struct bpf_offloaded_map *offmap, *mtmp;
 	struct bpf_prog_offload *offload, *ptmp;
-	struct bpf_offload_netdev *ondev;
 
 	ASSERT_RTNL();
 
@@ -579,11 +580,26 @@ void bpf_offload_dev_netdev_unregister(struct net_device *netdev)
 		goto unlock;
 
 	WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
-
-	list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
-		__bpf_prog_offload_destroy(offload->prog);
-	list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
-		__bpf_map_offload_destroy(offmap);
+	list_del(&ondev->offdev_netdevs);
+
+	/* Try to move the objects to another netdev of the device */
+	altdev = list_first_entry_or_null(&offdev->netdevs,
+					  struct bpf_offload_netdev,
+					  offdev_netdevs);
+	if (altdev) {
+		list_for_each_entry(offload, &ondev->progs, offloads)
+			offload->netdev = altdev->netdev;
+		list_splice_init(&ondev->progs, &altdev->progs);
+
+		list_for_each_entry(offmap, &ondev->maps, offloads)
+			offmap->netdev = altdev->netdev;
+		list_splice_init(&ondev->maps, &altdev->maps);
+	} else {
+		list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
+			__bpf_prog_offload_destroy(offload->prog);
+		list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
+			__bpf_map_offload_destroy(offmap);
+	}
 
 	WARN_ON(!list_empty(&ondev->progs));
 	WARN_ON(!list_empty(&ondev->maps));
@@ -592,3 +608,34 @@ unlock:
 	up_write(&bpf_devs_lock);
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
+
+struct bpf_offload_dev *bpf_offload_dev_create(void)
+{
+	struct bpf_offload_dev *offdev;
+	int err;
+
+	down_write(&bpf_devs_lock);
+	if (!offdevs_inited) {
+		err = rhashtable_init(&offdevs, &offdevs_params);
+		if (err)
+			return ERR_PTR(err);
+		offdevs_inited = true;
+	}
+	up_write(&bpf_devs_lock);
+
+	offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
+	if (!offdev)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&offdev->netdevs);
+
+	return offdev;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_create);
+
+void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
+{
+	WARN_ON(!list_empty(&offdev->netdevs));
+	kfree(offdev);
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
-- 
cgit v1.2.3


From fd4f227dea0f24d89f52f7c4eb3207f84ddcbcbd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 17 Jul 2018 10:53:26 -0700
Subject: bpf: offload: allow program and map sharing per-ASIC

Allow programs and maps to be re-used across different netdevs,
as long as they belong to the same struct bpf_offload_dev.
Update the bpf_offload_prog_map_match() helper for the verifier
and export a new helper for the drivers to use when checking
programs at attachment time.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h  |  1 +
 kernel/bpf/offload.c | 42 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 913465e45062..5b5ad95cf339 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -657,6 +657,7 @@ int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
 				    struct net_device *netdev);
 void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
 				       struct net_device *netdev);
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 925575f64ff1..177a52436394 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -511,22 +511,50 @@ int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map)
 	return 0;
 }
 
-bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
+static bool __bpf_offload_dev_match(struct bpf_prog *prog,
+				    struct net_device *netdev)
 {
-	struct bpf_offloaded_map *offmap;
+	struct bpf_offload_netdev *ondev1, *ondev2;
 	struct bpf_prog_offload *offload;
-	bool ret;
 
 	if (!bpf_prog_is_dev_bound(prog->aux))
 		return false;
-	if (!bpf_map_is_dev_bound(map))
-		return bpf_map_offload_neutral(map);
 
-	down_read(&bpf_devs_lock);
 	offload = prog->aux->offload;
+	if (!offload)
+		return false;
+	if (offload->netdev == netdev)
+		return true;
+
+	ondev1 = bpf_offload_find_netdev(offload->netdev);
+	ondev2 = bpf_offload_find_netdev(netdev);
+
+	return ondev1 && ondev2 && ondev1->offdev == ondev2->offdev;
+}
+
+bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
+{
+	bool ret;
+
+	down_read(&bpf_devs_lock);
+	ret = __bpf_offload_dev_match(prog, netdev);
+	up_read(&bpf_devs_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
+
+bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
+{
+	struct bpf_offloaded_map *offmap;
+	bool ret;
+
+	if (!bpf_map_is_dev_bound(map))
+		return bpf_map_offload_neutral(map);
 	offmap = map_to_offmap(map);
 
-	ret = offload && offload->netdev == offmap->netdev;
+	down_read(&bpf_devs_lock);
+	ret = __bpf_offload_dev_match(prog, offmap->netdev);
 	up_read(&bpf_devs_lock);
 
 	return ret;
-- 
cgit v1.2.3


From d3b1084dfd629ef89bc1c4bab95e5cb87e7d08c2 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: make open_with_fake_path() not contribute to nr_files

Stacking file operations in overlay will store an extra open file for each
overlay file opened.

The overhead is just that of "struct file" which is about 256bytes, because
overlay already pins an extra dentry and inode when the file is open, which
add up to a much larger overhead.

For fear of breaking working setups, don't start accounting the extra file.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/file_table.c    | 69 +++++++++++++++++++++++++++++++++++++-----------------
 fs/internal.h      |  1 +
 fs/open.c          |  2 +-
 include/linux/fs.h |  3 +++
 4 files changed, 53 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/fs/file_table.c b/fs/file_table.c
index 9b70ed2bbc4e..0cc7bea6b51a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
 	security_file_free(f);
-	percpu_counter_dec(&nr_files);
+	if (!(f->f_mode & FMODE_NOACCOUNT))
+		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -91,6 +92,34 @@ int proc_nr_files(struct ctl_table *table, int write,
 }
 #endif
 
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+	struct file *f;
+	int error;
+
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+	if (unlikely(!f))
+		return ERR_PTR(-ENOMEM);
+
+	f->f_cred = get_cred(cred);
+	error = security_file_alloc(f);
+	if (unlikely(error)) {
+		file_free_rcu(&f->f_u.fu_rcuhead);
+		return ERR_PTR(error);
+	}
+
+	atomic_long_set(&f->f_count, 1);
+	rwlock_init(&f->f_owner.lock);
+	spin_lock_init(&f->f_lock);
+	mutex_init(&f->f_pos_lock);
+	eventpoll_init_file(f);
+	f->f_flags = flags;
+	f->f_mode = OPEN_FMODE(flags);
+	/* f->f_version: 0 */
+
+	return f;
+}
+
 /* Find an unused file structure and return a pointer to it.
  * Returns an error pointer if some error happend e.g. we over file
  * structures limit, run out of memory or operation is not permitted.
@@ -105,7 +134,6 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
 {
 	static long old_max;
 	struct file *f;
-	int error;
 
 	/*
 	 * Privileged users can go above max_files
@@ -119,26 +147,10 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
 			goto over;
 	}
 
-	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
-	if (unlikely(!f))
-		return ERR_PTR(-ENOMEM);
-
-	f->f_cred = get_cred(cred);
-	error = security_file_alloc(f);
-	if (unlikely(error)) {
-		file_free_rcu(&f->f_u.fu_rcuhead);
-		return ERR_PTR(error);
-	}
+	f = __alloc_file(flags, cred);
+	if (!IS_ERR(f))
+		percpu_counter_inc(&nr_files);
 
-	atomic_long_set(&f->f_count, 1);
-	rwlock_init(&f->f_owner.lock);
-	spin_lock_init(&f->f_lock);
-	mutex_init(&f->f_pos_lock);
-	eventpoll_init_file(f);
-	f->f_flags = flags;
-	f->f_mode = OPEN_FMODE(flags);
-	/* f->f_version: 0 */
-	percpu_counter_inc(&nr_files);
 	return f;
 
 over:
@@ -150,6 +162,21 @@ over:
 	return ERR_PTR(-ENFILE);
 }
 
+/*
+ * Variant of alloc_empty_file() that doesn't check and modify nr_files.
+ *
+ * Should not be used unless there's a very good reason to do so.
+ */
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+{
+	struct file *f = __alloc_file(flags, cred);
+
+	if (!IS_ERR(f))
+		f->f_mode |= FMODE_NOACCOUNT;
+
+	return f;
+}
+
 /**
  * alloc_file - allocate and initialize a 'struct file'
  *
diff --git a/fs/internal.h b/fs/internal.h
index 52a346903748..442098fa0a84 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -94,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
  * file_table.c
  */
 extern struct file *alloc_empty_file(int, const struct cred *);
+extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
 
 /*
  * super.c
diff --git a/fs/open.c b/fs/open.c
index dd15711eb658..9c6617dbb2c0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -928,7 +928,7 @@ EXPORT_SYMBOL(dentry_open);
 struct file *open_with_fake_path(const struct path *path, int flags,
 				struct inode *inode, const struct cred *cred)
 {
-	struct file *f = alloc_empty_file(flags, cred);
+	struct file *f = alloc_empty_file_noaccount(flags, cred);
 	if (!IS_ERR(f)) {
 		int error;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5ce2b413abc6..e1884840d556 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -156,6 +156,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File is capable of returning -EAGAIN if I/O will block */
 #define FMODE_NOWAIT	((__force fmode_t)0x8000000)
 
+/* File does not contribute to nr_files count */
+#define FMODE_NOACCOUNT	((__force fmode_t)0x20000000)
+
 /*
  * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
  * that indicates that they should check the contents of the iovec are
-- 
cgit v1.2.3


From 9df6702ad0e85901450fe48a7b5f0f8975353eeb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_ioctl() to modules

This is needed by the stacked ioctl implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/internal.h      | 1 -
 fs/ioctl.c         | 1 +
 include/linux/fs.h | 2 ++
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/internal.h b/fs/internal.h
index 442098fa0a84..9c3b4c40e582 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -184,7 +184,6 @@ extern const struct dentry_operations ns_dentry_operations;
  */
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
-extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 
 /*
  * iomap support:
diff --git a/fs/ioctl.c b/fs/ioctl.c
index b445b13fc59b..3212c29235ce 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
  out:
 	return error;
 }
+EXPORT_SYMBOL(vfs_ioctl);
 
 static int ioctl_fibmap(struct file *filp, int __user *p)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e1884840d556..019817a083a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1634,6 +1634,8 @@ int vfs_mkobj(struct dentry *, umode_t,
 		int (*f)(struct dentry *, umode_t, void *),
 		void *);
 
+extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
 /*
  * VFS file helper functions.
  */
-- 
cgit v1.2.3


From f182536684d876afaf4627c36a16c4e15ea8a2b8 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:40 +0200
Subject: vfs: export vfs_dedupe_file_range_one() to modules

This is needed by the stacked dedupe implementation in overlayfs.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/read_write.c    | 6 +++---
 include/linux/fs.h | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/read_write.c b/fs/read_write.c
index cce4ebac34a8..39b4a21dd933 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,9 +1964,8 @@ out_error:
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
-static int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-				     struct file *dst_file, loff_t dst_pos,
-				     u64 len)
+int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+			      struct file *dst_file, loff_t dst_pos, u64 len)
 {
 	s64 ret;
 
@@ -2001,6 +2000,7 @@ out_drop_write:
 
 	return ret;
 }
+EXPORT_SYMBOL(vfs_dedupe_file_range_one);
 
 int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 019817a083a0..b67209948f1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1829,6 +1829,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
+extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+				     struct file *dst_file, loff_t dst_pos,
+				     u64 len);
+
 
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
-- 
cgit v1.2.3


From 88059de155d4db817a3a78ba899cb3b7f4de0fb0 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "ovl: fix relatime for directories"

This reverts commit cd91304e7190b4c4802f8e413ab2214b233e0260.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/inode.c             | 21 ++++-----------------
 fs/overlayfs/super.c   |  3 ---
 include/linux/dcache.h |  3 ---
 3 files changed, 4 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..7ef6f1942757 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1560,24 +1560,11 @@ EXPORT_SYMBOL(bmap);
 static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
 			       bool rcu)
 {
-	struct dentry *upperdentry;
+	if (!rcu) {
+		struct inode *realinode = d_real_inode(dentry);
 
-	/*
-	 * Nothing to do if in rcu or if non-overlayfs
-	 */
-	if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
-		return;
-
-	upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
-
-	/*
-	 * If file is on lower then we can't update atime, so no worries about
-	 * stale mtime/ctime.
-	 */
-	if (upperdentry) {
-		struct inode *realinode = d_inode(upperdentry);
-
-		if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
+		if (unlikely(inode != realinode) &&
+		    (!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
 		     !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
 			inode->i_mtime = realinode->i_mtime;
 			inode->i_ctime = realinode->i_ctime;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 211975921a90..a7e2287e4b8f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -101,9 +101,6 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 	if (inode && d_inode(dentry) == inode)
 		return dentry;
 
-	if (flags & D_REAL_UPPER)
-		return ovl_dentry_upper(dentry);
-
 	if (!d_is_reg(dentry)) {
 		if (!inode || inode == d_inode(dentry))
 			return dentry;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..ddae4103d324 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -564,9 +564,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
 	return upper;
 }
 
-/* d_real() flags */
-#define D_REAL_UPPER	0x2	/* return upper dentry or NULL if non-upper */
-
 /**
  * d_real - Return the real dentry
  * @dentry: the dentry to query
-- 
cgit v1.2.3


From c6718543463dbb78486ad259f884cb800df802b5 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: update ovl inode before relatime check"

This reverts commit 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa.

Overlayfs no longer relies on the vfs correct atime handling.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/inode.c         | 33 ++++++---------------------------
 fs/internal.h      |  7 -------
 fs/namei.c         |  2 +-
 include/linux/fs.h |  1 +
 4 files changed, 8 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 7ef6f1942757..077a6174dfd5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1554,37 +1554,17 @@ sector_t bmap(struct inode *inode, sector_t block)
 }
 EXPORT_SYMBOL(bmap);
 
-/*
- * Update times in overlayed inode from underlying real inode
- */
-static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
-			       bool rcu)
-{
-	if (!rcu) {
-		struct inode *realinode = d_real_inode(dentry);
-
-		if (unlikely(inode != realinode) &&
-		    (!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
-		     !timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
-			inode->i_mtime = realinode->i_mtime;
-			inode->i_ctime = realinode->i_ctime;
-		}
-	}
-}
-
 /*
  * With relative atime, only update atime if the previous atime is
  * earlier than either the ctime or mtime or if at least a day has
  * passed since the last atime update.
  */
-static int relatime_need_update(const struct path *path, struct inode *inode,
-				struct timespec now, bool rcu)
+static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+			     struct timespec now)
 {
 
-	if (!(path->mnt->mnt_flags & MNT_RELATIME))
+	if (!(mnt->mnt_flags & MNT_RELATIME))
 		return 1;
-
-	update_ovl_inode_times(path->dentry, inode, rcu);
 	/*
 	 * Is mtime younger than atime? If yes, update atime:
 	 */
@@ -1655,8 +1635,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
  *	This function automatically handles read only file systems and media,
  *	as well as the "noatime" flag and inode specific "noatime" markers.
  */
-bool __atime_needs_update(const struct path *path, struct inode *inode,
-			  bool rcu)
+bool atime_needs_update(const struct path *path, struct inode *inode)
 {
 	struct vfsmount *mnt = path->mnt;
 	struct timespec64 now;
@@ -1682,7 +1661,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
 
 	now = current_time(inode);
 
-	if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu))
+	if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
 		return false;
 
 	if (timespec64_equal(&inode->i_atime, &now))
@@ -1697,7 +1676,7 @@ void touch_atime(const struct path *path)
 	struct inode *inode = d_inode(path->dentry);
 	struct timespec64 now;
 
-	if (!__atime_needs_update(path, inode, false))
+	if (!atime_needs_update(path, inode))
 		return;
 
 	if (!sb_start_write_trylock(inode->i_sb))
diff --git a/fs/internal.h b/fs/internal.h
index abb41f346b18..e54ad4361d0d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -133,13 +133,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 extern int dentry_needs_remove_privs(struct dentry *dentry);
 
-extern bool __atime_needs_update(const struct path *, struct inode *, bool);
-static inline bool atime_needs_update_rcu(const struct path *path,
-					  struct inode *inode)
-{
-	return __atime_needs_update(path, inode, true);
-}
-
 /*
  * fs-writeback.c
  */
diff --git a/fs/namei.c b/fs/namei.c
index d152cc05fdc3..31182b71b313 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
 	if (!(nd->flags & LOOKUP_RCU)) {
 		touch_atime(&last->link);
 		cond_resched();
-	} else if (atime_needs_update_rcu(&last->link, inode)) {
+	} else if (atime_needs_update(&last->link, inode)) {
 		if (unlikely(unlazy_walk(nd)))
 			return ERR_PTR(-ECHILD);
 		touch_atime(&last->link);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b67209948f1b..8f8c9ac1c9d5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2088,6 +2088,7 @@ enum file_time_flags {
 	S_VERSION = 8,
 };
 
+extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
 static inline void file_accessed(struct file *file)
 {
-- 
cgit v1.2.3


From 4ab30319fd7c691a1b3165325c647a5cd6d282ac Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Revert "vfs: add flags to d_real()"

This reverts commit 495e642939114478a5237a7d91661ba93b76f15a.

No user of "flags" argument of d_real() remain.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/Locking |  2 +-
 Documentation/filesystems/vfs.txt |  2 +-
 fs/open.c                         |  2 +-
 fs/overlayfs/super.c              |  4 ++--
 include/linux/dcache.h            | 11 +++++------
 include/linux/fs.h                |  2 +-
 6 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2c391338c675..24e1a4f37c83 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -22,7 +22,7 @@ prototypes:
 	struct vfsmount *(*d_automount)(struct path *path);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 829a7b7857a4..6417b161f88d 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1001,7 +1001,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 };
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
diff --git a/fs/open.c b/fs/open.c
index a3c03a4a9078..a973ca074896 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -96,7 +96,7 @@ long vfs_truncate(const struct path *path, loff_t length)
 	 * write access on the upper inode, not on the overlay inode.  For
 	 * non-overlay filesystems d_real() is an identity function.
 	 */
-	upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
+	upperdentry = d_real(path->dentry, NULL, O_WRONLY);
 	error = PTR_ERR(upperdentry);
 	if (IS_ERR(upperdentry))
 		goto mnt_drop_write_and_out;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index a7e2287e4b8f..5bc261de5041 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -92,7 +92,7 @@ static int ovl_check_append_only(struct inode *inode, int flag)
 
 static struct dentry *ovl_d_real(struct dentry *dentry,
 				 const struct inode *inode,
-				 unsigned int open_flags, unsigned int flags)
+				 unsigned int open_flags)
 {
 	struct dentry *real;
 	int err;
@@ -128,7 +128,7 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		goto bug;
 
 	/* Handle recursion */
-	real = d_real(real, inode, open_flags, 0);
+	real = d_real(real, inode, open_flags);
 
 	if (!inode || inode == d_inode(real))
 		return real;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index ddae4103d324..8fe4efa94af6 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -146,7 +146,7 @@ struct dentry_operations {
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
 	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int, unsigned int);
+				 unsigned int);
 } ____cacheline_aligned;
 
 /*
@@ -568,8 +568,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @open_flags: open flags to control copy-up behavior
- * @flags: flags to control what is returned by this function
+ * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -578,10 +577,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  */
 static inline struct dentry *d_real(struct dentry *dentry,
 				    const struct inode *inode,
-				    unsigned int open_flags, unsigned int flags)
+				    unsigned int flags)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, open_flags, flags);
+		return dentry->d_op->d_real(dentry, inode, flags);
 	else
 		return dentry;
 }
@@ -596,7 +595,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8f8c9ac1c9d5..16e2741cec3c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1249,7 +1249,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0, 0);
+	return d_real(file->f_path.dentry, file_inode(file), 0);
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From de2a4a501e716bbf5ff691ba16faf59a35320228 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:43 +0200
Subject: Partially revert "locks: fix file locking on overlayfs"

This partially reverts commit c568d68341be7030f5647def68851e469b21ca11.

Overlayfs files will now automatically get the correct locks, no need to
hack overlay support in VFS.

It is a partial revert, because it leaves the locks_inode() calls in place
and defines locks_inode() to file_inode().  We could revert those as well,
but it would be unnecessary code churn and it makes sense to document that
we are getting the inode for locking purposes.

Don't revert MS_NOREMOTELOCK yet since that has been part of the userspace
API for some time (though not in a useful way).  Will try to remove
internal flags later when the dust around the new mount API settles.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c           | 17 ++++++-----------
 fs/overlayfs/super.c |  2 +-
 include/linux/fs.h   | 13 +------------
 3 files changed, 8 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/fs/locks.c b/fs/locks.c
index baa564841c03..dab4e72f8bff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,11 +139,6 @@
 #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
 #define IS_REMOTELCK(fl)	(fl->fl_pid <= 0)
 
-static inline bool is_remote_lock(struct file *filp)
-{
-	return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
-}
-
 static bool lease_breaking(struct file_lock *fl)
 {
 	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@ -1875,7 +1870,7 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
-	if (filp->f_op->setlease && is_remote_lock(filp))
+	if (filp->f_op->setlease)
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
 		return generic_setlease(filp, arg, lease, priv);
@@ -2022,7 +2017,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
 	if (error)
 		goto out_free;
 
-	if (f.file->f_op->flock && is_remote_lock(f.file))
+	if (f.file->f_op->flock)
 		error = f.file->f_op->flock(f.file,
 					  (can_sleep) ? F_SETLKW : F_SETLK,
 					  lock);
@@ -2048,7 +2043,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
  */
 int vfs_test_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_GETLK, fl);
 	posix_test_lock(filp, fl);
 	return 0;
@@ -2191,7 +2186,7 @@ out:
  */
 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, cmd, fl);
 	else
 		return posix_lock_file(filp, fl, conf);
@@ -2513,7 +2508,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 	if (list_empty(&flctx->flc_flock))
 		return;
 
-	if (filp->f_op->flock && is_remote_lock(filp))
+	if (filp->f_op->flock)
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 	else
 		flock_lock_inode(inode, &fl);
@@ -2600,7 +2595,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
  */
 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
 {
-	if (filp->f_op->lock && is_remote_lock(filp))
+	if (filp->f_op->lock)
 		return filp->f_op->lock(filp, F_CANCELLK, fl);
 	return 0;
 }
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5bc261de5041..c63beccad4fc 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1456,7 +1456,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &ovl_super_operations;
 	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_fs_info = ofs;
-	sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
+	sb->s_flags |= SB_POSIXACL;
 
 	err = -ENOMEM;
 	root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16e2741cec3c..1cbcf37c45e1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1054,17 +1054,7 @@ struct file_lock_context {
 
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
-/*
- * Return the inode to use for locking
- *
- * For overlayfs this should be the overlay inode, not the real inode returned
- * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
- * equal.
- */
-static inline struct inode *locks_inode(const struct file *f)
-{
-	return f->f_path.dentry->d_inode;
-}
+#define locks_inode(f) file_inode(f)
 
 #ifdef CONFIG_FILE_LOCKING
 extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
@@ -1305,7 +1295,6 @@ extern int send_sigurg(struct fown_struct *fown);
 
 /* These sb flags are internal to the kernel */
 #define SB_SUBMOUNT     (1<<26)
-#define SB_NOREMOTELOCK	(1<<27)
 #define SB_NOSEC	(1<<28)
 #define SB_BORN		(1<<29)
 #define SB_ACTIVE	(1<<30)
-- 
cgit v1.2.3


From 573e1784817ca1f13d76a0df636929e983e5de3c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: Revert "fsnotify: support overlayfs"

This reverts commit f3fbbb079263bd29ae592478de6808db7e708267.

Overlayfs now works correctly without adding hacks to fsnotify.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/linux/fsnotify.h | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index bdaf22582f6e..fd1ce10553bf 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
 static inline int fsnotify_perm(struct file *file, int mask)
 {
 	const struct path *path = &file->f_path;
-	/*
-	 * Do not use file_inode() here or anywhere in this file to get the
-	 * inode.  That would break *notity on overlayfs.
-	 */
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 fsnotify_mask = 0;
 	int ret;
 
@@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 static inline void fsnotify_access(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_ACCESS;
 
 	if (S_ISDIR(inode->i_mode))
@@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file)
 static inline void fsnotify_modify(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_MODIFY;
 
 	if (S_ISDIR(inode->i_mode))
@@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file)
 static inline void fsnotify_open(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	__u32 mask = FS_OPEN;
 
 	if (S_ISDIR(inode->i_mode))
@@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file)
 static inline void fsnotify_close(struct file *file)
 {
 	const struct path *path = &file->f_path;
-	struct inode *inode = path->dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	fmode_t mode = file->f_mode;
 	__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
 
-- 
cgit v1.2.3


From fb16043b46831a75c9b076a7262ae035290b0409 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 18 Jul 2018 15:44:44 +0200
Subject: vfs: remove open_flags from d_real()

Opening regular files on overlayfs is now handled via ovl_open().  Remove
the now unused "open_flags" argument from d_op->d_real() and the d_real()
helper.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/Locking |  3 +--
 Documentation/filesystems/vfs.txt | 16 ++++------------
 fs/overlayfs/super.c              | 36 +++---------------------------------
 include/linux/dcache.h            | 11 ++++-------
 include/linux/fs.h                |  2 +-
 5 files changed, 13 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 24e1a4f37c83..08a36e14e8fc 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -21,8 +21,7 @@ prototypes:
 	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
 	struct vfsmount *(*d_automount)(struct path *path);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 
 locking rules:
 		rename_lock	->d_lock	may block	rcu-walk
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 6417b161f88d..497609f00e3e 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -1000,8 +1000,7 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 };
 
   d_revalidate: called when the VFS needs to revalidate a dentry. This
@@ -1135,22 +1134,15 @@ struct dentry_operations {
 	dentry being transited from.
 
   d_real: overlay/union type filesystems implement this method to return one of
-	the underlying dentries hidden by the overlay.  It is used in three
+	the underlying dentries hidden by the overlay.  It is used in two
 	different modes:
 
-	Called from open it may need to copy-up the file depending on the
-	supplied open flags.  This mode is selected with a non-zero flags
-	argument.  In this mode the d_real method can return an error.
-
 	Called from file_dentry() it returns the real dentry matching the inode
 	argument.  The real dentry may be from a lower layer already copied up,
 	but still referenced from the file.  This mode is selected with a
-	non-NULL inode argument.  This will always succeed.
-
-	With NULL inode and zero flags the topmost real underlying dentry is
-	returned.  This will always succeed.
+	non-NULL inode argument.
 
-	This method is never called with both non-NULL inode and non-zero flags.
+	With NULL inode the topmost real underlying dentry is returned.
 
 Each dentry has a pointer to its parent dentry, as well as a hash list
 of child dentries. Child dentries are basically like files in a
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index c63beccad4fc..0e84593af84e 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -74,28 +74,10 @@ static void ovl_dentry_release(struct dentry *dentry)
 	}
 }
 
-static int ovl_check_append_only(struct inode *inode, int flag)
-{
-	/*
-	 * This test was moot in vfs may_open() because overlay inode does
-	 * not have the S_APPEND flag, so re-check on real upper inode
-	 */
-	if (IS_APPEND(inode)) {
-		if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
-			return -EPERM;
-		if (flag & O_TRUNC)
-			return -EPERM;
-	}
-
-	return 0;
-}
-
 static struct dentry *ovl_d_real(struct dentry *dentry,
-				 const struct inode *inode,
-				 unsigned int open_flags)
+				 const struct inode *inode)
 {
 	struct dentry *real;
-	int err;
 
 	/* It's an overlay file */
 	if (inode && d_inode(dentry) == inode)
@@ -107,28 +89,16 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 		goto bug;
 	}
 
-	if (open_flags) {
-		err = ovl_open_maybe_copy_up(dentry, open_flags);
-		if (err)
-			return ERR_PTR(err);
-	}
-
 	real = ovl_dentry_upper(dentry);
-	if (real && (!inode || inode == d_inode(real))) {
-		if (!inode) {
-			err = ovl_check_append_only(d_inode(real), open_flags);
-			if (err)
-				return ERR_PTR(err);
-		}
+	if (real && (!inode || inode == d_inode(real)))
 		return real;
-	}
 
 	real = ovl_dentry_lower(dentry);
 	if (!real)
 		goto bug;
 
 	/* Handle recursion */
-	real = d_real(real, inode, open_flags);
+	real = d_real(real, inode);
 
 	if (!inode || inode == d_inode(real))
 		return real;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8fe4efa94af6..78cea80423a3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -145,8 +145,7 @@ struct dentry_operations {
 	char *(*d_dname)(struct dentry *, char *, int);
 	struct vfsmount *(*d_automount)(struct path *);
 	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *,
-				 unsigned int);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
 } ____cacheline_aligned;
 
 /*
@@ -568,7 +567,6 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * d_real - Return the real dentry
  * @dentry: the dentry to query
  * @inode: inode to select the dentry from multiple layers (can be NULL)
- * @flags: open flags to control copy-up behavior
  *
  * If dentry is on a union/overlay, then return the underlying, real dentry.
  * Otherwise return the dentry itself.
@@ -576,11 +574,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
  * See also: Documentation/filesystems/vfs.txt
  */
 static inline struct dentry *d_real(struct dentry *dentry,
-				    const struct inode *inode,
-				    unsigned int flags)
+				    const struct inode *inode)
 {
 	if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
-		return dentry->d_op->d_real(dentry, inode, flags);
+		return dentry->d_op->d_real(dentry, inode);
 	else
 		return dentry;
 }
@@ -595,7 +592,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
 static inline struct inode *d_real_inode(const struct dentry *dentry)
 {
 	/* This usage of d_real() results in const dentry */
-	return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0));
+	return d_backing_inode(d_real((struct dentry *) dentry, NULL));
 }
 
 struct name_snapshot {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cbcf37c45e1..1fa63d184d1f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1239,7 +1239,7 @@ static inline struct inode *file_inode(const struct file *f)
 
 static inline struct dentry *file_dentry(const struct file *file)
 {
-	return d_real(file->f_path.dentry, file_inode(file), 0);
+	return d_real(file->f_path.dentry, file_inode(file));
 }
 
 static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
-- 
cgit v1.2.3


From 3f289dcb4b265416a57ca79cf4a324060bb09060 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Jul 2018 04:47:36 -0700
Subject: block: make bdev_ops->rw_page() take a REQ_OP instead of bool

c11f0c0b5bb9 ("block/mm: make bdev_ops->rw_page() take a bool for
read/write") replaced @op with boolean @is_write, which limited the
amount of information going into ->rw_page() and more importantly
page_endio(), which removed the need to expose block internals to mm.

Unfortunately, we want to track discards separately and @is_write
isn't enough information.  This patch updates bdev_ops->rw_page() to
take REQ_OP instead but leaves page_endio() to take bool @is_write.
This allows the block part of operations to have enough information
while not leaking it to mm.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c           | 14 +++++++-------
 drivers/block/zram/zram_drv.c | 16 ++++++++--------
 drivers/nvdimm/btt.c          | 12 ++++++------
 drivers/nvdimm/pmem.c         | 13 ++++++-------
 fs/block_dev.c                |  6 ++++--
 fs/mpage.c                    |  4 ++--
 include/linux/blkdev.h        |  2 +-
 7 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bb976598ee43..df8103dd40ac 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -254,20 +254,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
  * Process a single bvec of a bio.
  */
 static int brd_do_bvec(struct brd_device *brd, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	void *mem;
 	int err = 0;
 
-	if (is_write) {
+	if (op_is_write(op)) {
 		err = copy_to_brd_setup(brd, sector, len);
 		if (err)
 			goto out;
 	}
 
 	mem = kmap_atomic(page);
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		copy_from_brd(mem + off, brd, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -296,7 +296,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
 		int err;
 
 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
-					op_is_write(bio_op(bio)), sector);
+				  bio_op(bio), sector);
 		if (err)
 			goto io_error;
 		sector += len >> SECTOR_SHIFT;
@@ -310,15 +310,15 @@ io_error:
 }
 
 static int brd_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	int err;
 
 	if (PageTransHuge(page))
 		return -ENOTSUPP;
-	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
-	page_endio(page, is_write, err);
+	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
+	page_endio(page, op_is_write(op), err);
 	return err;
 }
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7436b2d27fa3..78c29044684a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1274,17 +1274,17 @@ static void zram_bio_discard(struct zram *zram, u32 index,
  * Returns 1 if IO request was successfully submitted.
  */
 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
-			int offset, bool is_write, struct bio *bio)
+			int offset, unsigned int op, struct bio *bio)
 {
 	unsigned long start_time = jiffies;
-	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
+	int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ;
 	struct request_queue *q = zram->disk->queue;
 	int ret;
 
 	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
 			&zram->disk->part0);
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		atomic64_inc(&zram->stats.num_reads);
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
 		flush_dcache_page(bvec->bv_page);
@@ -1300,7 +1300,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 	zram_slot_unlock(zram, index);
 
 	if (unlikely(ret < 0)) {
-		if (!is_write)
+		if (!op_is_write(op))
 			atomic64_inc(&zram->stats.failed_reads);
 		else
 			atomic64_inc(&zram->stats.failed_writes);
@@ -1338,7 +1338,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
 							unwritten);
 			if (zram_bvec_rw(zram, &bv, index, offset,
-					op_is_write(bio_op(bio)), bio) < 0)
+					 bio_op(bio), bio) < 0)
 				goto out;
 
 			bv.bv_offset += bv.bv_len;
@@ -1390,7 +1390,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
 }
 
 static int zram_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	int offset, ret;
 	u32 index;
@@ -1414,7 +1414,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	bv.bv_len = PAGE_SIZE;
 	bv.bv_offset = 0;
 
-	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
+	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
 out:
 	/*
 	 * If I/O fails, just return error(ie, non-zero) without
@@ -1429,7 +1429,7 @@ out:
 
 	switch (ret) {
 	case 0:
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 		break;
 	case 1:
 		ret = 0;
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 85de8053aa34..0360c015f658 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1423,11 +1423,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
 			struct page *page, unsigned int len, unsigned int off,
-			bool is_write, sector_t sector)
+			unsigned int op, sector_t sector)
 {
 	int ret;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		ret = btt_read_pg(btt, bip, page, off, sector, len);
 		flush_dcache_page(page);
 	} else {
@@ -1464,7 +1464,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 		}
 
 		err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
-				  op_is_write(bio_op(bio)), iter.bi_sector);
+				  bio_op(bio), iter.bi_sector);
 		if (err) {
 			dev_err(&btt->nd_btt->dev,
 					"io error in %s sector %lld, len %d,\n",
@@ -1483,16 +1483,16 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int btt_rw_page(struct block_device *bdev, sector_t sector,
-		struct page *page, bool is_write)
+		struct page *page, unsigned int op)
 {
 	struct btt *btt = bdev->bd_disk->private_data;
 	int rc;
 	unsigned int len;
 
 	len = hpage_nr_pages(page) * PAGE_SIZE;
-	rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector);
+	rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector);
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8b1fd7f1a224..dd17acd8fe68 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -120,7 +120,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 }
 
 static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, bool is_write,
+			unsigned int len, unsigned int off, unsigned int op,
 			sector_t sector)
 {
 	blk_status_t rc = BLK_STS_OK;
@@ -131,7 +131,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
 
-	if (!is_write) {
+	if (!op_is_write(op)) {
 		if (unlikely(bad_pmem))
 			rc = BLK_STS_IOERR;
 		else {
@@ -180,8 +180,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
 		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, op_is_write(bio_op(bio)),
-				iter.bi_sector);
+				bvec.bv_offset, bio_op(bio), iter.bi_sector);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
@@ -198,13 +197,13 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
-		       struct page *page, bool is_write)
+		       struct page *page, unsigned int op)
 {
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	blk_status_t rc;
 
 	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, is_write, sector);
+			  0, op, sector);
 
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
@@ -213,7 +212,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	 * caused by double completion.
 	 */
 	if (rc == 0)
-		page_endio(page, is_write, 0);
+		page_endio(page, op_is_write(op), 0);
 
 	return blk_status_to_errno(rc);
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..496fb51a1e1a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -665,7 +665,8 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
 	result = blk_queue_enter(bdev->bd_queue, 0);
 	if (result)
 		return result;
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_READ);
 	blk_queue_exit(bdev->bd_queue);
 	return result;
 }
@@ -703,7 +704,8 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
 		return result;
 
 	set_page_writeback(page);
-	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			      REQ_OP_WRITE);
 	if (result) {
 		end_page_writeback(page);
 	} else {
diff --git a/fs/mpage.c b/fs/mpage.c
index b7e7f570733a..b73638db9866 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -51,8 +51,8 @@ static void mpage_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-		page_endio(page, op_is_write(bio_op(bio)),
-				blk_status_to_errno(bio->bi_status));
+		page_endio(page, bio_op(bio),
+			   blk_status_to_errno(bio->bi_status));
 	}
 
 	bio_put(bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1939ed95f936..331a6cb8805f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1943,7 +1943,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
-	int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	unsigned int (*check_events) (struct gendisk *disk,
-- 
cgit v1.2.3


From 59767fbd49d794b4499d30b314df6c0d4aca584b Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:37 -0700
Subject: block: Add part_stat_read_accum to read across field entries.

Add a part_stat_read_accum macro to genhd.h to read and sum across
field entries.  For example to sum up the number read and write
sectors completed.  In addition to being ar reasonable cleanup by
itself this will make it easier to add new stat fields in the future.

tj: Refreshed on top of v4.17.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_receiver.c | 3 +--
 drivers/block/drbd/drbd_worker.c   | 4 +---
 drivers/md/md.c                    | 3 +--
 include/linux/genhd.h              | 4 ++++
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index a36a30795c43..75f6b47169e6 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2674,8 +2674,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 	if (c_min_rate == 0)
 		return false;
 
-	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
-		      (int)part_stat_read(&disk->part0, sectors[1]) -
+	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
 			atomic_read(&device->rs_sect_ev);
 
 	if (atomic_read(&device->ap_actlog_cnt)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 5e793dd7adfb..b8f77e83d456 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1690,9 +1690,7 @@ void drbd_rs_controller_reset(struct drbd_device *device)
 	atomic_set(&device->rs_sect_in, 0);
 	atomic_set(&device->rs_sect_ev, 0);
 	device->rs_in_flight = 0;
-	device->rs_last_events =
-		(int)part_stat_read(&disk->part0, sectors[0]) +
-		(int)part_stat_read(&disk->part0, sectors[1]);
+	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);
 
 	/* Updating the RCU protected object in place is necessary since
 	   this function gets called from atomic context.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 994aed2f9dff..dabe36723d60 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8046,8 +8046,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-		curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
-			      (int)part_stat_read(&disk->part0, sectors[1]) -
+		curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
 			      atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 6cb8a5789668..19f36fa10995 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -353,6 +353,10 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #endif /* CONFIG_SMP */
 
+#define part_stat_read_accum(part, field)				\
+	(part_stat_read(part, field[0]) +				\
+	 part_stat_read(part, field[1]))
+
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
 	if ((part)->partno)						\
-- 
cgit v1.2.3


From dbae2c551377b6533a00c11fc7ede370100ab404 Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:38 -0700
Subject: block: Define and use STAT_READ and STAT_WRITE

Add defines for STAT_READ and STAT_WRITE for indexing the partition
stat entries. This clarifies some fs/ code which has hardcoded 1 for
STAT_WRITE and will make it easier to extend the stats with additional
fields.

tj: Refreshed on top of v4.17.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c             | 16 ++++++++--------
 block/partition-generic.c | 16 ++++++++--------
 fs/ext4/super.c           |  5 +++--
 fs/ext4/sysfs.c           |  6 ++++--
 fs/f2fs/f2fs.h            |  2 +-
 fs/f2fs/super.c           |  3 ++-
 include/linux/blk_types.h |  7 +++++++
 include/linux/genhd.h     | 13 +++++++------
 8 files changed, 40 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index f1543a45e73b..0711a800d0d4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1337,14 +1337,14 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   "%u %lu %lu %lu %u %u %u %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
-			   part_stat_read(hd, ios[READ]),
-			   part_stat_read(hd, merges[READ]),
-			   part_stat_read(hd, sectors[READ]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
-			   part_stat_read(hd, ios[WRITE]),
-			   part_stat_read(hd, merges[WRITE]),
-			   part_stat_read(hd, sectors[WRITE]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
+			   part_stat_read(hd, ios[STAT_READ]),
+			   part_stat_read(hd, merges[STAT_READ]),
+			   part_stat_read(hd, sectors[STAT_READ]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
+			   part_stat_read(hd, ios[STAT_WRITE]),
+			   part_stat_read(hd, merges[STAT_WRITE]),
+			   part_stat_read(hd, sectors[STAT_WRITE]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 3dcfd4ec0e11..0ddb06722162 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -132,14 +132,14 @@ ssize_t part_stat_show(struct device *dev,
 		"%8lu %8lu %8llu %8u "
 		"%8u %8u %8u"
 		"\n",
-		part_stat_read(p, ios[READ]),
-		part_stat_read(p, merges[READ]),
-		(unsigned long long)part_stat_read(p, sectors[READ]),
-		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
-		part_stat_read(p, ios[WRITE]),
-		part_stat_read(p, merges[WRITE]),
-		(unsigned long long)part_stat_read(p, sectors[WRITE]),
-		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+		part_stat_read(p, ios[STAT_READ]),
+		part_stat_read(p, merges[STAT_READ]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
+		part_stat_read(p, ios[STAT_WRITE]),
+		part_stat_read(p, merges[STAT_WRITE]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba2396a7bd04..4b8aef989552 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3514,7 +3514,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_sb_block = sb_block;
 	if (sb->s_bdev->bd_part)
 		sbi->s_sectors_written_start =
-			part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
 
 	/* Cleanup superblock name */
 	strreplace(sb->s_id, '/', '!');
@@ -4824,7 +4824,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	if (sb->s_bdev->bd_part)
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
-			    ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			    ((part_stat_read(sb->s_bdev->bd_part,
+					     sectors[STAT_WRITE]) -
 			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
 	else
 		es->s_kbytes_written =
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index f34da0bb8f17..2be9ad790017 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -56,7 +56,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 	if (!sb->s_bdev->bd_part)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%lu\n",
-			(part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			(part_stat_read(sb->s_bdev->bd_part,
+					sectors[STAT_WRITE]) -
 			 sbi->s_sectors_written_start) >> 1);
 }
 
@@ -68,7 +69,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf)
 		return snprintf(buf, PAGE_SIZE, "0\n");
 	return snprintf(buf, PAGE_SIZE, "%llu\n",
 			(unsigned long long)(sbi->s_kbytes_written +
-			((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
+			((part_stat_read(sb->s_bdev->bd_part,
+					 sectors[STAT_WRITE]) -
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4d8b1de83143..6799c3fc44e3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1304,7 +1304,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
  * and the return value is in kbytes. s is of struct f2fs_sb_info.
  */
 #define BD_PART_WRITTEN(s)						 \
-(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) -		 \
+(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) -   \
 		(s)->sectors_written_start) >> 1)
 
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3995e926ba3a..17bcff789c08 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2882,7 +2882,8 @@ try_onemore:
 	/* For write statistics */
 	if (sb->s_bdev->bd_part)
 		sbi->sectors_written_start =
-			(u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+			(u64)part_stat_read(sb->s_bdev->bd_part,
+					    sectors[STAT_WRITE]);
 
 	/* Read accumulated write IO statistics if exists */
 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index e13449a379a1..d2b44de56bc1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -357,6 +357,13 @@ enum req_flag_bits {
 #define REQ_NOMERGE_FLAGS \
 	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)
 
+enum stat_group {
+	STAT_READ,
+	STAT_WRITE,
+
+	NR_STAT_GROUPS
+};
+
 #define bio_op(bio) \
 	((bio)->bi_opf & REQ_OP_MASK)
 #define req_op(req) \
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 19f36fa10995..a75445446974 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/percpu-refcount.h>
 #include <linux/uuid.h>
+#include <linux/blk_types.h>
 
 #ifdef CONFIG_BLOCK
 
@@ -82,10 +83,10 @@ struct partition {
 } __attribute__((packed));
 
 struct disk_stats {
-	unsigned long sectors[2];	/* READs and WRITEs */
-	unsigned long ios[2];
-	unsigned long merges[2];
-	unsigned long ticks[2];
+	unsigned long sectors[NR_STAT_GROUPS];
+	unsigned long ios[NR_STAT_GROUPS];
+	unsigned long merges[NR_STAT_GROUPS];
+	unsigned long ticks[NR_STAT_GROUPS];
 	unsigned long io_ticks;
 	unsigned long time_in_queue;
 };
@@ -354,8 +355,8 @@ static inline void free_part_stats(struct hd_struct *part)
 #endif /* CONFIG_SMP */
 
 #define part_stat_read_accum(part, field)				\
-	(part_stat_read(part, field[0]) +				\
-	 part_stat_read(part, field[1]))
+	(part_stat_read(part, field[STAT_READ]) +			\
+	 part_stat_read(part, field[STAT_WRITE]))
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
-- 
cgit v1.2.3


From ddcf35d397976421a4ec1d0d00fbcc027a8cb034 Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:39 -0700
Subject: block: Add and use op_stat_group() for indexing disk_stat fields.

Add and use a new op_stat_group() function for indexing partition stat
fields rather than indexing them by rq_data_dir() or bio_data_dir().
This function works similarly to op_is_sync() in that it takes the
request::cmd_flags or bio::bi_opf flags and determines which stats
should et updated.

In addition, the second parameter to generic_start_io_acct() and
generic_end_io_acct() is now a REQ_OP rather than simply a read or
write bit and it uses op_stat_group() on the parameter to determine
the stat group.

Note that the partition in_flight counts are not part of the per-cpu
statistics and as such are not indexed via this function.  It's now
indexed by op_is_write().

tj: Refreshed on top of v4.17.  Updated to pass around REQ_OP.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joshua Morris <josh.h.morris@us.ibm.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Matias Bjorling <mb@lightnvm.io>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Alasdair Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                   | 16 +++++++++-------
 block/blk-core.c              | 12 ++++++------
 drivers/block/drbd/drbd_req.c |  4 ++--
 drivers/block/rsxx/dev.c      |  6 +++---
 drivers/block/zram/zram_drv.c |  5 ++---
 drivers/lightnvm/pblk-cache.c |  5 +++--
 drivers/lightnvm/pblk-read.c  |  5 +++--
 drivers/md/bcache/request.c   | 13 +++++--------
 drivers/md/dm.c               |  6 ++++--
 drivers/md/md.c               |  5 +++--
 drivers/nvdimm/nd.h           |  7 +++----
 include/linux/bio.h           |  4 ++--
 include/linux/blk_types.h     |  5 +++++
 13 files changed, 50 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index f3536bfc8298..8ecc95615941 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1728,29 +1728,31 @@ void bio_check_pages_dirty(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
 
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
 			   unsigned long sectors, struct hd_struct *part)
 {
+	const int sgrp = op_stat_group(op);
 	int cpu = part_stat_lock();
 
 	part_round_stats(q, cpu, part);
-	part_stat_inc(cpu, part, ios[rw]);
-	part_stat_add(cpu, part, sectors[rw], sectors);
-	part_inc_in_flight(q, part, rw);
+	part_stat_inc(cpu, part, ios[sgrp]);
+	part_stat_add(cpu, part, sectors[sgrp], sectors);
+	part_inc_in_flight(q, part, op_is_write(op));
 
 	part_stat_unlock();
 }
 EXPORT_SYMBOL(generic_start_io_acct);
 
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int req_op,
 			 struct hd_struct *part, unsigned long start_time)
 {
 	unsigned long duration = jiffies - start_time;
+	const int sgrp = op_stat_group(req_op);
 	int cpu = part_stat_lock();
 
-	part_stat_add(cpu, part, ticks[rw], duration);
+	part_stat_add(cpu, part, ticks[sgrp], duration);
 	part_round_stats(q, cpu, part);
-	part_dec_in_flight(q, part, rw);
+	part_dec_in_flight(q, part, op_is_write(req_op));
 
 	part_stat_unlock();
 }
diff --git a/block/blk-core.c b/block/blk-core.c
index c4b57d8806fe..03a4ea93a5f3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2702,13 +2702,13 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
 void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
 		cpu = part_stat_lock();
 		part = req->part;
-		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_add(cpu, part, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
 }
@@ -2722,7 +2722,7 @@ void blk_account_io_done(struct request *req, u64 now)
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
 		unsigned long duration;
-		const int rw = rq_data_dir(req);
+		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
@@ -2730,10 +2730,10 @@ void blk_account_io_done(struct request *req, u64 now)
 		cpu = part_stat_lock();
 		part = req->part;
 
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
+		part_stat_inc(cpu, part, ios[sgrp]);
+		part_stat_add(cpu, part, ticks[sgrp], duration);
 		part_round_stats(req->q, cpu, part);
-		part_dec_in_flight(req->q, part, rw);
+		part_dec_in_flight(req->q, part, rq_data_dir(req));
 
 		hd_struct_put(part);
 		part_stat_unlock();
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d146fedc38bb..19cac36e9737 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -38,7 +38,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
 {
 	struct request_queue *q = device->rq_queue;
 
-	generic_start_io_acct(q, bio_data_dir(req->master_bio),
+	generic_start_io_acct(q, bio_op(req->master_bio),
 				req->i.size >> 9, &device->vdisk->part0);
 }
 
@@ -47,7 +47,7 @@ static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *r
 {
 	struct request_queue *q = device->rq_queue;
 
-	generic_end_io_acct(q, bio_data_dir(req->master_bio),
+	generic_end_io_acct(q, bio_op(req->master_bio),
 			    &device->vdisk->part0, req->start_jif);
 }
 
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index dddb3f2490b6..1a92f9e65937 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -112,7 +112,7 @@ static const struct block_device_operations rsxx_fops = {
 
 static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio)
 {
-	generic_start_io_acct(card->queue, bio_data_dir(bio), bio_sectors(bio),
+	generic_start_io_acct(card->queue, bio_op(bio), bio_sectors(bio),
 			     &card->gendisk->part0);
 }
 
@@ -120,8 +120,8 @@ static void disk_stats_complete(struct rsxx_cardinfo *card,
 				struct bio *bio,
 				unsigned long start_time)
 {
-	generic_end_io_acct(card->queue, bio_data_dir(bio),
-				&card->gendisk->part0, start_time);
+	generic_end_io_acct(card->queue, bio_op(bio),
+			    &card->gendisk->part0, start_time);
 }
 
 static void bio_dma_done_cb(struct rsxx_cardinfo *card,
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 78c29044684a..2907a8156aaf 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1277,11 +1277,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 			int offset, unsigned int op, struct bio *bio)
 {
 	unsigned long start_time = jiffies;
-	int rw_acct = op_is_write(op) ? REQ_OP_WRITE : REQ_OP_READ;
 	struct request_queue *q = zram->disk->queue;
 	int ret;
 
-	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
+	generic_start_io_acct(q, op, bvec->bv_len >> SECTOR_SHIFT,
 			&zram->disk->part0);
 
 	if (!op_is_write(op)) {
@@ -1293,7 +1292,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
 	}
 
-	generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
+	generic_end_io_acct(q, op, &zram->disk->part0, start_time);
 
 	zram_slot_lock(zram, index);
 	zram_accessed(zram, index);
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 77d811962818..f565a56b898a 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -27,7 +27,8 @@ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags)
 	int nr_entries = pblk_get_secs(bio);
 	int i, ret;
 
-	generic_start_io_acct(q, WRITE, bio_sectors(bio), &pblk->disk->part0);
+	generic_start_io_acct(q, REQ_OP_WRITE, bio_sectors(bio),
+			      &pblk->disk->part0);
 
 	/* Update the write buffer head (mem) with the entries that we can
 	 * write. The write in itself cannot fail, so there is no need to
@@ -75,7 +76,7 @@ retry:
 	pblk_rl_inserted(&pblk->rl, nr_entries);
 
 out:
-	generic_end_io_acct(q, WRITE, &pblk->disk->part0, start_time);
+	generic_end_io_acct(q, REQ_OP_WRITE, &pblk->disk->part0, start_time);
 	pblk_write_should_kick(pblk);
 	return ret;
 }
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 26d414ae25b6..5a46d7f9302f 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -199,7 +199,7 @@ static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
 	struct bio *int_bio = rqd->bio;
 	unsigned long start_time = r_ctx->start_time;
 
-	generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
+	generic_end_io_acct(dev->q, REQ_OP_READ, &pblk->disk->part0, start_time);
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
@@ -461,7 +461,8 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio)
 		return NVM_IO_ERR;
 	}
 
-	generic_start_io_acct(q, READ, bio_sectors(bio), &pblk->disk->part0);
+	generic_start_io_acct(q, REQ_OP_READ, bio_sectors(bio),
+			      &pblk->disk->part0);
 
 	bitmap_zero(read_bitmap, nr_secs);
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ae67f5fa8047..97707b0c54ce 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -667,8 +667,7 @@ static void backing_request_endio(struct bio *bio)
 static void bio_complete(struct search *s)
 {
 	if (s->orig_bio) {
-		generic_end_io_acct(s->d->disk->queue,
-				    bio_data_dir(s->orig_bio),
+		generic_end_io_acct(s->d->disk->queue, bio_op(s->orig_bio),
 				    &s->d->disk->part0, s->start_time);
 
 		trace_bcache_request_end(s->d, s->orig_bio);
@@ -1062,8 +1061,7 @@ static void detached_dev_end_io(struct bio *bio)
 	bio->bi_end_io = ddip->bi_end_io;
 	bio->bi_private = ddip->bi_private;
 
-	generic_end_io_acct(ddip->d->disk->queue,
-			    bio_data_dir(bio),
+	generic_end_io_acct(ddip->d->disk->queue, bio_op(bio),
 			    &ddip->d->disk->part0, ddip->start_time);
 
 	if (bio->bi_status) {
@@ -1120,7 +1118,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
 	}
 
 	atomic_set(&dc->backing_idle, 0);
-	generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
 
 	bio_set_dev(bio, dc->bdev);
 	bio->bi_iter.bi_sector += dc->sb.data_offset;
@@ -1229,7 +1227,6 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 	struct search *s;
 	struct closure *cl;
 	struct bcache_device *d = bio->bi_disk->private_data;
-	int rw = bio_data_dir(bio);
 
 	if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
 		bio->bi_status = BLK_STS_IOERR;
@@ -1237,7 +1234,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 		return BLK_QC_T_NONE;
 	}
 
-	generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
+	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), &d->disk->part0);
 
 	s = search_alloc(bio, d);
 	cl = &s->cl;
@@ -1254,7 +1251,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
 				      flash_dev_nodata,
 				      bcache_wq);
 		return BLK_QC_T_NONE;
-	} else if (rw) {
+	} else if (bio_data_dir(bio)) {
 		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
 					&KEY(d->id, bio->bi_iter.bi_sector, 0),
 					&KEY(d->id, bio_end_sector(bio), 0));
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b0dd7027848b..20f7e4ef5342 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -609,7 +609,8 @@ static void start_io_acct(struct dm_io *io)
 
 	io->start_time = jiffies;
 
-	generic_start_io_acct(md->queue, rw, bio_sectors(bio), &dm_disk(md)->part0);
+	generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
+			      &dm_disk(md)->part0);
 
 	atomic_set(&dm_disk(md)->part0.in_flight[rw],
 		   atomic_inc_return(&md->pending[rw]));
@@ -628,7 +629,8 @@ static void end_io_acct(struct dm_io *io)
 	int pending;
 	int rw = bio_data_dir(bio);
 
-	generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);
+	generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
+			    io->start_time);
 
 	if (unlikely(dm_stats_used(&md->stats)))
 		dm_stats_account_io(&md->stats, bio_data_dir(bio),
diff --git a/drivers/md/md.c b/drivers/md/md.c
index dabe36723d60..f6e58dbca0d4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -335,6 +335,7 @@ EXPORT_SYMBOL(md_handle_request);
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
+	const int sgrp = op_stat_group(bio_op(bio));
 	struct mddev *mddev = q->queuedata;
 	unsigned int sectors;
 	int cpu;
@@ -363,8 +364,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	md_handle_request(mddev, bio);
 
 	cpu = part_stat_lock();
-	part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-	part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+	part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]);
+	part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors);
 	part_stat_unlock();
 
 	return BLK_QC_T_NONE;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 32e0364b48b9..6ee7fd7e4bbd 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -396,16 +396,15 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 		return false;
 
 	*start = jiffies;
-	generic_start_io_acct(disk->queue, bio_data_dir(bio),
-			      bio_sectors(bio), &disk->part0);
+	generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
+			      &disk->part0);
 	return true;
 }
 static inline void nd_iostat_end(struct bio *bio, unsigned long start)
 {
 	struct gendisk *disk = bio->bi_disk;
 
-	generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0,
-				start);
+	generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
 }
 static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
 		unsigned int len)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index a00dfff51aa5..ab221c517f4e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -496,9 +496,9 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
-void generic_start_io_acct(struct request_queue *q, int rw,
+void generic_start_io_acct(struct request_queue *q, int op,
 				unsigned long sectors, struct hd_struct *part);
-void generic_end_io_acct(struct request_queue *q, int rw,
+void generic_end_io_acct(struct request_queue *q, int op,
 				struct hd_struct *part,
 				unsigned long start_time);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d2b44de56bc1..2960a96d833c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -401,6 +401,11 @@ static inline bool op_is_sync(unsigned int op)
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
+static inline int op_stat_group(unsigned int op)
+{
+	return op_is_write(op);
+}
+
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
 #define BLK_QC_T_SHIFT		16
-- 
cgit v1.2.3


From bdca3c87fb7ad1cc61d231d37eb0d8f90d001e0c Mon Sep 17 00:00:00 2001
From: Michael Callahan <michaelcallahan@fb.com>
Date: Wed, 18 Jul 2018 04:47:40 -0700
Subject: block: Track DISCARD statistics and output them in stat and diskstat

Add tracking of REQ_OP_DISCARD ios to the partition statistics and
append them to the various stat files in /sys as well as
/proc/diskstats.  These are tracked with the same four stats as reads
and writes:

Number of discard ios completed.
Number of discard ios merged
Number of discard sectors completed
Milliseconds spent on discard requests

This is done via adding a new STAT_DISCARD define to genhd.h and then
using it to index that stat field for discard requests.

tj: Refreshed on top of v4.17 and other previous updates.

Signed-off-by: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/testing/procfs-diskstats | 10 ++++++++++
 Documentation/block/stat.txt               | 28 ++++++++++++++++------------
 Documentation/iostats.txt                  | 15 +++++++++++++++
 block/genhd.c                              | 13 ++++++++++---
 block/partition-generic.c                  |  9 +++++++--
 include/linux/blk_types.h                  |  8 ++++++++
 include/linux/genhd.h                      |  3 ++-
 7 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index f91a973a37fe..abac31d216de 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -5,6 +5,7 @@ Description:
 		The /proc/diskstats file displays the I/O statistics
 		of block devices. Each line contains the following 14
 		fields:
+
 		 1 - major number
 		 2 - minor mumber
 		 3 - device name
@@ -19,4 +20,13 @@ Description:
 		12 - I/Os currently in progress
 		13 - time spent doing I/Os (ms)
 		14 - weighted time spent doing I/Os (ms)
+
+		Kernel 4.18+ appends four more fields for discard
+		tracking putting the total at 18:
+
+		15 - discards completed successfully
+		16 - discards merged
+		17 - sectors discarded
+		18 - time spent discarding
+
 		For more details refer to Documentation/iostats.txt
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
index 0dbc946de2ea..0aace9cc536c 100644
--- a/Documentation/block/stat.txt
+++ b/Documentation/block/stat.txt
@@ -31,28 +31,32 @@ write ticks     milliseconds  total wait time for write requests
 in_flight       requests      number of I/Os currently in flight
 io_ticks        milliseconds  total time this block device has been active
 time_in_queue   milliseconds  total wait time for all requests
+discard I/Os    requests      number of discard I/Os processed
+discard merges  requests      number of discard I/Os merged with in-queue I/O
+discard sectors sectors       number of sectors discarded
+discard ticks   milliseconds  total wait time for discard requests
 
-read I/Os, write I/Os
-=====================
+read I/Os, write I/Os, discard I/0s
+===================================
 
 These values increment when an I/O request completes.
 
-read merges, write merges
-=========================
+read merges, write merges, discard merges
+=========================================
 
 These values increment when an I/O request is merged with an
 already-queued I/O request.
 
-read sectors, write sectors
-===========================
+read sectors, write sectors, discard_sectors
+============================================
 
-These values count the number of sectors read from or written to this
-block device.  The "sectors" in question are the standard UNIX 512-byte
-sectors, not any device- or filesystem-specific block size.  The
-counters are incremented when the I/O completes.
+These values count the number of sectors read from, written to, or
+discarded from this block device.  The "sectors" in question are the
+standard UNIX 512-byte sectors, not any device- or filesystem-specific
+block size.  The counters are incremented when the I/O completes.
 
-read ticks, write ticks
-=======================
+read ticks, write ticks, discard ticks
+======================================
 
 These values count the number of milliseconds that I/O requests have
 waited on this block device.  If there are multiple I/O requests waiting,
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
index 04d394a2e06c..49df45f90e8a 100644
--- a/Documentation/iostats.txt
+++ b/Documentation/iostats.txt
@@ -31,6 +31,9 @@ Here are examples of these different formats::
       3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
       3    1   hda1 35486 38030 38030 38030
 
+   4.18+ diskstats:
+      3    0   hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
+
 On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
 a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
 
@@ -101,6 +104,18 @@ Field 11 -- weighted # of milliseconds spent doing I/Os
     last update of this field.  This can provide an easy measure of both
     I/O completion time and the backlog that may be accumulating.
 
+Field 12 -- # of discards completed
+    This is the total number of discards completed successfully.
+
+Field 13 -- # of discards merged
+    See the description of field 2
+
+Field 14 -- # of sectors discarded
+    This is the total number of sectors discarded successfully.
+
+Field 15 -- # of milliseconds spent discarding
+    This is the total number of milliseconds spent by all discards (as
+    measured from __make_request() to end_that_request_last()).
 
 To avoid introducing performance bottlenecks, no locks are held while
 modifying these counters.  This implies that minor inaccuracies may be
diff --git a/block/genhd.c b/block/genhd.c
index 0711a800d0d4..8cc719a37b32 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1333,8 +1333,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 		part_round_stats(gp->queue, cpu, hd);
 		part_stat_unlock();
 		part_in_flight(gp->queue, hd, inflight);
-		seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
-			   "%u %lu %lu %lu %u %u %u %u\n",
+		seq_printf(seqf, "%4d %7d %s "
+			   "%lu %lu %lu %u "
+			   "%lu %lu %lu %u "
+			   "%u %u %u "
+			   "%lu %lu %lu %u\n",
 			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
 			   disk_name(gp, hd->partno, buf),
 			   part_stat_read(hd, ios[STAT_READ]),
@@ -1347,7 +1350,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
-			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+			   jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
+			   part_stat_read(hd, ios[STAT_DISCARD]),
+			   part_stat_read(hd, merges[STAT_DISCARD]),
+			   part_stat_read(hd, sectors[STAT_DISCARD]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
 			);
 	}
 	disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 0ddb06722162..5a8975a1201c 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -130,7 +130,8 @@ ssize_t part_stat_show(struct device *dev,
 	return sprintf(buf,
 		"%8lu %8lu %8llu %8u "
 		"%8lu %8lu %8llu %8u "
-		"%8u %8u %8u"
+		"%8u %8u %8u "
+		"%8lu %8lu %8llu %8u"
 		"\n",
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
@@ -142,7 +143,11 @@ ssize_t part_stat_show(struct device *dev,
 		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
-		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
+		part_stat_read(p, ios[STAT_DISCARD]),
+		part_stat_read(p, merges[STAT_DISCARD]),
+		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
+		jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 2960a96d833c..f6dfb30737d8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -360,6 +360,7 @@ enum req_flag_bits {
 enum stat_group {
 	STAT_READ,
 	STAT_WRITE,
+	STAT_DISCARD,
 
 	NR_STAT_GROUPS
 };
@@ -401,8 +402,15 @@ static inline bool op_is_sync(unsigned int op)
 		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 }
 
+static inline bool op_is_discard(unsigned int op)
+{
+	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
+}
+
 static inline int op_stat_group(unsigned int op)
 {
+	if (op_is_discard(op))
+		return STAT_DISCARD;
 	return op_is_write(op);
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index a75445446974..57864422a2c8 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -356,7 +356,8 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #define part_stat_read_accum(part, field)				\
 	(part_stat_read(part, field[STAT_READ]) +			\
-	 part_stat_read(part, field[STAT_WRITE]))
+	 part_stat_read(part, field[STAT_WRITE]) +			\
+	 part_stat_read(part, field[STAT_DISCARD]))
 
 #define part_stat_add(cpu, part, field, addnd)	do {			\
 	__part_stat_add((cpu), (part), field, addnd);			\
-- 
cgit v1.2.3


From 636620b66d5d4012c4a9c86206013964d3986c4f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 18 Jul 2018 04:47:41 -0700
Subject: blkcg: Track DISCARD statistics and output them in cgroup io.stat

Add tracking of REQ_OP_DISCARD ios to the per-cgroup io.stat.  Two
fields, dbytes and dios, to respectively count the total bytes and
number of discards are added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Cc: Michael Callahan <michaelcallahan@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst | 10 ++++++----
 block/blk-cgroup.c                      | 13 +++++++++----
 include/linux/blk-cgroup.h              |  5 ++++-
 3 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 569ce27b85e5..3afe10fa82bc 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1317,17 +1317,19 @@ IO Interface Files
 	Lines are keyed by $MAJ:$MIN device numbers and not ordered.
 	The following nested keys are defined.
 
-	  ======	===================
+	  ======	=====================
 	  rbytes	Bytes read
 	  wbytes	Bytes written
 	  rios		Number of read IOs
 	  wios		Number of write IOs
-	  ======	===================
+	  dbytes	Bytes discarded
+	  dios		Number of discard IOs
+	  ======	=====================
 
 	An example read output follows:
 
-	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353
-	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252
+	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
+	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
 
   io.weight
 	A read-write flat-keyed file which exists on non-root cgroups.
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7e2c19ce1a08..1942357d7165 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -567,6 +567,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 		[BLKG_RWSTAT_WRITE]	= "Write",
 		[BLKG_RWSTAT_SYNC]	= "Sync",
 		[BLKG_RWSTAT_ASYNC]	= "Async",
+		[BLKG_RWSTAT_DISCARD]	= "Discard",
 	};
 	const char *dname = blkg_dev_name(pd->blkg);
 	u64 v;
@@ -580,7 +581,8 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 			   (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
 
 	v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
-		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
+		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]) +
+		atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_DISCARD]);
 	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
 	return v;
 }
@@ -959,7 +961,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 		const char *dname;
 		char *buf;
 		struct blkg_rwstat rwstat;
-		u64 rbytes, wbytes, rios, wios;
+		u64 rbytes, wbytes, rios, wios, dbytes, dios;
 		size_t size = seq_get_buf(sf, &buf), off = 0;
 		int i;
 		bool has_stats = false;
@@ -982,19 +984,22 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 					offsetof(struct blkcg_gq, stat_bytes));
 		rbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
 		wbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+		dbytes = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
 
 		rwstat = blkg_rwstat_recursive_sum(blkg, NULL,
 					offsetof(struct blkcg_gq, stat_ios));
 		rios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_READ]);
 		wios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_WRITE]);
+		dios = atomic64_read(&rwstat.aux_cnt[BLKG_RWSTAT_DISCARD]);
 
 		spin_unlock_irq(blkg->q->queue_lock);
 
 		if (rbytes || wbytes || rios || wios) {
 			has_stats = true;
 			off += scnprintf(buf+off, size-off,
-					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu",
-					 rbytes, wbytes, rios, wios);
+					 "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
+					 rbytes, wbytes, rios, wios,
+					 dbytes, dios);
 		}
 
 		if (!blkcg_debug_stats)
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index de57de4831d5..3bed5e02a873 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -35,6 +35,7 @@ enum blkg_rwstat_type {
 	BLKG_RWSTAT_WRITE,
 	BLKG_RWSTAT_SYNC,
 	BLKG_RWSTAT_ASYNC,
+	BLKG_RWSTAT_DISCARD,
 
 	BLKG_RWSTAT_NR,
 	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
@@ -649,7 +650,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 {
 	struct percpu_counter *cnt;
 
-	if (op_is_write(op))
+	if (op_is_discard(op))
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+	else if (op_is_write(op))
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
 		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
-- 
cgit v1.2.3


From bcf7eac3d97f49d8400ba52c71bee5934bf20093 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 17 Jul 2018 00:47:48 +0900
Subject: regmap: add SCCB support

This adds Serial Camera Control Bus (SCCB) support for regmap API that
is intended to be used by some of Omnivision sensor drivers.

The ov772x and ov9650 drivers are going to use this SCCB regmap API.

The ov772x driver was previously only worked with the i2c controller
drivers that support I2C_FUNC_PROTOCOL_MANGLING, because the ov772x
device doesn't support repeated starts.  After commit 0b964d183cbf
("media: ov772x: allow i2c controllers without
I2C_FUNC_PROTOCOL_MANGLING"), reading ov772x register is replaced with
issuing two separated i2c messages in order to avoid repeated start.
Using this SCCB regmap hides the implementation detail.

The ov9650 driver also issues two separated i2c messages to read the
registers as the device doesn't support repeated start.  So it can
make use of this SCCB regmap.

Cc: Mark Brown <broonie@kernel.org>
Cc: Peter Rosin <peda@axentia.se>
Cc: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Cc: Jacopo Mondi <jacopo+renesas@jmondi.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Hans Verkuil <hans.verkuil@cisco.com>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Mauro Carvalho Chehab <mchehab@s-opensource.com>
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/Kconfig       |   4 ++
 drivers/base/regmap/Makefile      |   1 +
 drivers/base/regmap/regmap-sccb.c | 128 ++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h            |  35 +++++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 drivers/base/regmap/regmap-sccb.c

(limited to 'include')

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index aff34c0c2a3e..6ad5ef48b61e 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -45,3 +45,7 @@ config REGMAP_IRQ
 config REGMAP_SOUNDWIRE
 	tristate
 	depends on SOUNDWIRE_BUS
+
+config REGMAP_SCCB
+	tristate
+	depends on I2C
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 5ed0023fabda..f5b4e8851d00 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -15,3 +15,4 @@ obj-$(CONFIG_REGMAP_MMIO) += regmap-mmio.o
 obj-$(CONFIG_REGMAP_IRQ) += regmap-irq.o
 obj-$(CONFIG_REGMAP_W1) += regmap-w1.o
 obj-$(CONFIG_REGMAP_SOUNDWIRE) += regmap-sdw.o
+obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o
diff --git a/drivers/base/regmap/regmap-sccb.c b/drivers/base/regmap/regmap-sccb.c
new file mode 100644
index 000000000000..b6eb876ea819
--- /dev/null
+++ b/drivers/base/regmap/regmap-sccb.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+// Register map access API - SCCB support
+
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+/**
+ * sccb_is_available - Check if the adapter supports SCCB protocol
+ * @adap: I2C adapter
+ *
+ * Return true if the I2C adapter is capable of using SCCB helper functions,
+ * false otherwise.
+ */
+static bool sccb_is_available(struct i2c_adapter *adap)
+{
+	u32 needed_funcs = I2C_FUNC_SMBUS_BYTE | I2C_FUNC_SMBUS_WRITE_BYTE_DATA;
+
+	/*
+	 * If we ever want support for hardware doing SCCB natively, we will
+	 * introduce a sccb_xfer() callback to struct i2c_algorithm and check
+	 * for it here.
+	 */
+
+	return (i2c_get_functionality(adap) & needed_funcs) == needed_funcs;
+}
+
+/**
+ * regmap_sccb_read - Read data from SCCB slave device
+ * @context: Device that will be interacted wit
+ * @reg: Register to be read from
+ * @val: Pointer to store read value
+ *
+ * This executes the 2-phase write transmission cycle that is followed by a
+ * 2-phase read transmission cycle, returning negative errno else zero on
+ * success.
+ */
+static int regmap_sccb_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+	int ret;
+	union i2c_smbus_data data;
+
+	i2c_lock_bus(i2c->adapter, I2C_LOCK_SEGMENT);
+
+	ret = __i2c_smbus_xfer(i2c->adapter, i2c->addr, i2c->flags,
+			       I2C_SMBUS_WRITE, reg, I2C_SMBUS_BYTE, NULL);
+	if (ret < 0)
+		goto out;
+
+	ret = __i2c_smbus_xfer(i2c->adapter, i2c->addr, i2c->flags,
+			       I2C_SMBUS_READ, 0, I2C_SMBUS_BYTE, &data);
+	if (ret < 0)
+		goto out;
+
+	*val = data.byte;
+out:
+	i2c_unlock_bus(i2c->adapter, I2C_LOCK_SEGMENT);
+
+	return ret;
+}
+
+/**
+ * regmap_sccb_write - Write data to SCCB slave device
+ * @context: Device that will be interacted wit
+ * @reg: Register to write to
+ * @val: Value to be written
+ *
+ * This executes the SCCB 3-phase write transmission cycle, returning negative
+ * errno else zero on success.
+ */
+static int regmap_sccb_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct device *dev = context;
+	struct i2c_client *i2c = to_i2c_client(dev);
+
+	return i2c_smbus_write_byte_data(i2c, reg, val);
+}
+
+static struct regmap_bus regmap_sccb_bus = {
+	.reg_write = regmap_sccb_write,
+	.reg_read = regmap_sccb_read,
+};
+
+static const struct regmap_bus *regmap_get_sccb_bus(struct i2c_client *i2c,
+					const struct regmap_config *config)
+{
+	if (config->val_bits == 8 && config->reg_bits == 8 &&
+			sccb_is_available(i2c->adapter))
+		return &regmap_sccb_bus;
+
+	return ERR_PTR(-ENOTSUPP);
+}
+
+struct regmap *__regmap_init_sccb(struct i2c_client *i2c,
+				  const struct regmap_config *config,
+				  struct lock_class_key *lock_key,
+				  const char *lock_name)
+{
+	const struct regmap_bus *bus = regmap_get_sccb_bus(i2c, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __regmap_init(&i2c->dev, bus, &i2c->dev, config,
+			     lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__regmap_init_sccb);
+
+struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c,
+				       const struct regmap_config *config,
+				       struct lock_class_key *lock_key,
+				       const char *lock_name)
+{
+	const struct regmap_bus *bus = regmap_get_sccb_bus(i2c, config);
+
+	if (IS_ERR(bus))
+		return ERR_CAST(bus);
+
+	return __devm_regmap_init(&i2c->dev, bus, &i2c->dev, config,
+				  lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_sccb);
+
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4f38068ffb71..52bf358e2c73 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -514,6 +514,10 @@ struct regmap *__regmap_init_i2c(struct i2c_client *i2c,
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
 				 const char *lock_name);
+struct regmap *__regmap_init_sccb(struct i2c_client *i2c,
+				  const struct regmap_config *config,
+				  struct lock_class_key *lock_key,
+				  const char *lock_name);
 struct regmap *__regmap_init_slimbus(struct slim_device *slimbus,
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
@@ -558,6 +562,10 @@ struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
 				      const char *lock_name);
+struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c,
+				       const struct regmap_config *config,
+				       struct lock_class_key *lock_key,
+				       const char *lock_name);
 struct regmap *__devm_regmap_init_spi(struct spi_device *dev,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
@@ -645,6 +653,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 	__regmap_lockdep_wrapper(__regmap_init_i2c, #config,		\
 				i2c, config)
 
+/**
+ * regmap_init_sccb() - Initialise register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_sccb(i2c, config)					\
+	__regmap_lockdep_wrapper(__regmap_init_sccb, #config,		\
+				i2c, config)
+
 /**
  * regmap_init_slimbus() - Initialise register map
  *
@@ -797,6 +818,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config,	\
 				i2c, config)
 
+/**
+ * devm_regmap_init_sccb() - Initialise managed register map
+ *
+ * @i2c: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_sccb(i2c, config)				\
+	__regmap_lockdep_wrapper(__devm_regmap_init_sccb, #config,	\
+				i2c, config)
+
 /**
  * devm_regmap_init_spi() - Initialise register map
  *
-- 
cgit v1.2.3


From 7c201ead1bfd19935f689fca69100c335028c047 Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Mon, 16 Jul 2018 22:20:49 -0500
Subject: spi: spi-bitbang: change flags from u8 to u16

This changes the data type of the flags field in struct spi_bitbang from
u8 to u16. This matches the size of the mode field of struct spi_device
where these flags are also used.

This is done in preparation of adding a new SPI mode flag that will be
used with this field that would otherwise not fit in 8 bits.

Signed-off-by: David Lechner <david@lechnology.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi_bitbang.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 51d8c060e513..c1e61641a7f1 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -8,7 +8,7 @@ struct spi_bitbang {
 	struct mutex		lock;
 	u8			busy;
 	u8			use_dma;
-	u8			flags;		/* extra spi->mode support */
+	u16			flags;		/* extra spi->mode support */
 
 	struct spi_master	*master;
 
-- 
cgit v1.2.3


From 50fa3cd33f9ddcd870dd367381c514a2ef038444 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Thu, 24 May 2018 20:35:31 +0800
Subject: dt-bindings: mediatek: Add binding for mt2712 IOMMU and SMI

This patch adds decriptions for mt2712 IOMMU and SMI.

In order to balance the bandwidth, mt2712 has two M4Us, two
smi-commons, 10 smi-larbs. and mt2712 is also MTK IOMMU gen2 which
uses ARM Short-Descriptor translation table format.

The mt2712 M4U-SMI HW diagram is as below:

                            EMI
                             |
              ------------------------------------
              |                                  |
             M4U0                              M4U1
              |                                  |
         smi-common0                        smi-common1
              |                                  |
  -------------------------       --------------------------------
  |     |     |     |     |       |         |        |     |     |
  |     |     |     |     |       |         |        |     |     |
larb0 larb1 larb2 larb3 larb6    larb4    larb5    larb7 larb8 larb9
disp0 vdec  cam   venc   jpg  mdp1/disp1 mdp2/disp2 mdp3 vdo/nr tvd

All the connections are HW fixed, SW can NOT adjust it.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 .../devicetree/bindings/iommu/mediatek,iommu.txt   |  6 +-
 .../memory-controllers/mediatek,smi-common.txt     |  6 +-
 .../memory-controllers/mediatek,smi-larb.txt       |  5 +-
 include/dt-bindings/memory/mt2712-larb-port.h      | 95 ++++++++++++++++++++++
 4 files changed, 106 insertions(+), 6 deletions(-)
 create mode 100644 include/dt-bindings/memory/mt2712-larb-port.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
index 53c20cae309f..df5db732138d 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
@@ -40,6 +40,7 @@ video decode local arbiter, all these ports are according to the video HW.
 Required properties:
 - compatible : must be one of the following string:
 	"mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
+	"mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW.
 	"mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
 - reg : m4u register base and size.
 - interrupts : the interrupt of m4u.
@@ -50,8 +51,9 @@ Required properties:
 	according to the local arbiter index, like larb0, larb1, larb2...
 - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
 	Specifies the mtk_m4u_id as defined in
-	dt-binding/memory/mt2701-larb-port.h for mt2701 and
-	dt-binding/memory/mt8173-larb-port.h for mt8173
+	dt-binding/memory/mt2701-larb-port.h for mt2701,
+	dt-binding/memory/mt2712-larb-port.h for mt2712, and
+	dt-binding/memory/mt8173-larb-port.h for mt8173.
 
 Example:
 	iommu: iommu@10205000 {
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
index aa614b2d7cab..615abdd0eb0d 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
@@ -2,8 +2,9 @@ SMI (Smart Multimedia Interface) Common
 
 The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
 
-Mediatek SMI have two generations of HW architecture, mt8173 uses the second
-generation of SMI HW while mt2701 uses the first generation HW of SMI.
+Mediatek SMI have two generations of HW architecture, mt2712 and mt8173 use
+the second generation of SMI HW while mt2701 uses the first generation HW of
+SMI.
 
 There's slight differences between the two SMI, for generation 2, the
 register which control the iommu port is at each larb's register base. But
@@ -15,6 +16,7 @@ not needed for SMI generation 2.
 Required properties:
 - compatible : must be one of :
 	"mediatek,mt2701-smi-common"
+	"mediatek,mt2712-smi-common"
 	"mediatek,mt8173-smi-common"
 - reg : the register and size of the SMI block.
 - power-domains : a phandle to the power domain of this local arbiter.
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
index ddf46b8856a5..083155cdc2a0 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
@@ -4,8 +4,9 @@ The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
 
 Required properties:
 - compatible : must be one of :
-		"mediatek,mt8173-smi-larb"
 		"mediatek,mt2701-smi-larb"
+		"mediatek,mt2712-smi-larb"
+		"mediatek,mt8173-smi-larb"
 - reg : the register and size of this local arbiter.
 - mediatek,smi : a phandle to the smi_common node.
 - power-domains : a phandle to the power domain of this local arbiter.
@@ -15,7 +16,7 @@ Required properties:
 	    the register.
   - "smi" : It's the clock for transfer data and command.
 
-Required property for mt2701:
+Required property for mt2701 and mt2712:
 - mediatek,larb-id :the hardware id of this larb.
 
 Example:
diff --git a/include/dt-bindings/memory/mt2712-larb-port.h b/include/dt-bindings/memory/mt2712-larb-port.h
new file mode 100644
index 000000000000..6f9aa7349cef
--- /dev/null
+++ b/include/dt-bindings/memory/mt2712-larb-port.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ */
+#ifndef __DTS_IOMMU_PORT_MT2712_H
+#define __DTS_IOMMU_PORT_MT2712_H
+
+#define MTK_M4U_ID(larb, port)		(((larb) << 5) | (port))
+
+#define M4U_LARB0_ID			0
+#define M4U_LARB1_ID			1
+#define M4U_LARB2_ID			2
+#define M4U_LARB3_ID			3
+#define M4U_LARB4_ID			4
+#define M4U_LARB5_ID			5
+#define M4U_LARB6_ID			6
+#define M4U_LARB7_ID			7
+#define M4U_LARB8_ID			8
+#define M4U_LARB9_ID			9
+
+/* larb0 */
+#define M4U_PORT_DISP_OVL0		MTK_M4U_ID(M4U_LARB0_ID, 0)
+#define M4U_PORT_DISP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 1)
+#define M4U_PORT_DISP_WDMA0		MTK_M4U_ID(M4U_LARB0_ID, 2)
+#define M4U_PORT_DISP_OD_R		MTK_M4U_ID(M4U_LARB0_ID, 3)
+#define M4U_PORT_DISP_OD_W		MTK_M4U_ID(M4U_LARB0_ID, 4)
+#define M4U_PORT_MDP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 5)
+#define M4U_PORT_MDP_WDMA		MTK_M4U_ID(M4U_LARB0_ID, 6)
+#define M4U_PORT_DISP_RDMA2		MTK_M4U_ID(M4U_LARB0_ID, 7)
+
+/* larb1 */
+#define M4U_PORT_HW_VDEC_MC_EXT		MTK_M4U_ID(M4U_LARB1_ID, 0)
+#define M4U_PORT_HW_VDEC_PP_EXT		MTK_M4U_ID(M4U_LARB1_ID, 1)
+#define M4U_PORT_HW_VDEC_UFO_EXT	MTK_M4U_ID(M4U_LARB1_ID, 2)
+#define M4U_PORT_HW_VDEC_VLD_EXT	MTK_M4U_ID(M4U_LARB1_ID, 3)
+#define M4U_PORT_HW_VDEC_VLD2_EXT	MTK_M4U_ID(M4U_LARB1_ID, 4)
+#define M4U_PORT_HW_VDEC_AVC_MV_EXT	MTK_M4U_ID(M4U_LARB1_ID, 5)
+#define M4U_PORT_HW_VDEC_PRED_RD_EXT	MTK_M4U_ID(M4U_LARB1_ID, 6)
+#define M4U_PORT_HW_VDEC_PRED_WR_EXT	MTK_M4U_ID(M4U_LARB1_ID, 7)
+#define M4U_PORT_HW_VDEC_PPWRAP_EXT	MTK_M4U_ID(M4U_LARB1_ID, 8)
+#define M4U_PORT_HW_VDEC_TILE		MTK_M4U_ID(M4U_LARB1_ID, 9)
+#define M4U_PORT_HW_IMG_RESZ_EXT	MTK_M4U_ID(M4U_LARB1_ID, 10)
+
+/* larb2 */
+#define M4U_PORT_CAM_DMA0		MTK_M4U_ID(M4U_LARB2_ID, 0)
+#define M4U_PORT_CAM_DMA1		MTK_M4U_ID(M4U_LARB2_ID, 1)
+#define M4U_PORT_CAM_DMA2		MTK_M4U_ID(M4U_LARB2_ID, 2)
+
+/* larb3 */
+#define M4U_PORT_VENC_RCPU		MTK_M4U_ID(M4U_LARB3_ID, 0)
+#define M4U_PORT_VENC_REC		MTK_M4U_ID(M4U_LARB3_ID, 1)
+#define M4U_PORT_VENC_BSDMA		MTK_M4U_ID(M4U_LARB3_ID, 2)
+#define M4U_PORT_VENC_SV_COMV		MTK_M4U_ID(M4U_LARB3_ID, 3)
+#define M4U_PORT_VENC_RD_COMV		MTK_M4U_ID(M4U_LARB3_ID, 4)
+#define M4U_PORT_VENC_CUR_CHROMA	MTK_M4U_ID(M4U_LARB3_ID, 5)
+#define M4U_PORT_VENC_REF_CHROMA	MTK_M4U_ID(M4U_LARB3_ID, 6)
+#define M4U_PORT_VENC_CUR_LUMA		MTK_M4U_ID(M4U_LARB3_ID, 7)
+#define M4U_PORT_VENC_REF_LUMA		MTK_M4U_ID(M4U_LARB3_ID, 8)
+
+/* larb4 */
+#define M4U_PORT_DISP_OVL1		MTK_M4U_ID(M4U_LARB4_ID, 0)
+#define M4U_PORT_DISP_RDMA1		MTK_M4U_ID(M4U_LARB4_ID, 1)
+#define M4U_PORT_DISP_WDMA1		MTK_M4U_ID(M4U_LARB4_ID, 2)
+#define M4U_PORT_DISP_OD1_R		MTK_M4U_ID(M4U_LARB4_ID, 3)
+#define M4U_PORT_DISP_OD1_W		MTK_M4U_ID(M4U_LARB4_ID, 4)
+#define M4U_PORT_MDP_RDMA1		MTK_M4U_ID(M4U_LARB4_ID, 5)
+#define M4U_PORT_MDP_WROT1		MTK_M4U_ID(M4U_LARB4_ID, 6)
+
+/* larb5 */
+#define M4U_PORT_DISP_OVL2		MTK_M4U_ID(M4U_LARB5_ID, 0)
+#define M4U_PORT_DISP_WDMA2		MTK_M4U_ID(M4U_LARB5_ID, 1)
+#define M4U_PORT_MDP_RDMA2		MTK_M4U_ID(M4U_LARB5_ID, 2)
+#define M4U_PORT_MDP_WROT0		MTK_M4U_ID(M4U_LARB5_ID, 3)
+
+/* larb6 */
+#define M4U_PORT_JPGDEC_WDMA_0		MTK_M4U_ID(M4U_LARB6_ID, 0)
+#define M4U_PORT_JPGDEC_WDMA_1		MTK_M4U_ID(M4U_LARB6_ID, 1)
+#define M4U_PORT_JPGDEC_BSDMA_0		MTK_M4U_ID(M4U_LARB6_ID, 2)
+#define M4U_PORT_JPGDEC_BSDMA_1		MTK_M4U_ID(M4U_LARB6_ID, 3)
+
+/* larb7 */
+#define M4U_PORT_MDP_RDMA3		MTK_M4U_ID(M4U_LARB7_ID, 0)
+#define M4U_PORT_MDP_WROT2		MTK_M4U_ID(M4U_LARB7_ID, 1)
+
+/* larb8 */
+#define M4U_PORT_VDO			MTK_M4U_ID(M4U_LARB8_ID, 0)
+#define M4U_PORT_NR			MTK_M4U_ID(M4U_LARB8_ID, 1)
+#define M4U_PORT_WR_CHANNEL0		MTK_M4U_ID(M4U_LARB8_ID, 2)
+
+/* larb9 */
+#define M4U_PORT_TVD			MTK_M4U_ID(M4U_LARB9_ID, 0)
+#define M4U_PORT_WR_CHANNEL1		MTK_M4U_ID(M4U_LARB9_ID, 1)
+
+#endif
-- 
cgit v1.2.3


From 2b6e68119c474a62396b387130a984ad8240dce2 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 18 Jul 2018 16:40:46 +0000
Subject: Input: gpio_keys - add missing include to gpio_keys.h

gpio_keys.h uses 'bool' - type which is defined in linux/types.h.
Include this header.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/gpio_keys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index 7160df54a6fe..3f84aeb81e48 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -2,6 +2,8 @@
 #ifndef _GPIO_KEYS_H
 #define _GPIO_KEYS_H
 
+#include <linux/types.h>
+
 struct device;
 
 /**
-- 
cgit v1.2.3


From a5fb9fb023a1435f2b42bccd7f547560f3a21dc3 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Wed, 18 Jul 2018 15:40:26 -0500
Subject: PCI: OF: Fix I/O space page leak

When testing the R-Car PCIe driver on the Condor board, if the PCIe PHY
driver was left disabled, the kernel crashed with this BUG:

  kernel BUG at lib/ioremap.c:72!
  Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 0 PID: 39 Comm: kworker/0:1 Not tainted 4.17.0-dirty #1092
  Hardware name: Renesas Condor board based on r8a77980 (DT)
  Workqueue: events deferred_probe_work_func
  pstate: 80000005 (Nzcv daif -PAN -UAO)
  pc : ioremap_page_range+0x370/0x3c8
  lr : ioremap_page_range+0x40/0x3c8
  sp : ffff000008da39e0
  x29: ffff000008da39e0 x28: 00e8000000000f07
  x27: ffff7dfffee00000 x26: 0140000000000000
  x25: ffff7dfffef00000 x24: 00000000000fe100
  x23: ffff80007b906000 x22: ffff000008ab8000
  x21: ffff000008bb1d58 x20: ffff7dfffef00000
  x19: ffff800009c30fb8 x18: 0000000000000001
  x17: 00000000000152d0 x16: 00000000014012d0
  x15: 0000000000000000 x14: 0720072007200720
  x13: 0720072007200720 x12: 0720072007200720
  x11: 0720072007300730 x10: 00000000000000ae
  x9 : 0000000000000000 x8 : ffff7dffff000000
  x7 : 0000000000000000 x6 : 0000000000000100
  x5 : 0000000000000000 x4 : 000000007b906000
  x3 : ffff80007c61a880 x2 : ffff7dfffeefffff
  x1 : 0000000040000000 x0 : 00e80000fe100f07
  Process kworker/0:1 (pid: 39, stack limit = 0x        (ptrval))
  Call trace:
   ioremap_page_range+0x370/0x3c8
   pci_remap_iospace+0x7c/0xac
   pci_parse_request_of_pci_ranges+0x13c/0x190
   rcar_pcie_probe+0x4c/0xb04
   platform_drv_probe+0x50/0xbc
   driver_probe_device+0x21c/0x308
   __device_attach_driver+0x98/0xc8
   bus_for_each_drv+0x54/0x94
   __device_attach+0xc4/0x12c
   device_initial_probe+0x10/0x18
   bus_probe_device+0x90/0x98
   deferred_probe_work_func+0xb0/0x150
   process_one_work+0x12c/0x29c
   worker_thread+0x200/0x3fc
   kthread+0x108/0x134
   ret_from_fork+0x10/0x18
  Code: f9004ba2 54000080 aa0003fb 17ffff48 (d4210000)

It turned out that pci_remap_iospace() wasn't undone when the driver's
probe failed, and since devm_phy_optional_get() returned -EPROBE_DEFER,
the probe was retried, finally causing the BUG due to trying to remap
already remapped pages.

Introduce the devm_pci_remap_iospace() managed API and replace the
pci_remap_iospace() call with it to fix the bug.

Fixes: dbf9826d5797 ("PCI: generic: Convert to DT resource parsing API")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
[lorenzo.pieralisi@arm.com: split commit/updated the commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pci/of.c    |  2 +-
 drivers/pci/pci.c   | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  2 ++
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index d088c9147f10..69a60d6ebd73 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -612,7 +612,7 @@ int pci_parse_request_of_pci_ranges(struct device *dev,
 
 		switch (resource_type(res)) {
 		case IORESOURCE_IO:
-			err = pci_remap_iospace(res, iobase);
+			err = devm_pci_remap_iospace(dev, res, iobase);
 			if (err) {
 				dev_warn(dev, "error %d: failed to map resource %pR\n",
 					 err, res);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 97acba712e4e..316496e99da9 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3579,6 +3579,44 @@ void pci_unmap_iospace(struct resource *res)
 }
 EXPORT_SYMBOL(pci_unmap_iospace);
 
+static void devm_pci_unmap_iospace(struct device *dev, void *ptr)
+{
+	struct resource **res = ptr;
+
+	pci_unmap_iospace(*res);
+}
+
+/**
+ * devm_pci_remap_iospace - Managed pci_remap_iospace()
+ * @dev: Generic device to remap IO address for
+ * @res: Resource describing the I/O space
+ * @phys_addr: physical address of range to be mapped
+ *
+ * Managed pci_remap_iospace().  Map is automatically unmapped on driver
+ * detach.
+ */
+int devm_pci_remap_iospace(struct device *dev, const struct resource *res,
+			   phys_addr_t phys_addr)
+{
+	const struct resource **ptr;
+	int error;
+
+	ptr = devres_alloc(devm_pci_unmap_iospace, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	error = pci_remap_iospace(res, phys_addr);
+	if (error) {
+		devres_free(ptr);
+	} else	{
+		*ptr = res;
+		devres_add(dev, ptr);
+	}
+
+	return error;
+}
+EXPORT_SYMBOL(devm_pci_remap_iospace);
+
 /**
  * devm_pci_remap_cfgspace - Managed pci_remap_cfgspace()
  * @dev: Generic device to remap IO address for
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..abd5d5e17aee 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1240,6 +1240,8 @@ int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
 unsigned long pci_address_to_pio(phys_addr_t addr);
 phys_addr_t pci_pio_to_address(unsigned long pio);
 int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr);
+int devm_pci_remap_iospace(struct device *dev, const struct resource *res,
+			   phys_addr_t phys_addr);
 void pci_unmap_iospace(struct resource *res);
 void __iomem *devm_pci_remap_cfgspace(struct device *dev,
 				      resource_size_t offset,
-- 
cgit v1.2.3


From a48d189ef53146a8df132a327a637c4182f50a16 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 17 Jul 2018 11:52:57 +0200
Subject: net: Move skb decrypted field, avoid explicity copy

Commit 784abe24c903 ("net: Add decrypted field to skb")
introduced a 'decrypted' field that is explicitly copied on skb
copy and clone.

Move it between headers_start[0] and headers_end[0], so that we
don't need to copy it explicitly as it's copied by the memcpy()
in __copy_skb_header().

While at it, drop the assignment in __skb_clone(), it was
already redundant.

This doesn't change the size of sk_buff or cacheline boundaries.

The 15-bits hole before tc_index becomes a 14-bits hole, and
will be again a 15-bits hole when this change is merged with
commit 8b7008620b84 ("net: Don't copy pfmemalloc flag in
__copy_skb_header()").

v2: as reported by kbuild test robot (oops, I forgot to build
    with CONFIG_TLS_DEVICE it seems), we can't use
    CHECK_SKB_FIELD() on a bit-field member. Just drop the
    check for the moment being, perhaps we could think of some
    magic to also check bit-field members one day.

Fixes: 784abe24c903 ("net: Add decrypted field to skb")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 ++++-----
 net/core/skbuff.c      | 6 ------
 2 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ceb8dcc54da..14bc9ebe30f2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -630,7 +630,6 @@ typedef unsigned char *sk_buff_data_t;
  *	@hash: the packet hash
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@xmit_more: More SKBs are pending for this queue
- *	@decrypted: Decrypted SKB
  *	@ndisc_nodetype: router type (from link layer)
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -641,6 +640,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
  *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *	@dst_pending_confirm: need to confirm neighbour
+ *	@decrypted: Decrypted SKB
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
@@ -737,11 +737,7 @@ struct sk_buff {
 				peeked:1,
 				head_frag:1,
 				xmit_more:1,
-#ifdef CONFIG_TLS_DEVICE
-				decrypted:1;
-#else
 				__unused:1;
-#endif
 
 	/* fields enclosed in headers_start/headers_end are copied
 	 * using a single memcpy() in __copy_skb_header()
@@ -797,6 +793,9 @@ struct sk_buff {
 	__u8			tc_redirected:1;
 	__u8			tc_from_ingress:1;
 #endif
+#ifdef CONFIG_TLS_DEVICE
+	__u8			decrypted:1;
+#endif
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cfd6c6f35f9c..c4e24ac27464 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -805,9 +805,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 * It is not yet because we do not want to have a 16 bit hole
 	 */
 	new->queue_mapping = old->queue_mapping;
-#ifdef CONFIG_TLS_DEVICE
-	new->decrypted = old->decrypted;
-#endif
 
 	memcpy(&new->headers_start, &old->headers_start,
 	       offsetof(struct sk_buff, headers_end) -
@@ -868,9 +865,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(head_frag);
 	C(data);
 	C(truesize);
-#ifdef CONFIG_TLS_DEVICE
-	C(decrypted);
-#endif
 	refcount_set(&n->users, 1);
 
 	atomic_inc(&(skb_shinfo(skb)->dataref));
-- 
cgit v1.2.3


From eff8ea8f24eac76bc21c25e4ca4ac4ee2dade846 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:30 -0700
Subject: net/mlx5: FW tracer, add hardware structures

This change adds the infrastructure to mlx5 core fw tracer.
It introduces the following 4 new registers:
MLX5_REG_MTRC_CAP  - Used to read tracer capabilities
MLX5_REG_MTRC_CONF - Used to set tracer configurations
MLX5_REG_MTRC_STDB - Used to query tracer strings database
MLX5_REG_MTRC_CTRL - Used to control the tracer

The capability of the tracing can be checked using mcam access
register, therefore, the mcam access register interface will expose
the tracer register.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  4 +++
 include/linux/mlx5/mlx5_ifc.h | 61 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1cb1c0317b77..4a4125b4279d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -138,6 +138,10 @@ enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MTRC_CAP	 = 0x9040,
+	MLX5_REG_MTRC_CONF	 = 0x9041,
+	MLX5_REG_MTRC_STDB	 = 0x9042,
+	MLX5_REG_MTRC_CTRL	 = 0x9043,
 	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1853e7fd6924..bd7b71f54d59 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8112,7 +8112,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         mcqi[0x1];
 	u8         reserved_at_1f[0x1];
 
-	u8         regs_95_to_64[0x20];
+	u8         regs_95_to_68[0x1c];
+	u8         tracer_registers[0x4];
+
 	u8         regs_63_to_32[0x20];
 	u8         regs_31_to_0[0x20];
 };
@@ -9187,4 +9189,61 @@ struct mlx5_ifc_create_uctx_in_bits {
 	struct mlx5_ifc_uctx_bits                     uctx;
 };
 
+struct mlx5_ifc_mtrc_string_db_param_bits {
+	u8         string_db_base_address[0x20];
+
+	u8         reserved_at_20[0x8];
+	u8         string_db_size[0x18];
+};
+
+struct mlx5_ifc_mtrc_cap_bits {
+	u8         trace_owner[0x1];
+	u8         trace_to_memory[0x1];
+	u8         reserved_at_2[0x4];
+	u8         trc_ver[0x2];
+	u8         reserved_at_8[0x14];
+	u8         num_string_db[0x4];
+
+	u8         first_string_trace[0x8];
+	u8         num_string_trace[0x8];
+	u8         reserved_at_30[0x28];
+
+	u8         log_max_trace_buffer_size[0x8];
+
+	u8         reserved_at_60[0x20];
+
+	struct mlx5_ifc_mtrc_string_db_param_bits string_db_param[8];
+
+	u8         reserved_at_280[0x180];
+};
+
+struct mlx5_ifc_mtrc_conf_bits {
+	u8         reserved_at_0[0x1c];
+	u8         trace_mode[0x4];
+	u8         reserved_at_20[0x18];
+	u8         log_trace_buffer_size[0x8];
+	u8         trace_mkey[0x20];
+	u8         reserved_at_60[0x3a0];
+};
+
+struct mlx5_ifc_mtrc_stdb_bits {
+	u8         string_db_index[0x4];
+	u8         reserved_at_4[0x4];
+	u8         read_size[0x18];
+	u8         start_offset[0x20];
+	u8         string_db_data[0];
+};
+
+struct mlx5_ifc_mtrc_ctrl_bits {
+	u8         trace_status[0x2];
+	u8         reserved_at_2[0x2];
+	u8         arm_event[0x1];
+	u8         reserved_at_5[0xb];
+	u8         modify_field_select[0x10];
+	u8         reserved_at_20[0x2b];
+	u8         current_timestamp52_32[0x15];
+	u8         current_timestamp31_0[0x20];
+	u8         reserved_at_80[0x180];
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From 5e022dd353b74132bf216a77b169c43e39f5be9e Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:31 -0700
Subject: net/mlx5: Expose MPEGC (Management PCIe General Configuration)
 structures

This patch exposes PRM layout for handling MPEGC (Management PCIe
General Configuration).

This will be used in the downstream patch for configuring MPEGC via the
driver.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4a4125b4279d..957199c20a0f 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -145,6 +145,7 @@ enum {
 	MLX5_REG_MPCNT		 = 0x9051,
 	MLX5_REG_MTPPS		 = 0x9053,
 	MLX5_REG_MTPPSE		 = 0x9054,
+	MLX5_REG_MPEGC		 = 0x9056,
 	MLX5_REG_MCQI		 = 0x9061,
 	MLX5_REG_MCC		 = 0x9062,
 	MLX5_REG_MCDA		 = 0x9063,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bd7b71f54d59..2de5feaeb74a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8049,6 +8049,19 @@ struct mlx5_ifc_peir_reg_bits {
 	u8         error_type[0x8];
 };
 
+struct mlx5_ifc_mpegc_reg_bits {
+	u8         reserved_at_0[0x30];
+	u8         field_select[0x10];
+
+	u8         tx_overflow_sense[0x1];
+	u8         mark_cqe[0x1];
+	u8         mark_cnp[0x1];
+	u8         reserved_at_43[0x1b];
+	u8         tx_lossy_overflow_oper[0x2];
+
+	u8         reserved_at_60[0x100];
+};
+
 struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         reserved_at_0[0x6d];
 	u8         rx_icrc_encapsulated_counter[0x1];
@@ -8097,7 +8110,11 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7b];
+	u8         reserved_at_0[0x74];
+	u8         mark_tx_action_cnp[0x1];
+	u8         mark_tx_action_cqe[0x1];
+	u8         dynamic_tx_overflow[0x1];
+	u8         reserved_at_77[0x4];
 	u8         pcie_outbound_stalled[0x1];
 	u8         tx_overflow_buffer_pkt[0x1];
 	u8         mtpps_enh_out_per_adj[0x1];
@@ -8112,7 +8129,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         mcqi[0x1];
 	u8         reserved_at_1f[0x1];
 
-	u8         regs_95_to_68[0x1c];
+	u8         regs_95_to_87[0x9];
+	u8         mpegc[0x1];
+	u8         regs_85_to_68[0x12];
 	u8         tracer_registers[0x4];
 
 	u8         regs_63_to_32[0x20];
-- 
cgit v1.2.3


From 8da6fe2a18505b9bd977e573d62d33f836c6903c Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:32 -0700
Subject: net/mlx5: Add core support for double vlan push/pop steering action

As newer firmware supports double push/pop in a single FTE, we add
core bits and extend vlan action logic for it.

Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             | 12 +++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c            |  4 +++-
 include/linux/mlx5/fs.h                                      |  4 +++-
 include/linux/mlx5/mlx5_ifc.h                                | 11 +++++++++--
 6 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 09f178a3fcab..0240aee9189e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -138,6 +138,8 @@ TRACE_EVENT(mlx5_fs_del_fg,
 	{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,	 "MOD_HDR"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,	 "VLAN_PUSH"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_POP,	 "VLAN_POP"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2,	 "VLAN_PUSH_2"},\
+	{MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2,	 "VLAN_POP_2"},\
 	{MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"}
 
 TRACE_EVENT(mlx5_fs_set_fte,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index cecd201f0b73..8f50ce80ff66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -70,9 +70,9 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
 				     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 	else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
-		flow_act.vlan.ethtype = ntohs(attr->vlan_proto);
-		flow_act.vlan.vid = attr->vlan_vid;
-		flow_act.vlan.prio = attr->vlan_prio;
+		flow_act.vlan[0].ethtype = ntohs(attr->vlan_proto);
+		flow_act.vlan[0].vid = attr->vlan_vid;
+		flow_act.vlan[0].prio = attr->vlan_prio;
 	}
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 5a00deff5457..6a62b84e57f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -349,9 +349,15 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
-	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan.ethtype);
-	MLX5_SET(vlan, vlan, vid, fte->action.vlan.vid);
-	MLX5_SET(vlan, vlan, prio, fte->action.vlan.prio);
+	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+	MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+	MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+	MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+	MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
 
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 49a75d31185e..05e7a5112b74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1464,7 +1464,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
-			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH))
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2 |
+			     MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
 		return true;
 
 	return false;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 757b4a30281e..c40f2fc68655 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -152,6 +152,8 @@ struct mlx5_fs_vlan {
         u8  prio;
 };
 
+#define MLX5_FS_VLAN_DEPTH	2
+
 struct mlx5_flow_act {
 	u32 action;
 	bool has_flow_tag;
@@ -159,7 +161,7 @@ struct mlx5_flow_act {
 	u32 encap_id;
 	u32 modify_id;
 	uintptr_t esp_id;
-	struct mlx5_fs_vlan vlan;
+	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
 	struct ib_counters *counters;
 };
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2de5feaeb74a..ae12120ef021 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -337,7 +337,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_9[0x1];
 	u8         pop_vlan[0x1];
 	u8         push_vlan[0x1];
-	u8         reserved_at_c[0x14];
+	u8         reserved_at_c[0x1];
+	u8         pop_vlan_2[0x1];
+	u8         push_vlan_2[0x1];
+	u8         reserved_at_f[0x11];
 
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
@@ -2386,6 +2389,8 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH = 0x100,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2  = 0x400,
+	MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 = 0x800,
 };
 
 struct mlx5_ifc_vlan_bits {
@@ -2416,7 +2421,9 @@ struct mlx5_ifc_flow_context_bits {
 
 	u8         modify_header_id[0x20];
 
-	u8         reserved_at_100[0x100];
+	struct mlx5_ifc_vlan_bits push_vlan_2;
+
+	u8         reserved_at_120[0xe0];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
-- 
cgit v1.2.3


From e2abdcf1d2dd59d7164cf975c50e1587b96b9d04 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 16 Jul 2018 18:35:36 -0700
Subject: net/mlx5: Better return types for CQE API

Reduce sizes of return types.
Use bool for binary indication.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f8671c0a43aa..0566c6a94805 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -750,7 +750,7 @@ enum {
 
 #define MLX5_MINI_CQE_ARRAY_SIZE 8
 
-static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
 {
 	return (cqe->op_own >> 2) & 0x3;
 }
@@ -770,14 +770,14 @@ static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
 	return (cqe->l4_l3_hdr_type >> 2) & 0x3;
 }
 
-static inline u8 cqe_is_tunneled(struct mlx5_cqe64 *cqe)
+static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 {
 	return cqe->outer_l3_tunneled & 0x1;
 }
 
-static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
+static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
 {
-	return !!(cqe->l4_l3_hdr_type & 0x1);
+	return cqe->l4_l3_hdr_type & 0x1;
 }
 
 static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
-- 
cgit v1.2.3


From 0015b80abccecca82622d9e9d48eb210572a0c3b Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Mon, 16 Jul 2018 21:10:34 -0700
Subject: net: dsa: Remove VLA usage

We avoid 2 VLAs by using a pre-allocated field in dsa_switch. We also
try to avoid dynamic allocation whenever possible (when using fewer than
bits-per-long ports, which is the common case).

Link: http://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Link: http://lkml.kernel.org/r/20180505185145.GB32630@lunn.ch
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
[kees: tweak commit subject and message slightly]
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  3 +++
 net/dsa/dsa2.c    | 14 ++++++++++++++
 net/dsa/switch.c  | 22 ++++++++++------------
 3 files changed, 27 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fdbd6082945d..461e8a7661b7 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -259,6 +259,9 @@ struct dsa_switch {
 	/* Number of switch port queues */
 	unsigned int		num_tx_queues;
 
+	unsigned long		*bitmap;
+	unsigned long		_bitmap;
+
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index dc5d9af3dc80..a1917025e155 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -775,6 +775,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 	if (!ds)
 		return NULL;
 
+	/* We avoid allocating memory outside dsa_switch
+	 * if it is not needed.
+	 */
+	if (n <= sizeof(ds->_bitmap) * 8) {
+		ds->bitmap = &ds->_bitmap;
+	} else {
+		ds->bitmap = devm_kcalloc(dev,
+					  BITS_TO_LONGS(n),
+					  sizeof(unsigned long),
+					  GFP_KERNEL);
+		if (unlikely(!ds->bitmap))
+			return NULL;
+	}
+
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index b93511726069..142b294d3446 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -136,21 +136,20 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_mdb *mdb = info->mdb;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(group, ds->num_ports);
 	int port;
 
 	/* Build a mask of Multicast group members */
-	bitmap_zero(group, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, group);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, group);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
+		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
 
-	dsa_switch_mdb_add_bitmap(ds, mdb, group);
+	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
 
 	return 0;
 }
@@ -204,21 +203,20 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_vlan *vlan = info->vlan;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(members, ds->num_ports);
 	int port;
 
 	/* Build a mask of VLAN members */
-	bitmap_zero(members, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, members);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, members);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
+		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
 
-	dsa_switch_vlan_add_bitmap(ds, vlan, members);
+	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 169dc027fb02492ea37a0575db6a658cf922b854 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 17:12:39 +0100
Subject: ipv6: fix useless rol32 call on hash

The rol32 call is currently rotating hash but the rol'd value is
being discarded. I believe the current code is incorrect and hash
should be assigned the rotated value returned from rol32.

Thanks to David Lebrun for spotting this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7528632bcf2a..8f73be494503 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -823,7 +823,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 	 * to minimize possbility that any useful information to an
 	 * attacker is leaked. Only lower 20 bits are relevant.
 	 */
-	rol32(hash, 16);
+	hash = rol32(hash, 16);
 
 	flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
 
-- 
cgit v1.2.3


From d7037ad73daa9598b8caa7d5fdf41e8ceee6ef73 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 8 Jul 2018 12:14:59 +0300
Subject: net/mlx5: Fix QP fragmented buffer allocation

Fix bad alignment of SQ buffer in fragmented QP allocation.
It should start directly after RQ buffer ends.

Take special care of the end case where the RQ buffer does not occupy
a whole page. RQ size is a power of two, so would be the case only for
small RQ sizes (RQ size < PAGE_SIZE).

Fix wrong assignments for sqb->size (mistakenly assigned RQ size),
and for npages value of RQ and SQ.

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/alloc.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/wq.c    | 34 ++++++++++++++++---------
 include/linux/mlx5/driver.h                     | 18 ++++++++++---
 3 files changed, 38 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 323ffe8bf7e4..456f30007ad6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -123,7 +123,7 @@ int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 	int i;
 
 	buf->size = size;
-	buf->npages = 1 << get_order(size);
+	buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
 	buf->page_shift = PAGE_SHIFT;
 	buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
 			     GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index b97bb72b4db4..86478a6b99c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -113,35 +113,45 @@ err_db_free:
 	return err;
 }
 
-static void mlx5e_qp_set_frag_buf(struct mlx5_frag_buf *buf,
-				  struct mlx5_wq_qp *qp)
+static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
+				 struct mlx5_wq_qp *qp)
 {
+	struct mlx5_frag_buf_ctrl *sq_fbc;
 	struct mlx5_frag_buf *rqb, *sqb;
 
-	rqb = &qp->rq.fbc.frag_buf;
+	rqb  = &qp->rq.fbc.frag_buf;
 	*rqb = *buf;
 	rqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	rqb->npages = 1 << get_order(rqb->size);
+	rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
 
-	sqb = &qp->sq.fbc.frag_buf;
-	*sqb = *buf;
-	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	sqb->npages = 1 << get_order(sqb->size);
+	sq_fbc = &qp->sq.fbc;
+	sqb    = &sq_fbc->frag_buf;
+	*sqb   = *buf;
+	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->sq);
+	sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
 	sqb->frags += rqb->npages; /* first part is for the rq */
+	if (sq_fbc->strides_offset)
+		sqb->frags--;
 }
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u32 sq_strides_offset;
 	int err;
 
 	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
 		      MLX5_GET(qpc, qpc, log_rq_size),
 		      &wq->rq.fbc);
-	mlx5_fill_fbc(ilog2(MLX5_SEND_WQE_BB),
-		      MLX5_GET(qpc, qpc, log_sq_size),
-		      &wq->sq.fbc);
+
+	sq_strides_offset =
+		((wq->rq.fbc.frag_sz_m1 + 1) % PAGE_SIZE) / MLX5_SEND_WQE_BB;
+
+	mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
+			     MLX5_GET(qpc, qpc, log_sq_size),
+			     sq_strides_offset,
+			     &wq->sq.fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -156,7 +166,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	mlx5e_qp_set_frag_buf(&wq_ctrl->buf, wq);
+	mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
 
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 80cbb7fdce4a..83957920653a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -358,6 +358,7 @@ struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
 	u32			frag_sz_m1;
+	u32			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
 	u8			log_frag_strides;
@@ -983,14 +984,22 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
-static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
-				 struct mlx5_frag_buf_ctrl *fbc)
+static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+					u32 strides_offset,
+					struct mlx5_frag_buf_ctrl *fbc)
 {
 	fbc->log_stride = log_stride;
 	fbc->log_sz     = log_sz;
 	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
 	fbc->log_frag_strides = PAGE_SHIFT - fbc->log_stride;
 	fbc->frag_sz_m1	= (1 << fbc->log_frag_strides) - 1;
+	fbc->strides_offset = strides_offset;
+}
+
+static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+				 struct mlx5_frag_buf_ctrl *fbc)
+{
+	mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
 }
 
 static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
@@ -1004,7 +1013,10 @@ static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
 static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 					  u32 ix)
 {
-	unsigned int frag = (ix >> fbc->log_frag_strides);
+	unsigned int frag;
+
+	ix  += fbc->strides_offset;
+	frag = ix >> fbc->log_frag_strides;
 
 	return fbc->frag_buf.frags[frag].buf +
 		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
-- 
cgit v1.2.3


From 8963106eabdc56911e9b65258eb5e9a6b7b3dfda Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:12 +0200
Subject: PCI: endpoint: Add MSI-X interfaces

Add PCI_EPC_IRQ_MSIX type.

Add MSI-X callbacks signatures to the ops structure.

Add sysfs interface for set/get MSI-X capability maximum number.

Update documentation accordingly.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../PCI/endpoint/function/binding/pci-test.txt     |  2 +
 drivers/pci/endpoint/pci-ep-cfs.c                  | 24 +++++++++
 drivers/pci/endpoint/pci-epc-core.c                | 57 ++++++++++++++++++++++
 include/linux/pci-epc.h                            |  9 ++++
 include/linux/pci-epf.h                            |  1 +
 5 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/Documentation/PCI/endpoint/function/binding/pci-test.txt b/Documentation/PCI/endpoint/function/binding/pci-test.txt
index 3b68b955fb50..cd76ba47394b 100644
--- a/Documentation/PCI/endpoint/function/binding/pci-test.txt
+++ b/Documentation/PCI/endpoint/function/binding/pci-test.txt
@@ -15,3 +15,5 @@ subsys_id	 : don't care
 interrupt_pin	 : Should be 1 - INTA, 2 - INTB, 3 - INTC, 4 -INTD
 msi_interrupts	 : Should be 1 to 32 depending on the number of MSI interrupts
 		   to test
+msix_interrupts	 : Should be 1 to 2048 depending on the number of MSI-X
+		   interrupts to test
diff --git a/drivers/pci/endpoint/pci-ep-cfs.c b/drivers/pci/endpoint/pci-ep-cfs.c
index 018ea3433cb5..d1288a0bd530 100644
--- a/drivers/pci/endpoint/pci-ep-cfs.c
+++ b/drivers/pci/endpoint/pci-ep-cfs.c
@@ -286,6 +286,28 @@ static ssize_t pci_epf_msi_interrupts_show(struct config_item *item,
 		       to_pci_epf_group(item)->epf->msi_interrupts);
 }
 
+static ssize_t pci_epf_msix_interrupts_store(struct config_item *item,
+					     const char *page, size_t len)
+{
+	u16 val;
+	int ret;
+
+	ret = kstrtou16(page, 0, &val);
+	if (ret)
+		return ret;
+
+	to_pci_epf_group(item)->epf->msix_interrupts = val;
+
+	return len;
+}
+
+static ssize_t pci_epf_msix_interrupts_show(struct config_item *item,
+					    char *page)
+{
+	return sprintf(page, "%d\n",
+		       to_pci_epf_group(item)->epf->msix_interrupts);
+}
+
 PCI_EPF_HEADER_R(vendorid)
 PCI_EPF_HEADER_W_u16(vendorid)
 
@@ -327,6 +349,7 @@ CONFIGFS_ATTR(pci_epf_, subsys_vendor_id);
 CONFIGFS_ATTR(pci_epf_, subsys_id);
 CONFIGFS_ATTR(pci_epf_, interrupt_pin);
 CONFIGFS_ATTR(pci_epf_, msi_interrupts);
+CONFIGFS_ATTR(pci_epf_, msix_interrupts);
 
 static struct configfs_attribute *pci_epf_attrs[] = {
 	&pci_epf_attr_vendorid,
@@ -340,6 +363,7 @@ static struct configfs_attribute *pci_epf_attrs[] = {
 	&pci_epf_attr_subsys_id,
 	&pci_epf_attr_interrupt_pin,
 	&pci_epf_attr_msi_interrupts,
+	&pci_epf_attr_msix_interrupts,
 	NULL,
 };
 
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index b0ee42739c3c..7d77bd0e5d4a 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -217,6 +217,63 @@ int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts)
 }
 EXPORT_SYMBOL_GPL(pci_epc_set_msi);
 
+/**
+ * pci_epc_get_msix() - get the number of MSI-X interrupt numbers allocated
+ * @epc: the EPC device to which MSI-X interrupts was requested
+ * @func_no: the endpoint function number in the EPC device
+ *
+ * Invoke to get the number of MSI-X interrupts allocated by the RC
+ */
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no)
+{
+	int interrupt;
+	unsigned long flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return 0;
+
+	if (!epc->ops->get_msix)
+		return 0;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	interrupt = epc->ops->get_msix(epc, func_no);
+	spin_unlock_irqrestore(&epc->lock, flags);
+
+	if (interrupt < 0)
+		return 0;
+
+	return interrupt + 1;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_msix);
+
+/**
+ * pci_epc_set_msix() - set the number of MSI-X interrupt numbers required
+ * @epc: the EPC device on which MSI-X has to be configured
+ * @func_no: the endpoint function number in the EPC device
+ * @interrupts: number of MSI-X interrupts required by the EPF
+ *
+ * Invoke to set the required number of MSI-X interrupts.
+ */
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts)
+{
+	int ret;
+	unsigned long flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions ||
+	    interrupts < 1 || interrupts > 2048)
+		return -EINVAL;
+
+	if (!epc->ops->set_msix)
+		return 0;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	ret = epc->ops->set_msix(epc, func_no, interrupts - 1);
+	spin_unlock_irqrestore(&epc->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_set_msix);
+
 /**
  * pci_epc_unmap_addr() - unmap CPU address from PCI address
  * @epc: the EPC device on which address is allocated
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 243eaa5a66ff..89f079f582df 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -17,6 +17,7 @@ enum pci_epc_irq_type {
 	PCI_EPC_IRQ_UNKNOWN,
 	PCI_EPC_IRQ_LEGACY,
 	PCI_EPC_IRQ_MSI,
+	PCI_EPC_IRQ_MSIX,
 };
 
 /**
@@ -30,6 +31,10 @@ enum pci_epc_irq_type {
  *	     capability register
  * @get_msi: ops to get the number of MSI interrupts allocated by the RC from
  *	     the MSI capability register
+ * @set_msix: ops to set the requested number of MSI-X interrupts in the
+ *	     MSI-X capability register
+ * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC
+ *	     from the MSI-X capability register
  * @raise_irq: ops to raise a legacy or MSI interrupt
  * @start: ops to start the PCI link
  * @stop: ops to stop the PCI link
@@ -48,6 +53,8 @@ struct pci_epc_ops {
 			      phys_addr_t addr);
 	int	(*set_msi)(struct pci_epc *epc, u8 func_no, u8 interrupts);
 	int	(*get_msi)(struct pci_epc *epc, u8 func_no);
+	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts);
+	int	(*get_msix)(struct pci_epc *epc, u8 func_no);
 	int	(*raise_irq)(struct pci_epc *epc, u8 func_no,
 			     enum pci_epc_irq_type type, u8 interrupt_num);
 	int	(*start)(struct pci_epc *epc);
@@ -144,6 +151,8 @@ void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no,
 			phys_addr_t phys_addr);
 int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 interrupts);
 int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
+int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts);
+int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
 		      enum pci_epc_irq_type type, u8 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h
index 4e7764935fa8..ec02f58758c8 100644
--- a/include/linux/pci-epf.h
+++ b/include/linux/pci-epf.h
@@ -119,6 +119,7 @@ struct pci_epf {
 	struct pci_epf_header	*header;
 	struct pci_epf_bar	bar[6];
 	u8			msi_interrupts;
+	u16			msix_interrupts;
 	u8			func_no;
 
 	struct pci_epc		*epc;
-- 
cgit v1.2.3


From d3c70a98d7d63cae02d50ebfafea04264a767401 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:13 +0200
Subject: PCI: Update xxx_pcie_ep_raise_irq() and pci_epc_raise_irq()
 signatures

Change {cdns, dra7xx, artpec6, dw, rockchip}_pcie_ep_raise_irq() and
pci_epc_raise_irq() signature, namely the interrupt_num variable type
from u8 to u16 to accommodate 2048 maximum MSI-X interrupts.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Alan Douglas <adouglas@cadence.com>
Acked-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Jesper Nilsson <jesper.nilsson@axis.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/controller/dwc/pci-dra7xx.c           | 2 +-
 drivers/pci/controller/dwc/pcie-artpec6.c         | 2 +-
 drivers/pci/controller/dwc/pcie-designware-ep.c   | 2 +-
 drivers/pci/controller/dwc/pcie-designware-plat.c | 2 +-
 drivers/pci/controller/dwc/pcie-designware.h      | 2 +-
 drivers/pci/controller/pcie-cadence-ep.c          | 3 ++-
 drivers/pci/controller/pcie-rockchip-ep.c         | 2 +-
 drivers/pci/endpoint/pci-epc-core.c               | 8 ++++----
 include/linux/pci-epc.h                           | 6 +++---
 9 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c
index 345aab56ce8b..ce9224a36f62 100644
--- a/drivers/pci/controller/dwc/pci-dra7xx.c
+++ b/drivers/pci/controller/dwc/pci-dra7xx.c
@@ -370,7 +370,7 @@ static void dra7xx_pcie_raise_msi_irq(struct dra7xx_pcie *dra7xx,
 }
 
 static int dra7xx_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
-				 enum pci_epc_irq_type type, u8 interrupt_num)
+				 enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pci);
diff --git a/drivers/pci/controller/dwc/pcie-artpec6.c b/drivers/pci/controller/dwc/pcie-artpec6.c
index 128b182648b3..dba83abfe764 100644
--- a/drivers/pci/controller/dwc/pcie-artpec6.c
+++ b/drivers/pci/controller/dwc/pcie-artpec6.c
@@ -427,7 +427,7 @@ static void artpec6_pcie_ep_init(struct dw_pcie_ep *ep)
 }
 
 static int artpec6_pcie_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
-				  enum pci_epc_irq_type type, u8 interrupt_num)
+				  enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 04092a7aba89..69d039de2af6 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -242,7 +242,7 @@ static int dw_pcie_ep_set_msi(struct pci_epc *epc, u8 func_no, u8 encode_int)
 }
 
 static int dw_pcie_ep_raise_irq(struct pci_epc *epc, u8 func_no,
-				enum pci_epc_irq_type type, u8 interrupt_num)
+				enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index a37dc92a03c7..cca69ac63319 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -81,7 +81,7 @@ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
 
 static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
 				     enum pci_epc_irq_type type,
-				     u8 interrupt_num)
+				     u16 interrupt_num)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index bee4e2535a61..9d581c077329 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -191,7 +191,7 @@ enum dw_pcie_as_type {
 struct dw_pcie_ep_ops {
 	void	(*ep_init)(struct dw_pcie_ep *ep);
 	int	(*raise_irq)(struct dw_pcie_ep *ep, u8 func_no,
-			     enum pci_epc_irq_type type, u8 interrupt_num);
+			     enum pci_epc_irq_type type, u16 interrupt_num);
 };
 
 struct dw_pcie_ep {
diff --git a/drivers/pci/controller/pcie-cadence-ep.c b/drivers/pci/controller/pcie-cadence-ep.c
index e3fe4124e3af..208d11f27d5d 100644
--- a/drivers/pci/controller/pcie-cadence-ep.c
+++ b/drivers/pci/controller/pcie-cadence-ep.c
@@ -363,7 +363,8 @@ static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
 }
 
 static int cdns_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
-				  enum pci_epc_irq_type type, u8 interrupt_num)
+				  enum pci_epc_irq_type type,
+				  u16 interrupt_num)
 {
 	struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 6beba8ed7b84..b8163c56a142 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -472,7 +472,7 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn,
 
 static int rockchip_pcie_ep_raise_irq(struct pci_epc *epc, u8 fn,
 				      enum pci_epc_irq_type type,
-				      u8 interrupt_num)
+				      u16 interrupt_num)
 {
 	struct rockchip_pcie_ep *ep = epc_get_drvdata(epc);
 
diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 7d77bd0e5d4a..c72e656e7e29 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -131,13 +131,13 @@ EXPORT_SYMBOL_GPL(pci_epc_start);
  * pci_epc_raise_irq() - interrupt the host system
  * @epc: the EPC device which has to interrupt the host
  * @func_no: the endpoint function number in the EPC device
- * @type: specify the type of interrupt; legacy or MSI
- * @interrupt_num: the MSI interrupt number
+ * @type: specify the type of interrupt; legacy, MSI or MSI-X
+ * @interrupt_num: the MSI or MSI-X interrupt number
  *
- * Invoke to raise an MSI or legacy interrupt
+ * Invoke to raise an legacy, MSI or MSI-X interrupt
  */
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
-		      enum pci_epc_irq_type type, u8 interrupt_num)
+		      enum pci_epc_irq_type type, u16 interrupt_num)
 {
 	int ret;
 	unsigned long flags;
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 89f079f582df..bb2395b56f13 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -35,7 +35,7 @@ enum pci_epc_irq_type {
  *	     MSI-X capability register
  * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC
  *	     from the MSI-X capability register
- * @raise_irq: ops to raise a legacy or MSI interrupt
+ * @raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
  * @start: ops to start the PCI link
  * @stop: ops to stop the PCI link
  * @owner: the module owner containing the ops
@@ -56,7 +56,7 @@ struct pci_epc_ops {
 	int	(*set_msix)(struct pci_epc *epc, u8 func_no, u16 interrupts);
 	int	(*get_msix)(struct pci_epc *epc, u8 func_no);
 	int	(*raise_irq)(struct pci_epc *epc, u8 func_no,
-			     enum pci_epc_irq_type type, u8 interrupt_num);
+			     enum pci_epc_irq_type type, u16 interrupt_num);
 	int	(*start)(struct pci_epc *epc);
 	void	(*stop)(struct pci_epc *epc);
 	struct module *owner;
@@ -154,7 +154,7 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
 int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts);
 int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
-		      enum pci_epc_irq_type type, u8 interrupt_num);
+		      enum pci_epc_irq_type type, u16 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
 struct pci_epc *pci_epc_get(const char *epc_name);
-- 
cgit v1.2.3


From c2e00e31087e58f6c49b90b4702fc3df4fad6a83 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:19 +0200
Subject: pci-epf-test/pci_endpoint_test: Add MSI-X support

Add MSI-X support and update driver documentation accordingly.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 Documentation/PCI/endpoint/pci-endpoint.txt       |  4 ++--
 Documentation/PCI/endpoint/pci-test-function.txt  |  4 +++-
 Documentation/PCI/endpoint/pci-test-howto.txt     | 22 ++++++++++++++---
 Documentation/ioctl/ioctl-number.txt              |  1 +
 Documentation/misc-devices/pci-endpoint-test.txt  |  3 +++
 drivers/misc/pci_endpoint_test.c                  | 29 ++++++++++++++++-------
 drivers/pci/controller/dwc/pcie-designware-plat.c |  1 +
 drivers/pci/endpoint/functions/pci-epf-test.c     | 29 +++++++++++++++++++++--
 include/linux/pci-epc.h                           |  1 +
 include/uapi/linux/pcitest.h                      |  1 +
 10 files changed, 79 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/Documentation/PCI/endpoint/pci-endpoint.txt b/Documentation/PCI/endpoint/pci-endpoint.txt
index 9b1d66829290..e86a96b66a6a 100644
--- a/Documentation/PCI/endpoint/pci-endpoint.txt
+++ b/Documentation/PCI/endpoint/pci-endpoint.txt
@@ -44,7 +44,7 @@ by the PCI controller driver.
 	 * clear_bar: ops to reset the BAR
 	 * alloc_addr_space: ops to allocate in PCI controller address space
 	 * free_addr_space: ops to free the allocated address space
-	 * raise_irq: ops to raise a legacy or MSI interrupt
+	 * raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
 	 * start: ops to start the PCI link
 	 * stop: ops to stop the PCI link
 
@@ -96,7 +96,7 @@ by the PCI endpoint function driver.
 *) pci_epc_raise_irq()
 
    The PCI endpoint function driver should use pci_epc_raise_irq() to raise
-   Legacy Interrupt or MSI Interrupt.
+   Legacy Interrupt, MSI or MSI-X Interrupt.
 
 *) pci_epc_mem_alloc_addr()
 
diff --git a/Documentation/PCI/endpoint/pci-test-function.txt b/Documentation/PCI/endpoint/pci-test-function.txt
index bf4b5cf6fee6..5916f1f592bb 100644
--- a/Documentation/PCI/endpoint/pci-test-function.txt
+++ b/Documentation/PCI/endpoint/pci-test-function.txt
@@ -36,7 +36,7 @@ that the endpoint device must perform.
 Bitfield Description:
   Bit 0		: raise legacy IRQ
   Bit 1		: raise MSI IRQ
-  Bit 2		: raise MSI-X IRQ (reserved for future implementation)
+  Bit 2		: raise MSI-X IRQ
   Bit 3		: read command (read data from RC buffer)
   Bit 4		: write command (write data to RC buffer)
   Bit 5		: copy command (copy data from one RC buffer to another
@@ -75,6 +75,7 @@ for the READ/WRITE/COPY and raise IRQ (Legacy/MSI) commands.
 Possible types:
  - Legacy	: 0
  - MSI		: 1
+ - MSI-X	: 2
 
 *) PCI_ENDPOINT_TEST_IRQ_NUMBER
 
@@ -83,3 +84,4 @@ This register contains the triggered ID interrupt.
 Admissible values:
  - Legacy	: 0
  - MSI		: [1 .. 32]
+ - MSI-X	: [1 .. 2048]
diff --git a/Documentation/PCI/endpoint/pci-test-howto.txt b/Documentation/PCI/endpoint/pci-test-howto.txt
index 75f48c3bb191..65f1a137e35c 100644
--- a/Documentation/PCI/endpoint/pci-test-howto.txt
+++ b/Documentation/PCI/endpoint/pci-test-howto.txt
@@ -45,9 +45,9 @@ The PCI endpoint framework populates the directory with the following
 configurable fields.
 
 	# ls functions/pci_epf_test/func1
-	  baseclass_code	interrupt_pin	revid		subsys_vendor_id
-	  cache_line_size	msi_interrupts	subclass_code	vendorid
-	  deviceid          	progif_code	subsys_id
+	  baseclass_code	interrupt_pin	progif_code	subsys_id
+	  cache_line_size	msi_interrupts	revid		subsys_vendorid
+	  deviceid          	msix_interrupts	subclass_code	vendorid
 
 The PCI endpoint function driver populates these entries with default values
 when the device is bound to the driver. The pci-epf-test driver populates
@@ -67,6 +67,7 @@ device, the following commands can be used.
 	# echo 0x104c > functions/pci_epf_test/func1/vendorid
 	# echo 0xb500 > functions/pci_epf_test/func1/deviceid
 	# echo 16 > functions/pci_epf_test/func1/msi_interrupts
+	# echo 8 > functions/pci_epf_test/func1/msix_interrupts
 
 1.5 Binding pci-epf-test Device to EP Controller
 
@@ -153,6 +154,21 @@ following commands.
 	MSI30:          NOT OKAY
 	MSI31:          NOT OKAY
 	MSI32:          NOT OKAY
+	MSIX1:          OKAY
+	MSIX2:          OKAY
+	MSIX3:          OKAY
+	MSIX4:          OKAY
+	MSIX5:          OKAY
+	MSIX6:          OKAY
+	MSIX7:          OKAY
+	MSIX8:          OKAY
+	MSIX9:          NOT OKAY
+	MSIX10:         NOT OKAY
+	MSIX11:         NOT OKAY
+	MSIX12:         NOT OKAY
+	MSIX13:         NOT OKAY
+	[...]
+	MSIX2048:       NOT OKAY
 
 	Read Tests
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 480c8609dc58..65259d459fd1 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -166,6 +166,7 @@ Code  Seq#(hex)	Include File		Comments
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
 'P'	00-0F	drivers/usb/class/usblp.c	conflict!
+'P'	01-07	drivers/misc/pci_endpoint_test.c	conflict!
 'Q'	all	linux/soundcard.h
 'R'	00-1F	linux/random.h		conflict!
 'R'	01	linux/rfkill.h		conflict!
diff --git a/Documentation/misc-devices/pci-endpoint-test.txt b/Documentation/misc-devices/pci-endpoint-test.txt
index 4ebc3594b32c..fdfa0f66d3d0 100644
--- a/Documentation/misc-devices/pci-endpoint-test.txt
+++ b/Documentation/misc-devices/pci-endpoint-test.txt
@@ -10,6 +10,7 @@ The PCI driver for the test device performs the following tests
 	*) verifying addresses programmed in BAR
 	*) raise legacy IRQ
 	*) raise MSI IRQ
+	*) raise MSI-X IRQ
 	*) read data
 	*) write data
 	*) copy data
@@ -25,6 +26,8 @@ ioctl
  PCITEST_LEGACY_IRQ: Tests legacy IRQ
  PCITEST_MSI: Tests message signalled interrupts. The MSI number
 	      to be tested should be passed as argument.
+ PCITEST_MSIX: Tests message signalled interrupts. The MSI-X number
+	      to be tested should be passed as argument.
  PCITEST_WRITE: Perform write tests. The size of the buffer should be passed
 		as argument.
  PCITEST_READ: Perform read tests. The size of the buffer should be passed
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index 349794cbe1f3..f4fef108caff 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -39,13 +39,14 @@
 
 #define IRQ_TYPE_LEGACY				0
 #define IRQ_TYPE_MSI				1
+#define IRQ_TYPE_MSIX				2
 
 #define PCI_ENDPOINT_TEST_MAGIC			0x0
 
 #define PCI_ENDPOINT_TEST_COMMAND		0x4
 #define COMMAND_RAISE_LEGACY_IRQ		BIT(0)
 #define COMMAND_RAISE_MSI_IRQ			BIT(1)
-/* BIT(2) is reserved for raising MSI-X IRQ command */
+#define COMMAND_RAISE_MSIX_IRQ			BIT(2)
 #define COMMAND_READ				BIT(3)
 #define COMMAND_WRITE				BIT(4)
 #define COMMAND_COPY				BIT(5)
@@ -84,7 +85,7 @@ MODULE_PARM_DESC(no_msi, "Disable MSI interrupt in pci_endpoint_test");
 
 static int irq_type = IRQ_TYPE_MSI;
 module_param(irq_type, int, 0444);
-MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI)");
+MODULE_PARM_DESC(irq_type, "IRQ mode selection in pci_endpoint_test (0 - Legacy, 1 - MSI, 2 - MSI-X)");
 
 enum pci_barno {
 	BAR_0,
@@ -202,16 +203,18 @@ static bool pci_endpoint_test_legacy_irq(struct pci_endpoint_test *test)
 }
 
 static bool pci_endpoint_test_msi_irq(struct pci_endpoint_test *test,
-				      u8 msi_num)
+				       u16 msi_num, bool msix)
 {
 	u32 val;
 	struct pci_dev *pdev = test->pdev;
 
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_TYPE,
-				 IRQ_TYPE_MSI);
+				 msix == false ? IRQ_TYPE_MSI :
+				 IRQ_TYPE_MSIX);
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_IRQ_NUMBER, msi_num);
 	pci_endpoint_test_writel(test, PCI_ENDPOINT_TEST_COMMAND,
-				 COMMAND_RAISE_MSI_IRQ);
+				 msix == false ? COMMAND_RAISE_MSI_IRQ :
+				 COMMAND_RAISE_MSIX_IRQ);
 	val = wait_for_completion_timeout(&test->irq_raised,
 					  msecs_to_jiffies(1000));
 	if (!val)
@@ -456,7 +459,8 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
 		ret = pci_endpoint_test_legacy_irq(test);
 		break;
 	case PCITEST_MSI:
-		ret = pci_endpoint_test_msi_irq(test, arg);
+	case PCITEST_MSIX:
+		ret = pci_endpoint_test_msi_irq(test, arg, cmd == PCITEST_MSIX);
 		break;
 	case PCITEST_WRITE:
 		ret = pci_endpoint_test_write(test, arg);
@@ -542,6 +546,12 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 			dev_err(dev, "Failed to get MSI interrupts\n");
 		test->num_irqs = irq;
 		break;
+	case IRQ_TYPE_MSIX:
+		irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI-X interrupts\n");
+		test->num_irqs = irq;
+		break;
 	default:
 		dev_err(dev, "Invalid IRQ type selected\n");
 	}
@@ -558,8 +568,9 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 				       pci_endpoint_test_irqhandler,
 				       IRQF_SHARED, DRV_MODULE_NAME, test);
 		if (err)
-			dev_err(dev, "failed to request IRQ %d for MSI %d\n",
-				pci_irq_vector(pdev, i), i + 1);
+			dev_err(dev, "Failed to request IRQ %d for MSI%s %d\n",
+				pci_irq_vector(pdev, i),
+				irq_type == IRQ_TYPE_MSIX ? "-X" : "", i + 1);
 	}
 
 	for (bar = BAR_0; bar <= BAR_5; bar++) {
@@ -625,6 +636,7 @@ err_iounmap:
 
 err_disable_msi:
 	pci_disable_msi(pdev);
+	pci_disable_msix(pdev);
 	pci_release_regions(pdev);
 
 err_disable_pdev:
@@ -656,6 +668,7 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
 	for (i = 0; i < test->num_irqs; i++)
 		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test);
 	pci_disable_msi(pdev);
+	pci_disable_msix(pdev);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 }
diff --git a/drivers/pci/controller/dwc/pcie-designware-plat.c b/drivers/pci/controller/dwc/pcie-designware-plat.c
index 3f8a3aa3a91e..c12bf794d69c 100644
--- a/drivers/pci/controller/dwc/pcie-designware-plat.c
+++ b/drivers/pci/controller/dwc/pcie-designware-plat.c
@@ -77,6 +77,7 @@ static void dw_plat_pcie_ep_init(struct dw_pcie_ep *ep)
 		dw_pcie_ep_reset_bar(pci, bar);
 
 	epc->features |= EPC_FEATURE_NO_LINKUP_NOTIFIER;
+	epc->features |= EPC_FEATURE_MSIX_AVAILABLE;
 }
 
 static int dw_plat_pcie_ep_raise_irq(struct dw_pcie_ep *ep, u8 func_no,
diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index db4b23672004..3e86fa3c7da3 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -20,10 +20,11 @@
 
 #define IRQ_TYPE_LEGACY			0
 #define IRQ_TYPE_MSI			1
+#define IRQ_TYPE_MSIX			2
 
 #define COMMAND_RAISE_LEGACY_IRQ	BIT(0)
 #define COMMAND_RAISE_MSI_IRQ		BIT(1)
-/* BIT(2) is reserved for raising MSI-X IRQ command */
+#define COMMAND_RAISE_MSIX_IRQ		BIT(2)
 #define COMMAND_READ			BIT(3)
 #define COMMAND_WRITE			BIT(4)
 #define COMMAND_COPY			BIT(5)
@@ -47,6 +48,7 @@ struct pci_epf_test {
 	struct pci_epf		*epf;
 	enum pci_barno		test_reg_bar;
 	bool			linkup_notifier;
+	bool			msix_available;
 	struct delayed_work	cmd_handler;
 };
 
@@ -266,6 +268,9 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq_type,
 	case IRQ_TYPE_MSI:
 		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSI, irq);
 		break;
+	case IRQ_TYPE_MSIX:
+		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX, irq);
+		break;
 	default:
 		dev_err(dev, "Failed to raise IRQ, unknown type\n");
 		break;
@@ -292,7 +297,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 	reg->command = 0;
 	reg->status = 0;
 
-	if (reg->irq_type > IRQ_TYPE_MSI) {
+	if (reg->irq_type > IRQ_TYPE_MSIX) {
 		dev_err(dev, "Failed to detect IRQ type\n");
 		goto reset_handler;
 	}
@@ -346,6 +351,16 @@ static void pci_epf_test_cmd_handler(struct work_struct *work)
 		goto reset_handler;
 	}
 
+	if (command & COMMAND_RAISE_MSIX_IRQ) {
+		count = pci_epc_get_msix(epc, epf->func_no);
+		if (reg->irq_number > count || count <= 0)
+			goto reset_handler;
+		reg->status = STATUS_IRQ_RAISED;
+		pci_epc_raise_irq(epc, epf->func_no, PCI_EPC_IRQ_MSIX,
+				  reg->irq_number);
+		goto reset_handler;
+	}
+
 reset_handler:
 	queue_delayed_work(kpcitest_workqueue, &epf_test->cmd_handler,
 			   msecs_to_jiffies(1));
@@ -459,6 +474,8 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 	else
 		epf_test->linkup_notifier = true;
 
+	epf_test->msix_available = epc->features & EPC_FEATURE_MSIX_AVAILABLE;
+
 	epf_test->test_reg_bar = EPC_FEATURE_GET_BAR(epc->features);
 
 	ret = pci_epc_write_header(epc, epf->func_no, header);
@@ -481,6 +498,14 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 		return ret;
 	}
 
+	if (epf_test->msix_available) {
+		ret = pci_epc_set_msix(epc, epf->func_no, epf->msix_interrupts);
+		if (ret) {
+			dev_err(dev, "MSI-X configuration failed\n");
+			return ret;
+		}
+	}
+
 	if (!epf_test->linkup_notifier)
 		queue_work(kpcitest_workqueue, &epf_test->cmd_handler.work);
 
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index bb2395b56f13..37dab8116901 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -102,6 +102,7 @@ struct pci_epc {
 
 #define EPC_FEATURE_NO_LINKUP_NOTIFIER		BIT(0)
 #define EPC_FEATURE_BAR_MASK			(BIT(1) | BIT(2) | BIT(3))
+#define EPC_FEATURE_MSIX_AVAILABLE		BIT(4)
 #define EPC_FEATURE_SET_BAR(features, bar)	\
 		(features |= (EPC_FEATURE_BAR_MASK & (bar << 1)))
 #define EPC_FEATURE_GET_BAR(features)		\
diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h
index 953cf036cb26..d746fb159dcd 100644
--- a/include/uapi/linux/pcitest.h
+++ b/include/uapi/linux/pcitest.h
@@ -16,5 +16,6 @@
 #define PCITEST_WRITE		_IOW('P', 0x4, unsigned long)
 #define PCITEST_READ		_IOW('P', 0x5, unsigned long)
 #define PCITEST_COPY		_IOW('P', 0x6, unsigned long)
+#define PCITEST_MSIX		_IOW('P', 0x7, int)
 
 #endif /* __UAPI_LINUX_PCITEST_H */
-- 
cgit v1.2.3


From e03327122e2c8e6ae4565ef5b3d3cbe4364546a1 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Thu, 19 Jul 2018 10:32:20 +0200
Subject: pci_endpoint_test: Add 2 ioctl commands

Add MSI-X support and update driver documentation accordingly.

Add 2 new IOCTL commands:
 - Allow to reconfigure driver IRQ type in runtime.
 - Allow to retrieve current driver IRQ type configured.

Add IRQ type validation before executing the READ/WRITE/COPY tests.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 Documentation/ioctl/ioctl-number.txt             |   2 +-
 Documentation/misc-devices/pci-endpoint-test.txt |   3 +
 drivers/misc/pci_endpoint_test.c                 | 206 +++++++++++++++++------
 include/uapi/linux/pcitest.h                     |   2 +
 4 files changed, 165 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 65259d459fd1..c15c4f3bdd82 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -166,7 +166,7 @@ Code  Seq#(hex)	Include File		Comments
 'P'	all	linux/soundcard.h	conflict!
 'P'	60-6F	sound/sscape_ioctl.h	conflict!
 'P'	00-0F	drivers/usb/class/usblp.c	conflict!
-'P'	01-07	drivers/misc/pci_endpoint_test.c	conflict!
+'P'	01-09	drivers/misc/pci_endpoint_test.c	conflict!
 'Q'	all	linux/soundcard.h
 'R'	00-1F	linux/random.h		conflict!
 'R'	01	linux/rfkill.h		conflict!
diff --git a/Documentation/misc-devices/pci-endpoint-test.txt b/Documentation/misc-devices/pci-endpoint-test.txt
index fdfa0f66d3d0..58ccca4416b1 100644
--- a/Documentation/misc-devices/pci-endpoint-test.txt
+++ b/Documentation/misc-devices/pci-endpoint-test.txt
@@ -28,6 +28,9 @@ ioctl
 	      to be tested should be passed as argument.
  PCITEST_MSIX: Tests message signalled interrupts. The MSI-X number
 	      to be tested should be passed as argument.
+ PCITEST_SET_IRQTYPE: Changes driver IRQ type configuration. The IRQ type
+	      should be passed as argument (0: Legacy, 1:MSI, 2:MSI-X).
+ PCITEST_GET_IRQTYPE: Gets driver IRQ type configuration.
  PCITEST_WRITE: Perform write tests. The size of the buffer should be passed
 		as argument.
  PCITEST_READ: Perform read tests. The size of the buffer should be passed
diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c
index f4fef108caff..896e2df9400f 100644
--- a/drivers/misc/pci_endpoint_test.c
+++ b/drivers/misc/pci_endpoint_test.c
@@ -37,6 +37,7 @@
 
 #define DRV_MODULE_NAME				"pci-endpoint-test"
 
+#define IRQ_TYPE_UNDEFINED			-1
 #define IRQ_TYPE_LEGACY				0
 #define IRQ_TYPE_MSI				1
 #define IRQ_TYPE_MSIX				2
@@ -157,6 +158,100 @@ static irqreturn_t pci_endpoint_test_irqhandler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static void pci_endpoint_test_free_irq_vectors(struct pci_endpoint_test *test)
+{
+	struct pci_dev *pdev = test->pdev;
+
+	pci_free_irq_vectors(pdev);
+}
+
+static bool pci_endpoint_test_alloc_irq_vectors(struct pci_endpoint_test *test,
+						int type)
+{
+	int irq = -1;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+	bool res = true;
+
+	switch (type) {
+	case IRQ_TYPE_LEGACY:
+		irq = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY);
+		if (irq < 0)
+			dev_err(dev, "Failed to get Legacy interrupt\n");
+		break;
+	case IRQ_TYPE_MSI:
+		irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI interrupts\n");
+		break;
+	case IRQ_TYPE_MSIX:
+		irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX);
+		if (irq < 0)
+			dev_err(dev, "Failed to get MSI-X interrupts\n");
+		break;
+	default:
+		dev_err(dev, "Invalid IRQ type selected\n");
+	}
+
+	if (irq < 0) {
+		irq = 0;
+		res = false;
+	}
+	test->num_irqs = irq;
+
+	return res;
+}
+
+static void pci_endpoint_test_release_irq(struct pci_endpoint_test *test)
+{
+	int i;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	for (i = 0; i < test->num_irqs; i++)
+		devm_free_irq(dev, pci_irq_vector(pdev, i), test);
+
+	test->num_irqs = 0;
+}
+
+static bool pci_endpoint_test_request_irq(struct pci_endpoint_test *test)
+{
+	int i;
+	int err;
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	for (i = 0; i < test->num_irqs; i++) {
+		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
+				       pci_endpoint_test_irqhandler,
+				       IRQF_SHARED, DRV_MODULE_NAME, test);
+		if (err)
+			goto fail;
+	}
+
+	return true;
+
+fail:
+	switch (irq_type) {
+	case IRQ_TYPE_LEGACY:
+		dev_err(dev, "Failed to request IRQ %d for Legacy\n",
+			pci_irq_vector(pdev, i));
+		break;
+	case IRQ_TYPE_MSI:
+		dev_err(dev, "Failed to request IRQ %d for MSI %d\n",
+			pci_irq_vector(pdev, i),
+			i + 1);
+		break;
+	case IRQ_TYPE_MSIX:
+		dev_err(dev, "Failed to request IRQ %d for MSI-X %d\n",
+			pci_irq_vector(pdev, i),
+			i + 1);
+		break;
+	}
+
+	return false;
+}
+
 static bool pci_endpoint_test_bar(struct pci_endpoint_test *test,
 				  enum pci_barno barno)
 {
@@ -247,6 +342,11 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size)
 	if (size > SIZE_MAX - alignment)
 		goto err;
 
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
 	orig_src_addr = dma_alloc_coherent(dev, size + alignment,
 					   &orig_src_phys_addr, GFP_KERNEL);
 	if (!orig_src_addr) {
@@ -337,6 +437,11 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size)
 	if (size > SIZE_MAX - alignment)
 		goto err;
 
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
 	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
 				       GFP_KERNEL);
 	if (!orig_addr) {
@@ -400,6 +505,11 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size)
 	if (size > SIZE_MAX - alignment)
 		goto err;
 
+	if (irq_type < IRQ_TYPE_LEGACY || irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		goto err;
+	}
+
 	orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr,
 				       GFP_KERNEL);
 	if (!orig_addr) {
@@ -440,6 +550,38 @@ err:
 	return ret;
 }
 
+static bool pci_endpoint_test_set_irq(struct pci_endpoint_test *test,
+				      int req_irq_type)
+{
+	struct pci_dev *pdev = test->pdev;
+	struct device *dev = &pdev->dev;
+
+	if (req_irq_type < IRQ_TYPE_LEGACY || req_irq_type > IRQ_TYPE_MSIX) {
+		dev_err(dev, "Invalid IRQ type option\n");
+		return false;
+	}
+
+	if (irq_type == req_irq_type)
+		return true;
+
+	pci_endpoint_test_release_irq(test);
+	pci_endpoint_test_free_irq_vectors(test);
+
+	if (!pci_endpoint_test_alloc_irq_vectors(test, req_irq_type))
+		goto err;
+
+	if (!pci_endpoint_test_request_irq(test))
+		goto err;
+
+	irq_type = req_irq_type;
+	return true;
+
+err:
+	pci_endpoint_test_free_irq_vectors(test);
+	irq_type = IRQ_TYPE_UNDEFINED;
+	return false;
+}
+
 static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
 				    unsigned long arg)
 {
@@ -471,6 +613,12 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd,
 	case PCITEST_COPY:
 		ret = pci_endpoint_test_copy(test, arg);
 		break;
+	case PCITEST_SET_IRQTYPE:
+		ret = pci_endpoint_test_set_irq(test, arg);
+		break;
+	case PCITEST_GET_IRQTYPE:
+		ret = irq_type;
+		break;
 	}
 
 ret:
@@ -486,9 +634,7 @@ static const struct file_operations pci_endpoint_test_fops = {
 static int pci_endpoint_test_probe(struct pci_dev *pdev,
 				   const struct pci_device_id *ent)
 {
-	int i;
 	int err;
-	int irq = 0;
 	int id;
 	char name[20];
 	enum pci_barno bar;
@@ -537,41 +683,11 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	switch (irq_type) {
-	case IRQ_TYPE_LEGACY:
-		break;
-	case IRQ_TYPE_MSI:
-		irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
-		if (irq < 0)
-			dev_err(dev, "Failed to get MSI interrupts\n");
-		test->num_irqs = irq;
-		break;
-	case IRQ_TYPE_MSIX:
-		irq = pci_alloc_irq_vectors(pdev, 1, 2048, PCI_IRQ_MSIX);
-		if (irq < 0)
-			dev_err(dev, "Failed to get MSI-X interrupts\n");
-		test->num_irqs = irq;
-		break;
-	default:
-		dev_err(dev, "Invalid IRQ type selected\n");
-	}
+	if (!pci_endpoint_test_alloc_irq_vectors(test, irq_type))
+		goto err_disable_irq;
 
-	err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler,
-			       IRQF_SHARED, DRV_MODULE_NAME, test);
-	if (err) {
-		dev_err(dev, "Failed to request IRQ %d\n", pdev->irq);
-		goto err_disable_msi;
-	}
-
-	for (i = 1; i < irq; i++) {
-		err = devm_request_irq(dev, pci_irq_vector(pdev, i),
-				       pci_endpoint_test_irqhandler,
-				       IRQF_SHARED, DRV_MODULE_NAME, test);
-		if (err)
-			dev_err(dev, "Failed to request IRQ %d for MSI%s %d\n",
-				pci_irq_vector(pdev, i),
-				irq_type == IRQ_TYPE_MSIX ? "-X" : "", i + 1);
-	}
+	if (!pci_endpoint_test_request_irq(test))
+		goto err_disable_irq;
 
 	for (bar = BAR_0; bar <= BAR_5; bar++) {
 		if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) {
@@ -630,13 +746,10 @@ err_iounmap:
 		if (test->bar[bar])
 			pci_iounmap(pdev, test->bar[bar]);
 	}
+	pci_endpoint_test_release_irq(test);
 
-	for (i = 0; i < irq; i++)
-		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test);
-
-err_disable_msi:
-	pci_disable_msi(pdev);
-	pci_disable_msix(pdev);
+err_disable_irq:
+	pci_endpoint_test_free_irq_vectors(test);
 	pci_release_regions(pdev);
 
 err_disable_pdev:
@@ -648,7 +761,6 @@ err_disable_pdev:
 static void pci_endpoint_test_remove(struct pci_dev *pdev)
 {
 	int id;
-	int i;
 	enum pci_barno bar;
 	struct pci_endpoint_test *test = pci_get_drvdata(pdev);
 	struct miscdevice *misc_device = &test->miscdev;
@@ -665,10 +777,10 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev)
 		if (test->bar[bar])
 			pci_iounmap(pdev, test->bar[bar]);
 	}
-	for (i = 0; i < test->num_irqs; i++)
-		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), test);
-	pci_disable_msi(pdev);
-	pci_disable_msix(pdev);
+
+	pci_endpoint_test_release_irq(test);
+	pci_endpoint_test_free_irq_vectors(test);
+
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 }
diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h
index d746fb159dcd..cbf422e56696 100644
--- a/include/uapi/linux/pcitest.h
+++ b/include/uapi/linux/pcitest.h
@@ -17,5 +17,7 @@
 #define PCITEST_READ		_IOW('P', 0x5, unsigned long)
 #define PCITEST_COPY		_IOW('P', 0x6, unsigned long)
 #define PCITEST_MSIX		_IOW('P', 0x7, int)
+#define PCITEST_SET_IRQTYPE	_IOW('P', 0x8, int)
+#define PCITEST_GET_IRQTYPE	_IO('P', 0x9)
 
 #endif /* __UAPI_LINUX_PCITEST_H */
-- 
cgit v1.2.3


From 44109c60176ae73924a42a6bef64ef151aba9095 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:34:28 +0100
Subject: MIPS: Add DSP ASE regset support

Define an NT_MIPS_DSP core file note type and implement a corresponding
regset holding the DSP ASE register context, following the layout of the
`mips_dsp_state' structure, except for the DSPControl register stored as
a 64-bit rather than 32-bit quantity in a 64-bit note.

The lack of DSP ASE register saving to core files can be considered a
design flaw with commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP
ASE."), leading to an incomplete state being saved.  Consequently no DSP
ASE regset has been created with commit 7aeb753b5353 ("MIPS: Implement
task_user_regset_view."), when regset support was added to the MIPS
port.

Additionally there is no way for ptrace(2) to correctly access the DSP
accumulator registers in n32 processes with the existing interfaces.
This is due to 32-bit truncation of data passed with PTRACE_PEEKUSR and
PTRACE_POKEUSR requests, which cannot be avoided owing to how the data
types for ptrace(3) have been defined.  This new NT_MIPS_DSP regset
fills the missing interface gap.

[paul.burton@mips.com:
  - Change NT_MIPS_DSP to 0x800 to avoid conflict with NT_VMCOREDD
    introduced by commit 2724273e8fd0 ("vmcore: add API to collect
    hardware dump in second kernel").
  - Drop stable tag. Whilst I agree the lack of this functionality can
    be considered a flaw in earlier DSP ASE support, it's still new
    functionality which doesn't meet up to the requirements set out in
    Documentation/process/stable-kernel-rules.rst.]

Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
References: 7aeb753b5353 ("MIPS: Implement task_user_regset_view.")
Patchwork: https://patchwork.linux-mips.org/patch/19330/
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/ptrace.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h  |   1 +
 2 files changed, 190 insertions(+)

(limited to 'include')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 8c8d42823bda..a536271ba084 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -41,6 +41,7 @@
 #include <asm/mipsmtregs.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/processor.h>
 #include <asm/syscall.h>
 #include <linux/uaccess.h>
 #include <asm/bootinfo.h>
@@ -589,9 +590,179 @@ static int fpr_set(struct task_struct *target,
 	return err;
 }
 
+#if defined(CONFIG_32BIT) || defined(CONFIG_MIPS32_O32)
+
+/*
+ * Copy the DSP context to the supplied 32-bit NT_MIPS_DSP buffer.
+ */
+static int dsp32_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf)
+{
+	unsigned int start, num_regs, i;
+	u32 dspregs[NUM_DSP_REGS + 1];
+
+	BUG_ON(count % sizeof(u32));
+
+	if (!cpu_has_dsp)
+		return -EIO;
+
+	start = pos / sizeof(u32);
+	num_regs = count / sizeof(u32);
+
+	if (start + num_regs > NUM_DSP_REGS + 1)
+		return -EIO;
+
+	for (i = start; i < num_regs; i++)
+		switch (i) {
+		case 0 ... NUM_DSP_REGS - 1:
+			dspregs[i] = target->thread.dsp.dspr[i];
+			break;
+		case NUM_DSP_REGS:
+			dspregs[i] = target->thread.dsp.dspcontrol;
+			break;
+		}
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, dspregs, 0,
+				   sizeof(dspregs));
+}
+
+/*
+ * Copy the supplied 32-bit NT_MIPS_DSP buffer to the DSP context.
+ */
+static int dsp32_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	unsigned int start, num_regs, i;
+	u32 dspregs[NUM_DSP_REGS + 1];
+	int err;
+
+	BUG_ON(count % sizeof(u32));
+
+	if (!cpu_has_dsp)
+		return -EIO;
+
+	start = pos / sizeof(u32);
+	num_regs = count / sizeof(u32);
+
+	if (start + num_regs > NUM_DSP_REGS + 1)
+		return -EIO;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, dspregs, 0,
+				 sizeof(dspregs));
+	if (err)
+		return err;
+
+	for (i = start; i < num_regs; i++)
+		switch (i) {
+		case 0 ... NUM_DSP_REGS - 1:
+			target->thread.dsp.dspr[i] = (s32)dspregs[i];
+			break;
+		case NUM_DSP_REGS:
+			target->thread.dsp.dspcontrol = (s32)dspregs[i];
+			break;
+		}
+
+	return 0;
+}
+
+#endif /* CONFIG_32BIT || CONFIG_MIPS32_O32 */
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Copy the DSP context to the supplied 64-bit NT_MIPS_DSP buffer.
+ */
+static int dsp64_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     void *kbuf, void __user *ubuf)
+{
+	unsigned int start, num_regs, i;
+	u64 dspregs[NUM_DSP_REGS + 1];
+
+	BUG_ON(count % sizeof(u64));
+
+	if (!cpu_has_dsp)
+		return -EIO;
+
+	start = pos / sizeof(u64);
+	num_regs = count / sizeof(u64);
+
+	if (start + num_regs > NUM_DSP_REGS + 1)
+		return -EIO;
+
+	for (i = start; i < num_regs; i++)
+		switch (i) {
+		case 0 ... NUM_DSP_REGS - 1:
+			dspregs[i] = target->thread.dsp.dspr[i];
+			break;
+		case NUM_DSP_REGS:
+			dspregs[i] = target->thread.dsp.dspcontrol;
+			break;
+		}
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, dspregs, 0,
+				   sizeof(dspregs));
+}
+
+/*
+ * Copy the supplied 64-bit NT_MIPS_DSP buffer to the DSP context.
+ */
+static int dsp64_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	unsigned int start, num_regs, i;
+	u64 dspregs[NUM_DSP_REGS + 1];
+	int err;
+
+	BUG_ON(count % sizeof(u64));
+
+	if (!cpu_has_dsp)
+		return -EIO;
+
+	start = pos / sizeof(u64);
+	num_regs = count / sizeof(u64);
+
+	if (start + num_regs > NUM_DSP_REGS + 1)
+		return -EIO;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, dspregs, 0,
+				 sizeof(dspregs));
+	if (err)
+		return err;
+
+	for (i = start; i < num_regs; i++)
+		switch (i) {
+		case 0 ... NUM_DSP_REGS - 1:
+			target->thread.dsp.dspr[i] = dspregs[i];
+			break;
+		case NUM_DSP_REGS:
+			target->thread.dsp.dspcontrol = dspregs[i];
+			break;
+		}
+
+	return 0;
+}
+
+#endif /* CONFIG_64BIT */
+
+/*
+ * Determine whether the DSP context is present.
+ */
+static int dsp_active(struct task_struct *target,
+		      const struct user_regset *regset)
+{
+	return cpu_has_dsp ? NUM_DSP_REGS + 1 : -ENODEV;
+}
+
 enum mips_regset {
 	REGSET_GPR,
 	REGSET_FPR,
+	REGSET_DSP,
 };
 
 struct pt_regs_offset {
@@ -697,6 +868,15 @@ static const struct user_regset mips_regsets[] = {
 		.get		= fpr_get,
 		.set		= fpr_set,
 	},
+	[REGSET_DSP] = {
+		.core_note_type	= NT_MIPS_DSP,
+		.n		= NUM_DSP_REGS + 1,
+		.size		= sizeof(u32),
+		.align		= sizeof(u32),
+		.get		= dsp32_get,
+		.set		= dsp32_set,
+		.active		= dsp_active,
+	},
 };
 
 static const struct user_regset_view user_mips_view = {
@@ -728,6 +908,15 @@ static const struct user_regset mips64_regsets[] = {
 		.get		= fpr_get,
 		.set		= fpr_set,
 	},
+	[REGSET_DSP] = {
+		.core_note_type	= NT_MIPS_DSP,
+		.n		= NUM_DSP_REGS + 1,
+		.size		= sizeof(u64),
+		.align		= sizeof(u64),
+		.get		= dsp64_get,
+		.set		= dsp64_set,
+		.active		= dsp_active,
+	},
 };
 
 static const struct user_regset_view user_mips64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 4e12c423b9fe..e326c99b3881 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -422,6 +422,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
+#define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
-- 
cgit v1.2.3


From 1ae22a0e35636efceab83728ba30b013df761592 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@mips.com>
Date: Tue, 15 May 2018 23:40:18 +0100
Subject: MIPS: Add FP_MODE regset support

Define an NT_MIPS_FP_MODE core file note and implement a corresponding
regset holding the state handled by PR_SET_FP_MODE and PR_GET_FP_MODE
prctl(2) requests.  This lets debug software correctly interpret the
contents of floating-point general registers both in live debugging and
in core files, and also switch floating-point modes of a live process.

[paul.burton@mips.com:
  - Changed NT_MIPS_FP_MODE to 0x801 to match first nibble of
    NT_MIPS_DSP, which was also changed to avoid a conflict.]

Signed-off-by: Maciej W. Rozycki <macro@mips.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19331/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/ptrace.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h  |  1 +
 2 files changed, 64 insertions(+)

(limited to 'include')

diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index a536271ba084..e5ba56c01ee0 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -759,10 +759,57 @@ static int dsp_active(struct task_struct *target,
 	return cpu_has_dsp ? NUM_DSP_REGS + 1 : -ENODEV;
 }
 
+/* Copy the FP mode setting to the supplied NT_MIPS_FP_MODE buffer.  */
+static int fp_mode_get(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       void *kbuf, void __user *ubuf)
+{
+	int fp_mode;
+
+	fp_mode = mips_get_process_fp_mode(target);
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fp_mode, 0,
+				   sizeof(fp_mode));
+}
+
+/*
+ * Copy the supplied NT_MIPS_FP_MODE buffer to the FP mode setting.
+ *
+ * We optimize for the case where `count % sizeof(int) == 0', which
+ * is supposed to have been guaranteed by the kernel before calling
+ * us, e.g. in `ptrace_regset'.  We enforce that requirement, so
+ * that we can safely avoid preinitializing temporaries for partial
+ * mode writes.
+ */
+static int fp_mode_set(struct task_struct *target,
+		       const struct user_regset *regset,
+		       unsigned int pos, unsigned int count,
+		       const void *kbuf, const void __user *ubuf)
+{
+	int fp_mode;
+	int err;
+
+	BUG_ON(count % sizeof(int));
+
+	if (pos + count > sizeof(fp_mode))
+		return -EIO;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fp_mode, 0,
+				 sizeof(fp_mode));
+	if (err)
+		return err;
+
+	if (count > 0)
+		err = mips_set_process_fp_mode(target, fp_mode);
+
+	return err;
+}
+
 enum mips_regset {
 	REGSET_GPR,
 	REGSET_FPR,
 	REGSET_DSP,
+	REGSET_FP_MODE,
 };
 
 struct pt_regs_offset {
@@ -877,6 +924,14 @@ static const struct user_regset mips_regsets[] = {
 		.set		= dsp32_set,
 		.active		= dsp_active,
 	},
+	[REGSET_FP_MODE] = {
+		.core_note_type	= NT_MIPS_FP_MODE,
+		.n		= 1,
+		.size		= sizeof(int),
+		.align		= sizeof(int),
+		.get		= fp_mode_get,
+		.set		= fp_mode_set,
+	},
 };
 
 static const struct user_regset_view user_mips_view = {
@@ -917,6 +972,14 @@ static const struct user_regset mips64_regsets[] = {
 		.set		= dsp64_set,
 		.active		= dsp_active,
 	},
+	[REGSET_FP_MODE] = {
+		.core_note_type	= NT_MIPS_FP_MODE,
+		.n		= 1,
+		.size		= sizeof(int),
+		.align		= sizeof(int),
+		.get		= fp_mode_get,
+		.set		= fp_mode_set,
+	},
 };
 
 static const struct user_regset_view user_mips64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e326c99b3881..c5358e0ae7c5 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -423,6 +423,7 @@ typedef struct elf64_shdr {
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE	0x801		/* MIPS floating-point mode */
 
 /* Note header in a PT_NOTE section */
 typedef struct elf32_note {
-- 
cgit v1.2.3


From d70420bcd447e5400a0116b20b9b6088bb32448e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 18 Jul 2018 22:33:02 +0200
Subject: mtd: adapt misleading comment in mtd_oob_ops structure

A comment in the kernel doc of the mtd_oob_ops structure tells that it
is not possible to write more than one page with OOB. This is actually
true for only a few MTD devices like 'onenand' but it is definitely not
a general limitation. While this would benefit to be handled elsewhere
either by the MTD layer or by the limited drivers, let's update this
comment to reflect the reality.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 include/linux/mtd/mtd.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a86c4fa93115..cd0be91bdefa 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -67,9 +67,11 @@ struct mtd_erase_region_info {
  * @datbuf:	data buffer - if NULL only oob data are read/written
  * @oobbuf:	oob data buffer
  *
- * Note, it is allowed to read more than one OOB area at one go, but not write.
- * The interface assumes that the OOB write requests program only one page's
- * OOB area.
+ * Note, some MTD drivers do not allow you to write more than one OOB area at
+ * one go. If you try to do that on such an MTD device, -EINVAL will be
+ * returned. If you want to make your implementation portable on all kind of MTD
+ * devices you should split the write request into several sub-requests when the
+ * request crosses a page boundary.
  */
 struct mtd_oob_ops {
 	unsigned int	mode;
-- 
cgit v1.2.3


From 0d6030ac041f6835974deb88a1a9c299b4adc3ad Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Wed, 18 Jul 2018 10:42:17 +0200
Subject: mtd: rawnand: Expose _notsupp() helpers for raw page accessors

Some implementations simply can't disable their ECC engine. Expose
helpers returning -ENOTSUPP so that the caller knows that raw accesses
are not supported instead of silently falling back to non-raw
accessors.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h      |  4 ++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 4fa5e20d9690..c4b74630f4c5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -2966,6 +2966,23 @@ int nand_check_erased_ecc_chunk(void *data, int datalen,
 }
 EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
 
+/**
+ * nand_read_page_raw_notsupp - dummy read raw page function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
+ * @page: page number to read
+ *
+ * Returns -ENOTSUPP unconditionally.
+ */
+int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+			       u8 *buf, int oob_required, int page)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_read_page_raw_notsupp);
+
 /**
  * nand_read_page_raw - [INTERN] read raw page data without ecc
  * @mtd: mtd info structure
@@ -3960,6 +3977,22 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 	return ret;
 }
 
+/**
+ * nand_write_page_raw_notsupp - dummy raw page write function
+ * @mtd: mtd info structure
+ * @chip: nand chip info structure
+ * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
+ * @page: page number to write
+ *
+ * Returns -ENOTSUPP unconditionally.
+ */
+int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+				const u8 *buf, int oob_required, int page)
+{
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL(nand_write_page_raw_notsupp);
 
 /**
  * nand_write_page_raw - [INTERN] raw page write function
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 11c2426fc363..f60fad29eae6 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1681,10 +1681,14 @@ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 		       uint8_t *buf, int oob_required, int page);
+int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+			       u8 *buf, int oob_required, int page);
 
 /* Default write_page_raw implementation */
 int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 			const uint8_t *buf, int oob_required, int page);
+int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
+				const u8 *buf, int oob_required, int page);
 
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
-- 
cgit v1.2.3


From 60ed982a4e78ff938824a750dbac8a10e5b472ef Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Thu, 21 Jun 2018 16:48:26 -0700
Subject: PCI/AER: Move internal declarations to drivers/pci/pci.h

Since pci_aer_init() and pci_no_aer() are used only internally, move their
declarations to the PCI internal header file.  Also, no one cares about
return value of pci_aer_init(), so make it void.

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h      | 8 ++++++++
 drivers/pci/pcie/aer.c | 4 ++--
 include/linux/pci.h    | 4 ----
 3 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 4f723442f602..52bc5b350dfb 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -480,4 +480,12 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev,
 }
 #endif
 
+#ifdef CONFIG_PCIEAER
+void pci_no_aer(void);
+void pci_aer_init(struct pci_dev *dev);
+#else
+static inline void pci_no_aer(void) { }
+static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
+#endif
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e6d5255d718c..0c6fe22eaf75 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -382,10 +382,10 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 	return 0;
 }
 
-int pci_aer_init(struct pci_dev *dev)
+void pci_aer_init(struct pci_dev *dev)
 {
 	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-	return pci_cleanup_aer_error_status_regs(dev);
+	pci_cleanup_aer_error_status_regs(dev);
 }
 
 #define AER_AGENT_RECEIVER		0
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..b4ffea05c999 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1468,13 +1468,9 @@ static inline bool pcie_aspm_support_enabled(void) { return false; }
 #endif
 
 #ifdef CONFIG_PCIEAER
-void pci_no_aer(void);
 bool pci_aer_available(void);
-int pci_aer_init(struct pci_dev *dev);
 #else
-static inline void pci_no_aer(void) { }
 static inline bool pci_aer_available(void) { return false; }
-static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
 #endif
 
 #ifdef CONFIG_PCIE_ECRC
-- 
cgit v1.2.3


From db89ccbe52c7885644ba578c7771e57620f879b1 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Sat, 30 Jun 2018 15:07:17 -0500
Subject: PCI/AER: Define aer_stats structure for AER capable devices

Define a structure to hold the AER statistics.  There are 2 groups of
statistics: dev_* counters that are to be collected for all AER capable
devices and rootport_* counters that are collected for all (AER capable)
rootports only.  Allocate and free this structure when device is added or
released (thus counters survive the lifetime of the device).

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h      |  2 ++
 drivers/pci/pcie/aer.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++--
 drivers/pci/probe.c    |  1 +
 include/linux/pci.h    |  1 +
 4 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 52bc5b350dfb..1877a14e06a9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -483,9 +483,11 @@ static inline int devm_of_pci_get_host_bridge_resources(struct device *dev,
 #ifdef CONFIG_PCIEAER
 void pci_no_aer(void);
 void pci_aer_init(struct pci_dev *dev);
+void pci_aer_exit(struct pci_dev *dev);
 #else
 static inline void pci_no_aer(void) { }
 static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; }
+static inline void pci_aer_exit(struct pci_dev *d) { }
 #endif
 
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 0c6fe22eaf75..fe1b9d22a331 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -32,6 +32,9 @@
 
 #define AER_ERROR_SOURCES_MAX		100
 
+#define AER_MAX_TYPEOF_COR_ERRS		16	/* as per PCI_ERR_COR_STATUS */
+#define AER_MAX_TYPEOF_UNCOR_ERRS	26	/* as per PCI_ERR_UNCOR_STATUS*/
+
 struct aer_err_source {
 	unsigned int status;
 	unsigned int id;
@@ -56,6 +59,42 @@ struct aer_rpc {
 					 */
 };
 
+/* AER stats for the device */
+struct aer_stats {
+
+	/*
+	 * Fields for all AER capable devices. They indicate the errors
+	 * "as seen by this device". Note that this may mean that if an
+	 * end point is causing problems, the AER counters may increment
+	 * at its link partner (e.g. root port) because the errors will be
+	 * "seen" by the link partner and not the the problematic end point
+	 * itself (which may report all counters as 0 as it never saw any
+	 * problems).
+	 */
+	/* Counters for different type of correctable errors */
+	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
+	/* Counters for different type of fatal uncorrectable errors */
+	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Counters for different type of nonfatal uncorrectable errors */
+	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+	/* Total number of ERR_COR sent by this device */
+	u64 dev_total_cor_errs;
+	/* Total number of ERR_FATAL sent by this device */
+	u64 dev_total_fatal_errs;
+	/* Total number of ERR_NONFATAL sent by this device */
+	u64 dev_total_nonfatal_errs;
+
+	/*
+	 * Fields for Root ports & root complex event collectors only, these
+	 * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
+	 * messages received by the root port / event collector, INCLUDING the
+	 * ones that are generated internally (by the rootport itself)
+	 */
+	u64 rootport_total_cor_errs;
+	u64 rootport_total_fatal_errs;
+	u64 rootport_total_nonfatal_errs;
+};
+
 #define AER_LOG_TLP_MASKS		(PCI_ERR_UNC_POISON_TLP|	\
 					PCI_ERR_UNC_ECRC|		\
 					PCI_ERR_UNC_UNSUP|		\
@@ -385,9 +424,19 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
 void pci_aer_init(struct pci_dev *dev)
 {
 	dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+
+	if (dev->aer_cap)
+		dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+
 	pci_cleanup_aer_error_status_regs(dev);
 }
 
+void pci_aer_exit(struct pci_dev *dev)
+{
+	kfree(dev->aer_stats);
+	dev->aer_stats = NULL;
+}
+
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -438,7 +487,7 @@ static const char *aer_error_layer[] = {
 	"Transaction Layer"
 };
 
-static const char *aer_correctable_error_string[] = {
+static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
 	"RxErr",			/* Bit Position 0	*/
 	NULL,
 	NULL,
@@ -457,7 +506,7 @@ static const char *aer_correctable_error_string[] = {
 	"HeaderOF",			/* Bit Position 15	*/
 };
 
-static const char *aer_uncorrectable_error_string[] = {
+static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
 	"Undefined",			/* Bit Position 0	*/
 	NULL,
 	NULL,
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..48edd0c9e4bc 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2064,6 +2064,7 @@ static void pci_configure_device(struct pci_dev *dev)
 
 static void pci_release_capabilities(struct pci_dev *dev)
 {
+	pci_aer_exit(dev);
 	pci_vpd_release(dev);
 	pci_iov_release(dev);
 	pci_free_cap_save_buffers(dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b4ffea05c999..6bc0aa0fc33f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -299,6 +299,7 @@ struct pci_dev {
 	u8		hdr_type;	/* PCI header type (`multi' flag masked out) */
 #ifdef CONFIG_PCIEAER
 	u16		aer_cap;	/* AER capability offset */
+	struct aer_stats *aer_stats;	/* AER stats for this device */
 #endif
 	u8		pcie_cap;	/* PCIe capability offset */
 	u8		msi_cap;	/* MSI capability offset */
-- 
cgit v1.2.3


From 3eca993740b8eb40f514b90b1877a4dbcf0a6710 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 19 Jul 2018 16:55:34 -0400
Subject: timekeeping: Replace read_boot_clock64() with
 read_persistent_wall_and_boot_offset()

If architecture does not support exact boot time, it is challenging to
estimate boot time without having a reference to the current persistent
clock value. Yet, it cannot read the persistent clock time again, because
this may lead to math discrepancies with the caller of read_boot_clock64()
who have read the persistent clock at a different time.

This is why it is better to provide two values simultaneously: the
persistent clock value, and the boot time.

Replace read_boot_clock64() with:
read_persistent_wall_and_boot_offset(wall_time, boot_offset)

Where wall_time is returned by read_persistent_clock() And boot_offset is
wall_time - boot time, which defaults to 0.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: steven.sistare@oracle.com
Cc: daniel.m.jordan@oracle.com
Cc: linux@armlinux.org.uk
Cc: schwidefsky@de.ibm.com
Cc: heiko.carstens@de.ibm.com
Cc: john.stultz@linaro.org
Cc: sboyd@codeaurora.org
Cc: hpa@zytor.com
Cc: douly.fnst@cn.fujitsu.com
Cc: peterz@infradead.org
Cc: prarit@redhat.com
Cc: feng.tang@intel.com
Cc: pmladek@suse.com
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: linux-s390@vger.kernel.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180719205545.16512-16-pasha.tatashin@oracle.com
---
 include/linux/timekeeping.h |  3 ++-
 kernel/time/timekeeping.c   | 59 +++++++++++++++++++++++----------------------
 2 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..686bc27acef0 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
 extern int persistent_clock_is_local;
 
 extern void read_persistent_clock64(struct timespec64 *ts);
-extern void read_boot_clock64(struct timespec64 *ts);
+void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
+					   struct timespec64 *boot_offset);
 extern int update_persistent_clock64(struct timespec64 now);
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4786df904c22..cb738f825c12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -17,6 +17,7 @@
 #include <linux/nmi.h>
 #include <linux/sched.h>
 #include <linux/sched/loadavg.h>
+#include <linux/sched/clock.h>
 #include <linux/syscore_ops.h>
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
@@ -1496,18 +1497,20 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
 }
 
 /**
- * read_boot_clock64 -  Return time of the system start.
+ * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
+ *                                        from the boot.
  *
  * Weak dummy function for arches that do not yet support it.
- * Function to read the exact time the system has been started.
- * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
- *
- *  XXX - Do be sure to remove it once all arches implement it.
+ * wall_time	- current time as returned by persistent clock
+ * boot_offset	- offset that is defined as wall_time - boot_time
+ *		  default to 0.
  */
-void __weak read_boot_clock64(struct timespec64 *ts)
+void __weak __init
+read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
+				     struct timespec64 *boot_offset)
 {
-	ts->tv_sec = 0;
-	ts->tv_nsec = 0;
+	read_persistent_clock64(wall_time);
+	*boot_offset = (struct timespec64){0};
 }
 
 /* Flag for if timekeeping_resume() has injected sleeptime */
@@ -1521,28 +1524,29 @@ static bool persistent_clock_exists;
  */
 void __init timekeeping_init(void)
 {
+	struct timespec64 wall_time, boot_offset, wall_to_mono;
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct clocksource *clock;
 	unsigned long flags;
-	struct timespec64 now, boot, tmp;
-
-	read_persistent_clock64(&now);
-	if (!timespec64_valid_strict(&now)) {
-		pr_warn("WARNING: Persistent clock returned invalid value!\n"
-			"         Check your CMOS/BIOS settings.\n");
-		now.tv_sec = 0;
-		now.tv_nsec = 0;
-	} else if (now.tv_sec || now.tv_nsec)
-		persistent_clock_exists = true;
 
-	read_boot_clock64(&boot);
-	if (!timespec64_valid_strict(&boot)) {
-		pr_warn("WARNING: Boot clock returned invalid value!\n"
-			"         Check your CMOS/BIOS settings.\n");
-		boot.tv_sec = 0;
-		boot.tv_nsec = 0;
+	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
+	if (timespec64_valid_strict(&wall_time) &&
+	    timespec64_to_ns(&wall_time) > 0) {
+		persistent_clock_exists = true;
+	} else {
+		pr_warn("Persistent clock returned invalid value");
+		wall_time = (struct timespec64){0};
 	}
 
+	if (timespec64_compare(&wall_time, &boot_offset) < 0)
+		boot_offset = (struct timespec64){0};
+
+	/*
+	 * We want set wall_to_mono, so the following is true:
+	 * wall time + wall_to_mono = boot time
+	 */
+	wall_to_mono = timespec64_sub(boot_offset, wall_time);
+
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 	ntp_init();
@@ -1552,13 +1556,10 @@ void __init timekeeping_init(void)
 		clock->enable(clock);
 	tk_setup_internals(tk, clock);
 
-	tk_set_xtime(tk, &now);
+	tk_set_xtime(tk, &wall_time);
 	tk->raw_sec = 0;
-	if (boot.tv_sec == 0 && boot.tv_nsec == 0)
-		boot = tk_xtime(tk);
 
-	set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
-	tk_set_wall_to_mono(tk, tmp);
+	tk_set_wall_to_mono(tk, wall_to_mono);
 
 	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 
-- 
cgit v1.2.3


From 5d2a4e91a541cb04d20d11602f0f9340291322ac Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Thu, 19 Jul 2018 16:55:41 -0400
Subject: sched/clock: Move sched clock initialization and merge with generic
 clock

sched_clock_postinit() initializes a generic clock on systems where no
other clock is provided. This function may be called only after
timekeeping_init().

Rename sched_clock_postinit to generic_clock_inti() and call it from
sched_clock_init(). Move the call for sched_clock_init() until after
time_init().

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: steven.sistare@oracle.com
Cc: daniel.m.jordan@oracle.com
Cc: linux@armlinux.org.uk
Cc: schwidefsky@de.ibm.com
Cc: heiko.carstens@de.ibm.com
Cc: john.stultz@linaro.org
Cc: sboyd@codeaurora.org
Cc: hpa@zytor.com
Cc: douly.fnst@cn.fujitsu.com
Cc: prarit@redhat.com
Cc: feng.tang@intel.com
Cc: pmladek@suse.com
Cc: gnomes@lxorguk.ukuu.org.uk
Cc: linux-s390@vger.kernel.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: pbonzini@redhat.com
Link: https://lkml.kernel.org/r/20180719205545.16512-23-pasha.tatashin@oracle.com
---
 include/linux/sched_clock.h |  5 ++---
 init/main.c                 |  4 ++--
 kernel/sched/clock.c        | 27 +++++++++++++++++----------
 kernel/sched/core.c         |  1 -
 kernel/time/sched_clock.c   |  2 +-
 5 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 411b52e424e1..abe28d5cb3f4 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -9,17 +9,16 @@
 #define LINUX_SCHED_CLOCK
 
 #ifdef CONFIG_GENERIC_SCHED_CLOCK
-extern void sched_clock_postinit(void);
+extern void generic_sched_clock_init(void);
 
 extern void sched_clock_register(u64 (*read)(void), int bits,
 				 unsigned long rate);
 #else
-static inline void sched_clock_postinit(void) { }
+static inline void generic_sched_clock_init(void) { }
 
 static inline void sched_clock_register(u64 (*read)(void), int bits,
 					unsigned long rate)
 {
-	;
 }
 #endif
 
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..162d931c9511 100644
--- a/init/main.c
+++ b/init/main.c
@@ -79,7 +79,7 @@
 #include <linux/pti.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
-#include <linux/sched_clock.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/context_tracking.h>
@@ -642,7 +642,7 @@ asmlinkage __visible void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
-	sched_clock_postinit();
+	sched_clock_init();
 	printk_safe_init();
 	perf_event_init();
 	profile_init();
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index 10c83e73837a..0e9dbb2d9aea 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -53,6 +53,7 @@
  *
  */
 #include "sched.h"
+#include <linux/sched_clock.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -68,11 +69,6 @@ EXPORT_SYMBOL_GPL(sched_clock);
 
 __read_mostly int sched_clock_running;
 
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 /*
  * We must start with !__sched_clock_stable because the unstable -> stable
@@ -199,6 +195,15 @@ void clear_sched_clock_stable(void)
 		__clear_sched_clock_stable();
 }
 
+static void __sched_clock_gtod_offset(void)
+{
+	__gtod_offset = (sched_clock() + __sched_clock_offset) - ktime_get_ns();
+}
+
+void __init sched_clock_init(void)
+{
+	sched_clock_running = 1;
+}
 /*
  * We run this as late_initcall() such that it runs after all built-in drivers,
  * notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
@@ -385,8 +390,6 @@ void sched_clock_tick(void)
 
 void sched_clock_tick_stable(void)
 {
-	u64 gtod, clock;
-
 	if (!sched_clock_stable())
 		return;
 
@@ -398,9 +401,7 @@ void sched_clock_tick_stable(void)
 	 * TSC to be unstable, any computation will be computing crap.
 	 */
 	local_irq_disable();
-	gtod = ktime_get_ns();
-	clock = sched_clock();
-	__gtod_offset = (clock + __sched_clock_offset) - gtod;
+	__sched_clock_gtod_offset();
 	local_irq_enable();
 }
 
@@ -434,6 +435,12 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
+void __init sched_clock_init(void)
+{
+	sched_clock_running = 1;
+	generic_sched_clock_init();
+}
+
 u64 sched_clock_cpu(int cpu)
 {
 	if (unlikely(!sched_clock_running))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..552406e9713b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5954,7 +5954,6 @@ void __init sched_init(void)
 	int i, j;
 	unsigned long alloc_size = 0, ptr;
 
-	sched_clock_init();
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 2d8f05aad442..cbc72c2c1fca 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
-void __init sched_clock_postinit(void)
+void __init generic_sched_clock_init(void)
 {
 	/*
 	 * If no sched_clock() function has been provided at that point,
-- 
cgit v1.2.3


From 381634cad15b711e033a2638d558232b60f753f6 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:11 -0500
Subject: PCI: Hide pci_reset_bridge_secondary_bus() from drivers

Rename pci_reset_bridge_secondary_bus() to pci_bridge_secondary_bus_reset()
and move the declaration from linux/pci.h to drivers/pci.h to be used
internally in PCI directory only.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/hotplug/pciehp_hpc.c |  2 +-
 drivers/pci/pci.c                | 11 +++++------
 drivers/pci/pci.h                |  1 +
 drivers/pci/pcie/aer.c           |  2 +-
 drivers/pci/pcie/err.c           |  2 +-
 include/linux/pci.h              |  1 -
 6 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index bbaa2114f953..8dae23221344 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -746,7 +746,7 @@ int pciehp_reset_slot(struct slot *slot, int probe)
 	if (pciehp_poll_mode)
 		del_timer_sync(&ctrl->poll_timer);
 
-	rc = pci_reset_bridge_secondary_bus(ctrl->pcie->port);
+	rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port);
 
 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask);
 	pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 98d149070205..236220cb0f77 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4224,19 +4224,18 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
 }
 
 /**
- * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
+ * pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge.
  * @dev: Bridge device
  *
  * Use the bridge control register to assert reset on the secondary bus.
  * Devices on the secondary bus are left in power-on state.
  */
-int pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 {
 	pcibios_reset_secondary_bus(dev);
 
 	return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
 }
-EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
 
 static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 {
@@ -4253,7 +4252,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 	if (probe)
 		return 0;
 
-	return pci_reset_bridge_secondary_bus(dev->bus->self);
+	return pci_bridge_secondary_bus_reset(dev->bus->self);
 }
 
 static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
@@ -4860,7 +4859,7 @@ static int pci_bus_reset(struct pci_bus *bus, int probe)
 
 	might_sleep();
 
-	ret = pci_reset_bridge_secondary_bus(bus->self);
+	ret = pci_bridge_secondary_bus_reset(bus->self);
 
 	pci_bus_unlock(bus);
 
@@ -4924,7 +4923,7 @@ int pci_try_reset_bus(struct pci_bus *bus)
 
 	if (pci_bus_trylock(bus)) {
 		might_sleep();
-		rc = pci_reset_bridge_secondary_bus(bus->self);
+		rc = pci_bridge_secondary_bus_reset(bus->self);
 		pci_bus_unlock(bus);
 	} else
 		rc = -EAGAIN;
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c358e7a07f3f..f784263ad587 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -33,6 +33,7 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai,
 		  enum pci_mmap_api mmap_api);
 
 int pci_probe_reset_function(struct pci_dev *dev);
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index bde723db3d65..8c12efca9259 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1314,7 +1314,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
 	reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
 	pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
 
-	rc = pci_reset_bridge_secondary_bus(dev);
+	rc = pci_bridge_secondary_bus_reset(dev);
 	pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");
 
 	/* Clear Root Error Status */
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 03075cff86f4..ae72f88d3ca2 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -177,7 +177,7 @@ static pci_ers_result_t default_reset_link(struct pci_dev *dev)
 {
 	int rc;
 
-	rc = pci_reset_bridge_secondary_bus(dev);
+	rc = pci_bridge_secondary_bus_reset(dev);
 	pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n");
 	return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6ba818449095..03520ff23d73 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1103,7 +1103,6 @@ int pci_reset_bus(struct pci_bus *bus);
 int pci_try_reset_bus(struct pci_bus *bus);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
-int pci_reset_bridge_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
 int __must_check pci_assign_resource(struct pci_dev *dev, int i);
 int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align);
-- 
cgit v1.2.3


From 811c5cb37df46b0cd714dbd053d19cdb97d08cff Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:12 -0500
Subject: PCI: Unify try slot and bus reset API

Drivers are expected to call pci_try_reset_slot() or pci_try_reset_bus() by
querying if a system supports hotplug or not.  A survey showed that most
drivers don't do this and we are leaking hotplug capability to the user.

Hide pci_try_slot_reset() from drivers and embed into pci_try_bus_reset().
Change pci_try_reset_bus() parameter from struct pci_bus to struct pci_dev.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/infiniband/hw/hfi1/pcie.c |  2 +-
 drivers/pci/pci.c                 | 21 ++++++++++++++++-----
 drivers/vfio/pci/vfio_pci.c       |  6 ++----
 include/linux/pci.h               |  3 +--
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 4570c4dc93d9..df4f2d390be8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -905,7 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	 * delay after a reset is required.  Per spec requirements,
 	 * the link is either working or not after that point.
 	 */
-	return pci_try_reset_bus(dev->bus);
+	return pci_try_reset_bus(dev);
 }
 
 /*
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 236220cb0f77..a31e6dbf21c3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4817,12 +4817,12 @@ int pci_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_reset_slot);
 
 /**
- * pci_try_reset_slot - Try to reset a PCI slot
+ * __pci_try_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
-int pci_try_reset_slot(struct pci_slot *slot)
+static int __pci_try_reset_slot(struct pci_slot *slot)
 {
 	int rc;
 
@@ -4843,7 +4843,6 @@ int pci_try_reset_slot(struct pci_slot *slot)
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(pci_try_reset_slot);
 
 static int pci_bus_reset(struct pci_bus *bus, int probe)
 {
@@ -4906,12 +4905,12 @@ int pci_reset_bus(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(pci_reset_bus);
 
 /**
- * pci_try_reset_bus - Try to reset a PCI bus
+ * __pci_try_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-int pci_try_reset_bus(struct pci_bus *bus)
+static int __pci_try_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
@@ -4932,6 +4931,18 @@ int pci_try_reset_bus(struct pci_bus *bus)
 
 	return rc;
 }
+
+/**
+ * pci_try_reset_bus - Try to reset a PCI bus
+ * @pdev: top level PCI device to reset via slot/bus
+ *
+ * Same as above except return -EAGAIN if the bus cannot be locked
+ */
+int pci_try_reset_bus(struct pci_dev *pdev)
+{
+	return pci_probe_reset_slot(pdev->slot) ?
+	    __pci_try_reset_slot(pdev->slot) : __pci_try_reset_bus(pdev->bus);
+}
 EXPORT_SYMBOL_GPL(pci_try_reset_bus);
 
 /**
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b423a309a6e0..71018ec9065b 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1010,8 +1010,7 @@ reset_info_exit:
 						    &info, slot);
 		if (!ret)
 			/* User has access, do the reset */
-			ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
-				     pci_try_reset_bus(vdev->pdev->bus);
+			ret = pci_try_reset_bus(vdev->pdev);
 
 hot_reset_release:
 		for (i--; i >= 0; i--)
@@ -1373,8 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 	}
 
 	if (needs_reset)
-		ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
-			     pci_try_reset_bus(vdev->pdev->bus);
+		ret = pci_try_reset_bus(vdev->pdev);
 
 put_devs:
 	for (i = 0; i < devs.cur_index; i++) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 03520ff23d73..19fb82559145 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,10 +1097,9 @@ int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_reset_slot(struct pci_slot *slot);
-int pci_try_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
 int pci_reset_bus(struct pci_bus *bus);
-int pci_try_reset_bus(struct pci_bus *bus);
+int pci_try_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
-- 
cgit v1.2.3


From fe32e2fa656c29d5d25f959f8e6168ac405d9ab4 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:14 -0500
Subject: PCI: Deprecate pci_reset_bus() and pci_reset_slot() functions

pci_reset_bus() and pci_reset_slot() functions are not being used by any
code.  Remove them from the kernel in favor of pci_try_reset_bus() and
pci_try_reset_slot() functions.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 53 +----------------------------------------------------
 include/linux/pci.h |  2 --
 2 files changed, 1 insertion(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index a31e6dbf21c3..8ee9f386c1ee 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4784,7 +4784,7 @@ int pci_probe_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
 
 /**
- * pci_reset_slot - reset a PCI slot
+ * __pci_try_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * A PCI bus may host multiple slots, each slot may support a reset mechanism
@@ -4796,30 +4796,6 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
  * through this function.  PCI config space of all devices in the slot and
  * behind the slot is saved before and restored after reset.
  *
- * Return 0 on success, non-zero on error.
- */
-int pci_reset_slot(struct pci_slot *slot)
-{
-	int rc;
-
-	rc = pci_slot_reset(slot, 1);
-	if (rc)
-		return rc;
-
-	pci_slot_save_and_disable(slot);
-
-	rc = pci_slot_reset(slot, 0);
-
-	pci_slot_restore(slot);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(pci_reset_slot);
-
-/**
- * __pci_try_reset_slot - Try to reset a PCI slot
- * @slot: PCI slot to reset
- *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
 static int __pci_try_reset_slot(struct pci_slot *slot)
@@ -4877,33 +4853,6 @@ int pci_probe_reset_bus(struct pci_bus *bus)
 }
 EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
 
-/**
- * pci_reset_bus - reset a PCI bus
- * @bus: top level PCI bus to reset
- *
- * Do a bus reset on the given bus and any subordinate buses, saving
- * and restoring state of all devices.
- *
- * Return 0 on success, non-zero on error.
- */
-int pci_reset_bus(struct pci_bus *bus)
-{
-	int rc;
-
-	rc = pci_bus_reset(bus, 1);
-	if (rc)
-		return rc;
-
-	pci_bus_save_and_disable(bus);
-
-	rc = pci_bus_reset(bus, 0);
-
-	pci_bus_restore(bus);
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(pci_reset_bus);
-
 /**
  * __pci_try_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 19fb82559145..cf53ecb50789 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1096,9 +1096,7 @@ int pci_reset_function(struct pci_dev *dev);
 int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
-int pci_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
-int pci_reset_bus(struct pci_bus *bus);
 int pci_try_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
-- 
cgit v1.2.3


From c6a44ba950d147e15fe6dab6455a52f91d8fe625 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 19 Jul 2018 18:04:15 -0500
Subject: PCI: Rename pci_try_reset_bus() to pci_reset_bus()

Now that the old implementation of pci_reset_bus() is gone, replace
pci_try_reset_bus() with pci_reset_bus().

Compared to the old implementation, new code will fail immmediately with
-EAGAIN if object lock cannot be obtained.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/infiniband/hw/hfi1/pcie.c |  2 +-
 drivers/pci/pci.c                 | 16 ++++++++--------
 drivers/vfio/pci/vfio_pci.c       |  4 ++--
 include/linux/pci.h               |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index df4f2d390be8..baf7c324f7b8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -905,7 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 	 * delay after a reset is required.  Per spec requirements,
 	 * the link is either working or not after that point.
 	 */
-	return pci_try_reset_bus(dev);
+	return pci_reset_bus(dev);
 }
 
 /*
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 8ee9f386c1ee..d123c2b173da 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4784,7 +4784,7 @@ int pci_probe_reset_slot(struct pci_slot *slot)
 EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
 
 /**
- * __pci_try_reset_slot - Try to reset a PCI slot
+ * __pci_reset_slot - Try to reset a PCI slot
  * @slot: PCI slot to reset
  *
  * A PCI bus may host multiple slots, each slot may support a reset mechanism
@@ -4798,7 +4798,7 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot);
  *
  * Same as above except return -EAGAIN if the slot cannot be locked
  */
-static int __pci_try_reset_slot(struct pci_slot *slot)
+static int __pci_reset_slot(struct pci_slot *slot)
 {
 	int rc;
 
@@ -4854,12 +4854,12 @@ int pci_probe_reset_bus(struct pci_bus *bus)
 EXPORT_SYMBOL_GPL(pci_probe_reset_bus);
 
 /**
- * __pci_try_reset_bus - Try to reset a PCI bus
+ * __pci_reset_bus - Try to reset a PCI bus
  * @bus: top level PCI bus to reset
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-static int __pci_try_reset_bus(struct pci_bus *bus)
+static int __pci_reset_bus(struct pci_bus *bus)
 {
 	int rc;
 
@@ -4882,17 +4882,17 @@ static int __pci_try_reset_bus(struct pci_bus *bus)
 }
 
 /**
- * pci_try_reset_bus - Try to reset a PCI bus
+ * pci_reset_bus - Try to reset a PCI bus
  * @pdev: top level PCI device to reset via slot/bus
  *
  * Same as above except return -EAGAIN if the bus cannot be locked
  */
-int pci_try_reset_bus(struct pci_dev *pdev)
+int pci_reset_bus(struct pci_dev *pdev)
 {
 	return pci_probe_reset_slot(pdev->slot) ?
-	    __pci_try_reset_slot(pdev->slot) : __pci_try_reset_bus(pdev->bus);
+	    __pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus);
 }
-EXPORT_SYMBOL_GPL(pci_try_reset_bus);
+EXPORT_SYMBOL_GPL(pci_reset_bus);
 
 /**
  * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 71018ec9065b..345c0dc8a6dc 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1010,7 +1010,7 @@ reset_info_exit:
 						    &info, slot);
 		if (!ret)
 			/* User has access, do the reset */
-			ret = pci_try_reset_bus(vdev->pdev);
+			ret = pci_reset_bus(vdev->pdev);
 
 hot_reset_release:
 		for (i--; i >= 0; i--)
@@ -1372,7 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
 	}
 
 	if (needs_reset)
-		ret = pci_try_reset_bus(vdev->pdev);
+		ret = pci_reset_bus(vdev->pdev);
 
 put_devs:
 	for (i = 0; i < devs.cur_index; i++) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cf53ecb50789..307b336496f6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1097,7 +1097,7 @@ int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_probe_reset_bus(struct pci_bus *bus);
-int pci_try_reset_bus(struct pci_dev *dev);
+int pci_reset_bus(struct pci_dev *dev);
 void pci_reset_secondary_bus(struct pci_dev *dev);
 void pcibios_reset_secondary_bus(struct pci_dev *dev);
 void pci_update_resource(struct pci_dev *dev, int resno);
-- 
cgit v1.2.3


From b976690f5db26fbc7c2be413bfa0fbd270547a94 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 18 Jul 2018 11:41:06 +0200
Subject: x86/mm/pti: Introduce pti_finalize()

Introduce a new function to finalize the kernel mappings for the userspace
page-table after all ro/nx protections have been applied to the kernel
mappings.

Also move the call to pti_clone_kernel_text() to that function so that it
will run on 32 bit kernels too.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Pavel Machek <pavel@ucw.cz>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: linux-mm@kvack.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Waiman Long <llong@redhat.com>
Cc: "David H . Gutteridge" <dhgutteridge@sympatico.ca>
Cc: joro@8bytes.org
Link: https://lkml.kernel.org/r/1531906876-13451-30-git-send-email-joro@8bytes.org
---
 arch/x86/include/asm/pti.h |  3 +--
 arch/x86/mm/init_64.c      |  6 ------
 arch/x86/mm/pti.c          | 14 +++++++++++++-
 include/linux/pti.h        |  1 +
 init/main.c                |  7 +++++++
 5 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 38a17f1d5c9d..5df09a0b80b8 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,10 +6,9 @@
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 extern void pti_init(void);
 extern void pti_check_boottime_disable(void);
-extern void pti_clone_kernel_text(void);
+extern void pti_finalize(void);
 #else
 static inline void pti_check_boottime_disable(void) { }
-static inline void pti_clone_kernel_text(void) { }
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a688617c727e..9b19f9a8948e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1291,12 +1291,6 @@ void mark_rodata_ro(void)
 			(unsigned long) __va(__pa_symbol(_sdata)));
 
 	debug_checkwx();
-
-	/*
-	 * Do this after all of the manipulation of the
-	 * kernel text page tables are complete.
-	 */
-	pti_clone_kernel_text();
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index e41ee93c430d..fcfb815d420f 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -462,7 +462,7 @@ static inline bool pti_kernel_image_global_ok(void)
  * For some configurations, map all of kernel text into the user page
  * tables.  This reduces TLB misses, especially on non-PCID systems.
  */
-void pti_clone_kernel_text(void)
+static void pti_clone_kernel_text(void)
 {
 	/*
 	 * rodata is part of the kernel image and is normally
@@ -526,3 +526,15 @@ void __init pti_init(void)
 	pti_setup_espfix64();
 	pti_setup_vsyscall();
 }
+
+/*
+ * Finalize the kernel mappings in the userspace page-table.
+ */
+void pti_finalize(void)
+{
+	/*
+	 * Do this after all of the manipulation of the
+	 * kernel text page tables are complete.
+	 */
+	pti_clone_kernel_text();
+}
diff --git a/include/linux/pti.h b/include/linux/pti.h
index 0174883a935a..1a941efcaa62 100644
--- a/include/linux/pti.h
+++ b/include/linux/pti.h
@@ -6,6 +6,7 @@
 #include <asm/pti.h>
 #else
 static inline void pti_init(void) { }
+static inline void pti_finalize(void) { }
 #endif
 
 #endif
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..fcfef46c935a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1065,6 +1065,13 @@ static int __ref kernel_init(void *unused)
 	jump_label_invalidate_initmem();
 	free_initmem();
 	mark_readonly();
+
+	/*
+	 * Kernel mappings are now finalized - update the userspace page-table
+	 * to finalize PTI.
+	 */
+	pti_finalize();
+
 	system_state = SYSTEM_RUNNING;
 	numa_default_policy();
 
-- 
cgit v1.2.3


From 985e695074d35768cb04d65f58bca45f7bf1a99d Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Fri, 13 Jul 2018 14:06:42 +0200
Subject: timekeeping/ntp: Constify some function arguments

Add 'const' to some function arguments and variables to make it easier
to read the code.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
[jstultz: Also fixup pre-existing checkpatch warnings for
 prototype arguments with no variable name]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/timekeeping.h        |  2 +-
 kernel/time/ntp.c                  |  6 +++---
 kernel/time/ntp_internal.h         |  4 ++--
 kernel/time/timekeeping.c          | 29 +++++++++++++++--------------
 kernel/time/timekeeping_debug.c    |  2 +-
 kernel/time/timekeeping_internal.h |  2 +-
 6 files changed, 23 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..edace6b656e9 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -177,7 +177,7 @@ static inline time64_t ktime_get_clocktai_seconds(void)
 extern bool timekeeping_rtc_skipsuspend(void);
 extern bool timekeeping_rtc_skipresume(void);
 
-extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
+extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta);
 
 /*
  * struct system_time_snapshot - simultaneous raw/real time capture with
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index a627cae8baab..c5e0cba3b39c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -642,7 +642,7 @@ void ntp_notify_cmos_timer(void)
 /*
  * Propagate a new txc->status value into the NTP state:
  */
-static inline void process_adj_status(struct timex *txc)
+static inline void process_adj_status(const struct timex *txc)
 {
 	if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
 		time_state = TIME_OK;
@@ -665,7 +665,7 @@ static inline void process_adj_status(struct timex *txc)
 }
 
 
-static inline void process_adjtimex_modes(struct timex *txc, s32 *time_tai)
+static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
 		process_adj_status(txc);
@@ -716,7 +716,7 @@ static inline void process_adjtimex_modes(struct timex *txc, s32 *time_tai)
  * adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
  */
-int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
+int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
 {
 	int result;
 
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 909bd1f1bfb1..c24b0e13f011 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,6 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
-extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
+extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai);
+extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ad779c2ec53a..7033ac1fee65 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -105,7 +105,7 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
 	}
 }
 
-static inline struct timespec64 tk_xtime(struct timekeeper *tk)
+static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
 {
 	struct timespec64 ts;
 
@@ -162,7 +162,7 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
  * a read of the fast-timekeeper tkrs (which is protected by its own locking
  * and update logic).
  */
-static inline u64 tk_clock_read(struct tk_read_base *tkr)
+static inline u64 tk_clock_read(const struct tk_read_base *tkr)
 {
 	struct clocksource *clock = READ_ONCE(tkr->clock);
 
@@ -211,7 +211,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 	}
 }
 
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	u64 now, last, mask, max, delta;
@@ -255,7 +255,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
 {
 }
-static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
 {
 	u64 cycle_now, delta;
 
@@ -352,7 +352,7 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
+static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
 {
 	u64 nsec;
 
@@ -363,7 +363,7 @@ static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr, u64 delta)
 	return nsec + arch_gettimeoffset();
 }
 
-static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
+static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
 {
 	u64 delta;
 
@@ -371,7 +371,7 @@ static inline u64 timekeeping_get_ns(struct tk_read_base *tkr)
 	return timekeeping_delta_to_ns(tkr, delta);
 }
 
-static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
 {
 	u64 delta;
 
@@ -394,7 +394,8 @@ static inline u64 timekeeping_cycles_to_ns(struct tk_read_base *tkr, u64 cycles)
  * slightly wrong timestamp (a few nanoseconds). See
  * @ktime_get_mono_fast_ns.
  */
-static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
+static void update_fast_timekeeper(const struct tk_read_base *tkr,
+				   struct tk_fast *tkf)
 {
 	struct tk_read_base *base = tkf->base;
 
@@ -549,10 +550,10 @@ EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
  * number of cycles every time until timekeeping is resumed at which time the
  * proper readout base for the fast timekeeper will be restored automatically.
  */
-static void halt_fast_timekeeper(struct timekeeper *tk)
+static void halt_fast_timekeeper(const struct timekeeper *tk)
 {
 	static struct tk_read_base tkr_dummy;
-	struct tk_read_base *tkr = &tk->tkr_mono;
+	const struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 	cycles_at_suspend = tk_clock_read(tkr);
@@ -1277,7 +1278,7 @@ EXPORT_SYMBOL(do_settimeofday64);
  *
  * Adds or subtracts an offset value from the current time.
  */
-static int timekeeping_inject_offset(struct timespec64 *ts)
+static int timekeeping_inject_offset(const struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
@@ -1585,7 +1586,7 @@ static struct timespec64 timekeeping_suspend_time;
  * adds the sleep offset to the timekeeping variables.
  */
 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
-					   struct timespec64 *delta)
+					   const struct timespec64 *delta)
 {
 	if (!timespec64_valid_strict(delta)) {
 		printk_deferred(KERN_WARNING
@@ -1646,7 +1647,7 @@ bool timekeeping_rtc_skipsuspend(void)
  * This function should only be called by rtc_resume(), and allows
  * a suspend offset to be injected into the timekeeping values.
  */
-void timekeeping_inject_sleeptime64(struct timespec64 *delta)
+void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
@@ -2240,7 +2241,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 /**
  * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-static int timekeeping_validate_timex(struct timex *txc)
+static int timekeeping_validate_timex(const struct timex *txc)
 {
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 0754cadfa9e6..238e4be60229 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -70,7 +70,7 @@ static int __init tk_debug_sleep_time_init(void)
 }
 late_initcall(tk_debug_sleep_time_init);
 
-void tk_debug_account_sleep_time(struct timespec64 *t)
+void tk_debug_account_sleep_time(const struct timespec64 *t)
 {
 	/* Cap bin index so we don't overflow the array */
 	int bin = min(fls(t->tv_sec), NUM_BINS-1);
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index cf5c0828ee31..bcbb52db2256 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -8,7 +8,7 @@
 #include <linux/time.h>
 
 #ifdef CONFIG_DEBUG_FS
-extern void tk_debug_account_sleep_time(struct timespec64 *t);
+extern void tk_debug_account_sleep_time(const struct timespec64 *t);
 #else
 #define tk_debug_account_sleep_time(x)
 #endif
-- 
cgit v1.2.3


From 39232ed5a1793f67b11430c43ed8a9ed6e96c6eb Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Tue, 17 Jul 2018 15:55:16 +0800
Subject: time: Introduce one suspend clocksource to compensate the suspend
 time

On some hardware with multiple clocksources, we have coarse grained
clocksources that support the CLOCK_SOURCE_SUSPEND_NONSTOP flag, but
which are less than ideal for timekeeping whereas other clocksources
can be better candidates but halt on suspend.

Currently, the timekeeping core only supports timing suspend using
CLOCK_SOURCE_SUSPEND_NONSTOP clocksources if that clocksource is the
current clocksource for timekeeping.

As a result, some architectures try to implement read_persistent_clock64()
using those non-stop clocksources, but isn't really ideal, which will
introduce more duplicate code. To fix this, provide logic to allow a
registered SUSPEND_NONSTOP clocksource, which isn't the current
clocksource, to be used to calculate the suspend time.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
[jstultz: minor tweaks to merge with previous resume changes]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/clocksource.h |   3 +
 kernel/time/clocksource.c   | 149 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/time/timekeeping.c   |  22 ++++---
 3 files changed, 166 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7dff1963c185..308918928767 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -194,6 +194,9 @@ extern void clocksource_suspend(void);
 extern void clocksource_resume(void);
 extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
+extern void
+clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles);
+extern u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 now);
 
 extern u64
 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f89a78e2792b..f74fb00d8064 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -94,6 +94,8 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 /*[Clocksource internal variables]---------
  * curr_clocksource:
  *	currently selected clocksource.
+ * suspend_clocksource:
+ *	used to calculate the suspend time.
  * clocksource_list:
  *	linked list with the registered clocksources
  * clocksource_mutex:
@@ -102,10 +104,12 @@ EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
  *	Name of the user-specified clocksource.
  */
 static struct clocksource *curr_clocksource;
+static struct clocksource *suspend_clocksource;
 static LIST_HEAD(clocksource_list);
 static DEFINE_MUTEX(clocksource_mutex);
 static char override_name[CS_NAME_LEN];
 static int finished_booting;
+static u64 suspend_start;
 
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
@@ -447,6 +451,140 @@ static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
+static bool clocksource_is_suspend(struct clocksource *cs)
+{
+	return cs == suspend_clocksource;
+}
+
+static void __clocksource_suspend_select(struct clocksource *cs)
+{
+	/*
+	 * Skip the clocksource which will be stopped in suspend state.
+	 */
+	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
+		return;
+
+	/*
+	 * The nonstop clocksource can be selected as the suspend clocksource to
+	 * calculate the suspend time, so it should not supply suspend/resume
+	 * interfaces to suspend the nonstop clocksource when system suspends.
+	 */
+	if (cs->suspend || cs->resume) {
+		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
+			cs->name);
+	}
+
+	/* Pick the best rating. */
+	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
+		suspend_clocksource = cs;
+}
+
+/**
+ * clocksource_suspend_select - Select the best clocksource for suspend timing
+ * @fallback:	if select a fallback clocksource
+ */
+static void clocksource_suspend_select(bool fallback)
+{
+	struct clocksource *cs, *old_suspend;
+
+	old_suspend = suspend_clocksource;
+	if (fallback)
+		suspend_clocksource = NULL;
+
+	list_for_each_entry(cs, &clocksource_list, list) {
+		/* Skip current if we were requested for a fallback. */
+		if (fallback && cs == old_suspend)
+			continue;
+
+		__clocksource_suspend_select(cs);
+	}
+}
+
+/**
+ * clocksource_start_suspend_timing - Start measuring the suspend timing
+ * @cs:			current clocksource from timekeeping
+ * @start_cycles:	current cycles from timekeeping
+ *
+ * This function will save the start cycle values of suspend timer to calculate
+ * the suspend time when resuming system.
+ *
+ * This function is called late in the suspend process from timekeeping_suspend(),
+ * that means processes are freezed, non-boot cpus and interrupts are disabled
+ * now. It is therefore possible to start the suspend timer without taking the
+ * clocksource mutex.
+ */
+void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
+{
+	if (!suspend_clocksource)
+		return;
+
+	/*
+	 * If current clocksource is the suspend timer, we should use the
+	 * tkr_mono.cycle_last value as suspend_start to avoid same reading
+	 * from suspend timer.
+	 */
+	if (clocksource_is_suspend(cs)) {
+		suspend_start = start_cycles;
+		return;
+	}
+
+	if (suspend_clocksource->enable &&
+	    suspend_clocksource->enable(suspend_clocksource)) {
+		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
+		return;
+	}
+
+	suspend_start = suspend_clocksource->read(suspend_clocksource);
+}
+
+/**
+ * clocksource_stop_suspend_timing - Stop measuring the suspend timing
+ * @cs:		current clocksource from timekeeping
+ * @cycle_now:	current cycles from timekeeping
+ *
+ * This function will calculate the suspend time from suspend timer.
+ *
+ * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
+ *
+ * This function is called early in the resume process from timekeeping_resume(),
+ * that means there is only one cpu, no processes are running and the interrupts
+ * are disabled. It is therefore possible to stop the suspend timer without
+ * taking the clocksource mutex.
+ */
+u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
+{
+	u64 now, delta, nsec = 0;
+
+	if (!suspend_clocksource)
+		return 0;
+
+	/*
+	 * If current clocksource is the suspend timer, we should use the
+	 * tkr_mono.cycle_last value from timekeeping as current cycle to
+	 * avoid same reading from suspend timer.
+	 */
+	if (clocksource_is_suspend(cs))
+		now = cycle_now;
+	else
+		now = suspend_clocksource->read(suspend_clocksource);
+
+	if (now > suspend_start) {
+		delta = clocksource_delta(now, suspend_start,
+					  suspend_clocksource->mask);
+		nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
+				       suspend_clocksource->shift);
+	}
+
+	/*
+	 * Disable the suspend timer to save power if current clocksource is
+	 * not the suspend timer.
+	 */
+	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
+		suspend_clocksource->disable(suspend_clocksource);
+
+	return nsec;
+}
+
 /**
  * clocksource_suspend - suspend the clocksource(s)
  */
@@ -792,6 +930,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 
 	clocksource_select();
 	clocksource_select_watchdog(false);
+	__clocksource_suspend_select(cs);
 	mutex_unlock(&clocksource_mutex);
 	return 0;
 }
@@ -820,6 +959,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
 
 	clocksource_select();
 	clocksource_select_watchdog(false);
+	clocksource_suspend_select(false);
 	mutex_unlock(&clocksource_mutex);
 }
 EXPORT_SYMBOL(clocksource_change_rating);
@@ -845,6 +985,15 @@ static int clocksource_unbind(struct clocksource *cs)
 			return -EBUSY;
 	}
 
+	if (clocksource_is_suspend(cs)) {
+		/*
+		 * Select and try to install a replacement suspend clocksource.
+		 * If no replacement suspend clocksource, we will just let the
+		 * clocksource go and have no suspend clocksource.
+		 */
+		clocksource_suspend_select(true);
+	}
+
 	clocksource_watchdog_lock(&flags);
 	clocksource_dequeue_watchdog(cs);
 	list_del_init(&cs->list);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 19414b16cf9e..d9e659a12c76 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1692,7 +1692,7 @@ void timekeeping_resume(void)
 	struct clocksource *clock = tk->tkr_mono.clock;
 	unsigned long flags;
 	struct timespec64 ts_new, ts_delta;
-	u64 cycle_now;
+	u64 cycle_now, nsec;
 	bool inject_sleeptime = false;
 
 	read_persistent_clock64(&ts_new);
@@ -1716,13 +1716,8 @@ void timekeeping_resume(void)
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
 	cycle_now = tk_clock_read(&tk->tkr_mono);
-	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-		cycle_now > tk->tkr_mono.cycle_last) {
-		u64 nsec, cyc_delta;
-
-		cyc_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
-					      tk->tkr_mono.mask);
-		nsec = mul_u64_u32_shr(cyc_delta, clock->mult, clock->shift);
+	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
+	if (nsec > 0) {
 		ts_delta = ns_to_timespec64(nsec);
 		inject_sleeptime = true;
 	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
@@ -1757,6 +1752,8 @@ int timekeeping_suspend(void)
 	unsigned long flags;
 	struct timespec64		delta, delta_delta;
 	static struct timespec64	old_delta;
+	struct clocksource *curr_clock;
+	u64 cycle_now;
 
 	read_persistent_clock64(&timekeeping_suspend_time);
 
@@ -1775,6 +1772,15 @@ int timekeeping_suspend(void)
 	timekeeping_forward_now(tk);
 	timekeeping_suspended = 1;
 
+	/*
+	 * Since we've called forward_now, cycle_last stores the value
+	 * just read from the current clocksource. Save this to potentially
+	 * use in suspend timing.
+	 */
+	curr_clock = tk->tkr_mono.clock;
+	cycle_now = tk->tkr_mono.cycle_last;
+	clocksource_start_suspend_timing(curr_clock, cycle_now);
+
 	if (persistent_clock_exists) {
 		/*
 		 * To avoid drift caused by repeated suspend/resumes,
-- 
cgit v1.2.3


From cf862cbc831982a27f14a08adf82ad9ca8d86205 Mon Sep 17 00:00:00 2001
From: Stephan Mueller <smueller@chronox.de>
Date: Tue, 10 Jul 2018 17:56:33 +0200
Subject: crypto: drbg - eliminate constant reinitialization of SGL

The CTR DRBG requires two SGLs pointing to input/output buffers for the
CTR AES operation. The used SGLs always have only one entry. Thus, the
SGL can be initialized during allocation time, preventing a
re-initialization of the SGLs during each call.

The performance is increased by about 1 to 3 percent depending on the
size of the requested buffer size.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c         | 11 +++++++----
 include/crypto/drbg.h |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/crypto/drbg.c b/crypto/drbg.c
index 466a112a4446..ee302fd229ad 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1715,6 +1715,9 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 	drbg->outscratchpad = (u8 *)PTR_ALIGN(drbg->outscratchpadbuf,
 					      alignmask + 1);
 
+	sg_init_table(&drbg->sg_in, 1);
+	sg_init_table(&drbg->sg_out, 1);
+
 	return alignmask;
 }
 
@@ -1743,17 +1746,17 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			      u8 *inbuf, u32 inlen,
 			      u8 *outbuf, u32 outlen)
 {
-	struct scatterlist sg_in, sg_out;
+	struct scatterlist *sg_in = &drbg->sg_in, *sg_out = &drbg->sg_out;
 	int ret;
 
-	sg_init_one(&sg_in, inbuf, inlen);
-	sg_init_one(&sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
+	sg_set_buf(sg_in, inbuf, inlen);
+	sg_set_buf(sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
 
 	while (outlen) {
 		u32 cryptlen = min3(inlen, outlen, (u32)DRBG_OUTSCRATCHLEN);
 
 		/* Output buffer may not be valid for SGL, use scratchpad */
-		skcipher_request_set_crypt(drbg->ctr_req, &sg_in, &sg_out,
+		skcipher_request_set_crypt(drbg->ctr_req, sg_in, sg_out,
 					   cryptlen, drbg->V);
 		ret = crypto_wait_req(crypto_skcipher_encrypt(drbg->ctr_req),
 					&drbg->ctr_wait);
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 8f941102af36..54b9f5d375f5 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -127,6 +127,7 @@ struct drbg_state {
 	__u8 *outscratchpadbuf;			/* CTR mode output scratchpad */
         __u8 *outscratchpad;			/* CTR mode aligned outbuf */
 	struct crypto_wait ctr_wait;		/* CTR mode async wait obj */
+	struct scatterlist sg_in, sg_out;	/* CTR mode SGLs */
 
 	bool seeded;		/* DRBG fully seeded? */
 	bool pr;		/* Prediction resistance enabled? */
-- 
cgit v1.2.3


From 07a557f47d7e09b2c60ad4d51b1ac8b035b75f73 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Jul 2018 19:27:16 +0300
Subject: net/sched: tunnel_key: Allow to set tos and ttl for tc based ip
 tunnels

Allow user-space to provide tos and ttl to be set for the tunnel headers.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tc_act/tc_tunnel_key.h |  2 ++
 net/sched/act_tunnel_key.c                | 20 ++++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index e284fec8c467..be384d63e1b5 100644
--- a/include/uapi/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -39,6 +39,8 @@ enum {
 	TCA_TUNNEL_KEY_ENC_OPTS,	/* Nested TCA_TUNNEL_KEY_ENC_OPTS_
 					 * attributes
 					 */
+	TCA_TUNNEL_KEY_ENC_TOS,		/* u8 */
+	TCA_TUNNEL_KEY_ENC_TTL,		/* u8 */
 	__TCA_TUNNEL_KEY_MAX,
 };
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3ec585d58762..22f26e9ea8f1 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -197,6 +197,8 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 	[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
 	[TCA_TUNNEL_KEY_NO_CSUM]      = { .type = NLA_U8 },
 	[TCA_TUNNEL_KEY_ENC_OPTS]     = { .type = NLA_NESTED },
+	[TCA_TUNNEL_KEY_ENC_TOS]      = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_TTL]      = { .type = NLA_U8 },
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -216,6 +218,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	int opts_len = 0;
 	__be64 key_id;
 	__be16 flags;
+	u8 tos, ttl;
 	int ret = 0;
 	int err;
 
@@ -273,6 +276,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			}
 		}
 
+		tos = 0;
+		if (tb[TCA_TUNNEL_KEY_ENC_TOS])
+			tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
+		ttl = 0;
+		if (tb[TCA_TUNNEL_KEY_ENC_TTL])
+			ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
+
 		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
 		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
 			__be32 saddr;
@@ -281,7 +291,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
 			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
 
-			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+			metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
 						    dst_port, flags,
 						    key_id, opts_len);
 		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
@@ -292,7 +302,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
 			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
 
-			metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+			metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
 						      0, flags,
 						      key_id, 0);
 		} else {
@@ -504,6 +514,12 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 			       !(key->tun_flags & TUNNEL_CSUM)) ||
 		    tunnel_key_opts_dump(skb, info))
 			goto nla_put_failure;
+
+		if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
+			goto nla_put_failure;
+
+		if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
+			goto nla_put_failure;
 	}
 
 	tcf_tm_dump(&tm, &t->tcf_tm);
-- 
cgit v1.2.3


From 5544adb9707fda5d54494c37940701894c16b9a0 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Jul 2018 19:27:17 +0300
Subject: flow_dissector: Dissect tos and ttl from the tunnel info

Add dissection of the tos and ttl from the ip tunnel headers
fields in case a match is needed on them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h |  2 +-
 net/core/flow_dissector.c    | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c64406717eee..2a17f041f7a1 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -207,7 +207,7 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_TCP, /* struct flow_dissector_key_tcp */
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
-
+	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index b555fc229e96..08a5184f4b34 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -152,7 +152,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 	    !dissector_uses_key(flow_dissector,
 				FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
 	    !dissector_uses_key(flow_dissector,
-				FLOW_DISSECTOR_KEY_ENC_PORTS))
+				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_IP))
 		return;
 
 	info = skb_tunnel_info(skb);
@@ -212,6 +214,16 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 		tp->src = key->tp_src;
 		tp->dst = key->tp_dst;
 	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_dissector_key_ip *ip;
+
+		ip = skb_flow_dissector_target(flow_dissector,
+					       FLOW_DISSECTOR_KEY_ENC_IP,
+					       target_container);
+		ip->tos = key->tos;
+		ip->ttl = key->ttl;
+	}
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 
-- 
cgit v1.2.3


From 0e2c17b64d5c7f57bcd7054ef87797376dcdee26 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Jul 2018 19:27:18 +0300
Subject: net/sched: cls_flower: Support matching on ip tos and ttl for tunnels

Allow users to set rules matching on ipv4 tos and ttl or
ipv6 traffic-class and hoplimit of tunnel headers.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  5 +++++
 net/sched/cls_flower.c       | 43 ++++++++++++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index c4262d911596..b4512254036b 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -473,6 +473,11 @@ enum {
 	TCA_FLOWER_KEY_CVLAN_PRIO,	/* u8   */
 	TCA_FLOWER_KEY_CVLAN_ETH_TYPE,	/* be16 */
 
+	TCA_FLOWER_KEY_ENC_IP_TOS,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TOS_MASK,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c53fdd411f90..38d74803e2df 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -52,6 +52,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_mpls mpls;
 	struct flow_dissector_key_tcp tcp;
 	struct flow_dissector_key_ip ip;
+	struct flow_dissector_key_ip enc_ip;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -453,6 +454,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ENC_IP_TOS]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -561,17 +566,17 @@ static int fl_set_key_flags(struct nlattr **tb,
 	return 0;
 }
 
-static void fl_set_key_ip(struct nlattr **tb,
+static void fl_set_key_ip(struct nlattr **tb, bool encap,
 			  struct flow_dissector_key_ip *key,
 			  struct flow_dissector_key_ip *mask)
 {
-		fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
-			       &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
-			       sizeof(key->tos));
+	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
 
-		fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
-			       &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
-			       sizeof(key->ttl));
+	fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
+	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
 }
 
 static int fl_set_key(struct net *net, struct nlattr **tb,
@@ -633,7 +638,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
 			       sizeof(key->basic.ip_proto));
-		fl_set_key_ip(tb, &key->ip, &mask->ip);
+		fl_set_key_ip(tb, false, &key->ip, &mask->ip);
 	}
 
 	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -768,6 +773,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
 		       sizeof(key->enc_tp.dst));
 
+	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -860,6 +867,8 @@ static void fl_init_dissector(struct fl_flow_mask *mask)
 			   enc_control);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
 
 	skb_flow_dissector_init(&mask->dissector, keys, cnt);
 }
@@ -1208,14 +1217,17 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
 	return 0;
 }
 
-static int fl_dump_key_ip(struct sk_buff *skb,
+static int fl_dump_key_ip(struct sk_buff *skb, bool encap,
 			  struct flow_dissector_key_ip *key,
 			  struct flow_dissector_key_ip *mask)
 {
-	if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
-			    TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
-	    fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
-			    TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
+
+	if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) ||
+	    fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)))
 		return -1;
 
 	return 0;
@@ -1361,7 +1373,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
 			    sizeof(key->basic.ip_proto)) ||
-	    fl_dump_key_ip(skb, &key->ip, &mask->ip)))
+	    fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
 		goto nla_put_failure;
 
 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1486,7 +1498,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
 			    &mask->enc_tp.dst,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
-			    sizeof(key->enc_tp.dst)))
+			    sizeof(key->enc_tp.dst)) ||
+	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
 		goto nla_put_failure;
 
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
-- 
cgit v1.2.3


From b51dab46c6adfbb7e80cd0f59ae17b8a30d94b1a Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Wed, 18 Jul 2018 06:27:22 -0700
Subject: qed: Add qed APIs for PHY module query.

This patch adds qed APIs for reading the PHY module.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  | 16 ++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c | 23 ++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 49 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 16 ++++++++++
 include/linux/qed/qed_if.h                 | 15 +++++++++
 5 files changed, 119 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index bee10c1781fb..8faceb691657 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12444,6 +12444,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_STATS_TYPE_ISCSI           3
 #define DRV_MSG_CODE_STATS_TYPE_RDMA            4
 
+#define DRV_MSG_CODE_TRANSCEIVER_READ           0x00160000
+
 #define DRV_MSG_CODE_MASK_PARITIES              0x001a0000
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
@@ -12543,6 +12545,15 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_SET_LED_MODE_ON		0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF		0x2
 
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET		0
+#define DRV_MB_PARAM_TRANSCEIVER_PORT_MASK		0x00000003
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET		2
+#define DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK		0x000000FC
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET	8
+#define DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK	0x0000FF00
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET		16
+#define DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK		0xFFFF0000
+
 	/* Resource Allocation params - Driver version support */
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
 #define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
@@ -12596,6 +12607,9 @@ struct public_drv_mb {
 #define FW_MSG_CODE_PHY_OK			0x00110000
 #define FW_MSG_CODE_OK				0x00160000
 #define FW_MSG_CODE_ERROR			0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_OK		0x00160000
+#define FW_MSG_CODE_TRANSCEIVER_DIAG_ERROR	0x00170000
+#define FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT	0x00020000
 
 #define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
 #define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
@@ -12687,6 +12701,8 @@ struct mcp_public_data {
 	struct public_func func[MCP_GLOB_FUNC_MAX];
 };
 
+#define MAX_I2C_TRANSACTION_SIZE	16
+
 /* OCBB definitions */
 enum tlvs {
 	/* Category 1: Device Properties */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 0cbc74d6ca8b..158944aa6097 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2102,6 +2102,28 @@ out:
 	return status;
 }
 
+static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
+				  u8 dev_addr, u32 offset, u32 len)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int rc = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_mcp_phy_sfp_read(hwfn, ptt, MFW_PORT(hwfn), dev_addr,
+				  offset, len, buf);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
@@ -2144,6 +2166,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
 	.update_wol = &qed_update_wol,
+	.read_module_eeprom = &qed_read_module_eeprom,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 4e0b443c9519..62a220fce6e1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2463,6 +2463,55 @@ out:
 	return rc;
 }
 
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			 u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf)
+{
+	u32 bytes_left, bytes_to_copy, buf_size, nvm_offset = 0;
+	u32 resp, param;
+	int rc;
+
+	nvm_offset |= (port << DRV_MB_PARAM_TRANSCEIVER_PORT_OFFSET) &
+		       DRV_MB_PARAM_TRANSCEIVER_PORT_MASK;
+	nvm_offset |= (addr << DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_OFFSET) &
+		       DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK;
+
+	addr = offset;
+	offset = 0;
+	bytes_left = len;
+	while (bytes_left > 0) {
+		bytes_to_copy = min_t(u32, bytes_left,
+				      MAX_I2C_TRANSACTION_SIZE);
+		nvm_offset &= (DRV_MB_PARAM_TRANSCEIVER_I2C_ADDRESS_MASK |
+			       DRV_MB_PARAM_TRANSCEIVER_PORT_MASK);
+		nvm_offset |= ((addr + offset) <<
+			       DRV_MB_PARAM_TRANSCEIVER_OFFSET_OFFSET) &
+			       DRV_MB_PARAM_TRANSCEIVER_OFFSET_MASK;
+		nvm_offset |= (bytes_to_copy <<
+			       DRV_MB_PARAM_TRANSCEIVER_SIZE_OFFSET) &
+			       DRV_MB_PARAM_TRANSCEIVER_SIZE_MASK;
+		rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+					DRV_MSG_CODE_TRANSCEIVER_READ,
+					nvm_offset, &resp, &param, &buf_size,
+					(u32 *)(p_buf + offset));
+		if (rc) {
+			DP_NOTICE(p_hwfn,
+				  "Failed to send a transceiver read command to the MFW. rc = %d.\n",
+				  rc);
+			return rc;
+		}
+
+		if (resp == FW_MSG_CODE_TRANSCEIVER_NOT_PRESENT)
+			return -ENODEV;
+		else if (resp != FW_MSG_CODE_TRANSCEIVER_DIAG_OK)
+			return -EINVAL;
+
+		offset += buf_size;
+		bytes_left -= buf_size;
+	}
+
+	return 0;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 632a838f1fe3..047976d5c6e9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -839,6 +839,22 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
 		       u32 *o_mcp_resp,
 		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
 
+/**
+ * @brief Read from sfp
+ *
+ *  @param p_hwfn - hw function
+ *  @param p_ptt  - PTT required for register access
+ *  @param port   - transceiver port
+ *  @param addr   - I2C address
+ *  @param offset - offset in sfp
+ *  @param len    - buffer length
+ *  @param p_buf  - buffer to read into
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			 u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf);
+
 /**
  * @brief indicates whether the MFW objects [under mcp_info] are accessible
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b4040023cbfb..8cd34645e892 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -759,6 +759,9 @@ struct qed_generic_tlvs {
 	u8 mac[QED_TLV_MAC_COUNT][ETH_ALEN];
 };
 
+#define QED_I2C_DEV_ADDR_A0 0xA0
+#define QED_I2C_DEV_ADDR_A2 0xA2
+
 #define QED_NVM_SIGNATURE 0x12435687
 
 enum qed_nvm_flash_cmd {
@@ -1026,6 +1029,18 @@ struct qed_common_ops {
  * @param enabled - true iff WoL should be enabled.
  */
 	int (*update_wol) (struct qed_dev *cdev, bool enabled);
+
+/**
+ * @brief read_module_eeprom
+ *
+ * @param cdev
+ * @param buf - buffer
+ * @param dev_addr - PHY device memory region
+ * @param offset - offset into eeprom contents to be read
+ * @param len - buffer length, i.e., max bytes to be read
+ */
+	int (*read_module_eeprom)(struct qed_dev *cdev,
+				  char *buf, u8 dev_addr, u32 offset, u32 len);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From bc56b33404599edc412b91933d74b36873e8ea25 Mon Sep 17 00:00:00 2001
From: Benedict Wong <benedictwong@google.com>
Date: Thu, 19 Jul 2018 10:50:44 -0700
Subject: xfrm: Remove xfrmi interface ID from flowi

In order to remove performance impact of having the extra u32 in every
single flowi, this change removes the flowi_xfrm struct, prefering to
take the if_id as a method parameter where needed.

In the inbound direction, if_id is only needed during the
__xfrm_check_policy() function, and the if_id can be determined at that
point based on the skb. As such, xfrmi_decode_session() is only called
with the skb in __xfrm_check_policy().

In the outbound direction, the only place where if_id is needed is the
xfrm_lookup() call in xfrmi_xmit2(). With this change, the if_id is
directly passed into the xfrm_lookup_with_ifid() call. All existing
callers can still call xfrm_lookup(), which uses a default if_id of 0.

This change does not change any behavior of XFRMIs except for improving
overall system performance via flowi size reduction.

This change has been tested against the Android Kernel Networking Tests:

https://android.googlesource.com/kernel/tests/+/master/net/test

Signed-off-by: Benedict Wong <benedictwong@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h         | 14 +++++++
 include/net/flow.h        |  9 -----
 include/net/xfrm.h        |  2 +-
 net/xfrm/xfrm_interface.c |  4 +-
 net/xfrm/xfrm_policy.c    | 98 +++++++++++++++++++++++++++++++----------------
 net/xfrm/xfrm_state.c     |  3 +-
 6 files changed, 83 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index b3219cd8a5a1..7f735e76ca73 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -475,6 +475,14 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
 	return dst_orig;
 }
 
+static inline struct dst_entry *
+xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig,
+		      const struct flowi *fl, const struct sock *sk,
+		      int flags, u32 if_id)
+{
+	return dst_orig;
+}
+
 static inline struct dst_entry *xfrm_lookup_route(struct net *net,
 						  struct dst_entry *dst_orig,
 						  const struct flowi *fl,
@@ -494,6 +502,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 			      const struct flowi *fl, const struct sock *sk,
 			      int flags);
 
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+					struct dst_entry *dst_orig,
+					const struct flowi *fl,
+					const struct sock *sk, int flags,
+					u32 if_id);
+
 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
 				    const struct flowi *fl, const struct sock *sk,
 				    int flags);
diff --git a/include/net/flow.h b/include/net/flow.h
index 187c9bef672f..8ce21793094e 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -26,10 +26,6 @@ struct flowi_tunnel {
 	__be64			tun_id;
 };
 
-struct flowi_xfrm {
-	__u32			if_id;
-};
-
 struct flowi_common {
 	int	flowic_oif;
 	int	flowic_iif;
@@ -43,7 +39,6 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
-	struct flowi_xfrm xfrm;
 	kuid_t  flowic_uid;
 };
 
@@ -83,7 +78,6 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
-#define flowi4_xfrm		__fl_common.xfrm
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
@@ -115,7 +109,6 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
 	fl4->flowi4_flags = flags;
 	fl4->flowi4_secid = 0;
 	fl4->flowi4_tun_key.tun_id = 0;
-	fl4->flowi4_xfrm.if_id = 0;
 	fl4->flowi4_uid = uid;
 	fl4->daddr = daddr;
 	fl4->saddr = saddr;
@@ -145,7 +138,6 @@ struct flowi6 {
 #define flowi6_secid		__fl_common.flowic_secid
 #define flowi6_tun_key		__fl_common.flowic_tun_key
 #define flowi6_uid		__fl_common.flowic_uid
-#define flowi6_xfrm		__fl_common.xfrm
 	struct in6_addr		daddr;
 	struct in6_addr		saddr;
 	/* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -193,7 +185,6 @@ struct flowi {
 #define flowi_secid	u.__fl_common.flowic_secid
 #define flowi_tun_key	u.__fl_common.flowic_tun_key
 #define flowi_uid	u.__fl_common.flowic_uid
-#define flowi_xfrm	u.__fl_common.xfrm
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1350e2cf0749..ca820945f30c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1557,7 +1557,7 @@ struct xfrm_state *xfrm_state_find(const xfrm_address_t *daddr,
 				   const struct flowi *fl,
 				   struct xfrm_tmpl *tmpl,
 				   struct xfrm_policy *pol, int *err,
-				   unsigned short family);
+				   unsigned short family, u32 if_id);
 struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
 				       xfrm_address_t *daddr,
 				       xfrm_address_t *saddr,
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 31cb1c7e3881..ccfe18d67e98 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -307,10 +307,8 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	if (!dst)
 		goto tx_err_link_failure;
 
-	fl->flowi_xfrm.if_id = xi->p.if_id;
-
 	dst_hold(dst);
-	dst = xfrm_lookup(xi->net, dst, fl, NULL, 0);
+	dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
 	if (IS_ERR(dst)) {
 		err = PTR_ERR(dst);
 		dst = NULL;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5d2f734f4309..2f70fe68b9b0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1068,14 +1068,14 @@ EXPORT_SYMBOL(xfrm_policy_walk_done);
  */
 static int xfrm_policy_match(const struct xfrm_policy *pol,
 			     const struct flowi *fl,
-			     u8 type, u16 family, int dir)
+			     u8 type, u16 family, int dir, u32 if_id)
 {
 	const struct xfrm_selector *sel = &pol->selector;
 	int ret = -ESRCH;
 	bool match;
 
 	if (pol->family != family ||
-	    pol->if_id != fl->flowi_xfrm.if_id ||
+	    pol->if_id != if_id ||
 	    (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
 	    pol->type != type)
 		return ret;
@@ -1090,7 +1090,8 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
 
 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 						     const struct flowi *fl,
-						     u16 family, u8 dir)
+						     u16 family, u8 dir,
+						     u32 if_id)
 {
 	int err;
 	struct xfrm_policy *pol, *ret;
@@ -1114,7 +1115,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 	priority = ~0U;
 	ret = NULL;
 	hlist_for_each_entry_rcu(pol, chain, bydst) {
-		err = xfrm_policy_match(pol, fl, type, family, dir);
+		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
 		if (err) {
 			if (err == -ESRCH)
 				continue;
@@ -1133,7 +1134,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 		if ((pol->priority >= priority) && ret)
 			break;
 
-		err = xfrm_policy_match(pol, fl, type, family, dir);
+		err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
 		if (err) {
 			if (err == -ESRCH)
 				continue;
@@ -1158,21 +1159,25 @@ fail:
 	return ret;
 }
 
-static struct xfrm_policy *
-xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
+static struct xfrm_policy *xfrm_policy_lookup(struct net *net,
+					      const struct flowi *fl,
+					      u16 family, u8 dir, u32 if_id)
 {
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
 
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family,
+					dir, if_id);
 	if (pol != NULL)
 		return pol;
 #endif
-	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family,
+					 dir, if_id);
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
-						 const struct flowi *fl, u16 family)
+						 const struct flowi *fl,
+						 u16 family, u32 if_id)
 {
 	struct xfrm_policy *pol;
 
@@ -1191,7 +1196,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 		match = xfrm_selector_match(&pol->selector, fl, family);
 		if (match) {
 			if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
-			    pol->if_id != fl->flowi_xfrm.if_id) {
+			    pol->if_id != if_id) {
 				pol = NULL;
 				goto out;
 			}
@@ -1405,7 +1410,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
 			}
 		}
 
-		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
+		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error,
+				    family, policy->if_id);
 
 		if (x && x->km.state == XFRM_STATE_VALID) {
 			xfrm[nx++] = x;
@@ -1708,7 +1714,8 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
 		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
 						    XFRM_POLICY_TYPE_MAIN,
 						    fl, family,
-						    XFRM_POLICY_OUT);
+						    XFRM_POLICY_OUT,
+						    pols[0]->if_id);
 		if (pols[1]) {
 			if (IS_ERR(pols[1])) {
 				xfrm_pols_put(pols, *num_pols);
@@ -1942,8 +1949,10 @@ free_dst:
 	goto out;
 }
 
-static struct xfrm_dst *
-xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo)
+static struct xfrm_dst *xfrm_bundle_lookup(struct net *net,
+					   const struct flowi *fl,
+					   u16 family, u8 dir,
+					   struct xfrm_flo *xflo, u32 if_id)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	int num_pols = 0, num_xfrms = 0, err;
@@ -1952,7 +1961,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	/* Resolve policies to use if we couldn't get them from
 	 * previous cache entry */
 	num_pols = 1;
-	pols[0] = xfrm_policy_lookup(net, fl, family, dir);
+	pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id);
 	err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 	if (err < 0)
@@ -2020,14 +2029,19 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
 	return ret;
 }
 
-/* Main function: finds/creates a bundle for given flow.
+/* Finds/creates a bundle for given flow and if_id
  *
  * At the moment we eat a raw IP route. Mostly to speed up lookups
  * on interfaces with disabled IPsec.
+ *
+ * xfrm_lookup uses an if_id of 0 by default, and is provided for
+ * compatibility
  */
-struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
-			      const struct flowi *fl,
-			      const struct sock *sk, int flags)
+struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
+					struct dst_entry *dst_orig,
+					const struct flowi *fl,
+					const struct sock *sk,
+					int flags, u32 if_id)
 {
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
 	struct xfrm_dst *xdst;
@@ -2043,7 +2057,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 	sk = sk_const_to_full_sk(sk);
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
 		num_pols = 1;
-		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family,
+						if_id);
 		err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 		if (err < 0)
@@ -2087,7 +2102,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo);
+		xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
 		if (xdst == NULL)
 			goto nopol;
 		if (IS_ERR(xdst)) {
@@ -2168,6 +2183,19 @@ dropdst:
 	xfrm_pols_put(pols, drop_pols);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(xfrm_lookup_with_ifid);
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
+			      const struct flowi *fl, const struct sock *sk,
+			      int flags)
+{
+	return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0);
+}
 EXPORT_SYMBOL(xfrm_lookup);
 
 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
@@ -2257,19 +2285,12 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
 			  unsigned int family, int reverse)
 {
 	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	const struct xfrm_if_cb *ifcb = xfrm_if_get_cb();
-	struct xfrm_if *xi;
 	int err;
 
 	if (unlikely(afinfo == NULL))
 		return -EAFNOSUPPORT;
 
 	afinfo->decode_session(skb, fl, reverse);
-	if (ifcb) {
-		xi = ifcb->decode_session(skb);
-		if (xi)
-			fl->flowi_xfrm.if_id = xi->p.if_id;
-	}
 
 	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
 	rcu_read_unlock();
@@ -2301,6 +2322,19 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	int reverse;
 	struct flowi fl;
 	int xerr_idx = -1;
+	const struct xfrm_if_cb *ifcb;
+	struct xfrm_if *xi;
+	u32 if_id = 0;
+
+	rcu_read_lock();
+	ifcb = xfrm_if_get_cb();
+
+	if (ifcb) {
+		xi = ifcb->decode_session(skb);
+		if (xi)
+			if_id = xi->p.if_id;
+	}
+	rcu_read_unlock();
 
 	reverse = dir & ~XFRM_POLICY_MASK;
 	dir &= XFRM_POLICY_MASK;
@@ -2328,7 +2362,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	pol = NULL;
 	sk = sk_to_full_sk(sk);
 	if (sk && sk->sk_policy[dir]) {
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id);
 		if (IS_ERR(pol)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
 			return 0;
@@ -2336,7 +2370,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	}
 
 	if (!pol)
-		pol = xfrm_policy_lookup(net, &fl, family, dir);
+		pol = xfrm_policy_lookup(net, &fl, family, dir, if_id);
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2360,7 +2394,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
 		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
 						    &fl, family,
-						    XFRM_POLICY_IN);
+						    XFRM_POLICY_IN, if_id);
 		if (pols[1]) {
 			if (IS_ERR(pols[1])) {
 				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 27c84e63c7ff..bd5cb7ad2447 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -930,7 +930,7 @@ struct xfrm_state *
 xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		const struct flowi *fl, struct xfrm_tmpl *tmpl,
 		struct xfrm_policy *pol, int *err,
-		unsigned short family)
+		unsigned short family, u32 if_id)
 {
 	static xfrm_address_t saddr_wildcard = { };
 	struct net *net = xp_net(pol);
@@ -940,7 +940,6 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	int error = 0;
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
-	u32 if_id = fl->flowi_xfrm.if_id;
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
-- 
cgit v1.2.3


From 36fc3c8c282c01ad1570bd864de52f128d731b75 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 19 Jul 2018 22:14:31 -0700
Subject: bpf: btf: Clean up BTF_INT_BITS() in uapi btf.h

This patch shrinks the BTF_INT_BITS() mask.  The current
btf_int_check_meta() ensures the nr_bits of an integer
cannot exceed 64.  Hence, it is mostly an uapi cleanup.

The actual btf usage (i.e. seq_show()) is also modified
to use u8 instead of u16.  The verification (e.g. btf_int_check_meta())
path stays as is to deal with invalid BTF situation.

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/btf.h |  2 +-
 kernel/bpf/btf.c         | 16 ++++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 0b5ddbe135a4..972265f32871 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -76,7 +76,7 @@ struct btf_type {
  */
 #define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
 #define BTF_INT_OFFSET(VAL)	(((VAL  & 0x00ff0000)) >> 16)
-#define BTF_INT_BITS(VAL)	((VAL)  & 0x0000ffff)
+#define BTF_INT_BITS(VAL)	((VAL)  & 0x000000ff)
 
 /* Attributes stored in the BTF_INT_ENCODING */
 #define BTF_INT_SIGNED	(1 << 0)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e016ac3afa24..9704934252b3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -450,7 +450,7 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
  */
 static bool btf_type_int_is_regular(const struct btf_type *t)
 {
-	u16 nr_bits, nr_bytes;
+	u8 nr_bits, nr_bytes;
 	u32 int_data;
 
 	int_data = btf_type_int(t);
@@ -993,12 +993,16 @@ static void btf_int_bits_seq_show(const struct btf *btf,
 {
 	u16 left_shift_bits, right_shift_bits;
 	u32 int_data = btf_type_int(t);
-	u16 nr_bits = BTF_INT_BITS(int_data);
-	u16 total_bits_offset;
-	u16 nr_copy_bytes;
-	u16 nr_copy_bits;
+	u8 nr_bits = BTF_INT_BITS(int_data);
+	u8 total_bits_offset;
+	u8 nr_copy_bytes;
+	u8 nr_copy_bits;
 	u64 print_num;
 
+	/*
+	 * bits_offset is at most 7.
+	 * BTF_INT_OFFSET() cannot exceed 64 bits.
+	 */
 	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
 	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
 	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
@@ -1028,7 +1032,7 @@ static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
 	u32 int_data = btf_type_int(t);
 	u8 encoding = BTF_INT_ENCODING(int_data);
 	bool sign = encoding & BTF_INT_SIGNED;
-	u32 nr_bits = BTF_INT_BITS(int_data);
+	u8 nr_bits = BTF_INT_BITS(int_data);
 
 	if (bits_offset || BTF_INT_OFFSET(int_data) ||
 	    BITS_PER_BYTE_MASKED(nr_bits)) {
-- 
cgit v1.2.3


From 2db1581e1f432ac6b4efe152c57fdfb4de85c154 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sun, 8 Jul 2018 14:23:21 +0800
Subject: Revert "iommu/vt-d: Clean up pasid quirk for pre-production devices"

This reverts commit ab96746aaa344fb720a198245a837e266fad3b62.

The commit ab96746aaa34 ("iommu/vt-d: Clean up pasid quirk for
pre-production devices") triggers ECS mode on some platforms
which have broken ECS support. As the result, graphic device
will be inoperable on boot.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107017

Cc: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 32 ++++++++++++++++++++++++++++++--
 include/linux/intel-iommu.h |  1 +
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b344a883f116..115ff26e9ced 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -484,14 +484,37 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int intel_iommu_ecs = 1;
+static int intel_iommu_pasid28;
 static int iommu_identity_mapping;
 
 #define IDENTMAP_ALL		1
 #define IDENTMAP_GFX		2
 #define IDENTMAP_AZALIA		4
 
-#define ecs_enabled(iommu)	(intel_iommu_ecs && ecap_ecs(iommu->ecap))
-#define pasid_enabled(iommu)	(ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
+/* Broadwell and Skylake have broken ECS support — normal so-called "second
+ * level" translation of DMA requests-without-PASID doesn't actually happen
+ * unless you also set the NESTE bit in an extended context-entry. Which of
+ * course means that SVM doesn't work because it's trying to do nested
+ * translation of the physical addresses it finds in the process page tables,
+ * through the IOVA->phys mapping found in the "second level" page tables.
+ *
+ * The VT-d specification was retroactively changed to change the definition
+ * of the capability bits and pretend that Broadwell/Skylake never happened...
+ * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+ * for some reason it was the PASID capability bit which was redefined (from
+ * bit 28 on BDW/SKL to bit 40 in future).
+ *
+ * So our test for ECS needs to eschew those implementations which set the old
+ * PASID capabiity bit 28, since those are the ones on which ECS is broken.
+ * Unless we are working around the 'pasid28' limitations, that is, by putting
+ * the device into passthrough mode for normal DMA and thus masking the bug.
+ */
+#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
+			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+/* PASID support is thus enabled if ECS is enabled and *either* of the old
+ * or new capability bits are set. */
+#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
+			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
 
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -554,6 +577,11 @@ static int __init intel_iommu_setup(char *str)
 			printk(KERN_INFO
 				"Intel-IOMMU: disable extended context table support\n");
 			intel_iommu_ecs = 0;
+		} else if (!strncmp(str, "pasid28", 7)) {
+			printk(KERN_INFO
+				"Intel-IOMMU: enable pre-production PASID support\n");
+			intel_iommu_pasid28 = 1;
+			iommu_identity_mapping |= IDENTMAP_GFX;
 		} else if (!strncmp(str, "tboot_noforce", 13)) {
 			printk(KERN_INFO
 				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1df940196ab2..ef169d67df92 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -121,6 +121,7 @@
 #define ecap_srs(e)		((e >> 31) & 0x1)
 #define ecap_ers(e)		((e >> 30) & 0x1)
 #define ecap_prs(e)		((e >> 29) & 0x1)
+#define ecap_broken_pasid(e)	((e >> 28) & 0x1)
 #define ecap_dis(e)		((e >> 27) & 0x1)
 #define ecap_nest(e)		((e >> 26) & 0x1)
 #define ecap_mts(e)		((e >> 25) & 0x1)
-- 
cgit v1.2.3


From 51261aac51a05c791ef880a100ac2ceed201ef72 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:55 +0800
Subject: iommu/vt-d: Avoid using idr_for_each_entry()

idr_for_each_entry() is used to iteratte over idr elements
of a given type. It isn't suitable for the globle pasid idr
since the pasid idr consumer could specify different types
of pointers to bind with a pasid.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 14 ++++++++++----
 include/linux/intel-iommu.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 2cc0aac93201..36cc1d1b8afc 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -298,6 +298,7 @@ static const struct mmu_notifier_ops intel_mmuops = {
 };
 
 static DEFINE_MUTEX(pasid_mutex);
+static LIST_HEAD(global_svm_list);
 
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
 {
@@ -329,13 +330,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 
 	mutex_lock(&pasid_mutex);
 	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
-		int i;
+		struct intel_svm *t;
 
-		idr_for_each_entry(&iommu->pasid_idr, svm, i) {
-			if (svm->mm != mm ||
-			    (svm->flags & SVM_FLAG_PRIVATE_PASID))
+		list_for_each_entry(t, &global_svm_list, list) {
+			if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
 				continue;
 
+			svm = t;
 			if (svm->pasid >= pasid_max) {
 				dev_warn(dev,
 					 "Limited PASID width. Cannot use existing PASID %d\n",
@@ -404,6 +405,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		svm->mm = mm;
 		svm->flags = flags;
 		INIT_LIST_HEAD_RCU(&svm->devs);
+		INIT_LIST_HEAD(&svm->list);
 		ret = -ENOMEM;
 		if (mm) {
 			ret = mmu_notifier_register(&svm->notifier, mm);
@@ -430,6 +432,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		 */
 		if (cap_caching_mode(iommu->cap))
 			intel_flush_pasid_dev(svm, sdev, svm->pasid);
+
+		list_add_tail(&svm->list, &global_svm_list);
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
 
@@ -485,6 +489,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 					if (svm->mm)
 						mmu_notifier_unregister(&svm->notifier, svm->mm);
 
+					list_del(&svm->list);
+
 					/* We mandate that no page faults may be outstanding
 					 * for the PASID when intel_svm_unbind_mm() is called.
 					 * If that is not obeyed, subtle errors will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 6692b40ca814..e1e193855581 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -487,6 +487,7 @@ struct intel_svm {
 	int flags;
 	int pasid;
 	struct list_head devs;
+	struct list_head list;
 };
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
-- 
cgit v1.2.3


From af39507305fb83a5d3c475c2851f4d59545d8a18 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:56 +0800
Subject: iommu/vt-d: Apply global PASID in SVA

This patch applies the global pasid name space in the shared
virtual address (SVA) implementation.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 22 +++++++++++-----------
 include/linux/intel-iommu.h |  1 -
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 36cc1d1b8afc..56e65d0b1871 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -26,6 +26,8 @@
 #include <linux/interrupt.h>
 #include <asm/page.h>
 
+#include "intel-pasid.h"
+
 #define PASID_ENTRY_P		BIT_ULL(0)
 #define PASID_ENTRY_FLPM_5LP	BIT_ULL(9)
 #define PASID_ENTRY_SRE		BIT_ULL(11)
@@ -85,8 +87,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 				iommu->name);
 	}
 
-	idr_init(&iommu->pasid_idr);
-
 	return 0;
 }
 
@@ -102,7 +102,7 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
 		free_pages((unsigned long)iommu->pasid_state_table, order);
 		iommu->pasid_state_table = NULL;
 	}
-	idr_destroy(&iommu->pasid_idr);
+
 	return 0;
 }
 
@@ -392,9 +392,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 			pasid_max = iommu->pasid_max;
 
 		/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
-		ret = idr_alloc(&iommu->pasid_idr, svm,
-				!!cap_caching_mode(iommu->cap),
-				pasid_max - 1, GFP_KERNEL);
+		ret = intel_pasid_alloc_id(svm,
+					   !!cap_caching_mode(iommu->cap),
+					   pasid_max - 1, GFP_KERNEL);
 		if (ret < 0) {
 			kfree(svm);
 			kfree(sdev);
@@ -410,7 +410,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		if (mm) {
 			ret = mmu_notifier_register(&svm->notifier, mm);
 			if (ret) {
-				idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+				intel_pasid_free_id(svm->pasid);
 				kfree(svm);
 				kfree(sdev);
 				goto out;
@@ -460,7 +460,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 	if (!iommu || !iommu->pasid_table)
 		goto out;
 
-	svm = idr_find(&iommu->pasid_idr, pasid);
+	svm = intel_pasid_lookup_id(pasid);
 	if (!svm)
 		goto out;
 
@@ -485,7 +485,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 					svm->iommu->pasid_table[svm->pasid].val = 0;
 					wmb();
 
-					idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+					intel_pasid_free_id(svm->pasid);
 					if (svm->mm)
 						mmu_notifier_unregister(&svm->notifier, svm->mm);
 
@@ -520,7 +520,7 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid)
 	if (!iommu || !iommu->pasid_table)
 		goto out;
 
-	svm = idr_find(&iommu->pasid_idr, pasid);
+	svm = intel_pasid_lookup_id(pasid);
 	if (!svm)
 		goto out;
 
@@ -618,7 +618,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 
 		if (!svm || svm->pasid != req->pasid) {
 			rcu_read_lock();
-			svm = idr_find(&iommu->pasid_idr, req->pasid);
+			svm = intel_pasid_lookup_id(req->pasid);
 			/* It *can't* go away, because the driver is not permitted
 			 * to unbind the mm while any page faults are outstanding.
 			 * So we only need RCU to protect the internal idr code. */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e1e193855581..2ff15195f73d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -420,7 +420,6 @@ struct intel_iommu {
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
-	struct idr pasid_idr;
 	u32 pasid_max;
 #endif
 	struct q_inval  *qi;            /* Queued invalidation info */
-- 
cgit v1.2.3


From 9ddbfb42138d84bb326023616c40a3dc30ea2837 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:57 +0800
Subject: iommu/vt-d: Move device_domain_info to header

This allows the per device iommu data and some helpers to be
used in other files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 63 +++------------------------------------------
 include/linux/intel-iommu.h | 61 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fa15ed036ddc..bb7f2b2a37ee 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -381,61 +381,6 @@ static int hw_pass_through = 1;
 	for (idx = 0; idx < g_num_of_iommus; idx++)		\
 		if (domain->iommu_refcnt[idx])
 
-struct dmar_domain {
-	int	nid;			/* node id */
-
-	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
-					/* Refcount of devices per iommu */
-
-
-	u16		iommu_did[DMAR_UNITS_SUPPORTED];
-					/* Domain ids per IOMMU. Use u16 since
-					 * domain ids are 16 bit wide according
-					 * to VT-d spec, section 9.3 */
-
-	bool has_iotlb_device;
-	struct list_head devices;	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
-
-	struct dma_pte	*pgd;		/* virtual address */
-	int		gaw;		/* max guest address width */
-
-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
-
-	int		flags;		/* flags to find out type of domain */
-
-	int		iommu_coherency;/* indicate coherency of iommu access */
-	int		iommu_snooping; /* indicate snooping control feature*/
-	int		iommu_count;	/* reference count of iommu */
-	int		iommu_superpage;/* Level of superpages supported:
-					   0 == 4KiB (no superpages), 1 == 2MiB,
-					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
-	u64		max_addr;	/* maximum mapped address */
-
-	struct iommu_domain domain;	/* generic domain data structure for
-					   iommu core */
-};
-
-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus number */
-	u8 devfn;		/* PCI devfn number */
-	u16 pfsid;		/* SRIOV physical function source ID */
-	u8 pasid_supported:3;
-	u8 pasid_enabled:1;
-	u8 pri_supported:1;
-	u8 pri_enabled:1;
-	u8 ats_supported:1;
-	u8 ats_enabled:1;
-	u8 ats_qdep;
-	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
-	struct intel_iommu *iommu; /* IOMMU used by this device */
-	struct dmar_domain *domain; /* pointer to domain */
-};
-
 struct dmar_rmrr_unit {
 	struct list_head list;		/* list of rmrr units	*/
 	struct acpi_dmar_header *hdr;	/* ACPI header		*/
@@ -604,7 +549,7 @@ static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
 		domains[did & 0xff] = domain;
 }
 
-static inline void *alloc_pgtable_page(int node)
+void *alloc_pgtable_page(int node)
 {
 	struct page *page;
 	void *vaddr = NULL;
@@ -615,7 +560,7 @@ static inline void *alloc_pgtable_page(int node)
 	return vaddr;
 }
 
-static inline void free_pgtable_page(void *vaddr)
+void free_pgtable_page(void *vaddr)
 {
 	free_page((unsigned long)vaddr);
 }
@@ -698,7 +643,7 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
 }
 
 /* This functionin only returns single iommu in a domain */
-static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 {
 	int iommu_id;
 
@@ -3528,7 +3473,7 @@ static unsigned long intel_alloc_iova(struct device *dev,
 	return iova_pfn;
 }
 
-static struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
 {
 	struct dmar_domain *domain, *tmp;
 	struct dmar_rmrr_unit *rmrr;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2ff15195f73d..2e1fbde020ca 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/iommu.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmar.h>
 
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -387,6 +388,42 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+struct dmar_domain {
+	int	nid;			/* node id */
+
+	unsigned	iommu_refcnt[DMAR_UNITS_SUPPORTED];
+					/* Refcount of devices per iommu */
+
+
+	u16		iommu_did[DMAR_UNITS_SUPPORTED];
+					/* Domain ids per IOMMU. Use u16 since
+					 * domain ids are 16 bit wide according
+					 * to VT-d spec, section 9.3 */
+
+	bool has_iotlb_device;
+	struct list_head devices;	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+	int		flags;		/* flags to find out type of domain */
+
+	int		iommu_coherency;/* indicate coherency of iommu access */
+	int		iommu_snooping; /* indicate snooping control feature*/
+	int		iommu_count;	/* reference count of iommu */
+	int		iommu_superpage;/* Level of superpages supported:
+					   0 == 4KiB (no superpages), 1 == 2MiB,
+					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+	u64		max_addr;	/* maximum mapped address */
+
+	struct iommu_domain domain;	/* generic domain data structure for
+					   iommu core */
+};
+
 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64 		reg_phys; /* physical address of hw register set */
@@ -435,6 +472,25 @@ struct intel_iommu {
 	u32		flags;      /* Software defined flags */
 };
 
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus number */
+	u8 devfn;		/* PCI devfn number */
+	u16 pfsid;		/* SRIOV physical function source ID */
+	u8 pasid_supported:3;
+	u8 pasid_enabled:1;
+	u8 pri_supported:1;
+	u8 pri_enabled:1;
+	u8 ats_supported:1;
+	u8 ats_enabled:1;
+	u8 ats_qdep;
+	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
+	struct intel_iommu *iommu; /* IOMMU used by this device */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
 static inline void __iommu_flush_cache(
 	struct intel_iommu *iommu, void *addr, int size)
 {
@@ -460,6 +516,11 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern int dmar_ir_support(void);
 
+struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
+void *alloc_pgtable_page(int node);
+void free_pgtable_page(void *vaddr);
+struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
 extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From 85319dcc8955f8f31828dc8bafff29f6aa011d93 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:58 +0800
Subject: iommu/vt-d: Add for_each_device_domain() helper

This adds a helper named for_each_device_domain() to iterate
over the elements in device_domain_list and invoke a callback
against each element. This allows to search the device_domain
list in other source files.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 21 +++++++++++++++++++++
 include/linux/intel-iommu.h |  2 ++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bb7f2b2a37ee..f020da439ace 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -448,6 +448,27 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
+/*
+ * Iterate over elements in device_domain_list and call the specified
+ * callback @fn against each element. This helper should only be used
+ * in the context where the device_domain_lock has already been holden.
+ */
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+				     void *data), void *data)
+{
+	int ret = 0;
+	struct device_domain_info *info;
+
+	assert_spin_locked(&device_domain_lock);
+	list_for_each_entry(info, &device_domain_list, global) {
+		ret = fn(info, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 const struct iommu_ops intel_iommu_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 2e1fbde020ca..4fd4c6fee93e 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -520,6 +520,8 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev);
 void *alloc_pgtable_page(int node);
 void free_pgtable_page(void *vaddr);
 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
+int for_each_device_domain(int (*fn)(struct device_domain_info *info,
+				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-- 
cgit v1.2.3


From cc580e41260dbf1a46269235f1f2b572137d9d03 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:46:59 +0800
Subject: iommu/vt-d: Per PCI device pasid table interfaces

This patch adds the interfaces for per PCI device pasid
table management. Currently we allocate one pasid table
for all PCI devices under the scope of an IOMMU. It's
insecure in some cases where multiple devices under one
single IOMMU unit support PASID features. With per PCI
device pasid table, we can achieve finer protection and
isolation granularity.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |   1 +
 drivers/iommu/intel-pasid.c | 179 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel-pasid.h |  18 +++++
 drivers/iommu/intel-svm.c   |   4 -
 include/linux/intel-iommu.h |   2 +
 5 files changed, 200 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f020da439ace..211925a75fb4 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2451,6 +2451,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	info->dev = dev;
 	info->domain = domain;
 	info->iommu = iommu;
+	info->pasid_table = NULL;
 
 	if (dev && dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(info->dev);
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index e918fe01ce7f..fe95c9bd4d33 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -13,6 +13,8 @@
 #include <linux/intel-iommu.h>
 #include <linux/iommu.h>
 #include <linux/memory.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/spinlock.h>
 
 #include "intel-pasid.h"
@@ -58,3 +60,180 @@ void *intel_pasid_lookup_id(int pasid)
 
 	return p;
 }
+
+/*
+ * Per device pasid table management:
+ */
+static inline void
+device_attach_pasid_table(struct device_domain_info *info,
+			  struct pasid_table *pasid_table)
+{
+	info->pasid_table = pasid_table;
+	list_add(&info->table, &pasid_table->dev);
+}
+
+static inline void
+device_detach_pasid_table(struct device_domain_info *info,
+			  struct pasid_table *pasid_table)
+{
+	info->pasid_table = NULL;
+	list_del(&info->table);
+}
+
+struct pasid_table_opaque {
+	struct pasid_table	**pasid_table;
+	int			segment;
+	int			bus;
+	int			devfn;
+};
+
+static int search_pasid_table(struct device_domain_info *info, void *opaque)
+{
+	struct pasid_table_opaque *data = opaque;
+
+	if (info->iommu->segment == data->segment &&
+	    info->bus == data->bus &&
+	    info->devfn == data->devfn &&
+	    info->pasid_table) {
+		*data->pasid_table = info->pasid_table;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+	struct pasid_table_opaque *data = opaque;
+
+	data->segment = pci_domain_nr(pdev->bus);
+	data->bus = PCI_BUS_NUM(alias);
+	data->devfn = alias & 0xff;
+
+	return for_each_device_domain(&search_pasid_table, data);
+}
+
+/*
+ * Allocate a pasid table for @dev. It should be called in a
+ * single-thread context.
+ */
+int intel_pasid_alloc_table(struct device *dev)
+{
+	struct device_domain_info *info;
+	struct pasid_table *pasid_table;
+	struct pasid_table_opaque data;
+	struct page *pages;
+	size_t size, count;
+	int ret, order;
+
+	info = dev->archdata.iommu;
+	if (WARN_ON(!info || !dev_is_pci(dev) ||
+		    !info->pasid_supported || info->pasid_table))
+		return -EINVAL;
+
+	/* DMA alias device already has a pasid table, use it: */
+	data.pasid_table = &pasid_table;
+	ret = pci_for_each_dma_alias(to_pci_dev(dev),
+				     &get_alias_pasid_table, &data);
+	if (ret)
+		goto attach_out;
+
+	pasid_table = kzalloc(sizeof(*pasid_table), GFP_ATOMIC);
+	if (!pasid_table)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&pasid_table->dev);
+
+	size = sizeof(struct pasid_entry);
+	count = min_t(int, pci_max_pasids(to_pci_dev(dev)), intel_pasid_max_id);
+	order = get_order(size * count);
+	pages = alloc_pages_node(info->iommu->node,
+				 GFP_ATOMIC | __GFP_ZERO,
+				 order);
+	if (!pages)
+		return -ENOMEM;
+
+	pasid_table->table = page_address(pages);
+	pasid_table->order = order;
+	pasid_table->max_pasid = count;
+
+attach_out:
+	device_attach_pasid_table(info, pasid_table);
+
+	return 0;
+}
+
+void intel_pasid_free_table(struct device *dev)
+{
+	struct device_domain_info *info;
+	struct pasid_table *pasid_table;
+
+	info = dev->archdata.iommu;
+	if (!info || !dev_is_pci(dev) ||
+	    !info->pasid_supported || !info->pasid_table)
+		return;
+
+	pasid_table = info->pasid_table;
+	device_detach_pasid_table(info, pasid_table);
+
+	if (!list_empty(&pasid_table->dev))
+		return;
+
+	free_pages((unsigned long)pasid_table->table, pasid_table->order);
+	kfree(pasid_table);
+}
+
+struct pasid_table *intel_pasid_get_table(struct device *dev)
+{
+	struct device_domain_info *info;
+
+	info = dev->archdata.iommu;
+	if (!info)
+		return NULL;
+
+	return info->pasid_table;
+}
+
+int intel_pasid_get_dev_max_id(struct device *dev)
+{
+	struct device_domain_info *info;
+
+	info = dev->archdata.iommu;
+	if (!info || !info->pasid_table)
+		return 0;
+
+	return info->pasid_table->max_pasid;
+}
+
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
+{
+	struct pasid_table *pasid_table;
+	struct pasid_entry *entries;
+
+	pasid_table = intel_pasid_get_table(dev);
+	if (WARN_ON(!pasid_table || pasid < 0 ||
+		    pasid >= intel_pasid_get_dev_max_id(dev)))
+		return NULL;
+
+	entries = pasid_table->table;
+
+	return &entries[pasid];
+}
+
+/*
+ * Interfaces for PASID table entry manipulation:
+ */
+static inline void pasid_clear_entry(struct pasid_entry *pe)
+{
+	WRITE_ONCE(pe->val, 0);
+}
+
+void intel_pasid_clear_entry(struct device *dev, int pasid)
+{
+	struct pasid_entry *pe;
+
+	pe = intel_pasid_get_entry(dev, pasid);
+	if (WARN_ON(!pe))
+		return;
+
+	pasid_clear_entry(pe);
+}
diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
index b1c5296290e5..1c05ed6fc5a5 100644
--- a/drivers/iommu/intel-pasid.h
+++ b/drivers/iommu/intel-pasid.h
@@ -13,9 +13,27 @@
 #define PASID_MIN			0x1
 #define PASID_MAX			0x100000
 
+struct pasid_entry {
+	u64 val;
+};
+
+/* The representative of a PASID table */
+struct pasid_table {
+	void			*table;		/* pasid table pointer */
+	int			order;		/* page order of pasid table */
+	int			max_pasid;	/* max pasid */
+	struct list_head	dev;		/* device list */
+};
+
 extern u32 intel_pasid_max_id;
 int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
 void intel_pasid_free_id(int pasid);
 void *intel_pasid_lookup_id(int pasid);
+int intel_pasid_alloc_table(struct device *dev);
+void intel_pasid_free_table(struct device *dev);
+struct pasid_table *intel_pasid_get_table(struct device *dev);
+int intel_pasid_get_dev_max_id(struct device *dev);
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
+void intel_pasid_clear_entry(struct device *dev, int pasid);
 
 #endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 56e65d0b1871..9b5dc7262677 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -34,10 +34,6 @@
 
 static irqreturn_t prq_event_thread(int irq, void *d);
 
-struct pasid_entry {
-	u64 val;
-};
-
 struct pasid_state_entry {
 	u64 val;
 };
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4fd4c6fee93e..e7901d402337 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -476,6 +476,7 @@ struct intel_iommu {
 struct device_domain_info {
 	struct list_head link;	/* link to domain siblings */
 	struct list_head global; /* link to global list */
+	struct list_head table;	/* link to pasid table */
 	u8 bus;			/* PCI bus number */
 	u8 devfn;		/* PCI devfn number */
 	u16 pfsid;		/* SRIOV physical function source ID */
@@ -489,6 +490,7 @@ struct device_domain_info {
 	struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
 	struct intel_iommu *iommu; /* IOMMU used by this device */
 	struct dmar_domain *domain; /* pointer to domain */
+	struct pasid_table *pasid_table; /* pasid table */
 };
 
 static inline void __iommu_flush_cache(
-- 
cgit v1.2.3


From d9737953d85131436b09668b5e8d3389c37c1f28 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 14 Jul 2018 15:47:02 +0800
Subject: iommu/vt-d: Remove the obsolete per iommu pasid tables

The obsolete per iommu pasid tables are no longer used. Hence,
clean up them.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |  6 +++---
 drivers/iommu/intel-svm.c   | 17 ++---------------
 include/linux/intel-iommu.h |  5 ++---
 3 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f1a3c5e5aff0..7ed0221dccf8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1756,7 +1756,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
 	if (pasid_enabled(iommu)) {
 		if (ecap_prs(iommu->ecap))
 			intel_svm_finish_prq(iommu);
-		intel_svm_free_pasid_tables(iommu);
+		intel_svm_exit(iommu);
 	}
 #endif
 }
@@ -3337,7 +3337,7 @@ static int __init init_dmars(void)
 			hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
 		if (pasid_enabled(iommu))
-			intel_svm_alloc_pasid_tables(iommu);
+			intel_svm_init(iommu);
 #endif
 	}
 
@@ -4300,7 +4300,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	if (pasid_enabled(iommu))
-		intel_svm_alloc_pasid_tables(iommu);
+		intel_svm_init(iommu);
 #endif
 
 	if (dmaru->ignored) {
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a253cdeabd61..eb308363e541 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -38,7 +38,7 @@ struct pasid_state_entry {
 	u64 val;
 };
 
-int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
+int intel_svm_init(struct intel_iommu *iommu)
 {
 	struct page *pages;
 	int order;
@@ -63,15 +63,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 		iommu->pasid_max = 0x20000;
 
 	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
-	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
-	if (!pages) {
-		pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
-			iommu->name);
-		return -ENOMEM;
-	}
-	iommu->pasid_table = page_address(pages);
-	pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
-
 	if (ecap_dis(iommu->ecap)) {
 		/* Just making it explicit... */
 		BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
@@ -86,14 +77,10 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
 	return 0;
 }
 
-int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
+int intel_svm_exit(struct intel_iommu *iommu)
 {
 	int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
 
-	if (iommu->pasid_table) {
-		free_pages((unsigned long)iommu->pasid_table, order);
-		iommu->pasid_table = NULL;
-	}
 	if (iommu->pasid_state_table) {
 		free_pages((unsigned long)iommu->pasid_state_table, order);
 		iommu->pasid_state_table = NULL;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index e7901d402337..3c43882d3b77 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -453,7 +453,6 @@ struct intel_iommu {
 	 * devices away to userspace processes (e.g. for DPDK) and don't
 	 * want to trust that userspace will use *only* the PASID it was
 	 * told to. But while it's all driver-arbitrated, we're fine. */
-	struct pasid_entry *pasid_table;
 	struct pasid_state_entry *pasid_state_table;
 	struct page_req_dsc *prq;
 	unsigned char prq_name[16];    /* Name for PRQ interrupt */
@@ -526,8 +525,8 @@ int for_each_device_domain(int (*fn)(struct device_domain_info *info,
 				     void *data), void *data);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
-extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
-extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
+int intel_svm_init(struct intel_iommu *iommu);
+int intel_svm_exit(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
 
-- 
cgit v1.2.3


From b8088dda98b9064a2b3007fe54b03ede70a15602 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 17 Jul 2018 07:17:53 +0200
Subject: netfilter: nf_tables: use dev->name directly

no need to store the name in separate area.

Furthermore, it uses kmalloc but not kfree and most accesses seem to treat
it as char[IFNAMSIZ] not char *.

Remove this and use dev->name instead.

In case event zeroed dev, just omit the name in the dump.

Fixes: d92191aa84e5f1 ("netfilter: nf_tables: cache device name in flowtable object")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  1 -
 net/netfilter/nf_tables_api.c     | 14 +++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4e82a4c49912..dc417ef0a0c5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1124,7 +1124,6 @@ struct nft_flowtable {
 	u32				genmask:2,
 					use:30;
 	u64				handle;
-	char				*dev_name[NFT_FLOWTABLE_DEVICE_MAX];
 	/* runtime data below here */
 	struct nf_hook_ops		*ops ____cacheline_aligned;
 	struct nf_flowtable		data;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d41fa2c82f14..54a4f75ff9da 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5324,8 +5324,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 		flowtable->ops[i].priv		= &flowtable->data;
 		flowtable->ops[i].hook		= flowtable->data.type->hook;
 		flowtable->ops[i].dev		= dev_array[i];
-		flowtable->dev_name[i]		= kstrdup(dev_array[i]->name,
-							  GFP_KERNEL);
 	}
 
 	return err;
@@ -5483,10 +5481,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 err6:
 	i = flowtable->ops_len;
 err5:
-	for (k = i - 1; k >= 0; k--) {
-		kfree(flowtable->dev_name[k]);
+	for (k = i - 1; k >= 0; k--)
 		nf_unregister_net_hook(net, &flowtable->ops[k]);
-	}
 
 	kfree(flowtable->ops);
 err4:
@@ -5585,9 +5581,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 		goto nla_put_failure;
 
 	for (i = 0; i < flowtable->ops_len; i++) {
-		if (flowtable->dev_name[i][0] &&
-		    nla_put_string(skb, NFTA_DEVICE_NAME,
-				   flowtable->dev_name[i]))
+		const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
+
+		if (dev &&
+		    nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
 			goto nla_put_failure;
 	}
 	nla_nest_end(skb, nest_devs);
@@ -5829,7 +5826,6 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
 			continue;
 
 		nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
-		flowtable->dev_name[i][0] = '\0';
 		flowtable->ops[i].dev = NULL;
 		break;
 	}
-- 
cgit v1.2.3


From 5338073561bea23363e5552b647dad75bc7d0f11 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 20 Jul 2018 17:26:32 +0200
Subject: reset: meson: add dt-bindings for meson-axg audio arb

Add dt-bindings for the audio memory arbiter found on Amlogic's
A113 based SoCs

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../bindings/reset/amlogic,meson-axg-audio-arb.txt  | 21 +++++++++++++++++++++
 .../dt-bindings/reset/amlogic,meson-axg-audio-arb.h | 17 +++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
 create mode 100644 include/dt-bindings/reset/amlogic,meson-axg-audio-arb.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt b/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
new file mode 100644
index 000000000000..26e542eb96df
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/amlogic,meson-axg-audio-arb.txt
@@ -0,0 +1,21 @@
+* Amlogic audio memory arbiter controller
+
+The Amlogic Audio ARB is a simple device which enables or
+disables the access of Audio FIFOs to DDR on AXG based SoC.
+
+Required properties:
+- compatible: 'amlogic,meson-axg-audio-arb'
+- reg: physical base address of the controller and length of memory
+       mapped region.
+- clocks: phandle to the fifo peripheral clock provided by the audio
+	  clock controller.
+- #reset-cells: must be 1.
+
+Example on the A113 SoC:
+
+arb: reset-controller@280 {
+	compatible = "amlogic,meson-axg-audio-arb";
+	reg = <0x0 0x280 0x0 0x4>;
+	#reset-cells = <1>;
+	clocks = <&clkc_audio AUD_CLKID_DDR_ARB>;
+};
diff --git a/include/dt-bindings/reset/amlogic,meson-axg-audio-arb.h b/include/dt-bindings/reset/amlogic,meson-axg-audio-arb.h
new file mode 100644
index 000000000000..05c36367875c
--- /dev/null
+++ b/include/dt-bindings/reset/amlogic,meson-axg-audio-arb.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ *
+ * Copyright (c) 2018 Baylibre SAS.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ */
+
+#ifndef _DT_BINDINGS_AMLOGIC_MESON_AXG_AUDIO_ARB_H
+#define _DT_BINDINGS_AMLOGIC_MESON_AXG_AUDIO_ARB_H
+
+#define AXG_ARB_TODDR_A	0
+#define AXG_ARB_TODDR_B	1
+#define AXG_ARB_TODDR_C	2
+#define AXG_ARB_FRDDR_A	3
+#define AXG_ARB_FRDDR_B	4
+#define AXG_ARB_FRDDR_C	5
+
+#endif /* _DT_BINDINGS_AMLOGIC_MESON_AXG_AUDIO_ARB_H */
-- 
cgit v1.2.3


From cbdfab3b675f4c34258b0ec9e4707de44e1f6989 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 17 Jul 2018 17:43:02 +0200
Subject: ASoC: export snd_soc_of_get_slot_mask

Amlogic's axg card driver can't use snd_soc_of_parse_tdm_slot()
directly because it needs to handle 4 mask for each direction.
Yet the parsing of each mask is the same, so export
snd_soc_of_get_slot_mask() to reuse the the existing code.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 3 +++
 sound/soc/soc-core.c | 7 ++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index a23ecdf3eff1..ace474e8649e 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1433,6 +1433,9 @@ int snd_soc_of_parse_card_name(struct snd_soc_card *card,
 			       const char *propname);
 int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card,
 					  const char *propname);
+int snd_soc_of_get_slot_mask(struct device_node *np,
+			     const char *prop_name,
+			     unsigned int *mask);
 int snd_soc_of_parse_tdm_slot(struct device_node *np,
 			      unsigned int *tx_mask,
 			      unsigned int *rx_mask,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 08e189485009..ad5b0ef16d82 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3428,9 +3428,9 @@ int snd_soc_of_parse_audio_simple_widgets(struct snd_soc_card *card,
 }
 EXPORT_SYMBOL_GPL(snd_soc_of_parse_audio_simple_widgets);
 
-static int snd_soc_of_get_slot_mask(struct device_node *np,
-				    const char *prop_name,
-				    unsigned int *mask)
+int snd_soc_of_get_slot_mask(struct device_node *np,
+			     const char *prop_name,
+			     unsigned int *mask)
 {
 	u32 val;
 	const __be32 *of_slot_mask = of_get_property(np, prop_name, &val);
@@ -3445,6 +3445,7 @@ static int snd_soc_of_get_slot_mask(struct device_node *np,
 
 	return val;
 }
+EXPORT_SYMBOL_GPL(snd_soc_of_get_slot_mask);
 
 int snd_soc_of_parse_tdm_slot(struct device_node *np,
 			      unsigned int *tx_mask,
-- 
cgit v1.2.3


From 226ab561075f6f8f3cd5f7b3b7544f3997aab51f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:49:34 -0700
Subject: device-dax: Convert to vmf_insert_mixed and vm_fault_t

Use new return type vm_fault_t for fault and huge_fault handler. For
now, this is just documenting that the function returns a VM_FAULT value
rather than an errno.  Once all instances are converted, vm_fault_t will
become a distinct type.

Commit 1c8f422059ae ("mm: change return type to vm_fault_t")

Previously vm_insert_mixed() returned an error code which driver mapped into
VM_FAULT_* type. The new function vmf_insert_mixed() will replace this
inefficiency by returning VM_FAULT_* type.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/dax/device.c    | 26 +++++++++++---------------
 include/linux/huge_mm.h |  5 +++--
 mm/huge_memory.c        |  4 ++--
 3 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index de2f8297a210..ad5e7b4a15dc 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -244,11 +244,11 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 	return -1;
 }
 
-static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	struct device *dev = &dev_dax->dev;
 	struct dax_region *dax_region;
-	int rc = VM_FAULT_SIGBUS;
 	phys_addr_t phys;
 	pfn_t pfn;
 	unsigned int fault_size = PAGE_SIZE;
@@ -274,17 +274,11 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	rc = vm_insert_mixed(vmf->vma, vmf->address, pfn);
-
-	if (rc == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (rc < 0 && rc != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
+	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
 }
 
-static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
 	struct device *dev = &dev_dax->dev;
@@ -334,7 +328,8 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	unsigned long pud_addr = vmf->address & PUD_MASK;
 	struct device *dev = &dev_dax->dev;
@@ -384,13 +379,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
-static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
+static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
+				struct vm_fault *vmf)
 {
 	return VM_FAULT_FALLBACK;
 }
 #endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-static int dev_dax_huge_fault(struct vm_fault *vmf,
+static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
 	int rc, id;
@@ -420,7 +416,7 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	return rc;
 }
 
-static int dev_dax_fault(struct vm_fault *vmf)
+static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
 {
 	return dev_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..d3bbf6bea9e9 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -3,6 +3,7 @@
 #define _LINUX_HUGE_MM_H
 
 #include <linux/sched/coredump.h>
+#include <linux/mm_types.h>
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
@@ -46,9 +47,9 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
-int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			pmd_t *pmd, pfn_t pfn, bool write);
-int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 			pud_t *pud, pfn_t pfn, bool write);
 enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1cd7c1a57a14..feba371169ca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -752,7 +752,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 	spin_unlock(ptl);
 }
 
-int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
 			pmd_t *pmd, pfn_t pfn, bool write)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
@@ -812,7 +812,7 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 	spin_unlock(ptl);
 }
 
-int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 			pud_t *pud, pfn_t pfn, bool write)
 {
 	pgprot_t pgprot = vma->vm_page_prot;
-- 
cgit v1.2.3


From 27cde44a259c380a3c09066fc4b42de7dde9b1ad Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:35 -0700
Subject: tcp: do not cancel delay-AcK on DCTCP special ACK

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.

Reported-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp_dctcp.c  | 34 ++++------------------------------
 net/ipv4/tcp_output.c | 10 +++++++---
 3 files changed, 12 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3482d13d655b..a08de496d1b2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -539,6 +539,7 @@ void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
 void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
 void tcp_send_ack(struct sock *sk);
 void tcp_send_delayed_ack(struct sock *sk);
 void tcp_send_loss_probe(struct sock *sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5869f89ca656..078328afbfe3 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -133,21 +133,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=0. */
-		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -164,21 +151,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	 * ACK has not sent yet.
 	 */
 	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
-		u32 tmp_rcv_nxt;
-
-		/* Save current rcv_nxt. */
-		tmp_rcv_nxt = tp->rcv_nxt;
-
-		/* Generate previous ack with CE=1. */
-		tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
-		tp->rcv_nxt = ca->prior_rcv_nxt;
-
-		tcp_send_ack(sk);
-
-		/* Recover current rcv_nxt. */
-		tp->rcv_nxt = tmp_rcv_nxt;
-	}
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+		__tcp_send_ack(sk, ca->prior_rcv_nxt);
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ee1b0705321d..c4172c1fb198 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 }
 
 /* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+				      u32 rcv_nxt)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
 	}
+
+	if (unlikely(rcv_nxt != tp->rcv_nxt))
+		return;  /* Special ACK sent by DCTCP to reflect ECN */
 	tcp_dec_quickack_mode(sk, pkts);
 	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 }
@@ -1141,7 +1145,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->tcp_flags & TCPHDR_ACK))
-		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
 
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
@@ -3613,12 +3617,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
 	/* Send it off, this clears delayed acks for us. */
 	__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
 }
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
 
 void tcp_send_ack(struct sock *sk)
 {
 	__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
 }
-EXPORT_SYMBOL_GPL(tcp_send_ack);
 
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.
-- 
cgit v1.2.3


From a0496ef2c23b3b180902dd185d0d63ccbc624cf8 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Jul 2018 13:56:36 -0700
Subject: tcp: do not delay ACK in DCTCP upon CE status change

Per DCTCP RFC8257 (Section 3.2) the ACK reflecting the CE status change
has to be sent immediately so the sender can respond quickly:

""" When receiving packets, the CE codepoint MUST be processed as follows:

   1.  If the CE codepoint is set and DCTCP.CE is false, set DCTCP.CE to
       true and send an immediate ACK.

   2.  If the CE codepoint is not set and DCTCP.CE is true, set DCTCP.CE
       to false and send an immediate ACK.
"""

Previously DCTCP implementation may continue to delay the ACK. This
patch fixes that to implement the RFC by forcing an immediate ACK.

Tested with this packetdrill script provided by Larry Brakmo

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 setsockopt(3, SOL_TCP, TCP_CONGESTION, "dctcp", 5) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < [ect0] SEW 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
0.100 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
0.110 < [ect0] . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
   +0 setsockopt(4, SOL_SOCKET, SO_DEBUG, [1], 4) = 0

0.200 < [ect0] . 1:1001(1000) ack 1 win 257
0.200 > [ect01] . 1:1(0) ack 1001

0.200 write(4, ..., 1) = 1
0.200 > [ect01] P. 1:2(1) ack 1001

0.200 < [ect0] . 1001:2001(1000) ack 2 win 257
+0.005 < [ce] . 2001:3001(1000) ack 2 win 257

+0.000 > [ect01] . 2:2(0) ack 2001
// Previously the ACK below would be delayed by 40ms
+0.000 > [ect01] E. 2:2(0) ack 3001

+0.500 < F. 9501:9501(0) ack 4 win 257

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  1 +
 net/ipv4/tcp_dctcp.c | 30 ++++++++++++++++++------------
 net/ipv4/tcp_input.c |  3 ++-
 3 files changed, 21 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a08de496d1b2..25116ec02087 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
 			struct pipe_inode_info *pipe, size_t len,
 			unsigned int flags);
 
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 078328afbfe3..8b637f9f23a2 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -129,12 +129,15 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=0 to CE=1 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (!ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (!ca->ce_state) {
+		/* State has changed from CE=0 to CE=1, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk, 1);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 1;
@@ -147,12 +150,15 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 	struct dctcp *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* State has changed from CE=1 to CE=0 and delayed
-	 * ACK has not sent yet.
-	 */
-	if (ca->ce_state &&
-	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
-		__tcp_send_ack(sk, ca->prior_rcv_nxt);
+	if (ca->ce_state) {
+		/* State has changed from CE=1 to CE=0, force an immediate
+		 * ACK to reflect the new CE state. If an ACK was delayed,
+		 * send that first to reflect the prior CE state.
+		 */
+		if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+			__tcp_send_ack(sk, ca->prior_rcv_nxt);
+		tcp_enter_quickack_mode(sk, 1);
+	}
 
 	ca->prior_rcv_nxt = tp->rcv_nxt;
 	ca->ce_state = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..6bade06aaf72 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
 		icsk->icsk_ack.quick = quickacks;
 }
 
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 	icsk->icsk_ack.pingpong = 0;
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
 
 /* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
-- 
cgit v1.2.3


From eef5ba1aa148ca5e6deb1e0aa1de797fa4e12cb7 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Wed, 20 Jun 2018 10:51:53 +0200
Subject: i2c: smbus: add unlocked __i2c_smbus_xfer variant

Removes all locking from i2c_smbus_xfer and renames it to __i2c_smbus_xfer,
then adds a new i2c_smbus_xfer function that simply grabs the lock while
calling the unlocked variant.

This is not perfectly equivalent, since i2c_smbus_xfer was callable from
atomic/irq context if you happened to end up emulating SMBus with an I2C
transfer, and that is no longer the case with this patch. It is unknown
(to me) if anything depends on that quirk, but it seems fragile enough to
simply break those cases and require them to call i2c_transfer directly
instead.

While at it, for consistency rename the 2nd to last argument (size) of
the i2c_smbus_xfer declaration to protocol and remove the surplus extern
marker.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-smbus.c | 28 ++++++++++++++++++++--------
 include/linux/i2c.h          | 11 ++++++++---
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 51970bae3c4a..9cd66cabb84f 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -463,7 +463,7 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 			msg[num-1].len++;
 	}
 
-	status = i2c_transfer(adapter, msg, num);
+	status = __i2c_transfer(adapter, msg, num);
 	if (status < 0)
 		goto cleanup;
 	if (status != num) {
@@ -524,9 +524,24 @@ cleanup:
  * This executes an SMBus protocol operation, and returns a negative
  * errno code else zero on success.
  */
-s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
-		   char read_write, u8 command, int protocol,
-		   union i2c_smbus_data *data)
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write,
+		   u8 command, int protocol, union i2c_smbus_data *data)
+{
+	s32 res;
+
+	i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
+	res = __i2c_smbus_xfer(adapter, addr, flags, read_write,
+			       command, protocol, data);
+	i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
+
+	return res;
+}
+EXPORT_SYMBOL(i2c_smbus_xfer);
+
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write,
+		     u8 command, int protocol, union i2c_smbus_data *data)
 {
 	unsigned long orig_jiffies;
 	int try;
@@ -543,8 +558,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 	flags &= I2C_M_TEN | I2C_CLIENT_PEC | I2C_CLIENT_SCCB;
 
 	if (adapter->algo->smbus_xfer) {
-		i2c_lock_bus(adapter, I2C_LOCK_SEGMENT);
-
 		/* Retry automatically on arbitration loss */
 		orig_jiffies = jiffies;
 		for (res = 0, try = 0; try <= adapter->retries; try++) {
@@ -557,7 +570,6 @@ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, unsigned short flags,
 				       orig_jiffies + adapter->timeout))
 				break;
 		}
-		i2c_unlock_bus(adapter, I2C_LOCK_SEGMENT);
 
 		if (res != -EOPNOTSUPP || !adapter->algo->master_xfer)
 			goto trace;
@@ -579,7 +591,7 @@ trace:
 
 	return res;
 }
-EXPORT_SYMBOL(i2c_smbus_xfer);
+EXPORT_SYMBOL(__i2c_smbus_xfer);
 
 /**
  * i2c_smbus_read_i2c_block_data_or_emulated - read block or emulate
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 254cd34eeae2..465afb092fa7 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -140,9 +140,14 @@ extern int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
    and probably just as fast.
    Note that we use i2c_adapter here, because you do not need a specific
    smbus adapter to call this function. */
-extern s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			  unsigned short flags, char read_write, u8 command,
-			  int size, union i2c_smbus_data *data);
+s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		   unsigned short flags, char read_write, u8 command,
+		   int protocol, union i2c_smbus_data *data);
+
+/* Unlocked flavor */
+s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+		     unsigned short flags, char read_write, u8 command,
+		     int protocol, union i2c_smbus_data *data);
 
 /* Now follow the 'nice' access routines. These also document the calling
    conventions of i2c_smbus_xfer. */
-- 
cgit v1.2.3


From 048c6d88a0214757926f264823829e79154fcd4f Mon Sep 17 00:00:00 2001
From: Guido Kiener <guido@kiener-muenchen.de>
Date: Wed, 18 Jul 2018 10:45:36 +0200
Subject: usb: usbtmc: Add ioctls to set/get usb timeout

Add ioctls USBTMC_IOCTL_GET_TIMEOUT / USBTMC_IOCTL_SET_TIMEOUT to
get/set I/O timeout for specific file handle.

Different operations on an instrument can take different lengths of
time thus it is important to be able to set the timeout slightly
longer than the expected duration of each operation to optimise the
responsiveness of the application. As the instrument may be shared by
multiple applications the timeout should be settable on a per file
descriptor basis.

Tested-by: Dave Penkler <dpenkler@gmail.com>
Reviewed-by: Steve Bayless <steve_bayless@keysight.com>
Signed-off-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Guido Kiener <guido.kiener@rohde-schwarz.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c   | 65 ++++++++++++++++++++++++++++++++++++++++----
 include/uapi/linux/usb/tmc.h |  4 +++
 2 files changed, 63 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 243e8446b8dd..36d740c4c6fb 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -30,6 +30,8 @@
  */
 #define USBTMC_SIZE_IOBUFFER	2048
 
+/* Minimum USB timeout (in milliseconds) */
+#define USBTMC_MIN_TIMEOUT	100
 /* Default USB timeout (in milliseconds) */
 #define USBTMC_TIMEOUT		5000
 
@@ -115,6 +117,7 @@ struct usbtmc_file_data {
 	struct usbtmc_device_data *data;
 	struct list_head file_elem;
 
+	u32            timeout;
 	u8             srq_byte;
 	atomic_t       srq_asserted;
 };
@@ -153,6 +156,8 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	mutex_lock(&data->io_mutex);
 	file_data->data = data;
 
+	file_data->timeout = USBTMC_TIMEOUT;
+
 	INIT_LIST_HEAD(&file_data->file_elem);
 	spin_lock_irq(&data->dev_lock);
 	list_add_tail(&file_data->file_elem, &data->file_list);
@@ -460,7 +465,7 @@ static int usbtmc488_ioctl_read_stb(struct usbtmc_file_data *file_data,
 		rv = wait_event_interruptible_timeout(
 			data->waitq,
 			atomic_read(&data->iin_data_valid) != 0,
-			USBTMC_TIMEOUT);
+			file_data->timeout);
 		if (rv < 0) {
 			dev_dbg(dev, "wait interrupted %d\n", rv);
 			goto exit;
@@ -560,8 +565,10 @@ static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data,
  *
  * Also updates bTag_last_write.
  */
-static int send_request_dev_dep_msg_in(struct usbtmc_device_data *data, size_t transfer_size)
+static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data,
+				       size_t transfer_size)
 {
+	struct usbtmc_device_data *data = file_data->data;
 	int retval;
 	u8 *buffer;
 	int actual;
@@ -590,7 +597,8 @@ static int send_request_dev_dep_msg_in(struct usbtmc_device_data *data, size_t t
 	retval = usb_bulk_msg(data->usb_dev,
 			      usb_sndbulkpipe(data->usb_dev,
 					      data->bulk_out),
-			      buffer, USBTMC_HEADER_SIZE, &actual, USBTMC_TIMEOUT);
+			      buffer, USBTMC_HEADER_SIZE,
+			      &actual, file_data->timeout);
 
 	/* Store bTag (in case we need to abort) */
 	data->bTag_last_write = data->bTag;
@@ -640,7 +648,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 
 	dev_dbg(dev, "usb_bulk_msg_in: count(%zu)\n", count);
 
-	retval = send_request_dev_dep_msg_in(data, count);
+	retval = send_request_dev_dep_msg_in(file_data, count);
 
 	if (retval < 0) {
 		if (data->auto_abort)
@@ -659,7 +667,7 @@ static ssize_t usbtmc_read(struct file *filp, char __user *buf,
 				      usb_rcvbulkpipe(data->usb_dev,
 						      data->bulk_in),
 				      buffer, USBTMC_SIZE_IOBUFFER, &actual,
-				      USBTMC_TIMEOUT);
+				      file_data->timeout);
 
 		dev_dbg(dev, "usb_bulk_msg: retval(%u), done(%zu), remaining(%zu), actual(%d)\n", retval, done, remaining, actual);
 
@@ -832,7 +840,7 @@ static ssize_t usbtmc_write(struct file *filp, const char __user *buf,
 					      usb_sndbulkpipe(data->usb_dev,
 							      data->bulk_out),
 					      buffer, n_bytes,
-					      &actual, USBTMC_TIMEOUT);
+					      &actual, file_data->timeout);
 			if (retval != 0)
 				break;
 			n_bytes -= actual;
@@ -1189,6 +1197,41 @@ exit:
 	return rv;
 }
 
+/*
+ * Get the usb timeout value
+ */
+static int usbtmc_ioctl_get_timeout(struct usbtmc_file_data *file_data,
+				void __user *arg)
+{
+	u32 timeout;
+
+	timeout = file_data->timeout;
+
+	return put_user(timeout, (__u32 __user *)arg);
+}
+
+/*
+ * Set the usb timeout value
+ */
+static int usbtmc_ioctl_set_timeout(struct usbtmc_file_data *file_data,
+				void __user *arg)
+{
+	u32 timeout;
+
+	if (get_user(timeout, (__u32 __user *)arg))
+		return -EFAULT;
+
+	/* Note that timeout = 0 means
+	 * MAX_SCHEDULE_TIMEOUT in usb_control_msg
+	 */
+	if (timeout < USBTMC_MIN_TIMEOUT)
+		return -EINVAL;
+
+	file_data->timeout = timeout;
+
+	return 0;
+}
+
 static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct usbtmc_file_data *file_data;
@@ -1229,6 +1272,16 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		retval = usbtmc_ioctl_abort_bulk_in(data);
 		break;
 
+	case USBTMC_IOCTL_GET_TIMEOUT:
+		retval = usbtmc_ioctl_get_timeout(file_data,
+						  (void __user *)arg);
+		break;
+
+	case USBTMC_IOCTL_SET_TIMEOUT:
+		retval = usbtmc_ioctl_set_timeout(file_data,
+						  (void __user *)arg);
+		break;
+
 	case USBTMC488_IOCTL_GET_CAPS:
 		retval = copy_to_user((void __user *)arg,
 				&data->usb488_caps,
diff --git a/include/uapi/linux/usb/tmc.h b/include/uapi/linux/usb/tmc.h
index 03f6adc8f35b..a89ffc33532e 100644
--- a/include/uapi/linux/usb/tmc.h
+++ b/include/uapi/linux/usb/tmc.h
@@ -16,6 +16,8 @@
 #ifndef __LINUX_USB_TMC_H
 #define __LINUX_USB_TMC_H
 
+#include <linux/types.h>   /* __u8 etc */
+
 /* USB TMC status values */
 #define USBTMC_STATUS_SUCCESS				0x01
 #define USBTMC_STATUS_PENDING				0x02
@@ -46,6 +48,8 @@
 #define USBTMC_IOCTL_ABORT_BULK_IN	_IO(USBTMC_IOC_NR, 4)
 #define USBTMC_IOCTL_CLEAR_OUT_HALT	_IO(USBTMC_IOC_NR, 6)
 #define USBTMC_IOCTL_CLEAR_IN_HALT	_IO(USBTMC_IOC_NR, 7)
+#define USBTMC_IOCTL_GET_TIMEOUT	_IOR(USBTMC_IOC_NR, 9, __u32)
+#define USBTMC_IOCTL_SET_TIMEOUT	_IOW(USBTMC_IOC_NR, 10, __u32)
 #define USBTMC488_IOCTL_GET_CAPS	_IOR(USBTMC_IOC_NR, 17, unsigned char)
 #define USBTMC488_IOCTL_READ_STB	_IOR(USBTMC_IOC_NR, 18, unsigned char)
 #define USBTMC488_IOCTL_REN_CONTROL	_IOW(USBTMC_IOC_NR, 19, unsigned char)
-- 
cgit v1.2.3


From fe78a7c637057070f20ac9460608a18d775e6349 Mon Sep 17 00:00:00 2001
From: Guido Kiener <guido@kiener-muenchen.de>
Date: Wed, 18 Jul 2018 10:45:37 +0200
Subject: usb: usbtmc: Add ioctl for trigger

add USBTMC488_IOCTL_TRIGGER to send TRIGGER Bulk-OUT header
according to Subclass USB488 Specification

The usbtmc trigger command is equivalent to the IEEE 488 GET (Group
Execute Trigger) action. While the "*TRG" command can be sent as
data to perform the same operation, in some situations an instrument
will be busy and unable to process the data immediately in which
case the USBTMC488_IOCTL_TRIGGER can be used to trigger the
instrument with lower latency.

Reviewed-by: Steve Bayless <steve_bayless@keysight.com>
Tested-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Guido Kiener <guido.kiener@rohde-schwarz.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c   | 49 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/usb/tmc.h |  1 +
 2 files changed, 50 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 36d740c4c6fb..38fc7abdc00c 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -557,6 +557,51 @@ static int usbtmc488_ioctl_simple(struct usbtmc_device_data *data,
 	return rv;
 }
 
+/*
+ * Sends a TRIGGER Bulk-OUT command message
+ * See the USBTMC-USB488 specification, Table 2.
+ *
+ * Also updates bTag_last_write.
+ */
+static int usbtmc488_ioctl_trigger(struct usbtmc_file_data *file_data)
+{
+	struct usbtmc_device_data *data = file_data->data;
+	int retval;
+	u8 *buffer;
+	int actual;
+
+	buffer = kzalloc(USBTMC_HEADER_SIZE, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	buffer[0] = 128;
+	buffer[1] = data->bTag;
+	buffer[2] = ~data->bTag;
+
+	retval = usb_bulk_msg(data->usb_dev,
+			      usb_sndbulkpipe(data->usb_dev,
+					      data->bulk_out),
+			      buffer, USBTMC_HEADER_SIZE,
+			      &actual, file_data->timeout);
+
+	/* Store bTag (in case we need to abort) */
+	data->bTag_last_write = data->bTag;
+
+	/* Increment bTag -- and increment again if zero */
+	data->bTag++;
+	if (!data->bTag)
+		data->bTag++;
+
+	kfree(buffer);
+	if (retval < 0) {
+		dev_err(&data->intf->dev, "%s returned %d\n",
+			__func__, retval);
+		return retval;
+	}
+
+	return 0;
+}
+
 /*
  * Sends a REQUEST_DEV_DEP_MSG_IN message on the Bulk-OUT endpoint.
  * @transfer_size: number of bytes to request from the device.
@@ -1309,6 +1354,10 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		retval = usbtmc488_ioctl_simple(data, (void __user *)arg,
 						USBTMC488_REQUEST_LOCAL_LOCKOUT);
 		break;
+
+	case USBTMC488_IOCTL_TRIGGER:
+		retval = usbtmc488_ioctl_trigger(file_data);
+		break;
 	}
 
 skip_io_on_zombie:
diff --git a/include/uapi/linux/usb/tmc.h b/include/uapi/linux/usb/tmc.h
index a89ffc33532e..c61bad7150dd 100644
--- a/include/uapi/linux/usb/tmc.h
+++ b/include/uapi/linux/usb/tmc.h
@@ -55,6 +55,7 @@
 #define USBTMC488_IOCTL_REN_CONTROL	_IOW(USBTMC_IOC_NR, 19, unsigned char)
 #define USBTMC488_IOCTL_GOTO_LOCAL	_IO(USBTMC_IOC_NR, 20)
 #define USBTMC488_IOCTL_LOCAL_LOCKOUT	_IO(USBTMC_IOC_NR, 21)
+#define USBTMC488_IOCTL_TRIGGER		_IO(USBTMC_IOC_NR, 22)
 
 /* Driver encoded usb488 capabilities */
 #define USBTMC488_CAPABILITY_TRIGGER         1
-- 
cgit v1.2.3


From fbd83971f9429849dd3a105b663822d15b7b992b Mon Sep 17 00:00:00 2001
From: Guido Kiener <guido@kiener-muenchen.de>
Date: Wed, 18 Jul 2018 10:45:38 +0200
Subject: usb: usbtmc: Add ioctl for EOM bit

add USBTMC_IOCTL_EOM_ENABLE to specify EOM bit for next write()
call. Sets Bit 0 of field 'bmTransferAttributes' of DEV_DEP_MSG_OUT
Bulk-OUT Header.
Allows fine grained control over end of message handling on a
per file descriptor basis.

Reviewed-by: Steve Bayless <steve_bayless@keysight.com>
Tested-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Guido Kiener <guido.kiener@rohde-schwarz.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c   | 28 +++++++++++++++++++++++++++-
 include/uapi/linux/usb/tmc.h |  2 ++
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 38fc7abdc00c..c77e0ac6260b 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -120,6 +120,7 @@ struct usbtmc_file_data {
 	u32            timeout;
 	u8             srq_byte;
 	atomic_t       srq_asserted;
+	u8             eom_val;
 };
 
 /* Forward declarations */
@@ -157,6 +158,7 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	file_data->data = data;
 
 	file_data->timeout = USBTMC_TIMEOUT;
+	file_data->eom_val = 1;
 
 	INIT_LIST_HEAD(&file_data->file_elem);
 	spin_lock_irq(&data->dev_lock);
@@ -855,7 +857,7 @@ static ssize_t usbtmc_write(struct file *filp, const char __user *buf,
 			buffer[8] = 0;
 		} else {
 			this_part = remaining;
-			buffer[8] = 1;
+			buffer[8] = file_data->eom_val;
 		}
 
 		/* Setup IO buffer for DEV_DEP_MSG_OUT message */
@@ -1277,6 +1279,25 @@ static int usbtmc_ioctl_set_timeout(struct usbtmc_file_data *file_data,
 	return 0;
 }
 
+/*
+ * enables/disables sending EOM on write
+ */
+static int usbtmc_ioctl_eom_enable(struct usbtmc_file_data *file_data,
+				void __user *arg)
+{
+	u8 eom_enable;
+
+	if (copy_from_user(&eom_enable, arg, sizeof(eom_enable)))
+		return -EFAULT;
+
+	if (eom_enable > 1)
+		return -EINVAL;
+
+	file_data->eom_val = eom_enable;
+
+	return 0;
+}
+
 static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct usbtmc_file_data *file_data;
@@ -1327,6 +1348,11 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 						  (void __user *)arg);
 		break;
 
+	case USBTMC_IOCTL_EOM_ENABLE:
+		retval = usbtmc_ioctl_eom_enable(file_data,
+						 (void __user *)arg);
+		break;
+
 	case USBTMC488_IOCTL_GET_CAPS:
 		retval = copy_to_user((void __user *)arg,
 				&data->usb488_caps,
diff --git a/include/uapi/linux/usb/tmc.h b/include/uapi/linux/usb/tmc.h
index c61bad7150dd..e7317dfdd2ae 100644
--- a/include/uapi/linux/usb/tmc.h
+++ b/include/uapi/linux/usb/tmc.h
@@ -50,6 +50,8 @@
 #define USBTMC_IOCTL_CLEAR_IN_HALT	_IO(USBTMC_IOC_NR, 7)
 #define USBTMC_IOCTL_GET_TIMEOUT	_IOR(USBTMC_IOC_NR, 9, __u32)
 #define USBTMC_IOCTL_SET_TIMEOUT	_IOW(USBTMC_IOC_NR, 10, __u32)
+#define USBTMC_IOCTL_EOM_ENABLE	        _IOW(USBTMC_IOC_NR, 11, __u8)
+
 #define USBTMC488_IOCTL_GET_CAPS	_IOR(USBTMC_IOC_NR, 17, unsigned char)
 #define USBTMC488_IOCTL_READ_STB	_IOR(USBTMC_IOC_NR, 18, unsigned char)
 #define USBTMC488_IOCTL_REN_CONTROL	_IOW(USBTMC_IOC_NR, 19, unsigned char)
-- 
cgit v1.2.3


From 12dcaeb77e67c1162a2604f6b589266baec2d1ef Mon Sep 17 00:00:00 2001
From: Guido Kiener <guido@kiener-muenchen.de>
Date: Wed, 18 Jul 2018 10:45:39 +0200
Subject: usb: usbtmc: Add ioctl for termination character

add USBTMC_IOCTL_CONFIG_TERMCHAR to control TermChar handling
for next read(). Controls field 'TermChar' and Bit 1 of field
'bmTransferAttributes' of REQUEST_DEV_DEP_MSG_IN BULK-OUT header.

Allows enabling/disabling of terminating a read on reception of
term_char individually for each read request.

Reviewed-by: Steve Bayless <steve_bayless@keysight.com>
Tested-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Dave Penkler <dpenkler@gmail.com>
Signed-off-by: Guido Kiener <guido.kiener@rohde-schwarz.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usbtmc.c   | 36 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/usb/tmc.h |  6 ++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index c77e0ac6260b..1b7b2e402adb 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -121,6 +121,8 @@ struct usbtmc_file_data {
 	u8             srq_byte;
 	atomic_t       srq_asserted;
 	u8             eom_val;
+	u8             term_char;
+	bool           term_char_enabled;
 };
 
 /* Forward declarations */
@@ -157,7 +159,10 @@ static int usbtmc_open(struct inode *inode, struct file *filp)
 	mutex_lock(&data->io_mutex);
 	file_data->data = data;
 
+	/* copy default values from device settings */
 	file_data->timeout = USBTMC_TIMEOUT;
+	file_data->term_char = data->TermChar;
+	file_data->term_char_enabled = data->TermCharEnabled;
 	file_data->eom_val = 1;
 
 	INIT_LIST_HEAD(&file_data->file_elem);
@@ -634,9 +639,9 @@ static int send_request_dev_dep_msg_in(struct usbtmc_file_data *file_data,
 	buffer[5] = transfer_size >> 8;
 	buffer[6] = transfer_size >> 16;
 	buffer[7] = transfer_size >> 24;
-	buffer[8] = data->TermCharEnabled * 2;
+	buffer[8] = file_data->term_char_enabled * 2;
 	/* Use term character? */
-	buffer[9] = data->TermChar;
+	buffer[9] = file_data->term_char;
 	buffer[10] = 0; /* Reserved */
 	buffer[11] = 0; /* Reserved */
 
@@ -1298,6 +1303,28 @@ static int usbtmc_ioctl_eom_enable(struct usbtmc_file_data *file_data,
 	return 0;
 }
 
+/*
+ * Configure termination character for read()
+ */
+static int usbtmc_ioctl_config_termc(struct usbtmc_file_data *file_data,
+				void __user *arg)
+{
+	struct usbtmc_termchar termc;
+
+	if (copy_from_user(&termc, arg, sizeof(termc)))
+		return -EFAULT;
+
+	if ((termc.term_char_enabled > 1) ||
+		(termc.term_char_enabled &&
+		!(file_data->data->capabilities.device_capabilities & 1)))
+		return -EINVAL;
+
+	file_data->term_char = termc.term_char;
+	file_data->term_char_enabled = termc.term_char_enabled;
+
+	return 0;
+}
+
 static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct usbtmc_file_data *file_data;
@@ -1353,6 +1380,11 @@ static long usbtmc_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 						 (void __user *)arg);
 		break;
 
+	case USBTMC_IOCTL_CONFIG_TERMCHAR:
+		retval = usbtmc_ioctl_config_termc(file_data,
+						   (void __user *)arg);
+		break;
+
 	case USBTMC488_IOCTL_GET_CAPS:
 		retval = copy_to_user((void __user *)arg,
 				&data->usb488_caps,
diff --git a/include/uapi/linux/usb/tmc.h b/include/uapi/linux/usb/tmc.h
index e7317dfdd2ae..729af2f861a4 100644
--- a/include/uapi/linux/usb/tmc.h
+++ b/include/uapi/linux/usb/tmc.h
@@ -40,6 +40,11 @@
 #define USBTMC488_REQUEST_GOTO_LOCAL			161
 #define USBTMC488_REQUEST_LOCAL_LOCKOUT			162
 
+struct usbtmc_termchar {
+	__u8 term_char;
+	__u8 term_char_enabled;
+} __attribute__ ((packed));
+
 /* Request values for USBTMC driver's ioctl entry point */
 #define USBTMC_IOC_NR			91
 #define USBTMC_IOCTL_INDICATOR_PULSE	_IO(USBTMC_IOC_NR, 1)
@@ -51,6 +56,7 @@
 #define USBTMC_IOCTL_GET_TIMEOUT	_IOR(USBTMC_IOC_NR, 9, __u32)
 #define USBTMC_IOCTL_SET_TIMEOUT	_IOW(USBTMC_IOC_NR, 10, __u32)
 #define USBTMC_IOCTL_EOM_ENABLE	        _IOW(USBTMC_IOC_NR, 11, __u8)
+#define USBTMC_IOCTL_CONFIG_TERMCHAR	_IOW(USBTMC_IOC_NR, 12, struct usbtmc_termchar)
 
 #define USBTMC488_IOCTL_GET_CAPS	_IOR(USBTMC_IOC_NR, 17, unsigned char)
 #define USBTMC488_IOCTL_READ_STB	_IOR(USBTMC_IOC_NR, 18, unsigned char)
-- 
cgit v1.2.3


From 488dee96bb62f0b3d9e678cf42574034d5b033a5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:47 +0000
Subject: kernfs: allow creating kernfs objects with arbitrary uid/gid

This change allows creating kernfs files and directories with arbitrary
uid/gid instead of always using GLOBAL_ROOT_UID/GID by extending
kernfs_create_dir_ns() and kernfs_create_file_ns() with uid/gid arguments.
The "simple" kernfs_create_file() and kernfs_create_dir() are left alone
and always create objects belonging to the global root.

When creating symlinks ownership (uid/gid) is taken from the target kernfs
object.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c |  4 +++-
 fs/kernfs/dir.c                          | 29 ++++++++++++++++++++++++++---
 fs/kernfs/file.c                         |  8 ++++++--
 fs/kernfs/inode.c                        |  2 +-
 fs/kernfs/kernfs-internal.h              |  2 ++
 fs/kernfs/symlink.c                      | 11 ++++++++++-
 fs/sysfs/dir.c                           |  4 +++-
 fs/sysfs/file.c                          |  5 +++--
 include/linux/kernfs.h                   | 28 +++++++++++++++++++---------
 kernel/cgroup/cgroup.c                   |  4 +++-
 10 files changed, 76 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 749856a2e736..9af1a21265d3 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -146,6 +146,7 @@ static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
 	int ret;
 
 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 				  0, rft->kf_ops, rft, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
@@ -1503,7 +1504,8 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
 	struct kernfs_node *kn;
 	int ret = 0;
 
-	kn = __kernfs_create_file(parent_kn, name, 0444, 0,
+	kn = __kernfs_create_file(parent_kn, name, 0444,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
 				  &kf_mondata_ops, priv, NULL, NULL);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d66cc0777303..4ca0b5c18192 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -619,6 +619,7 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
 
 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 					     const char *name, umode_t mode,
+					     kuid_t uid, kgid_t gid,
 					     unsigned flags)
 {
 	struct kernfs_node *kn;
@@ -661,8 +662,22 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 	kn->mode = mode;
 	kn->flags = flags;
 
+	if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) {
+		struct iattr iattr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_uid = uid,
+			.ia_gid = gid,
+		};
+
+		ret = __kernfs_setattr(kn, &iattr);
+		if (ret < 0)
+			goto err_out3;
+	}
+
 	return kn;
 
+ err_out3:
+	idr_remove(&root->ino_idr, kn->id.ino);
  err_out2:
 	kmem_cache_free(kernfs_node_cache, kn);
  err_out1:
@@ -672,11 +687,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
 
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags)
 {
 	struct kernfs_node *kn;
 
-	kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
+	kn = __kernfs_new_node(kernfs_root(parent),
+			       name, mode, uid, gid, flags);
 	if (kn) {
 		kernfs_get(parent);
 		kn->parent = parent;
@@ -946,6 +963,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 	root->next_generation = 1;
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
+			       GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 			       KERNFS_DIR);
 	if (!kn) {
 		idr_destroy(&root->ino_idr);
@@ -984,6 +1002,8 @@ void kernfs_destroy_root(struct kernfs_root *root)
  * @parent: parent in which to create a new directory
  * @name: name of the new directory
  * @mode: mode of the new directory
+ * @uid: uid of the new directory
+ * @gid: gid of the new directory
  * @priv: opaque data associated with the new directory
  * @ns: optional namespace tag of the directory
  *
@@ -991,13 +1011,15 @@ void kernfs_destroy_root(struct kernfs_root *root)
  */
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns)
 {
 	struct kernfs_node *kn;
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, mode | S_IFDIR,
+			     uid, gid, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
@@ -1028,7 +1050,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 	int rc;
 
 	/* allocate */
-	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
+	kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR,
+			     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2015d8c45e4a..dbf5bc250bfd 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -965,6 +965,8 @@ const struct file_operations kernfs_file_fops = {
  * @parent: directory to create the file in
  * @name: name of the file
  * @mode: mode of the file
+ * @uid: uid of the file
+ * @gid: gid of the file
  * @size: size of the file
  * @ops: kernfs operations for the file
  * @priv: private data for the file
@@ -975,7 +977,8 @@ const struct file_operations kernfs_file_fops = {
  */
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 					 const char *name,
-					 umode_t mode, loff_t size,
+					 umode_t mode, kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key)
@@ -986,7 +989,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 
 	flags = KERNFS_FILE;
 
-	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
+	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
+			     uid, gid, flags);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d73fe9d56e2..80cebcd94c90 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -63,7 +63,7 @@ out_unlock:
 	return ret;
 }
 
-static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 {
 	struct kernfs_iattrs *attrs;
 	struct iattr *iattrs;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 0f260dcca177..3d83b114bb08 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -90,6 +90,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
 int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
 		       u32 request_mask, unsigned int query_flags);
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
+int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 
 /*
  * dir.c
@@ -104,6 +105,7 @@ void kernfs_put_active(struct kernfs_node *kn);
 int kernfs_add_one(struct kernfs_node *kn);
 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
 				    const char *name, umode_t mode,
+				    kuid_t uid, kgid_t gid,
 				    unsigned flags);
 struct kernfs_node *kernfs_find_and_get_node_by_ino(struct kernfs_root *root,
 						    unsigned int ino);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 08ccabd7047f..5ffed48f3d0e 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -21,6 +21,7 @@
  * @target: target node for the symlink to point to
  *
  * Returns the created node on success, ERR_PTR() value on error.
+ * Ownership of the link matches ownership of the target.
  */
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 				       const char *name,
@@ -28,8 +29,16 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
 {
 	struct kernfs_node *kn;
 	int error;
+	kuid_t uid = GLOBAL_ROOT_UID;
+	kgid_t gid = GLOBAL_ROOT_GID;
 
-	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
+	if (target->iattr) {
+		uid = target->iattr->ia_iattr.ia_uid;
+		gid = target->iattr->ia_iattr.ia_gid;
+	}
+
+	kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, uid, gid,
+			     KERNFS_LINK);
 	if (!kn)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 58eba92a0e41..e39b884f0867 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -52,7 +52,9 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 		return -ENOENT;
 
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
+				  S_IRWXU | S_IRUGO | S_IXUGO,
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, kobject_name(kobj));
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..513fa691ecbd 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -302,8 +302,9 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops,
-				  (void *)attr, ns, key);
+	kn = __kernfs_create_file(parent, attr->name,
+				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
 			sysfs_warn_dup(parent, attr->name);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index ab25c8b6d9e3..814643f7ee52 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -15,6 +15,7 @@
 #include <linux/lockdep.h>
 #include <linux/rbtree.h>
 #include <linux/atomic.h>
+#include <linux/uidgid.h>
 #include <linux/wait.h>
 
 struct file;
@@ -325,12 +326,14 @@ void kernfs_destroy_root(struct kernfs_root *root);
 
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
 					 void *priv, const void *ns);
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 					    const char *name);
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
-					 const char *name,
-					 umode_t mode, loff_t size,
+					 const char *name, umode_t mode,
+					 kuid_t uid, kgid_t gid,
+					 loff_t size,
 					 const struct kernfs_ops *ops,
 					 void *priv, const void *ns,
 					 struct lock_class_key *key);
@@ -415,12 +418,14 @@ static inline void kernfs_destroy_root(struct kernfs_root *root) { }
 
 static inline struct kernfs_node *
 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
-		     umode_t mode, void *priv, const void *ns)
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     void *priv, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline struct kernfs_node *
 __kernfs_create_file(struct kernfs_node *parent, const char *name,
-		     umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		     umode_t mode, kuid_t uid, kgid_t gid,
+		     loff_t size, const struct kernfs_ops *ops,
 		     void *priv, const void *ns, struct lock_class_key *key)
 { return ERR_PTR(-ENOSYS); }
 
@@ -498,12 +503,15 @@ static inline struct kernfs_node *
 kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
 		  void *priv)
 {
-	return kernfs_create_dir_ns(parent, name, mode, priv, NULL);
+	return kernfs_create_dir_ns(parent, name, mode,
+				    GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				    priv, NULL);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
-		      umode_t mode, loff_t size, const struct kernfs_ops *ops,
+		      umode_t mode, kuid_t uid, kgid_t gid,
+		      loff_t size, const struct kernfs_ops *ops,
 		      void *priv, const void *ns)
 {
 	struct lock_class_key *key = NULL;
@@ -511,15 +519,17 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	key = (struct lock_class_key *)&ops->lockdep_key;
 #endif
-	return __kernfs_create_file(parent, name, mode, size, ops, priv, ns,
-				    key);
+	return __kernfs_create_file(parent, name, mode, uid, gid,
+				    size, ops, priv, ns, key);
 }
 
 static inline struct kernfs_node *
 kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
 		   loff_t size, const struct kernfs_ops *ops, void *priv)
 {
-	return kernfs_create_file_ns(parent, name, mode, size, ops, priv, NULL);
+	return kernfs_create_file_ns(parent, name, mode,
+				     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				     size, ops, priv, NULL);
 }
 
 static inline int kernfs_remove_by_name(struct kernfs_node *parent,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..35cf3d71f8aa 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3557,7 +3557,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	key = &cft->lockdep_key;
 #endif
 	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
-				  cgroup_file_mode(cft), 0, cft->kf_ops, cft,
+				  cgroup_file_mode(cft),
+				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  0, cft->kf_ops, cft,
 				  NULL, key);
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
-- 
cgit v1.2.3


From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:48 +0000
Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users

Normally kobjects and their sysfs representation belong to global root,
however it is not necessarily the case for objects in separate namespaces.
For example, objects in separate network namespace logically belong to the
container's root and not global root.

This change lays groundwork for allowing network namespace objects
ownership to be transferred to container's root user by defining
get_ownership() callback in ktype structure and using it in sysfs code to
retrieve desired uid/gid when creating sysfs objects for given kobject.

Co-Developed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/sysfs/dir.c          |  7 +++++--
 fs/sysfs/file.c         | 32 ++++++++++++++++++++------------
 fs/sysfs/group.c        | 23 +++++++++++++++++------
 fs/sysfs/sysfs.h        |  5 ++---
 include/linux/kobject.h |  4 ++++
 lib/kobject.c           | 19 +++++++++++++++++++
 6 files changed, 67 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e39b884f0867..feeae8081c22 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 {
 	struct kernfs_node *parent, *kn;
+	kuid_t uid;
+	kgid_t gid;
 
 	BUG_ON(!kobj);
 
@@ -51,9 +53,10 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
-				  S_IRWXU | S_IRUGO | S_IXUGO,
-				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				  S_IRWXU | S_IRUGO | S_IXUGO, uid, gid,
 				  kobj, ns);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 513fa691ecbd..fa46216523cf 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = {
 
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t mode, const void *ns)
+			   umode_t mode, kuid_t uid, kgid_t gid, const void *ns)
 {
 	struct lock_class_key *key = NULL;
 	const struct kernfs_ops *ops;
@@ -302,8 +302,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	if (!attr->ignore_lockdep)
 		key = attr->key ?: (struct lock_class_key *)&attr->skey;
 #endif
-	kn = __kernfs_create_file(parent, attr->name,
-				  mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+
+	kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid,
 				  size, ops, (void *)attr, ns, key);
 	if (IS_ERR(kn)) {
 		if (PTR_ERR(kn) == -EEXIST)
@@ -313,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 	return 0;
 }
 
-int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
-		   bool is_bin)
-{
-	return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
-}
-
 /**
  * sysfs_create_file_ns - create an attribute file for an object with custom ns
  * @kobj: object we're creating for
@@ -328,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
 int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
 			 const void *ns)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode,
+				      uid, gid, ns);
 
 }
 EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -359,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	if (group) {
@@ -371,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
-	error = sysfs_add_file(parent, attr, false);
+	kobject_get_ownership(kobj, &uid, &gid);
+	error = sysfs_add_file_mode_ns(kobj->sd, attr, false,
+				       attr->mode, uid, gid, NULL);
 	kernfs_put(parent);
 
 	return error;
@@ -487,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 int sysfs_create_bin_file(struct kobject *kobj,
 			  const struct bin_attribute *attr)
 {
+	kuid_t uid;
+	kgid_t gid;
+
 	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->sd, &attr->attr, true);
+	kobject_get_ownership(kobj, &uid, &gid);
+	return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true,
+				      attr->attr.mode, uid, gid, NULL);
 }
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
 
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 4802ec0e1e3a..c7a716c4acc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent,
 }
 
 static int create_files(struct kernfs_node *parent, struct kobject *kobj,
+			kuid_t uid, kgid_t gid,
 			const struct attribute_group *grp, int update)
 {
 	struct attribute *const *attr;
@@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent, *attr, false,
-						       mode, NULL);
+						       mode, uid, gid, NULL);
 			if (unlikely(error))
 				break;
 		}
@@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
 			mode &= SYSFS_PREALLOC | 0664;
 			error = sysfs_add_file_mode_ns(parent,
 					&(*bin_attr)->attr, true,
-					mode, NULL);
+					mode,
+					uid, gid, NULL);
 			if (error)
 				break;
 		}
@@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update,
 				 const struct attribute_group *grp)
 {
 	struct kernfs_node *kn;
+	kuid_t uid;
+	kgid_t gid;
 	int error;
 
 	BUG_ON(!kobj || (!update && !kobj->sd));
@@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update,
 			kobj->name, grp->name ?: "");
 		return -EINVAL;
 	}
+	kobject_get_ownership(kobj, &uid, &gid);
 	if (grp->name) {
-		kn = kernfs_create_dir(kobj->sd, grp->name,
-				       S_IRWXU | S_IRUGO | S_IXUGO, kobj);
+		kn = kernfs_create_dir_ns(kobj->sd, grp->name,
+					  S_IRWXU | S_IRUGO | S_IXUGO,
+					  uid, gid, kobj, NULL);
 		if (IS_ERR(kn)) {
 			if (PTR_ERR(kn) == -EEXIST)
 				sysfs_warn_dup(kobj->sd, grp->name);
@@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update,
 	} else
 		kn = kobj->sd;
 	kernfs_get(kn);
-	error = create_files(kn, kobj, grp, update);
+	error = create_files(kn, kobj, uid, gid, grp, update);
 	if (error) {
 		if (grp->name)
 			kernfs_remove(kn);
@@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj,
 		       const struct attribute_group *grp)
 {
 	struct kernfs_node *parent;
+	kuid_t uid;
+	kgid_t gid;
 	int error = 0;
 	struct attribute *const *attr;
 	int i;
@@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj,
 	if (!parent)
 		return -ENOENT;
 
+	kobject_get_ownership(kobj, &uid, &gid);
+
 	for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
-		error = sysfs_add_file(parent, *attr, false);
+		error = sysfs_add_file_mode_ns(parent, *attr, false,
+					       (*attr)->mode, uid, gid, NULL);
 	if (error) {
 		while (--i >= 0)
 			kernfs_remove_by_name(parent, (*--attr)->name);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d098e015fcc9..0050cc0c0236 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
 /*
  * file.c
  */
-int sysfs_add_file(struct kernfs_node *parent,
-		   const struct attribute *attr, bool is_bin);
 int sysfs_add_file_mode_ns(struct kernfs_node *parent,
 			   const struct attribute *attr, bool is_bin,
-			   umode_t amode, const void *ns);
+			   umode_t amode, kuid_t uid, kgid_t gid,
+			   const void *ns);
 
 /*
  * symlink.c
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f6f93c3df9c..b49ff230beba 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/atomic.h>
 #include <linux/workqueue.h>
+#include <linux/uidgid.h>
 
 #define UEVENT_HELPER_PATH_LEN		256
 #define UEVENT_NUM_ENVP			32	/* number of env pointers */
@@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 extern void kobject_put(struct kobject *kobj);
 
 extern const void *kobject_namespace(struct kobject *kobj);
+extern void kobject_get_ownership(struct kobject *kobj,
+				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
@@ -122,6 +125,7 @@ struct kobj_type {
 	struct attribute **default_attrs;
 	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
 	const void *(*namespace)(struct kobject *kobj);
+	void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 struct kobj_uevent_env {
diff --git a/lib/kobject.c b/lib/kobject.c
index 18989b5b3b56..f2dc1f756007 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj)
 	return kobj->ktype->namespace(kobj);
 }
 
+/**
+ * kobject_get_ownership - get sysfs ownership data for @kobj
+ * @kobj: kobject in question
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns initial uid/gid pair that should be used when creating sysfs
+ * representation of given kobject. Normally used to adjust ownership of
+ * objects in a container.
+ */
+void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+
+	if (kobj->ktype->get_ownership)
+		kobj->ktype->get_ownership(kobj, uid, gid);
+}
+
 /*
  * populate_dir - populate directory with attributes.
  * @kobj: object we're working on.
-- 
cgit v1.2.3


From 9944e894c1266dc8515c82d1ff752d681215526b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 20 Jul 2018 21:56:50 +0000
Subject: driver core: set up ownership of class devices in sysfs

Plumb in get_ownership() callback for devices belonging to a class so that
they can be created with uid/gid different from global root. This will
allow network devices in a container to belong to container's root and not
global root.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/core.c    | 9 +++++++++
 include/linux/device.h | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index df3e1a44707a..276c7e3f754c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -866,10 +866,19 @@ static const void *device_namespace(struct kobject *kobj)
 	return ns;
 }
 
+static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+{
+	struct device *dev = kobj_to_dev(kobj);
+
+	if (dev->class && dev->class->get_ownership)
+		dev->class->get_ownership(dev, uid, gid);
+}
+
 static struct kobj_type device_ktype = {
 	.release	= device_release,
 	.sysfs_ops	= &dev_sysfs_ops,
 	.namespace	= device_namespace,
+	.get_ownership	= device_get_ownership,
 };
 
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..fe6ccb6dc119 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -384,6 +384,9 @@ int subsys_virtual_register(struct bus_type *subsys,
  * @shutdown_pre: Called at shut-down time before driver shutdown.
  * @ns_type:	Callbacks so sysfs can detemine namespaces.
  * @namespace:	Namespace of the device belongs to this class.
+ * @get_ownership: Allows class to specify uid/gid of the sysfs directories
+ *		for the devices belonging to the class. Usually tied to
+ *		device's namespace.
  * @pm:		The default device power management operations of this class.
  * @p:		The private data of the driver core, no one other than the
  *		driver core can touch this.
@@ -413,6 +416,8 @@ struct class {
 	const struct kobj_ns_type_operations *ns_type;
 	const void *(*namespace)(struct device *dev);
 
+	void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid);
+
 	const struct dev_pm_ops *pm;
 
 	struct subsys_private *p;
-- 
cgit v1.2.3


From fbdeaed408cf2728c62640c10848ddb1b67e63d3 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 20 Jul 2018 21:56:53 +0000
Subject: net: create reusable function for getting ownership info of sysfs
 inodes

Make net_ns_get_ownership() reusable by networking code outside of core.
This is useful, for example, to allow bridge related sysfs files to be
owned by container root.

Add a function comment since this is a potentially dangerous function to
use given the way that kobject_get_ownership() works by initializing uid
and gid before calling .get_ownership().

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 10 ++++++++++
 net/core/net-sysfs.c        | 18 ------------------
 net/core/net_namespace.c    | 28 ++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a71264d75d7f..9b5fdc50519a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -10,6 +10,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/sysctl.h>
+#include <linux/uidgid.h>
 
 #include <net/flow.h>
 #include <net/netns/core.h>
@@ -170,6 +171,8 @@ extern struct net init_net;
 struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 			struct net *old_net);
 
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
+
 void net_ns_barrier(void);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
@@ -182,6 +185,13 @@ static inline struct net *copy_net_ns(unsigned long flags,
 	return old_net;
 }
 
+static inline void net_ns_get_ownership(const struct net *net,
+					kuid_t *uid, kgid_t *gid)
+{
+	*uid = GLOBAL_ROOT_UID;
+	*gid = GLOBAL_ROOT_GID;
+}
+
 static inline void net_ns_barrier(void) {}
 #endif /* CONFIG_NET_NS */
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ada065fc685e..0a95bcf64cdc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -656,24 +656,6 @@ static const struct attribute_group wireless_group = {
 #define net_class_groups	NULL
 #endif /* CONFIG_SYSFS */
 
-static void net_ns_get_ownership(const struct net *net,
-				 kuid_t *uid, kgid_t *gid)
-{
-	if (net) {
-		kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
-		kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
-
-		if (uid_valid(ns_root_uid))
-			*uid = ns_root_uid;
-
-		if (gid_valid(ns_root_gid))
-			*gid = ns_root_gid;
-	} else {
-		*uid = GLOBAL_ROOT_UID;
-		*gid = GLOBAL_ROOT_GID;
-	}
-}
-
 #ifdef CONFIG_SYSFS
 #define to_rx_queue_attr(_attr) \
 	container_of(_attr, struct rx_queue_attribute, attr)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a11e03f920d3..738871af5efa 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -17,6 +17,7 @@
 #include <linux/user_namespace.h>
 #include <linux/net_namespace.h>
 #include <linux/sched/task.h>
+#include <linux/uidgid.h>
 
 #include <net/sock.h>
 #include <net/netlink.h>
@@ -448,6 +449,33 @@ dec_ucounts:
 	return net;
 }
 
+/**
+ * net_ns_get_ownership - get sysfs ownership data for @net
+ * @net: network namespace in question (can be NULL)
+ * @uid: kernel user ID for sysfs objects
+ * @gid: kernel group ID for sysfs objects
+ *
+ * Returns the uid/gid pair of root in the user namespace associated with the
+ * given network namespace.
+ */
+void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
+{
+	if (net) {
+		kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
+		kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
+
+		if (uid_valid(ns_root_uid))
+			*uid = ns_root_uid;
+
+		if (gid_valid(ns_root_gid))
+			*gid = ns_root_gid;
+	} else {
+		*uid = GLOBAL_ROOT_UID;
+		*gid = GLOBAL_ROOT_GID;
+	}
+}
+EXPORT_SYMBOL_GPL(net_ns_get_ownership);
+
 static void unhash_nsid(struct net *net, struct net *last)
 {
 	struct net *tmp;
-- 
cgit v1.2.3


From 9bfdc2611d417be453c3deb7a7ef2ffc718febfa Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Wed, 18 Jul 2018 04:10:44 +0200
Subject: vt: selection: take screen contents from uniscr if available

This preserves whatever was written even if we can't currently display the
given glyph.  Mouse paste won't corrupt any character of wcwidth() == 1
anymore.

Note that for now uniscr doesn't get allocated until something reads
/dev/vcsuN for that console, making this code dormant for most users.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/selection.c | 11 +++++++----
 drivers/tty/vt/vt.c        | 10 ++++++++++
 include/linux/selection.h  |  1 +
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c
index 69ca337d3220..07496c711d7d 100644
--- a/drivers/tty/vt/selection.c
+++ b/drivers/tty/vt/selection.c
@@ -57,11 +57,13 @@ static inline void highlight_pointer(const int where)
 	complement_pos(sel_cons, where);
 }
 
-static u16
+static u32
 sel_pos(int n)
 {
+	if (use_unicode)
+		return screen_glyph_unicode(sel_cons, n / 2);
 	return inverse_translate(sel_cons, screen_glyph(sel_cons, n),
-				use_unicode);
+				0);
 }
 
 /**
@@ -90,7 +92,8 @@ static u32 inwordLut[]={
   0x07FFFFFE, /* lowercase         */
 };
 
-static inline int inword(const u16 c) {
+static inline int inword(const u32 c)
+{
 	return c > 0x7f || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
 }
 
@@ -167,7 +170,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
 	struct tiocl_selection v;
 	char *bp, *obp;
 	int i, ps, pe, multiplier;
-	u16 c;
+	u32 c;
 	int mode;
 
 	poke_blanked_console();
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 846dfedb657d..7adcb6dcc50c 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4543,6 +4543,16 @@ u16 screen_glyph(struct vc_data *vc, int offset)
 }
 EXPORT_SYMBOL_GPL(screen_glyph);
 
+u32 screen_glyph_unicode(struct vc_data *vc, int n)
+{
+	struct uni_screen *uniscr = get_vc_uniscr(vc);
+
+	if (uniscr)
+		return uniscr->lines[n / vc->vc_cols][n % vc->vc_cols];
+	return inverse_translate(vc, screen_glyph(vc, n * 2), 1);
+}
+EXPORT_SYMBOL_GPL(screen_glyph_unicode);
+
 /* used by vcs - note the word offset */
 unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed)
 {
diff --git a/include/linux/selection.h b/include/linux/selection.h
index 067d2e99c79f..a8f5b97b216f 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -32,6 +32,7 @@ extern unsigned char default_blu[];
 
 extern unsigned short *screen_pos(struct vc_data *vc, int w_offset, int viewed);
 extern u16 screen_glyph(struct vc_data *vc, int offset);
+extern u32 screen_glyph_unicode(struct vc_data *vc, int offset);
 extern void complement_pos(struct vc_data *vc, int offset);
 extern void invert_screen(struct vc_data *vc, int offset, int count, int shift);
 
-- 
cgit v1.2.3


From f4c6fbc96eb187d6432fbba1f9c0c2db598cdf29 Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Wed, 18 Jul 2018 05:03:22 +0200
Subject: vt: drop unused struct vt_struct

Hasn't been ever used within historic (ie, git) times.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/console_struct.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 2c8d3239899b..fea64f2692a0 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -17,7 +17,6 @@
 #include <linux/vt.h>
 #include <linux/workqueue.h>
 
-struct vt_struct;
 struct uni_pagedir;
 struct uni_screen;
 
@@ -150,7 +149,7 @@ struct vc {
 	struct vc_data *d;
 	struct work_struct SAK_work;
 
-	/* might add  scrmem, vt_struct, kbd  at some time,
+	/* might add  scrmem, kbd  at some time,
 	   to have everything in one place - the disadvantage
 	   would be that vc_cons etc can no longer be static */
 };
-- 
cgit v1.2.3


From c73314e6ebb2651a70ca8a3ff08d4bd6b9f9ade1 Mon Sep 17 00:00:00 2001
From: Mathieu Othacehe <m.othacehe@gmail.com>
Date: Fri, 20 Jul 2018 19:34:25 +0200
Subject: iio: Add channel for Phase

Add new channel type support for phase.

This channel may be used by Time-of-flight sensors to express the
phase difference between emitted and received signals. Those sensor
will then use the phase shift of return signals to approximate the
distance to objects.

Signed-off-by: Mathieu Othacehe <m.othacehe@gmail.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 7 +++++++
 drivers/iio/industrialio-core.c         | 1 +
 include/uapi/linux/iio/types.h          | 1 +
 tools/iio/iio_event_monitor.c           | 2 ++
 4 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index c7353030670a..c9cfa833cf47 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1675,3 +1675,10 @@ KernelVersion:	4.12
 Contact:	linux-iio@vger.kernel.org
 Description:
 		Raw counter device counters direction for channel Y.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_phaseY_raw
+KernelVersion:	4.18
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Raw (unscaled) phase difference reading from channel Y
+		that can be processed to radians.
\ No newline at end of file
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index ed1b3ebade94..a16ad5a4ab0c 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -86,6 +86,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_INDEX] = "index",
 	[IIO_GRAVITY]  = "gravity",
 	[IIO_POSITIONRELATIVE]  = "positionrelative",
+	[IIO_PHASE] = "phase",
 };
 
 static const char * const iio_modifier_names[] = {
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 033c7d28924e..e4df3cc268db 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -45,6 +45,7 @@ enum iio_chan_type {
 	IIO_INDEX,
 	IIO_GRAVITY,
 	IIO_POSITIONRELATIVE,
+	IIO_PHASE,
 };
 
 enum iio_modifier {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index 148f69dfae75..f478f5558720 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -59,6 +59,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_UVINDEX] = "uvindex",
 	[IIO_GRAVITY] = "gravity",
 	[IIO_POSITIONRELATIVE] = "positionrelative",
+	[IIO_PHASE] = "phase",
 };
 
 static const char * const iio_ev_type_text[] = {
@@ -153,6 +154,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_UVINDEX:
 	case IIO_GRAVITY:
 	case IIO_POSITIONRELATIVE:
+	case IIO_PHASE:
 		break;
 	default:
 		return false;
-- 
cgit v1.2.3


From a2dca217dae29c4ff6420e8c78d56b3f61ae0797 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:18 +0200
Subject: KVM: arm/arm64: vgic: Define GICD_IIDR fields for GICv2 and GIv3

Instead of hardcoding the shifts and masks in the GICD_IIDR register
emulation, let's add the definition of these fields to the GIC header
files and use them.

This will make things more obvious when we're going to bump the revision
in the IIDR when we'll make guest-visible changes to the implementation.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 10 ++++++++++
 include/linux/irqchip/arm-gic.h    | 10 ++++++++++
 virt/kvm/arm/vgic/vgic-mmio-v2.c   |  3 ++-
 virt/kvm/arm/vgic/vgic-mmio-v3.c   |  3 ++-
 4 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index cbb872c1b607..b22f9dfa61af 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -61,6 +61,16 @@
 #define GICD_CTLR_ENABLE_G1A		(1U << 1)
 #define GICD_CTLR_ENABLE_G1		(1U << 0)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 /*
  * In systems with a single security state (what we emulate in KVM)
  * the meaning of the interrupt group enable bits is slightly different
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 68d8b1f73682..484f5bfa9f3d 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -71,6 +71,16 @@
 					(GICD_INT_DEF_PRI << 8) |\
 					GICD_INT_DEF_PRI)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT	0
+#define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT	12
+#define GICD_IIDR_REVISION_MASK		(0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT		16
+#define GICD_IIDR_VARIANT_MASK		(0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT	24
+#define GICD_IIDR_PRODUCT_ID_MASK	(0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 #define GICH_HCR			0x0
 #define GICH_VTR			0x4
 #define GICH_VMCR			0x8
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index ffc587bf4742..af44e569373a 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -37,7 +37,8 @@ static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
 		value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		break;
 	case GIC_DIST_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 287784095b5b..c03f42409b98 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -81,7 +81,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 		}
 		break;
 	case GICD_IIDR:
-		value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
 		return 0;
-- 
cgit v1.2.3


From aa075b0f30b53e397fd4d4162ebf4a3a236b9206 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:19 +0200
Subject: KVM: arm/arm64: vgic: Keep track of implementation revision

As we are about to tweak implementation aspects of the VGIC emulation,
while still preserving some level of backwards compatibility support,
add a field to keep track of the implementation revision field which is
reported to the VM and to userspace.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h           | 3 +++
 virt/kvm/arm/vgic/vgic-init.c    | 1 +
 virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 ++++--
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 6 ++++--
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cfdd2484cc42..7e64c463ab4d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -217,6 +217,9 @@ struct vgic_dist {
 	/* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
 	u32			vgic_model;
 
+	/* Implementation revision as reported in the GICD_IIDR */
+	u32			implementation_rev;
+
 	/* Do injected MSIs require an additional device ID? */
 	bool			msis_require_devid;
 
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index b71417913741..8b6fc45c42fe 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -298,6 +298,7 @@ int vgic_init(struct kvm *kvm)
 
 	vgic_debug_init(kvm);
 
+	dist->implementation_rev = 0;
 	dist->initialized = true;
 
 out:
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index af44e569373a..f0c5351805b6 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -25,19 +25,21 @@
 static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 	u32 value;
 
 	switch (addr & 0x0c) {
 	case GIC_DIST_CTRL:
-		value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
+		value = vgic->enabled ? GICD_ENABLE : 0;
 		break;
 	case GIC_DIST_CTR:
-		value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
 		value = (value >> 5) - 1;
 		value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		break;
 	case GIC_DIST_IIDR:
 		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
 			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index c03f42409b98..ebe10a015bfb 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -62,16 +62,17 @@ bool vgic_supports_direct_msis(struct kvm *kvm)
 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 	u32 value = 0;
 
 	switch (addr & 0x0c) {
 	case GICD_CTLR:
-		if (vcpu->kvm->arch.vgic.enabled)
+		if (vgic->enabled)
 			value |= GICD_CTLR_ENABLE_SS_G1;
 		value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
 		break;
 	case GICD_TYPER:
-		value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
 		value = (value >> 5) - 1;
 		if (vgic_has_its(vcpu->kvm)) {
 			value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
@@ -82,6 +83,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 		break;
 	case GICD_IIDR:
 		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
 			(IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
 		break;
 	default:
-- 
cgit v1.2.3


From 8df3c8f33f46adbaa811c0d57fb1d7eb421b88a9 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:21 +0200
Subject: KVM: arm/arm64: vgic: Add group field to struct irq

In preparation for proper group 0 and group 1 support in the vgic, we
add a field in the struct irq to store the group of all interrupts.

We initialize the group to group 0 when emulating GICv2 and to group 1
when emulating GICv3, just like we treat them today.  LPIs are always
group 1.  We also continue to ignore writes from the guest, preserving
existing functionality, for now.

Finally, we also add this field to the vgic debug logic to show the
group for all interrupts.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h         |  1 +
 virt/kvm/arm/vgic/vgic-debug.c |  8 +++++---
 virt/kvm/arm/vgic/vgic-init.c  | 19 +++++++++++++++++--
 virt/kvm/arm/vgic/vgic-its.c   |  1 +
 4 files changed, 24 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e64c463ab4d..c661d0ee6628 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -133,6 +133,7 @@ struct vgic_irq {
 	u8 source;			/* GICv2 SGIs only */
 	u8 active_source;		/* GICv2 SGIs only */
 	u8 priority;
+	u8 group;			/* 0 == group 0, 1 == group 1 */
 	enum vgic_irq_config config;	/* Level or edge */
 
 	/*
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c
index 9279e35fefb1..07aa900bac56 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -166,6 +166,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 
 	seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
 	seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+	seq_printf(s, "G=group\n");
 }
 
 static void print_header(struct seq_file *s, struct vgic_irq *irq,
@@ -180,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
 	}
 
 	seq_printf(s, "\n");
-	seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHC     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
-	seq_printf(s, "---------------------------------------------------------------\n");
+	seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHCG     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
+	seq_printf(s, "----------------------------------------------------------------\n");
 }
 
 static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
@@ -202,7 +203,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 
 	seq_printf(s, "       %s %4d "
 		      "    %2d "
-		      "%d%d%d%d%d%d "
+		      "%d%d%d%d%d%d%d "
 		      "%8d "
 		      "%8x "
 		      " %2x "
@@ -217,6 +218,7 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
 			irq->enabled,
 			irq->hw,
 			irq->config == VGIC_CONFIG_LEVEL,
+			irq->group,
 			irq->hwintid,
 			irq->mpidr,
 			irq->source,
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 230c9221fe70..a7c19cda5835 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 		irq->vcpu = NULL;
 		irq->target_vcpu = vcpu0;
 		kref_init(&irq->refcount);
-		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
 			irq->targets = 0;
-		else
+			irq->group = 0;
+		} else {
 			irq->mpidr = 0;
+			irq->group = 1;
+		}
 	}
 	return 0;
 }
@@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 			/* PPIs */
 			irq->config = VGIC_CONFIG_LEVEL;
 		}
+
+		/*
+		 * GICv3 can only be created via the KVM_DEVICE_CREATE API and
+		 * so we always know the emulation type at this point as it's
+		 * either explicitly configured as GICv3, or explicitly
+		 * configured as GICv2, or not configured yet which also
+		 * implies GICv2.
+		 */
+		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+			irq->group = 1;
+		else
+			irq->group = 0;
 	}
 
 	if (!irqchip_in_kernel(vcpu->kvm))
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index cee2c3c5519c..12502251727e 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
 	kref_init(&irq->refcount);
 	irq->intid = intid;
 	irq->target_vcpu = vcpu;
+	irq->group = 1;
 
 	spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
-- 
cgit v1.2.3


From 87322099052b6a534cecd3d303fc097d4560b7d0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:22 +0200
Subject: KVM: arm/arm64: vgic: Signal IRQs using their configured group

Now when we have a group configuration on the struct IRQ, use this state
when populating the LR and signaling interrupts as either group 0 or
group 1 to the VM.  Depending on the model of the emulated GIC, and the
guest's configuration of the VMCR, interrupts may be signaled as IRQs or
FIQs to the VM.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic.h | 1 +
 virt/kvm/arm/vgic/vgic-v2.c     | 3 +++
 virt/kvm/arm/vgic/vgic-v3.c     | 6 +-----
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 484f5bfa9f3d..6c4aaf04046c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -104,6 +104,7 @@
 #define GICH_LR_PENDING_BIT		(1 << 28)
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
+#define GICH_LR_GROUP1			(1 << 30)
 #define GICH_LR_HW			(1 << 31)
 
 #define GICH_VMCR_ENABLE_GRP0_SHIFT	0
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index a5f2e44f1c33..df5e6a6e3186 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -159,6 +159,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
+	if (irq->group)
+		val |= GICH_LR_GROUP1;
+
 	if (irq->hw) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index cdce653e3c47..530b8491c892 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -197,11 +197,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
 		irq->line_level = false;
 
-	/*
-	 * We currently only support Group1 interrupts, which is a
-	 * known defect. This needs to be addressed at some point.
-	 */
-	if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+	if (irq->group)
 		val |= ICH_LR_GROUP;
 
 	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
-- 
cgit v1.2.3


From 32f8777ed92d73e504840a955a9dc92617826b69 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 16 Jul 2018 15:06:26 +0200
Subject: KVM: arm/arm64: vgic: Let userspace opt-in to writable v2 IGROUPR

Simply letting IGROUPR be writable from userspace would break
migration from old kernels to newer kernels, because old kernels
incorrectly report interrupt groups as group 1.  This would not be a big
problem if userspace wrote GICD_IIDR as read from the kernel, because we
could detect the incompatibility and return an error to userspace.
Unfortunately, this is not the case with current userspace
implementations and simply letting IGROUPR be writable from userspace for
an emulated GICv2 silently breaks migration and causes the destination
VM to no longer run after migration.

We now encourage userspace to write the read and expected value of
GICD_IIDR as the first part of a GIC register restore, and if we observe
a write to GICD_IIDR we know that userspace has been updated and has had
a chance to cope with older kernels (VGICv2 IIDR.Revision == 0)
incorrectly reporting interrupts as group 1, and therefore we now allow
groups to be user writable.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h           |  3 +++
 virt/kvm/arm/vgic/vgic-mmio-v2.c | 16 +++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index c661d0ee6628..c134790be32c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -221,6 +221,9 @@ struct vgic_dist {
 	/* Implementation revision as reported in the GICD_IIDR */
 	u32			implementation_rev;
 
+	/* Userspace can write to GICv2 IGROUPR */
+	bool			v2_groups_user_writable;
+
 	/* Do injected MSIs require an additional device ID? */
 	bool			msis_require_devid;
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index ee164f831401..26654f4140ed 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -85,6 +85,18 @@ static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
 	case GIC_DIST_IIDR:
 		if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
 			return -EINVAL;
+
+		/*
+		 * If we observe a write to GICD_IIDR we know that userspace
+		 * has been updated and has had a chance to cope with older
+		 * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting
+		 * interrupts as group 1, and therefore we now allow groups to
+		 * be user writable.  Doing this by default would break
+		 * migration from old kernels to new kernels with legacy
+		 * userspace.
+		 */
+		vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+		return 0;
 	}
 
 	vgic_mmio_write_v2_misc(vcpu, addr, len, val);
@@ -95,7 +107,9 @@ static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len,
 					    unsigned long val)
 {
-	/* Ignore writes from userspace */
+	if (vcpu->kvm->arch.vgic.v2_groups_user_writable)
+		vgic_mmio_write_group(vcpu, addr, len, val);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From be26b3a73413c2ebf14d5e76a66ad964e6458080 Mon Sep 17 00:00:00 2001
From: Dongjiu Geng <gengdongjiu@huawei.com>
Date: Thu, 19 Jul 2018 16:24:23 +0100
Subject: arm64: KVM: export the capability to set guest SError syndrome

For the arm64 RAS Extension, user space can inject a virtual-SError
with specified ESR. So user space needs to know whether KVM support
to inject such SError, this interface adds this query for this capability.

KVM will check whether system support RAS Extension, if supported, KVM
returns true to user space, otherwise returns false.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
[expanded documentation wording]
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/api.txt | 26 ++++++++++++++++++++++++++
 arch/arm64/kvm/reset.c            |  3 +++
 include/uapi/linux/kvm.h          |  1 +
 3 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 284d36e72f28..dbbb95d5798a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -907,6 +907,18 @@ SError is pending, the architecture's 'Multiple SError interrupts' rules should
 be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
 Serviceability (RAS) Specification").
 
+SError exceptions always have an ESR value. Some CPUs have the ability to
+specify what the virtual SError's ESR value should be. These systems will
+advertise KVM_CAP_ARM_SET_SERROR_ESR. In this case exception.has_esr will
+always have a non-zero value when read, and the agent making an SError pending
+should specify the ISS field in the lower 24 bits of exception.serror_esr. If
+the system supports KVM_CAP_ARM_SET_SERROR_ESR, but user-space sets the events
+with exception.has_esr as zero, KVM will choose an ESR.
+
+Specifying exception.has_esr on a system that does not support it will return
+-EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
+will return -EINVAL.
+
 struct kvm_vcpu_events {
 	struct {
 		__u8 serror_pending;
@@ -4664,3 +4676,17 @@ This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
 hypercalls:
 HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
 HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
+
+8.19 KVM_CAP_ARM_SET_SERROR_ESR
+
+Architectures: arm, arm64
+
+This capability indicates that userspace can specify (via the
+KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
+takes a virtual SError interrupt exception.
+If KVM advertises this capability, userspace can only specify the ISS field for
+the ESR syndrome. Other parts of the ESR, such as the EC are generated by the
+CPU when the exception is taken. If this virtual SError is taken to EL1 using
+AArch64, this value will be reported in the ISS field of ESR_ELx.
+
+See KVM_CAP_VCPU_EVENTS for more details.
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index a3db01a28062..067c6ba969bd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -77,6 +77,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PMU_V3:
 		r = kvm_arm_support_pmu_v3();
 		break;
+	case KVM_CAP_ARM_INJECT_SERROR_ESR:
+		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+		break;
 	case KVM_CAP_SET_GUEST_DEBUG:
 	case KVM_CAP_VCPU_ATTRIBUTES:
 	case KVM_CAP_VCPU_EVENTS:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b6270a3b38e9..a7d9bc4e4068 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_GET_MSR_FEATURES 153
 #define KVM_CAP_HYPERV_EVENTFD 154
 #define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 156
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 1fb53567a3633740aac8761eb7023dc5671f0edb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 5 May 2017 13:45:14 -0500
Subject: pids: Move task_pid_type into sched/signal.h

The function is general and inline so there is no need
to hide it inside of exit.c

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h | 8 ++++++++
 kernel/exit.c                | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..d8ef0a3d2e7e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -556,6 +556,14 @@ extern bool current_is_single_threaded(void);
 typedef int (*proc_visitor)(struct task_struct *p, void *data);
 void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+{
+	if (type != PIDTYPE_PID)
+		task = task->group_leader;
+	return task->pids[type].pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/kernel/exit.c b/kernel/exit.c
index c3c7ac560114..16432428fc6c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1001,14 +1001,6 @@ struct wait_opts {
 	int			notask_error;
 };
 
-static inline
-struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
-{
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	return task->pids[type].pid;
-}
-
 static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
 	return	wo->wo_type == PIDTYPE_MAX ||
-- 
cgit v1.2.3


From 7a36094d61bfe9843de5484ff0140227983ac5d5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 12:45:33 -0500
Subject: pids: Compute task_tgid using signal->leader_pid

The cost is the the same and this removes the need
to worry about complications that come from de_thread
and group_leader changing.

__task_pid_nr_ns has been updated to take advantage of this change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c       |  2 +-
 arch/ia64/kernel/fsys.S              |  8 ++++----
 drivers/platform/x86/thinkpad_acpi.c |  1 +
 fs/fuse/file.c                       |  1 +
 fs/notify/fanotify/fanotify.c        |  1 +
 include/linux/sched.h                |  5 -----
 include/linux/sched/signal.h         |  5 +++++
 include/net/scm.h                    |  1 +
 kernel/pid.c                         | 15 ++++++++-------
 9 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f4db2168d1b8..f5433bb7f04a 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,7 +50,6 @@ void foo(void)
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
 	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
 	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
 	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
@@ -68,6 +67,7 @@ void foo(void)
 	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
 							     group_stop_count));
 	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
+	DEFINE(IA64_SIGNAL_LEADER_PID_OFFSET, offsetof (struct signal_struct, leader_pid));
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index fe742ffafc7a..eaf5a0d6f3e0 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -62,16 +62,16 @@ ENTRY(fsys_getpid)
 	.prologue
 	.altrp b6
 	.body
-	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+	add r17=IA64_TASK_SIGNAL_OFFSET,r16
 	;;
-	ld8 r17=[r17]				// r17 = current->group_leader
+	ld8 r17=[r17]				// r17 = current->signal
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r17
+	add r17=IA64_SIGNAL_LEADER_PID_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld8 r17=[r17]				// r17 = current->group_leader->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->signal->leader_pid
 	;;
 	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index cae9b0595692..d556e95c532c 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -57,6 +57,7 @@
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/delay.h>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a201fb0ac64f..b00a3f126a89 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..6e828cb82e5e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/sched/user.h>
+#include <linux/sched/signal.h>
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/audit.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..a461ff89a3af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,11 +1202,6 @@ static inline struct pid *task_pid(struct task_struct *task)
 	return task->pids[PIDTYPE_PID].pid;
 }
 
-static inline struct pid *task_tgid(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PID].pid;
-}
-
 /*
  * Without tasklist or RCU lock it is not safe to dereference
  * the result of task_pgrp/task_session even if task == current,
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d8ef0a3d2e7e..b95a272c1ab5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -564,6 +564,11 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 	return task->pids[type].pid;
 }
 
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+	return task->signal->leader_pid;
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/include/net/scm.h b/include/net/scm.h
index 903771c8d4e3..1ce365f4c256 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -8,6 +8,7 @@
 #include <linux/security.h>
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
+#include <linux/sched/signal.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
diff --git a/kernel/pid.c b/kernel/pid.c
index 157fe4b19971..d0de2b59f86f 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -421,13 +421,14 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	if (!ns)
 		ns = task_active_pid_ns(current);
 	if (likely(pid_alive(task))) {
-		if (type != PIDTYPE_PID) {
-			if (type == __PIDTYPE_TGID)
-				type = PIDTYPE_PID;
-
-			task = task->group_leader;
-		}
-		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
+		struct pid *pid;
+		if (type == PIDTYPE_PID)
+			pid = task_pid(task);
+		else if (type == __PIDTYPE_TGID)
+			pid = task_tgid(task);
+		else
+			pid = rcu_dereference(task->group_leader->pids[type].pid);
+		nr = pid_nr_ns(pid, ns);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 2c4704756cab7cfa031ada4dab361562f0e357c0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 26 Sep 2017 13:06:43 -0500
Subject: pids: Move the pgrp and session pid pointers from task_struct to
 signal_struct

To access these fields the code always has to go to group leader so
going to signal struct is no loss and is actually a fundamental simplification.

This saves a little bit of memory by only allocating the pid pointer array
once instead of once for every thread, and even better this removes a
few potential races caused by the fact that group_leader can be changed
by de_thread, while signal_struct can not.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c |  2 +-
 arch/ia64/kernel/fsys.S        |  4 ++--
 fs/autofs/autofs_i.h           |  1 +
 include/linux/init_task.h      |  9 ---------
 include/linux/pid.h            |  8 +-------
 include/linux/sched.h          | 22 ++++-----------------
 include/linux/sched/signal.h   | 26 +++++++++++++++++++++---
 init/init_task.c               | 11 ++++++-----
 kernel/fork.c                  | 23 ++++++++++++++++-----
 kernel/pid.c                   | 45 +++++++++++++++++++++---------------------
 10 files changed, 78 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index f5433bb7f04a..c1f8a57855af 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -50,7 +50,7 @@ void foo(void)
 
 	DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
 	DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
-	DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+	DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid));
 	DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
 	DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
 	DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index eaf5a0d6f3e0..e85ebdac678b 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address)
 	.altrp b6
 	.body
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
-	add r17=IA64_TASK_TGIDLINK_OFFSET,r16
+	add r17=IA64_TASK_THREAD_PID_OFFSET,r16
 	;;
 	ld4 r9=[r9]
 	tnat.z p6,p7=r32		// check argument register for being NaT
-	ld8 r17=[r17]				// r17 = current->pids[PIDTYPE_PID].pid
+	ld8 r17=[r17]				// r17 = current->thread_pid
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
 	add r8=IA64_PID_LEVEL_OFFSET,r17
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 9400a9f6318a..502812289850 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -18,6 +18,7 @@
 #include <linux/string.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/uaccess.h>
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a454b8aeb938..a7083a45a26c 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -46,15 +46,6 @@ extern struct cred init_cred;
 #define INIT_CPU_TIMERS(s)
 #endif
 
-#define INIT_PID_LINK(type) 					\
-{								\
-	.node = {						\
-		.next = NULL,					\
-		.pprev = NULL,					\
-	},							\
-	.pid = &init_struct_pid,				\
-}
-
 #define INIT_TASK_COMM "swapper"
 
 /* Attach to the init_task data structure for proper alignment */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7633d55d9a24..3d4c504dcc8c 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -67,12 +67,6 @@ struct pid
 
 extern struct pid init_struct_pid;
 
-struct pid_link
-{
-	struct hlist_node node;
-	struct pid *pid;
-};
-
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
@@ -177,7 +171,7 @@ pid_t pid_vnr(struct pid *pid);
 	do {								\
 		if ((pid) != NULL)					\
 			hlist_for_each_entry_rcu((task),		\
-				&(pid)->tasks[type], pids[type].node) {
+				&(pid)->tasks[type], pid_links[type]) {
 
 			/*
 			 * Both old and new leaders may be attached to
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a461ff89a3af..445bdf5b1f64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,7 +775,8 @@ struct task_struct {
 	struct list_head		ptrace_entry;
 
 	/* PID/PID hash table linkage. */
-	struct pid_link			pids[PIDTYPE_MAX];
+	struct pid			*thread_pid;
+	struct hlist_node		pid_links[PIDTYPE_MAX];
 	struct list_head		thread_group;
 	struct list_head		thread_node;
 
@@ -1199,22 +1200,7 @@ struct task_struct {
 
 static inline struct pid *task_pid(struct task_struct *task)
 {
-	return task->pids[PIDTYPE_PID].pid;
-}
-
-/*
- * Without tasklist or RCU lock it is not safe to dereference
- * the result of task_pgrp/task_session even if task == current,
- * we can race with another thread doing sys_setsid/sys_setpgid.
- */
-static inline struct pid *task_pgrp(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_PGID].pid;
-}
-
-static inline struct pid *task_session(struct task_struct *task)
-{
-	return task->group_leader->pids[PIDTYPE_SID].pid;
+	return task->thread_pid;
 }
 
 /*
@@ -1263,7 +1249,7 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk)
  */
 static inline int pid_alive(const struct task_struct *p)
 {
-	return p->pids[PIDTYPE_PID].pid != NULL;
+	return p->thread_pid != NULL;
 }
 
 static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b95a272c1ab5..2dcded16eb1e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -146,7 +146,9 @@ struct signal_struct {
 
 #endif
 
+	/* PID/PID hash table linkage. */
 	struct pid *leader_pid;
+	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
 	atomic_t tick_dep_mask;
@@ -559,9 +561,12 @@ void walk_process_tree(struct task_struct *top, proc_visitor, void *);
 static inline
 struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	return task->pids[type].pid;
+	struct pid *pid;
+	if (type == PIDTYPE_PID)
+		pid = task_pid(task);
+	else
+		pid = task->signal->pids[type];
+	return pid;
 }
 
 static inline struct pid *task_tgid(struct task_struct *task)
@@ -569,6 +574,21 @@ static inline struct pid *task_tgid(struct task_struct *task)
 	return task->signal->leader_pid;
 }
 
+/*
+ * Without tasklist or RCU lock it is not safe to dereference
+ * the result of task_pgrp/task_session even if task == current,
+ * we can race with another thread doing sys_setsid/sys_setpgid.
+ */
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_PGID];
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+	return task->signal->pids[PIDTYPE_SID];
+}
+
 static inline int get_nr_threads(struct task_struct *tsk)
 {
 	return tsk->signal->nr_threads;
diff --git a/init/init_task.c b/init/init_task.c
index 7914ffb8dc73..db12a61259f1 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -34,6 +34,11 @@ static struct signal_struct init_signals = {
 #endif
 	INIT_CPU_TIMERS(init_signals)
 	.leader_pid = &init_struct_pid,
+	.pids = {
+		[PIDTYPE_PID]	= &init_struct_pid,
+		[PIDTYPE_PGID]	= &init_struct_pid,
+		[PIDTYPE_SID]	= &init_struct_pid,
+	},
 	INIT_PREV_CPUTIME(init_signals)
 };
 
@@ -112,11 +117,7 @@ struct task_struct init_task
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
 	.timer_slack_ns = 50000, /* 50 usec default slack */
-	.pids = {
-		[PIDTYPE_PID]  = INIT_PID_LINK(PIDTYPE_PID),
-		[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID),
-		[PIDTYPE_SID]  = INIT_PID_LINK(PIDTYPE_SID),
-	},
+	.thread_pid	= &init_struct_pid,
 	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDITSYSCALL
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..d2952162399b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1549,10 +1549,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
 static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
 #endif
 
+static inline void init_task_pid_links(struct task_struct *task)
+{
+	enum pid_type type;
+
+	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+		INIT_HLIST_NODE(&task->pid_links[type]);
+	}
+}
+
 static inline void
 init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
 {
-	 task->pids[type].pid = pid;
+	if (type == PIDTYPE_PID)
+		task->thread_pid = pid;
+	else
+		task->signal->pids[type] = pid;
 }
 
 static inline void rcu_copy_process(struct task_struct *p)
@@ -1928,6 +1940,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
+	init_task_pid_links(p);
 	if (likely(p->pid)) {
 		ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
@@ -2036,13 +2049,13 @@ fork_out:
 	return ERR_PTR(retval);
 }
 
-static inline void init_idle_pids(struct pid_link *links)
+static inline void init_idle_pids(struct task_struct *idle)
 {
 	enum pid_type type;
 
 	for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
-		INIT_HLIST_NODE(&links[type].node); /* not really needed */
-		links[type].pid = &init_struct_pid;
+		INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
+		init_task_pid(idle, type, &init_struct_pid);
 	}
 }
 
@@ -2052,7 +2065,7 @@ struct task_struct *fork_idle(int cpu)
 	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
 			    cpu_to_node(cpu));
 	if (!IS_ERR(task)) {
-		init_idle_pids(task->pids);
+		init_idle_pids(task);
 		init_idle(task, cpu);
 	}
 
diff --git a/kernel/pid.c b/kernel/pid.c
index d0de2b59f86f..f8486d2e2346 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -265,27 +265,35 @@ struct pid *find_vpid(int nr)
 }
 EXPORT_SYMBOL_GPL(find_vpid);
 
+static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
+{
+	return (type == PIDTYPE_PID) ?
+		&task->thread_pid :
+		(type == __PIDTYPE_TGID) ?
+		&task->signal->leader_pid :
+		&task->signal->pids[type];
+}
+
 /*
  * attach_pid() must be called with the tasklist_lock write-held.
  */
 void attach_pid(struct task_struct *task, enum pid_type type)
 {
-	struct pid_link *link = &task->pids[type];
-	hlist_add_head_rcu(&link->node, &link->pid->tasks[type]);
+	struct pid *pid = *task_pid_ptr(task, type);
+	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
 }
 
 static void __change_pid(struct task_struct *task, enum pid_type type,
 			struct pid *new)
 {
-	struct pid_link *link;
+	struct pid **pid_ptr = task_pid_ptr(task, type);
 	struct pid *pid;
 	int tmp;
 
-	link = &task->pids[type];
-	pid = link->pid;
+	pid = *pid_ptr;
 
-	hlist_del_rcu(&link->node);
-	link->pid = new;
+	hlist_del_rcu(&task->pid_links[type]);
+	*pid_ptr = new;
 
 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
 		if (!hlist_empty(&pid->tasks[tmp]))
@@ -310,8 +318,9 @@ void change_pid(struct task_struct *task, enum pid_type type,
 void transfer_pid(struct task_struct *old, struct task_struct *new,
 			   enum pid_type type)
 {
-	new->pids[type].pid = old->pids[type].pid;
-	hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node);
+	if (type == PIDTYPE_PID)
+		new->thread_pid = old->thread_pid;
+	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
 }
 
 struct task_struct *pid_task(struct pid *pid, enum pid_type type)
@@ -322,7 +331,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
 		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
 					      lockdep_tasklist_lock_is_held());
 		if (first)
-			result = hlist_entry(first, struct task_struct, pids[(type)].node);
+			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
 	}
 	return result;
 }
@@ -360,9 +369,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
 	struct pid *pid;
 	rcu_read_lock();
-	if (type != PIDTYPE_PID)
-		task = task->group_leader;
-	pid = get_pid(rcu_dereference(task->pids[type].pid));
+	pid = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
 	rcu_read_unlock();
 	return pid;
 }
@@ -420,16 +427,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	rcu_read_lock();
 	if (!ns)
 		ns = task_active_pid_ns(current);
-	if (likely(pid_alive(task))) {
-		struct pid *pid;
-		if (type == PIDTYPE_PID)
-			pid = task_pid(task);
-		else if (type == __PIDTYPE_TGID)
-			pid = task_tgid(task);
-		else
-			pid = rcu_dereference(task->group_leader->pids[type].pid);
-		nr = pid_nr_ns(pid, ns);
-	}
+	if (likely(pid_alive(task)))
+		nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
 	rcu_read_unlock();
 
 	return nr;
-- 
cgit v1.2.3


From 6883f81aac6f44e7df70a6af189b3689ff52cbfb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 4 Jun 2017 04:32:13 -0500
Subject: pid: Implement PIDTYPE_TGID

Everywhere except in the pid array we distinguish between a tasks pid and
a tasks tgid (thread group id).  Even in the enumeration we want that
distinction sometimes so we have added __PIDTYPE_TGID.  With leader_pid
we almost have an implementation of PIDTYPE_TGID in struct signal_struct.

Add PIDTYPE_TGID as a first class member of the pid_type enumeration and
into the pids array.  Then remove the __PIDTYPE_TGID special case and the
leader_pid in signal_struct.

The net size increase is just an extra pointer added to struct pid and
an extra pair of pointers of an hlist_node added to task_struct.

The effect on code maintenance is the removal of a number of special
cases today and the potential to remove many more special cases as
PIDTYPE_TGID gets used to it's fullest.  The long term potential
is allowing zombie thread group leaders to exit, which will remove
a lot more special cases in the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/kernel/asm-offsets.c  | 2 +-
 arch/ia64/kernel/fsys.S         | 4 ++--
 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 fs/exec.c                       | 1 +
 include/linux/pid.h             | 3 +--
 include/linux/sched.h           | 4 ++--
 include/linux/sched/signal.h    | 5 ++---
 init/init_task.c                | 2 +-
 kernel/events/core.c            | 2 +-
 kernel/exit.c                   | 1 +
 kernel/fork.c                   | 3 ++-
 kernel/pid.c                    | 2 --
 kernel/time/itimer.c            | 5 +++--
 kernel/time/posix-cpu-timers.c  | 2 +-
 14 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index c1f8a57855af..00e8e2a1eb19 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -67,7 +67,7 @@ void foo(void)
 	DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
 							     group_stop_count));
 	DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
-	DEFINE(IA64_SIGNAL_LEADER_PID_OFFSET, offsetof (struct signal_struct, leader_pid));
+	DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID]));
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index e85ebdac678b..d80c99a5f55d 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -68,10 +68,10 @@ ENTRY(fsys_getpid)
 	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
 	;;
 	ld4 r9=[r9]
-	add r17=IA64_SIGNAL_LEADER_PID_OFFSET,r17
+	add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17
 	;;
 	and r9=TIF_ALLWORK_MASK,r9
-	ld8 r17=[r17]				// r17 = current->signal->leader_pid
+	ld8 r17=[r17]				// r17 = current->signal->pids[PIDTYPE_TGID]
 	;;
 	add r8=IA64_PID_LEVEL_OFFSET,r17
 	;;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 0292d68e7dde..ca0b7ae894bb 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event,
 		goto out;
 
 	/* Update the process ID (see also kernel/events/core.c) */
-	data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+	data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID);
 	data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
 
 	perf_output_sample(&handle, &header, data, event);
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..79a11fbded7a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1146,6 +1146,7 @@ static int de_thread(struct task_struct *tsk)
 		 */
 		tsk->pid = leader->pid;
 		change_pid(tsk, PIDTYPE_PID, task_pid(leader));
+		transfer_pid(leader, tsk, PIDTYPE_TGID);
 		transfer_pid(leader, tsk, PIDTYPE_PGID);
 		transfer_pid(leader, tsk, PIDTYPE_SID);
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 3d4c504dcc8c..14a9a39da9c7 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -7,11 +7,10 @@
 enum pid_type
 {
 	PIDTYPE_PID,
+	PIDTYPE_TGID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
 	PIDTYPE_MAX,
-	/* only valid to __task_pid_nr_ns() */
-	__PIDTYPE_TGID
 };
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 445bdf5b1f64..06b4e3bda93a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1275,12 +1275,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk)
 
 static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
 }
 
 static inline pid_t task_tgid_vnr(struct task_struct *tsk)
 {
-	return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+	return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
 }
 
 static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 2dcded16eb1e..ee30a5ba475f 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -147,7 +147,6 @@ struct signal_struct {
 #endif
 
 	/* PID/PID hash table linkage. */
-	struct pid *leader_pid;
 	struct pid *pids[PIDTYPE_MAX];
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -571,7 +570,7 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 
 static inline struct pid *task_tgid(struct task_struct *task)
 {
-	return task->signal->leader_pid;
+	return task->signal->pids[PIDTYPE_TGID];
 }
 
 /*
@@ -607,7 +606,7 @@ static inline bool thread_group_leader(struct task_struct *p)
  */
 static inline bool has_group_leader_pid(struct task_struct *p)
 {
-	return task_pid(p) == p->signal->leader_pid;
+	return task_pid(p) == task_tgid(p);
 }
 
 static inline
diff --git a/init/init_task.c b/init/init_task.c
index db12a61259f1..4f97846256d7 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -33,9 +33,9 @@ static struct signal_struct init_signals = {
 	},
 #endif
 	INIT_CPU_TIMERS(init_signals)
-	.leader_pid = &init_struct_pid,
 	.pids = {
 		[PIDTYPE_PID]	= &init_struct_pid,
+		[PIDTYPE_TGID]	= &init_struct_pid,
 		[PIDTYPE_PGID]	= &init_struct_pid,
 		[PIDTYPE_SID]	= &init_struct_pid,
 	},
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 80cca2b30c4f..9025b1796ca8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1334,7 +1334,7 @@ static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p,
 
 static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
 {
-	return perf_event_pid_type(event, p, __PIDTYPE_TGID);
+	return perf_event_pid_type(event, p, PIDTYPE_TGID);
 }
 
 static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
diff --git a/kernel/exit.c b/kernel/exit.c
index 16432428fc6c..25582b442955 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -73,6 +73,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 	nr_threads--;
 	detach_pid(p, PIDTYPE_PID);
 	if (group_dead) {
+		detach_pid(p, PIDTYPE_TGID);
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index d2952162399b..cc5be0d01ce6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1946,6 +1946,7 @@ static __latent_entropy struct task_struct *copy_process(
 
 		init_task_pid(p, PIDTYPE_PID, pid);
 		if (thread_group_leader(p)) {
+			init_task_pid(p, PIDTYPE_TGID, pid);
 			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
 			init_task_pid(p, PIDTYPE_SID, task_session(current));
 
@@ -1954,7 +1955,6 @@ static __latent_entropy struct task_struct *copy_process(
 				p->signal->flags |= SIGNAL_UNKILLABLE;
 			}
 
-			p->signal->leader_pid = pid;
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			/*
 			 * Inherit has_child_subreaper flag under the same
@@ -1965,6 +1965,7 @@ static __latent_entropy struct task_struct *copy_process(
 							 p->real_parent->signal->is_child_subreaper;
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+			attach_pid(p, PIDTYPE_TGID);
 			attach_pid(p, PIDTYPE_PGID);
 			attach_pid(p, PIDTYPE_SID);
 			__this_cpu_inc(process_counts);
diff --git a/kernel/pid.c b/kernel/pid.c
index f8486d2e2346..de1cfc4f75a2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -269,8 +269,6 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
 {
 	return (type == PIDTYPE_PID) ?
 		&task->thread_pid :
-		(type == __PIDTYPE_TGID) ?
-		&task->signal->leader_pid :
 		&task->signal->pids[type];
 }
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index f26acef5d7b4..9a65713c8309 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -139,9 +139,10 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
 {
 	struct signal_struct *sig =
 		container_of(timer, struct signal_struct, real_timer);
+	struct pid *leader_pid = sig->pids[PIDTYPE_TGID];
 
-	trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
-	kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
+	trace_itimer_expire(ITIMER_REAL, leader_pid, 0);
+	kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..40e6fae46cec 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -895,7 +895,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 
 		trace_itimer_expire(signo == SIGPROF ?
 				    ITIMER_PROF : ITIMER_VIRTUAL,
-				    tsk->signal->leader_pid, cur_time);
+				    task_tgid(tsk), cur_time);
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 
-- 
cgit v1.2.3


From 24122c7f4969adeeaeca3fb1656a31569e9aa59b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 20 Jul 2018 14:30:23 -0500
Subject: signal: Pass pid and pid type into send_sigqueue

Make the code more maintainable by performing more of the signal
related work in send_sigqueue.

A quick inspection of do_timer_create will show that this code path
does not lookup a thread group by a thread's pid.  Making it safe
to find the task pointed to by it_pid with "pid_task(it_pid, type)";

This supports the changes needed in fork to tell if a signal was sent
to a single process or a group of processes.

Having the pid to task transition in signal.c will also make it easier
to sort out races with de_thread and and the thread group leader
exiting when it comes time to address that.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  2 +-
 kernel/signal.c              | 14 +++++++++-----
 kernel/time/posix-timers.c   | 13 ++++---------
 3 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ee30a5ba475f..94558ffa82ab 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -330,7 +330,7 @@ extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
+extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 
 static inline int restart_syscall(void)
diff --git a/kernel/signal.c b/kernel/signal.c
index 8d8a940422a8..40feb14e276d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1664,17 +1664,20 @@ void sigqueue_free(struct sigqueue *q)
 		__sigqueue_free(q);
 }
 
-int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
 {
 	int sig = q->info.si_signo;
 	struct sigpending *pending;
+	struct task_struct *t;
 	unsigned long flags;
 	int ret, result;
 
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 
 	ret = -1;
-	if (!likely(lock_task_sighand(t, &flags)))
+	rcu_read_lock();
+	t = pid_task(pid, type);
+	if (!t || !likely(lock_task_sighand(t, &flags)))
 		goto ret;
 
 	ret = 1; /* the signal is ignored */
@@ -1696,15 +1699,16 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
 	q->info.si_overrun = 0;
 
 	signalfd_notify(t, sig);
-	pending = group ? &t->signal->shared_pending : &t->pending;
+	pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
 	list_add_tail(&q->list, &pending->list);
 	sigaddset(&pending->signal, sig);
-	complete_signal(sig, t, group);
+	complete_signal(sig, t, type != PIDTYPE_PID);
 	result = TRACE_SIGNAL_DELIVERED;
 out:
-	trace_signal_generate(sig, &q->info, t, group, result);
+	trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
 	unlock_task_sighand(t, &flags);
 ret:
+	rcu_read_unlock();
 	return ret;
 }
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2bdf08a2bae9..2d2e739fbc57 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -332,8 +332,8 @@ void posixtimer_rearm(struct siginfo *info)
 
 int posix_timer_event(struct k_itimer *timr, int si_private)
 {
-	struct task_struct *task;
-	int shared, ret = -1;
+	enum pid_type type;
+	int ret = -1;
 	/*
 	 * FIXME: if ->sigq is queued we can race with
 	 * dequeue_signal()->posixtimer_rearm().
@@ -347,13 +347,8 @@ int posix_timer_event(struct k_itimer *timr, int si_private)
 	 */
 	timr->sigq->info.si_sys_private = si_private;
 
-	rcu_read_lock();
-	task = pid_task(timr->it_pid, PIDTYPE_PID);
-	if (task) {
-		shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
-		ret = send_sigqueue(timr->sigq, task, shared);
-	}
-	rcu_read_unlock();
+	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
+	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
 	/* If we failed to send the signal the timer stops. */
 	return ret > 0;
 }
-- 
cgit v1.2.3


From 0102498083d58d8b17759642c602b525215e1a54 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 13 Jul 2018 18:40:57 -0500
Subject: signal: Pass pid type into group_send_sig_info

This passes the information we already have at the call sight
into group_send_sig_info.  Ultimatelly allowing for to better handle
signals sent to a group of processes.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h |  4 +++-
 kernel/exit.c          |  3 ++-
 kernel/signal.c        | 10 ++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..d8f2bf3d41e6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -254,11 +254,13 @@ static inline int valid_signal(unsigned long sig)
 
 struct timespec;
 struct pt_regs;
+enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
 				struct task_struct *p, bool group);
-extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
+extern int group_send_sig_info(int sig, struct siginfo *info,
+			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
diff --git a/kernel/exit.c b/kernel/exit.c
index 25582b442955..0e21e6d21f35 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -681,7 +681,8 @@ static void forget_original_parent(struct task_struct *father,
 				t->parent = t->real_parent;
 			if (t->pdeath_signal)
 				group_send_sig_info(t->pdeath_signal,
-						    SEND_SIG_NOINFO, t);
+						    SEND_SIG_NOINFO, t,
+						    PIDTYPE_TGID);
 		}
 		/*
 		 * If this is a threaded reparent there is no need to
diff --git a/kernel/signal.c b/kernel/signal.c
index 40feb14e276d..c7527338fe9d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1274,7 +1274,8 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 /*
  * send signal info to all the members of a group
  */
-int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+			enum pid_type type)
 {
 	int ret;
 
@@ -1301,7 +1302,7 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 	success = 0;
 	retval = -ESRCH;
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
-		int err = group_send_sig_info(sig, info, p);
+		int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
 		success |= !err;
 		retval = err;
 	} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
@@ -1317,7 +1318,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 		rcu_read_lock();
 		p = pid_task(pid, PIDTYPE_PID);
 		if (p)
-			error = group_send_sig_info(sig, info, p);
+			error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
 		rcu_read_unlock();
 		if (likely(!p || error != -ESRCH))
 			return error;
@@ -1420,7 +1421,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
 		for_each_process(p) {
 			if (task_pid_vnr(p) > 1 &&
 					!same_thread_group(p, current)) {
-				int err = group_send_sig_info(sig, info, p);
+				int err = group_send_sig_info(sig, info, p,
+							      PIDTYPE_MAX);
 				++count;
 				if (err != -EPERM)
 					retval = err;
-- 
cgit v1.2.3


From 40b3b02535621027f56d248139e0e467573c3098 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 21 Jul 2018 10:45:15 -0500
Subject: signal: Pass pid type into do_send_sig_info

This passes the information we already have at the call sight into
do_send_sig_info.  Ultimately allowing for better handling of signals
sent to a group of processes during fork.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 drivers/tty/sysrq.c    |  2 +-
 fs/fcntl.c             |  6 +++---
 include/linux/signal.h |  2 +-
 kernel/signal.c        | 10 +++++-----
 mm/oom_kill.c          |  4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 6364890575ec..06ed20dd01ba 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -348,7 +348,7 @@ static void send_sig_all(int sig)
 		if (is_global_init(p))
 			continue;
 
-		do_send_sig_info(sig, SEND_SIG_FORCED, p, true);
+		do_send_sig_info(sig, SEND_SIG_FORCED, p, PIDTYPE_MAX);
 	}
 	read_unlock(&tasklist_lock);
 }
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 5d596a00f40b..a04accf6847f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -767,11 +767,11 @@ static void send_sigio_to_task(struct task_struct *p,
 			else
 				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
 			si.si_fd    = fd;
-			if (!do_send_sig_info(signum, &si, p, type != PIDTYPE_PID))
+			if (!do_send_sig_info(signum, &si, p, type))
 				break;
 		/* fall-through: fall back on the old plain SIGIO signal */
 		case 0:
-			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type != PIDTYPE_PID);
+			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
 	}
 }
 
@@ -808,7 +808,7 @@ static void send_sigurg_to_task(struct task_struct *p,
 				struct fown_struct *fown, enum pid_type type)
 {
 	if (sigio_perm(p, fown, SIGURG))
-		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type != PIDTYPE_PID);
+		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
 }
 
 int send_sigurg(struct fown_struct *fown)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index d8f2bf3d41e6..fe125b0335f7 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -258,7 +258,7 @@ enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int do_send_sig_info(int sig, struct siginfo *info,
-				struct task_struct *p, bool group);
+				struct task_struct *p, enum pid_type type);
 extern int group_send_sig_info(int sig, struct siginfo *info,
 			       struct task_struct *p, enum pid_type type);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
diff --git a/kernel/signal.c b/kernel/signal.c
index c7527338fe9d..2c09e6143dd8 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1161,13 +1161,13 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 }
 
 int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
-			bool group)
+			enum pid_type type)
 {
 	unsigned long flags;
 	int ret = -ESRCH;
 
 	if (lock_task_sighand(p, &flags)) {
-		ret = send_signal(sig, info, p, group);
+		ret = send_signal(sig, info, p, type != PIDTYPE_PID);
 		unlock_task_sighand(p, &flags);
 	}
 
@@ -1284,7 +1284,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 	rcu_read_unlock();
 
 	if (!ret && sig)
-		ret = do_send_sig_info(sig, info, p, true);
+		ret = do_send_sig_info(sig, info, p, type);
 
 	return ret;
 }
@@ -1448,7 +1448,7 @@ int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	if (!valid_signal(sig))
 		return -EINVAL;
 
-	return do_send_sig_info(sig, info, p, false);
+	return do_send_sig_info(sig, info, p, PIDTYPE_PID);
 }
 
 #define __si_special(priv) \
@@ -3199,7 +3199,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 		 * probe.  No signal is actually delivered.
 		 */
 		if (!error && sig) {
-			error = do_send_sig_info(sig, info, p, false);
+			error = do_send_sig_info(sig, info, p, PIDTYPE_PID);
 			/*
 			 * If lock_task_sighand() failed we pretend the task
 			 * dies after receiving the signal. The window is tiny,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 84081e77bc51..2cc9b238368f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -920,7 +920,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	 * in order to prevent the OOM victim from depleting the memory
 	 * reserves from the user space under its control.
 	 */
-	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+	do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
 	mark_oom_victim(victim);
 	pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 		task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
@@ -958,7 +958,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 		 */
 		if (unlikely(p->flags & PF_KTHREAD))
 			continue;
-		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
+		do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
 	}
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From a3134fb09e0bc5bee76e13bf863173b86f21cf87 Mon Sep 17 00:00:00 2001
From: Rishabh Bhatnagar <rishabhb@codeaurora.org>
Date: Wed, 23 May 2018 17:35:21 -0700
Subject: drivers: soc: Add LLCC driver

LLCC (Last Level Cache Controller) provides additional cache memory
in the system. LLCC is partitioned into multiple slices and each
slice gets its own priority, size, ID and other config parameters.
LLCC driver programs these parameters for each slice. Clients that
are assigned to use LLCC need to get information such size & ID of the
slice they get and activate or deactivate the slice as needed. LLCC driver
provides API for the clients to perform these operations.

Signed-off-by: Channagoud Kadabi <ckadabi@codeaurora.org>
Signed-off-by: Rishabh Bhatnagar <rishabhb@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/Kconfig           |  17 ++
 drivers/soc/qcom/Makefile          |   2 +
 drivers/soc/qcom/llcc-sdm845.c     |  94 +++++++++++
 drivers/soc/qcom/llcc-slice.c      | 335 +++++++++++++++++++++++++++++++++++++
 include/linux/soc/qcom/llcc-qcom.h | 180 ++++++++++++++++++++
 5 files changed, 628 insertions(+)
 create mode 100644 drivers/soc/qcom/llcc-sdm845.c
 create mode 100644 drivers/soc/qcom/llcc-slice.c
 create mode 100644 include/linux/soc/qcom/llcc-qcom.h

(limited to 'include')

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 9dc02f390ba3..d1a41852ae7a 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -39,6 +39,23 @@ config QCOM_GSBI
           functions for connecting the underlying serial UART, SPI, and I2C
           devices to the output pins.
 
+config QCOM_LLCC
+	tristate "Qualcomm Technologies, Inc. LLCC driver"
+	depends on ARCH_QCOM
+	help
+	  Qualcomm Technologies, Inc. platform specific
+	  Last Level Cache Controller(LLCC) driver. This provides interfaces
+	  to clients that use the LLCC. Say yes here to enable LLCC slice
+	  driver.
+
+config QCOM_SDM845_LLCC
+	tristate "Qualcomm Technologies, Inc. SDM845 LLCC driver"
+	depends on QCOM_LLCC
+	help
+	  Say yes here to enable the LLCC driver for SDM845. This provides
+	  data required to configure LLCC so that clients can start using the
+	  LLCC slices.
+
 config QCOM_MDT_LOADER
 	tristate
 	select QCOM_SCM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 19dcf957cb3a..5921454c4ddd 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -15,3 +15,5 @@ obj-$(CONFIG_QCOM_SMP2P)	+= smp2p.o
 obj-$(CONFIG_QCOM_SMSM)	+= smsm.o
 obj-$(CONFIG_QCOM_WCNSS_CTRL) += wcnss_ctrl.o
 obj-$(CONFIG_QCOM_APR) += apr.o
+obj-$(CONFIG_QCOM_LLCC) += llcc-slice.o
+obj-$(CONFIG_QCOM_SDM845_LLCC) += llcc-sdm845.o
diff --git a/drivers/soc/qcom/llcc-sdm845.c b/drivers/soc/qcom/llcc-sdm845.c
new file mode 100644
index 000000000000..2e1e4f0a5db8
--- /dev/null
+++ b/drivers/soc/qcom/llcc-sdm845.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+/*
+ * SCT(System Cache Table) entry contains of the following members:
+ * usecase_id: Unique id for the client's use case
+ * slice_id: llcc slice id for each client
+ * max_cap: The maximum capacity of the cache slice provided in KB
+ * priority: Priority of the client used to select victim line for replacement
+ * fixed_size: Boolean indicating if the slice has a fixed capacity
+ * bonus_ways: Bonus ways are additional ways to be used for any slice,
+ *		if client ends up using more than reserved cache ways. Bonus
+ *		ways are allocated only if they are not reserved for some
+ *		other client.
+ * res_ways: Reserved ways for the cache slice, the reserved ways cannot
+ *		be used by any other client than the one its assigned to.
+ * cache_mode: Each slice operates as a cache, this controls the mode of the
+ *             slice: normal or TCM(Tightly Coupled Memory)
+ * probe_target_ways: Determines what ways to probe for access hit. When
+ *                    configured to 1 only bonus and reserved ways are probed.
+ *                    When configured to 0 all ways in llcc are probed.
+ * dis_cap_alloc: Disable capacity based allocation for a client
+ * retain_on_pc: If this bit is set and client has maintained active vote
+ *               then the ways assigned to this client are not flushed on power
+ *               collapse.
+ * activate_on_init: Activate the slice immediately after the SCT is programmed
+ */
+#define SCT_ENTRY(uid, sid, mc, p, fs, bway, rway, cmod, ptw, dca, rp, a) \
+	{					\
+		.usecase_id = uid,		\
+		.slice_id = sid,		\
+		.max_cap = mc,			\
+		.priority = p,			\
+		.fixed_size = fs,		\
+		.bonus_ways = bway,		\
+		.res_ways = rway,		\
+		.cache_mode = cmod,		\
+		.probe_target_ways = ptw,	\
+		.dis_cap_alloc = dca,		\
+		.retain_on_pc = rp,		\
+		.activate_on_init = a,		\
+	}
+
+static struct llcc_slice_config sdm845_data[] =  {
+	SCT_ENTRY(LLCC_CPUSS,    1,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 1),
+	SCT_ENTRY(LLCC_VIDSC0,   2,  512,  2, 1, 0x0,   0x0f0, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VIDSC1,   3,  512,  2, 1, 0x0,   0x0f0, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_ROTATOR,  4,  563,  2, 1, 0x0,   0x00e, 2, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VOICE,    5,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_AUDIO,    6,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMHPGRW, 7,  1024, 2, 0, 0xfc,  0xf00, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDM,      8,  2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_CMPT,     10, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_GPUHTW,   11, 512,  1, 1, 0xc,   0x0,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_GPU,      12, 2304, 1, 0, 0xff0, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MMUHWT,   13, 256,  2, 0, 0x0,   0x1,   0, 0, 1, 0, 1),
+	SCT_ENTRY(LLCC_CMPTDMA,  15, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_DISP,     16, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_VIDFW,    17, 2816, 1, 0, 0xffc, 0x2,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMHPFX,  20, 1024, 2, 1, 0x0,   0xf00, 0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_MDMPNG,   21, 1024, 0, 1, 0x1e,  0x0,   0, 0, 1, 1, 0),
+	SCT_ENTRY(LLCC_AUDHW,    22, 1024, 1, 1, 0xffc, 0x2,   0, 0, 1, 1, 0),
+};
+
+static int sdm845_qcom_llcc_probe(struct platform_device *pdev)
+{
+	return qcom_llcc_probe(pdev, sdm845_data, ARRAY_SIZE(sdm845_data));
+}
+
+static const struct of_device_id sdm845_qcom_llcc_of_match[] = {
+	{ .compatible = "qcom,sdm845-llcc", },
+	{ }
+};
+
+static struct platform_driver sdm845_qcom_llcc_driver = {
+	.driver = {
+		.name = "sdm845-llcc",
+		.of_match_table = sdm845_qcom_llcc_of_match,
+	},
+	.probe = sdm845_qcom_llcc_probe,
+};
+module_platform_driver(sdm845_qcom_llcc_driver);
+
+MODULE_DESCRIPTION("QCOM sdm845 LLCC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c
new file mode 100644
index 000000000000..fcaad1a4b41d
--- /dev/null
+++ b/drivers/soc/qcom/llcc-slice.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+#define ACTIVATE                      BIT(0)
+#define DEACTIVATE                    BIT(1)
+#define ACT_CTRL_OPCODE_ACTIVATE      BIT(0)
+#define ACT_CTRL_OPCODE_DEACTIVATE    BIT(1)
+#define ACT_CTRL_ACT_TRIG             BIT(0)
+#define ACT_CTRL_OPCODE_SHIFT         0x01
+#define ATTR1_PROBE_TARGET_WAYS_SHIFT 0x02
+#define ATTR1_FIXED_SIZE_SHIFT        0x03
+#define ATTR1_PRIORITY_SHIFT          0x04
+#define ATTR1_MAX_CAP_SHIFT           0x10
+#define ATTR0_RES_WAYS_MASK           GENMASK(11, 0)
+#define ATTR0_BONUS_WAYS_MASK         GENMASK(27, 16)
+#define ATTR0_BONUS_WAYS_SHIFT        0x10
+#define LLCC_STATUS_READ_DELAY        100
+
+#define CACHE_LINE_SIZE_SHIFT         6
+
+#define LLCC_COMMON_STATUS0           0x0003000c
+#define LLCC_LB_CNT_MASK              GENMASK(31, 28)
+#define LLCC_LB_CNT_SHIFT             28
+
+#define MAX_CAP_TO_BYTES(n)           (n * SZ_1K)
+#define LLCC_TRP_ACT_CTRLn(n)         (n * SZ_4K)
+#define LLCC_TRP_STATUSn(n)           (4 + n * SZ_4K)
+#define LLCC_TRP_ATTR0_CFGn(n)        (0x21000 + SZ_8 * n)
+#define LLCC_TRP_ATTR1_CFGn(n)        (0x21004 + SZ_8 * n)
+
+#define BANK_OFFSET_STRIDE	      0x80000
+
+static struct llcc_drv_data *drv_data;
+
+static const struct regmap_config llcc_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+};
+
+/**
+ * llcc_slice_getd - get llcc slice descriptor
+ * @uid: usecase_id for the client
+ *
+ * A pointer to llcc slice descriptor will be returned on success and
+ * and error pointer is returned on failure
+ */
+struct llcc_slice_desc *llcc_slice_getd(u32 uid)
+{
+	const struct llcc_slice_config *cfg;
+	struct llcc_slice_desc *desc;
+	u32 sz, count;
+
+	cfg = drv_data->cfg;
+	sz = drv_data->cfg_size;
+
+	for (count = 0; cfg && count < sz; count++, cfg++)
+		if (cfg->usecase_id == uid)
+			break;
+
+	if (count == sz || !cfg)
+		return ERR_PTR(-ENODEV);
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+
+	desc->slice_id = cfg->slice_id;
+	desc->slice_size = cfg->max_cap;
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_getd);
+
+/**
+ * llcc_slice_putd - llcc slice descritpor
+ * @desc: Pointer to llcc slice descriptor
+ */
+void llcc_slice_putd(struct llcc_slice_desc *desc)
+{
+	kfree(desc);
+}
+EXPORT_SYMBOL_GPL(llcc_slice_putd);
+
+static int llcc_update_act_ctrl(u32 sid,
+				u32 act_ctrl_reg_val, u32 status)
+{
+	u32 act_ctrl_reg;
+	u32 status_reg;
+	u32 slice_status;
+	int ret;
+
+	act_ctrl_reg = drv_data->bcast_off + LLCC_TRP_ACT_CTRLn(sid);
+	status_reg = drv_data->bcast_off + LLCC_TRP_STATUSn(sid);
+
+	/* Set the ACTIVE trigger */
+	act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG;
+	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	if (ret)
+		return ret;
+
+	/* Clear the ACTIVE trigger */
+	act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG;
+	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	if (ret)
+		return ret;
+
+	ret = regmap_read_poll_timeout(drv_data->regmap, status_reg,
+				      slice_status, !(slice_status & status),
+				      0, LLCC_STATUS_READ_DELAY);
+	return ret;
+}
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value of zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+	int ret;
+	u32 act_ctrl_val;
+
+	mutex_lock(&drv_data->lock);
+	if (test_bit(desc->slice_id, drv_data->bitmap)) {
+		mutex_unlock(&drv_data->lock);
+		return 0;
+	}
+
+	act_ctrl_val = ACT_CTRL_OPCODE_ACTIVATE << ACT_CTRL_OPCODE_SHIFT;
+
+	ret = llcc_update_act_ctrl(desc->slice_id, act_ctrl_val,
+				  DEACTIVATE);
+	if (ret) {
+		mutex_unlock(&drv_data->lock);
+		return ret;
+	}
+
+	__set_bit(desc->slice_id, drv_data->bitmap);
+	mutex_unlock(&drv_data->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_activate);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ *
+ * A value of zero will be returned on success and a negative errno will
+ * be returned in error cases
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+	u32 act_ctrl_val;
+	int ret;
+
+	mutex_lock(&drv_data->lock);
+	if (!test_bit(desc->slice_id, drv_data->bitmap)) {
+		mutex_unlock(&drv_data->lock);
+		return 0;
+	}
+	act_ctrl_val = ACT_CTRL_OPCODE_DEACTIVATE << ACT_CTRL_OPCODE_SHIFT;
+
+	ret = llcc_update_act_ctrl(desc->slice_id, act_ctrl_val,
+				  ACTIVATE);
+	if (ret) {
+		mutex_unlock(&drv_data->lock);
+		return ret;
+	}
+
+	__clear_bit(desc->slice_id, drv_data->bitmap);
+	mutex_unlock(&drv_data->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(llcc_slice_deactivate);
+
+/**
+ * llcc_get_slice_id - return the slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_get_slice_id(struct llcc_slice_desc *desc)
+{
+	return desc->slice_id;
+}
+EXPORT_SYMBOL_GPL(llcc_get_slice_id);
+
+/**
+ * llcc_get_slice_size - return the slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
+{
+	return desc->slice_size;
+}
+EXPORT_SYMBOL_GPL(llcc_get_slice_size);
+
+static int qcom_llcc_cfg_program(struct platform_device *pdev)
+{
+	int i;
+	u32 attr1_cfg;
+	u32 attr0_cfg;
+	u32 attr1_val;
+	u32 attr0_val;
+	u32 max_cap_cacheline;
+	u32 sz;
+	int ret;
+	const struct llcc_slice_config *llcc_table;
+	struct llcc_slice_desc desc;
+	u32 bcast_off = drv_data->bcast_off;
+
+	sz = drv_data->cfg_size;
+	llcc_table = drv_data->cfg;
+
+	for (i = 0; i < sz; i++) {
+		attr1_cfg = bcast_off +
+				LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
+		attr0_cfg = bcast_off +
+				LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
+
+		attr1_val = llcc_table[i].cache_mode;
+		attr1_val |= llcc_table[i].probe_target_ways <<
+				ATTR1_PROBE_TARGET_WAYS_SHIFT;
+		attr1_val |= llcc_table[i].fixed_size <<
+				ATTR1_FIXED_SIZE_SHIFT;
+		attr1_val |= llcc_table[i].priority <<
+				ATTR1_PRIORITY_SHIFT;
+
+		max_cap_cacheline = MAX_CAP_TO_BYTES(llcc_table[i].max_cap);
+
+		/* LLCC instances can vary for each target.
+		 * The SW writes to broadcast register which gets propagated
+		 * to each llcc instace (llcc0,.. llccN).
+		 * Since the size of the memory is divided equally amongst the
+		 * llcc instances, we need to configure the max cap accordingly.
+		 */
+		max_cap_cacheline = max_cap_cacheline / drv_data->num_banks;
+		max_cap_cacheline >>= CACHE_LINE_SIZE_SHIFT;
+		attr1_val |= max_cap_cacheline << ATTR1_MAX_CAP_SHIFT;
+
+		attr0_val = llcc_table[i].res_ways & ATTR0_RES_WAYS_MASK;
+		attr0_val |= llcc_table[i].bonus_ways << ATTR0_BONUS_WAYS_SHIFT;
+
+		ret = regmap_write(drv_data->regmap, attr1_cfg, attr1_val);
+		if (ret)
+			return ret;
+		ret = regmap_write(drv_data->regmap, attr0_cfg, attr0_val);
+		if (ret)
+			return ret;
+		if (llcc_table[i].activate_on_init) {
+			desc.slice_id = llcc_table[i].slice_id;
+			ret = llcc_slice_activate(&desc);
+		}
+	}
+	return ret;
+}
+
+int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *llcc_cfg, u32 sz)
+{
+	u32 num_banks;
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	void __iomem *base;
+	int ret, i;
+
+	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
+	if (!drv_data)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drv_data->regmap = devm_regmap_init_mmio(dev, base,
+					&llcc_regmap_config);
+	if (IS_ERR(drv_data->regmap))
+		return PTR_ERR(drv_data->regmap);
+
+	ret = regmap_read(drv_data->regmap, LLCC_COMMON_STATUS0,
+						&num_banks);
+	if (ret)
+		return ret;
+
+	num_banks &= LLCC_LB_CNT_MASK;
+	num_banks >>= LLCC_LB_CNT_SHIFT;
+	drv_data->num_banks = num_banks;
+
+	for (i = 0; i < sz; i++)
+		if (llcc_cfg[i].slice_id > drv_data->max_slices)
+			drv_data->max_slices = llcc_cfg[i].slice_id;
+
+	drv_data->offsets = devm_kcalloc(dev, num_banks, sizeof(u32),
+							GFP_KERNEL);
+	if (!drv_data->offsets)
+		return -ENOMEM;
+
+	for (i = 0; i < num_banks; i++)
+		drv_data->offsets[i] = i * BANK_OFFSET_STRIDE;
+
+	drv_data->bcast_off = num_banks * BANK_OFFSET_STRIDE;
+
+	drv_data->bitmap = devm_kcalloc(dev,
+	BITS_TO_LONGS(drv_data->max_slices), sizeof(unsigned long),
+						GFP_KERNEL);
+	if (!drv_data->bitmap)
+		return -ENOMEM;
+
+	drv_data->cfg = llcc_cfg;
+	drv_data->cfg_size = sz;
+	mutex_init(&drv_data->lock);
+	platform_set_drvdata(pdev, drv_data);
+
+	return qcom_llcc_cfg_program(pdev);
+}
+EXPORT_SYMBOL_GPL(qcom_llcc_probe);
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
new file mode 100644
index 000000000000..7e3b9c605ab2
--- /dev/null
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <linux/platform_device.h>
+#ifndef __LLCC_QCOM__
+#define __LLCC_QCOM__
+
+#define LLCC_CPUSS       1
+#define LLCC_VIDSC0      2
+#define LLCC_VIDSC1      3
+#define LLCC_ROTATOR     4
+#define LLCC_VOICE       5
+#define LLCC_AUDIO       6
+#define LLCC_MDMHPGRW    7
+#define LLCC_MDM         8
+#define LLCC_CMPT        10
+#define LLCC_GPUHTW      11
+#define LLCC_GPU         12
+#define LLCC_MMUHWT      13
+#define LLCC_CMPTDMA     15
+#define LLCC_DISP        16
+#define LLCC_VIDFW       17
+#define LLCC_MDMHPFX     20
+#define LLCC_MDMPNG      21
+#define LLCC_AUDHW       22
+
+/**
+ * llcc_slice_desc - Cache slice descriptor
+ * @slice_id: llcc slice id
+ * @slice_size: Size allocated for the llcc slice
+ */
+struct llcc_slice_desc {
+	u32 slice_id;
+	size_t slice_size;
+};
+
+/**
+ * llcc_slice_config - Data associated with the llcc slice
+ * @usecase_id: usecase id for which the llcc slice is used
+ * @slice_id: llcc slice id assigned to each slice
+ * @max_cap: maximum capacity of the llcc slice
+ * @priority: priority of the llcc slice
+ * @fixed_size: whether the llcc slice can grow beyond its size
+ * @bonus_ways: bonus ways associated with llcc slice
+ * @res_ways: reserved ways associated with llcc slice
+ * @cache_mode: mode of the llcc slice
+ * @probe_target_ways: Probe only reserved and bonus ways on a cache miss
+ * @dis_cap_alloc: Disable capacity based allocation
+ * @retain_on_pc: Retain through power collapse
+ * @activate_on_init: activate the slice on init
+ */
+struct llcc_slice_config {
+	u32 usecase_id;
+	u32 slice_id;
+	u32 max_cap;
+	u32 priority;
+	bool fixed_size;
+	u32 bonus_ways;
+	u32 res_ways;
+	u32 cache_mode;
+	u32 probe_target_ways;
+	bool dis_cap_alloc;
+	bool retain_on_pc;
+	bool activate_on_init;
+};
+
+/**
+ * llcc_drv_data - Data associated with the llcc driver
+ * @regmap: regmap associated with the llcc device
+ * @cfg: pointer to the data structure for slice configuration
+ * @lock: mutex associated with each slice
+ * @cfg_size: size of the config data table
+ * @max_slices: max slices as read from device tree
+ * @bcast_off: Offset of the broadcast bank
+ * @num_banks: Number of llcc banks
+ * @bitmap: Bit map to track the active slice ids
+ * @offsets: Pointer to the bank offsets array
+ */
+struct llcc_drv_data {
+	struct regmap *regmap;
+	const struct llcc_slice_config *cfg;
+	struct mutex lock;
+	u32 cfg_size;
+	u32 max_slices;
+	u32 bcast_off;
+	u32 num_banks;
+	unsigned long *bitmap;
+	u32 *offsets;
+};
+
+#if IS_ENABLED(CONFIG_QCOM_LLCC)
+/**
+ * llcc_slice_getd - get llcc slice descriptor
+ * @uid: usecase_id of the client
+ */
+struct llcc_slice_desc *llcc_slice_getd(u32 uid);
+
+/**
+ * llcc_slice_putd - llcc slice descritpor
+ * @desc: Pointer to llcc slice descriptor
+ */
+void llcc_slice_putd(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_get_slice_id - get slice id
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_get_slice_id(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_get_slice_size - llcc slice size
+ * @desc: Pointer to llcc slice descriptor
+ */
+size_t llcc_get_slice_size(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_slice_activate - Activate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_slice_activate(struct llcc_slice_desc *desc);
+
+/**
+ * llcc_slice_deactivate - Deactivate the llcc slice
+ * @desc: Pointer to llcc slice descriptor
+ */
+int llcc_slice_deactivate(struct llcc_slice_desc *desc);
+
+/**
+ * qcom_llcc_probe - program the sct table
+ * @pdev: platform device pointer
+ * @table: soc sct table
+ * @sz: Size of the config table
+ */
+int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *table, u32 sz);
+#else
+static inline struct llcc_slice_desc *llcc_slice_getd(u32 uid)
+{
+	return NULL;
+}
+
+static inline void llcc_slice_putd(struct llcc_slice_desc *desc)
+{
+
+};
+
+static inline int llcc_get_slice_id(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+
+static inline size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
+{
+	return 0;
+}
+static inline int llcc_slice_activate(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+
+static inline int llcc_slice_deactivate(struct llcc_slice_desc *desc)
+{
+	return -EINVAL;
+}
+static inline int qcom_llcc_probe(struct platform_device *pdev,
+		      const struct llcc_slice_config *table, u32 sz)
+{
+	return -ENODEV;
+}
+
+static inline int qcom_llcc_remove(struct platform_device *pdev)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From 658628e7ef78e875cfe13064387c1a7a287d6338 Mon Sep 17 00:00:00 2001
From: Lina Iyer <ilina@codeaurora.org>
Date: Wed, 20 Jun 2018 18:56:58 +0530
Subject: drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

Add controller driver for QCOM SoCs that have hardware based shared
resource management. The hardware IP known as RSC (Resource State
Coordinator) houses multiple Direct Resource Voter (DRV) for different
execution levels. A DRV is a unique voter on the state of a shared
resource. A Trigger Control Set (TCS) is a bunch of slots that can house
multiple resource state requests, that when triggered will issue those
requests through an internal bus to the Resource Power Manager Hardened
(RPMH) blocks. These hardware blocks are capable of adjusting clocks,
voltages, etc. The resource state request from a DRV are aggregated
along with state requests from other processors in the SoC and the
aggregate value is applied on the resource.

Some important aspects of the RPMH communication -
- Requests are <addr, value> with some header information
- Multiple requests (upto 16) may be sent through a TCS, at a time
- Requests in a TCS are sent in sequence
- Requests may be fire-n-forget or completion (response expected)
- Multiple TCS from the same DRV may be triggered simultaneously
- Cannot send a request if another request for the same addr is in
  progress from the same DRV
- When all the requests from a TCS are complete, an IRQ is raised
- The IRQ handler needs to clear the TCS before it is available for
  reuse
- TCS configuration is specific to a DRV
- Platform drivers may use DRV from different RSCs to make requests

Resource state requests made when CPUs are active are called 'active'
state requests. Requests made when all the CPUs are powered down (idle
state) are called 'sleep' state requests. They are matched by a
corresponding 'wake' state requests which puts the resources back in to
previously requested active state before resuming any CPU. TCSes are
dedicated for each type of requests. Active mode TCSes (AMC) are used to
send requests immediately to the resource, while control TCS are used to
provide specific information to the controller. Sleep and Wake TCS send
sleep and wake requests, after and before the system halt respectively.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Raju P.L.S.S.S.N <rplsssn@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/Kconfig                |  10 +
 drivers/soc/qcom/Makefile               |   1 +
 drivers/soc/qcom/rpmh-internal.h        |  69 +++++
 drivers/soc/qcom/rpmh-rsc.c             | 481 ++++++++++++++++++++++++++++++++
 include/dt-bindings/soc/qcom,rpmh-rsc.h |  14 +
 include/soc/qcom/tcs.h                  |  56 ++++
 6 files changed, 631 insertions(+)
 create mode 100644 drivers/soc/qcom/rpmh-internal.h
 create mode 100644 drivers/soc/qcom/rpmh-rsc.c
 create mode 100644 include/dt-bindings/soc/qcom,rpmh-rsc.h
 create mode 100644 include/soc/qcom/tcs.h

(limited to 'include')

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index d1a41852ae7a..ccbdb398fa63 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -91,6 +91,16 @@ config QCOM_RMTFS_MEM
 
 	  Say y here if you intend to boot the modem remoteproc.
 
+config QCOM_RPMH
+	bool "Qualcomm RPM-Hardened (RPMH) Communication"
+	depends on ARCH_QCOM && ARM64 && OF || COMPILE_TEST
+	help
+	  Support for communication with the hardened-RPM blocks in
+	  Qualcomm Technologies Inc (QTI) SoCs. RPMH communication uses an
+	  internal bus to transmit state requests for shared resources. A set
+	  of hardware components aggregate requests for these resources and
+	  help apply the aggregated state on the resource.
+
 config QCOM_SMEM
 	tristate "Qualcomm Shared Memory Manager (SMEM)"
 	depends on ARCH_QCOM
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 5921454c4ddd..b2faf9aeed3f 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_QCOM_PM)	+=	spm.o
 obj-$(CONFIG_QCOM_QMI_HELPERS)	+= qmi_helpers.o
 qmi_helpers-y	+= qmi_encdec.o qmi_interface.o
 obj-$(CONFIG_QCOM_RMTFS_MEM)	+= rmtfs_mem.o
+obj-$(CONFIG_QCOM_RPMH)	+=	rpmh-rsc.o
 obj-$(CONFIG_QCOM_SMD_RPM)	+= smd-rpm.o
 obj-$(CONFIG_QCOM_SMEM) +=	smem.o
 obj-$(CONFIG_QCOM_SMEM_STATE) += smem_state.o
diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
new file mode 100644
index 000000000000..cc29176f1303
--- /dev/null
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+
+#ifndef __RPM_INTERNAL_H__
+#define __RPM_INTERNAL_H__
+
+#include <linux/bitmap.h>
+#include <soc/qcom/tcs.h>
+
+#define TCS_TYPE_NR			4
+#define MAX_CMDS_PER_TCS		16
+#define MAX_TCS_PER_TYPE		3
+#define MAX_TCS_NR			(MAX_TCS_PER_TYPE * TCS_TYPE_NR)
+
+struct rsc_drv;
+
+/**
+ * struct tcs_group: group of Trigger Command Sets (TCS) to send state requests
+ * to the controller
+ *
+ * @drv:       the controller
+ * @type:      type of the TCS in this group - active, sleep, wake
+ * @mask:      mask of the TCSes relative to all the TCSes in the RSC
+ * @offset:    start of the TCS group relative to the TCSes in the RSC
+ * @num_tcs:   number of TCSes in this type
+ * @ncpt:      number of commands in each TCS
+ * @lock:      lock for synchronizing this TCS writes
+ * @req:       requests that are sent from the TCS
+ */
+struct tcs_group {
+	struct rsc_drv *drv;
+	int type;
+	u32 mask;
+	u32 offset;
+	int num_tcs;
+	int ncpt;
+	spinlock_t lock;
+	const struct tcs_request *req[MAX_TCS_PER_TYPE];
+};
+
+/**
+ * struct rsc_drv: the Direct Resource Voter (DRV) of the
+ * Resource State Coordinator controller (RSC)
+ *
+ * @name:       controller identifier
+ * @tcs_base:   start address of the TCS registers in this controller
+ * @id:         instance id in the controller (Direct Resource Voter)
+ * @num_tcs:    number of TCSes in this DRV
+ * @tcs:        TCS groups
+ * @tcs_in_use: s/w state of the TCS
+ * @lock:       synchronize state of the controller
+ */
+struct rsc_drv {
+	const char *name;
+	void __iomem *tcs_base;
+	int id;
+	int num_tcs;
+	struct tcs_group tcs[TCS_TYPE_NR];
+	DECLARE_BITMAP(tcs_in_use, MAX_TCS_NR);
+	spinlock_t lock;
+};
+
+
+int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg);
+
+#endif /* __RPM_INTERNAL_H__ */
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
new file mode 100644
index 000000000000..c5e0793cdfb7
--- /dev/null
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "%s " fmt, KBUILD_MODNAME
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <soc/qcom/tcs.h>
+#include <dt-bindings/soc/qcom,rpmh-rsc.h>
+
+#include "rpmh-internal.h"
+
+#define RSC_DRV_TCS_OFFSET		672
+#define RSC_DRV_CMD_OFFSET		20
+
+/* DRV Configuration Information Register */
+#define DRV_PRNT_CHLD_CONFIG		0x0C
+#define DRV_NUM_TCS_MASK		0x3F
+#define DRV_NUM_TCS_SHIFT		6
+#define DRV_NCPT_MASK			0x1F
+#define DRV_NCPT_SHIFT			27
+
+/* Register offsets */
+#define RSC_DRV_IRQ_ENABLE		0x00
+#define RSC_DRV_IRQ_STATUS		0x04
+#define RSC_DRV_IRQ_CLEAR		0x08
+#define RSC_DRV_CMD_WAIT_FOR_CMPL	0x10
+#define RSC_DRV_CONTROL			0x14
+#define RSC_DRV_STATUS			0x18
+#define RSC_DRV_CMD_ENABLE		0x1C
+#define RSC_DRV_CMD_MSGID		0x30
+#define RSC_DRV_CMD_ADDR		0x34
+#define RSC_DRV_CMD_DATA		0x38
+#define RSC_DRV_CMD_STATUS		0x3C
+#define RSC_DRV_CMD_RESP_DATA		0x40
+
+#define TCS_AMC_MODE_ENABLE		BIT(16)
+#define TCS_AMC_MODE_TRIGGER		BIT(24)
+
+/* TCS CMD register bit mask */
+#define CMD_MSGID_LEN			8
+#define CMD_MSGID_RESP_REQ		BIT(8)
+#define CMD_MSGID_WRITE			BIT(16)
+#define CMD_STATUS_ISSUED		BIT(8)
+#define CMD_STATUS_COMPL		BIT(16)
+
+static u32 read_tcs_reg(struct rsc_drv *drv, int reg, int tcs_id, int cmd_id)
+{
+	return readl_relaxed(drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * tcs_id +
+			     RSC_DRV_CMD_OFFSET * cmd_id);
+}
+
+static void write_tcs_cmd(struct rsc_drv *drv, int reg, int tcs_id, int cmd_id,
+			  u32 data)
+{
+	writel_relaxed(data, drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * tcs_id +
+		       RSC_DRV_CMD_OFFSET * cmd_id);
+}
+
+static void write_tcs_reg(struct rsc_drv *drv, int reg, int tcs_id, u32 data)
+{
+	writel_relaxed(data, drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * tcs_id);
+}
+
+static void write_tcs_reg_sync(struct rsc_drv *drv, int reg, int tcs_id,
+			       u32 data)
+{
+	writel(data, drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * tcs_id);
+	for (;;) {
+		if (data == readl(drv->tcs_base + reg +
+				  RSC_DRV_TCS_OFFSET * tcs_id))
+			break;
+		udelay(1);
+	}
+}
+
+static bool tcs_is_free(struct rsc_drv *drv, int tcs_id)
+{
+	return !test_bit(tcs_id, drv->tcs_in_use) &&
+	       read_tcs_reg(drv, RSC_DRV_STATUS, tcs_id, 0);
+}
+
+static struct tcs_group *get_tcs_of_type(struct rsc_drv *drv, int type)
+{
+	return &drv->tcs[type];
+}
+
+static struct tcs_group *get_tcs_for_msg(struct rsc_drv *drv,
+					 const struct tcs_request *msg)
+{
+	int type;
+
+	switch (msg->state) {
+	case RPMH_ACTIVE_ONLY_STATE:
+		type = ACTIVE_TCS;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	return get_tcs_of_type(drv, type);
+}
+
+static const struct tcs_request *get_req_from_tcs(struct rsc_drv *drv,
+						  int tcs_id)
+{
+	struct tcs_group *tcs;
+	int i;
+
+	for (i = 0; i < drv->num_tcs; i++) {
+		tcs = &drv->tcs[i];
+		if (tcs->mask & BIT(tcs_id))
+			return tcs->req[tcs_id - tcs->offset];
+	}
+
+	return NULL;
+}
+
+/**
+ * tcs_tx_done: TX Done interrupt handler
+ */
+static irqreturn_t tcs_tx_done(int irq, void *p)
+{
+	struct rsc_drv *drv = p;
+	int i, j;
+	unsigned long irq_status;
+	const struct tcs_request *req;
+	struct tcs_cmd *cmd;
+
+	irq_status = read_tcs_reg(drv, RSC_DRV_IRQ_STATUS, 0, 0);
+
+	for_each_set_bit(i, &irq_status, BITS_PER_LONG) {
+		req = get_req_from_tcs(drv, i);
+		if (!req) {
+			WARN_ON(1);
+			goto skip;
+		}
+
+		for (j = 0; j < req->num_cmds; j++) {
+			u32 sts;
+
+			cmd = &req->cmds[j];
+			sts = read_tcs_reg(drv, RSC_DRV_CMD_STATUS, i, j);
+			if (!(sts & CMD_STATUS_ISSUED) ||
+			   ((req->wait_for_compl || cmd->wait) &&
+			   !(sts & CMD_STATUS_COMPL))) {
+				pr_err("Incomplete request: %s: addr=%#x data=%#x",
+				       drv->name, cmd->addr, cmd->data);
+			}
+		}
+skip:
+		/* Reclaim the TCS */
+		write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, i, 0);
+		write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, BIT(i));
+		spin_lock(&drv->lock);
+		clear_bit(i, drv->tcs_in_use);
+		spin_unlock(&drv->lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void __tcs_buffer_write(struct rsc_drv *drv, int tcs_id, int cmd_id,
+			       const struct tcs_request *msg)
+{
+	u32 msgid, cmd_msgid;
+	u32 cmd_enable = 0;
+	u32 cmd_complete;
+	struct tcs_cmd *cmd;
+	int i, j;
+
+	cmd_msgid = CMD_MSGID_LEN;
+	cmd_msgid |= msg->wait_for_compl ? CMD_MSGID_RESP_REQ : 0;
+	cmd_msgid |= CMD_MSGID_WRITE;
+
+	cmd_complete = read_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, tcs_id, 0);
+
+	for (i = 0, j = cmd_id; i < msg->num_cmds; i++, j++) {
+		cmd = &msg->cmds[i];
+		cmd_enable |= BIT(j);
+		cmd_complete |= cmd->wait << j;
+		msgid = cmd_msgid;
+		msgid |= cmd->wait ? CMD_MSGID_RESP_REQ : 0;
+		write_tcs_cmd(drv, RSC_DRV_CMD_MSGID, tcs_id, j, msgid);
+		write_tcs_cmd(drv, RSC_DRV_CMD_ADDR, tcs_id, j, cmd->addr);
+		write_tcs_cmd(drv, RSC_DRV_CMD_DATA, tcs_id, j, cmd->data);
+	}
+
+	write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, tcs_id, cmd_complete);
+	cmd_enable |= read_tcs_reg(drv, RSC_DRV_CMD_ENABLE, tcs_id, 0);
+	write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, tcs_id, cmd_enable);
+}
+
+static void __tcs_trigger(struct rsc_drv *drv, int tcs_id)
+{
+	u32 enable;
+
+	/*
+	 * HW req: Clear the DRV_CONTROL and enable TCS again
+	 * While clearing ensure that the AMC mode trigger is cleared
+	 * and then the mode enable is cleared.
+	 */
+	enable = read_tcs_reg(drv, RSC_DRV_CONTROL, tcs_id, 0);
+	enable &= ~TCS_AMC_MODE_TRIGGER;
+	write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
+	enable &= ~TCS_AMC_MODE_ENABLE;
+	write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
+
+	/* Enable the AMC mode on the TCS and then trigger the TCS */
+	enable = TCS_AMC_MODE_ENABLE;
+	write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
+	enable |= TCS_AMC_MODE_TRIGGER;
+	write_tcs_reg_sync(drv, RSC_DRV_CONTROL, tcs_id, enable);
+}
+
+static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs,
+				  const struct tcs_request *msg)
+{
+	unsigned long curr_enabled;
+	u32 addr;
+	int i, j, k;
+	int tcs_id = tcs->offset;
+
+	for (i = 0; i < tcs->num_tcs; i++, tcs_id++) {
+		if (tcs_is_free(drv, tcs_id))
+			continue;
+
+		curr_enabled = read_tcs_reg(drv, RSC_DRV_CMD_ENABLE, tcs_id, 0);
+
+		for_each_set_bit(j, &curr_enabled, MAX_CMDS_PER_TCS) {
+			addr = read_tcs_reg(drv, RSC_DRV_CMD_ADDR, tcs_id, j);
+			for (k = 0; k < msg->num_cmds; k++) {
+				if (addr == msg->cmds[k].addr)
+					return -EBUSY;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int find_free_tcs(struct tcs_group *tcs)
+{
+	int i;
+
+	for (i = 0; i < tcs->num_tcs; i++) {
+		if (tcs_is_free(tcs->drv, tcs->offset + i))
+			return tcs->offset + i;
+	}
+
+	return -EBUSY;
+}
+
+static int tcs_write(struct rsc_drv *drv, const struct tcs_request *msg)
+{
+	struct tcs_group *tcs;
+	int tcs_id;
+	unsigned long flags;
+	int ret;
+
+	tcs = get_tcs_for_msg(drv, msg);
+	if (IS_ERR(tcs))
+		return PTR_ERR(tcs);
+
+	spin_lock_irqsave(&tcs->lock, flags);
+	spin_lock(&drv->lock);
+	/*
+	 * The h/w does not like if we send a request to the same address,
+	 * when one is already in-flight or being processed.
+	 */
+	ret = check_for_req_inflight(drv, tcs, msg);
+	if (ret) {
+		spin_unlock(&drv->lock);
+		goto done_write;
+	}
+
+	tcs_id = find_free_tcs(tcs);
+	if (tcs_id < 0) {
+		ret = tcs_id;
+		spin_unlock(&drv->lock);
+		goto done_write;
+	}
+
+	tcs->req[tcs_id - tcs->offset] = msg;
+	set_bit(tcs_id, drv->tcs_in_use);
+	spin_unlock(&drv->lock);
+
+	__tcs_buffer_write(drv, tcs_id, 0, msg);
+	__tcs_trigger(drv, tcs_id);
+
+done_write:
+	spin_unlock_irqrestore(&tcs->lock, flags);
+	return ret;
+}
+
+/**
+ * rpmh_rsc_send_data: Validate the incoming message and write to the
+ * appropriate TCS block.
+ *
+ * @drv: the controller
+ * @msg: the data to be sent
+ *
+ * Return: 0 on success, -EINVAL on error.
+ * Note: This call blocks until a valid data is written to the TCS.
+ */
+int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
+{
+	int ret;
+
+	if (!msg || !msg->cmds || !msg->num_cmds ||
+	    msg->num_cmds > MAX_RPMH_PAYLOAD) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	do {
+		ret = tcs_write(drv, msg);
+		if (ret == -EBUSY) {
+			pr_info_ratelimited("TCS Busy, retrying RPMH message send: addr=%#x\n",
+					    msg->cmds[0].addr);
+			udelay(10);
+		}
+	} while (ret == -EBUSY);
+
+	return ret;
+}
+
+static int rpmh_probe_tcs_config(struct platform_device *pdev,
+				 struct rsc_drv *drv)
+{
+	struct tcs_type_config {
+		u32 type;
+		u32 n;
+	} tcs_cfg[TCS_TYPE_NR] = { { 0 } };
+	struct device_node *dn = pdev->dev.of_node;
+	u32 config, max_tcs, ncpt, offset;
+	int i, ret, n, st = 0;
+	struct tcs_group *tcs;
+	struct resource *res;
+	void __iomem *base;
+	char drv_id[10] = {0};
+
+	snprintf(drv_id, ARRAY_SIZE(drv_id), "drv-%d", drv->id);
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, drv_id);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	ret = of_property_read_u32(dn, "qcom,tcs-offset", &offset);
+	if (ret)
+		return ret;
+	drv->tcs_base = base + offset;
+
+	config = readl_relaxed(base + DRV_PRNT_CHLD_CONFIG);
+
+	max_tcs = config;
+	max_tcs &= DRV_NUM_TCS_MASK << (DRV_NUM_TCS_SHIFT * drv->id);
+	max_tcs = max_tcs >> (DRV_NUM_TCS_SHIFT * drv->id);
+
+	ncpt = config & (DRV_NCPT_MASK << DRV_NCPT_SHIFT);
+	ncpt = ncpt >> DRV_NCPT_SHIFT;
+
+	n = of_property_count_u32_elems(dn, "qcom,tcs-config");
+	if (n != 2 * TCS_TYPE_NR)
+		return -EINVAL;
+
+	for (i = 0; i < TCS_TYPE_NR; i++) {
+		ret = of_property_read_u32_index(dn, "qcom,tcs-config",
+						 i * 2, &tcs_cfg[i].type);
+		if (ret)
+			return ret;
+		if (tcs_cfg[i].type >= TCS_TYPE_NR)
+			return -EINVAL;
+
+		ret = of_property_read_u32_index(dn, "qcom,tcs-config",
+						 i * 2 + 1, &tcs_cfg[i].n);
+		if (ret)
+			return ret;
+		if (tcs_cfg[i].n > MAX_TCS_PER_TYPE)
+			return -EINVAL;
+	}
+
+	for (i = 0; i < TCS_TYPE_NR; i++) {
+		tcs = &drv->tcs[tcs_cfg[i].type];
+		if (tcs->drv)
+			return -EINVAL;
+		tcs->drv = drv;
+		tcs->type = tcs_cfg[i].type;
+		tcs->num_tcs = tcs_cfg[i].n;
+		tcs->ncpt = ncpt;
+		spin_lock_init(&tcs->lock);
+
+		if (!tcs->num_tcs || tcs->type == CONTROL_TCS)
+			continue;
+
+		if (st + tcs->num_tcs > max_tcs ||
+		    st + tcs->num_tcs >= BITS_PER_BYTE * sizeof(tcs->mask))
+			return -EINVAL;
+
+		tcs->mask = ((1 << tcs->num_tcs) - 1) << st;
+		tcs->offset = st;
+		st += tcs->num_tcs;
+	}
+
+	drv->num_tcs = st;
+
+	return 0;
+}
+
+static int rpmh_rsc_probe(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct rsc_drv *drv;
+	int ret, irq;
+
+	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
+	if (!drv)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(dn, "qcom,drv-id", &drv->id);
+	if (ret)
+		return ret;
+
+	drv->name = of_get_property(dn, "label", NULL);
+	if (!drv->name)
+		drv->name = dev_name(&pdev->dev);
+
+	ret = rpmh_probe_tcs_config(pdev, drv);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&drv->lock);
+	bitmap_zero(drv->tcs_in_use, MAX_TCS_NR);
+
+	irq = platform_get_irq(pdev, drv->id);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(&pdev->dev, irq, tcs_tx_done,
+			       IRQF_TRIGGER_HIGH | IRQF_NO_SUSPEND,
+			       drv->name, drv);
+	if (ret)
+		return ret;
+
+	/* Enable the active TCS to send requests immediately */
+	write_tcs_reg(drv, RSC_DRV_IRQ_ENABLE, 0, drv->tcs[ACTIVE_TCS].mask);
+
+	return devm_of_platform_populate(&pdev->dev);
+}
+
+static const struct of_device_id rpmh_drv_match[] = {
+	{ .compatible = "qcom,rpmh-rsc", },
+	{ }
+};
+
+static struct platform_driver rpmh_driver = {
+	.probe = rpmh_rsc_probe,
+	.driver = {
+		  .name = "rpmh",
+		  .of_match_table = rpmh_drv_match,
+	},
+};
+
+static int __init rpmh_driver_init(void)
+{
+	return platform_driver_register(&rpmh_driver);
+}
+arch_initcall(rpmh_driver_init);
diff --git a/include/dt-bindings/soc/qcom,rpmh-rsc.h b/include/dt-bindings/soc/qcom,rpmh-rsc.h
new file mode 100644
index 000000000000..868f998ea998
--- /dev/null
+++ b/include/dt-bindings/soc/qcom,rpmh-rsc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __DT_QCOM_RPMH_RSC_H__
+#define __DT_QCOM_RPMH_RSC_H__
+
+#define SLEEP_TCS	0
+#define WAKE_TCS	1
+#define ACTIVE_TCS	2
+#define CONTROL_TCS	3
+
+#endif /* __DT_QCOM_RPMH_RSC_H__ */
diff --git a/include/soc/qcom/tcs.h b/include/soc/qcom/tcs.h
new file mode 100644
index 000000000000..262876a59e86
--- /dev/null
+++ b/include/soc/qcom/tcs.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __SOC_QCOM_TCS_H__
+#define __SOC_QCOM_TCS_H__
+
+#define MAX_RPMH_PAYLOAD	16
+
+/**
+ * rpmh_state: state for the request
+ *
+ * RPMH_SLEEP_STATE:       State of the resource when the processor subsystem
+ *                         is powered down. There is no client using the
+ *                         resource actively.
+ * RPMH_WAKE_ONLY_STATE:   Resume resource state to the value previously
+ *                         requested before the processor was powered down.
+ * RPMH_ACTIVE_ONLY_STATE: Active or AMC mode requests. Resource state
+ *                         is aggregated immediately.
+ */
+enum rpmh_state {
+	RPMH_SLEEP_STATE,
+	RPMH_WAKE_ONLY_STATE,
+	RPMH_ACTIVE_ONLY_STATE,
+};
+
+/**
+ * struct tcs_cmd: an individual request to RPMH.
+ *
+ * @addr: the address of the resource slv_id:18:16 | offset:0:15
+ * @data: the resource state request
+ * @wait: wait for this request to be complete before sending the next
+ */
+struct tcs_cmd {
+	u32 addr;
+	u32 data;
+	u32 wait;
+};
+
+/**
+ * struct tcs_request: A set of tcs_cmds sent together in a TCS
+ *
+ * @state:          state for the request.
+ * @wait_for_compl: wait until we get a response from the h/w accelerator
+ * @num_cmds:       the number of @cmds in this request
+ * @cmds:           an array of tcs_cmds
+ */
+struct tcs_request {
+	enum rpmh_state state;
+	u32 wait_for_compl;
+	u32 num_cmds;
+	struct tcs_cmd *cmds;
+};
+
+#endif /* __SOC_QCOM_TCS_H__ */
-- 
cgit v1.2.3


From c1038456b02b86cc4445441a8d33c5aca0ac103e Mon Sep 17 00:00:00 2001
From: Lina Iyer <ilina@codeaurora.org>
Date: Wed, 20 Jun 2018 18:57:01 +0530
Subject: drivers: qcom: rpmh: add RPMH helper functions

Sending RPMH requests and waiting for response from the controller
through a callback is common functionality across all platform drivers.
To simplify drivers, add a library functions to create RPMH client and
send resource state requests.

rpmh_write() is a synchronous blocking call that can be used to send
active state requests.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Raju P.L.S.S.S.N <rplsssn@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/Makefile        |   4 +-
 drivers/soc/qcom/rpmh-internal.h |  31 ++++++++++-
 drivers/soc/qcom/rpmh-rsc.c      |   6 +-
 drivers/soc/qcom/rpmh.c          | 116 +++++++++++++++++++++++++++++++++++++++
 include/soc/qcom/rpmh.h          |  25 +++++++++
 5 files changed, 179 insertions(+), 3 deletions(-)
 create mode 100644 drivers/soc/qcom/rpmh.c
 create mode 100644 include/soc/qcom/rpmh.h

(limited to 'include')

diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index 036ecc45cbbd..f25b54cd6cf8 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -9,7 +9,9 @@ obj-$(CONFIG_QCOM_PM)	+=	spm.o
 obj-$(CONFIG_QCOM_QMI_HELPERS)	+= qmi_helpers.o
 qmi_helpers-y	+= qmi_encdec.o qmi_interface.o
 obj-$(CONFIG_QCOM_RMTFS_MEM)	+= rmtfs_mem.o
-obj-$(CONFIG_QCOM_RPMH)	+=	rpmh-rsc.o
+obj-$(CONFIG_QCOM_RPMH)		+= qcom_rpmh.o
+qcom_rpmh-y			+= rpmh-rsc.o
+qcom_rpmh-y			+= rpmh.o
 obj-$(CONFIG_QCOM_SMD_RPM)	+= smd-rpm.o
 obj-$(CONFIG_QCOM_SMEM) +=	smem.o
 obj-$(CONFIG_QCOM_SMEM_STATE) += smem_state.o
diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index cc29176f1303..1e687f719301 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -41,6 +41,32 @@ struct tcs_group {
 	const struct tcs_request *req[MAX_TCS_PER_TYPE];
 };
 
+/**
+ * struct rpmh_request: the message to be sent to rpmh-rsc
+ *
+ * @msg: the request
+ * @cmd: the payload that will be part of the @msg
+ * @completion: triggered when request is done
+ * @dev: the device making the request
+ * @err: err return from the controller
+ */
+struct rpmh_request {
+	struct tcs_request msg;
+	struct tcs_cmd cmd[MAX_RPMH_PAYLOAD];
+	struct completion *completion;
+	const struct device *dev;
+	int err;
+};
+
+/**
+ * struct rpmh_ctrlr: our representation of the controller
+ *
+ * @drv: the controller instance
+ */
+struct rpmh_ctrlr {
+	struct rsc_drv *drv;
+};
+
 /**
  * struct rsc_drv: the Direct Resource Voter (DRV) of the
  * Resource State Coordinator controller (RSC)
@@ -52,6 +78,7 @@ struct tcs_group {
  * @tcs:        TCS groups
  * @tcs_in_use: s/w state of the TCS
  * @lock:       synchronize state of the controller
+ * @client:     handle to the DRV's client.
  */
 struct rsc_drv {
 	const char *name;
@@ -61,9 +88,11 @@ struct rsc_drv {
 	struct tcs_group tcs[TCS_TYPE_NR];
 	DECLARE_BITMAP(tcs_in_use, MAX_TCS_NR);
 	spinlock_t lock;
+	struct rpmh_ctrlr client;
 };
 
-
 int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg);
 
+void rpmh_tx_done(const struct tcs_request *msg, int r);
+
 #endif /* __RPM_INTERNAL_H__ */
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index 89d41cdd8d71..91f8d60d17cc 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -138,7 +138,7 @@ static const struct tcs_request *get_req_from_tcs(struct rsc_drv *drv,
 static irqreturn_t tcs_tx_done(int irq, void *p)
 {
 	struct rsc_drv *drv = p;
-	int i, j, err;
+	int i, j, err = 0;
 	unsigned long irq_status;
 	const struct tcs_request *req;
 	struct tcs_cmd *cmd;
@@ -175,6 +175,8 @@ skip:
 		spin_lock(&drv->lock);
 		clear_bit(i, drv->tcs_in_use);
 		spin_unlock(&drv->lock);
+		if (req)
+			rpmh_tx_done(req, err);
 	}
 
 	return IRQ_HANDLED;
@@ -467,6 +469,8 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
 	/* Enable the active TCS to send requests immediately */
 	write_tcs_reg(drv, RSC_DRV_IRQ_ENABLE, 0, drv->tcs[ACTIVE_TCS].mask);
 
+	dev_set_drvdata(&pdev->dev, drv);
+
 	return devm_of_platform_populate(&pdev->dev);
 }
 
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
new file mode 100644
index 000000000000..5e022cba8ab5
--- /dev/null
+++ b/drivers/soc/qcom/rpmh.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include <soc/qcom/rpmh.h>
+
+#include "rpmh-internal.h"
+
+#define RPMH_TIMEOUT_MS			msecs_to_jiffies(10000)
+
+#define DEFINE_RPMH_MSG_ONSTACK(dev, s, q, name)	\
+	struct rpmh_request name = {			\
+		.msg = {				\
+			.state = s,			\
+			.cmds = name.cmd,		\
+			.num_cmds = 0,			\
+			.wait_for_compl = true,		\
+		},					\
+		.cmd = { { 0 } },			\
+		.completion = q,			\
+		.dev = dev,				\
+	}
+
+#define ctrlr_to_drv(ctrlr) container_of(ctrlr, struct rsc_drv, client)
+
+static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
+{
+	struct rsc_drv *drv = dev_get_drvdata(dev->parent);
+
+	return &drv->client;
+}
+
+void rpmh_tx_done(const struct tcs_request *msg, int r)
+{
+	struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
+						    msg);
+	struct completion *compl = rpm_msg->completion;
+
+	rpm_msg->err = r;
+
+	if (r)
+		dev_err(rpm_msg->dev, "RPMH TX fail in msg addr=%#x, err=%d\n",
+			rpm_msg->msg.cmds[0].addr, r);
+
+	/* Signal the blocking thread we are done */
+	if (compl)
+		complete(compl);
+}
+
+/**
+ * __rpmh_write: send the RPMH request
+ *
+ * @dev: The device making the request
+ * @state: Active/Sleep request type
+ * @rpm_msg: The data that needs to be sent (cmds).
+ */
+static int __rpmh_write(const struct device *dev, enum rpmh_state state,
+			struct rpmh_request *rpm_msg)
+{
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+
+	rpm_msg->msg.state = state;
+
+	if (state != RPMH_ACTIVE_ONLY_STATE)
+		return -EINVAL;
+
+	WARN_ON(irqs_disabled());
+
+	return rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msg->msg);
+}
+
+/**
+ * rpmh_write: Write a set of RPMH commands and block until response
+ *
+ * @rc: The RPMH handle got from rpmh_get_client
+ * @state: Active/sleep set
+ * @cmd: The payload data
+ * @n: The number of elements in @cmd
+ *
+ * May sleep. Do not call from atomic contexts.
+ */
+int rpmh_write(const struct device *dev, enum rpmh_state state,
+	       const struct tcs_cmd *cmd, u32 n)
+{
+	DECLARE_COMPLETION_ONSTACK(compl);
+	DEFINE_RPMH_MSG_ONSTACK(dev, state, &compl, rpm_msg);
+	int ret;
+
+	if (!cmd || !n || n > MAX_RPMH_PAYLOAD)
+		return -EINVAL;
+
+	memcpy(rpm_msg.cmd, cmd, n * sizeof(*cmd));
+	rpm_msg.msg.num_cmds = n;
+
+	ret = __rpmh_write(dev, state, &rpm_msg);
+	if (ret)
+		return ret;
+
+	ret = wait_for_completion_timeout(&compl, RPMH_TIMEOUT_MS);
+	WARN_ON(!ret);
+	return (ret > 0) ? 0 : -ETIMEDOUT;
+}
+EXPORT_SYMBOL(rpmh_write);
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
new file mode 100644
index 000000000000..c1d0f902bd71
--- /dev/null
+++ b/include/soc/qcom/rpmh.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __SOC_QCOM_RPMH_H__
+#define __SOC_QCOM_RPMH_H__
+
+#include <soc/qcom/tcs.h>
+#include <linux/platform_device.h>
+
+
+#if IS_ENABLED(CONFIG_QCOM_RPMH)
+int rpmh_write(const struct device *dev, enum rpmh_state state,
+	       const struct tcs_cmd *cmd, u32 n);
+
+#else
+
+static inline int rpmh_write(const struct device *dev, enum rpmh_state state,
+			     const struct tcs_cmd *cmd, u32 n)
+{ return -ENODEV; }
+
+#endif /* CONFIG_QCOM_RPMH */
+
+#endif /* __SOC_QCOM_RPMH_H__ */
-- 
cgit v1.2.3


From 600513dfeef33cb05c694d1b13d319b9e8cde536 Mon Sep 17 00:00:00 2001
From: Lina Iyer <ilina@codeaurora.org>
Date: Wed, 20 Jun 2018 18:57:04 +0530
Subject: drivers: qcom: rpmh: cache sleep/wake state requests

Active state requests are sent immediately to the RSC controller, while
sleep and wake state requests are cached in this driver to avoid taxing
the RSC controller repeatedly. The cached values will be sent to the
controller when the rpmh_flush() is called.

Generally, flushing is a system PM activity and may be called from the
system PM drivers when the system is entering suspend or deeper sleep
modes during cpuidle.

Also allow invalidating the cached requests, so they may be re-populated
again.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
[rplsssn: remove unneeded semicolon, address line over 80chars error]
Signed-off-by: Raju P.L.S.S.S.N <rplsssn@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/rpmh-internal.h |   8 +-
 drivers/soc/qcom/rpmh-rsc.c      |   3 +
 drivers/soc/qcom/rpmh.c          | 201 ++++++++++++++++++++++++++++++++++++++-
 include/soc/qcom/rpmh.h          |  11 +++
 4 files changed, 216 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index af4da1cb6558..101145c9db1c 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -66,10 +66,14 @@ struct rpmh_request {
 /**
  * struct rpmh_ctrlr: our representation of the controller
  *
- * @drv: the controller instance
+ * @cache: the list of cached requests
+ * @cache_lock: synchronize access to the cache data
+ * @dirty: was the cache updated since flush
  */
 struct rpmh_ctrlr {
-	struct rsc_drv *drv;
+	struct list_head cache;
+	spinlock_t cache_lock;
+	bool dirty;
 };
 
 /**
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index fed8459c0ef6..5d0dd0581904 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -634,6 +634,9 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
 	/* Enable the active TCS to send requests immediately */
 	write_tcs_reg(drv, RSC_DRV_IRQ_ENABLE, 0, drv->tcs[ACTIVE_TCS].mask);
 
+	spin_lock_init(&drv->client.cache_lock);
+	INIT_LIST_HEAD(&drv->client.cache);
+
 	dev_set_drvdata(&pdev->dev, drv);
 
 	return devm_of_platform_populate(&pdev->dev);
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 5e022cba8ab5..c355ba13e73f 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -8,10 +8,12 @@
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/wait.h>
 
@@ -36,6 +38,21 @@
 
 #define ctrlr_to_drv(ctrlr) container_of(ctrlr, struct rsc_drv, client)
 
+/**
+ * struct cache_req: the request object for caching
+ *
+ * @addr: the address of the resource
+ * @sleep_val: the sleep vote
+ * @wake_val: the wake vote
+ * @list: linked list obj
+ */
+struct cache_req {
+	u32 addr;
+	u32 sleep_val;
+	u32 wake_val;
+	struct list_head list;
+};
+
 static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
 {
 	struct rsc_drv *drv = dev_get_drvdata(dev->parent);
@@ -60,26 +77,107 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
 		complete(compl);
 }
 
+static struct cache_req *__find_req(struct rpmh_ctrlr *ctrlr, u32 addr)
+{
+	struct cache_req *p, *req = NULL;
+
+	list_for_each_entry(p, &ctrlr->cache, list) {
+		if (p->addr == addr) {
+			req = p;
+			break;
+		}
+	}
+
+	return req;
+}
+
+static struct cache_req *cache_rpm_request(struct rpmh_ctrlr *ctrlr,
+					   enum rpmh_state state,
+					   struct tcs_cmd *cmd)
+{
+	struct cache_req *req;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	req = __find_req(ctrlr, cmd->addr);
+	if (req)
+		goto existing;
+
+	req = kzalloc(sizeof(*req), GFP_ATOMIC);
+	if (!req) {
+		req = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
+
+	req->addr = cmd->addr;
+	req->sleep_val = req->wake_val = UINT_MAX;
+	INIT_LIST_HEAD(&req->list);
+	list_add_tail(&req->list, &ctrlr->cache);
+
+existing:
+	switch (state) {
+	case RPMH_ACTIVE_ONLY_STATE:
+		if (req->sleep_val != UINT_MAX)
+			req->wake_val = cmd->data;
+		break;
+	case RPMH_WAKE_ONLY_STATE:
+		req->wake_val = cmd->data;
+		break;
+	case RPMH_SLEEP_STATE:
+		req->sleep_val = cmd->data;
+		break;
+	default:
+		break;
+	}
+
+	ctrlr->dirty = true;
+unlock:
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+
+	return req;
+}
+
 /**
- * __rpmh_write: send the RPMH request
+ * __rpmh_write: Cache and send the RPMH request
  *
  * @dev: The device making the request
  * @state: Active/Sleep request type
  * @rpm_msg: The data that needs to be sent (cmds).
+ *
+ * Cache the RPMH request and send if the state is ACTIVE_ONLY.
+ * SLEEP/WAKE_ONLY requests are not sent to the controller at
+ * this time. Use rpmh_flush() to send them to the controller.
  */
 static int __rpmh_write(const struct device *dev, enum rpmh_state state,
 			struct rpmh_request *rpm_msg)
 {
 	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	int ret = -EINVAL;
+	struct cache_req *req;
+	int i;
 
 	rpm_msg->msg.state = state;
 
-	if (state != RPMH_ACTIVE_ONLY_STATE)
-		return -EINVAL;
+	/* Cache the request in our store and link the payload */
+	for (i = 0; i < rpm_msg->msg.num_cmds; i++) {
+		req = cache_rpm_request(ctrlr, state, &rpm_msg->msg.cmds[i]);
+		if (IS_ERR(req))
+			return PTR_ERR(req);
+	}
+
+	rpm_msg->msg.state = state;
 
-	WARN_ON(irqs_disabled());
+	if (state == RPMH_ACTIVE_ONLY_STATE) {
+		WARN_ON(irqs_disabled());
+		ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msg->msg);
+	} else {
+		ret = rpmh_rsc_write_ctrl_data(ctrlr_to_drv(ctrlr),
+				&rpm_msg->msg);
+		/* Clean up our call by spoofing tx_done */
+		rpmh_tx_done(&rpm_msg->msg, ret);
+	}
 
-	return rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msg->msg);
+	return ret;
 }
 
 /**
@@ -114,3 +212,96 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
 	return (ret > 0) ? 0 : -ETIMEDOUT;
 }
 EXPORT_SYMBOL(rpmh_write);
+
+static int is_req_valid(struct cache_req *req)
+{
+	return (req->sleep_val != UINT_MAX &&
+		req->wake_val != UINT_MAX &&
+		req->sleep_val != req->wake_val);
+}
+
+static int send_single(const struct device *dev, enum rpmh_state state,
+		       u32 addr, u32 data)
+{
+	DEFINE_RPMH_MSG_ONSTACK(dev, state, NULL, rpm_msg);
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+
+	/* Wake sets are always complete and sleep sets are not */
+	rpm_msg.msg.wait_for_compl = (state == RPMH_WAKE_ONLY_STATE);
+	rpm_msg.cmd[0].addr = addr;
+	rpm_msg.cmd[0].data = data;
+	rpm_msg.msg.num_cmds = 1;
+
+	return rpmh_rsc_write_ctrl_data(ctrlr_to_drv(ctrlr), &rpm_msg.msg);
+}
+
+/**
+ * rpmh_flush: Flushes the buffered active and sleep sets to TCS
+ *
+ * @dev: The device making the request
+ *
+ * Return: -EBUSY if the controller is busy, probably waiting on a response
+ * to a RPMH request sent earlier.
+ *
+ * This function is always called from the sleep code from the last CPU
+ * that is powering down the entire system. Since no other RPMH API would be
+ * executing at this time, it is safe to run lockless.
+ */
+int rpmh_flush(const struct device *dev)
+{
+	struct cache_req *p;
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	int ret;
+
+	if (!ctrlr->dirty) {
+		pr_debug("Skipping flush, TCS has latest data.\n");
+		return 0;
+	}
+
+	/*
+	 * Nobody else should be calling this function other than system PM,
+	 * hence we can run without locks.
+	 */
+	list_for_each_entry(p, &ctrlr->cache, list) {
+		if (!is_req_valid(p)) {
+			pr_debug("%s: skipping RPMH req: a:%#x s:%#x w:%#x",
+				 __func__, p->addr, p->sleep_val, p->wake_val);
+			continue;
+		}
+		ret = send_single(dev, RPMH_SLEEP_STATE, p->addr, p->sleep_val);
+		if (ret)
+			return ret;
+		ret = send_single(dev, RPMH_WAKE_ONLY_STATE,
+				  p->addr, p->wake_val);
+		if (ret)
+			return ret;
+	}
+
+	ctrlr->dirty = false;
+
+	return 0;
+}
+EXPORT_SYMBOL(rpmh_flush);
+
+/**
+ * rpmh_invalidate: Invalidate all sleep and active sets
+ * sets.
+ *
+ * @dev: The device making the request
+ *
+ * Invalidate the sleep and active values in the TCS blocks.
+ */
+int rpmh_invalidate(const struct device *dev)
+{
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	int ret;
+
+	ctrlr->dirty = true;
+
+	do {
+		ret = rpmh_rsc_invalidate(ctrlr_to_drv(ctrlr));
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+EXPORT_SYMBOL(rpmh_invalidate);
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
index c1d0f902bd71..42e62a0d26d8 100644
--- a/include/soc/qcom/rpmh.h
+++ b/include/soc/qcom/rpmh.h
@@ -14,12 +14,23 @@
 int rpmh_write(const struct device *dev, enum rpmh_state state,
 	       const struct tcs_cmd *cmd, u32 n);
 
+int rpmh_flush(const struct device *dev);
+
+int rpmh_invalidate(const struct device *dev);
+
 #else
 
 static inline int rpmh_write(const struct device *dev, enum rpmh_state state,
 			     const struct tcs_cmd *cmd, u32 n)
 { return -ENODEV; }
 
+
+static inline int rpmh_flush(const struct device *dev)
+{ return -ENODEV; }
+
+static inline int rpmh_invalidate(const struct device *dev)
+{ return -ENODEV; }
+
 #endif /* CONFIG_QCOM_RPMH */
 
 #endif /* __SOC_QCOM_RPMH_H__ */
-- 
cgit v1.2.3


From 564b5e24ccd4c840a7f84dfd952e5715dd9b3966 Mon Sep 17 00:00:00 2001
From: Lina Iyer <ilina@codeaurora.org>
Date: Wed, 20 Jun 2018 18:57:05 +0530
Subject: drivers: qcom: rpmh: allow requests to be sent asynchronously

Platform drivers that want to send a request but do not want to block
until the RPMH request completes have now a new API -
rpmh_write_async().

The API allocates memory and send the requests and returns the control
back to the platform driver. The tx_done callback from the controller is
handled in the context of the controller's thread and frees the
allocated memory. This API allows RPMH requests from atomic contexts as
well.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Raju P.L.S.S.S.N <rplsssn@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/rpmh-internal.h |  2 ++
 drivers/soc/qcom/rpmh.c          | 51 ++++++++++++++++++++++++++++++++++++++++
 include/soc/qcom/rpmh.h          |  7 ++++++
 3 files changed, 60 insertions(+)

(limited to 'include')

diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index 101145c9db1c..6a8a4b7aeead 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -54,6 +54,7 @@ struct tcs_group {
  * @completion: triggered when request is done
  * @dev: the device making the request
  * @err: err return from the controller
+ * @needs_free: check to free dynamically allocated request object
  */
 struct rpmh_request {
 	struct tcs_request msg;
@@ -61,6 +62,7 @@ struct rpmh_request {
 	struct completion *completion;
 	const struct device *dev;
 	int err;
+	bool needs_free;
 };
 
 /**
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index c355ba13e73f..1e3d34876af1 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -34,6 +34,7 @@
 		.cmd = { { 0 } },			\
 		.completion = q,			\
 		.dev = dev,				\
+		.needs_free = false,				\
 	}
 
 #define ctrlr_to_drv(ctrlr) container_of(ctrlr, struct rsc_drv, client)
@@ -75,6 +76,9 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
 	/* Signal the blocking thread we are done */
 	if (compl)
 		complete(compl);
+
+	if (rpm_msg->needs_free)
+		kfree(rpm_msg);
 }
 
 static struct cache_req *__find_req(struct rpmh_ctrlr *ctrlr, u32 addr)
@@ -180,6 +184,53 @@ static int __rpmh_write(const struct device *dev, enum rpmh_state state,
 	return ret;
 }
 
+static int __fill_rpmh_msg(struct rpmh_request *req, enum rpmh_state state,
+		const struct tcs_cmd *cmd, u32 n)
+{
+	if (!cmd || !n || n > MAX_RPMH_PAYLOAD)
+		return -EINVAL;
+
+	memcpy(req->cmd, cmd, n * sizeof(*cmd));
+
+	req->msg.state = state;
+	req->msg.cmds = req->cmd;
+	req->msg.num_cmds = n;
+
+	return 0;
+}
+
+/**
+ * rpmh_write_async: Write a set of RPMH commands
+ *
+ * @dev: The device making the request
+ * @state: Active/sleep set
+ * @cmd: The payload data
+ * @n: The number of elements in payload
+ *
+ * Write a set of RPMH commands, the order of commands is maintained
+ * and will be sent as a single shot.
+ */
+int rpmh_write_async(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 n)
+{
+	struct rpmh_request *rpm_msg;
+	int ret;
+
+	rpm_msg = kzalloc(sizeof(*rpm_msg), GFP_ATOMIC);
+	if (!rpm_msg)
+		return -ENOMEM;
+	rpm_msg->needs_free = true;
+
+	ret = __fill_rpmh_msg(rpm_msg, state, cmd, n);
+	if (ret) {
+		kfree(rpm_msg);
+		return ret;
+	}
+
+	return __rpmh_write(dev, state, rpm_msg);
+}
+EXPORT_SYMBOL(rpmh_write_async);
+
 /**
  * rpmh_write: Write a set of RPMH commands and block until response
  *
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
index 42e62a0d26d8..1161a5c77e75 100644
--- a/include/soc/qcom/rpmh.h
+++ b/include/soc/qcom/rpmh.h
@@ -14,6 +14,9 @@
 int rpmh_write(const struct device *dev, enum rpmh_state state,
 	       const struct tcs_cmd *cmd, u32 n);
 
+int rpmh_write_async(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 n);
+
 int rpmh_flush(const struct device *dev);
 
 int rpmh_invalidate(const struct device *dev);
@@ -24,6 +27,10 @@ static inline int rpmh_write(const struct device *dev, enum rpmh_state state,
 			     const struct tcs_cmd *cmd, u32 n)
 { return -ENODEV; }
 
+static inline int rpmh_write_async(const struct device *dev,
+				   enum rpmh_state state,
+				   const struct tcs_cmd *cmd, u32 n)
+{ return -ENODEV; }
 
 static inline int rpmh_flush(const struct device *dev)
 { return -ENODEV; }
-- 
cgit v1.2.3


From c8790cb6da58d3fa09dfa707aa486fe6769c23bc Mon Sep 17 00:00:00 2001
From: Lina Iyer <ilina@codeaurora.org>
Date: Wed, 20 Jun 2018 18:57:06 +0530
Subject: drivers: qcom: rpmh: add support for batch RPMH request

Platform drivers need make a lot of resource state requests at the same
time, say, at the start or end of an usecase. It can be quite
inefficient to send each request separately. Instead they can give the
RPMH library a batch of requests to be sent and wait on the whole
transaction to be complete.

rpmh_write_batch() is a blocking call that can be used to send multiple
RPMH command sets. Each RPMH command set is set asynchronously and the
API blocks until all the command sets are complete and receive their
tx_done callbacks.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Raju P.L.S.S.S.N <rplsssn@codeaurora.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/rpmh-internal.h |   2 +
 drivers/soc/qcom/rpmh-rsc.c      |   1 +
 drivers/soc/qcom/rpmh.c          | 159 ++++++++++++++++++++++++++++++++++++++-
 include/soc/qcom/rpmh.h          |   8 ++
 4 files changed, 168 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index 6a8a4b7aeead..a7bbbb67991c 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -71,11 +71,13 @@ struct rpmh_request {
  * @cache: the list of cached requests
  * @cache_lock: synchronize access to the cache data
  * @dirty: was the cache updated since flush
+ * @batch_cache: Cache sleep and wake requests sent as batch
  */
 struct rpmh_ctrlr {
 	struct list_head cache;
 	spinlock_t cache_lock;
 	bool dirty;
+	struct list_head batch_cache;
 };
 
 /**
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index 5d0dd0581904..4c0c1f24911a 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -636,6 +636,7 @@ static int rpmh_rsc_probe(struct platform_device *pdev)
 
 	spin_lock_init(&drv->client.cache_lock);
 	INIT_LIST_HEAD(&drv->client.cache);
+	INIT_LIST_HEAD(&drv->client.batch_cache);
 
 	dev_set_drvdata(&pdev->dev, drv);
 
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 1e3d34876af1..c7beb6841289 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -54,6 +54,20 @@ struct cache_req {
 	struct list_head list;
 };
 
+/**
+ * struct batch_cache_req - An entry in our batch catch
+ *
+ * @list: linked list obj
+ * @count: number of messages
+ * @rpm_msgs: the messages
+ */
+
+struct batch_cache_req {
+	struct list_head list;
+	int count;
+	struct rpmh_request rpm_msgs[];
+};
+
 static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
 {
 	struct rsc_drv *drv = dev_get_drvdata(dev->parent);
@@ -73,10 +87,13 @@ void rpmh_tx_done(const struct tcs_request *msg, int r)
 		dev_err(rpm_msg->dev, "RPMH TX fail in msg addr=%#x, err=%d\n",
 			rpm_msg->msg.cmds[0].addr, r);
 
+	if (!compl)
+		goto exit;
+
 	/* Signal the blocking thread we are done */
-	if (compl)
-		complete(compl);
+	complete(compl);
 
+exit:
 	if (rpm_msg->needs_free)
 		kfree(rpm_msg);
 }
@@ -264,6 +281,138 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
 }
 EXPORT_SYMBOL(rpmh_write);
 
+static void cache_batch(struct rpmh_ctrlr *ctrlr, struct batch_cache_req *req)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	list_add_tail(&req->list, &ctrlr->batch_cache);
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+}
+
+static int flush_batch(struct rpmh_ctrlr *ctrlr)
+{
+	struct batch_cache_req *req;
+	const struct rpmh_request *rpm_msg;
+	unsigned long flags;
+	int ret = 0;
+	int i;
+
+	/* Send Sleep/Wake requests to the controller, expect no response */
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	list_for_each_entry(req, &ctrlr->batch_cache, list) {
+		for (i = 0; i < req->count; i++) {
+			rpm_msg = req->rpm_msgs + i;
+			ret = rpmh_rsc_write_ctrl_data(ctrlr_to_drv(ctrlr),
+						       &rpm_msg->msg);
+			if (ret)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+
+	return ret;
+}
+
+static void invalidate_batch(struct rpmh_ctrlr *ctrlr)
+{
+	struct batch_cache_req *req, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	list_for_each_entry_safe(req, tmp, &ctrlr->batch_cache, list)
+		kfree(req);
+	INIT_LIST_HEAD(&ctrlr->batch_cache);
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+}
+
+/**
+ * rpmh_write_batch: Write multiple sets of RPMH commands and wait for the
+ * batch to finish.
+ *
+ * @dev: the device making the request
+ * @state: Active/sleep set
+ * @cmd: The payload data
+ * @n: The array of count of elements in each batch, 0 terminated.
+ *
+ * Write a request to the RSC controller without caching. If the request
+ * state is ACTIVE, then the requests are treated as completion request
+ * and sent to the controller immediately. The function waits until all the
+ * commands are complete. If the request was to SLEEP or WAKE_ONLY, then the
+ * request is sent as fire-n-forget and no ack is expected.
+ *
+ * May sleep. Do not call from atomic contexts for ACTIVE_ONLY requests.
+ */
+int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 *n)
+{
+	struct batch_cache_req *req;
+	struct rpmh_request *rpm_msgs;
+	DECLARE_COMPLETION_ONSTACK(compl);
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	unsigned long time_left;
+	int count = 0;
+	int ret, i, j;
+
+	if (!cmd || !n)
+		return -EINVAL;
+
+	while (n[count] > 0)
+		count++;
+	if (!count)
+		return -EINVAL;
+
+	req = kzalloc(sizeof(*req) + count * sizeof(req->rpm_msgs[0]),
+		      GFP_ATOMIC);
+	if (!req)
+		return -ENOMEM;
+	req->count = count;
+	rpm_msgs = req->rpm_msgs;
+
+	for (i = 0; i < count; i++) {
+		__fill_rpmh_msg(rpm_msgs + i, state, cmd, n[i]);
+		cmd += n[i];
+	}
+
+	if (state != RPMH_ACTIVE_ONLY_STATE) {
+		cache_batch(ctrlr, req);
+		return 0;
+	}
+
+	for (i = 0; i < count; i++) {
+		rpm_msgs[i].completion = &compl;
+		ret = rpmh_rsc_send_data(ctrlr_to_drv(ctrlr), &rpm_msgs[i].msg);
+		if (ret) {
+			pr_err("Error(%d) sending RPMH message addr=%#x\n",
+			       ret, rpm_msgs[i].msg.cmds[0].addr);
+			for (j = i; j < count; j++)
+				rpmh_tx_done(&rpm_msgs[j].msg, ret);
+			break;
+		}
+	}
+
+	time_left = RPMH_TIMEOUT_MS;
+	for (i = 0; i < count; i++) {
+		time_left = wait_for_completion_timeout(&compl, time_left);
+		if (!time_left) {
+			/*
+			 * Better hope they never finish because they'll signal
+			 * the completion on our stack and that's bad once
+			 * we've returned from the function.
+			 */
+			WARN_ON(1);
+			ret = -ETIMEDOUT;
+			goto exit;
+		}
+	}
+
+exit:
+	kfree(req);
+
+	return ret;
+}
+EXPORT_SYMBOL(rpmh_write_batch);
+
 static int is_req_valid(struct cache_req *req)
 {
 	return (req->sleep_val != UINT_MAX &&
@@ -309,6 +458,11 @@ int rpmh_flush(const struct device *dev)
 		return 0;
 	}
 
+	/* First flush the cached batch requests */
+	ret = flush_batch(ctrlr);
+	if (ret)
+		return ret;
+
 	/*
 	 * Nobody else should be calling this function other than system PM,
 	 * hence we can run without locks.
@@ -347,6 +501,7 @@ int rpmh_invalidate(const struct device *dev)
 	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
 	int ret;
 
+	invalidate_batch(ctrlr);
 	ctrlr->dirty = true;
 
 	do {
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
index 1161a5c77e75..619e07c75da9 100644
--- a/include/soc/qcom/rpmh.h
+++ b/include/soc/qcom/rpmh.h
@@ -17,6 +17,9 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
 int rpmh_write_async(const struct device *dev, enum rpmh_state state,
 		     const struct tcs_cmd *cmd, u32 n);
 
+int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
+		     const struct tcs_cmd *cmd, u32 *n);
+
 int rpmh_flush(const struct device *dev);
 
 int rpmh_invalidate(const struct device *dev);
@@ -32,6 +35,11 @@ static inline int rpmh_write_async(const struct device *dev,
 				   const struct tcs_cmd *cmd, u32 n)
 { return -ENODEV; }
 
+static inline int rpmh_write_batch(const struct device *dev,
+				   enum rpmh_state state,
+				   const struct tcs_cmd *cmd, u32 *n)
+{ return -ENODEV; }
+
 static inline int rpmh_flush(const struct device *dev)
 { return -ENODEV; }
 
-- 
cgit v1.2.3


From a0b1561f846191a1967e8f5a26d8ef59c1b9162a Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@linaro.org>
Date: Tue, 12 Jun 2018 15:23:13 +0200
Subject: firmware: qcom: scm: add a dummy qcom_scm_assign_mem()

Add a dummy qcom_scm_assign_mem() to enable building drivers when
CONFIG_COMPILE_TEST=y && CONFIG_QCOM_SCM=n.

All other qcom_scm_* functions already have a dummy version.

Signed-off-by: Niklas Cassel <niklas.cassel@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom_scm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index b401b962afff..5d65521260b3 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -87,6 +87,10 @@ static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
 static inline int
 qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; }
 static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
+static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
+				      unsigned int *src,
+				      struct qcom_scm_vmperm *newvm,
+				      int dest_cnt) { return -ENODEV; }
 static inline void qcom_scm_cpu_power_down(u32 flags) {}
 static inline u32 qcom_scm_get_version(void) { return 0; }
 static inline u32
-- 
cgit v1.2.3


From 3928d4f5ee37cdc523894f6e549e6aae521d8980 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 Jul 2018 13:48:51 -0700
Subject: mm: use helper functions for allocating and freeing vm_area structs

The vm_area_struct is one of the most fundamental memory management
objects, but the management of it is entirely open-coded evertwhere,
ranging from allocation and freeing (using kmem_cache_[z]alloc and
kmem_cache_free) to initializing all the fields.

We want to unify this in order to end up having some unified
initialization of the vmas, and the first step to this is to at least
have basic allocation functions.

Right now those functions are literally just wrappers around the
kmem_cache_*() calls.  This is a purely mechanical conversion:

    # new vma:
    kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL) -> vm_area_alloc()

    # copy old vma
    kmem_cache_alloc(vm_area_cachep, GFP_KERNEL) -> vm_area_dup(old)

    # free vma
    kmem_cache_free(vm_area_cachep, vma) -> vm_area_free(vma)

to the point where the old vma passed in to the vm_area_dup() function
isn't even used yet (because I've left all the old manual initialization
alone).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c |  4 ++--
 arch/ia64/mm/init.c        |  8 ++++----
 fs/exec.c                  |  4 ++--
 include/linux/mm.h         |  4 +++-
 kernel/fork.c              | 21 ++++++++++++++++++---
 mm/mmap.c                  | 22 +++++++++++-----------
 mm/nommu.c                 |  8 ++++----
 7 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 3b38c717008a..e859246badca 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2278,7 +2278,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
@@ -2346,7 +2346,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	return 0;
 
 error:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 error_kmem:
 	pfm_rvfree(smpl_buf, size);
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 18278b448530..3f2321bffb72 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -114,7 +114,7 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (vma) {
 		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
@@ -125,7 +125,7 @@ ia64_init_addr_space (void)
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
-			kmem_cache_free(vm_area_cachep, vma);
+			vm_area_free(vma);
 			return;
 		}
 		up_write(&current->mm->mmap_sem);
@@ -133,7 +133,7 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+		vma = vm_area_alloc();
 		if (vma) {
 			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
@@ -144,7 +144,7 @@ ia64_init_addr_space (void)
 			down_write(&current->mm->mmap_sem);
 			if (insert_vm_struct(current->mm, vma)) {
 				up_write(&current->mm->mmap_sem);
-				kmem_cache_free(vm_area_cachep, vma);
+				vm_area_free(vma);
 				return;
 			}
 			up_write(&current->mm->mmap_sem);
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..9bd83989ea25 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
-	bprm->vma = vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	bprm->vma = vma = vm_area_alloc();
 	if (!vma)
 		return -ENOMEM;
 
@@ -326,7 +326,7 @@ err:
 	up_write(&mm->mmap_sem);
 err_free:
 	bprm->vma = NULL;
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return err;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3982c83fdcbf..de2fd86c6154 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -155,7 +155,9 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
  * mmap() functions).
  */
 
-extern struct kmem_cache *vm_area_cachep;
+struct vm_area_struct *vm_area_alloc(void);
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
+void vm_area_free(struct vm_area_struct *);
 
 #ifndef CONFIG_MMU
 extern struct rb_root nommu_region_tree;
diff --git a/kernel/fork.c b/kernel/fork.c
index 9440d61b925c..0e23deb5acfc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -303,11 +303,26 @@ struct kmem_cache *files_cachep;
 struct kmem_cache *fs_cachep;
 
 /* SLAB cache for vm_area_struct structures */
-struct kmem_cache *vm_area_cachep;
+static struct kmem_cache *vm_area_cachep;
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+struct vm_area_struct *vm_area_alloc(void)
+{
+	return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{
+	return kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{
+	kmem_cache_free(vm_area_cachep, vma);
+}
+
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
 	void *stack = task_stack_page(tsk);
@@ -455,7 +470,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 				goto fail_nomem;
 			charge = len;
 		}
-		tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		tmp = vm_area_dup(mpnt);
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
@@ -539,7 +554,7 @@ fail_uprobe_end:
 fail_nomem_anon_vma_fork:
 	mpol_put(vma_policy(tmp));
 fail_nomem_policy:
-	kmem_cache_free(vm_area_cachep, tmp);
+	vm_area_free(tmp);
 fail_nomem:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
diff --git a/mm/mmap.c b/mm/mmap.c
index 5801b5f0a634..4286ad2dd1f5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -182,7 +182,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return next;
 }
 
@@ -911,7 +911,7 @@ again:
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
-		kmem_cache_free(vm_area_cachep, next);
+		vm_area_free(next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
 		 * we must remove another next too. It would clutter
@@ -1729,7 +1729,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
@@ -1832,7 +1832,7 @@ allow_write_and_free_vma:
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
@@ -2620,7 +2620,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 			return err;
 	}
 
-	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	new = vm_area_dup(vma);
 	if (!new)
 		return -ENOMEM;
 
@@ -2669,7 +2669,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  out_free_mpol:
 	mpol_put(vma_policy(new));
  out_free_vma:
-	kmem_cache_free(vm_area_cachep, new);
+	vm_area_free(new);
 	return err;
 }
 
@@ -2984,7 +2984,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma) {
 		vm_unacct_memory(len >> PAGE_SHIFT);
 		return -ENOMEM;
@@ -3202,7 +3202,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
 	} else {
-		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+		new_vma = vm_area_dup(vma);
 		if (!new_vma)
 			goto out;
 		*new_vma = *vma;
@@ -3226,7 +3226,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_mempol:
 	mpol_put(vma_policy(new_vma));
 out_free_vma:
-	kmem_cache_free(vm_area_cachep, new_vma);
+	vm_area_free(new_vma);
 out:
 	return NULL;
 }
@@ -3350,7 +3350,7 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
 
@@ -3376,7 +3376,7 @@ static struct vm_area_struct *__install_special_mapping(
 	return vma;
 
 out:
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return ERR_PTR(ret);
 }
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 4452d8bd9ae4..006e3fe65017 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -769,7 +769,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 }
 
 /*
@@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file,
 	if (!region)
 		goto error_getting_region;
 
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	vma = vm_area_alloc();
 	if (!vma)
 		goto error_getting_vma;
 
@@ -1368,7 +1368,7 @@ error:
 	kmem_cache_free(vm_region_jar, region);
 	if (vma->vm_file)
 		fput(vma->vm_file);
-	kmem_cache_free(vm_area_cachep, vma);
+	vm_area_free(vma);
 	return ret;
 
 sharing_violation:
@@ -1469,7 +1469,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!region)
 		return -ENOMEM;
 
-	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+	new = vm_area_dup(vma);
 	if (!new) {
 		kmem_cache_free(vm_region_jar, region);
 		return -ENOMEM;
-- 
cgit v1.2.3


From 490fc053865c9cc40f1085ef8a5504f5341f79d2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 21 Jul 2018 15:24:03 -0700
Subject: mm: make vm_area_alloc() initialize core fields

Like vm_area_dup(), it initializes the anon_vma_chain head, and the
basic mm pointer.

The rest of the fields end up being different for different users,
although the plan is to also initialize the 'vm_ops' field to a dummy
entry.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/kernel/perfmon.c |  4 +---
 arch/ia64/mm/init.c        |  8 ++------
 fs/exec.c                  |  4 +---
 include/linux/mm.h         |  2 +-
 kernel/fork.c              | 10 ++++++++--
 mm/mmap.c                  | 12 +++---------
 mm/nommu.c                 |  3 +--
 7 files changed, 17 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index e859246badca..46bff1661836 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2278,17 +2278,15 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	DPRINT(("smpl_buf @%p\n", smpl_buf));
 
 	/* allocate vma */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
 	 */
-	vma->vm_mm	     = mm;
 	vma->vm_file	     = get_file(filp);
 	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3f2321bffb72..bdb14a369137 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -114,10 +114,8 @@ ia64_init_addr_space (void)
 	 * the problem.  When the process attempts to write to the register backing store
 	 * for the first time, it will get a SEGFAULT in this case.
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(current->mm);
 	if (vma) {
-		INIT_LIST_HEAD(&vma->anon_vma_chain);
-		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
@@ -133,10 +131,8 @@ ia64_init_addr_space (void)
 
 	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
-		vma = vm_area_alloc();
+		vma = vm_area_alloc(current->mm);
 		if (vma) {
-			INIT_LIST_HEAD(&vma->anon_vma_chain);
-			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
 			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
diff --git a/fs/exec.c b/fs/exec.c
index 9bd83989ea25..72e961a62adb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -290,7 +290,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	struct vm_area_struct *vma = NULL;
 	struct mm_struct *mm = bprm->mm;
 
-	bprm->vma = vma = vm_area_alloc();
+	bprm->vma = vma = vm_area_alloc(mm);
 	if (!vma)
 		return -ENOMEM;
 
@@ -298,7 +298,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 		err = -EINTR;
 		goto err_free;
 	}
-	vma->vm_mm = mm;
 
 	/*
 	 * Place the stack at the largest stack address the architecture
@@ -311,7 +310,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	err = insert_vm_struct(mm, vma);
 	if (err)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index de2fd86c6154..d3a3842316b8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -155,7 +155,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
  * mmap() functions).
  */
 
-struct vm_area_struct *vm_area_alloc(void);
+struct vm_area_struct *vm_area_alloc(struct mm_struct *);
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *);
 void vm_area_free(struct vm_area_struct *);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 67253e41bfb0..a191c05e757d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -308,9 +308,15 @@ static struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-struct vm_area_struct *vm_area_alloc(void)
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
-	return kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+
+	if (vma) {
+		vma->vm_mm = mm;
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
+	}
+	return vma;
 }
 
 struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
diff --git a/mm/mmap.c b/mm/mmap.c
index b0ed8ce1b67e..ff1944d8d458 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1729,19 +1729,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * specific mapper. the address has already been validated, but
 	 * not unmapped, but the maps are removed from the list.
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		error = -ENOMEM;
 		goto unacct_error;
 	}
 
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		if (vm_flags & VM_DENYWRITE) {
@@ -2979,14 +2977,12 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	/*
 	 * create a vma struct for an anonymous mapping
 	 */
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (!vma) {
 		vm_unacct_memory(len >> PAGE_SHIFT);
 		return -ENOMEM;
 	}
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
@@ -3343,12 +3339,10 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(mm);
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
-	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 
diff --git a/mm/nommu.c b/mm/nommu.c
index c2560e9cc803..1d22fdbf7d7c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1204,7 +1204,7 @@ unsigned long do_mmap(struct file *file,
 	if (!region)
 		goto error_getting_region;
 
-	vma = vm_area_alloc();
+	vma = vm_area_alloc(current->mm);
 	if (!vma)
 		goto error_getting_vma;
 
@@ -1212,7 +1212,6 @@ unsigned long do_mmap(struct file *file,
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
-	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;
 
-- 
cgit v1.2.3


From f95de8aa9f824d96421cb7ca81552b4ad8768a31 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 19 Jul 2018 15:56:59 +0800
Subject: bpfilter: Fix mismatch in function argument types

Fix following warning:
net/ipv4/bpfilter/sockopt.c:28:5: error: symbol 'bpfilter_ip_set_sockopt' redeclared with different type
net/ipv4/bpfilter/sockopt.c:34:5: error: symbol 'bpfilter_ip_get_sockopt' redeclared with different type

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpfilter.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h
index 687b1760bb9f..f02cee0225d4 100644
--- a/include/linux/bpfilter.h
+++ b/include/linux/bpfilter.h
@@ -5,10 +5,10 @@
 #include <uapi/linux/bpfilter.h>
 
 struct sock;
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
+int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
 			    unsigned int optlen);
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
-			    int *optlen);
+int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
+			    int __user *optlen);
 extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
 				       char __user *optval,
 				       unsigned int optlen, bool is_set);
-- 
cgit v1.2.3


From 24b711edfc34bc45777a3f068812b7d1ed004a5d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 19 Jul 2018 12:41:18 -0700
Subject: net/ipv6: Fix linklocal to global address with VRF

Example setup:
    host: ip -6 addr add dev eth1 2001:db8:104::4
           where eth1 is enslaved to a VRF

    switch: ip -6 ro add 2001:db8:104::4/128 dev br1
            where br1 only has an LLA

           ping6 2001:db8:104::4
           ssh   2001:db8:104::4

(NOTE: UDP works fine if the PKTINFO has the address set to the global
address and ifindex is set to the index of eth1 with a destination an
LLA).

For ICMP, icmp6_iif needs to be updated to check if skb->dev is an
L3 master. If it is then return the ifindex from rt6i_idev similar
to what is done for loopback.

For TCP, restore the original tcp_v6_iif definition which is needed in
most places and add a new tcp_v6_iif_l3_slave that considers the
l3_slave variability. This latter check is only needed for socket
lookups.

Fixes: 9ff74384600a ("net: vrf: Handle ipv6 multicast and link-local addresses")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   | 5 +++++
 net/ipv6/icmp.c     | 5 +++--
 net/ipv6/tcp_ipv6.c | 6 ++++--
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 25116ec02087..cd3ecda9386a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -840,6 +840,11 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
  * as TCP moves IP6CB into a different location in skb->cb[]
  */
 static inline int tcp_v6_iif(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->header.h6.iif;
+}
+
+static inline int tcp_v6_iif_l3_slave(const struct sk_buff *skb)
 {
 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf6ab6e..ef2505aefc15 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -402,9 +402,10 @@ static int icmp6_iif(const struct sk_buff *skb)
 
 	/* for local traffic to local address, skb dev is the loopback
 	 * device. Check if there is a dst attached to the skb and if so
-	 * get the real device index.
+	 * get the real device index. Same is needed for replies to a link
+	 * local address on a device enslaved to an L3 master device
 	 */
-	if (unlikely(iif == LOOPBACK_IFINDEX)) {
+	if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
 		const struct rt6_info *rt6 = skb_rt6_info(skb);
 
 		if (rt6)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7efa9fd7e109..03e6b7a2bc53 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -938,7 +938,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 					   &tcp_hashinfo, NULL, 0,
 					   &ipv6h->saddr,
 					   th->source, &ipv6h->daddr,
-					   ntohs(th->source), tcp_v6_iif(skb),
+					   ntohs(th->source),
+					   tcp_v6_iif_l3_slave(skb),
 					   tcp_v6_sdif(skb));
 		if (!sk1)
 			goto out;
@@ -1609,7 +1610,8 @@ do_time_wait:
 					    skb, __tcp_hdrlen(th),
 					    &ipv6_hdr(skb)->saddr, th->source,
 					    &ipv6_hdr(skb)->daddr,
-					    ntohs(th->dest), tcp_v6_iif(skb),
+					    ntohs(th->dest),
+					    tcp_v6_iif_l3_slave(skb),
 					    sdif);
 		if (sk2) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
-- 
cgit v1.2.3


From c4db9c1e8c70bc60e392da8a485bcfb035d559c2 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 20 Jul 2018 10:47:23 +0900
Subject: efi: Deduplicate efi_open_volume()

There's one ARM, one x86_32 and one x86_64 version of efi_open_volume()
which can be folded into a single shared version by masking their
differences with the efi_call_proto() macro introduced by commit:

  3552fdf29f01 ("efi: Allow bitness-agnostic protocol calls").

To be able to dereference the device_handle attribute from the
efi_loaded_image_t table in an arch- and bitness-agnostic manner,
introduce the efi_table_attr() macro (which already exists for x86)
to arm and arm64.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180720014726.24031-7-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/include/asm/efi.h                     |  3 ++
 arch/arm64/include/asm/efi.h                   |  3 ++
 arch/x86/boot/compressed/eboot.c               | 63 --------------------------
 drivers/firmware/efi/libstub/arm-stub.c        | 25 ----------
 drivers/firmware/efi/libstub/efi-stub-helper.c | 31 ++++++++++++-
 drivers/firmware/efi/libstub/efistub.h         |  3 --
 include/linux/efi.h                            | 10 ++++
 7 files changed, 45 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 17f1f1a814ff..38badaae8d9d 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -58,6 +58,9 @@ void efi_virtmap_unload(void);
 #define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
 #define efi_is_64bit()			(false)
 
+#define efi_table_attr(table, attr, instance)				\
+	((table##_t *)instance)->attr
+
 #define efi_call_proto(protocol, f, instance, ...)			\
 	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
 
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 192d791f1103..7ed320895d1f 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -87,6 +87,9 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base,
 #define efi_call_runtime(f, ...)	sys_table_arg->runtime->f(__VA_ARGS__)
 #define efi_is_64bit()			(true)
 
+#define efi_table_attr(table, attr, instance)				\
+	((table##_t *)instance)->attr
+
 #define efi_call_proto(protocol, f, instance, ...)			\
 	((protocol##_t *)instance)->f(instance, ##__VA_ARGS__)
 
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 92b573fd239c..915c64edbe8e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -41,69 +41,6 @@ static void setup_boot_services##bits(struct efi_config *c)		\
 BOOT_SERVICES(32);
 BOOT_SERVICES(64);
 
-static inline efi_status_t __open_volume32(void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_32_t *image = __image;
-	efi_file_handle_32_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-	unsigned long func;
-
-	status = efi_call_early(handle_protocol, handle,
-				&fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	func = (unsigned long)io->open_volume;
-	status = efi_early->call(func, io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table, "Failed to open volume\n");
-
-	*__fh = fh;
-
-	return status;
-}
-
-static inline efi_status_t __open_volume64(void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_64_t *image = __image;
-	efi_file_handle_64_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-	unsigned long func;
-
-	status = efi_call_early(handle_protocol, handle,
-				&fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	func = (unsigned long)io->open_volume;
-	status = efi_early->call(func, io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table, "Failed to open volume\n");
-
-	*__fh = fh;
-
-	return status;
-}
-
-efi_status_t
-efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh)
-{
-	if (efi_early->is64)
-		return __open_volume64(__image, __fh);
-
-	return __open_volume32(__image, __fh);
-}
-
 void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str)
 {
 	efi_call_proto(efi_simple_text_output_protocol, output_string,
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index c98b1856fc3d..6920033de6d4 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -40,31 +40,6 @@
 
 static u64 virtmap_base = EFI_RT_VIRTUAL_BASE;
 
-efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
-			     void *__image, void **__fh)
-{
-	efi_file_io_interface_t *io;
-	efi_loaded_image_t *image = __image;
-	efi_file_handle_t *fh;
-	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
-	efi_status_t status;
-	void *handle = (void *)(unsigned long)image->device_handle;
-
-	status = sys_table_arg->boottime->handle_protocol(handle,
-				 &fs_proto, (void **)&io);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
-		return status;
-	}
-
-	status = io->open_volume(io, &fh);
-	if (status != EFI_SUCCESS)
-		efi_printk(sys_table_arg, "Failed to open volume\n");
-
-	*__fh = fh;
-	return status;
-}
-
 void efi_char16_printk(efi_system_table_t *sys_table_arg,
 			      efi_char16_t *str)
 {
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 50a9cab5a834..e94975f4655b 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -413,6 +413,34 @@ static efi_status_t efi_file_close(void *handle)
 	return efi_call_proto(efi_file_handle, close, handle);
 }
 
+static efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg,
+				    efi_loaded_image_t *image,
+				    efi_file_handle_t **__fh)
+{
+	efi_file_io_interface_t *io;
+	efi_file_handle_t *fh;
+	efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID;
+	efi_status_t status;
+	void *handle = (void *)(unsigned long)efi_table_attr(efi_loaded_image,
+							     device_handle,
+							     image);
+
+	status = efi_call_early(handle_protocol, handle,
+				&fs_proto, (void **)&io);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table_arg, "Failed to handle fs_proto\n");
+		return status;
+	}
+
+	status = efi_call_proto(efi_file_io_interface, open_volume, io, &fh);
+	if (status != EFI_SUCCESS)
+		efi_printk(sys_table_arg, "Failed to open volume\n");
+	else
+		*__fh = fh;
+
+	return status;
+}
+
 /*
  * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi=
  * option, e.g. efi=nochunk.
@@ -563,8 +591,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
 
 		/* Only open the volume once. */
 		if (!i) {
-			status = efi_open_volume(sys_table_arg, image,
-						 (void **)&fh);
+			status = efi_open_volume(sys_table_arg, image, &fh);
 			if (status != EFI_SUCCESS)
 				goto free_files;
 		}
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index f59564b72ddc..32799cf039ef 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -36,9 +36,6 @@ extern int __pure is_quiet(void);
 
 void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
 
-efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image,
-			     void **__fh);
-
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg);
 
 efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index e190652f5ef9..401e4b254e30 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -894,6 +894,16 @@ typedef struct _efi_file_handle {
 	void *flush;
 } efi_file_handle_t;
 
+typedef struct {
+	u64 revision;
+	u32 open_volume;
+} efi_file_io_interface_32_t;
+
+typedef struct {
+	u64 revision;
+	u64 open_volume;
+} efi_file_io_interface_64_t;
+
 typedef struct _efi_file_io_interface {
 	u64 revision;
 	int (*open_volume)(struct _efi_file_io_interface *,
-- 
cgit v1.2.3


From 042f8825569d628517784d558aefe23c212f0fb2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 20 Jul 2018 21:14:38 -0700
Subject: nfp: bring back support for offloading shared blocks

Now that we have offload replay infrastructure added by
commit 326367427cc0 ("net: sched: call reoffload op on block callback reg")
and flows are guaranteed to be removed correctly, we can revert
commit 951a8ee6def3 ("nfp: reject binding to shared blocks").

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c       | 3 ---
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 ---
 include/net/pkt_cls.h                               | 5 -----
 3 files changed, 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 458f49235d06..994d2b756fe1 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -195,9 +195,6 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
-	if (tcf_block_shared(f->block))
-		return -EOPNOTSUPP;
-
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 43b9bf12b174..6bc8a97f7e03 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -631,9 +631,6 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
-	if (tcf_block_shared(f->block))
-		return -EOPNOTSUPP;
-
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e4252a176eec..4f405ca8346f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -114,11 +114,6 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
-static inline bool tcf_block_shared(struct tcf_block *block)
-{
-	return false;
-}
-
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
 	return NULL;
-- 
cgit v1.2.3


From f88a333b44318643282b8acc92af90deda441f5e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Sun, 22 Jul 2018 15:07:11 +0100
Subject: alpha: fix osf_wait4() breakage

kernel_wait4() expects a userland address for status - it's only
rusage that goes as a kernel one (and needs a copyout afterwards)

[ Also, fix the prototype of kernel_wait4() to have that __user
  annotation   - Linus ]

Fixes: 92ebce5ac55d ("osf_wait4: switch to kernel_wait4()")
Cc: stable@kernel.org # v4.13+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/osf_sys.c | 5 +----
 include/linux/sched/task.h  | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 6e921754c8fc..c210a25dd6da 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1180,13 +1180,10 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru)
 SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
 		struct rusage32 __user *, ur)
 {
-	unsigned int status = 0;
 	struct rusage r;
-	long err = kernel_wait4(pid, &status, options, &r);
+	long err = kernel_wait4(pid, ustatus, options, &r);
 	if (err <= 0)
 		return err;
-	if (put_user(status, ustatus))
-		return -EFAULT;
 	if (!ur)
 		return err;
 	if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime))
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 5be31eb7b266..108ede99e533 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -75,7 +75,7 @@ extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *,
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-extern long kernel_wait4(pid_t, int *, int, struct rusage *);
+extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
 
 extern void free_task(struct task_struct *tsk);
 
-- 
cgit v1.2.3


From d5e838275c80aeb96d4c0b81442a851dfef2e948 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 5 Jun 2018 13:21:26 +1000
Subject: devres: Add devm_of_iomap()

There are still quite a few cases where a device might want
to get to a different node of the device-tree, obtain the
resources and map them.

We have of_iomap() and of_io_request_and_map() but they both
have shortcomings, such as not returning the size of the
resource found (which can be useful) and not being "managed".

This adds a devm_of_iomap() that provides all of these and
should probably replace uses of the above in most drivers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
---
 include/linux/device.h |  4 ++++
 lib/devres.c           | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..6aa8d51eabe9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -696,6 +696,10 @@ extern void devm_free_pages(struct device *dev, unsigned long addr);
 
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 
+void __iomem *devm_of_iomap(struct device *dev,
+			    struct device_node *node, int index,
+			    resource_size_t *size);
+
 /* allows to add/remove a custom action to devres stack */
 int devm_add_action(struct device *dev, void (*action)(void *), void *data);
 void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
diff --git a/lib/devres.c b/lib/devres.c
index 5bec1120b392..faccf1a037d0 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -4,6 +4,7 @@
 #include <linux/io.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
+#include <linux/of_address.h>
 
 enum devm_ioremap_type {
 	DEVM_IOREMAP = 0,
@@ -162,6 +163,41 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
 }
 EXPORT_SYMBOL(devm_ioremap_resource);
 
+/*
+ * devm_of_iomap - Requests a resource and maps the memory mapped IO
+ *		   for a given device_node managed by a given device
+ *
+ * Checks that a resource is a valid memory region, requests the memory
+ * region and ioremaps it. All operations are managed and will be undone
+ * on driver detach of the device.
+ *
+ * This is to be used when a device requests/maps resources described
+ * by other device tree nodes (children or otherwise).
+ *
+ * @dev:	The device "managing" the resource
+ * @node:       The device-tree node where the resource resides
+ * @index:	index of the MMIO range in the "reg" property
+ * @size:	Returns the size of the resource (pass NULL if not needed)
+ * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded
+ * error code on failure. Usage example:
+ *
+ *	base = devm_of_iomap(&pdev->dev, node, 0, NULL);
+ *	if (IS_ERR(base))
+ *		return PTR_ERR(base);
+ */
+void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index,
+			    resource_size_t *size)
+{
+	struct resource res;
+
+	if (of_address_to_resource(node, index, &res))
+		return IOMEM_ERR_PTR(-EINVAL);
+	if (size)
+		*size = resource_size(&res);
+	return devm_ioremap_resource(dev, &res);
+}
+EXPORT_SYMBOL(devm_of_iomap);
+
 #ifdef CONFIG_HAS_IOPORT_MAP
 /*
  * Generic iomap devres
-- 
cgit v1.2.3


From 6a794a27daca9c5a39de13c03b0748bb2d4a7a70 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Tue, 12 Jun 2018 09:55:04 +1000
Subject: fsi: master-ast-cf: Add new FSI master using Aspeed ColdFire

The Aspeed AST2x00 can contain a ColdFire v1 coprocessor which
is currently unused on OpenPower systems.

This adds an alternative to the fsi-master-gpio driver that
uses that coprocessor instead of bit banging from the ARM
core itself. The end result is about 4 times faster.

The firmware for the coprocessor and its source code can be
found at https://github.com/ozbenh/cf-fsi and is system specific.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/fsi/Kconfig                      |    9 +
 drivers/fsi/Makefile                     |    1 +
 drivers/fsi/cf-fsi-fw.h                  |  157 ++++
 drivers/fsi/fsi-master-ast-cf.c          | 1438 ++++++++++++++++++++++++++++++
 include/trace/events/fsi_master_ast_cf.h |  150 ++++
 5 files changed, 1755 insertions(+)
 create mode 100644 drivers/fsi/cf-fsi-fw.h
 create mode 100644 drivers/fsi/fsi-master-ast-cf.c
 create mode 100644 include/trace/events/fsi_master_ast_cf.h

(limited to 'include')

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index 9c08f467a7bb..8d82b1e60514 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -27,6 +27,15 @@ config FSI_MASTER_HUB
 	allow chaining of FSI links to an arbitrary depth.  This allows for
 	a high target device fanout.
 
+config FSI_MASTER_AST_CF
+	tristate "FSI master based on Aspeed ColdFire coprocessor"
+	depends on GPIOLIB
+	depends on GPIO_ASPEED
+	---help---
+	This option enables a FSI master using the AST2400 and AST2500 GPIO
+	lines driven by the internal ColdFire coprocessor. This requires
+	the corresponding machine specific ColdFire firmware to be available.
+
 config FSI_SCOM
 	tristate "SCOM FSI client device driver"
 	---help---
diff --git a/drivers/fsi/Makefile b/drivers/fsi/Makefile
index 851182e1cd9e..a50d6ce22fb3 100644
--- a/drivers/fsi/Makefile
+++ b/drivers/fsi/Makefile
@@ -2,5 +2,6 @@
 obj-$(CONFIG_FSI) += fsi-core.o
 obj-$(CONFIG_FSI_MASTER_HUB) += fsi-master-hub.o
 obj-$(CONFIG_FSI_MASTER_GPIO) += fsi-master-gpio.o
+obj-$(CONFIG_FSI_MASTER_AST_CF) += fsi-master-ast-cf.o
 obj-$(CONFIG_FSI_SCOM) += fsi-scom.o
 obj-$(CONFIG_FSI_SBEFIFO) += fsi-sbefifo.o
diff --git a/drivers/fsi/cf-fsi-fw.h b/drivers/fsi/cf-fsi-fw.h
new file mode 100644
index 000000000000..712df0461911
--- /dev/null
+++ b/drivers/fsi/cf-fsi-fw.h
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0+
+#ifndef __CF_FSI_FW_H
+#define __CF_FSI_FW_H
+
+/*
+ * uCode file layout
+ *
+ * 0000...03ff : m68k exception vectors
+ * 0400...04ff : Header info & boot config block
+ * 0500....... : Code & stack
+ */
+
+/*
+ * Header info & boot config area
+ *
+ * The Header info is built into the ucode and provide version and
+ * platform information.
+ *
+ * the Boot config needs to be adjusted by the ARM prior to starting
+ * the ucode if the Command/Status area isn't at 0x320000 in CF space
+ * (ie. beginning of SRAM).
+ */
+
+#define HDR_OFFSET	        0x400
+
+/* Info: Signature & version */
+#define HDR_SYS_SIG		0x00	/* 2 bytes system signature */
+#define  SYS_SIG_SHARED		0x5348
+#define  SYS_SIG_SPLIT		0x5350
+#define HDR_FW_VERS		0x02	/* 2 bytes Major.Minor */
+#define HDR_API_VERS		0x04	/* 2 bytes Major.Minor */
+#define  API_VERSION_MAJ	2	/* Current version */
+#define  API_VERSION_MIN	1
+#define HDR_FW_OPTIONS		0x08	/* 4 bytes option flags */
+#define  FW_OPTION_TRACE_EN	0x00000001	/* FW tracing enabled */
+#define	 FW_OPTION_CONT_CLOCK	0x00000002	/* Continuous clocking supported */
+#define HDR_FW_SIZE		0x10	/* 4 bytes size for combo image */
+
+/* Boot Config: Address of Command/Status area */
+#define HDR_CMD_STAT_AREA	0x80	/* 4 bytes CF address */
+#define HDR_FW_CONTROL		0x84	/* 4 bytes control flags */
+#define	 FW_CONTROL_CONT_CLOCK	0x00000002	/* Continuous clocking enabled */
+#define	 FW_CONTROL_DUMMY_RD	0x00000004	/* Extra dummy read (AST2400) */
+#define	 FW_CONTROL_USE_STOP	0x00000008	/* Use STOP instructions */
+#define HDR_CLOCK_GPIO_VADDR	0x90	/* 2 bytes offset from GPIO base */
+#define HDR_CLOCK_GPIO_DADDR	0x92	/* 2 bytes offset from GPIO base */
+#define HDR_DATA_GPIO_VADDR	0x94	/* 2 bytes offset from GPIO base */
+#define HDR_DATA_GPIO_DADDR	0x96	/* 2 bytes offset from GPIO base */
+#define HDR_TRANS_GPIO_VADDR	0x98	/* 2 bytes offset from GPIO base */
+#define HDR_TRANS_GPIO_DADDR	0x9a	/* 2 bytes offset from GPIO base */
+#define HDR_CLOCK_GPIO_BIT	0x9c	/* 1 byte bit number */
+#define HDR_DATA_GPIO_BIT	0x9d	/* 1 byte bit number */
+#define HDR_TRANS_GPIO_BIT	0x9e	/* 1 byte bit number */
+
+/*
+ *  Command/Status area layout: Main part
+ */
+
+/* Command/Status register:
+ *
+ * +---------------------------+
+ * | STAT | RLEN | CLEN | CMD  |
+ * |   8  |   8  |   8  |   8  |
+ * +---------------------------+
+ *    |       |      |      |
+ *    status  |      |      |
+ * Response len      |      |
+ * (in bits)         |      |
+ *                   |      |
+ *         Command len      |
+ *         (in bits)        |
+ *                          |
+ *               Command code
+ *
+ * Due to the big endian layout, that means that a byte read will
+ * return the status byte
+ */
+#define	CMD_STAT_REG	        0x00
+#define  CMD_REG_CMD_MASK	0x000000ff
+#define  CMD_REG_CMD_SHIFT	0
+#define	  CMD_NONE		0x00
+#define	  CMD_COMMAND		0x01
+#define	  CMD_BREAK		0x02
+#define	  CMD_IDLE_CLOCKS	0x03 /* clen = #clocks */
+#define   CMD_INVALID		0xff
+#define  CMD_REG_CLEN_MASK	0x0000ff00
+#define  CMD_REG_CLEN_SHIFT	8
+#define  CMD_REG_RLEN_MASK	0x00ff0000
+#define  CMD_REG_RLEN_SHIFT	16
+#define  CMD_REG_STAT_MASK	0xff000000
+#define  CMD_REG_STAT_SHIFT	24
+#define	  STAT_WORKING		0x00
+#define	  STAT_COMPLETE		0x01
+#define	  STAT_ERR_INVAL_CMD	0x80
+#define	  STAT_ERR_INVAL_IRQ	0x81
+#define	  STAT_ERR_MTOE		0x82
+
+/* Response tag & CRC */
+#define	STAT_RTAG		0x04
+
+/* Response CRC */
+#define	STAT_RCRC		0x05
+
+/* Echo and Send delay */
+#define	ECHO_DLY_REG		0x08
+#define	SEND_DLY_REG		0x09
+
+/* Command data area
+ *
+ * Last byte of message must be left aligned
+ */
+#define	CMD_DATA		0x10 /* 64 bit of data */
+
+/* Response data area, right aligned, unused top bits are 1 */
+#define	RSP_DATA		0x20 /* 32 bit of data */
+
+/* Misc */
+#define	INT_CNT			0x30 /* 32-bit interrupt count */
+#define	BAD_INT_VEC		0x34 /* 32-bit bad interrupt vector # */
+#define	CF_STARTED		0x38 /* byte, set to -1 when copro started */
+#define	CLK_CNT			0x3c /* 32-bit, clock count (debug only) */
+
+/*
+ *  SRAM layout: GPIO arbitration part
+ */
+#define ARB_REG			0x40
+#define  ARB_ARM_REQ		0x01
+#define  ARB_ARM_ACK		0x02
+
+/* Misc2 */
+#define CF_RESET_D0		0x50
+#define CF_RESET_D1		0x54
+#define BAD_INT_S0		0x58
+#define BAD_INT_S1		0x5c
+#define STOP_CNT		0x60
+
+/* Internal */
+
+/*
+ * SRAM layout: Trace buffer (debug builds only)
+ */
+#define	TRACEBUF		0x100
+#define	  TR_CLKOBIT0		0xc0
+#define	  TR_CLKOBIT1		0xc1
+#define	  TR_CLKOSTART		0x82
+#define	  TR_OLEN		0x83 /* + len */
+#define	  TR_CLKZ		0x84 /* + count */
+#define	  TR_CLKWSTART		0x85
+#define	  TR_CLKTAG		0x86 /* + tag */
+#define	  TR_CLKDATA		0x87 /* + len */
+#define	  TR_CLKCRC		0x88 /* + raw crc */
+#define	  TR_CLKIBIT0		0x90
+#define	  TR_CLKIBIT1		0x91
+#define	  TR_END		0xff
+
+#endif /* __CF_FSI_FW_H */
+
diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c
new file mode 100644
index 000000000000..57afaae0b691
--- /dev/null
+++ b/drivers/fsi/fsi-master-ast-cf.c
@@ -0,0 +1,1438 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2018 IBM Corp
+/*
+ * A FSI master controller, using a simple GPIO bit-banging interface
+ */
+
+#include <linux/crc4.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fsi.h>
+#include <linux/gpio/consumer.h>
+#include <linux/io.h>
+#include <linux/irqflags.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/regmap.h>
+#include <linux/firmware.h>
+#include <linux/gpio/aspeed.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/genalloc.h>
+
+#include "fsi-master.h"
+#include "cf-fsi-fw.h"
+
+#define FW_FILE_NAME	"cf-fsi-fw.bin"
+
+/* Common SCU based coprocessor control registers */
+#define SCU_COPRO_CTRL			0x100
+#define   SCU_COPRO_RESET			0x00000002
+#define   SCU_COPRO_CLK_EN			0x00000001
+
+/* AST2500 specific ones */
+#define SCU_2500_COPRO_SEG0		0x104
+#define SCU_2500_COPRO_SEG1		0x108
+#define SCU_2500_COPRO_SEG2		0x10c
+#define SCU_2500_COPRO_SEG3		0x110
+#define SCU_2500_COPRO_SEG4		0x114
+#define SCU_2500_COPRO_SEG5		0x118
+#define SCU_2500_COPRO_SEG6		0x11c
+#define SCU_2500_COPRO_SEG7		0x120
+#define SCU_2500_COPRO_SEG8		0x124
+#define   SCU_2500_COPRO_SEG_SWAP		0x00000001
+#define SCU_2500_COPRO_CACHE_CTL	0x128
+#define   SCU_2500_COPRO_CACHE_EN		0x00000001
+#define   SCU_2500_COPRO_SEG0_CACHE_EN		0x00000002
+#define   SCU_2500_COPRO_SEG1_CACHE_EN		0x00000004
+#define   SCU_2500_COPRO_SEG2_CACHE_EN		0x00000008
+#define   SCU_2500_COPRO_SEG3_CACHE_EN		0x00000010
+#define   SCU_2500_COPRO_SEG4_CACHE_EN		0x00000020
+#define   SCU_2500_COPRO_SEG5_CACHE_EN		0x00000040
+#define   SCU_2500_COPRO_SEG6_CACHE_EN		0x00000080
+#define   SCU_2500_COPRO_SEG7_CACHE_EN		0x00000100
+#define   SCU_2500_COPRO_SEG8_CACHE_EN		0x00000200
+
+#define SCU_2400_COPRO_SEG0		0x104
+#define SCU_2400_COPRO_SEG2		0x108
+#define SCU_2400_COPRO_SEG4		0x10c
+#define SCU_2400_COPRO_SEG6		0x110
+#define SCU_2400_COPRO_SEG8		0x114
+#define   SCU_2400_COPRO_SEG_SWAP		0x80000000
+#define SCU_2400_COPRO_CACHE_CTL	0x118
+#define   SCU_2400_COPRO_CACHE_EN		0x00000001
+#define   SCU_2400_COPRO_SEG0_CACHE_EN		0x00000002
+#define   SCU_2400_COPRO_SEG2_CACHE_EN		0x00000004
+#define   SCU_2400_COPRO_SEG4_CACHE_EN		0x00000008
+#define   SCU_2400_COPRO_SEG6_CACHE_EN		0x00000010
+#define   SCU_2400_COPRO_SEG8_CACHE_EN		0x00000020
+
+/* CVIC registers */
+#define CVIC_EN_REG			0x10
+#define CVIC_TRIG_REG			0x18
+
+/*
+ * System register base address (needed for configuring the
+ * coldfire maps)
+ */
+#define SYSREG_BASE			0x1e600000
+
+/* Amount of SRAM required */
+#define SRAM_SIZE			0x1000
+
+#define LAST_ADDR_INVALID		0x1
+
+struct fsi_master_acf {
+	struct fsi_master	master;
+	struct device		*dev;
+	struct regmap		*scu;
+	struct mutex		lock;	/* mutex for command ordering */
+	struct gpio_desc	*gpio_clk;
+	struct gpio_desc	*gpio_data;
+	struct gpio_desc	*gpio_trans;	/* Voltage translator */
+	struct gpio_desc	*gpio_enable;	/* FSI enable */
+	struct gpio_desc	*gpio_mux;	/* Mux control */
+	uint16_t		gpio_clk_vreg;
+	uint16_t		gpio_clk_dreg;
+	uint16_t       		gpio_dat_vreg;
+	uint16_t       		gpio_dat_dreg;
+	uint16_t       		gpio_tra_vreg;
+	uint16_t       		gpio_tra_dreg;
+	uint8_t			gpio_clk_bit;
+	uint8_t			gpio_dat_bit;
+	uint8_t			gpio_tra_bit;
+	uint32_t		cf_mem_addr;
+	size_t			cf_mem_size;
+	void __iomem		*cf_mem;
+	void __iomem		*cvic;
+	struct gen_pool		*sram_pool;
+	void __iomem		*sram;
+	bool			is_ast2500;
+	bool			external_mode;
+	bool			trace_enabled;
+	uint32_t		last_addr;
+	uint8_t			t_send_delay;
+	uint8_t			t_echo_delay;
+	uint32_t		cvic_sw_irq;
+};
+#define to_fsi_master_acf(m) container_of(m, struct fsi_master_acf, master)
+
+struct fsi_msg {
+	uint64_t	msg;
+	uint8_t		bits;
+};
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/fsi_master_ast_cf.h>
+
+static void msg_push_bits(struct fsi_msg *msg, uint64_t data, int bits)
+{
+	msg->msg <<= bits;
+	msg->msg |= data & ((1ull << bits) - 1);
+	msg->bits += bits;
+}
+
+static void msg_push_crc(struct fsi_msg *msg)
+{
+	uint8_t crc;
+	int top;
+
+	top = msg->bits & 0x3;
+
+	/* start bit, and any non-aligned top bits */
+	crc = crc4(0, 1 << top | msg->msg >> (msg->bits - top), top + 1);
+
+	/* aligned bits */
+	crc = crc4(crc, msg->msg, msg->bits - top);
+
+	msg_push_bits(msg, crc, 4);
+}
+
+static void msg_finish_cmd(struct fsi_msg *cmd)
+{
+	/* Left align message */
+	cmd->msg <<= (64 - cmd->bits);
+}
+
+static bool check_same_address(struct fsi_master_acf *master, int id,
+			       uint32_t addr)
+{
+	/* this will also handle LAST_ADDR_INVALID */
+	return master->last_addr == (((id & 0x3) << 21) | (addr & ~0x3));
+}
+
+static bool check_relative_address(struct fsi_master_acf *master, int id,
+				   uint32_t addr, uint32_t *rel_addrp)
+{
+	uint32_t last_addr = master->last_addr;
+	int32_t rel_addr;
+
+	if (last_addr == LAST_ADDR_INVALID)
+		return false;
+
+	/* We may be in 23-bit addressing mode, which uses the id as the
+	 * top two address bits. So, if we're referencing a different ID,
+	 * use absolute addresses.
+	 */
+	if (((last_addr >> 21) & 0x3) != id)
+		return false;
+
+	/* remove the top two bits from any 23-bit addressing */
+	last_addr &= (1 << 21) - 1;
+
+	/* We know that the addresses are limited to 21 bits, so this won't
+	 * overflow the signed rel_addr */
+	rel_addr = addr - last_addr;
+	if (rel_addr > 255 || rel_addr < -256)
+		return false;
+
+	*rel_addrp = (uint32_t)rel_addr;
+
+	return true;
+}
+
+static void last_address_update(struct fsi_master_acf *master,
+				int id, bool valid, uint32_t addr)
+{
+	if (!valid)
+		master->last_addr = LAST_ADDR_INVALID;
+	else
+		master->last_addr = ((id & 0x3) << 21) | (addr & ~0x3);
+}
+
+/*
+ * Encode an Absolute/Relative/Same Address command
+ */
+static void build_ar_command(struct fsi_master_acf *master,
+			     struct fsi_msg *cmd, uint8_t id,
+			     uint32_t addr, size_t size,
+			     const void *data)
+{
+	int i, addr_bits, opcode_bits;
+	bool write = !!data;
+	uint8_t ds, opcode;
+	uint32_t rel_addr;
+
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	/* we have 21 bits of address max */
+	addr &= ((1 << 21) - 1);
+
+	/* cmd opcodes are variable length - SAME_AR is only two bits */
+	opcode_bits = 3;
+
+	if (check_same_address(master, id, addr)) {
+		/* we still address the byte offset within the word */
+		addr_bits = 2;
+		opcode_bits = 2;
+		opcode = FSI_CMD_SAME_AR;
+		trace_fsi_master_acf_cmd_same_addr(master);
+
+	} else if (check_relative_address(master, id, addr, &rel_addr)) {
+		/* 8 bits plus sign */
+		addr_bits = 9;
+		addr = rel_addr;
+		opcode = FSI_CMD_REL_AR;
+		trace_fsi_master_acf_cmd_rel_addr(master, rel_addr);
+
+	} else {
+		addr_bits = 21;
+		opcode = FSI_CMD_ABS_AR;
+		trace_fsi_master_acf_cmd_abs_addr(master, addr);
+	}
+
+	/*
+	 * The read/write size is encoded in the lower bits of the address
+	 * (as it must be naturally-aligned), and the following ds bit.
+	 *
+	 *	size	addr:1	addr:0	ds
+	 *	1	x	x	0
+	 *	2	x	0	1
+	 *	4	0	1	1
+	 *
+	 */
+	ds = size > 1 ? 1 : 0;
+	addr &= ~(size - 1);
+	if (size == 4)
+		addr |= 1;
+
+	msg_push_bits(cmd, id, 2);
+	msg_push_bits(cmd, opcode, opcode_bits);
+	msg_push_bits(cmd, write ? 0 : 1, 1);
+	msg_push_bits(cmd, addr, addr_bits);
+	msg_push_bits(cmd, ds, 1);
+	for (i = 0; write && i < size; i++)
+		msg_push_bits(cmd, ((uint8_t *)data)[i], 8);
+
+	msg_push_crc(cmd);
+	msg_finish_cmd(cmd);
+}
+
+static void build_dpoll_command(struct fsi_msg *cmd, uint8_t slave_id)
+{
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	msg_push_bits(cmd, slave_id, 2);
+	msg_push_bits(cmd, FSI_CMD_DPOLL, 3);
+	msg_push_crc(cmd);
+	msg_finish_cmd(cmd);
+}
+
+static void build_epoll_command(struct fsi_msg *cmd, uint8_t slave_id)
+{
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	msg_push_bits(cmd, slave_id, 2);
+	msg_push_bits(cmd, FSI_CMD_EPOLL, 3);
+	msg_push_crc(cmd);
+	msg_finish_cmd(cmd);
+}
+
+static void build_term_command(struct fsi_msg *cmd, uint8_t slave_id)
+{
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	msg_push_bits(cmd, slave_id, 2);
+	msg_push_bits(cmd, FSI_CMD_TERM, 6);
+	msg_push_crc(cmd);
+	msg_finish_cmd(cmd);
+}
+
+static int do_copro_command(struct fsi_master_acf *master, uint32_t op)
+{
+	uint32_t timeout = 10000000;
+	uint8_t stat;
+
+	trace_fsi_master_acf_copro_command(master, op);
+
+	/* Send command */
+	iowrite32be(op, master->sram + CMD_STAT_REG);
+
+	/* Ring doorbell if any */
+	if (master->cvic)
+		iowrite32(0x2, master->cvic + CVIC_TRIG_REG);
+
+	/* Wait for status to indicate completion (or error) */
+	do {
+		if (timeout-- == 0) {
+			dev_warn(master->dev,
+				 "Timeout waiting for coprocessor completion\n");
+			return -ETIMEDOUT;
+		}
+		stat = ioread8(master->sram + CMD_STAT_REG);
+	} while(stat < STAT_COMPLETE || stat == 0xff);
+
+	if (stat == STAT_COMPLETE)
+		return 0;
+	switch(stat) {
+	case STAT_ERR_INVAL_CMD:
+		return -EINVAL;
+	case STAT_ERR_INVAL_IRQ:
+		return -EIO;
+	case STAT_ERR_MTOE:
+		return -ESHUTDOWN;
+	}
+	return -ENXIO;
+}
+
+static int clock_zeros(struct fsi_master_acf *master, int count)
+{
+	while (count) {
+		int rc, lcnt = min(count, 255);
+
+		rc = do_copro_command(master,
+				      CMD_IDLE_CLOCKS | (lcnt << CMD_REG_CLEN_SHIFT));
+		if (rc)
+			return rc;
+		count -= lcnt;
+	}
+	return 0;
+}
+
+static int send_request(struct fsi_master_acf *master, struct fsi_msg *cmd,
+			unsigned int resp_bits)
+{
+	uint32_t op;
+
+	trace_fsi_master_acf_send_request(master, cmd, resp_bits);
+
+	/* Store message into SRAM */
+	iowrite32be((cmd->msg >> 32), master->sram + CMD_DATA);
+	iowrite32be((cmd->msg & 0xffffffff), master->sram + CMD_DATA + 4);
+
+	op = CMD_COMMAND;
+	op |= cmd->bits << CMD_REG_CLEN_SHIFT;
+	if (resp_bits)
+		op |= resp_bits << CMD_REG_RLEN_SHIFT;
+
+	return do_copro_command(master, op);
+}
+
+static int read_copro_response(struct fsi_master_acf *master, uint8_t size,
+			       uint32_t *response, u8 *tag)
+{
+	uint8_t rtag = ioread8(master->sram + STAT_RTAG);
+	uint8_t rcrc = ioread8(master->sram + STAT_RCRC);
+	uint32_t rdata = 0;
+	uint32_t crc;
+	uint8_t ack;
+
+	*tag = ack = rtag & 3;
+
+	/* we have a whole message now; check CRC */
+	crc = crc4(0, 1, 1);
+	crc = crc4(crc, rtag, 4);
+	if (ack == FSI_RESP_ACK && size) {
+		rdata = ioread32be(master->sram + RSP_DATA);
+		crc = crc4(crc, rdata, size);
+		if (response)
+			*response = rdata;
+	}
+	crc = crc4(crc, rcrc, 4);
+
+	trace_fsi_master_acf_copro_response(master, rtag, rcrc, rdata, crc == 0);
+
+	if (crc) {
+		/*
+		 * Check if it's all 1's or all 0's, that probably means
+		 * the host is off
+		 */
+		if ((rtag == 0xf && rcrc == 0xf) || (rtag == 0 && rcrc == 0))
+			return -ENODEV;
+		dev_dbg(master->dev, "Bad response CRC !\n");
+		return -EAGAIN;
+	}
+	return 0;
+}
+
+static int send_term(struct fsi_master_acf *master, uint8_t slave)
+{
+	struct fsi_msg cmd;
+	uint8_t tag;
+	int rc;
+
+	build_term_command(&cmd, slave);
+
+	rc = send_request(master, &cmd, 0);
+	if (rc) {
+		dev_warn(master->dev, "Error %d sending term\n", rc);
+		return rc;
+	}
+
+	rc = read_copro_response(master, 0, NULL, &tag);
+	if (rc < 0) {
+		dev_err(master->dev,
+				"TERM failed; lost communication with slave\n");
+		return -EIO;
+	} else if (tag != FSI_RESP_ACK) {
+		dev_err(master->dev, "TERM failed; response %d\n", tag);
+		return -EIO;
+	}
+	return 0;
+}
+
+static void dump_trace(struct fsi_master_acf *master)
+{
+	char trbuf[52];
+	char *p;
+	int i;
+
+	dev_dbg(master->dev,
+		"CMDSTAT:%08x RTAG=%02x RCRC=%02x RDATA=%02x #INT=%08x\n",
+		ioread32be(master->sram + CMD_STAT_REG),
+		ioread8(master->sram + STAT_RTAG),
+		ioread8(master->sram + STAT_RCRC),
+		ioread32be(master->sram + RSP_DATA),
+		ioread32be(master->sram + INT_CNT));
+
+	for (i = 0; i < 512; i++) {
+		uint8_t v;
+		if ((i % 16) == 0)
+			p = trbuf;
+		v = ioread8(master->sram + TRACEBUF + i);
+		p += sprintf(p, "%02x ", v);
+		if (((i % 16) == 15) || v == TR_END)
+			dev_dbg(master->dev, "%s\n", trbuf);
+		if (v == TR_END)
+			break;
+	}
+}
+
+static int handle_response(struct fsi_master_acf *master,
+			   uint8_t slave, uint8_t size, void *data)
+{
+	int busy_count = 0, rc;
+	int crc_err_retries = 0;
+	struct fsi_msg cmd;
+	uint32_t response;
+	uint8_t tag;
+retry:
+	rc = read_copro_response(master, size, &response, &tag);
+
+	/* Handle retries on CRC errors */
+	if (rc == -EAGAIN) {
+		/* Too many retries ? */
+		if (crc_err_retries++ > FSI_CRC_ERR_RETRIES) {
+			/*
+			 * Pass it up as a -EIO otherwise upper level will retry
+			 * the whole command which isn't what we want here.
+			 */
+			rc = -EIO;
+			goto bail;
+		}
+		trace_fsi_master_acf_crc_rsp_error(master, crc_err_retries);
+		if (master->trace_enabled)
+			dump_trace(master);
+		rc = clock_zeros(master, FSI_MASTER_EPOLL_CLOCKS);
+		if (rc) {
+			dev_warn(master->dev,
+				 "Error %d clocking zeros for E_POLL\n", rc);
+			return rc;
+		}
+		build_epoll_command(&cmd, slave);
+		rc = send_request(master, &cmd, size);
+		if (rc) {
+			dev_warn(master->dev, "Error %d sending E_POLL\n", rc);
+			return -EIO;
+		}
+		goto retry;
+	}
+	if (rc)
+		return rc;
+
+	switch (tag) {
+	case FSI_RESP_ACK:
+		if (size && data) {
+			if (size == 32)
+				*(__be32 *)data = cpu_to_be32(response);
+			else if (size == 16)
+				*(__be16 *)data = cpu_to_be16(response);
+			else
+				*(u8 *)data = response;
+		}
+		break;
+	case FSI_RESP_BUSY:
+		/*
+		 * Its necessary to clock slave before issuing
+		 * d-poll, not indicated in the hardware protocol
+		 * spec. < 20 clocks causes slave to hang, 21 ok.
+		 */
+		dev_dbg(master->dev, "Busy, retrying...\n");
+		if (master->trace_enabled)
+			dump_trace(master);
+		rc = clock_zeros(master, FSI_MASTER_DPOLL_CLOCKS);
+		if (rc) {
+			dev_warn(master->dev,
+				 "Error %d clocking zeros for D_POLL\n", rc);
+			break;
+		}
+		if (busy_count++ < FSI_MASTER_MAX_BUSY) {
+			build_dpoll_command(&cmd, slave);
+			rc = send_request(master, &cmd, size);
+			if (rc) {
+				dev_warn(master->dev, "Error %d sending D_POLL\n", rc);
+				break;
+			}
+			goto retry;
+		}
+		dev_dbg(master->dev,
+			"ERR slave is stuck in busy state, issuing TERM\n");
+		send_term(master, slave);
+		rc = -EIO;
+		break;
+
+	case FSI_RESP_ERRA:
+		dev_dbg(master->dev, "ERRA received\n");
+		if (master->trace_enabled)
+			dump_trace(master);
+		rc = -EIO;
+		break;
+	case FSI_RESP_ERRC:
+		dev_dbg(master->dev, "ERRC received\n");
+		if (master->trace_enabled)
+			dump_trace(master);
+		rc = -EAGAIN;
+		break;
+	}
+ bail:
+	if (busy_count > 0) {
+		trace_fsi_master_acf_poll_response_busy(master, busy_count);
+	}
+
+	return rc;
+}
+
+static int fsi_master_acf_xfer(struct fsi_master_acf *master, uint8_t slave,
+			       struct fsi_msg *cmd, size_t resp_len, void *resp)
+{
+	int rc = -EAGAIN, retries = 0;
+
+	resp_len <<= 3;
+	while ((retries++) < FSI_CRC_ERR_RETRIES) {
+		rc = send_request(master, cmd, resp_len);
+		if (rc) {
+			if (rc != -ESHUTDOWN)
+				dev_warn(master->dev, "Error %d sending command\n", rc);
+			break;
+		}
+		rc = handle_response(master, slave, resp_len, resp);
+		if (rc != -EAGAIN)
+			break;
+		rc = -EIO;
+		dev_dbg(master->dev, "ECRC retry %d\n", retries);
+
+		/* Pace it a bit before retry */
+		msleep(1);
+	}
+
+	return rc;
+}
+
+static int fsi_master_acf_read(struct fsi_master *_master, int link,
+			       uint8_t id, uint32_t addr, void *val,
+			       size_t size)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+	struct fsi_msg cmd;
+	int rc;
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	dev_dbg(master->dev, "read id %d addr %x size %ud\n", id, addr, size);
+	build_ar_command(master, &cmd, id, addr, size, NULL);
+	rc = fsi_master_acf_xfer(master, id, &cmd, size, val);
+	last_address_update(master, id, rc == 0, addr);
+	if (rc)
+		dev_dbg(master->dev, "read id %d addr 0x%08x err: %d\n",
+			id, addr, rc);
+	mutex_unlock(&master->lock);
+
+	return rc;
+}
+
+static int fsi_master_acf_write(struct fsi_master *_master, int link,
+				uint8_t id, uint32_t addr, const void *val,
+				size_t size)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+	struct fsi_msg cmd;
+	int rc;
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	build_ar_command(master, &cmd, id, addr, size, val);
+	dev_dbg(master->dev, "write id %d addr %x size %ud raw_data: %08x\n",
+		id, addr, size, *(uint32_t *)val);
+	rc = fsi_master_acf_xfer(master, id, &cmd, 0, NULL);
+	last_address_update(master, id, rc == 0, addr);
+	if (rc)
+		dev_dbg(master->dev, "write id %d addr 0x%08x err: %d\n",
+			id, addr, rc);
+	mutex_unlock(&master->lock);
+
+	return rc;
+}
+
+static int fsi_master_acf_term(struct fsi_master *_master,
+			       int link, uint8_t id)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+	struct fsi_msg cmd;
+	int rc;
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	build_term_command(&cmd, id);
+	dev_dbg(master->dev, "term id %d\n", id);
+	rc = fsi_master_acf_xfer(master, id, &cmd, 0, NULL);
+	last_address_update(master, id, false, 0);
+	mutex_unlock(&master->lock);
+
+	return rc;
+}
+
+static int fsi_master_acf_break(struct fsi_master *_master, int link)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+	int rc;
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	if (master->external_mode) {
+		mutex_unlock(&master->lock);
+		return -EBUSY;
+	}
+	dev_dbg(master->dev, "sending BREAK\n");
+	rc = do_copro_command(master, CMD_BREAK);
+	last_address_update(master, 0, false, 0);
+	mutex_unlock(&master->lock);
+
+	/* Wait for logic reset to take effect */
+	udelay(200);
+
+	return rc;
+}
+
+static void reset_cf(struct fsi_master_acf *master)
+{
+	regmap_write(master->scu, SCU_COPRO_CTRL, SCU_COPRO_RESET);
+	usleep_range(20,20);
+	regmap_write(master->scu, SCU_COPRO_CTRL, 0);
+	usleep_range(20,20);
+}
+
+static void start_cf(struct fsi_master_acf *master)
+{
+	regmap_write(master->scu, SCU_COPRO_CTRL, SCU_COPRO_CLK_EN);
+}
+
+static void setup_ast2500_cf_maps(struct fsi_master_acf *master)
+{
+	/*
+	 * Note about byteswap setting: the bus is wired backwards,
+	 * so setting the byteswap bit actually makes the ColdFire
+	 * work "normally" for a BE processor, ie, put the MSB in
+	 * the lowest address byte.
+	 *
+	 * We thus need to set the bit for our main memory which
+	 * contains our program code. We create two mappings for
+	 * the register, one with each setting.
+	 *
+	 * Segments 2 and 3 has a "swapped" mapping (BE)
+	 * and 6 and 7 have a non-swapped mapping (LE) which allows
+	 * us to avoid byteswapping register accesses since the
+	 * registers are all LE.
+	 */
+
+	/* Setup segment 0 to our memory region */
+	regmap_write(master->scu, SCU_2500_COPRO_SEG0, master->cf_mem_addr |
+		     SCU_2500_COPRO_SEG_SWAP);
+
+	/* Segments 2 and 3 to sysregs with byteswap (for SRAM) */
+	regmap_write(master->scu, SCU_2500_COPRO_SEG2, SYSREG_BASE |
+		     SCU_2500_COPRO_SEG_SWAP);
+	regmap_write(master->scu, SCU_2500_COPRO_SEG3, SYSREG_BASE | 0x100000 |
+		     SCU_2500_COPRO_SEG_SWAP);
+
+	/* And segment 6 and 7 to sysregs no byteswap */
+	regmap_write(master->scu, SCU_2500_COPRO_SEG6, SYSREG_BASE);
+	regmap_write(master->scu, SCU_2500_COPRO_SEG7, SYSREG_BASE | 0x100000);
+
+	/* Memory cachable, regs and SRAM not cachable */
+	regmap_write(master->scu, SCU_2500_COPRO_CACHE_CTL,
+		     SCU_2500_COPRO_SEG0_CACHE_EN | SCU_2500_COPRO_CACHE_EN);
+}
+
+static void setup_ast2400_cf_maps(struct fsi_master_acf *master)
+{
+	/* Setup segment 0 to our memory region */
+	regmap_write(master->scu, SCU_2400_COPRO_SEG0, master->cf_mem_addr |
+		     SCU_2400_COPRO_SEG_SWAP);
+
+	/* Segments 2 to sysregs with byteswap (for SRAM) */
+	regmap_write(master->scu, SCU_2400_COPRO_SEG2, SYSREG_BASE |
+		     SCU_2400_COPRO_SEG_SWAP);
+
+	/* And segment 6 to sysregs no byteswap */
+	regmap_write(master->scu, SCU_2400_COPRO_SEG6, SYSREG_BASE);
+
+	/* Memory cachable, regs and SRAM not cachable */
+	regmap_write(master->scu, SCU_2400_COPRO_CACHE_CTL,
+		     SCU_2400_COPRO_SEG0_CACHE_EN | SCU_2400_COPRO_CACHE_EN);
+}
+
+static void setup_common_fw_config(struct fsi_master_acf *master,
+				   void __iomem *base)
+{
+	iowrite16be(master->gpio_clk_vreg, base + HDR_CLOCK_GPIO_VADDR);
+	iowrite16be(master->gpio_clk_dreg, base + HDR_CLOCK_GPIO_DADDR);
+	iowrite16be(master->gpio_dat_vreg, base + HDR_DATA_GPIO_VADDR);
+	iowrite16be(master->gpio_dat_dreg, base + HDR_DATA_GPIO_DADDR);
+	iowrite16be(master->gpio_tra_vreg, base + HDR_TRANS_GPIO_VADDR);
+	iowrite16be(master->gpio_tra_dreg, base + HDR_TRANS_GPIO_DADDR);
+	iowrite8(master->gpio_clk_bit, base + HDR_CLOCK_GPIO_BIT);
+	iowrite8(master->gpio_dat_bit, base + HDR_DATA_GPIO_BIT);
+	iowrite8(master->gpio_tra_bit, base + HDR_TRANS_GPIO_BIT);
+}
+
+static void setup_ast2500_fw_config(struct fsi_master_acf *master)
+{
+	void __iomem *base = master->cf_mem + HDR_OFFSET;
+
+	setup_common_fw_config(master, base);
+	iowrite32be(FW_CONTROL_USE_STOP, base + HDR_FW_CONTROL);
+}
+
+static void setup_ast2400_fw_config(struct fsi_master_acf *master)
+{
+	void __iomem *base = master->cf_mem + HDR_OFFSET;
+
+	setup_common_fw_config(master, base);
+	iowrite32be(FW_CONTROL_CONT_CLOCK|FW_CONTROL_DUMMY_RD, base + HDR_FW_CONTROL);
+}
+
+static int setup_gpios_for_copro(struct fsi_master_acf *master)
+{
+
+	int rc;
+
+	/* This aren't under ColdFire control, just set them up appropriately */
+	gpiod_direction_output(master->gpio_mux, 1);
+	gpiod_direction_output(master->gpio_enable, 1);
+
+	/* Those are under ColdFire control, let it configure them */
+	rc = aspeed_gpio_copro_grab_gpio(master->gpio_clk, &master->gpio_clk_vreg,
+					 &master->gpio_clk_dreg, &master->gpio_clk_bit);
+	if (rc) {
+		dev_err(master->dev, "failed to assign clock gpio to coprocessor\n");
+		return rc;
+	}
+	rc = aspeed_gpio_copro_grab_gpio(master->gpio_data, &master->gpio_dat_vreg,
+					 &master->gpio_dat_dreg, &master->gpio_dat_bit);
+	if (rc) {
+		dev_err(master->dev, "failed to assign data gpio to coprocessor\n");
+		aspeed_gpio_copro_release_gpio(master->gpio_clk);
+		return rc;
+	}
+	rc = aspeed_gpio_copro_grab_gpio(master->gpio_trans, &master->gpio_tra_vreg,
+					 &master->gpio_tra_dreg, &master->gpio_tra_bit);
+	if (rc) {
+		dev_err(master->dev, "failed to assign trans gpio to coprocessor\n");
+		aspeed_gpio_copro_release_gpio(master->gpio_clk);
+		aspeed_gpio_copro_release_gpio(master->gpio_data);
+		return rc;
+	}
+	return 0;
+}
+
+static void release_copro_gpios(struct fsi_master_acf *master)
+{
+	aspeed_gpio_copro_release_gpio(master->gpio_clk);
+	aspeed_gpio_copro_release_gpio(master->gpio_data);
+	aspeed_gpio_copro_release_gpio(master->gpio_trans);
+}
+
+static int load_copro_firmware(struct fsi_master_acf *master)
+{
+	const struct firmware *fw;
+	uint16_t sig = 0, wanted_sig;
+	const u8 *data;
+	size_t size = 0;
+	int rc;
+
+	/* Get the binary */
+	rc = request_firmware(&fw, FW_FILE_NAME, master->dev);
+	if (rc) {
+		dev_err(
+			master->dev, "Error %d to load firwmare '%s' !\n",
+			rc, FW_FILE_NAME);
+		return rc;
+	}
+
+	/* Which image do we want ? (shared vs. split clock/data GPIOs) */
+	if (master->gpio_clk_vreg == master->gpio_dat_vreg)
+		wanted_sig = SYS_SIG_SHARED;
+	else
+		wanted_sig = SYS_SIG_SPLIT;
+	dev_dbg(master->dev, "Looking for image sig %04x\n", wanted_sig);
+
+	/* Try to find it */
+	for (data = fw->data; data < (fw->data + fw->size);) {
+		sig = be16_to_cpup((__be16 *)(data + HDR_OFFSET + HDR_SYS_SIG));
+		size = be32_to_cpup((__be32 *)(data + HDR_OFFSET + HDR_FW_SIZE));
+		if (sig == wanted_sig)
+			break;
+		data += size;
+	}
+	if (sig != wanted_sig) {
+		dev_err(master->dev, "Failed to locate image sig %04x in FW blob\n",
+			wanted_sig);
+		return -ENODEV;
+	}
+	if (size > master->cf_mem_size) {
+		dev_err(master->dev, "FW size (%zd) bigger than memory reserve (%zd)\n",
+			fw->size, master->cf_mem_size);
+		rc = -ENOMEM;
+	} else {
+		memcpy_toio(master->cf_mem, data, size);
+	}
+	release_firmware(fw);
+
+	return rc;
+}
+
+static int check_firmware_image(struct fsi_master_acf *master)
+{
+	uint32_t fw_vers, fw_api, fw_options;
+
+	fw_vers = ioread16be(master->cf_mem + HDR_OFFSET + HDR_FW_VERS);
+	fw_api = ioread16be(master->cf_mem + HDR_OFFSET + HDR_API_VERS);
+	fw_options = ioread32be(master->cf_mem + HDR_OFFSET + HDR_FW_OPTIONS);
+	master->trace_enabled = !!(fw_options & FW_OPTION_TRACE_EN);
+
+	/* Check version and signature */
+	dev_info(master->dev, "ColdFire initialized, firmware v%d API v%d.%d (trace %s)\n",
+		 fw_vers, fw_api >> 8, fw_api & 0xff,
+		 master->trace_enabled ? "enabled" : "disabled");
+
+	if ((fw_api >> 8) != API_VERSION_MAJ) {
+		dev_err(master->dev, "Unsupported coprocessor API version !\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int copro_enable_sw_irq(struct fsi_master_acf *master)
+{
+	int timeout;
+	uint32_t val;
+
+	/*
+	 * Enable coprocessor interrupt input. I've had problems getting the
+	 * value to stick, so try in a loop
+	 */
+	for (timeout = 0; timeout < 10; timeout++) {
+		iowrite32(0x2, master->cvic + CVIC_EN_REG);
+		val = ioread32(master->cvic + CVIC_EN_REG);
+		if (val & 2)
+			break;
+		msleep(1);
+	}
+	if (!(val & 2)) {
+		dev_err(master->dev, "Failed to enable coprocessor interrupt !\n");
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int fsi_master_acf_setup(struct fsi_master_acf *master)
+{
+	int timeout, rc;
+	uint32_t val;
+
+	/* Make sure the ColdFire is stopped  */
+	reset_cf(master);
+
+	/*
+	 * Clear SRAM. This needs to happen before we setup the GPIOs
+	 * as we might start trying to arbitrate as soon as that happens.
+	 */
+	memset_io(master->sram, 0, SRAM_SIZE);
+
+	/* Configure GPIOs */
+	rc = setup_gpios_for_copro(master);
+	if (rc)
+		return rc;
+
+	/* Load the firmware into the reserved memory */
+	rc = load_copro_firmware(master);
+	if (rc)
+		return rc;
+
+	/* Read signature and check versions */
+	rc = check_firmware_image(master);
+	if (rc)
+		return rc;
+
+	/* Setup coldfire memory map */
+	if (master->is_ast2500) {
+		setup_ast2500_cf_maps(master);
+		setup_ast2500_fw_config(master);
+	} else {
+		setup_ast2400_cf_maps(master);
+		setup_ast2400_fw_config(master);
+	}
+
+	/* Start the ColdFire */
+	start_cf(master);
+
+	/* Wait for status register to indicate command completion
+	 * which signals the initialization is complete
+	 */
+	for (timeout = 0; timeout < 10; timeout++) {
+		val = ioread8(master->sram + CF_STARTED);
+		if (val)
+			break;
+		msleep(1);
+	}
+	if (!val) {
+		dev_err(master->dev, "Coprocessor startup timeout !\n");
+		rc = -ENODEV;
+		goto err;
+	}
+
+	/* Configure echo & send delay */
+	iowrite8(master->t_send_delay, master->sram + SEND_DLY_REG);
+	iowrite8(master->t_echo_delay, master->sram + ECHO_DLY_REG);
+
+	/* Enable SW interrupt to copro if any */
+	if (master->cvic) {
+		rc = copro_enable_sw_irq(master);
+		if (rc)
+			goto err;
+	}
+	return 0;
+ err:
+	/* An error occurred, don't leave the coprocessor running */
+	reset_cf(master);
+
+	/* Release the GPIOs */
+	release_copro_gpios(master);
+
+	return rc;
+}
+
+
+static void fsi_master_acf_terminate(struct fsi_master_acf *master)
+{
+	unsigned long flags;
+
+	/*
+	 * A GPIO arbitration requestion could come in while this is
+	 * happening. To avoid problems, we disable interrupts so it
+	 * cannot preempt us on this CPU
+	 */
+
+	local_irq_save(flags);
+
+	/* Stop the coprocessor */
+	reset_cf(master);
+
+	/* We mark the copro not-started */
+	iowrite32(0, master->sram + CF_STARTED);
+
+	/* We mark the ARB register as having given up arbitration to
+	 * deal with a potential race with the arbitration request
+	 */
+	iowrite8(ARB_ARM_ACK, master->sram + ARB_REG);
+
+	local_irq_restore(flags);
+
+	/* Return the GPIOs to the ARM */
+	release_copro_gpios(master);
+}
+
+static void fsi_master_acf_setup_external(struct fsi_master_acf *master)
+{
+	/* Setup GPIOs for external FSI master (FSP box) */
+	gpiod_direction_output(master->gpio_mux, 0);
+	gpiod_direction_output(master->gpio_trans, 0);
+	gpiod_direction_output(master->gpio_enable, 1);
+	gpiod_direction_input(master->gpio_clk);
+	gpiod_direction_input(master->gpio_data);
+}
+
+static int fsi_master_acf_link_enable(struct fsi_master *_master, int link)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+	int rc = -EBUSY;
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	if (!master->external_mode) {
+		gpiod_set_value(master->gpio_enable, 1);
+		rc = 0;
+	}
+	mutex_unlock(&master->lock);
+
+	return rc;
+}
+
+static int fsi_master_acf_link_config(struct fsi_master *_master, int link,
+				      u8 t_send_delay, u8 t_echo_delay)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(_master);
+
+	if (link != 0)
+		return -ENODEV;
+
+	mutex_lock(&master->lock);
+	master->t_send_delay = t_send_delay;
+	master->t_echo_delay = t_echo_delay;
+	dev_dbg(master->dev, "Changing delays: send=%d echo=%d\n",
+		t_send_delay, t_echo_delay);
+	iowrite8(master->t_send_delay, master->sram + SEND_DLY_REG);
+	iowrite8(master->t_echo_delay, master->sram + ECHO_DLY_REG);
+	mutex_unlock(&master->lock);
+
+	return 0;
+}
+
+static ssize_t external_mode_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct fsi_master_acf *master = dev_get_drvdata(dev);
+
+	return snprintf(buf, PAGE_SIZE - 1, "%u\n",
+			master->external_mode ? 1 : 0);
+}
+
+static ssize_t external_mode_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct fsi_master_acf *master = dev_get_drvdata(dev);
+	unsigned long val;
+	bool external_mode;
+	int err;
+
+	err = kstrtoul(buf, 0, &val);
+	if (err)
+		return err;
+
+	external_mode = !!val;
+
+	mutex_lock(&master->lock);
+
+	if (external_mode == master->external_mode) {
+		mutex_unlock(&master->lock);
+		return count;
+	}
+
+	master->external_mode = external_mode;
+	if (master->external_mode) {
+		fsi_master_acf_terminate(master);
+		fsi_master_acf_setup_external(master);
+	} else
+		fsi_master_acf_setup(master);
+
+	mutex_unlock(&master->lock);
+
+	fsi_master_rescan(&master->master);
+
+	return count;
+}
+
+static DEVICE_ATTR(external_mode, 0664,
+		external_mode_show, external_mode_store);
+
+static int fsi_master_acf_gpio_request(void *data)
+{
+	struct fsi_master_acf *master = data;
+	int timeout;
+	u8 val;
+
+	/* Note: This doesn't require holding out mutex */
+
+	/* Write reqest */
+	iowrite8(ARB_ARM_REQ, master->sram + ARB_REG);
+
+	/*
+	 * There is a race (which does happen at boot time) when we get an
+	 * arbitration request as we are either about to or just starting
+	 * the coprocessor.
+	 *
+	 * To handle it, we first check if we are running. If not yet we
+	 * check whether the copro is started in the SCU.
+	 *
+	 * If it's not started, we can basically just assume we have arbitration
+	 * and return. Otherwise, we wait normally expecting for the arbitration
+	 * to eventually complete.
+	 */
+	if (ioread32(master->sram + CF_STARTED) == 0) {
+		unsigned int reg = 0;
+
+		regmap_read(master->scu, SCU_COPRO_CTRL, &reg);
+		if (!(reg & SCU_COPRO_CLK_EN))
+			return 0;
+	}
+
+	/* Ring doorbell if any */
+	if (master->cvic)
+		iowrite32(0x2, master->cvic + CVIC_TRIG_REG);
+
+	for (timeout = 0; timeout < 10000; timeout++) {
+		val = ioread8(master->sram + ARB_REG);
+		if (val != ARB_ARM_REQ)
+			break;
+		udelay(1);
+	}
+
+	/* If it failed, override anyway */
+	if (val != ARB_ARM_ACK)
+		dev_warn(master->dev, "GPIO request arbitration timeout\n");
+
+	return 0;
+}
+
+static int fsi_master_acf_gpio_release(void *data)
+{
+	struct fsi_master_acf *master = data;
+
+	/* Write release */
+	iowrite8(0, master->sram + ARB_REG);
+
+	/* Ring doorbell if any */
+	if (master->cvic)
+		iowrite32(0x2, master->cvic + CVIC_TRIG_REG);
+
+	return 0;
+}
+
+static void fsi_master_acf_release(struct device *dev)
+{
+	struct fsi_master_acf *master = to_fsi_master_acf(dev_to_fsi_master(dev));
+
+	/* Cleanup, stop coprocessor */
+	mutex_lock(&master->lock);
+	fsi_master_acf_terminate(master);
+	aspeed_gpio_copro_set_ops(NULL, NULL);
+	mutex_unlock(&master->lock);
+
+	/* Free resources */
+	gen_pool_free(master->sram_pool, (unsigned long)master->sram, SRAM_SIZE);
+	of_node_put(dev_of_node(master->dev));
+
+	kfree(master);
+}
+
+static const struct aspeed_gpio_copro_ops fsi_master_acf_gpio_ops = {
+	.request_access = fsi_master_acf_gpio_request,
+	.release_access = fsi_master_acf_gpio_release,
+};
+
+static int fsi_master_acf_probe(struct platform_device *pdev)
+{
+	struct device_node *np, *mnode = dev_of_node(&pdev->dev);
+	struct genpool_data_fixed gpdf;
+	struct fsi_master_acf *master;
+	struct gpio_desc *gpio;
+	struct resource res;
+	uint32_t cf_mem_align;
+	int rc;
+
+	master = kzalloc(sizeof(*master), GFP_KERNEL);
+	if (!master)
+		return -ENOMEM;
+
+	master->dev = &pdev->dev;
+	master->master.dev.parent = master->dev;
+	master->last_addr = LAST_ADDR_INVALID;
+
+	/* AST2400 vs. AST2500 */
+	master->is_ast2500 = of_device_is_compatible(mnode, "aspeed,ast2500-cf-fsi-master");
+
+	/* Grab the SCU, we'll need to access it to configure the coprocessor */
+	if (master->is_ast2500)
+		master->scu = syscon_regmap_lookup_by_compatible("aspeed,ast2500-scu");
+	else
+		master->scu = syscon_regmap_lookup_by_compatible("aspeed,ast2400-scu");
+	if (IS_ERR(master->scu)) {
+		dev_err(&pdev->dev, "failed to find SCU regmap\n");
+		rc = PTR_ERR(master->scu);
+		goto err_free;
+	}
+
+	/* Grab all the GPIOs we need */
+	gpio = devm_gpiod_get(&pdev->dev, "clock", 0);
+	if (IS_ERR(gpio)) {
+		dev_err(&pdev->dev, "failed to get clock gpio\n");
+		rc = PTR_ERR(gpio);
+		goto err_free;
+	}
+	master->gpio_clk = gpio;
+
+	gpio = devm_gpiod_get(&pdev->dev, "data", 0);
+	if (IS_ERR(gpio)) {
+		dev_err(&pdev->dev, "failed to get data gpio\n");
+		rc = PTR_ERR(gpio);
+		goto err_free;
+	}
+	master->gpio_data = gpio;
+
+	/* Optional GPIOs */
+	gpio = devm_gpiod_get_optional(&pdev->dev, "trans", 0);
+	if (IS_ERR(gpio)) {
+		dev_err(&pdev->dev, "failed to get trans gpio\n");
+		rc = PTR_ERR(gpio);
+		goto err_free;
+	}
+	master->gpio_trans = gpio;
+
+	gpio = devm_gpiod_get_optional(&pdev->dev, "enable", 0);
+	if (IS_ERR(gpio)) {
+		dev_err(&pdev->dev, "failed to get enable gpio\n");
+		rc = PTR_ERR(gpio);
+		goto err_free;
+	}
+	master->gpio_enable = gpio;
+
+	gpio = devm_gpiod_get_optional(&pdev->dev, "mux", 0);
+	if (IS_ERR(gpio)) {
+		dev_err(&pdev->dev, "failed to get mux gpio\n");
+		rc = PTR_ERR(gpio);
+		goto err_free;
+	}
+	master->gpio_mux = gpio;
+
+	/* Grab the reserved memory region (use DMA API instead ?) */
+	np = of_parse_phandle(mnode, "memory-region", 0);
+	if (!np) {
+		dev_err(&pdev->dev, "Didn't find reserved memory\n");
+		rc = -EINVAL;
+		goto err_free;
+	}
+	rc = of_address_to_resource(np, 0, &res);
+	of_node_put(np);
+	if (rc) {
+		dev_err(&pdev->dev, "Couldn't address to resource for reserved memory\n");
+		rc = -ENOMEM;
+		goto err_free;
+	}
+	master->cf_mem_size = resource_size(&res);
+	master->cf_mem_addr = (uint32_t)res.start;
+	cf_mem_align = master->is_ast2500 ? 0x00100000 : 0x00200000;
+	if (master->cf_mem_addr & (cf_mem_align - 1)) {
+		dev_err(&pdev->dev, "Reserved memory has insufficient alignment\n");
+		rc = -ENOMEM;
+		goto err_free;
+	}
+	master->cf_mem = devm_ioremap_resource(&pdev->dev, &res);
+ 	if (IS_ERR(master->cf_mem)) {
+		rc = PTR_ERR(master->cf_mem);
+		dev_err(&pdev->dev, "Error %d mapping coldfire memory\n", rc);
+ 		goto err_free;
+	}
+	dev_dbg(&pdev->dev, "DRAM allocation @%x\n", master->cf_mem_addr);
+
+	/* AST2500 has a SW interrupt to the coprocessor */
+	if (master->is_ast2500) {
+		/* Grab the CVIC (ColdFire interrupts controller) */
+		np = of_parse_phandle(mnode, "aspeed,cvic", 0);
+		if (!np) {
+			dev_err(&pdev->dev, "Didn't find CVIC\n");
+			rc = -EINVAL;
+			goto err_free;
+		}
+		master->cvic = devm_of_iomap(&pdev->dev, np, 0, NULL);
+		if (IS_ERR(master->cvic)) {
+			rc = PTR_ERR(master->cvic);
+			dev_err(&pdev->dev, "Error %d mapping CVIC\n", rc);
+			goto err_free;
+		}
+		rc = of_property_read_u32(np, "copro-sw-interrupts",
+					  &master->cvic_sw_irq);
+		if (rc) {
+			dev_err(&pdev->dev, "Can't find coprocessor SW interrupt\n");
+			goto err_free;
+		}
+	}
+
+	/* Grab the SRAM */
+	master->sram_pool = of_gen_pool_get(dev_of_node(&pdev->dev), "aspeed,sram", 0);
+	if (!master->sram_pool) {
+		rc = -ENODEV;
+		dev_err(&pdev->dev, "Can't find sram pool\n");
+		goto err_free;
+	}
+
+	/* Current microcode only deals with fixed location in SRAM */
+	gpdf.offset = 0;
+	master->sram = (void __iomem *)gen_pool_alloc_algo(master->sram_pool, SRAM_SIZE,
+							   gen_pool_fixed_alloc, &gpdf);
+	if (!master->sram) {
+		rc = -ENOMEM;
+		dev_err(&pdev->dev, "Failed to allocate sram from pool\n");
+		goto err_free;
+	}
+	dev_dbg(&pdev->dev, "SRAM allocation @%lx\n",
+		(unsigned long)gen_pool_virt_to_phys(master->sram_pool,
+						     (unsigned long)master->sram));
+
+	/*
+	 * Hookup with the GPIO driver for arbitration of GPIO banks
+	 * ownership.
+	 */
+	aspeed_gpio_copro_set_ops(&fsi_master_acf_gpio_ops, master);
+
+	/* Default FSI command delays */
+	master->t_send_delay = FSI_SEND_DELAY_CLOCKS;
+	master->t_echo_delay = FSI_ECHO_DELAY_CLOCKS;
+	master->master.n_links = 1;
+	if (master->is_ast2500)
+		master->master.flags = FSI_MASTER_FLAG_SWCLOCK;
+	master->master.read = fsi_master_acf_read;
+	master->master.write = fsi_master_acf_write;
+	master->master.term = fsi_master_acf_term;
+	master->master.send_break = fsi_master_acf_break;
+	master->master.link_enable = fsi_master_acf_link_enable;
+	master->master.link_config = fsi_master_acf_link_config;
+	master->master.dev.of_node = of_node_get(dev_of_node(master->dev));
+	master->master.dev.release = fsi_master_acf_release;
+	platform_set_drvdata(pdev, master);
+	mutex_init(&master->lock);
+
+	mutex_lock(&master->lock);
+	rc = fsi_master_acf_setup(master);
+	mutex_unlock(&master->lock);
+	if (rc)
+		goto release_of_dev;
+
+	rc = device_create_file(&pdev->dev, &dev_attr_external_mode);
+	if (rc)
+		goto stop_copro;
+
+	rc = fsi_master_register(&master->master);
+	if (!rc)
+		return 0;
+
+	device_remove_file(master->dev, &dev_attr_external_mode);
+	put_device(&master->master.dev);
+	return rc;
+
+ stop_copro:
+	fsi_master_acf_terminate(master);
+ release_of_dev:
+	aspeed_gpio_copro_set_ops(NULL, NULL);
+	gen_pool_free(master->sram_pool, (unsigned long)master->sram, SRAM_SIZE);
+	of_node_put(dev_of_node(master->dev));
+ err_free:
+	kfree(master);
+	return rc;
+}
+
+
+static int fsi_master_acf_remove(struct platform_device *pdev)
+{
+	struct fsi_master_acf *master = platform_get_drvdata(pdev);
+
+	device_remove_file(master->dev, &dev_attr_external_mode);
+
+	fsi_master_unregister(&master->master);
+
+	return 0;
+}
+
+static const struct of_device_id fsi_master_acf_match[] = {
+	{ .compatible = "aspeed,ast2400-cf-fsi-master" },
+	{ .compatible = "aspeed,ast2500-cf-fsi-master" },
+	{ },
+};
+
+static struct platform_driver fsi_master_acf = {
+	.driver = {
+		.name		= "fsi-master-acf",
+		.of_match_table	= fsi_master_acf_match,
+	},
+	.probe	= fsi_master_acf_probe,
+	.remove = fsi_master_acf_remove,
+};
+
+module_platform_driver(fsi_master_acf);
+MODULE_LICENSE("GPL");
diff --git a/include/trace/events/fsi_master_ast_cf.h b/include/trace/events/fsi_master_ast_cf.h
new file mode 100644
index 000000000000..a0fdfa58622a
--- /dev/null
+++ b/include/trace/events/fsi_master_ast_cf.h
@@ -0,0 +1,150 @@
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fsi_master_ast_cf
+
+#if !defined(_TRACE_FSI_MASTER_ACF_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FSI_MASTER_ACF_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(fsi_master_acf_copro_command,
+	TP_PROTO(const struct fsi_master_acf *master, uint32_t op),
+	TP_ARGS(master, op),
+	TP_STRUCT__entry(
+		__field(int,		master_idx)
+		__field(uint32_t,	op)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->op = op;
+	),
+	TP_printk("fsi-acf%d command %08x",
+		  __entry->master_idx, __entry->op
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_send_request,
+	TP_PROTO(const struct fsi_master_acf *master, const struct fsi_msg *cmd, u8 rbits),
+	TP_ARGS(master, cmd, rbits),
+	TP_STRUCT__entry(
+		__field(int,		master_idx)
+		__field(uint64_t,	msg)
+		__field(u8,		bits)
+		__field(u8,		rbits)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->msg = cmd->msg;
+		__entry->bits = cmd->bits;
+		__entry->rbits = rbits;
+	),
+	TP_printk("fsi-acf%d cmd: %016llx/%d/%d",
+		__entry->master_idx, (unsigned long long)__entry->msg,
+		__entry->bits, __entry->rbits
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_copro_response,
+	TP_PROTO(const struct fsi_master_acf *master, u8 rtag, u8 rcrc, __be32 rdata, bool crc_ok),
+	TP_ARGS(master, rtag, rcrc, rdata, crc_ok),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(u8,	rtag)
+		__field(u8,	rcrc)
+		__field(u32,    rdata)
+		__field(bool,   crc_ok)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->rtag = rtag;
+		__entry->rcrc = rcrc;
+		__entry->rdata = be32_to_cpu(rdata);
+		__entry->crc_ok = crc_ok;
+	),
+	TP_printk("fsi-acf%d rsp: tag=%04x crc=%04x data=%08x %c\n",
+		__entry->master_idx, __entry->rtag, __entry->rcrc,
+		__entry->rdata, __entry->crc_ok ? ' ' : '!'
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_crc_rsp_error,
+	TP_PROTO(const struct fsi_master_acf *master, int retries),
+	TP_ARGS(master, retries),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(int,	retries)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->retries = retries;
+	),
+	TP_printk("fsi-acf%d CRC error in response retry %d",
+		__entry->master_idx, __entry->retries
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_poll_response_busy,
+	TP_PROTO(const struct fsi_master_acf *master, int busy_count),
+	TP_ARGS(master, busy_count),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(int,	busy_count)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->busy_count = busy_count;
+	),
+	TP_printk("fsi-acf%d: device reported busy %d times",
+		__entry->master_idx, __entry->busy_count
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_cmd_abs_addr,
+	TP_PROTO(const struct fsi_master_acf *master, u32 addr),
+	TP_ARGS(master, addr),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(u32,	addr)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->addr = addr;
+	),
+	TP_printk("fsi-acf%d: Sending ABS_ADR %06x",
+		__entry->master_idx, __entry->addr
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_cmd_rel_addr,
+	TP_PROTO(const struct fsi_master_acf *master, u32 rel_addr),
+	TP_ARGS(master, rel_addr),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+		__field(u32,	rel_addr)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+		__entry->rel_addr = rel_addr;
+	),
+	TP_printk("fsi-acf%d: Sending REL_ADR %03x",
+		__entry->master_idx, __entry->rel_addr
+	)
+);
+
+TRACE_EVENT(fsi_master_acf_cmd_same_addr,
+	TP_PROTO(const struct fsi_master_acf *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-acf%d: Sending SAME_ADR",
+		__entry->master_idx
+	)
+);
+
+#endif /* _TRACE_FSI_MASTER_ACF_H */
+
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 40c6f9c28ef03f2f2c3ee58c2447a6e6b9a713f2 Mon Sep 17 00:00:00 2001
From: Revanth Rajashekar <revanth.rajashekar@intel.com>
Date: Fri, 15 Jun 2018 12:39:27 -0600
Subject: nvme.h: resync with nvme-cli

Added some feature ids present in nvme-cli but not kernel.

Signed-off-by: Revanth Rajashekar <revanth.rajashekar@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 2950ce957656..80dfedcf0bf7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -749,6 +749,11 @@ enum {
 	NVME_FEAT_HOST_MEM_BUF	= 0x0d,
 	NVME_FEAT_TIMESTAMP	= 0x0e,
 	NVME_FEAT_KATO		= 0x0f,
+	NVME_FEAT_HCTM		= 0x10,
+	NVME_FEAT_NOPSC		= 0x11,
+	NVME_FEAT_RRL		= 0x12,
+	NVME_FEAT_PLM_CONFIG	= 0x13,
+	NVME_FEAT_PLM_WINDOW	= 0x14,
 	NVME_FEAT_SW_PROGRESS	= 0x80,
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
-- 
cgit v1.2.3


From 1a8861f117608e269647fb09658ce77bd5899d42 Mon Sep 17 00:00:00 2001
From: "Daniel M. Lambea" <dmlambea@gmail.com>
Date: Tue, 17 Jul 2018 22:35:36 +0100
Subject: HID: cougar: make compare_device_paths reusable

The function compare_device_paths from wacom_sys.c is generic
and useful for other drivers. Move the function to hid-core and
rename it as hid_compare_device_paths.

Signed-off-by: Daniel M. Lambea <dmlambea@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c  | 23 +++++++++++++++++++++++
 drivers/hid/wacom_sys.c | 18 +++---------------
 include/linux/hid.h     |  2 ++
 3 files changed, 28 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3942ee61bd1c..402ad974b31c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1939,6 +1939,29 @@ static int hid_bus_match(struct device *dev, struct device_driver *drv)
 	return hid_match_device(hdev, hdrv) != NULL;
 }
 
+/**
+ * hid_compare_device_paths - check if both devices share the same path
+ * @hdev_a: hid device
+ * @hdev_b: hid device
+ * @separator: char to use as separator
+ *
+ * Check if two devices share the same path up to the last occurrence of
+ * the separator char. Both paths must exist (i.e., zero-length paths
+ * don't match).
+ */
+bool hid_compare_device_paths(struct hid_device *hdev_a,
+			      struct hid_device *hdev_b, char separator)
+{
+	int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys;
+	int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys;
+
+	if (n1 != n2 || n1 <= 0 || n2 <= 0)
+		return false;
+
+	return !strncmp(hdev_a->phys, hdev_b->phys, n1);
+}
+EXPORT_SYMBOL_GPL(hid_compare_device_paths);
+
 static int hid_device_probe(struct device *dev)
 {
 	struct hid_driver *hdrv = to_hid_driver(dev->driver);
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index d6797535fff9..ffe59a19b3a3 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -703,18 +703,6 @@ struct wacom_hdev_data {
 static LIST_HEAD(wacom_udev_list);
 static DEFINE_MUTEX(wacom_udev_list_lock);
 
-static bool compare_device_paths(struct hid_device *hdev_a,
-		struct hid_device *hdev_b, char separator)
-{
-	int n1 = strrchr(hdev_a->phys, separator) - hdev_a->phys;
-	int n2 = strrchr(hdev_b->phys, separator) - hdev_b->phys;
-
-	if (n1 != n2 || n1 <= 0 || n2 <= 0)
-		return false;
-
-	return !strncmp(hdev_a->phys, hdev_b->phys, n1);
-}
-
 static bool wacom_are_sibling(struct hid_device *hdev,
 		struct hid_device *sibling)
 {
@@ -737,10 +725,10 @@ static bool wacom_are_sibling(struct hid_device *hdev,
 	 * the same physical parent device path.
 	 */
 	if (hdev->vendor == sibling->vendor && hdev->product == sibling->product) {
-		if (!compare_device_paths(hdev, sibling, '/'))
+		if (!hid_compare_device_paths(hdev, sibling, '/'))
 			return false;
 	} else {
-		if (!compare_device_paths(hdev, sibling, '.'))
+		if (!hid_compare_device_paths(hdev, sibling, '.'))
 			return false;
 	}
 
@@ -787,7 +775,7 @@ static struct wacom_hdev_data *wacom_get_hdev_data(struct hid_device *hdev)
 
 	/* Try to find an already-probed interface from the same device */
 	list_for_each_entry(data, &wacom_udev_list, list) {
-		if (compare_device_paths(hdev, data->dev, '/')) {
+		if (hid_compare_device_paths(hdev, data->dev, '/')) {
 			kref_get(&data->kref);
 			return data;
 		}
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 773bcb1d4044..938d9ba6d7cd 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -894,6 +894,8 @@ const struct hid_device_id *hid_match_id(const struct hid_device *hdev,
 					 const struct hid_device_id *id);
 const struct hid_device_id *hid_match_device(struct hid_device *hdev,
 					     struct hid_driver *hdrv);
+bool hid_compare_device_paths(struct hid_device *hdev_a,
+			      struct hid_device *hdev_b, char separator);
 s32 hid_snto32(__u32 value, unsigned n);
 __u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
 		     unsigned offset, unsigned n);
-- 
cgit v1.2.3


From 977d5ad39f3ea12ac0bd51d75020cea5ecdca235 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Jul 2018 17:19:11 +0300
Subject: ACPI: Convert ACPI reference args to generic fwnode reference args

Convert all users of struct acpi_reference_args to more generic
fwnode_reference_args. This will

 1) avoid an ACPI specific references to device nodes with integer
    arguments as well as

 2) allow making references to nodes other than device nodes in ACPI.

As a by-product, convert the fwnode interger arguments to u64. The
arguments were 64-bit integers on ACPI but the fwnode arguments were
just 32-bit.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c                           | 36 ++++++++---------------
 drivers/gpio/gpiolib-acpi.c                       | 11 ++++---
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c        | 10 +++----
 drivers/media/v4l2-core/v4l2-fwnode.c             |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c    |  6 ++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c |  6 ++--
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  8 +++--
 include/linux/acpi.h                              | 17 ++++-------
 include/linux/fwnode.h                            |  2 +-
 9 files changed, 43 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 5815356ea6ad..3fa40010fd67 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -579,7 +579,7 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data,
  */
 int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 	const char *propname, size_t index, size_t num_args,
-	struct acpi_reference_args *args)
+	struct fwnode_reference_args *args)
 {
 	const union acpi_object *element, *end;
 	const union acpi_object *obj;
@@ -607,7 +607,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 		if (ret)
 			return ret == -ENODEV ? -EINVAL : ret;
 
-		args->adev = device;
+		args->fwnode = acpi_fwnode_handle(device);
 		args->nargs = 0;
 		return 0;
 	}
@@ -653,11 +653,11 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 					return -EINVAL;
 			}
 
-			if (nargs > MAX_ACPI_REFERENCE_ARGS)
+			if (nargs > NR_FWNODE_REFERENCE_ARGS)
 				return -EINVAL;
 
 			if (idx == index) {
-				args->adev = device;
+				args->fwnode = acpi_fwnode_handle(device);
 				args->nargs = nargs;
 				for (i = 0; i < nargs; i++)
 					args->args[i] = element[i].integer.value;
@@ -1089,7 +1089,7 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 {
 	struct fwnode_handle *fwnode;
 	unsigned int port_nr, endpoint_nr;
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	int ret;
 
 	memset(&args, 0, sizeof(args));
@@ -1098,6 +1098,10 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	if (ret)
 		return ret;
 
+	/* Ensure this is a device node. */
+	if (!is_acpi_device_node(args.fwnode))
+		return -ENODEV;
+
 	/*
 	 * Always require two arguments with the reference: port and
 	 * endpoint indices.
@@ -1105,7 +1109,7 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	if (args.nargs != 2)
 		return -EPROTO;
 
-	fwnode = acpi_fwnode_handle(args.adev);
+	fwnode = args.fwnode;
 	port_nr = args.args[0];
 	endpoint_nr = args.args[1];
 
@@ -1209,24 +1213,8 @@ acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 			       unsigned int args_count, unsigned int index,
 			       struct fwnode_reference_args *args)
 {
-	struct acpi_reference_args acpi_args;
-	unsigned int i;
-	int ret;
-
-	ret = __acpi_node_get_property_reference(fwnode, prop, index,
-						 args_count, &acpi_args);
-	if (ret < 0)
-		return ret;
-	if (!args)
-		return 0;
-
-	args->nargs = acpi_args.nargs;
-	args->fwnode = acpi_fwnode_handle(acpi_args.adev);
-
-	for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++)
-		args->args[i] = i < acpi_args.nargs ? acpi_args.args[i] : 0;
-
-	return 0;
+	return __acpi_node_get_property_reference(fwnode, prop, index,
+						  args_count, args);
 }
 
 static struct fwnode_handle *
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index e2232cbcec8b..4e8fdae1cde4 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(devm_acpi_dev_remove_driver_gpios);
 
 static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
 				      const char *name, int index,
-				      struct acpi_reference_args *args,
+				      struct fwnode_reference_args *args,
 				      unsigned int *quirks)
 {
 	const struct acpi_gpio_mapping *gm;
@@ -365,7 +365,7 @@ static bool acpi_get_driver_gpio_data(struct acpi_device *adev,
 		if (!strcmp(name, gm->name) && gm->data && index < gm->size) {
 			const struct acpi_gpio_params *par = gm->data + index;
 
-			args->adev = adev;
+			args->fwnode = acpi_fwnode_handle(adev);
 			args->args[0] = par->crs_entry_index;
 			args->args[1] = par->line_index;
 			args->args[2] = par->active_low;
@@ -528,7 +528,7 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 				     const char *propname, int index,
 				     struct acpi_gpio_lookup *lookup)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	unsigned int quirks = 0;
 	int ret;
 
@@ -549,6 +549,8 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 	 * The property was found and resolved, so need to lookup the GPIO based
 	 * on returned args.
 	 */
+	if (!to_acpi_device_node(args.fwnode))
+		return -EINVAL;
 	if (args.nargs != 3)
 		return -EPROTO;
 
@@ -556,8 +558,9 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
 	lookup->pin_index = args.args[1];
 	lookup->active_low = !!args.args[2];
 
-	lookup->info.adev = args.adev;
+	lookup->info.adev = to_acpi_device_node(args.fwnode);
 	lookup->info.quirks = quirks;
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8013d69c5ac4..8444234ed092 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1435,7 +1435,7 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
 		}
 		fwnode = &dsaf_node->fwnode;
 	} else if (is_acpi_device_node(dev->fwnode)) {
-		struct acpi_reference_args args;
+		struct fwnode_reference_args args;
 
 		ret = acpi_node_get_property_reference(dev->fwnode,
 						       "dsaf-handle", 0, &args);
@@ -1443,7 +1443,7 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
 			dev_err(dev, "could not find dsaf-handle\n");
 			return ret;
 		}
-		fwnode = acpi_fwnode_handle(args.adev);
+		fwnode = args.fwnode;
 	} else {
 		dev_err(dev, "cannot read data from DT or ACPI\n");
 		return -ENXIO;
@@ -4835,16 +4835,14 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
 				continue;
 			pdev = of_find_device_by_node(net_node);
 		} else if (is_acpi_device_node(dev->fwnode)) {
-			struct acpi_reference_args args;
-			struct fwnode_handle *fwnode;
+			struct fwnode_reference_args args;
 
 			ret = acpi_node_get_property_reference(dev->fwnode,
 							       "eth-handle",
 							       i, &args);
 			if (ret)
 				continue;
-			fwnode = acpi_fwnode_handle(args.adev);
-			pdev = hns_roce_find_pdev(fwnode);
+			pdev = hns_roce_find_pdev(args.fwnode);
 		} else {
 			dev_err(dev, "cannot read data from DT or ACPI\n");
 			return -ENXIO;
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 3f77aa318035..82595cebc0b8 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -739,7 +739,7 @@ static struct fwnode_handle *v4l2_fwnode_reference_get_int_prop(
 	const char * const *props, unsigned int nprops)
 {
 	struct fwnode_reference_args fwnode_args;
-	unsigned int *args = fwnode_args.args;
+	u64 *args = fwnode_args.args;
 	struct fwnode_handle *child;
 	int ret;
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 3188f553da35..078a04dc1182 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -836,19 +836,19 @@ static void xgene_enet_adjust_link(struct net_device *ndev)
 #ifdef CONFIG_ACPI
 static struct acpi_device *acpi_phy_find_device(struct device *dev)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	struct fwnode_handle *fw_node;
 	int status;
 
 	fw_node = acpi_fwnode_handle(ACPI_COMPANION(dev));
 	status = acpi_node_get_property_reference(fw_node, "phy-handle", 0,
 						  &args);
-	if (ACPI_FAILURE(status)) {
+	if (ACPI_FAILURE(status) || !is_acpi_device_node(args.fwnode)) {
 		dev_dbg(dev, "No matching phy in ACPI table\n");
 		return NULL;
 	}
 
-	return args.adev;
+	return to_acpi_device_node(args.fwnode);
 }
 #endif
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 9dcc5765f11f..794516718d9d 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -708,7 +708,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb,
 
 static int hns_mac_register_phy(struct hns_mac_cb *mac_cb)
 {
-	struct acpi_reference_args args;
+	struct fwnode_reference_args args;
 	struct platform_device *pdev;
 	struct mii_bus *mii_bus;
 	int rc;
@@ -722,13 +722,15 @@ static int hns_mac_register_phy(struct hns_mac_cb *mac_cb)
 			mac_cb->fw_port, "mdio-node", 0, &args);
 	if (rc)
 		return rc;
+	if (!is_acpi_device_node(args.fwnode))
+		return -EINVAL;
 
 	addr = hns_mac_phy_parse_addr(mac_cb->dev, mac_cb->fw_port);
 	if (addr < 0)
 		return addr;
 
 	/* dev address in adev */
-	pdev = hns_dsaf_find_platform_device(acpi_fwnode_handle(args.adev));
+	pdev = hns_dsaf_find_platform_device(args.fwnode);
 	if (!pdev) {
 		dev_err(mac_cb->dev, "mac%d mdio pdev is NULL\n",
 			mac_cb->mac_id);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index ef9ef703d13a..5608f807d7ba 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2377,7 +2377,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 		}
 		priv->fwnode = &ae_node->fwnode;
 	} else if (is_acpi_node(dev->fwnode)) {
-		struct acpi_reference_args args;
+		struct fwnode_reference_args args;
 
 		if (acpi_dev_found(hns_enet_acpi_match[0].id))
 			priv->enet_ver = AE_VERSION_1;
@@ -2393,7 +2393,11 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 			dev_err(dev, "not find ae-handle\n");
 			goto out_read_prop_fail;
 		}
-		priv->fwnode = acpi_fwnode_handle(args.adev);
+		if (!is_acpi_device_node(args.fwnode)) {
+			ret = -EINVAL;
+			goto out_read_prop_fail;
+		}
+		priv->fwnode = args.fwnode;
 	} else {
 		dev_err(dev, "cannot read cfg data from OF or acpi\n");
 		return -ENXIO;
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index e54f40974eb0..11cf39719fd8 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1058,27 +1058,20 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 
 /* Device properties */
 
-#define MAX_ACPI_REFERENCE_ARGS	8
-struct acpi_reference_args {
-	struct acpi_device *adev;
-	size_t nargs;
-	u64 args[MAX_ACPI_REFERENCE_ARGS];
-};
-
 #ifdef CONFIG_ACPI
 int acpi_dev_get_property(const struct acpi_device *adev, const char *name,
 			  acpi_object_type type, const union acpi_object **obj);
 int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				const char *name, size_t index, size_t num_args,
-				struct acpi_reference_args *args);
+				struct fwnode_reference_args *args);
 
 static inline int acpi_node_get_property_reference(
 				const struct fwnode_handle *fwnode,
 				const char *name, size_t index,
-				struct acpi_reference_args *args)
+				struct fwnode_reference_args *args)
 {
 	return __acpi_node_get_property_reference(fwnode, name, index,
-		MAX_ACPI_REFERENCE_ARGS, args);
+		NR_FWNODE_REFERENCE_ARGS, args);
 }
 
 int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
@@ -1169,7 +1162,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev,
 static inline int
 __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				const char *name, size_t index, size_t num_args,
-				struct acpi_reference_args *args)
+				struct fwnode_reference_args *args)
 {
 	return -ENXIO;
 }
@@ -1177,7 +1170,7 @@ __acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 static inline int
 acpi_node_get_property_reference(const struct fwnode_handle *fwnode,
 				 const char *name, size_t index,
-				 struct acpi_reference_args *args)
+				 struct fwnode_reference_args *args)
 {
 	return -ENXIO;
 }
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 4fe8f289b3f6..faebf0ca0686 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -45,7 +45,7 @@ struct fwnode_endpoint {
 struct fwnode_reference_args {
 	struct fwnode_handle *fwnode;
 	unsigned int nargs;
-	unsigned int args[NR_FWNODE_REFERENCE_ARGS];
+	u64 args[NR_FWNODE_REFERENCE_ARGS];
 };
 
 /**
-- 
cgit v1.2.3


From 0ef7478639c5165a672f01b4024aacfffa951813 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 17 Jul 2018 17:19:14 +0300
Subject: ACPI: property: Make the ACPI graph API private

The fwnode graph API is preferred over the ACPI graph API. Therefore
make the ACPI graph API private, and use it as a back-end for the
fwnode graph API only.

Unused functionality is removed while the functionality actually used
remains the same.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 83 ++++++++++---------------------------------------
 include/linux/acpi.h    |  8 -----
 2 files changed, 16 insertions(+), 75 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 5878d3678b38..19bdada64435 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1033,10 +1033,10 @@ struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
  * @prev: Previous endpoint node or %NULL to get the first
  *
  * Looks up next endpoint ACPI firmware node below a given @fwnode. Returns
- * %NULL if there is no next endpoint, ERR_PTR() in case of error. In case
- * of success the next endpoint is returned.
+ * %NULL if there is no next endpoint or in case of error. In case of success
+ * the next endpoint is returned.
  */
-struct fwnode_handle *acpi_graph_get_next_endpoint(
+static struct fwnode_handle *acpi_graph_get_next_endpoint(
 	const struct fwnode_handle *fwnode, struct fwnode_handle *prev)
 {
 	struct fwnode_handle *port = NULL;
@@ -1065,11 +1065,9 @@ struct fwnode_handle *acpi_graph_get_next_endpoint(
 			endpoint = fwnode_get_next_child_node(port, NULL);
 	}
 
-	if (endpoint) {
-		/* Endpoints must have "endpoint" property */
-		if (!fwnode_property_present(endpoint, "endpoint"))
-			return ERR_PTR(-EPROTO);
-	}
+	/* Endpoints must have "endpoint" property */
+	if (!fwnode_property_present(endpoint, "endpoint"))
+		return NULL;
 
 	return endpoint;
 }
@@ -1106,18 +1104,12 @@ static struct fwnode_handle *acpi_graph_get_child_prop_value(
 /**
  * acpi_graph_get_remote_enpoint - Parses and returns remote end of an endpoint
  * @fwnode: Endpoint firmware node pointing to a remote device
- * @parent: Firmware node of remote port parent is filled here if not %NULL
- * @port: Firmware node of remote port is filled here if not %NULL
  * @endpoint: Firmware node of remote endpoint is filled here if not %NULL
  *
- * Function parses remote end of ACPI firmware remote endpoint and fills in
- * fields requested by the caller. Returns %0 in case of success and
- * negative errno otherwise.
+ * Returns the remote endpoint corresponding to @__fwnode. NULL on error.
  */
-int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
-				   struct fwnode_handle **parent,
-				   struct fwnode_handle **port,
-				   struct fwnode_handle **endpoint)
+static struct fwnode_handle *
+acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode)
 {
 	struct fwnode_handle *fwnode;
 	unsigned int port_nr, endpoint_nr;
@@ -1128,47 +1120,27 @@ int acpi_graph_get_remote_endpoint(const struct fwnode_handle *__fwnode,
 	ret = acpi_node_get_property_reference(__fwnode, "remote-endpoint", 0,
 					       &args);
 	if (ret)
-		return ret;
+		return NULL;
 
 	/* Ensure this is a device node. */
 	if (!is_acpi_device_node(args.fwnode))
-		return -ENODEV;
+		return NULL;
 
 	/*
 	 * Always require two arguments with the reference: port and
 	 * endpoint indices.
 	 */
 	if (args.nargs != 2)
-		return -EPROTO;
+		return NULL;
 
 	fwnode = args.fwnode;
 	port_nr = args.args[0];
 	endpoint_nr = args.args[1];
 
-	if (parent)
-		*parent = fwnode;
-
-	if (!port && !endpoint)
-		return 0;
-
 	fwnode = acpi_graph_get_child_prop_value(fwnode, "port", port_nr);
-	if (!fwnode)
-		return -EPROTO;
-
-	if (port)
-		*port = fwnode;
-
-	if (!endpoint)
-		return 0;
-
-	fwnode = acpi_graph_get_child_prop_value(fwnode, "endpoint",
-						 endpoint_nr);
-	if (!fwnode)
-		return -EPROTO;
 
-	*endpoint = fwnode;
-
-	return 0;
+	return acpi_graph_get_child_prop_value(fwnode, "endpoint",
+					       endpoint_nr);
 }
 
 static bool acpi_fwnode_device_is_available(const struct fwnode_handle *fwnode)
@@ -1232,29 +1204,6 @@ acpi_fwnode_get_reference_args(const struct fwnode_handle *fwnode,
 						  args_count, args);
 }
 
-static struct fwnode_handle *
-acpi_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
-				    struct fwnode_handle *prev)
-{
-	struct fwnode_handle *endpoint;
-
-	endpoint = acpi_graph_get_next_endpoint(fwnode, prev);
-	if (IS_ERR(endpoint))
-		return NULL;
-
-	return endpoint;
-}
-
-static struct fwnode_handle *
-acpi_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode)
-{
-	struct fwnode_handle *endpoint = NULL;
-
-	acpi_graph_get_remote_endpoint(fwnode, NULL, NULL, &endpoint);
-
-	return endpoint;
-}
-
 static struct fwnode_handle *
 acpi_fwnode_get_parent(struct fwnode_handle *fwnode)
 {
@@ -1295,9 +1244,9 @@ acpi_fwnode_device_get_match_data(const struct fwnode_handle *fwnode,
 		.get_named_child_node = acpi_fwnode_get_named_child_node, \
 		.get_reference_args = acpi_fwnode_get_reference_args,	\
 		.graph_get_next_endpoint =				\
-			acpi_fwnode_graph_get_next_endpoint,		\
+			acpi_graph_get_next_endpoint,			\
 		.graph_get_remote_endpoint =				\
-			acpi_fwnode_graph_get_remote_endpoint,		\
+			acpi_graph_get_remote_endpoint,			\
 		.graph_get_port_parent = acpi_fwnode_get_parent,	\
 		.graph_parse_endpoint = acpi_fwnode_graph_parse_endpoint, \
 	};								\
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 11cf39719fd8..de8d3d3fa651 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1089,14 +1089,6 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child);
 struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
-struct fwnode_handle *
-acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
-			     struct fwnode_handle *prev);
-int acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode,
-				   struct fwnode_handle **remote,
-				   struct fwnode_handle **port,
-				   struct fwnode_handle **endpoint);
-
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
 						 struct acpi_probe_entry *);
-- 
cgit v1.2.3


From 0e3fd810c4f41dbd63fb7caddc11684959176727 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Jul 2018 16:46:41 +0200
Subject: Documentation: document ktime_get_*() APIs

As Dave Chinner points out, we don't have a proper documentation for the
ktime_get() family of interfaces, making it rather unclear which of the
over 30 (!) interfaces one should actually use in a driver or elsewhere
in the kernel.

I wrote up an explanation from how I personally see the interfaces,
documenting what each of the functions do and hopefully making it a bit
clearer which should be used where.

This is the first time I tried writing .rst format documentation, so
in addition to any mistakes in the content, I probably also introduce
nonstandard formatting ;-)

I first tried to add an extra section to
Documentation/timers/timekeeping.txt, but this is currently not included
in the generated API, and it seems useful to have the API docs as part
of what gets generated in
https://www.kernel.org/doc/html/latest/core-api/index.html#core-utilities
instead, so I started a new file there.

I also considered adding the documentation inline in the
include/linux/timekeeping.h header, but couldn't figure out how to do
that in a way that would result both in helpful inline comments as
well as readable html output, so I settled for the latter, with
a small note pointing to it from the header.

Cc: Dave Chinner <david@fromorbit.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/index.rst       |   1 +
 Documentation/core-api/timekeeping.rst | 185 +++++++++++++++++++++++++++++++++
 include/linux/timekeeping.h            |  15 +++
 3 files changed, 201 insertions(+)
 create mode 100644 Documentation/core-api/timekeeping.rst

(limited to 'include')

diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index f5a66b72f984..989c97cc232a 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -28,6 +28,7 @@ Core utilities
    printk-formats
    circular-buffers
    gfp_mask-from-fs-io
+   timekeeping
 
 Interfaces for kernel debugging
 ===============================
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
new file mode 100644
index 000000000000..93cbeb9daec0
--- /dev/null
+++ b/Documentation/core-api/timekeeping.rst
@@ -0,0 +1,185 @@
+ktime accessors
+===============
+
+Device drivers can read the current time using ktime_get() and the many
+related functions declared in linux/timekeeping.h. As a rule of thumb,
+using an accessor with a shorter name is preferred over one with a longer
+name if both are equally fit for a particular use case.
+
+Basic ktime_t based interfaces
+------------------------------
+
+The recommended simplest form returns an opaque ktime_t, with variants
+that return time for different clock references:
+
+
+.. c:function:: ktime_t ktime_get( void )
+
+	CLOCK_MONOTONIC
+
+	Useful for reliable timestamps and measuring short time intervals
+	accurately. Starts at system boot time but stops during suspend.
+
+.. c:function:: ktime_t ktime_get_boottime( void )
+
+	CLOCK_BOOTTIME
+
+	Like ktime_get(), but does not stop when suspended. This can be
+	used e.g. for key expiration times that need to be synchronized
+	with other machines across a suspend operation.
+
+.. c:function:: ktime_t ktime_get_real( void )
+
+	CLOCK_REALTIME
+
+	Returns the time in relative to the UNIX epoch starting in 1970
+	using the Coordinated Universal Time (UTC), same as gettimeofday()
+	user space. This is used for all timestamps that need to
+	persist across a reboot, like inode times, but should be avoided
+	for internal uses, since it can jump backwards due to a leap
+	second update, NTP adjustment settimeofday() operation from user
+	space.
+
+.. c:function:: ktime_t ktime_get_clocktai( void )
+
+	 CLOCK_TAI
+
+	Like ktime_get_real(), but uses the International Atomic Time (TAI)
+	reference instead of UTC to avoid jumping on leap second updates.
+	This is rarely useful in the kernel.
+
+.. c:function:: ktime_t ktime_get_raw( void )
+
+	CLOCK_MONOTONIC_RAW
+
+	Like ktime_get(), but runs at the same rate as the hardware
+	clocksource without (NTP) adjustments for clock drift. This is
+	also rarely needed in the kernel.
+
+nanosecond, timespec64, and second output
+-----------------------------------------
+
+For all of the above, there are variants that return the time in a
+different format depending on what is required by the user:
+
+.. c:function:: u64 ktime_get_ns( void )
+		u64 ktime_get_boottime_ns( void )
+		u64 ktime_get_real_ns( void )
+		u64 ktime_get_tai_ns( void )
+		u64 ktime_get_raw_ns( void )
+
+	Same as the plain ktime_get functions, but returning a u64 number
+	of nanoseconds in the respective time reference, which may be
+	more convenient for some callers.
+
+.. c:function:: void ktime_get_ts64( struct timespec64 * )
+		void ktime_get_boottime_ts64( struct timespec64 * )
+		void ktime_get_real_ts64( struct timespec64 * )
+		void ktime_get_clocktai_ts64( struct timespec64 * )
+		void ktime_get_raw_ts64( struct timespec64 * )
+
+	Same above, but returns the time in a 'struct timespec64', split
+	into seconds and nanoseconds. This can avoid an extra division
+	when printing the time, or when passing it into an external
+	interface that expects a 'timespec' or 'timeval' structure.
+
+.. c:function:: time64_t ktime_get_seconds( void )
+		time64_t ktime_get_boottime_seconds( void )
+		time64_t ktime_get_real_seconds( void )
+		time64_t ktime_get_clocktai_seconds( void )
+		time64_t ktime_get_raw_seconds( void )
+
+	Return a coarse-grained version of the time as a scalar
+	time64_t. This avoids accessing the clock hardware and rounds
+	down the seconds to the full seconds of the last timer tick
+	using the respective reference.
+
+Coarse and fast_ns access
+-------------------------
+
+Some additional variants exist for more specialized cases:
+
+.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+		ktime_t ktime_get_coarse_real( void )
+		ktime_t ktime_get_coarse_clocktai( void )
+		ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
+		void ktime_get_coarse_boottime_ts64( struct timespec64 * )
+		void ktime_get_coarse_real_ts64( struct timespec64 * )
+		void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
+		void ktime_get_coarse_raw_ts64( struct timespec64 * )
+
+	These are quicker than the non-coarse versions, but less accurate,
+	corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
+	in user space, along with the equivalent boottime/tai/raw
+	timebase not available in user space.
+
+	The time returned here corresponds to the last timer tick, which
+	may be as much as 10ms in the past (for CONFIG_HZ=100), same as
+	reading the 'jiffies' variable.  These are only useful when called
+	in a fast path and one still expects better than second accuracy,
+	but can't easily use 'jiffies', e.g. for inode timestamps.
+	Skipping the hardware clock access saves around 100 CPU cycles
+	on most modern machines with a reliable cycle counter, but
+	up to several microseconds on older hardware with an external
+	clocksource.
+
+.. c:function:: u64 ktime_get_mono_fast_ns( void )
+		u64 ktime_get_raw_fast_ns( void )
+		u64 ktime_get_boot_fast_ns( void )
+		u64 ktime_get_real_fast_ns( void )
+
+	These variants are safe to call from any context, including from
+	a non-maskable interrupt (NMI) during a timekeeper update, and
+	while we are entering suspend with the clocksource powered down.
+	This is useful in some tracing or debugging code as well as
+	machine check reporting, but most drivers should never call them,
+	since the time is allowed to jump under certain conditions.
+
+Deprecated time interfaces
+--------------------------
+
+Older kernels used some other interfaces that are now being phased out
+but may appear in third-party drivers being ported here. In particular,
+all interfaces returning a 'struct timeval' or 'struct timespec' have
+been replaced because the tv_sec member overflows in year 2038 on 32-bit
+architectures. These are the recommended replacements:
+
+.. c:function:: void ktime_get_ts( struct timespec * )
+
+	Use ktime_get() or ktime_get_ts64() instead.
+
+.. c:function:: struct timeval do_gettimeofday( void )
+		struct timespec getnstimeofday( void )
+		struct timespec64 getnstimeofday64( void )
+		void ktime_get_real_ts( struct timespec * )
+
+	ktime_get_real_ts64() is a direct replacement, but consider using
+	monotonic time (ktime_get_ts64()) and/or a ktime_t based interface
+	(ktime_get()/ktime_get_real()).
+
+.. c:function:: struct timespec current_kernel_time( void )
+		struct timespec64 current_kernel_time64( void )
+		struct timespec get_monotonic_coarse( void )
+		struct timespec64 get_monotonic_coarse64( void )
+
+	These are replaced by ktime_get_coarse_real_ts64() and
+	ktime_get_coarse_ts64(). However, A lot of code that wants
+	coarse-grained times can use the simple 'jiffies' instead, while
+	some drivers may actually want the higher resolution accessors
+	these days.
+
+.. c:function:: struct timespec getrawmonotonic( void )
+		struct timespec64 getrawmonotonic64( void )
+		struct timespec timekeeping_clocktai( void )
+		struct timespec64 timekeeping_clocktai64( void )
+		struct timespec get_monotonic_boottime( void )
+		struct timespec64 get_monotonic_boottime64( void )
+
+	These are replaced by ktime_get_raw()/ktime_get_raw_ts64(),
+	ktime_get_clocktai()/ktime_get_clocktai_ts64() as well
+	as ktime_get_boottime()/ktime_get_boottime_ts64().
+	However, if the particular choice of clock source is not
+	important for the user, consider converting to
+	ktime_get()/ktime_get_ts64() instead for consistency.
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..947b1b8d2d01 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -20,6 +20,21 @@ extern int do_settimeofday64(const struct timespec64 *ts);
 extern int do_sys_settimeofday64(const struct timespec64 *tv,
 				 const struct timezone *tz);
 
+/*
+ * ktime_get() family: read the current time in a multitude of ways,
+ *
+ * The default time reference is CLOCK_MONOTONIC, starting at
+ * boot time but not counting the time spent in suspend.
+ * For other references, use the functions with "real", "clocktai",
+ * "boottime" and "raw" suffixes.
+ *
+ * To get the time in a different format, use the ones wit
+ * "ns", "ts64" and "seconds" suffix.
+ *
+ * See Documentation/core-api/timekeeping.rst for more details.
+ */
+
+
 /*
  * timespec64 based interfaces
  */
-- 
cgit v1.2.3


From 2756f68c314917d03eb348084edb08bb929139d9 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 23 Jul 2018 11:16:59 +0300
Subject: net: bridge: add support for backup port

This patch adds a new port attribute - IFLA_BRPORT_BACKUP_PORT, which
allows to set a backup port to be used for known unicast traffic if the
port has gone carrier down. The backup pointer is rcu protected and set
only under RTNL, a counter is maintained so when deleting a port we know
how many other ports reference it as a backup and we remove it from all.
Also the pointer is in the first cache line which is hot at the time of
the check and thus in the common case we only add one more test.
The backup port will be used only for the non-flooding case since
it's a part of the bridge and the flooded packets will be forwarded to it
anyway. To remove the forwarding just send a 0/non-existing backup port.
This is used to avoid numerous scalability problems when using MLAG most
notably if we have thousands of fdbs one would need to change all of them
on port carrier going down which takes too long and causes a storm of fdb
notifications (and again when the port comes back up). In a Multi-chassis
Link Aggregation setup usually hosts are connected to two different
switches which act as a single logical switch. Those switches usually have
a control and backup link between them called peerlink which might be used
for communication in case a host loses connectivity to one of them.
We need a fast way to failover in case a host port goes down and currently
none of the solutions (like bond) cannot fulfill the requirements because
the participating ports are actually the "master" devices and must have the
same peerlink as their backup interface and at the same time all of them
must participate in the bridge device. As Roopa noted it's normal practice
in routing called fast re-route where a precalculated backup path is used
when the main one is down.
Another use case of this is with EVPN, having a single vxlan device which
is backup of every port. Due to the nature of master devices it's not
currently possible to use one device as a backup for many and still have
all of them participate in the bridge (which is master itself).
More detailed information about MLAG is available at the link below.
https://docs.cumulusnetworks.com/display/DOCS/Multi-Chassis+Link+Aggregation+-+MLAG

Further explanation and a diagram by Roopa:
Two switches acting in a MLAG pair are connected by the peerlink
interface which is a bridge port.

the config on one of the switches looks like the below. The other
switch also has a similar config.
eth0 is connected to one port on the server. And the server is
connected to both switches.

br0 -- team0---eth0
      |
      -- switch-peerlink

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |  1 +
 net/bridge/br_forward.c      | 16 ++++++++++++-
 net/bridge/br_if.c           | 53 ++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_netlink.c      | 30 ++++++++++++++++++++++++-
 net/bridge/br_private.h      |  3 +++
 net/bridge/br_sysfs_if.c     | 33 +++++++++++++++++++++++++++
 6 files changed, 134 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8759cfb8aa2e..01b5069a73a5 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -334,6 +334,7 @@ enum {
 	IFLA_BRPORT_GROUP_FWD_MASK,
 	IFLA_BRPORT_NEIGH_SUPPRESS,
 	IFLA_BRPORT_ISOLATED,
+	IFLA_BRPORT_BACKUP_PORT,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 9019f326fe81..5372e2042adf 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -142,7 +142,20 @@ static int deliver_clone(const struct net_bridge_port *prev,
 void br_forward(const struct net_bridge_port *to,
 		struct sk_buff *skb, bool local_rcv, bool local_orig)
 {
-	if (to && should_deliver(to, skb)) {
+	if (unlikely(!to))
+		goto out;
+
+	/* redirect to backup link if the destination port is down */
+	if (rcu_access_pointer(to->backup_port) && !netif_carrier_ok(to->dev)) {
+		struct net_bridge_port *backup_port;
+
+		backup_port = rcu_dereference(to->backup_port);
+		if (unlikely(!backup_port))
+			goto out;
+		to = backup_port;
+	}
+
+	if (should_deliver(to, skb)) {
 		if (local_rcv)
 			deliver_clone(to, skb, local_orig);
 		else
@@ -150,6 +163,7 @@ void br_forward(const struct net_bridge_port *to,
 		return;
 	}
 
+out:
 	if (!local_rcv)
 		kfree_skb(skb);
 }
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e7c8d55212aa..0363f1bdc401 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -170,6 +170,58 @@ void br_manage_promisc(struct net_bridge *br)
 	}
 }
 
+int nbp_backup_change(struct net_bridge_port *p,
+		      struct net_device *backup_dev)
+{
+	struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port);
+	struct net_bridge_port *backup_p = NULL;
+
+	ASSERT_RTNL();
+
+	if (backup_dev) {
+		if (!br_port_exists(backup_dev))
+			return -ENOENT;
+
+		backup_p = br_port_get_rtnl(backup_dev);
+		if (backup_p->br != p->br)
+			return -EINVAL;
+	}
+
+	if (p == backup_p)
+		return -EINVAL;
+
+	if (old_backup == backup_p)
+		return 0;
+
+	/* if the backup link is already set, clear it */
+	if (old_backup)
+		old_backup->backup_redirected_cnt--;
+
+	if (backup_p)
+		backup_p->backup_redirected_cnt++;
+	rcu_assign_pointer(p->backup_port, backup_p);
+
+	return 0;
+}
+
+static void nbp_backup_clear(struct net_bridge_port *p)
+{
+	nbp_backup_change(p, NULL);
+	if (p->backup_redirected_cnt) {
+		struct net_bridge_port *cur_p;
+
+		list_for_each_entry(cur_p, &p->br->port_list, list) {
+			struct net_bridge_port *backup_p;
+
+			backup_p = rtnl_dereference(cur_p->backup_port);
+			if (backup_p == p)
+				nbp_backup_change(cur_p, NULL);
+		}
+	}
+
+	WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt);
+}
+
 static void nbp_update_port_count(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
@@ -295,6 +347,7 @@ static void del_nbp(struct net_bridge_port *p)
 	nbp_vlan_flush(p);
 	br_fdb_delete_by_port(br, p, 0, 1);
 	switchdev_deferred_process();
+	nbp_backup_clear(p);
 
 	nbp_update_port_count(br);
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 9f5eb05b0373..ec2b58a09f76 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -169,13 +169,15 @@ static inline size_t br_nlmsg_size(struct net_device *dev, u32 filter_mask)
 		+ nla_total_size(1) /* IFLA_OPERSTATE */
 		+ nla_total_size(br_port_info_size()) /* IFLA_PROTINFO */
 		+ nla_total_size(br_get_link_af_size_filtered(dev,
-				 filter_mask)); /* IFLA_AF_SPEC */
+				 filter_mask)) /* IFLA_AF_SPEC */
+		+ nla_total_size(4); /* IFLA_BRPORT_BACKUP_PORT */
 }
 
 static int br_port_fill_attrs(struct sk_buff *skb,
 			      const struct net_bridge_port *p)
 {
 	u8 mode = !!(p->flags & BR_HAIRPIN_MODE);
+	struct net_bridge_port *backup_p;
 	u64 timerval;
 
 	if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
@@ -237,6 +239,14 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 		return -EMSGSIZE;
 #endif
 
+	/* we might be called only with br->lock */
+	rcu_read_lock();
+	backup_p = rcu_dereference(p->backup_port);
+	if (backup_p)
+		nla_put_u32(skb, IFLA_BRPORT_BACKUP_PORT,
+			    backup_p->dev->ifindex);
+	rcu_read_unlock();
+
 	return 0;
 }
 
@@ -663,6 +673,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
 	[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
 	[IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
 	[IFLA_BRPORT_ISOLATED]	= { .type = NLA_U8 },
+	[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -817,6 +828,23 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 	if (err)
 		return err;
 
+	if (tb[IFLA_BRPORT_BACKUP_PORT]) {
+		struct net_device *backup_dev = NULL;
+		u32 backup_ifindex;
+
+		backup_ifindex = nla_get_u32(tb[IFLA_BRPORT_BACKUP_PORT]);
+		if (backup_ifindex) {
+			backup_dev = __dev_get_by_index(dev_net(p->dev),
+							backup_ifindex);
+			if (!backup_dev)
+				return -ENOENT;
+		}
+
+		err = nbp_backup_change(p, backup_dev);
+		if (err)
+			return err;
+	}
+
 	br_port_flags_change(p, old_flags ^ p->flags);
 	return 0;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cf0005d2a4d0..11ed2029985f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -237,6 +237,7 @@ struct net_bridge_port {
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	struct net_bridge_vlan_group	__rcu *vlgrp;
 #endif
+	struct net_bridge_port		__rcu *backup_port;
 
 	/* STP */
 	u8				priority;
@@ -281,6 +282,7 @@ struct net_bridge_port {
 	int				offload_fwd_mark;
 #endif
 	u16				group_fwd_mask;
+	u16				backup_redirected_cnt;
 };
 
 #define kobj_to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)
@@ -597,6 +599,7 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
 					netdev_features_t features);
 void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
 void br_manage_promisc(struct net_bridge *br);
+int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev);
 
 /* br_input.c */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 4ac940067754..7c87a2fe5248 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -191,6 +191,38 @@ static int store_group_fwd_mask(struct net_bridge_port *p,
 static BRPORT_ATTR(group_fwd_mask, 0644, show_group_fwd_mask,
 		   store_group_fwd_mask);
 
+static ssize_t show_backup_port(struct net_bridge_port *p, char *buf)
+{
+	struct net_bridge_port *backup_p;
+	int ret = 0;
+
+	rcu_read_lock();
+	backup_p = rcu_dereference(p->backup_port);
+	if (backup_p)
+		ret = sprintf(buf, "%s\n", backup_p->dev->name);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int store_backup_port(struct net_bridge_port *p, char *buf)
+{
+	struct net_device *backup_dev = NULL;
+	char *nl = strchr(buf, '\n');
+
+	if (nl)
+		*nl = '\0';
+
+	if (strlen(buf) > 0) {
+		backup_dev = __dev_get_by_name(dev_net(p->dev), buf);
+		if (!backup_dev)
+			return -ENOENT;
+	}
+
+	return nbp_backup_change(p, backup_dev);
+}
+static BRPORT_ATTR_RAW(backup_port, 0644, show_backup_port, store_backup_port);
+
 BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
 BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
 BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -254,6 +286,7 @@ static const struct brport_attribute *brport_attrs[] = {
 	&brport_attr_group_fwd_mask,
 	&brport_attr_neigh_suppress,
 	&brport_attr_isolated,
+	&brport_attr_backup_port,
 	NULL
 };
 
-- 
cgit v1.2.3


From c2a7d2a115525d3501d38e23d24875a79a07e15e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:16 -0700
Subject: filesystem-dax: Introduce dax_lock_mapping_entry()

In preparation for implementing support for memory poison (media error)
handling via dax mappings, implement a lock_page() equivalent. Poison
error handling requires rmap and needs guarantees that the page->mapping
association is maintained / valid (inode not freed) for the duration of
the lookup.

In the device-dax case it is sufficient to simply hold a dev_pagemap
reference. In the filesystem-dax case we need to use the entry lock.

Export the entry lock via dax_lock_mapping_entry() that uses
rcu_read_lock() to protect against the inode being freed, and
revalidates the page->mapping association under xa_lock().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 fs/dax.c            | 109 +++++++++++++++++++++++++++++++++++++++++++++++++---
 include/linux/dax.h |  13 +++++++
 2 files changed, 116 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/dax.c b/fs/dax.c
index 4de11ed463ce..57ec272038da 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  *
  * Must be called with the i_pages lock held.
  */
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-					pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
+		bool revalidate;
+
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		schedule();
+		revalidate = wait_fn();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
+		if (revalidate)
+			return ERR_PTR(-EAGAIN);
 	}
 }
 
-static void dax_unlock_mapping_entry(struct address_space *mapping,
-				     pgoff_t index)
+static bool entry_wait(void)
+{
+	schedule();
+	/*
+	 * Never return an ERR_PTR() from
+	 * __get_unlocked_mapping_entry(), just keep looping.
+	 */
+	return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
+{
+	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
 	void *entry, **slot;
 
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 static void put_locked_mapping_entry(struct address_space *mapping,
 		pgoff_t index)
 {
-	dax_unlock_mapping_entry(mapping, index);
+	unlock_mapping_entry(mapping, index);
 }
 
 /*
@@ -374,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+static bool entry_wait_revalidate(void)
+{
+	rcu_read_unlock();
+	schedule();
+	rcu_read_lock();
+
+	/*
+	 * Tell __get_unlocked_mapping_entry() to take a break, we need
+	 * to revalidate page->mapping after dropping locks
+	 */
+	return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+	pgoff_t index;
+	struct inode *inode;
+	bool did_lock = false;
+	void *entry = NULL, **slot;
+	struct address_space *mapping;
+
+	rcu_read_lock();
+	for (;;) {
+		mapping = READ_ONCE(page->mapping);
+
+		if (!dax_mapping(mapping))
+			break;
+
+		/*
+		 * In the device-dax case there's no need to lock, a
+		 * struct dev_pagemap pin is sufficient to keep the
+		 * inode alive, and we assume we have dev_pagemap pin
+		 * otherwise we would not have a valid pfn_to_page()
+		 * translation.
+		 */
+		inode = mapping->host;
+		if (S_ISCHR(inode->i_mode)) {
+			did_lock = true;
+			break;
+		}
+
+		xa_lock_irq(&mapping->i_pages);
+		if (mapping != page->mapping) {
+			xa_unlock_irq(&mapping->i_pages);
+			continue;
+		}
+		index = page->index;
+
+		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+				entry_wait_revalidate);
+		if (!entry) {
+			xa_unlock_irq(&mapping->i_pages);
+			break;
+		} else if (IS_ERR(entry)) {
+			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+			continue;
+		}
+		lock_slot(mapping, slot);
+		did_lock = true;
+		xa_unlock_irq(&mapping->i_pages);
+		break;
+	}
+	rcu_read_unlock();
+
+	return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+
+	if (S_ISCHR(inode->i_mode))
+		return;
+
+	unlock_mapping_entry(mapping, page->index);
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
diff --git a/include/linux/dax.h b/include/linux/dax.h
index deb0f663252f..450b28db9533 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -88,6 +88,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
+bool dax_lock_mapping_entry(struct page *page);
+void dax_unlock_mapping_entry(struct page *page);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -119,6 +121,17 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool dax_lock_mapping_entry(struct page *page)
+{
+	if (IS_DAX(page->mapping->host))
+		return true;
+	return false;
+}
+
+static inline void dax_unlock_mapping_entry(struct page *page)
+{
+}
 #endif
 
 int dax_read_lock(void);
-- 
cgit v1.2.3


From 6100e34b2526e1dc3dbcc47fea2677974d6aaea5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:21 -0700
Subject: mm, memory_failure: Teach memory_failure() about dev_pagemap pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mce: Uncorrected hardware memory error in user-access at af34214200
    {1}[Hardware Error]: It has been corrected by h/w and requires no further action
    mce: [Hardware Error]: Machine check events logged
    {1}[Hardware Error]: event severity: corrected
    Memory failure: 0xaf34214: reserved kernel page still referenced by 1 users
    [..]
    Memory failure: 0xaf34214: recovery action for reserved kernel page: Failed
    mce: Memory error not recovered

In contrast to typical memory, dev_pagemap pages may be dax mapped. With
dax there is no possibility to map in another page dynamically since dax
establishes 1:1 physical address to file offset associations. Also
dev_pagemap pages associated with NVDIMM / persistent memory devices can
internal remap/repair addresses with poison. While memory_failure()
assumes that it can discard typical poisoned pages and keep them
unmapped indefinitely, dev_pagemap pages may be returned to service
after the error is cleared.

Teach memory_failure() to detect and handle MEMORY_DEVICE_HOST
dev_pagemap pages that have poison consumed by userspace. Mark the
memory as UC instead of unmapping it completely to allow ongoing access
via the device driver (nd_pmem). Later, nd_pmem will grow support for
marking the page back to WB when the error is cleared.

Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 include/linux/mm.h  |   1 +
 mm/memory-failure.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 124 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..374e5e9284f7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2725,6 +2725,7 @@ enum mf_action_page_type {
 	MF_MSG_TRUNCATED_LRU,
 	MF_MSG_BUDDY,
 	MF_MSG_BUDDY_2ND,
+	MF_MSG_DAX,
 	MF_MSG_UNKNOWN,
 };
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8a81680d00dd..32a644d9c2ee 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -55,6 +55,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memory_hotplug.h>
 #include <linux/mm_inline.h>
+#include <linux/memremap.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
 #include "internal.h"
@@ -263,6 +264,40 @@ void shake_page(struct page *p, int access)
 }
 EXPORT_SYMBOL_GPL(shake_page);
 
+static unsigned long dev_pagemap_mapping_shift(struct page *page,
+		struct vm_area_struct *vma)
+{
+	unsigned long address = vma_address(page, vma);
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(vma->vm_mm, address);
+	if (!pgd_present(*pgd))
+		return 0;
+	p4d = p4d_offset(pgd, address);
+	if (!p4d_present(*p4d))
+		return 0;
+	pud = pud_offset(p4d, address);
+	if (!pud_present(*pud))
+		return 0;
+	if (pud_devmap(*pud))
+		return PUD_SHIFT;
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+	if (pmd_devmap(*pmd))
+		return PMD_SHIFT;
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if (pte_devmap(*pte))
+		return PAGE_SHIFT;
+	return 0;
+}
+
 /*
  * Failure handling: if we can't find or can't kill a process there's
  * not much we can do.	We just print a message and ignore otherwise.
@@ -292,7 +327,10 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	}
 	tk->addr = page_address_in_vma(p, vma);
 	tk->addr_valid = 1;
-	tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
+	if (is_zone_device_page(p))
+		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
+	else
+		tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
 
 	/*
 	 * In theory we don't have to kill when the page was
@@ -300,7 +338,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	 * likely very rare kill anyways just out of paranoia, but use
 	 * a SIGKILL because the error is not contained anymore.
 	 */
-	if (tk->addr == -EFAULT) {
+	if (tk->addr == -EFAULT || tk->size_shift == 0) {
 		pr_info("Memory failure: Unable to find user space address %lx in %s\n",
 			page_to_pfn(p), tsk->comm);
 		tk->addr_valid = 0;
@@ -514,6 +552,7 @@ static const char * const action_page_types[] = {
 	[MF_MSG_TRUNCATED_LRU]		= "already truncated LRU page",
 	[MF_MSG_BUDDY]			= "free buddy page",
 	[MF_MSG_BUDDY_2ND]		= "free buddy page (2nd try)",
+	[MF_MSG_DAX]			= "dax page",
 	[MF_MSG_UNKNOWN]		= "unknown page",
 };
 
@@ -1111,6 +1150,83 @@ out:
 	return res;
 }
 
+static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
+		struct dev_pagemap *pgmap)
+{
+	struct page *page = pfn_to_page(pfn);
+	const bool unmap_success = true;
+	unsigned long size = 0;
+	struct to_kill *tk;
+	LIST_HEAD(tokill);
+	int rc = -EBUSY;
+	loff_t start;
+
+	/*
+	 * Prevent the inode from being freed while we are interrogating
+	 * the address_space, typically this would be handled by
+	 * lock_page(), but dax pages do not use the page lock. This
+	 * also prevents changes to the mapping of this pfn until
+	 * poison signaling is complete.
+	 */
+	if (!dax_lock_mapping_entry(page))
+		goto out;
+
+	if (hwpoison_filter(page)) {
+		rc = 0;
+		goto unlock;
+	}
+
+	switch (pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+	case MEMORY_DEVICE_PUBLIC:
+		/*
+		 * TODO: Handle HMM pages which may need coordination
+		 * with device-side memory.
+		 */
+		goto unlock;
+	default:
+		break;
+	}
+
+	/*
+	 * Use this flag as an indication that the dax page has been
+	 * remapped UC to prevent speculative consumption of poison.
+	 */
+	SetPageHWPoison(page);
+
+	/*
+	 * Unlike System-RAM there is no possibility to swap in a
+	 * different physical page at a given virtual address, so all
+	 * userspace consumption of ZONE_DEVICE memory necessitates
+	 * SIGBUS (i.e. MF_MUST_KILL)
+	 */
+	flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
+	collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+
+	list_for_each_entry(tk, &tokill, nd)
+		if (tk->size_shift)
+			size = max(size, 1UL << tk->size_shift);
+	if (size) {
+		/*
+		 * Unmap the largest mapping to avoid breaking up
+		 * device-dax mappings which are constant size. The
+		 * actual size of the mapping being torn down is
+		 * communicated in siginfo, see kill_proc()
+		 */
+		start = (page->index << PAGE_SHIFT) & ~(size - 1);
+		unmap_mapping_range(page->mapping, start, start + size, 0);
+	}
+	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
+	rc = 0;
+unlock:
+	dax_unlock_mapping_entry(page);
+out:
+	/* drop pgmap ref acquired in caller */
+	put_dev_pagemap(pgmap);
+	action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
+	return rc;
+}
+
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1133,6 +1249,7 @@ int memory_failure(unsigned long pfn, int flags)
 	struct page *p;
 	struct page *hpage;
 	struct page *orig_head;
+	struct dev_pagemap *pgmap;
 	int res;
 	unsigned long page_flags;
 
@@ -1145,6 +1262,10 @@ int memory_failure(unsigned long pfn, int flags)
 		return -ENXIO;
 	}
 
+	pgmap = get_dev_pagemap(pfn, NULL);
+	if (pgmap)
+		return memory_failure_dev_pagemap(pfn, flags, pgmap);
+
 	p = pfn_to_page(pfn);
 	if (PageHuge(p))
 		return memory_failure_hugetlb(pfn, flags);
-- 
cgit v1.2.3


From c601171d7a60b5b09d7c2fe0579953323a80744e Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 23 Jul 2018 13:53:08 +0200
Subject: net/smc: provide smc mode in smc_diag.c

Rename field diag_fallback into diag_mode and set the smc mode of a
connection explicitly.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc_diag.h | 9 ++++++++-
 net/smc/smc_diag.c            | 7 ++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 92be255e534c..48ae3ee22b2d 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -20,7 +20,7 @@ struct smc_diag_req {
 struct smc_diag_msg {
 	__u8	diag_family;
 	__u8	diag_state;
-	__u8	diag_fallback;
+	__u8	diag_mode;
 	__u8	diag_shutdown;
 	struct inet_diag_sockid id;
 
@@ -28,6 +28,13 @@ struct smc_diag_msg {
 	__u64	diag_inode;
 };
 
+/* Mode of a connection */
+enum {
+	SMC_DIAG_MODE_SMCR,
+	SMC_DIAG_MODE_FALLBACK_TCP,
+	SMC_DIAG_MODE_SMCD,
+};
+
 /* Extensions */
 
 enum {
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 6d83eef1b743..d772cd10297e 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -91,7 +91,12 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	r = nlmsg_data(nlh);
 	smc_diag_msg_common_fill(r, sk);
 	r->diag_state = sk->sk_state;
-	r->diag_fallback = smc->use_fallback;
+	if (smc->use_fallback)
+		r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
+	else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
+		r->diag_mode = SMC_DIAG_MODE_SMCD;
+	else
+		r->diag_mode = SMC_DIAG_MODE_SMCR;
 	user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
 	if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
 		goto errout;
-- 
cgit v1.2.3


From c0e4e0fd952b73bf6aae67e92b9a496a52837eb9 Mon Sep 17 00:00:00 2001
From: Maxime Roussin-Bélanger <maxime.roussinbelanger@gmail.com>
Date: Thu, 19 Jul 2018 16:26:24 -0400
Subject: iio: Add modifier for DUV light
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Maxime Roussin-Bélanger <maxime.roussinbelanger@gmail.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 7 +++++--
 drivers/iio/industrialio-core.c         | 1 +
 include/uapi/linux/iio/types.h          | 1 +
 tools/iio/iio_event_monitor.c           | 2 ++
 4 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index c9cfa833cf47..a5b4f223641d 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1307,13 +1307,16 @@ What:		/sys/.../iio:deviceX/in_intensityY_raw
 What:		/sys/.../iio:deviceX/in_intensityY_ir_raw
 What:		/sys/.../iio:deviceX/in_intensityY_both_raw
 What:		/sys/.../iio:deviceX/in_intensityY_uv_raw
+What:		/sys/.../iio:deviceX/in_intensityY_duv_raw
 KernelVersion:	3.4
 Contact:	linux-iio@vger.kernel.org
 Description:
 		Unit-less light intensity. Modifiers both and ir indicate
 		that measurements contain visible and infrared light
-		components or just infrared light, respectively. Modifier uv indicates
-		that measurements contain ultraviolet light components.
+		components or just infrared light, respectively. Modifier
+		uv indicates that measurements contain ultraviolet light
+		components. Modifier duv indicates that measurements
+		contain deep ultraviolet light components.
 
 What:		/sys/.../iio:deviceX/in_uvindex_input
 KernelVersion:	4.6
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index a16ad5a4ab0c..a062cfddc5af 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -110,6 +110,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_LIGHT_GREEN] = "green",
 	[IIO_MOD_LIGHT_BLUE] = "blue",
 	[IIO_MOD_LIGHT_UV] = "uv",
+	[IIO_MOD_LIGHT_DUV] = "duv",
 	[IIO_MOD_QUATERNION] = "quaternion",
 	[IIO_MOD_TEMP_AMBIENT] = "ambient",
 	[IIO_MOD_TEMP_OBJECT] = "object",
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index e4df3cc268db..92baabc103ac 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -86,6 +86,7 @@ enum iio_modifier {
 	IIO_MOD_CO2,
 	IIO_MOD_VOC,
 	IIO_MOD_LIGHT_UV,
+	IIO_MOD_LIGHT_DUV,
 };
 
 enum iio_event_type {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f478f5558720..ac2de6b7e89f 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -98,6 +98,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_LIGHT_GREEN] = "green",
 	[IIO_MOD_LIGHT_BLUE] = "blue",
 	[IIO_MOD_LIGHT_UV] = "uv",
+	[IIO_MOD_LIGHT_DUV] = "duv",
 	[IIO_MOD_QUATERNION] = "quaternion",
 	[IIO_MOD_TEMP_AMBIENT] = "ambient",
 	[IIO_MOD_TEMP_OBJECT] = "object",
@@ -182,6 +183,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_LIGHT_GREEN:
 	case IIO_MOD_LIGHT_BLUE:
 	case IIO_MOD_LIGHT_UV:
+	case IIO_MOD_LIGHT_DUV:
 	case IIO_MOD_QUATERNION:
 	case IIO_MOD_TEMP_AMBIENT:
 	case IIO_MOD_TEMP_OBJECT:
-- 
cgit v1.2.3


From e873e4b9cc7e8ce79e5c5627b32b107035bb3f5d Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Sat, 21 Jul 2018 20:56:32 -0700
Subject: ipv6: use fib6_info_hold_safe() when necessary

In the code path where only rcu read lock is held, e.g. in the route
lookup code path, it is not safe to directly call fib6_info_hold()
because the fib6_info may already have been deleted but still exists
in the rcu grace period. Holding reference to it could cause double
free and crash the kernel.

This patch adds a new function fib6_info_hold_safe() and replace
fib6_info_hold() in all necessary places.

Syzbot reported 3 crash traces because of this. One of them is:
8021q: adding VLAN 0 to HW filter on device team0
IPv6: ADDRCONF(NETDEV_CHANGE): team0: link becomes ready
dst_release: dst:(____ptrval____) refcnt:-1
dst_release: dst:(____ptrval____) refcnt:-2
WARNING: CPU: 1 PID: 4845 at include/net/dst.h:239 dst_hold include/net/dst.h:239 [inline]
WARNING: CPU: 1 PID: 4845 at include/net/dst.h:239 ip6_setup_cork+0xd66/0x1830 net/ipv6/ip6_output.c:1204
dst_release: dst:(____ptrval____) refcnt:-1
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 4845 Comm: syz-executor493 Not tainted 4.18.0-rc3+ #10
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 panic+0x238/0x4e7 kernel/panic.c:184
dst_release: dst:(____ptrval____) refcnt:-2
dst_release: dst:(____ptrval____) refcnt:-3
 __warn.cold.8+0x163/0x1ba kernel/panic.c:536
dst_release: dst:(____ptrval____) refcnt:-4
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1fc/0x4d0 arch/x86/kernel/traps.c:296
dst_release: dst:(____ptrval____) refcnt:-5
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:316
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
RIP: 0010:dst_hold include/net/dst.h:239 [inline]
RIP: 0010:ip6_setup_cork+0xd66/0x1830 net/ipv6/ip6_output.c:1204
Code: c1 ed 03 89 9d 18 ff ff ff 48 b8 00 00 00 00 00 fc ff df 41 c6 44 05 00 f8 e9 2d 01 00 00 4c 8b a5 c8 fe ff ff e8 1a f6 e6 fa <0f> 0b e9 6a fc ff ff e8 0e f6 e6 fa 48 8b 85 d0 fe ff ff 48 8d 78
RSP: 0018:ffff8801a8fcf178 EFLAGS: 00010293
RAX: ffff8801a8eba5c0 RBX: 0000000000000000 RCX: ffffffff869511e6
RDX: 0000000000000000 RSI: ffffffff869515b6 RDI: 0000000000000005
RBP: ffff8801a8fcf2c8 R08: ffff8801a8eba5c0 R09: ffffed0035ac8338
R10: ffffed0035ac8338 R11: ffff8801ad6419c3 R12: ffff8801a8fcf720
R13: ffff8801a8fcf6a0 R14: ffff8801ad6419c0 R15: ffff8801ad641980
 ip6_make_skb+0x2c8/0x600 net/ipv6/ip6_output.c:1768
 udpv6_sendmsg+0x2c90/0x35f0 net/ipv6/udp.c:1376
 inet_sendmsg+0x1a1/0x690 net/ipv4/af_inet.c:798
 sock_sendmsg_nosec net/socket.c:641 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:651
 ___sys_sendmsg+0x51d/0x930 net/socket.c:2125
 __sys_sendmmsg+0x240/0x6f0 net/socket.c:2220
 __do_sys_sendmmsg net/socket.c:2249 [inline]
 __se_sys_sendmmsg net/socket.c:2246 [inline]
 __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2246
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x446ba9
Code: e8 cc bb 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fb39a469da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133
RAX: ffffffffffffffda RBX: 00000000006dcc54 RCX: 0000000000446ba9
RDX: 00000000000000b8 RSI: 0000000020001b00 RDI: 0000000000000003
RBP: 00000000006dcc50 R08: 00007fb39a46a700 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 45c828efc7a64843
R13: e6eeb815b9d8a477 R14: 5068caf6f713c6fc R15: 0000000000000001
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Reported-by: syzbot+902e2a1bcd4f7808cef5@syzkaller.appspotmail.com
Reported-by: syzbot+8ae62d67f647abeeceb9@syzkaller.appspotmail.com
Reported-by: syzbot+3f08feb14086930677d0@syzkaller.appspotmail.com
Signed-off-by: Wei Wang <weiwan@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  5 +++++
 net/ipv6/addrconf.c   |  3 ++-
 net/ipv6/route.c      | 41 +++++++++++++++++++++++++++++++----------
 3 files changed, 38 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 71b9043aa0e7..3d4930528db0 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -281,6 +281,11 @@ static inline void fib6_info_hold(struct fib6_info *f6i)
 	atomic_inc(&f6i->fib6_ref);
 }
 
+static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
+{
+	return atomic_inc_not_zero(&f6i->fib6_ref);
+}
+
 static inline void fib6_info_release(struct fib6_info *f6i)
 {
 	if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91580c62bb86..f66a1cae3366 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2374,7 +2374,8 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 			continue;
 		if ((rt->fib6_flags & noflags) != 0)
 			continue;
-		fib6_info_hold(rt);
+		if (!fib6_info_hold_safe(rt))
+			continue;
 		break;
 	}
 out:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2ce0bd17de4f..ec18b3ce8b6d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -972,10 +972,10 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 	rt->dst.lastuse = jiffies;
 }
 
+/* Caller must already hold reference to @from */
 static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 {
 	rt->rt6i_flags &= ~RTF_EXPIRES;
-	fib6_info_hold(from);
 	rcu_assign_pointer(rt->from, from);
 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
 	if (from->fib6_metrics != &dst_default_metrics) {
@@ -984,6 +984,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 	}
 }
 
+/* Caller must already hold reference to @ort */
 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 {
 	struct net_device *dev = fib6_info_nh_dev(ort);
@@ -1044,9 +1045,14 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
 	struct net_device *dev = rt->fib6_nh.nh_dev;
 	struct rt6_info *nrt;
 
+	if (!fib6_info_hold_safe(rt))
+		return NULL;
+
 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	if (nrt)
 		ip6_rt_copy_init(nrt, rt);
+	else
+		fib6_info_release(rt);
 
 	return nrt;
 }
@@ -1178,10 +1184,15 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 	 *	Clone the route.
 	 */
 
+	if (!fib6_info_hold_safe(ort))
+		return NULL;
+
 	dev = ip6_rt_get_dev_rcu(ort);
 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
-	if (!rt)
+	if (!rt) {
+		fib6_info_release(ort);
 		return NULL;
+	}
 
 	ip6_rt_copy_init(rt, ort);
 	rt->rt6i_flags |= RTF_CACHE;
@@ -1210,12 +1221,17 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 	struct net_device *dev;
 	struct rt6_info *pcpu_rt;
 
+	if (!fib6_info_hold_safe(rt))
+		return NULL;
+
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(rt);
 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
 	rcu_read_unlock();
-	if (!pcpu_rt)
+	if (!pcpu_rt) {
+		fib6_info_release(rt);
 		return NULL;
+	}
 	ip6_rt_copy_init(pcpu_rt, rt);
 	pcpu_rt->rt6i_flags |= RTF_PCPU;
 	return pcpu_rt;
@@ -2486,7 +2502,7 @@ restart:
 
 out:
 	if (ret)
-		dst_hold(&ret->dst);
+		ip6_hold_safe(net, &ret, true);
 	else
 		ret = ip6_create_rt_rcu(rt);
 
@@ -3303,7 +3319,8 @@ static int ip6_route_del(struct fib6_config *cfg,
 				continue;
 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
 				continue;
-			fib6_info_hold(rt);
+			if (!fib6_info_hold_safe(rt))
+				continue;
 			rcu_read_unlock();
 
 			/* if gateway was specified only delete the one hop */
@@ -3409,6 +3426,9 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
 
 	rcu_read_lock();
 	from = rcu_dereference(rt->from);
+	/* This fib6_info_hold() is safe here because we hold reference to rt
+	 * and rt already holds reference to fib6_info.
+	 */
 	fib6_info_hold(from);
 	rcu_read_unlock();
 
@@ -3470,7 +3490,8 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
 			continue;
 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 			continue;
-		fib6_info_hold(rt);
+		if (!fib6_info_hold_safe(rt))
+			continue;
 		break;
 	}
 out:
@@ -3530,8 +3551,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
 			break;
 	}
-	if (rt)
-		fib6_info_hold(rt);
+	if (rt && !fib6_info_hold_safe(rt))
+		rt = NULL;
 	rcu_read_unlock();
 	return rt;
 }
@@ -3579,8 +3600,8 @@ restart:
 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
 
 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
-		    (!idev || idev->cnf.accept_ra != 2)) {
-			fib6_info_hold(rt);
+		    (!idev || idev->cnf.accept_ra != 2) &&
+		    fib6_info_hold_safe(rt)) {
 			rcu_read_unlock();
 			ip6_del_rt(net, rt);
 			goto restart;
-- 
cgit v1.2.3


From 4fca037783512cedfb23a116c66727ce40c8558a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 11 Jul 2018 16:20:44 -0600
Subject: IB/uverbs: Move ib_access_flags and ib_read_counters_flags to uapi

These constants are used in the ioctl interface so they are part of the
uapi, place them in the correct header for clarity.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/rdma/ib_verbs.h                 | 23 ++++++++++-------------
 include/uapi/rdma/ib_user_ioctl_verbs.h | 16 ++++++++++++++++
 2 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2696f1d730a1..08348e53082c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,14 +1425,16 @@ struct ib_recv_wr {
 };
 
 enum ib_access_flags {
-	IB_ACCESS_LOCAL_WRITE	= 1,
-	IB_ACCESS_REMOTE_WRITE	= (1<<1),
-	IB_ACCESS_REMOTE_READ	= (1<<2),
-	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
-	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5),
-	IB_ACCESS_ON_DEMAND     = (1<<6),
-	IB_ACCESS_HUGETLB	= (1<<7),
+	IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+	IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+	IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+	IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+	IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+
+	IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
 };
 
 /*
@@ -2223,11 +2225,6 @@ struct ib_counters {
 	atomic_t	usecnt;
 };
 
-enum ib_read_counters_flags {
-	/* prefer read values from driver cache */
-	IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
-};
-
 struct ib_counters_read_attr {
 	u64	*counters_buff;
 	u32	ncounters;
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index a81d853bf25d..6cdf192070a2 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -40,6 +40,17 @@
 #define RDMA_UAPI_PTR(_type, _name)	__aligned_u64 _name
 #endif
 
+enum ib_uverbs_access_flags {
+	IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
+	IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
+	IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2,
+	IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3,
+	IB_UVERBS_ACCESS_MW_BIND = 1 << 4,
+	IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
+	IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
+	IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+};
+
 enum ib_uverbs_query_port_cap_flags {
 	IB_UVERBS_PCF_SM = 1 << 1,
 	IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
@@ -141,4 +152,9 @@ struct ib_uverbs_flow_action_esp {
 	__aligned_u64	hard_limit_pkts;
 };
 
+enum ib_uverbs_read_counters_flags {
+	/* prefer read values from driver cache */
+	IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
+};
+
 #endif
-- 
cgit v1.2.3


From f53aaa31cce7b543e407da7e97690a700206f7b9 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 16 Jul 2018 15:22:01 -0700
Subject: net/mlx5: FW tracer, implement tracer logic

Implement FW tracer logic and registers access, initialization and
cleanup flows.

Initializing the tracer will be part of load one flow, as multiple
PFs will try to acquire ownership but only one will succeed and will
be the tracer owner.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c   | 196 +++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h   |  66 +++++++
 include/linux/mlx5/driver.h                        |   3 +
 3 files changed, 265 insertions(+)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
new file mode 100644
index 000000000000..3ecbf06b4d71
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "fw_tracer.h"
+
+static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer)
+{
+	u32 *string_db_base_address_out = tracer->str_db.base_address_out;
+	u32 *string_db_size_out = tracer->str_db.size_out;
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	void *mtrc_cap_sp;
+	int err, i;
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CAP, 0, 0);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Error reading tracer caps %d\n",
+			       err);
+		return err;
+	}
+
+	if (!MLX5_GET(mtrc_cap, out, trace_to_memory)) {
+		mlx5_core_dbg(dev, "FWTracer: Device does not support logging traces to memory\n");
+		return -ENOTSUPP;
+	}
+
+	tracer->trc_ver = MLX5_GET(mtrc_cap, out, trc_ver);
+	tracer->str_db.first_string_trace =
+			MLX5_GET(mtrc_cap, out, first_string_trace);
+	tracer->str_db.num_string_trace =
+			MLX5_GET(mtrc_cap, out, num_string_trace);
+	tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
+	tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+	for (i = 0; i < tracer->str_db.num_string_db; i++) {
+		mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
+		string_db_base_address_out[i] = MLX5_GET(mtrc_string_db_param,
+							 mtrc_cap_sp,
+							 string_db_base_address);
+		string_db_size_out[i] = MLX5_GET(mtrc_string_db_param,
+						 mtrc_cap_sp, string_db_size);
+	}
+
+	return err;
+}
+
+static int mlx5_set_mtrc_caps_trace_owner(struct mlx5_fw_tracer *tracer,
+					  u32 *out, u32 out_size,
+					  u8 trace_owner)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 in[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+	MLX5_SET(mtrc_cap, in, trace_owner, trace_owner);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out, out_size,
+				    MLX5_REG_MTRC_CAP, 0, 1);
+}
+
+static int mlx5_fw_tracer_ownership_acquire(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+	int err;
+
+	err = mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+					     MLX5_FW_TRACER_ACQUIRE_OWNERSHIP);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Acquire tracer ownership failed %d\n",
+			       err);
+		return err;
+	}
+
+	tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
+
+	if (!tracer->owner)
+		return -EBUSY;
+
+	return 0;
+}
+
+static void mlx5_fw_tracer_ownership_release(struct mlx5_fw_tracer *tracer)
+{
+	u32 out[MLX5_ST_SZ_DW(mtrc_cap)] = {0};
+
+	mlx5_set_mtrc_caps_trace_owner(tracer, out, sizeof(out),
+				       MLX5_FW_TRACER_RELEASE_OWNERSHIP);
+	tracer->owner = false;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
+						     ownership_change_work);
+	struct mlx5_core_dev *dev = tracer->dev;
+	int err;
+
+	if (tracer->owner) {
+		mlx5_fw_tracer_ownership_release(tracer);
+		return;
+	}
+
+	err = mlx5_fw_tracer_ownership_acquire(tracer);
+	if (err) {
+		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+		return;
+	}
+}
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_fw_tracer *tracer = NULL;
+	int err;
+
+	if (!MLX5_CAP_MCAM_REG(dev, tracer_registers)) {
+		mlx5_core_dbg(dev, "FWTracer: Tracer capability not present\n");
+		return NULL;
+	}
+
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		return ERR_PTR(-ENOMEM);
+
+	tracer->work_queue = create_singlethread_workqueue("mlx5_fw_tracer");
+	if (!tracer->work_queue) {
+		err = -ENOMEM;
+		goto free_tracer;
+	}
+
+	tracer->dev = dev;
+
+	INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
+
+	err = mlx5_query_mtrc_caps(tracer);
+	if (err) {
+		mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
+		goto destroy_workqueue;
+	}
+
+	mlx5_fw_tracer_ownership_change(&tracer->ownership_change_work);
+
+	return tracer;
+
+destroy_workqueue:
+	tracer->dev = NULL;
+	destroy_workqueue(tracer->work_queue);
+free_tracer:
+	kfree(tracer);
+	return ERR_PTR(err);
+}
+
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
+{
+	if (!tracer)
+		return;
+
+	cancel_work_sync(&tracer->ownership_change_work);
+
+	if (tracer->owner)
+		mlx5_fw_tracer_ownership_release(tracer);
+
+	flush_workqueue(tracer->work_queue);
+	destroy_workqueue(tracer->work_queue);
+	kfree(tracer);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
new file mode 100644
index 000000000000..721c41a5e827
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_TRACER_H__
+#define __LIB_TRACER_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#define STRINGS_DB_SECTIONS_NUM 8
+
+struct mlx5_fw_tracer {
+	struct mlx5_core_dev *dev;
+	bool owner;
+	u8   trc_ver;
+	struct workqueue_struct *work_queue;
+	struct work_struct ownership_change_work;
+
+	/* Strings DB */
+	struct {
+		u8 first_string_trace;
+		u8 num_string_trace;
+		u32 num_string_db;
+		u32 base_address_out[STRINGS_DB_SECTIONS_NUM];
+		u32 size_out[STRINGS_DB_SECTIONS_NUM];
+	} str_db;
+};
+
+enum mlx5_fw_tracer_ownership_state {
+	MLX5_FW_TRACER_RELEASE_OWNERSHIP,
+	MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
+};
+
+struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+
+#endif
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 957199c20a0f..86cb0ebf92fa 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -816,6 +816,8 @@ struct mlx5_clock {
 	struct mlx5_pps            pps_info;
 };
 
+struct mlx5_fw_tracer;
+
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
 	/* sync pci state */
@@ -860,6 +862,7 @@ struct mlx5_core_dev {
 	struct mlx5_clock        clock;
 	struct mlx5_ib_clock_info  *clock_info;
 	struct page             *clock_info_page;
+	struct mlx5_fw_tracer   *tracer;
 };
 
 struct mlx5_db {
-- 
cgit v1.2.3


From c71ad41ccb0c29fce95149b74786574b354c9dda Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Wed, 7 Feb 2018 11:08:56 +0200
Subject: net/mlx5: FW tracer, events handling

The tracer has one event, event 0x26, with two subtypes:
- Subtype 0: Ownership change
- Subtype 1: Traces available

An ownership change occurs in the following cases:
1- Owner releases his ownership, in this case, an event will be
sent to inform others to reattempt acquire ownership.
2- Ownership was taken by a higher priority tool, in this case
the owner should understand that it lost ownership, and go through
tear down flow.

The second subtype indicates that there are traces in the trace buffer,
in this case, the driver polls the tracer buffer for new traces, parse
them and prepares the messages for printing.

The HW starts tracing from the first address in the tracer buffer.
Driver receives an event notifying that new trace block exists.
HW posts a timestamp event at the last 8B of every 256B block.
Comparing the timestamp to the last handled timestamp would indicate
that this is a new trace block. Once the new timestamp is detected,
the entire block is considered valid.

Block validation and parsing, should be done after copying the current
block to a different location, in order to avoid block overwritten
during processing.

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c   | 268 ++++++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h   |  71 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  11 +
 include/linux/mlx5/device.h                        |   7 +
 4 files changed, 347 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index d6cc27b0ff34..bd887d1d3396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -318,25 +318,244 @@ out:
 	return;
 }
 
-static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+static void mlx5_fw_tracer_arm(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
-						     ownership_change_work);
-	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
 	int err;
 
-	if (tracer->owner) {
-		mlx5_fw_tracer_ownership_release(tracer);
+	MLX5_SET(mtrc_ctrl, in, arm_event, 1);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CTRL, 0, 1);
+	if (err)
+		mlx5_core_warn(dev, "FWTracer: Failed to arm tracer event %d\n", err);
+}
+
+static void poll_trace(struct mlx5_fw_tracer *tracer,
+		       struct tracer_event *tracer_event, u64 *trace)
+{
+	u32 timestamp_low, timestamp_mid, timestamp_high, urts;
+
+	tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id);
+	tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost);
+
+	switch (tracer_event->event_id) {
+	case TRACER_EVENT_TYPE_TIMESTAMP:
+		tracer_event->type = TRACER_EVENT_TYPE_TIMESTAMP;
+		urts = MLX5_GET(tracer_timestamp_event, trace, urts);
+		if (tracer->trc_ver == 0)
+			tracer_event->timestamp_event.unreliable = !!(urts >> 2);
+		else
+			tracer_event->timestamp_event.unreliable = !!(urts & 1);
+
+		timestamp_low = MLX5_GET(tracer_timestamp_event,
+					 trace, timestamp7_0);
+		timestamp_mid = MLX5_GET(tracer_timestamp_event,
+					 trace, timestamp39_8);
+		timestamp_high = MLX5_GET(tracer_timestamp_event,
+					  trace, timestamp52_40);
+
+		tracer_event->timestamp_event.timestamp =
+				((u64)timestamp_high << 40) |
+				((u64)timestamp_mid << 8) |
+				(u64)timestamp_low;
+		break;
+	default:
+		if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
+		    tracer_event->event_id <= tracer->str_db.first_string_trace +
+					      tracer->str_db.num_string_trace) {
+			tracer_event->type = TRACER_EVENT_TYPE_STRING;
+			tracer_event->string_event.timestamp =
+				MLX5_GET(tracer_string_event, trace, timestamp);
+			tracer_event->string_event.string_param =
+				MLX5_GET(tracer_string_event, trace, string_param);
+			tracer_event->string_event.tmsn =
+				MLX5_GET(tracer_string_event, trace, tmsn);
+			tracer_event->string_event.tdsn =
+				MLX5_GET(tracer_string_event, trace, tdsn);
+		} else {
+			tracer_event->type = TRACER_EVENT_TYPE_UNRECOGNIZED;
+		}
+		break;
+	}
+}
+
+static u64 get_block_timestamp(struct mlx5_fw_tracer *tracer, u64 *ts_event)
+{
+	struct tracer_event tracer_event;
+	u8 event_id;
+
+	event_id = MLX5_GET(tracer_event, ts_event, event_id);
+
+	if (event_id == TRACER_EVENT_TYPE_TIMESTAMP)
+		poll_trace(tracer, &tracer_event, ts_event);
+	else
+		tracer_event.timestamp_event.timestamp = 0;
+
+	return tracer_event.timestamp_event.timestamp;
+}
+
+static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer =
+			container_of(work, struct mlx5_fw_tracer, handle_traces_work);
+	u64 block_timestamp, last_block_timestamp, tmp_trace_block[TRACES_PER_BLOCK];
+	u32 block_count, start_offset, prev_start_offset, prev_consumer_index;
+	u32 trace_event_size = MLX5_ST_SZ_BYTES(tracer_event);
+	struct tracer_event tracer_event;
+	struct mlx5_core_dev *dev;
+	int i;
+
+	if (!tracer->owner)
 		return;
+
+	dev = tracer->dev;
+	block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
+	start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+	/* Copy the block to local buffer to avoid HW override while being processed*/
+	memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+	       TRACER_BLOCK_SIZE_BYTE);
+
+	block_timestamp =
+		get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
+
+	while (block_timestamp > tracer->last_timestamp) {
+		/* Check block override if its not the first block */
+		if (!tracer->last_timestamp) {
+			u64 *ts_event;
+			/* To avoid block override be the HW in case of buffer
+			 * wraparound, the time stamp of the previous block
+			 * should be compared to the last timestamp handled
+			 * by the driver.
+			 */
+			prev_consumer_index =
+				(tracer->buff.consumer_index - 1) & (block_count - 1);
+			prev_start_offset = prev_consumer_index * TRACER_BLOCK_SIZE_BYTE;
+
+			ts_event = tracer->buff.log_buf + prev_start_offset +
+				   (TRACES_PER_BLOCK - 1) * trace_event_size;
+			last_block_timestamp = get_block_timestamp(tracer, ts_event);
+			/* If previous timestamp different from last stored
+			 * timestamp then there is a good chance that the
+			 * current buffer is overwritten and therefore should
+			 * not be parsed.
+			 */
+			if (tracer->last_timestamp != last_block_timestamp) {
+				mlx5_core_warn(dev, "FWTracer: Events were lost\n");
+				tracer->last_timestamp = block_timestamp;
+				tracer->buff.consumer_index =
+					(tracer->buff.consumer_index + 1) & (block_count - 1);
+				break;
+			}
+		}
+
+		/* Parse events */
+		for (i = 0; i < TRACES_PER_BLOCK ; i++)
+			poll_trace(tracer, &tracer_event, &tmp_trace_block[i]);
+
+		tracer->buff.consumer_index =
+			(tracer->buff.consumer_index + 1) & (block_count - 1);
+
+		tracer->last_timestamp = block_timestamp;
+		start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
+		memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
+		       TRACER_BLOCK_SIZE_BYTE);
+		block_timestamp = get_block_timestamp(tracer,
+						      &tmp_trace_block[TRACES_PER_BLOCK - 1]);
 	}
 
+	mlx5_fw_tracer_arm(dev);
+}
+
+static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_conf)] = {0};
+	int err;
+
+	MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
+	MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
+		 ilog2(TRACER_BUFFER_PAGE_NUM));
+	MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CONF, 0, 1);
+	if (err)
+		mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
+
+	return err;
+}
+
+static int mlx5_fw_tracer_set_mtrc_ctrl(struct mlx5_fw_tracer *tracer, u8 status, u8 arm)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	u32 out[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	u32 in[MLX5_ST_SZ_DW(mtrc_ctrl)] = {0};
+	int err;
+
+	MLX5_SET(mtrc_ctrl, in, modify_field_select, TRACE_STATUS);
+	MLX5_SET(mtrc_ctrl, in, trace_status, status);
+	MLX5_SET(mtrc_ctrl, in, arm_event, arm);
+
+	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
+				   MLX5_REG_MTRC_CTRL, 0, 1);
+
+	if (!err && status)
+		tracer->last_timestamp = 0;
+
+	return err;
+}
+
+static int mlx5_fw_tracer_start(struct mlx5_fw_tracer *tracer)
+{
+	struct mlx5_core_dev *dev = tracer->dev;
+	int err;
+
 	err = mlx5_fw_tracer_ownership_acquire(tracer);
 	if (err) {
 		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+		/* Don't fail since ownership can be acquired on a later FW event */
+		return 0;
+	}
+
+	err = mlx5_fw_tracer_set_mtrc_conf(tracer);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Failed to set tracer configuration %d\n", err);
+		goto release_ownership;
+	}
+
+	/* enable tracer & trace events */
+	err = mlx5_fw_tracer_set_mtrc_ctrl(tracer, 1, 1);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Failed to enable tracer %d\n", err);
+		goto release_ownership;
+	}
+
+	return 0;
+
+release_ownership:
+	mlx5_fw_tracer_ownership_release(tracer);
+	return err;
+}
+
+static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
+{
+	struct mlx5_fw_tracer *tracer =
+		container_of(work, struct mlx5_fw_tracer, ownership_change_work);
+
+	if (tracer->owner) {
+		tracer->owner = false;
+		tracer->buff.consumer_index = 0;
 		return;
 	}
+
+	mlx5_fw_tracer_start(tracer);
 }
 
+/* Create software resources (Buffers, etc ..) */
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fw_tracer *tracer = NULL;
@@ -361,6 +580,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 
 	INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change);
 	INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db);
+	INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces);
+
 
 	err = mlx5_query_mtrc_caps(tracer);
 	if (err) {
@@ -392,6 +613,9 @@ free_tracer:
 	return ERR_PTR(err);
 }
 
+/* Create HW resources + start tracer
+ * must be called before Async EQ is created
+ */
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 {
 	struct mlx5_core_dev *dev;
@@ -417,22 +641,25 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 		goto err_dealloc_pd;
 	}
 
-	err = mlx5_fw_tracer_ownership_acquire(tracer);
-	if (err) /* Don't fail since ownership can be acquired on a later FW event */
-		mlx5_core_dbg(dev, "FWTracer: Ownership was not granted %d\n", err);
+	mlx5_fw_tracer_start(tracer);
 
 	return 0;
+
 err_dealloc_pd:
 	mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
 	return err;
 }
 
+/* Stop tracer + Cleanup HW resources
+ * must be called after Async EQ is destroyed
+ */
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 {
 	if (IS_ERR_OR_NULL(tracer))
 		return;
 
 	cancel_work_sync(&tracer->ownership_change_work);
+	cancel_work_sync(&tracer->handle_traces_work);
 
 	if (tracer->owner)
 		mlx5_fw_tracer_ownership_release(tracer);
@@ -441,6 +668,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 	mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
 }
 
+/* Free software resources (Buffers, etc ..) */
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 {
 	if (IS_ERR_OR_NULL(tracer))
@@ -454,4 +682,26 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 	kfree(tracer);
 }
 
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	struct mlx5_fw_tracer *tracer = dev->tracer;
+
+	if (!tracer)
+		return;
+
+	switch (eqe->sub_type) {
+	case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
+		if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state))
+			queue_work(tracer->work_queue, &tracer->ownership_change_work);
+		break;
+	case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
+		if (likely(tracer->str_db.loaded))
+			queue_work(tracer->work_queue, &tracer->handle_traces_work);
+		break;
+	default:
+		mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
+			      eqe->sub_type);
+	}
+}
+
 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 66cb7e7ada28..3915e91486b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -43,6 +43,9 @@
 #define TRACER_BUFFER_CHUNK 4096
 #define TRACE_BUFFER_SIZE_BYTE (TRACER_BUFFER_PAGE_NUM * TRACER_BUFFER_CHUNK)
 
+#define TRACER_BLOCK_SIZE_BYTE 256
+#define TRACES_PER_BLOCK 32
+
 struct mlx5_fw_tracer {
 	struct mlx5_core_dev *dev;
 	bool owner;
@@ -69,8 +72,11 @@ struct mlx5_fw_tracer {
 		dma_addr_t dma;
 		u32 size;
 		struct mlx5_core_mkey mkey;
-
+		u32 consumer_index;
 	} buff;
+
+	u64 last_timestamp;
+	struct work_struct handle_traces_work;
 };
 
 enum mlx5_fw_tracer_ownership_state {
@@ -78,7 +84,70 @@ enum mlx5_fw_tracer_ownership_state {
 	MLX5_FW_TRACER_ACQUIRE_OWNERSHIP,
 };
 
+enum tracer_ctrl_fields_select {
+	TRACE_STATUS = 1 << 0,
+};
+
+enum tracer_event_type {
+	TRACER_EVENT_TYPE_STRING,
+	TRACER_EVENT_TYPE_TIMESTAMP = 0xFF,
+	TRACER_EVENT_TYPE_UNRECOGNIZED,
+};
+
+enum tracing_mode {
+	TRACE_TO_MEMORY = 1 << 0,
+};
+
+struct tracer_timestamp_event {
+	u64        timestamp;
+	u8         unreliable;
+};
+
+struct tracer_string_event {
+	u32        timestamp;
+	u32        tmsn;
+	u32        tdsn;
+	u32        string_param;
+};
+
+struct tracer_event {
+	bool      lost_event;
+	u32       type;
+	u8        event_id;
+	union {
+		struct tracer_string_event string_event;
+		struct tracer_timestamp_event timestamp_event;
+	};
+};
+
+struct mlx5_ifc_tracer_event_bits {
+	u8         lost[0x1];
+	u8         timestamp[0x7];
+	u8         event_id[0x8];
+	u8         event_data[0x30];
+};
+
+struct mlx5_ifc_tracer_string_event_bits {
+	u8         lost[0x1];
+	u8         timestamp[0x7];
+	u8         event_id[0x8];
+	u8         tmsn[0xd];
+	u8         tdsn[0x3];
+	u8         string_param[0x20];
+};
+
+struct mlx5_ifc_tracer_timestamp_event_bits {
+	u8         timestamp7_0[0x8];
+	u8         event_id[0x8];
+	u8         urts[0x3];
+	u8         timestamp52_40[0xd];
+	u8         timestamp39_8[0x20];
+};
+
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { return; }
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 406c23862f5f..7669b4380779 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -40,6 +40,7 @@
 #include "mlx5_core.h"
 #include "fpga/core.h"
 #include "eswitch.h"
+#include "diag/fw_tracer.h"
 
 enum {
 	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
@@ -168,6 +169,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
 	case MLX5_EVENT_TYPE_GENERAL_EVENT:
 		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
+	case MLX5_EVENT_TYPE_DEVICE_TRACER:
+		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
 	default:
 		return "Unrecognized event";
 	}
@@ -576,6 +579,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 		case MLX5_EVENT_TYPE_GENERAL_EVENT:
 			general_event_handler(dev, eqe);
 			break;
+
+		case MLX5_EVENT_TYPE_DEVICE_TRACER:
+			mlx5_fw_tracer_event(dev, eqe);
+			break;
+
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -853,6 +861,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, temp_warn_event))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);
 
+	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0566c6a94805..d489494b0a84 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -332,6 +332,13 @@ enum mlx5_event {
 
 	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
 	MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
+
+	MLX5_EVENT_TYPE_DEVICE_TRACER      = 0x26,
+};
+
+enum {
+	MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0,
+	MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1,
 };
 
 enum {
-- 
cgit v1.2.3


From 51bbf9bee34ff5d4006d266f24a54dc9c1669eb5 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 19 Jul 2018 17:27:43 -0500
Subject: PCI: hotplug: Demidlayer registration with the core

When a hotplug driver calls pci_hp_register(), all steps necessary for
registration are carried out in one go, including creation of a kobject
and addition to sysfs.  That's a problem for pciehp once it's converted
to enable/disable the slot exclusively from the IRQ thread:  The thread
needs to be spawned after creation of the kobject (because it uses the
kobject's name), but before addition to sysfs (because it will handle
enable/disable requests submitted via sysfs).

pci_hp_deregister() does offer a ->release callback that's invoked
after deletion from sysfs and before destruction of the kobject.  But
because pci_hp_register() doesn't offer a counterpart, hotplug drivers'
->probe and ->remove code becomes asymmetric, which is error prone
as recently discovered use-after-free bugs in pciehp's ->remove hook
have shown.

In a sense, this appears to be a case of the midlayer antipattern:

   "The core thesis of the "midlayer mistake" is that midlayers are
    bad and should not exist.  That common functionality which it is
    so tempting to put in a midlayer should instead be provided as
    library routines which can [be] used, augmented, or ignored by
    each bottom level driver independently.  Thus every subsystem
    that supports multiple implementations (or drivers) should
    provide a very thin top layer which calls directly into the
    bottom layer drivers, and a rich library of support code that
    eases the implementation of those drivers.  This library is
    available to, but not forced upon, those drivers."
        --  Neil Brown (2009), https://lwn.net/Articles/336262/

The presence of midlayer traits in the PCI hotplug core might be ascribed
to its age:  When it was introduced in February 2002, the blessings of a
library approach might not have been well known:
https://git.kernel.org/tglx/history/c/a8a2069f432c

For comparison, the driver core does offer split functions for creating
a kobject (device_initialize()) and addition to sysfs (device_add()) as
an alternative to carrying out everything at once (device_register()).
This was introduced in October 2002:
https://git.kernel.org/tglx/history/c/8b290eb19962

The odd ->release callback in the PCI hotplug core was added in 2003:
https://git.kernel.org/tglx/history/c/69f8d663b595

Clearly, a library approach would not force every hotplug driver to
implement a ->release callback, but rather allow the driver to remove
the sysfs files, release its data structures and finally destroy the
kobject.  Alternatively, a driver may choose to remove everything with
pci_hp_deregister(), then release its data structures.

To this end, offer drivers pci_hp_initialize() and pci_hp_add() as a
split-up version of pci_hp_register().  Likewise, offer pci_hp_del()
and pci_hp_destroy() as a split-up version of pci_hp_deregister().

Eliminate the ->release callback and move its code into each driver's
teardown routine.

Declare pci_hp_deregister() void, in keeping with the usual kernel
pattern that enablement can fail, but disablement cannot.  It only
returned an error if the caller passed in a NULL pointer or a slot which
has never or is no longer registered or is sharing its name with another
slot.  Those would be bugs, so WARN about them.  Few hotplug drivers
actually checked the return value and those that did only printed a
useless error message to dmesg.  Remove that.

For most drivers the conversion was straightforward since it doesn't
matter whether the code in the ->release callback is executed before or
after destruction of the kobject.  But in the case of ibmphp, it was
unclear to me whether setting slot_cur->ctrl and slot_cur->bus_on to
NULL needs to happen before the kobject is destroyed, so I erred on
the side of caution and ensured that the order stays the same.  Another
nontrivial case is pnv_php, I've found the list and kref logic difficult
to understand, however my impression was that it is safe to delete the
list element and drop the references until after the kobject is
destroyed.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>  # drivers/platform/x86
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Scott Murray <scott@spiteful.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Corentin Chary <corentin.chary@gmail.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Andy Shevchenko <andy@infradead.org>
---
 drivers/pci/hotplug/acpiphp_core.c      |  22 +----
 drivers/pci/hotplug/cpci_hotplug_core.c |  14 +---
 drivers/pci/hotplug/cpqphp_core.c       |  16 +---
 drivers/pci/hotplug/ibmphp_core.c       |  15 +++-
 drivers/pci/hotplug/ibmphp_ebda.c       |  20 -----
 drivers/pci/hotplug/pci_hotplug_core.c  | 139 +++++++++++++++++++++++---------
 drivers/pci/hotplug/pciehp_core.c       |  19 ++---
 drivers/pci/hotplug/pnv_php.c           |   5 +-
 drivers/pci/hotplug/rpaphp_core.c       |   2 +-
 drivers/pci/hotplug/rpaphp_slot.c       |  13 +--
 drivers/pci/hotplug/s390_pci_hpc.c      |  13 +--
 drivers/pci/hotplug/sgi_hotplug.c       |   9 ++-
 drivers/pci/hotplug/shpchp_core.c       |  20 +----
 drivers/platform/x86/asus-wmi.c         |  12 +--
 drivers/platform/x86/eeepc-laptop.c     |  12 +--
 include/linux/pci_hotplug.h             |  15 +++-
 16 files changed, 168 insertions(+), 178 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index 12b5655fd390..ad32ffbc4b91 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -254,20 +254,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 /* callback routine to initialize 'struct slot' for each slot */
 int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 				  unsigned int sun)
@@ -287,7 +273,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	slot->hotplug_slot->info = &slot->info;
 
 	slot->hotplug_slot->private = slot;
-	slot->hotplug_slot->release = &release_slot;
 	slot->hotplug_slot->ops = &acpi_hotplug_slot_ops;
 
 	slot->acpi_slot = acpiphp_slot;
@@ -324,13 +309,12 @@ error:
 void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 {
 	struct slot *slot = acpiphp_slot->slot;
-	int retval = 0;
 
 	pr_info("Slot [%s] unregistered\n", slot_name(slot));
 
-	retval = pci_hp_deregister(slot->hotplug_slot);
-	if (retval)
-		pr_err("pci_hp_deregister failed with error %d\n", retval);
+	pci_hp_deregister(slot->hotplug_slot);
+	kfree(slot->hotplug_slot);
+	kfree(slot);
 }
 
 
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 07b533adc9df..52a339baf06c 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -195,10 +195,8 @@ get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
+static void release_slot(struct slot *slot)
 {
-	struct slot *slot = hotplug_slot->private;
-
 	kfree(slot->hotplug_slot->info);
 	kfree(slot->hotplug_slot);
 	pci_dev_put(slot->dev);
@@ -253,7 +251,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		snprintf(name, SLOT_NAME_SIZE, "%02x:%02x", bus->number, i);
 
 		hotplug_slot->private = slot;
-		hotplug_slot->release = &release_slot;
 		hotplug_slot->ops = &cpci_hotplug_slot_ops;
 
 		/*
@@ -308,12 +305,8 @@ cpci_hp_unregister_bus(struct pci_bus *bus)
 			slots--;
 
 			dbg("deregistering slot %s", slot_name(slot));
-			status = pci_hp_deregister(slot->hotplug_slot);
-			if (status) {
-				err("pci_hp_deregister failed with error %d",
-				    status);
-				break;
-			}
+			pci_hp_deregister(slot->hotplug_slot);
+			release_slot(slot);
 		}
 	}
 	up_write(&list_rwsem);
@@ -623,6 +616,7 @@ cleanup_slots(void)
 	list_for_each_entry_safe(slot, tmp, &slot_list, slot_list) {
 		list_del(&slot->slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		release_slot(slot);
 	}
 cleanup_null:
 	up_write(&list_rwsem);
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 1797e36ec586..5a06636e910a 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -266,17 +266,6 @@ static void __iomem *get_SMBIOS_entry(void __iomem *smbios_start,
 	return previous;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static int ctrl_slot_cleanup(struct controller *ctrl)
 {
 	struct slot *old_slot, *next_slot;
@@ -285,9 +274,11 @@ static int ctrl_slot_cleanup(struct controller *ctrl)
 	ctrl->slot = NULL;
 
 	while (old_slot) {
-		/* memory will be freed by the release_slot callback */
 		next_slot = old_slot->next;
 		pci_hp_deregister(old_slot->hotplug_slot);
+		kfree(old_slot->hotplug_slot->info);
+		kfree(old_slot->hotplug_slot);
+		kfree(old_slot);
 		old_slot = next_slot;
 	}
 
@@ -678,7 +669,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			((read_slot_enable(ctrl) << 2) >> ctrl_slot) & 0x04;
 
 		/* register this slot with the hotplug pci core */
-		hotplug_slot->release = &release_slot;
 		hotplug_slot->private = slot;
 		snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
 		hotplug_slot->ops = &cpqphp_hotplug_slot_ops;
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 1869b0411ce0..4ea57e9019f1 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -673,7 +673,20 @@ static void free_slots(void)
 
 	list_for_each_entry_safe(slot_cur, next, &ibmphp_slot_head,
 				 ibm_slot_list) {
-		pci_hp_deregister(slot_cur->hotplug_slot);
+		pci_hp_del(slot_cur->hotplug_slot);
+		slot_cur->ctrl = NULL;
+		slot_cur->bus_on = NULL;
+
+		/*
+		 * We don't want to actually remove the resources,
+		 * since ibmphp_free_resources() will do just that.
+		 */
+		ibmphp_unconfigure_card(&slot_cur, -1);
+
+		pci_hp_destroy(slot_cur->hotplug_slot);
+		kfree(slot_cur->hotplug_slot->info);
+		kfree(slot_cur->hotplug_slot);
+		kfree(slot_cur);
 	}
 	debug("%s -- exit\n", __func__);
 }
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 64549aa24c0f..6f8e90e3ec08 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -699,25 +699,6 @@ static int fillslotinfo(struct hotplug_slot *hotplug_slot)
 	return rc;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot;
-
-	if (!hotplug_slot || !hotplug_slot->private)
-		return;
-
-	slot = hotplug_slot->private;
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	slot->ctrl = NULL;
-	slot->bus_on = NULL;
-
-	/* we don't want to actually remove the resources, since free_resources will do just that */
-	ibmphp_unconfigure_card(&slot, -1);
-
-	kfree(slot);
-}
-
 static struct pci_driver ibmphp_driver;
 
 /*
@@ -941,7 +922,6 @@ static int __init ebda_rsrc_controller(void)
 			tmp_slot->hotplug_slot = hp_slot_ptr;
 
 			hp_slot_ptr->private = tmp_slot;
-			hp_slot_ptr->release = release_slot;
 
 			rc = fillslotinfo(hp_slot_ptr);
 			if (rc)
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index fd93783a87b0..90fde5f106d8 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -396,8 +396,9 @@ static struct hotplug_slot *get_slot_from_name(const char *name)
  * @owner: caller module owner
  * @mod_name: caller module name
  *
- * Registers a hotplug slot with the pci hotplug subsystem, which will allow
- * userspace interaction to the slot.
+ * Prepares a hotplug slot for in-kernel use and immediately publishes it to
+ * user space in one go.  Drivers may alternatively carry out the two steps
+ * separately by invoking pci_hp_initialize() and pci_hp_add().
  *
  * Returns 0 if successful, anything else for an error.
  */
@@ -406,54 +407,91 @@ int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *bus,
 		      struct module *owner, const char *mod_name)
 {
 	int result;
+
+	result = __pci_hp_initialize(slot, bus, devnr, name, owner, mod_name);
+	if (result)
+		return result;
+
+	result = pci_hp_add(slot);
+	if (result)
+		pci_hp_destroy(slot);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_register);
+
+/**
+ * __pci_hp_initialize - prepare hotplug slot for in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to initialize
+ * @bus: bus this slot is on
+ * @devnr: slot number
+ * @name: name registered with kobject core
+ * @owner: caller module owner
+ * @mod_name: caller module name
+ *
+ * Allocate and fill in a PCI slot for use by a hotplug driver.  Once this has
+ * been called, the driver may invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver must be prepared to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus,
+			int devnr, const char *name, struct module *owner,
+			const char *mod_name)
+{
 	struct pci_slot *pci_slot;
 
 	if (slot == NULL)
 		return -ENODEV;
 	if ((slot->info == NULL) || (slot->ops == NULL))
 		return -EINVAL;
-	if (slot->release == NULL) {
-		dbg("Why are you trying to register a hotplug slot without a proper release function?\n");
-		return -EINVAL;
-	}
 
 	slot->ops->owner = owner;
 	slot->ops->mod_name = mod_name;
 
-	mutex_lock(&pci_hp_mutex);
 	/*
 	 * No problems if we call this interface from both ACPI_PCI_SLOT
 	 * driver and call it here again. If we've already created the
 	 * pci_slot, the interface will simply bump the refcount.
 	 */
 	pci_slot = pci_create_slot(bus, devnr, name, slot);
-	if (IS_ERR(pci_slot)) {
-		result = PTR_ERR(pci_slot);
-		goto out;
-	}
+	if (IS_ERR(pci_slot))
+		return PTR_ERR(pci_slot);
 
 	slot->pci_slot = pci_slot;
 	pci_slot->hotplug = slot;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__pci_hp_initialize);
 
-	list_add(&slot->slot_list, &pci_hotplug_slot_list);
+/**
+ * pci_hp_add - publish hotplug slot to user space
+ * @slot: pointer to the &struct hotplug_slot to publish
+ *
+ * Make a hotplug slot's sysfs interface available and inform user space of its
+ * addition by sending a uevent.  The hotplug driver must be prepared to handle
+ * all &struct hotplug_slot_ops callbacks from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+int pci_hp_add(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
+	int result;
 
 	result = fs_add_slot(pci_slot);
 	if (result)
-		goto err_list_del;
+		return result;
 
 	kobject_uevent(&pci_slot->kobj, KOBJ_ADD);
-	dbg("Added slot %s to the list\n", name);
-	goto out;
-
-err_list_del:
-	list_del(&slot->slot_list);
-	pci_slot->hotplug = NULL;
-	pci_destroy_slot(pci_slot);
-out:
+	mutex_lock(&pci_hp_mutex);
+	list_add(&slot->slot_list, &pci_hotplug_slot_list);
 	mutex_unlock(&pci_hp_mutex);
-	return result;
+	dbg("Added slot %s to the list\n", hotplug_slot_name(slot));
+	return 0;
 }
-EXPORT_SYMBOL_GPL(__pci_hp_register);
+EXPORT_SYMBOL_GPL(pci_hp_add);
 
 /**
  * pci_hp_deregister - deregister a hotplug_slot with the PCI hotplug subsystem
@@ -464,35 +502,62 @@ EXPORT_SYMBOL_GPL(__pci_hp_register);
  *
  * Returns 0 if successful, anything else for an error.
  */
-int pci_hp_deregister(struct hotplug_slot *slot)
+void pci_hp_deregister(struct hotplug_slot *slot)
+{
+	pci_hp_del(slot);
+	pci_hp_destroy(slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_deregister);
+
+/**
+ * pci_hp_del - unpublish hotplug slot from user space
+ * @slot: pointer to the &struct hotplug_slot to unpublish
+ *
+ * Remove a hotplug slot's sysfs interface.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_del(struct hotplug_slot *slot)
 {
 	struct hotplug_slot *temp;
-	struct pci_slot *pci_slot;
 
-	if (!slot)
-		return -ENODEV;
+	if (WARN_ON(!slot))
+		return;
 
 	mutex_lock(&pci_hp_mutex);
 	temp = get_slot_from_name(hotplug_slot_name(slot));
-	if (temp != slot) {
+	if (WARN_ON(temp != slot)) {
 		mutex_unlock(&pci_hp_mutex);
-		return -ENODEV;
+		return;
 	}
 
 	list_del(&slot->slot_list);
-
-	pci_slot = slot->pci_slot;
-	fs_remove_slot(pci_slot);
+	mutex_unlock(&pci_hp_mutex);
 	dbg("Removed slot %s from the list\n", hotplug_slot_name(slot));
+	fs_remove_slot(slot->pci_slot);
+}
+EXPORT_SYMBOL_GPL(pci_hp_del);
+
+/**
+ * pci_hp_destroy - remove hotplug slot from in-kernel use
+ * @slot: pointer to the &struct hotplug_slot to destroy
+ *
+ * Destroy a PCI slot used by a hotplug driver.  Once this has been called,
+ * the driver may no longer invoke hotplug_slot_name() to get the slot's
+ * unique name.  The driver no longer needs to handle a ->reset_slot callback
+ * from this point on.
+ *
+ * Returns 0 on success or a negative int on error.
+ */
+void pci_hp_destroy(struct hotplug_slot *slot)
+{
+	struct pci_slot *pci_slot = slot->pci_slot;
 
-	slot->release(slot);
+	slot->pci_slot = NULL;
 	pci_slot->hotplug = NULL;
 	pci_destroy_slot(pci_slot);
-	mutex_unlock(&pci_hp_mutex);
-
-	return 0;
 }
-EXPORT_SYMBOL_GPL(pci_hp_deregister);
+EXPORT_SYMBOL_GPL(pci_hp_destroy);
 
 /**
  * pci_hp_change_slot_info - changes the slot's information structure in the core
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index b360645377c2..37d8f81e548f 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -56,17 +56,6 @@ static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 static int reset_slot(struct hotplug_slot *slot, int probe);
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->ops);
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static int init_slot(struct controller *ctrl)
 {
 	struct slot *slot = ctrl->slot;
@@ -107,7 +96,6 @@ static int init_slot(struct controller *ctrl)
 	/* register this slot with the hotplug pci core */
 	hotplug->info = info;
 	hotplug->private = slot;
-	hotplug->release = &release_slot;
 	hotplug->ops = ops;
 	slot->hotplug_slot = hotplug;
 	snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
@@ -127,7 +115,12 @@ out:
 
 static void cleanup_slot(struct controller *ctrl)
 {
-	pci_hp_deregister(ctrl->slot->hotplug_slot);
+	struct hotplug_slot *hotplug_slot = ctrl->slot->hotplug_slot;
+
+	pci_hp_deregister(hotplug_slot);
+	kfree(hotplug_slot->ops);
+	kfree(hotplug_slot->info);
+	kfree(hotplug_slot);
 }
 
 /*
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 6c2e8d7307c6..3276a5e4c430 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -538,9 +538,8 @@ static struct hotplug_slot_ops php_slot_ops = {
 	.disable_slot		= pnv_php_disable_slot,
 };
 
-static void pnv_php_release(struct hotplug_slot *slot)
+static void pnv_php_release(struct pnv_php_slot *php_slot)
 {
-	struct pnv_php_slot *php_slot = slot->private;
 	unsigned long flags;
 
 	/* Remove from global or child list */
@@ -596,7 +595,6 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
 	php_slot->power_state_check     = false;
 	php_slot->slot.ops              = &php_slot_ops;
 	php_slot->slot.info             = &php_slot->slot_info;
-	php_slot->slot.release          = pnv_php_release;
 	php_slot->slot.private          = php_slot;
 
 	INIT_LIST_HEAD(&php_slot->children);
@@ -924,6 +922,7 @@ static void pnv_php_unregister_one(struct device_node *dn)
 
 	php_slot->state = PNV_PHP_STATE_OFFLINE;
 	pci_hp_deregister(&php_slot->slot);
+	pnv_php_release(php_slot);
 	pnv_php_put_slot(php_slot);
 }
 
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index fb5e0845429d..857c358b727b 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -404,13 +404,13 @@ static void __exit cleanup_slots(void)
 	/*
 	 * Unregister all of our slots with the pci_hotplug subsystem,
 	 * and free up all memory that we had allocated.
-	 * memory will be freed in release_slot callback.
 	 */
 
 	list_for_each_entry_safe(slot, next, &rpaphp_slot_head,
 				 rpaphp_slot_list) {
 		list_del(&slot->rpaphp_slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		dealloc_slot_struct(slot);
 	}
 	return;
 }
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 3840a2075e6a..b916c8e4372d 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -19,12 +19,6 @@
 #include "rpaphp.h"
 
 /* free up the memory used by a slot */
-static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = (struct slot *) hotplug_slot->private;
-	dealloc_slot_struct(slot);
-}
-
 void dealloc_slot_struct(struct slot *slot)
 {
 	kfree(slot->hotplug_slot->info);
@@ -56,7 +50,6 @@ struct slot *alloc_slot_struct(struct device_node *dn,
 	slot->power_domain = power_domain;
 	slot->hotplug_slot->private = slot;
 	slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops;
-	slot->hotplug_slot->release = &rpaphp_release_slot;
 
 	return (slot);
 
@@ -90,10 +83,8 @@ int rpaphp_deregister_slot(struct slot *slot)
 		__func__, slot->name);
 
 	list_del(&slot->rpaphp_slot_list);
-
-	retval = pci_hp_deregister(php_slot);
-	if (retval)
-		err("Problem unregistering a slot %s\n", slot->name);
+	pci_hp_deregister(php_slot);
+	dealloc_slot_struct(slot);
 
 	dbg("%s - Exit: rc[%d]\n", __func__, retval);
 	return retval;
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index ffdc2977395d..93b5341d282c 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -130,15 +130,6 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static struct hotplug_slot_ops s390_hotplug_slot_ops = {
 	.enable_slot =		enable_slot,
 	.disable_slot =		disable_slot,
@@ -175,7 +166,6 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	hotplug_slot->info = info;
 
 	hotplug_slot->ops = &s390_hotplug_slot_ops;
-	hotplug_slot->release = &release_slot;
 
 	get_power_status(hotplug_slot, &info->power_status);
 	get_adapter_status(hotplug_slot, &info->adapter_status);
@@ -209,5 +199,8 @@ void zpci_exit_slot(struct zpci_dev *zdev)
 			continue;
 		list_del(&slot->slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
+		kfree(slot->hotplug_slot->info);
+		kfree(slot->hotplug_slot);
+		kfree(slot);
 	}
 }
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index 78b6bdbb3a39..babd23409f61 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -628,7 +628,6 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 			goto alloc_err;
 		}
 		bss_hotplug_slot->ops = &sn_hotplug_slot_ops;
-		bss_hotplug_slot->release = &sn_release_slot;
 
 		rc = pci_hp_register(bss_hotplug_slot, pci_bus, device, name);
 		if (rc)
@@ -656,8 +655,10 @@ alloc_err:
 		sn_release_slot(bss_hotplug_slot);
 
 	/* destroy anything else on the list */
-	while ((bss_hotplug_slot = sn_hp_destroy()))
+	while ((bss_hotplug_slot = sn_hp_destroy())) {
 		pci_hp_deregister(bss_hotplug_slot);
+		sn_release_slot(bss_hotplug_slot);
+	}
 
 	return rc;
 }
@@ -703,8 +704,10 @@ static void __exit sn_pci_hotplug_exit(void)
 {
 	struct hotplug_slot *bss_hotplug_slot;
 
-	while ((bss_hotplug_slot = sn_hp_destroy()))
+	while ((bss_hotplug_slot = sn_hp_destroy())) {
 		pci_hp_deregister(bss_hotplug_slot);
+		sn_release_slot(bss_hotplug_slot);
+	}
 
 	if (!list_empty(&sn_hp_list))
 		printk(KERN_ERR "%s: internal list is not empty\n", __FILE__);
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 8e3c6ce12f31..97cee23f3d51 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -61,22 +61,6 @@ static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
 	.get_adapter_status =	get_adapter_status,
 };
 
-/**
- * release_slot - free up the memory used by a slot
- * @hotplug_slot: slot to free
- */
-static void release_slot(struct hotplug_slot *hotplug_slot)
-{
-	struct slot *slot = hotplug_slot->private;
-
-	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
-		 __func__, slot_name(slot));
-
-	kfree(slot->hotplug_slot->info);
-	kfree(slot->hotplug_slot);
-	kfree(slot);
-}
-
 static int init_slots(struct controller *ctrl)
 {
 	struct slot *slot;
@@ -125,7 +109,6 @@ static int init_slots(struct controller *ctrl)
 
 		/* register this slot with the hotplug pci core */
 		hotplug_slot->private = slot;
-		hotplug_slot->release = &release_slot;
 		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
 		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
 
@@ -171,6 +154,9 @@ void cleanup_slots(struct controller *ctrl)
 		cancel_delayed_work(&slot->work);
 		destroy_workqueue(slot->wq);
 		pci_hp_deregister(slot->hotplug_slot);
+		kfree(slot->hotplug_slot->info);
+		kfree(slot->hotplug_slot);
+		kfree(slot);
 	}
 }
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 3d523ca64694..d67f32a29bb4 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -858,12 +858,6 @@ static int asus_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static void asus_cleanup_pci_hotplug(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static struct hotplug_slot_ops asus_hotplug_slot_ops = {
 	.owner = THIS_MODULE,
 	.get_adapter_status = asus_get_adapter_status,
@@ -905,7 +899,6 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 		goto error_info;
 
 	asus->hotplug_slot->private = asus;
-	asus->hotplug_slot->release = &asus_cleanup_pci_hotplug;
 	asus->hotplug_slot->ops = &asus_hotplug_slot_ops;
 	asus_get_adapter_status(asus->hotplug_slot,
 				&asus->hotplug_slot->info->adapter_status);
@@ -1051,8 +1044,11 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 	 * asus_unregister_rfkill_notifier()
 	 */
 	asus_rfkill_hotplug(asus);
-	if (asus->hotplug_slot)
+	if (asus->hotplug_slot) {
 		pci_hp_deregister(asus->hotplug_slot);
+		kfree(asus->hotplug_slot->info);
+		kfree(asus->hotplug_slot);
+	}
 	if (asus->hotplug_workqueue)
 		destroy_workqueue(asus->hotplug_workqueue);
 
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 4c38904a8a32..a4bbf6ecd1f0 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -726,12 +726,6 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static void eeepc_cleanup_pci_hotplug(struct hotplug_slot *hotplug_slot)
-{
-	kfree(hotplug_slot->info);
-	kfree(hotplug_slot);
-}
-
 static struct hotplug_slot_ops eeepc_hotplug_slot_ops = {
 	.owner = THIS_MODULE,
 	.get_adapter_status = eeepc_get_adapter_status,
@@ -758,7 +752,6 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 		goto error_info;
 
 	eeepc->hotplug_slot->private = eeepc;
-	eeepc->hotplug_slot->release = &eeepc_cleanup_pci_hotplug;
 	eeepc->hotplug_slot->ops = &eeepc_hotplug_slot_ops;
 	eeepc_get_adapter_status(eeepc->hotplug_slot,
 				 &eeepc->hotplug_slot->info->adapter_status);
@@ -837,8 +830,11 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 		eeepc->wlan_rfkill = NULL;
 	}
 
-	if (eeepc->hotplug_slot)
+	if (eeepc->hotplug_slot) {
 		pci_hp_deregister(eeepc->hotplug_slot);
+		kfree(eeepc->hotplug_slot->info);
+		kfree(eeepc->hotplug_slot);
+	}
 
 	if (eeepc->bluetooth_rfkill) {
 		rfkill_unregister(eeepc->bluetooth_rfkill);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index cf5e22103f68..a6d6650a0490 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -80,15 +80,12 @@ struct hotplug_slot_info {
  * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot
  * @info: pointer to the &struct hotplug_slot_info for the initial values for
  * this slot.
- * @release: called during pci_hp_deregister to free memory allocated in a
- * hotplug_slot structure.
  * @private: used by the hotplug pci controller driver to store whatever it
  * needs.
  */
 struct hotplug_slot {
 	struct hotplug_slot_ops		*ops;
 	struct hotplug_slot_info	*info;
-	void (*release) (struct hotplug_slot *slot);
 	void				*private;
 
 	/* Variables below this are for use only by the hotplug pci core. */
@@ -104,13 +101,23 @@ static inline const char *hotplug_slot_name(const struct hotplug_slot *slot)
 int __pci_hp_register(struct hotplug_slot *slot, struct pci_bus *pbus, int nr,
 		      const char *name, struct module *owner,
 		      const char *mod_name);
-int pci_hp_deregister(struct hotplug_slot *slot);
+int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus, int nr,
+			const char *name, struct module *owner,
+			const char *mod_name);
+int pci_hp_add(struct hotplug_slot *slot);
+
+void pci_hp_del(struct hotplug_slot *slot);
+void pci_hp_destroy(struct hotplug_slot *slot);
+void pci_hp_deregister(struct hotplug_slot *slot);
+
 int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
 					 struct hotplug_slot_info *info);
 
 /* use a define to avoid include chaining to get THIS_MODULE & friends */
 #define pci_hp_register(slot, pbus, devnr, name) \
 	__pci_hp_register(slot, pbus, devnr, name, THIS_MODULE, KBUILD_MODNAME)
+#define pci_hp_initialize(slot, bus, nr, name) \
+	__pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
 /* PCI Setting Record (Type 0) */
 struct hpp_type0 {
-- 
cgit v1.2.3


From f71e0ca4db187af7c44987e9d21e9042c3046070 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:05 +0200
Subject: net: sched: Avoid implicit chain 0 creation

Currently, chain 0 is implicitly created during block creation. However
that does not align with chain object exposure, creation and destruction
api introduced later on. So make the chain 0 behave the same way as any
other chain and only create it when it is needed. Since chain 0 is
somehow special as the qdiscs need to hold pointer to the first chain
tp, this requires to move the chain head change callback infra to the
block structure.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  5 ++-
 net/sched/cls_api.c       | 86 +++++++++++++++++++++--------------------------
 2 files changed, 43 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7432100027b7..86f4651784e8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -300,7 +300,6 @@ typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
 	struct tcf_proto __rcu *filter_chain;
-	struct list_head filter_chain_list;
 	struct list_head list;
 	struct tcf_block *block;
 	u32 index; /* chain index */
@@ -318,6 +317,10 @@ struct tcf_block {
 	bool keep_dst;
 	unsigned int offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
+	struct {
+		struct tcf_chain *chain;
+		struct list_head filter_chain_list;
+	} chain0;
 };
 
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1e8d69790d82..eb0bf9037ef9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -204,11 +204,12 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
-	INIT_LIST_HEAD(&chain->filter_chain_list);
 	list_add_tail(&chain->list, &block->chain_list);
 	chain->block = block;
 	chain->index = chain_index;
 	chain->refcnt = 1;
+	if (!chain->index)
+		block->chain0.chain = chain;
 	return chain;
 }
 
@@ -218,12 +219,16 @@ static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
 	if (item->chain_head_change)
 		item->chain_head_change(tp_head, item->chain_head_change_priv);
 }
-static void tcf_chain_head_change(struct tcf_chain *chain,
-				  struct tcf_proto *tp_head)
+
+static void tcf_chain0_head_change(struct tcf_chain *chain,
+				   struct tcf_proto *tp_head)
 {
 	struct tcf_filter_chain_list_item *item;
+	struct tcf_block *block = chain->block;
 
-	list_for_each_entry(item, &chain->filter_chain_list, list)
+	if (chain->index)
+		return;
+	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
 		tcf_chain_head_change_item(item, tp_head);
 }
 
@@ -231,7 +236,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 {
 	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
 
-	tcf_chain_head_change(chain, NULL);
+	tcf_chain0_head_change(chain, NULL);
 	while (tp) {
 		RCU_INIT_POINTER(chain->filter_chain, tp->next);
 		tcf_proto_destroy(tp, NULL);
@@ -245,8 +250,10 @@ static void tcf_chain_destroy(struct tcf_chain *chain)
 	struct tcf_block *block = chain->block;
 
 	list_del(&chain->list);
+	if (!chain->index)
+		block->chain0.chain = NULL;
 	kfree(chain);
-	if (list_empty(&block->chain_list))
+	if (list_empty(&block->chain_list) && block->refcnt == 0)
 		kfree(block);
 }
 
@@ -346,10 +353,11 @@ no_offload_dev_dec:
 }
 
 static int
-tcf_chain_head_change_cb_add(struct tcf_chain *chain,
-			     struct tcf_block_ext_info *ei,
-			     struct netlink_ext_ack *extack)
+tcf_chain0_head_change_cb_add(struct tcf_block *block,
+			      struct tcf_block_ext_info *ei,
+			      struct netlink_ext_ack *extack)
 {
+	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
 
 	item = kmalloc(sizeof(*item), GFP_KERNEL);
@@ -359,23 +367,25 @@ tcf_chain_head_change_cb_add(struct tcf_chain *chain,
 	}
 	item->chain_head_change = ei->chain_head_change;
 	item->chain_head_change_priv = ei->chain_head_change_priv;
-	if (chain->filter_chain)
-		tcf_chain_head_change_item(item, chain->filter_chain);
-	list_add(&item->list, &chain->filter_chain_list);
+	if (chain0 && chain0->filter_chain)
+		tcf_chain_head_change_item(item, chain0->filter_chain);
+	list_add(&item->list, &block->chain0.filter_chain_list);
 	return 0;
 }
 
 static void
-tcf_chain_head_change_cb_del(struct tcf_chain *chain,
-			     struct tcf_block_ext_info *ei)
+tcf_chain0_head_change_cb_del(struct tcf_block *block,
+			      struct tcf_block_ext_info *ei)
 {
+	struct tcf_chain *chain0 = block->chain0.chain;
 	struct tcf_filter_chain_list_item *item;
 
-	list_for_each_entry(item, &chain->filter_chain_list, list) {
+	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
 		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
 		    (item->chain_head_change == ei->chain_head_change &&
 		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
-			tcf_chain_head_change_item(item, NULL);
+			if (chain0)
+				tcf_chain_head_change_item(item, NULL);
 			list_del(&item->list);
 			kfree(item);
 			return;
@@ -411,8 +421,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 					  struct netlink_ext_ack *extack)
 {
 	struct tcf_block *block;
-	struct tcf_chain *chain;
-	int err;
 
 	block = kzalloc(sizeof(*block), GFP_KERNEL);
 	if (!block) {
@@ -422,14 +430,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 	INIT_LIST_HEAD(&block->chain_list);
 	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
+	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
-	/* Create chain 0 by default, it has to be always present. */
-	chain = tcf_chain_create(block, 0);
-	if (!chain) {
-		NL_SET_ERR_MSG(extack, "Failed to create new tcf chain");
-		err = -ENOMEM;
-		goto err_chain_create;
-	}
 	block->refcnt = 1;
 	block->net = net;
 	block->index = block_index;
@@ -438,10 +440,6 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 	if (!tcf_block_shared(block))
 		block->q = q;
 	return block;
-
-err_chain_create:
-	kfree(block);
-	return ERR_PTR(err);
 }
 
 static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
@@ -523,11 +521,6 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
 	return block;
 }
 
-static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
-{
-	return list_first_entry(&block->chain_list, struct tcf_chain, list);
-}
-
 struct tcf_block_owner_item {
 	struct list_head list;
 	struct Qdisc *q;
@@ -621,10 +614,9 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 
 	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
 
-	err = tcf_chain_head_change_cb_add(tcf_block_chain_zero(block),
-					   ei, extack);
+	err = tcf_chain0_head_change_cb_add(block, ei, extack);
 	if (err)
-		goto err_chain_head_change_cb_add;
+		goto err_chain0_head_change_cb_add;
 
 	err = tcf_block_offload_bind(block, q, ei, extack);
 	if (err)
@@ -634,15 +626,14 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	return 0;
 
 err_block_offload_bind:
-	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
-err_chain_head_change_cb_add:
+	tcf_chain0_head_change_cb_del(block, ei);
+err_chain0_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
 	if (created) {
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, net);
 err_block_insert:
-		kfree(tcf_block_chain_zero(block));
 		kfree(block);
 	} else {
 		block->refcnt--;
@@ -682,10 +673,10 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 
 	if (!block)
 		return;
-	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
+	tcf_chain0_head_change_cb_del(block, ei);
 	tcf_block_owner_del(block, q, ei->binder_type);
 
-	if (--block->refcnt == 0) {
+	if (block->refcnt == 1) {
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, block->net);
 
@@ -701,13 +692,14 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 
 	tcf_block_offload_unbind(block, q, ei);
 
-	if (block->refcnt == 0) {
+	if (block->refcnt == 1) {
 		/* At this point, all the chains should have refcnt >= 1. */
 		list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
 			tcf_chain_put(chain);
 
-		/* Finally, put chain 0 and allow block to be freed. */
-		tcf_chain_put(tcf_block_chain_zero(block));
+		block->refcnt--;
+		if (list_empty(&block->chain_list))
+			kfree(block);
 	}
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
@@ -947,7 +939,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
 				struct tcf_proto *tp)
 {
 	if (*chain_info->pprev == chain->filter_chain)
-		tcf_chain_head_change(chain, tp);
+		tcf_chain0_head_change(chain, tp);
 	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
 	tcf_chain_hold(chain);
@@ -960,7 +952,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 	struct tcf_proto *next = rtnl_dereference(chain_info->next);
 
 	if (tp == chain->filter_chain)
-		tcf_chain_head_change(chain, next);
+		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
 	tcf_chain_put(chain);
 }
-- 
cgit v1.2.3


From 32a4f5ecd7381f30ae3bb36dea77a150ba68af2e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:06 +0200
Subject: net: sched: introduce chain object to uapi

Allow user to create, destroy, get and dump chain objects. Do that by
extending rtnl commands by the chain-specific ones. User will now be
able to explicitly create or destroy chains (so far this was done only
automatically according the filter/act needs and refcounting). Also, the
user will receive notification about any chain creation or destuction.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h      |   1 +
 include/uapi/linux/rtnetlink.h |   7 +
 net/sched/cls_api.c            | 308 +++++++++++++++++++++++++++++++++++++++--
 security/selinux/nlmsgtab.c    |   2 +-
 4 files changed, 309 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 86f4651784e8..81ec8276db9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -304,6 +304,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	bool explicitly_created;
 };
 
 struct tcf_block {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 7d8502313c99..46399367627f 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -150,6 +150,13 @@ enum {
 	RTM_NEWCACHEREPORT = 96,
 #define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
 
+	RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+	RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+	RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index eb0bf9037ef9..e65b390336aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -262,29 +262,57 @@ static void tcf_chain_hold(struct tcf_chain *chain)
 	++chain->refcnt;
 }
 
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-				bool create)
+static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
+					  u32 chain_index)
 {
 	struct tcf_chain *chain;
 
 	list_for_each_entry(chain, &block->chain_list, list) {
-		if (chain->index == chain_index) {
-			tcf_chain_hold(chain);
+		if (chain->index == chain_index)
 			return chain;
-		}
+	}
+	return NULL;
+}
+
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+			   u32 seq, u16 flags, int event, bool unicast);
+
+struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+				bool create)
+{
+	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+
+	if (chain) {
+		tcf_chain_hold(chain);
+		return chain;
 	}
 
-	return create ? tcf_chain_create(block, chain_index) : NULL;
+	if (!create)
+		return NULL;
+	chain = tcf_chain_create(block, chain_index);
+	if (!chain)
+		return NULL;
+	tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+			RTM_NEWCHAIN, false);
+	return chain;
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
 void tcf_chain_put(struct tcf_chain *chain)
 {
-	if (--chain->refcnt == 0)
+	if (--chain->refcnt == 0) {
+		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
 		tcf_chain_destroy(chain);
+	}
 }
 EXPORT_SYMBOL(tcf_chain_put);
 
+static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
+{
+	if (chain->explicitly_created)
+		tcf_chain_put(chain);
+}
+
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
 	return block->offloadcnt;
@@ -694,8 +722,10 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 
 	if (block->refcnt == 1) {
 		/* At this point, all the chains should have refcnt >= 1. */
-		list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+			tcf_chain_put_explicitly_created(chain);
 			tcf_chain_put(chain);
+		}
 
 		block->refcnt--;
 		if (list_empty(&block->chain_list))
@@ -1609,6 +1639,264 @@ out:
 	return skb->len;
 }
 
+static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
+			      struct sk_buff *skb, struct tcf_block *block,
+			      u32 portid, u32 seq, u16 flags, int event)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
+	if (!nlh)
+		goto out_nlmsg_trim;
+	tcm = nlmsg_data(nlh);
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+	tcm->tcm_handle = 0;
+	if (block->q) {
+		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
+		tcm->tcm_parent = block->q->handle;
+	} else {
+		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
+		tcm->tcm_block_index = block->index;
+	}
+
+	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+		goto nla_put_failure;
+
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+out_nlmsg_trim:
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -EMSGSIZE;
+}
+
+static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
+			   u32 seq, u16 flags, int event, bool unicast)
+{
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct tcf_block *block = chain->block;
+	struct net *net = block->net;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	if (tc_chain_fill_node(chain, net, skb, block, portid,
+			       seq, flags, event) <= 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	if (unicast)
+		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
+}
+
+/* Add/delete/get a chain */
+
+static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
+			struct netlink_ext_ack *extack)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tca[TCA_MAX + 1];
+	struct tcmsg *t;
+	u32 parent;
+	u32 chain_index;
+	struct Qdisc *q = NULL;
+	struct tcf_chain *chain = NULL;
+	struct tcf_block *block;
+	unsigned long cl;
+	int err;
+
+	if (n->nlmsg_type != RTM_GETCHAIN &&
+	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+replay:
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+	if (err < 0)
+		return err;
+
+	t = nlmsg_data(n);
+	parent = t->tcm_parent;
+	cl = 0;
+
+	block = tcf_block_find(net, &q, &parent, &cl,
+			       t->tcm_ifindex, t->tcm_block_index, extack);
+	if (IS_ERR(block))
+		return PTR_ERR(block);
+
+	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+	if (chain_index > TC_ACT_EXT_VAL_MASK) {
+		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+		return -EINVAL;
+	}
+	chain = tcf_chain_lookup(block, chain_index);
+	if (n->nlmsg_type == RTM_NEWCHAIN) {
+		if (chain) {
+			NL_SET_ERR_MSG(extack, "Filter chain already exists");
+			return -EEXIST;
+		}
+		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+			NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
+			return -ENOENT;
+		}
+		chain = tcf_chain_create(block, chain_index);
+		if (!chain) {
+			NL_SET_ERR_MSG(extack, "Failed to create filter chain");
+			return -ENOMEM;
+		}
+	} else {
+		if (!chain) {
+			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+			return -EINVAL;
+		}
+		tcf_chain_hold(chain);
+	}
+
+	switch (n->nlmsg_type) {
+	case RTM_NEWCHAIN:
+		/* In case the chain was successfully added, take a reference
+		 * to the chain. This ensures that an empty chain
+		 * does not disappear at the end of this function.
+		 */
+		tcf_chain_hold(chain);
+		chain->explicitly_created = true;
+		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
+				RTM_NEWCHAIN, false);
+		break;
+	case RTM_DELCHAIN:
+		/* Flush the chain first as the user requested chain removal. */
+		tcf_chain_flush(chain);
+		/* In case the chain was successfully deleted, put a reference
+		 * to the chain previously taken during addition.
+		 */
+		tcf_chain_put_explicitly_created(chain);
+		break;
+	case RTM_GETCHAIN:
+		break;
+		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
+				      n->nlmsg_seq, n->nlmsg_type, true);
+		if (err < 0)
+			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		NL_SET_ERR_MSG(extack, "Unsupported message type");
+		goto errout;
+	}
+
+errout:
+	tcf_chain_put(chain);
+	if (err == -EAGAIN)
+		/* Replay the request. */
+		goto replay;
+	return err;
+}
+
+/* called with RTNL */
+static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nlattr *tca[TCA_MAX + 1];
+	struct Qdisc *q = NULL;
+	struct tcf_block *block;
+	struct tcf_chain *chain;
+	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	long index_start;
+	long index;
+	u32 parent;
+	int err;
+
+	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
+		return skb->len;
+
+	err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+	if (err)
+		return err;
+
+	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+		block = tcf_block_lookup(net, tcm->tcm_block_index);
+		if (!block)
+			goto out;
+		/* If we work with block index, q is NULL and parent value
+		 * will never be used in the following code. The check
+		 * in tcf_fill_node prevents it. However, compiler does not
+		 * see that far, so set parent to zero to silence the warning
+		 * about parent being uninitialized.
+		 */
+		parent = 0;
+	} else {
+		const struct Qdisc_class_ops *cops;
+		struct net_device *dev;
+		unsigned long cl = 0;
+
+		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+		if (!dev)
+			return skb->len;
+
+		parent = tcm->tcm_parent;
+		if (!parent) {
+			q = dev->qdisc;
+			parent = q->handle;
+		} else {
+			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+		}
+		if (!q)
+			goto out;
+		cops = q->ops->cl_ops;
+		if (!cops)
+			goto out;
+		if (!cops->tcf_block)
+			goto out;
+		if (TC_H_MIN(tcm->tcm_parent)) {
+			cl = cops->find(q, tcm->tcm_parent);
+			if (cl == 0)
+				goto out;
+		}
+		block = cops->tcf_block(q, cl, NULL);
+		if (!block)
+			goto out;
+		if (tcf_block_shared(block))
+			q = NULL;
+	}
+
+	index_start = cb->args[0];
+	index = 0;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		if ((tca[TCA_CHAIN] &&
+		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
+			continue;
+		if (index < index_start) {
+			index++;
+			continue;
+		}
+		err = tc_chain_fill_node(chain, net, skb, block,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 RTM_NEWCHAIN);
+		if (err <= 0)
+			break;
+		index++;
+	}
+
+	cb->args[0] = index;
+
+out:
+	/* If we did no progress, the error (EMSGSIZE) is real */
+	if (skb->len == 0 && err)
+		return err;
+	return skb->len;
+}
+
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1825,6 +2113,10 @@ static int __init tc_filter_init(void)
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
 		      tc_dump_tfilter, 0);
+	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
+	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
+		      tc_dump_chain, 0);
 
 	return 0;
 
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 7b7433a1a34c..74b951f55608 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -159,7 +159,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 	switch (sclass) {
 	case SECCLASS_NETLINK_ROUTE_SOCKET:
 		/* RTM_MAX always point to RTM_SETxxxx, ie RTM_NEWxxx + 3 */
-		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCACHEREPORT + 3));
+		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
 		break;
-- 
cgit v1.2.3


From 9f407f1768d3e1a5ddd7bd49fa4d1f5a26e10ed2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:07 +0200
Subject: net: sched: introduce chain templates

Allow user to set a template for newly created chains. Template lock
down the chain for particular classifier type/options combinations.
The classifier needs to support templates, otherwise kernel would
reply with error.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 12 +++++++++
 net/sched/cls_api.c       | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 81ec8276db9c..085c509c8674 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -238,6 +238,8 @@ struct tcf_result {
 	};
 };
 
+struct tcf_chain;
+
 struct tcf_proto_ops {
 	struct list_head	head;
 	char			kind[IFNAMSIZ];
@@ -263,10 +265,18 @@ struct tcf_proto_ops {
 					     tc_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
+	void *			(*tmplt_create)(struct net *net,
+						struct tcf_chain *chain,
+						struct nlattr **tca,
+						struct netlink_ext_ack *extack);
+	void			(*tmplt_destroy)(void *tmplt_priv);
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*);
+	int			(*tmplt_dump)(struct sk_buff *skb,
+					      struct net *net,
+					      void *tmplt_priv);
 
 	struct module		*owner;
 };
@@ -305,6 +315,8 @@ struct tcf_chain {
 	u32 index; /* chain index */
 	unsigned int refcnt;
 	bool explicitly_created;
+	const struct tcf_proto_ops *tmplt_ops;
+	void *tmplt_priv;
 };
 
 struct tcf_block {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e65b390336aa..5f7098b5405e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -298,10 +298,13 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
+static void tc_chain_tmplt_del(struct tcf_chain *chain);
+
 void tcf_chain_put(struct tcf_chain *chain)
 {
 	if (--chain->refcnt == 0) {
 		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+		tc_chain_tmplt_del(chain);
 		tcf_chain_destroy(chain);
 	}
 }
@@ -1258,6 +1261,12 @@ replay:
 		goto errout;
 	}
 
+	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
+		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
 			      extack);
@@ -1644,8 +1653,13 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
 			      u32 portid, u32 seq, u16 flags, int event)
 {
 	unsigned char *b = skb_tail_pointer(skb);
+	const struct tcf_proto_ops *ops;
 	struct nlmsghdr *nlh;
 	struct tcmsg *tcm;
+	void *priv;
+
+	ops = chain->tmplt_ops;
+	priv = chain->tmplt_priv;
 
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 	if (!nlh)
@@ -1666,6 +1680,13 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
 	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
 		goto nla_put_failure;
 
+	if (ops) {
+		if (nla_put_string(skb, TCA_KIND, ops->kind))
+			goto nla_put_failure;
+		if (ops->tmplt_dump(skb, net, priv) < 0)
+			goto nla_put_failure;
+	}
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
@@ -1699,6 +1720,47 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
 }
 
+static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
+			      struct nlattr **tca,
+			      struct netlink_ext_ack *extack)
+{
+	const struct tcf_proto_ops *ops;
+	void *tmplt_priv;
+
+	/* If kind is not set, user did not specify template. */
+	if (!tca[TCA_KIND])
+		return 0;
+
+	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+		return -EOPNOTSUPP;
+	}
+
+	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
+	if (IS_ERR(tmplt_priv)) {
+		module_put(ops->owner);
+		return PTR_ERR(tmplt_priv);
+	}
+	chain->tmplt_ops = ops;
+	chain->tmplt_priv = tmplt_priv;
+	return 0;
+}
+
+static void tc_chain_tmplt_del(struct tcf_chain *chain)
+{
+	const struct tcf_proto_ops *ops = chain->tmplt_ops;
+
+	/* If template ops are set, no work to do for us. */
+	if (!ops)
+		return;
+
+	ops->tmplt_destroy(chain->tmplt_priv);
+	module_put(ops->owner);
+}
+
 /* Add/delete/get a chain */
 
 static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1763,6 +1825,9 @@ replay:
 
 	switch (n->nlmsg_type) {
 	case RTM_NEWCHAIN:
+		err = tc_chain_tmplt_add(chain, net, tca, extack);
+		if (err)
+			goto errout;
 		/* In case the chain was successfully added, take a reference
 		 * to the chain. This ensures that an empty chain
 		 * does not disappear at the end of this function.
-- 
cgit v1.2.3


From 34738452739069947e528123810533f28dd8332b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 23 Jul 2018 09:23:11 +0200
Subject: net: sched: cls_flower: propagate chain teplate creation and
 destruction to drivers

Introduce a couple of flower offload commands in order to propagate
template creation/destruction events down to device drivers.
Drivers may use this information to prepare HW in an optimal way
for future filter insertions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  |  2 ++
 net/sched/cls_flower.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 4f405ca8346f..a3101582f642 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -721,6 +721,8 @@ enum tc_fl_command {
 	TC_CLSFLOWER_REPLACE,
 	TC_CLSFLOWER_DESTROY,
 	TC_CLSFLOWER_STATS,
+	TC_CLSFLOWER_TMPLT_CREATE,
+	TC_CLSFLOWER_TMPLT_DESTROY,
 };
 
 struct tc_cls_flower_offload {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f0c80758a594..6ccf60364297 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1194,6 +1194,42 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 	return 0;
 }
 
+static void fl_hw_create_tmplt(struct tcf_chain *chain,
+			       struct fl_flow_tmplt *tmplt)
+{
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = chain->block;
+	struct tcf_exts dummy_exts = { 0, };
+
+	cls_flower.common.chain_index = chain->index;
+	cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
+	cls_flower.cookie = (unsigned long) tmplt;
+	cls_flower.dissector = &tmplt->dissector;
+	cls_flower.mask = &tmplt->mask;
+	cls_flower.key = &tmplt->dummy_key;
+	cls_flower.exts = &dummy_exts;
+
+	/* We don't care if driver (any of them) fails to handle this
+	 * call. It serves just as a hint for it.
+	 */
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
+}
+
+static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
+				struct fl_flow_tmplt *tmplt)
+{
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = chain->block;
+
+	cls_flower.common.chain_index = chain->index;
+	cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
+	cls_flower.cookie = (unsigned long) tmplt;
+
+	tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
+			 &cls_flower, false);
+}
+
 static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 			     struct nlattr **tca,
 			     struct netlink_ext_ack *extack)
@@ -1224,6 +1260,8 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 
 	fl_init_dissector(&tmplt->dissector, &tmplt->mask);
 
+	fl_hw_create_tmplt(chain, tmplt);
+
 	return tmplt;
 
 errout_tmplt:
@@ -1237,6 +1275,7 @@ static void fl_tmplt_destroy(void *tmplt_priv)
 {
 	struct fl_flow_tmplt *tmplt = tmplt_priv;
 
+	fl_hw_destroy_tmplt(tmplt->chain, tmplt);
 	kfree(tmplt);
 }
 
-- 
cgit v1.2.3


From b7ff8b1036f0b0df1390ba6b5e9bc7ec458e857a Mon Sep 17 00:00:00 2001
From: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Date: Mon, 23 Jul 2018 20:51:23 -0700
Subject: rds: Extend RDS API for IPv6 support

There are many data structures (RDS socket options) used by RDS apps
which use a 32 bit integer to store IP address. To support IPv6,
struct in6_addr needs to be used. To ensure backward compatibility, a
new data structure is introduced for each of those data structures
which use a 32 bit integer to represent an IP address. And new socket
options are introduced to use those new structures. This means that
existing apps should work without a problem with the new RDS module.
For apps which want to use IPv6, those new data structures and socket
options can be used. IPv4 mapped address is used to represent IPv4
address in the new data structures.

v4: Revert changes to SO_RDS_TRANSPORT

Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h |  69 +++++++++++++++++++++++++++++++-
 net/rds/connection.c     | 101 +++++++++++++++++++++++++++++++++++++++++++----
 net/rds/ib.c             |  52 ++++++++++++++++++++++++
 net/rds/ib_mr.h          |   2 +
 net/rds/ib_rdma.c        |  11 +++++-
 net/rds/recv.c           |  25 ++++++++++++
 net/rds/tcp.c            |  44 +++++++++++++++++++++
 7 files changed, 293 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 20c6bd0b0007..dc520e1a4123 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
 /*
- * Copyright (c) 2008 Oracle.  All rights reserved.
+ * Copyright (c) 2008, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -118,7 +118,17 @@
 #define RDS_INFO_IB_CONNECTIONS		10008
 #define RDS_INFO_CONNECTION_STATS	10009
 #define RDS_INFO_IWARP_CONNECTIONS	10010
-#define RDS_INFO_LAST			10010
+
+/* PF_RDS6 options */
+#define RDS6_INFO_CONNECTIONS		10011
+#define RDS6_INFO_SEND_MESSAGES		10012
+#define RDS6_INFO_RETRANS_MESSAGES	10013
+#define RDS6_INFO_RECV_MESSAGES		10014
+#define RDS6_INFO_SOCKETS		10015
+#define RDS6_INFO_TCP_SOCKETS		10016
+#define RDS6_INFO_IB_CONNECTIONS	10017
+
+#define RDS_INFO_LAST			10017
 
 struct rds_info_counter {
 	__u8	name[32];
@@ -140,6 +150,15 @@ struct rds_info_connection {
 	__u8		flags;
 } __attribute__((packed));
 
+struct rds6_info_connection {
+	__u64		next_tx_seq;
+	__u64		next_rx_seq;
+	struct in6_addr	laddr;
+	struct in6_addr	faddr;
+	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
+	__u8		flags;
+} __attribute__((packed));
+
 #define RDS_INFO_MESSAGE_FLAG_ACK               0x01
 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
 
@@ -153,6 +172,17 @@ struct rds_info_message {
 	__u8		flags;
 } __attribute__((packed));
 
+struct rds6_info_message {
+	__u64	seq;
+	__u32	len;
+	struct in6_addr	laddr;
+	struct in6_addr	faddr;
+	__be16		lport;
+	__be16		fport;
+	__u8		flags;
+	__u8		tos;
+} __attribute__((packed));
+
 struct rds_info_socket {
 	__u32		sndbuf;
 	__be32		bound_addr;
@@ -163,6 +193,16 @@ struct rds_info_socket {
 	__u64		inum;
 } __attribute__((packed));
 
+struct rds6_info_socket {
+	__u32		sndbuf;
+	struct in6_addr	bound_addr;
+	struct in6_addr	connected_addr;
+	__be16		bound_port;
+	__be16		connected_port;
+	__u32		rcvbuf;
+	__u64		inum;
+} __attribute__((packed));
+
 struct rds_info_tcp_socket {
 	__be32          local_addr;
 	__be16          local_port;
@@ -175,6 +215,18 @@ struct rds_info_tcp_socket {
 	__u32           last_seen_una;
 } __attribute__((packed));
 
+struct rds6_info_tcp_socket {
+	struct in6_addr	local_addr;
+	__be16		local_port;
+	struct in6_addr	peer_addr;
+	__be16		peer_port;
+	__u64		hdr_rem;
+	__u64		data_rem;
+	__u32		last_sent_nxt;
+	__u32		last_expected_una;
+	__u32		last_seen_una;
+} __attribute__((packed));
+
 #define RDS_IB_GID_LEN	16
 struct rds_info_rdma_connection {
 	__be32		src_addr;
@@ -189,6 +241,19 @@ struct rds_info_rdma_connection {
 	__u32		rdma_mr_size;
 };
 
+struct rds6_info_rdma_connection {
+	struct in6_addr	src_addr;
+	struct in6_addr	dst_addr;
+	__u8		src_gid[RDS_IB_GID_LEN];
+	__u8		dst_gid[RDS_IB_GID_LEN];
+
+	__u32		max_send_wr;
+	__u32		max_recv_wr;
+	__u32		max_send_sge;
+	__u32		rdma_mr_max;
+	__u32		rdma_mr_size;
+};
+
 /* RDS message Receive Path Latency points */
 enum rds_message_rxpath_latency {
 	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 5c9ceed55dae..051e35c1e7c6 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -498,16 +498,19 @@ EXPORT_SYMBOL_GPL(rds_conn_destroy);
 
 static void __rds_inc_msg_cp(struct rds_incoming *inc,
 			     struct rds_info_iterator *iter,
-			     void *saddr, void *daddr, int flip)
+			     void *saddr, void *daddr, int flip, bool isv6)
 {
-	rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
-			  *(__be32 *)daddr, flip);
+	if (isv6)
+		rds6_inc_info_copy(inc, iter, saddr, daddr, flip);
+	else
+		rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
+				  *(__be32 *)daddr, flip);
 }
 
 static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 				      struct rds_info_iterator *iter,
 				      struct rds_info_lengths *lens,
-				      int want_send)
+				      int want_send, bool isv6)
 {
 	struct hlist_head *head;
 	struct list_head *list;
@@ -518,7 +521,10 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 	size_t i;
 	int j;
 
-	len /= sizeof(struct rds_info_message);
+	if (isv6)
+		len /= sizeof(struct rds6_info_message);
+	else
+		len /= sizeof(struct rds_info_message);
 
 	rcu_read_lock();
 
@@ -528,6 +534,9 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 			struct rds_conn_path *cp;
 			int npaths;
 
+			if (!isv6 && conn->c_isv6)
+				continue;
+
 			npaths = (conn->c_trans->t_mp_capable ?
 				 RDS_MPATH_WORKERS : 1);
 
@@ -548,7 +557,7 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 								 iter,
 								 &conn->c_laddr,
 								 &conn->c_faddr,
-								 0);
+								 0, isv6);
 				}
 
 				spin_unlock_irqrestore(&cp->cp_lock, flags);
@@ -558,7 +567,10 @@ static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
 	rcu_read_unlock();
 
 	lens->nr = total;
-	lens->each = sizeof(struct rds_info_message);
+	if (isv6)
+		lens->each = sizeof(struct rds6_info_message);
+	else
+		lens->each = sizeof(struct rds_info_message);
 }
 
 static void rds_conn_message_info(struct socket *sock, unsigned int len,
@@ -566,7 +578,15 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
 				  struct rds_info_lengths *lens,
 				  int want_send)
 {
-	rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
+	rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false);
+}
+
+static void rds6_conn_message_info(struct socket *sock, unsigned int len,
+				   struct rds_info_iterator *iter,
+				   struct rds_info_lengths *lens,
+				   int want_send)
+{
+	rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true);
 }
 
 static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
@@ -576,6 +596,13 @@ static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
 	rds_conn_message_info(sock, len, iter, lens, 1);
 }
 
+static void rds6_conn_message_info_send(struct socket *sock, unsigned int len,
+					struct rds_info_iterator *iter,
+					struct rds_info_lengths *lens)
+{
+	rds6_conn_message_info(sock, len, iter, lens, 1);
+}
+
 static void rds_conn_message_info_retrans(struct socket *sock,
 					  unsigned int len,
 					  struct rds_info_iterator *iter,
@@ -584,6 +611,14 @@ static void rds_conn_message_info_retrans(struct socket *sock,
 	rds_conn_message_info(sock, len, iter, lens, 0);
 }
 
+static void rds6_conn_message_info_retrans(struct socket *sock,
+					   unsigned int len,
+					   struct rds_info_iterator *iter,
+					   struct rds_info_lengths *lens)
+{
+	rds6_conn_message_info(sock, len, iter, lens, 0);
+}
+
 void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens,
@@ -699,6 +734,34 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
 	return 1;
 }
 
+static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
+{
+	struct rds6_info_connection *cinfo6 = buffer;
+	struct rds_connection *conn = cp->cp_conn;
+
+	cinfo6->next_tx_seq = cp->cp_next_tx_seq;
+	cinfo6->next_rx_seq = cp->cp_next_rx_seq;
+	cinfo6->laddr = conn->c_laddr;
+	cinfo6->faddr = conn->c_faddr;
+	strncpy(cinfo6->transport, conn->c_trans->t_name,
+		sizeof(cinfo6->transport));
+	cinfo6->flags = 0;
+
+	rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags),
+			  SENDING);
+	/* XXX Future: return the state rather than these funky bits */
+	rds_conn_info_set(cinfo6->flags,
+			  atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING,
+			  CONNECTING);
+	rds_conn_info_set(cinfo6->flags,
+			  atomic_read(&cp->cp_state) == RDS_CONN_UP,
+			  CONNECTED);
+	/* Just return 1 as there is no error case. This is a helper function
+	 * for rds_walk_conn_path_info() and it wants a return value.
+	 */
+	return 1;
+}
+
 static void rds_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_iterator *iter,
 			  struct rds_info_lengths *lens)
@@ -711,6 +774,18 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
 				sizeof(struct rds_info_connection));
 }
 
+static void rds6_conn_info(struct socket *sock, unsigned int len,
+			   struct rds_info_iterator *iter,
+			   struct rds_info_lengths *lens)
+{
+	u64 buffer[(sizeof(struct rds6_info_connection) + 7) / 8];
+
+	rds_walk_conn_path_info(sock, len, iter, lens,
+				rds6_conn_info_visitor,
+				buffer,
+				sizeof(struct rds6_info_connection));
+}
+
 int rds_conn_init(void)
 {
 	int ret;
@@ -732,6 +807,11 @@ int rds_conn_init(void)
 			       rds_conn_message_info_send);
 	rds_info_register_func(RDS_INFO_RETRANS_MESSAGES,
 			       rds_conn_message_info_retrans);
+	rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+	rds_info_register_func(RDS6_INFO_SEND_MESSAGES,
+			       rds6_conn_message_info_send);
+	rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES,
+			       rds6_conn_message_info_retrans);
 
 	return 0;
 }
@@ -750,6 +830,11 @@ void rds_conn_exit(void)
 				 rds_conn_message_info_send);
 	rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
 				 rds_conn_message_info_retrans);
+	rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info);
+	rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES,
+				 rds6_conn_message_info_send);
+	rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES,
+				 rds6_conn_message_info_retrans);
 }
 
 /*
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 756225c5540f..63d95ea7cdff 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -321,6 +321,43 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 	return 1;
 }
 
+/* IPv6 version of rds_ib_conn_info_visitor(). */
+static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
+				     void *buffer)
+{
+	struct rds6_info_rdma_connection *iinfo6 = buffer;
+	struct rds_ib_connection *ic;
+
+	/* We will only ever look at IB transports */
+	if (conn->c_trans != &rds_ib_transport)
+		return 0;
+
+	iinfo6->src_addr = conn->c_laddr;
+	iinfo6->dst_addr = conn->c_faddr;
+
+	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
+	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
+
+	if (rds_conn_state(conn) == RDS_CONN_UP) {
+		struct rds_ib_device *rds_ibdev;
+		struct rdma_dev_addr *dev_addr;
+
+		ic = conn->c_transport_data;
+		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
+		rdma_addr_get_sgid(dev_addr,
+				   (union ib_gid *)&iinfo6->src_gid);
+		rdma_addr_get_dgid(dev_addr,
+				   (union ib_gid *)&iinfo6->dst_gid);
+
+		rds_ibdev = ic->rds_ibdev;
+		iinfo6->max_send_wr = ic->i_send_ring.w_nr;
+		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;
+		iinfo6->max_send_sge = rds_ibdev->max_sge;
+		rds6_ib_get_mr_info(rds_ibdev, iinfo6);
+	}
+	return 1;
+}
+
 static void rds_ib_ic_info(struct socket *sock, unsigned int len,
 			   struct rds_info_iterator *iter,
 			   struct rds_info_lengths *lens)
@@ -333,6 +370,19 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
 				sizeof(struct rds_info_rdma_connection));
 }
 
+/* IPv6 version of rds_ib_ic_info(). */
+static void rds6_ib_ic_info(struct socket *sock, unsigned int len,
+			    struct rds_info_iterator *iter,
+			    struct rds_info_lengths *lens)
+{
+	u64 buffer[(sizeof(struct rds6_info_rdma_connection) + 7) / 8];
+
+	rds_for_each_conn_info(sock, len, iter, lens,
+			       rds6_ib_conn_info_visitor,
+			       buffer,
+			       sizeof(struct rds6_info_rdma_connection));
+}
+
 /*
  * Early RDS/IB was built to only bind to an address if there is an IPoIB
  * device with that address set.
@@ -441,6 +491,7 @@ void rds_ib_exit(void)
 	rds_ib_set_unloading();
 	synchronize_rcu();
 	rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+	rds_info_deregister_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
 	rds_ib_unregister_client();
 	rds_ib_destroy_nodev_conns();
 	rds_ib_sysctl_exit();
@@ -502,6 +553,7 @@ int rds_ib_init(void)
 	rds_trans_register(&rds_ib_transport);
 
 	rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
+	rds_info_register_func(RDS6_INFO_IB_CONNECTIONS, rds6_ib_ic_info);
 
 	goto out;
 
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 0ea4ab017a8c..f440ace584c8 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -113,6 +113,8 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
 					     int npages);
 void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
 			struct rds_info_rdma_connection *iinfo);
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+			 struct rds6_info_rdma_connection *iinfo6);
 void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 		    struct rds_sock *rs, u32 *key_ret);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0ec9df043dd0..e3c8bbbdb43f 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -180,6 +180,15 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
 	iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
 }
 
+void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
+			 struct rds6_info_rdma_connection *iinfo6)
+{
+	struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
+
+	iinfo6->rdma_mr_max = pool_1m->max_items;
+	iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages;
+}
+
 struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
 {
 	struct rds_ib_mr *ibmr = NULL;
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 1402c21210b1..03cd8df54c26 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -792,3 +792,28 @@ void rds_inc_info_copy(struct rds_incoming *inc,
 
 	rds_info_copy(iter, &minfo, sizeof(minfo));
 }
+
+void rds6_inc_info_copy(struct rds_incoming *inc,
+			struct rds_info_iterator *iter,
+			struct in6_addr *saddr, struct in6_addr *daddr,
+			int flip)
+{
+	struct rds6_info_message minfo6;
+
+	minfo6.seq = be64_to_cpu(inc->i_hdr.h_sequence);
+	minfo6.len = be32_to_cpu(inc->i_hdr.h_len);
+
+	if (flip) {
+		minfo6.laddr = *daddr;
+		minfo6.faddr = *saddr;
+		minfo6.lport = inc->i_hdr.h_dport;
+		minfo6.fport = inc->i_hdr.h_sport;
+	} else {
+		minfo6.laddr = *saddr;
+		minfo6.faddr = *daddr;
+		minfo6.lport = inc->i_hdr.h_sport;
+		minfo6.fport = inc->i_hdr.h_dport;
+	}
+
+	rds_info_copy(iter, &minfo6, sizeof(minfo6));
+}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 890d0e1d8908..7028d6e51947 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -273,6 +273,48 @@ out:
 	spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
 }
 
+/* Handle RDS6_INFO_TCP_SOCKETS socket option. It returns both IPv4 and
+ * IPv6 connections. IPv4 connection address is returned in an IPv4 mapped
+ * address.
+ */
+static void rds6_tcp_tc_info(struct socket *sock, unsigned int len,
+			     struct rds_info_iterator *iter,
+			     struct rds_info_lengths *lens)
+{
+	struct rds6_info_tcp_socket tsinfo6;
+	struct rds_tcp_connection *tc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
+
+	if (len / sizeof(tsinfo6) < rds6_tcp_tc_count)
+		goto out;
+
+	list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
+		struct sock *sk = tc->t_sock->sk;
+		struct inet_sock *inet = inet_sk(sk);
+
+		tsinfo6.local_addr = sk->sk_v6_rcv_saddr;
+		tsinfo6.local_port = inet->inet_sport;
+		tsinfo6.peer_addr = sk->sk_v6_daddr;
+		tsinfo6.peer_port = inet->inet_dport;
+
+		tsinfo6.hdr_rem = tc->t_tinc_hdr_rem;
+		tsinfo6.data_rem = tc->t_tinc_data_rem;
+		tsinfo6.last_sent_nxt = tc->t_last_sent_nxt;
+		tsinfo6.last_expected_una = tc->t_last_expected_una;
+		tsinfo6.last_seen_una = tc->t_last_seen_una;
+
+		rds_info_copy(iter, &tsinfo6, sizeof(tsinfo6));
+	}
+
+out:
+	lens->nr = rds6_tcp_tc_count;
+	lens->each = sizeof(tsinfo6);
+
+	spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
+}
+
 static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
 			       __u32 scope_id)
 {
@@ -628,6 +670,7 @@ static void rds_tcp_exit(void)
 	rds_tcp_set_unloading();
 	synchronize_rcu();
 	rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+	rds_info_deregister_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
 	unregister_pernet_device(&rds_tcp_net_ops);
 	rds_tcp_destroy_conns();
 	rds_trans_unregister(&rds_tcp_transport);
@@ -659,6 +702,7 @@ static int rds_tcp_init(void)
 	rds_trans_register(&rds_tcp_transport);
 
 	rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+	rds_info_register_func(RDS6_INFO_TCP_SOCKETS, rds6_tcp_tc_info);
 
 	goto out;
 out_recv:
-- 
cgit v1.2.3


From 664000b6bb4352295dc774108a1fc87c4a1ad0e3 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 19 Jun 2018 15:23:36 +0300
Subject: net/mlx5: Add support for flow table destination number

Add support to set a destination from a flow table number.
This functionality will be used in downstream patches from this
series by the DEVX stuff.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.c        |  3 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 24 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  4 +++-
 include/linux/mlx5/fs.h                            |  1 +
 include/linux/mlx5/mlx5_ifc.h                      |  1 +
 5 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index b3820a34e773..0f11fff32a9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 		trace_seq_printf(p, "ft=%p\n", dst->ft);
 		break;
+	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+		trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+		break;
 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
 		trace_seq_printf(p, "tir=%u\n", dst->tir_num);
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 6a62b84e57f4..8e01f818021b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 		int list_size = 0;
 
 		list_for_each_entry(dst, &fte->node.children, node.list) {
-			unsigned int id;
+			unsigned int id, type = dst->dest_attr.type;
 
-			if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+			if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
 				continue;
 
-			MLX5_SET(dest_format_struct, in_dests, destination_type,
-				 dst->dest_attr.type);
-			if (dst->dest_attr.type ==
-			    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+			switch (type) {
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+				id = dst->dest_attr.ft_num;
+				type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 				id = dst->dest_attr.ft->id;
-			} else if (dst->dest_attr.type ==
-				   MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
 				id = dst->dest_attr.vport.num;
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id_valid,
@@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id,
 					 dst->dest_attr.vport.vhca_id);
-			} else {
+				break;
+			default:
 				id = dst->dest_attr.tir_num;
 			}
+
+			MLX5_SET(dest_format_struct, in_dests, destination_type,
+				 type);
 			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
 			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
 			list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a8d7d00f92f6..0d8378243903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1356,7 +1356,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 		     d1->ft == d2->ft) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
-		     d1->tir_num == d2->tir_num))
+		     d1->tir_num == d2->tir_num) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+		     d1->ft_num == d2->ft_num))
 			return true;
 	}
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index c40f2fc68655..af0592400499 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -89,6 +89,7 @@ struct mlx5_flow_destination {
 	enum mlx5_flow_destination_type	type;
 	union {
 		u32			tir_num;
+		u32			ft_num;
 		struct mlx5_flow_table	*ft;
 		struct mlx5_fc		*counter;
 		struct {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ae12120ef021..c14b81559505 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1180,6 +1180,7 @@ enum mlx5_flow_destination_type {
 
 	MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
 	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
+	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
 };
 
 struct mlx5_ifc_dest_format_struct_bits {
-- 
cgit v1.2.3


From 467b061f1ac80f323bedb56d20a24f7bc0d2cec5 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 23 Jul 2018 16:54:03 +0100
Subject: ASoC: core: add support to snd_soc_dai_get_channel_map()

On Qualcomm platforms, specifically with SLIMbus interfaced codecs,
the codec slim channel numbers are passed to DSP while configuring
the slim audio path. Having get_channel_map() would allow dais to
share such information across multiple dais.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  8 ++++++++
 sound/soc/soc-core.c    | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index a14bc0608ae9..f5d70041108f 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -138,6 +138,11 @@ int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate);
 int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute,
 			     int direction);
 
+
+int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai,
+		unsigned int *tx_num, unsigned int *tx_slot,
+		unsigned int *rx_num, unsigned int *rx_slot);
+
 int snd_soc_dai_is_dummy(struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
@@ -165,6 +170,9 @@ struct snd_soc_dai_ops {
 	int (*set_channel_map)(struct snd_soc_dai *dai,
 		unsigned int tx_num, unsigned int *tx_slot,
 		unsigned int rx_num, unsigned int *rx_slot);
+	int (*get_channel_map)(struct snd_soc_dai *dai,
+			unsigned int *tx_num, unsigned int *tx_slot,
+			unsigned int *rx_num, unsigned int *rx_slot);
 	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
 
 	int (*set_sdw_stream)(struct snd_soc_dai *dai,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ad5b0ef16d82..81b27923303d 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2679,6 +2679,28 @@ int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai,
 }
 EXPORT_SYMBOL_GPL(snd_soc_dai_set_channel_map);
 
+/**
+ * snd_soc_dai_get_channel_map - Get DAI audio channel map
+ * @dai: DAI
+ * @tx_num: how many TX channels
+ * @tx_slot: pointer to an array which imply the TX slot number channel
+ *           0~num-1 uses
+ * @rx_num: how many RX channels
+ * @rx_slot: pointer to an array which imply the RX slot number channel
+ *           0~num-1 uses
+ */
+int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai,
+	unsigned int *tx_num, unsigned int *tx_slot,
+	unsigned int *rx_num, unsigned int *rx_slot)
+{
+	if (dai->driver->ops->get_channel_map)
+		return dai->driver->ops->get_channel_map(dai, tx_num, tx_slot,
+			rx_num, rx_slot);
+	else
+		return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(snd_soc_dai_get_channel_map);
+
 /**
  * snd_soc_dai_set_tristate - configure DAI system or master clock.
  * @dai: DAI
-- 
cgit v1.2.3


From fb174b27e8267776bf8c20ca178e82b27c5b2444 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 24 Apr 2018 22:08:11 +0200
Subject: clk: samsung: Remove support for Exynos5440

The Exynos5440 is not actively developed, there are no development
boards available and probably there are no real products with it.
Remove wide-tree support for Exynos5440.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Acked-by: Sylwester Nawrocki <snawrocki@kernel.org>
---
 .../devicetree/bindings/clock/exynos5440-clock.txt |  28 ----
 drivers/clk/samsung/Makefile                       |   1 -
 drivers/clk/samsung/clk-exynos5440.c               | 167 ---------------------
 include/dt-bindings/clock/exynos5440.h             |  44 ------
 4 files changed, 240 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/clock/exynos5440-clock.txt
 delete mode 100644 drivers/clk/samsung/clk-exynos5440.c
 delete mode 100644 include/dt-bindings/clock/exynos5440.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
deleted file mode 100644
index c7d227c31e95..000000000000
--- a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* Samsung Exynos5440 Clock Controller
-
-The Exynos5440 clock controller generates and supplies clock to various
-controllers within the Exynos5440 SoC.
-
-Required Properties:
-
-- compatible: should be "samsung,exynos5440-clock".
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/exynos5440.h header and can be used in device
-tree sources.
-
-Example: An example of a clock controller node is listed below.
-
-	clock: clock-controller@10010000 {
-		compatible = "samsung,exynos5440-clock";
-		reg = <0x160000 0x10000>;
-		#clock-cells = <1>;
-	};
diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
index 513826393158..1a4e6b787978 100644
--- a/drivers/clk/samsung/Makefile
+++ b/drivers/clk/samsung/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_SOC_EXYNOS5410)	+= clk-exynos5410.o
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5420.o
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5-subcmu.o
 obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)	+= clk-exynos5433.o
-obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_EXYNOS_AUDSS_CLK_CON) += clk-exynos-audss.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-clkout.o
 obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)	+= clk-exynos7.o
diff --git a/drivers/clk/samsung/clk-exynos5440.c b/drivers/clk/samsung/clk-exynos5440.c
deleted file mode 100644
index b08bd54c5e76..000000000000
--- a/drivers/clk/samsung/clk-exynos5440.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- * Author: Thomas Abraham <thomas.ab@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Common Clock Framework support for Exynos5440 SoC.
-*/
-
-#include <dt-bindings/clock/exynos5440.h>
-#include <linux/clk-provider.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
-
-#include "clk.h"
-#include "clk-pll.h"
-
-#define CLKEN_OV_VAL		0xf8
-#define CPU_CLK_STATUS		0xfc
-#define MISC_DOUT1		0x558
-
-static void __iomem *reg_base;
-
-/* parent clock name list */
-PNAME(mout_armclk_p)	= { "cplla", "cpllb" };
-PNAME(mout_spi_p)	= { "div125", "div200" };
-
-/* fixed rate clocks generated outside the soc */
-static struct samsung_fixed_rate_clock exynos5440_fixed_rate_ext_clks[] __initdata = {
-	FRATE(0, "xtal", NULL, 0, 0),
-};
-
-/* fixed rate clocks */
-static const struct samsung_fixed_rate_clock exynos5440_fixed_rate_clks[] __initconst = {
-	FRATE(0, "ppll", NULL, 0, 1000000000),
-	FRATE(0, "usb_phy0", NULL, 0, 60000000),
-	FRATE(0, "usb_phy1", NULL, 0, 60000000),
-	FRATE(0, "usb_ohci12", NULL, 0, 12000000),
-	FRATE(0, "usb_ohci48", NULL, 0, 48000000),
-};
-
-/* fixed factor clocks */
-static const struct samsung_fixed_factor_clock exynos5440_fixed_factor_clks[] __initconst = {
-	FFACTOR(0, "div250", "ppll", 1, 4, 0),
-	FFACTOR(0, "div200", "ppll", 1, 5, 0),
-	FFACTOR(0, "div125", "div250", 1, 2, 0),
-};
-
-/* mux clocks */
-static const struct samsung_mux_clock exynos5440_mux_clks[] __initconst = {
-	MUX(0, "mout_spi", mout_spi_p, MISC_DOUT1, 5, 1),
-	MUX(CLK_ARM_CLK, "arm_clk", mout_armclk_p, CPU_CLK_STATUS, 0, 1),
-};
-
-/* divider clocks */
-static const struct samsung_div_clock exynos5440_div_clks[] __initconst = {
-	DIV(CLK_SPI_BAUD, "div_spi", "mout_spi", MISC_DOUT1, 3, 2),
-};
-
-/* gate clocks */
-static const struct samsung_gate_clock exynos5440_gate_clks[] __initconst = {
-	GATE(CLK_PB0_250, "pb0_250", "div250", CLKEN_OV_VAL, 3, 0, 0),
-	GATE(CLK_PR0_250, "pr0_250", "div250", CLKEN_OV_VAL, 4, 0, 0),
-	GATE(CLK_PR1_250, "pr1_250", "div250", CLKEN_OV_VAL, 5, 0, 0),
-	GATE(CLK_B_250, "b_250", "div250", CLKEN_OV_VAL, 9, 0, 0),
-	GATE(CLK_B_125, "b_125", "div125", CLKEN_OV_VAL, 10, 0, 0),
-	GATE(CLK_B_200, "b_200", "div200", CLKEN_OV_VAL, 11, 0, 0),
-	GATE(CLK_SATA, "sata", "div200", CLKEN_OV_VAL, 12, 0, 0),
-	GATE(CLK_USB, "usb", "div200", CLKEN_OV_VAL, 13, 0, 0),
-	GATE(CLK_GMAC0, "gmac0", "div200", CLKEN_OV_VAL, 14, 0, 0),
-	GATE(CLK_CS250, "cs250", "div250", CLKEN_OV_VAL, 19, 0, 0),
-	GATE(CLK_PB0_250_O, "pb0_250_o", "pb0_250", CLKEN_OV_VAL, 3, 0, 0),
-	GATE(CLK_PR0_250_O, "pr0_250_o", "pr0_250", CLKEN_OV_VAL, 4, 0, 0),
-	GATE(CLK_PR1_250_O, "pr1_250_o", "pr1_250", CLKEN_OV_VAL, 5, 0, 0),
-	GATE(CLK_B_250_O, "b_250_o", "b_250", CLKEN_OV_VAL, 9, 0, 0),
-	GATE(CLK_B_125_O, "b_125_o", "b_125", CLKEN_OV_VAL, 10, 0, 0),
-	GATE(CLK_B_200_O, "b_200_o", "b_200", CLKEN_OV_VAL, 11, 0, 0),
-	GATE(CLK_SATA_O, "sata_o", "sata", CLKEN_OV_VAL, 12, 0, 0),
-	GATE(CLK_USB_O, "usb_o", "usb", CLKEN_OV_VAL, 13, 0, 0),
-	GATE(CLK_GMAC0_O, "gmac0_o", "gmac", CLKEN_OV_VAL, 14, 0, 0),
-	GATE(CLK_CS250_O, "cs250_o", "cs250", CLKEN_OV_VAL, 19, 0, 0),
-};
-
-static const struct of_device_id ext_clk_match[] __initconst = {
-	{ .compatible = "samsung,clock-xtal", .data = (void *)0, },
-	{},
-};
-
-static int exynos5440_clk_restart_notify(struct notifier_block *this,
-		unsigned long code, void *unused)
-{
-	u32 val, status;
-
-	status = readl_relaxed(reg_base + 0xbc);
-	val = readl_relaxed(reg_base + 0xcc);
-	val = (val & 0xffff0000) | (status & 0xffff);
-	writel_relaxed(val, reg_base + 0xcc);
-
-	return NOTIFY_DONE;
-}
-
-/*
- * Exynos5440 Clock restart notifier, handles restart functionality
- */
-static struct notifier_block exynos5440_clk_restart_handler = {
-	.notifier_call = exynos5440_clk_restart_notify,
-	.priority = 128,
-};
-
-static const struct samsung_pll_clock exynos5440_plls[] __initconst = {
-	PLL(pll_2550x, CLK_CPLLA, "cplla", "xtal", 0, 0x4c, NULL),
-	PLL(pll_2550x, CLK_CPLLB, "cpllb", "xtal", 0, 0x50, NULL),
-};
-
-/*
- * Clock aliases for legacy clkdev look-up.
- */
-static const struct samsung_clock_alias exynos5440_aliases[] __initconst = {
-	ALIAS(CLK_ARM_CLK, NULL, "armclk"),
-};
-
-/* register exynos5440 clocks */
-static void __init exynos5440_clk_init(struct device_node *np)
-{
-	struct samsung_clk_provider *ctx;
-
-	reg_base = of_iomap(np, 0);
-	if (!reg_base) {
-		pr_err("%s: failed to map clock controller registers,"
-			" aborting clock initialization\n", __func__);
-		return;
-	}
-
-	ctx = samsung_clk_init(np, reg_base, CLK_NR_CLKS);
-
-	samsung_clk_of_register_fixed_ext(ctx, exynos5440_fixed_rate_ext_clks,
-		ARRAY_SIZE(exynos5440_fixed_rate_ext_clks), ext_clk_match);
-
-	samsung_clk_register_pll(ctx, exynos5440_plls,
-			ARRAY_SIZE(exynos5440_plls), ctx->reg_base);
-
-	samsung_clk_register_fixed_rate(ctx, exynos5440_fixed_rate_clks,
-			ARRAY_SIZE(exynos5440_fixed_rate_clks));
-	samsung_clk_register_fixed_factor(ctx, exynos5440_fixed_factor_clks,
-			ARRAY_SIZE(exynos5440_fixed_factor_clks));
-	samsung_clk_register_mux(ctx, exynos5440_mux_clks,
-			ARRAY_SIZE(exynos5440_mux_clks));
-	samsung_clk_register_div(ctx, exynos5440_div_clks,
-			ARRAY_SIZE(exynos5440_div_clks));
-	samsung_clk_register_gate(ctx, exynos5440_gate_clks,
-			ARRAY_SIZE(exynos5440_gate_clks));
-	samsung_clk_register_alias(ctx, exynos5440_aliases,
-						ARRAY_SIZE(exynos5440_aliases));
-
-	samsung_clk_of_add_provider(np, ctx);
-
-	if (register_restart_handler(&exynos5440_clk_restart_handler))
-		pr_warn("exynos5440 clock can't register restart handler\n");
-
-	pr_info("Exynos5440: arm_clk = %ldHz\n", _get_rate("arm_clk"));
-	pr_info("exynos5440 clock initialization complete\n");
-}
-CLK_OF_DECLARE(exynos5440_clk, "samsung,exynos5440-clock", exynos5440_clk_init);
diff --git a/include/dt-bindings/clock/exynos5440.h b/include/dt-bindings/clock/exynos5440.h
deleted file mode 100644
index 842cdc0adff1..000000000000
--- a/include/dt-bindings/clock/exynos5440.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2013 Samsung Electronics Co., Ltd.
- * Author: Andrzej Hajda <a.hajda@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Device Tree binding constants for Exynos5440 clock controller.
-*/
-
-#ifndef _DT_BINDINGS_CLOCK_EXYNOS_5440_H
-#define _DT_BINDINGS_CLOCK_EXYNOS_5440_H
-
-#define CLK_XTAL		1
-#define CLK_ARM_CLK		2
-#define CLK_CPLLA		3
-#define CLK_CPLLB		4
-#define CLK_SPI_BAUD		16
-#define CLK_PB0_250		17
-#define CLK_PR0_250		18
-#define CLK_PR1_250		19
-#define CLK_B_250		20
-#define CLK_B_125		21
-#define CLK_B_200		22
-#define CLK_SATA		23
-#define CLK_USB			24
-#define CLK_GMAC0		25
-#define CLK_CS250		26
-#define CLK_PB0_250_O		27
-#define CLK_PR0_250_O		28
-#define CLK_PR1_250_O		29
-#define CLK_B_250_O		30
-#define CLK_B_125_O		31
-#define CLK_B_200_O		32
-#define CLK_SATA_O		33
-#define CLK_USB_O		34
-#define CLK_GMAC0_O		35
-#define CLK_CS250_O		36
-
-/* must be greater than maximal clock id */
-#define CLK_NR_CLKS		37
-
-#endif /* _DT_BINDINGS_CLOCK_EXYNOS_5440_H */
-- 
cgit v1.2.3


From 3730cf4dd70b6a36e48d58a862120311411b77f5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 24 Jul 2018 12:47:56 +0200
Subject: netlink: do not store start function in netlink_cb

->start() is called once when dump is being initialized, there is no
need to store it in netlink_cb.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 1 -
 net/netlink/af_netlink.c | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index f3075d6c7e82..71f121b66ca8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -170,7 +170,6 @@ netlink_skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 struct netlink_callback {
 	struct sk_buff		*skb;
 	const struct nlmsghdr	*nlh;
-	int			(*start)(struct netlink_callback *);
 	int			(*dump)(struct sk_buff * skb,
 					struct netlink_callback *cb);
 	int			(*done)(struct netlink_callback *cb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..f6ac7693d2cc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2300,7 +2300,6 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
 	cb = &nlk->cb;
 	memset(cb, 0, sizeof(*cb));
-	cb->start = control->start;
 	cb->dump = control->dump;
 	cb->done = control->done;
 	cb->nlh = nlh;
@@ -2309,8 +2308,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	cb->min_dump_alloc = control->min_dump_alloc;
 	cb->skb = skb;
 
-	if (cb->start) {
-		ret = cb->start(cb);
+	if (control->start) {
+		ret = control->start(cb);
 		if (ret)
 			goto error_put;
 	}
-- 
cgit v1.2.3


From 10ac86884b4d7642a235f9da61367c3f8d2ab2ff Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbcon: introduce for_each_registered_fb() helper

Following pattern is often used:

 for (i = 0; i < FB_MAX; i++) {
        if (registered_fb[i]) {
                ...
        }
 }

Therefore, as Andy's suggestion, for_each_registered_fb() helper can
be introduced to make the code easier to read and write by reducing
indentation level. It also saves few lines of code in each occurrence.

This patch convert all part here at the same time.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/core/fbcon.c | 31 +++++++++++--------------------
 drivers/video/fbdev/core/fbmem.c |  4 +---
 include/linux/fb.h               |  4 ++++
 3 files changed, 16 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 5fb156bdcf4e..e30d3a138c97 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2234,8 +2234,8 @@ static int fbcon_switch(struct vc_data *vc)
 	 *
 	 * info->currcon = vc->vc_num;
 	 */
-	for (i = 0; i < FB_MAX; i++) {
-		if (registered_fb[i] != NULL && registered_fb[i]->fbcon_par) {
+	for_each_registered_fb(i) {
+		if (registered_fb[i]->fbcon_par) {
 			struct fbcon_ops *o = registered_fb[i]->fbcon_par;
 
 			o->currcon = vc->vc_num;
@@ -3124,11 +3124,9 @@ static int fbcon_fb_unregistered(struct fb_info *info)
 	if (idx == info_idx) {
 		info_idx = -1;
 
-		for (i = 0; i < FB_MAX; i++) {
-			if (registered_fb[i] != NULL) {
-				info_idx = i;
-				break;
-			}
+		for_each_registered_fb(i) {
+			info_idx = i;
+			break;
 		}
 	}
 
@@ -3609,10 +3607,8 @@ static int fbcon_output_notifier(struct notifier_block *nb,
 	deferred_takeover = false;
 	logo_shown = FBCON_LOGO_DONTSHOW;
 
-	for (i = 0; i < FB_MAX; i++) {
-		if (registered_fb[i])
-			fbcon_fb_registered(registered_fb[i]);
-	}
+	for_each_registered_fb(i)
+		fbcon_fb_registered(registered_fb[i]);
 
 	return NOTIFY_OK;
 }
@@ -3638,11 +3634,9 @@ static void fbcon_start(void)
 
 		console_lock();
 
-		for (i = 0; i < FB_MAX; i++) {
-			if (registered_fb[i] != NULL) {
-				info_idx = i;
-				break;
-			}
+		for_each_registered_fb(i) {
+			info_idx = i;
+			break;
 		}
 
 		do_fbcon_takeover(0);
@@ -3669,15 +3663,12 @@ static void fbcon_exit(void)
 	kfree((void *)softback_buf);
 	softback_buf = 0UL;
 
-	for (i = 0; i < FB_MAX; i++) {
+	for_each_registered_fb(i) {
 		int pending = 0;
 
 		mapped = 0;
 		info = registered_fb[i];
 
-		if (info == NULL)
-			continue;
-
 		if (info->queue.func)
 			pending = cancel_work_sync(&info->queue);
 		DPRINTK("fbcon: %s pending work\n", (pending ? "canceled" :
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 609438d2465b..645c6ac932d6 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1593,10 +1593,8 @@ static int do_remove_conflicting_framebuffers(struct apertures_struct *a,
 	int i, ret;
 
 	/* check all firmware fbs and kick off if the base addr overlaps */
-	for (i = 0 ; i < FB_MAX; i++) {
+	for_each_registered_fb(i) {
 		struct apertures_struct *gen_aper;
-		if (!registered_fb[i])
-			continue;
 
 		if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
 			continue;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index aa74a228bb92..fd31e6f24b8d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -650,6 +650,10 @@ extern struct fb_info *registered_fb[FB_MAX];
 extern int num_registered_fb;
 extern struct class *fb_class;
 
+#define for_each_registered_fb(i)		\
+	for (i = 0; i < FB_MAX; i++)		\
+		if (!registered_fb[i]) {} else
+
 extern int lock_fb_info(struct fb_info *info);
 
 static inline void unlock_fb_info(struct fb_info *info)
-- 
cgit v1.2.3


From 3d910ef71732d15f251ca9b679da634adca72f5a Mon Sep 17 00:00:00 2001
From: Yisheng Xie <ysxie@foxmail.com>
Date: Tue, 24 Jul 2018 19:11:26 +0200
Subject: fbdev: fix typo in comment

Change beeng to being and occured to occurred.

Signed-off-by: Yisheng Xie <ysxie@foxmail.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Lechner <david@lechnology.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/fb.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index fd31e6f24b8d..3e7e75383d32 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -126,7 +126,7 @@ struct fb_cursor_user {
 
 /*	The resolution of the passed in fb_info about to change */ 
 #define FB_EVENT_MODE_CHANGE		0x01
-/*	The display on this fb_info is beeing suspended, no access to the
+/*	The display on this fb_info is being suspended, no access to the
  *	framebuffer is allowed any more after that call returns
  */
 #define FB_EVENT_SUSPEND		0x02
@@ -159,9 +159,9 @@ struct fb_cursor_user {
 #define FB_EVENT_FB_UNBIND              0x0E
 /*      CONSOLE-SPECIFIC: remap all consoles to new fb - for vga_switcheroo */
 #define FB_EVENT_REMAP_ALL_CONSOLE      0x0F
-/*      A hardware display blank early change occured */
+/*      A hardware display blank early change occurred */
 #define FB_EARLY_EVENT_BLANK		0x10
-/*      A hardware display blank revert early change occured */
+/*      A hardware display blank revert early change occurred */
 #define FB_R_EARLY_EVENT_BLANK		0x11
 
 struct fb_event {
-- 
cgit v1.2.3


From 9b04fcc1d8a9b5480f9e7ec93072c3a1bc517256 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Mon, 9 Jul 2018 10:19:19 -0400
Subject: media: v4l2-fwnode: parse 'data-enable-active' prop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parse the newly defined 'data-enable-active' property in parallel
endpoint parsing function.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-fwnode.c | 4 ++++
 include/media/v4l2-mediabus.h         | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 3f77aa318035..61051911e779 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -154,6 +154,10 @@ static void v4l2_fwnode_endpoint_parse_parallel_bus(
 		flags |= v ? V4L2_MBUS_VIDEO_SOG_ACTIVE_HIGH :
 			V4L2_MBUS_VIDEO_SOG_ACTIVE_LOW;
 
+	if (!fwnode_property_read_u32(fwnode, "data-enable-active", &v))
+		flags |= v ? V4L2_MBUS_DATA_ENABLE_HIGH :
+			V4L2_MBUS_DATA_ENABLE_LOW;
+
 	bus->flags = flags;
 
 }
diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h
index 4d8626c468bc..4bbb5f3d2b02 100644
--- a/include/media/v4l2-mediabus.h
+++ b/include/media/v4l2-mediabus.h
@@ -45,6 +45,8 @@
 /* Active state of Sync-on-green (SoG) signal, 0/1 for LOW/HIGH respectively. */
 #define V4L2_MBUS_VIDEO_SOG_ACTIVE_HIGH		BIT(12)
 #define V4L2_MBUS_VIDEO_SOG_ACTIVE_LOW		BIT(13)
+#define V4L2_MBUS_DATA_ENABLE_HIGH		BIT(14)
+#define V4L2_MBUS_DATA_ENABLE_LOW		BIT(15)
 
 /* Serial flags */
 /* How many lanes the client can use */
-- 
cgit v1.2.3


From fd44e3853c0155fa82314f341f476d4793415cd2 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Mon, 23 Jul 2018 15:25:07 +0300
Subject: IB/mlx5: Introduce flow steering matcher uapi object

Introduce flow steering matcher object and its create and destroy methods.

This matcher object holds some mlx5 specific driver properties that
matches the underlay device specification when an mlx5 flow steering group
is created.

It will be used in downstream patches to be part of mlx5 specific create
flow method.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 .../infiniband/core/uverbs_std_types_flow_action.c |   3 +-
 drivers/infiniband/hw/mlx5/Makefile                |   1 +
 drivers/infiniband/hw/mlx5/flow.c                  | 134 +++++++++++++++++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |  11 ++
 include/rdma/uverbs_ioctl.h                        |   2 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h           |  33 ++++-
 6 files changed, 181 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx5/flow.c

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index c753a34cd984..adb9209c4710 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -376,8 +376,7 @@ static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
 static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
-		/* No need to specify any data */
-		UVERBS_ATTR_SIZE(0, 0),
+		UVERBS_ATTR_NO_DATA(),
 	},
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
 		.type = UVERBS_ATTR_TYPE_PTR_IN,
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 577e4c418bae..b8e4b15e2674 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -4,3 +4,4 @@ mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_vi
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
 mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
new file mode 100644
index 000000000000..ab4bc3778edd
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+	[MLX5_IB_FLOW_TYPE_NORMAL] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		.u.ptr = {
+			.len = sizeof(u16), /* data is priority */
+			.min_len = sizeof(u16),
+		}
+	},
+	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+};
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+				enum rdma_remove_reason why)
+{
+	struct mlx5_ib_flow_matcher *obj = uobject->object;
+	int ret;
+
+	ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
+	if (ret)
+		return ret;
+
+	kfree(obj);
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+	struct ib_device *ib_dev, struct ib_uverbs_file *file,
+	struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+	struct mlx5_ib_flow_matcher *obj;
+	int err;
+
+	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	obj->mask_len = uverbs_attr_get_len(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+	err = uverbs_copy_from(&obj->matcher_mask,
+			       attrs,
+			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+	if (err)
+		goto end;
+
+	obj->flow_type = uverbs_attr_get_enum_id(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+		err = uverbs_copy_from(&obj->priority,
+				       attrs,
+				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+		if (err)
+			goto end;
+	}
+
+	err = uverbs_copy_from(&obj->match_criteria_enable,
+			       attrs,
+			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+	if (err)
+		goto end;
+
+	uobj->object = obj;
+	obj->mdev = dev->mdev;
+	atomic_set(&obj->usecnt, 0);
+	return 0;
+
+end:
+	kfree(obj);
+	return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+		UA_MANDATORY),
+	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+			    mlx5_ib_flow_type,
+			    UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+			   UVERBS_ATTR_TYPE(u8),
+			   UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+DECLARE_UVERBS_OBJECT_TREE(flow_objects,
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 04a5d82c9cf3..c556b00bf4f7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -46,6 +46,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/mlx5-abi.h>
 #include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)				\
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
@@ -173,6 +174,16 @@ struct mlx5_ib_flow_handler {
 	struct ib_counters		*ibcounters;
 };
 
+struct mlx5_ib_flow_matcher {
+	struct mlx5_ib_match_params matcher_mask;
+	int			mask_len;
+	enum mlx5_ib_flow_type	flow_type;
+	u16			priority;
+	struct mlx5_core_dev	*mdev;
+	atomic_t		usecnt;
+	u8			match_criteria_enable;
+};
+
 struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	prios[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS];
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 017ccf75890c..7f230d1ec2b8 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -204,6 +204,8 @@ struct uverbs_object_tree_def {
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
 	.u.ptr.min_len = _min_len, .u.ptr.len = _len
 
+#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0)
+
 /*
  * Specifies a uapi structure that cannot be extended. The user must always
  * supply the whole structure and nothing more. The structure must be declared
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 1a05bb4b0b34..233d5d140179 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -33,6 +33,7 @@
 #ifndef MLX5_USER_IOCTL_CMDS_H
 #define MLX5_USER_IOCTL_CMDS_H
 
+#include <linux/types.h>
 #include <rdma/ib_user_ioctl_cmds.h>
 
 enum mlx5_ib_create_flow_action_attrs {
@@ -112,10 +113,40 @@ enum mlx5_ib_devx_umem_methods {
 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 };
 
-enum mlx5_ib_devx_objects {
+enum mlx5_ib_objects {
 	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_OBJECT_DEVX_OBJ,
 	MLX5_IB_OBJECT_DEVX_UMEM,
+	MLX5_IB_OBJECT_FLOW_MATCHER,
+};
+
+enum mlx5_ib_flow_matcher_create_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+};
+
+enum mlx5_ib_flow_matcher_destroy_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_matcher_methods {
+	MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+};
+
+#define MLX5_IB_DW_MATCH_PARAM 0x80
+
+struct mlx5_ib_match_params {
+	__u32	match_params[MLX5_IB_DW_MATCH_PARAM];
+};
+
+enum mlx5_ib_flow_type {
+	MLX5_IB_FLOW_TYPE_NORMAL,
+	MLX5_IB_FLOW_TYPE_SNIFFER,
+	MLX5_IB_FLOW_TYPE_ALL_DEFAULT,
+	MLX5_IB_FLOW_TYPE_MC_DEFAULT,
 };
 
 #endif
-- 
cgit v1.2.3


From 6cd080a674a7adce97c0189c4579cf40782c2770 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Mon, 23 Jul 2018 15:25:08 +0300
Subject: IB: Support ib_flow creation in drivers

This patch considers the case that ib_flow is created by some device
driver with its specific parameters using the KABI infrastructure.

In that case both QP and ib_uflow_resources might not be applicable.
Downstream patches from this series use the above functionality.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c       | 4 ++++
 drivers/infiniband/core/uverbs_std_types.c | 5 +++--
 include/rdma/ib_verbs.h                    | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bd6eefaecbd6..987ee38ab4b3 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2771,6 +2771,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
 {
 	unsigned int i;
 
+	if (!uflow_res)
+		return;
+
 	for (i = 0; i < uflow_res->collection_num; i++)
 		atomic_dec(&uflow_res->collection[i]->usecnt);
 
@@ -3585,6 +3588,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	}
 	atomic_inc(&qp->usecnt);
 	flow_id->qp = qp;
+	flow_id->device = qp->device;
 	flow_id->uobject = uobj;
 	uobj->object = flow_id;
 	uflow = container_of(uobj, typeof(*uflow), uobject);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 718c8430d364..c1e0492cc78a 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -54,9 +54,10 @@ static int uverbs_free_flow(struct ib_uobject *uobject,
 	struct ib_qp *qp = flow->qp;
 	int ret;
 
-	ret = qp->device->destroy_flow(flow);
+	ret = flow->device->destroy_flow(flow);
 	if (!ret) {
-		atomic_dec(&qp->usecnt);
+		if (qp)
+			atomic_dec(&qp->usecnt);
 		ib_uverbs_flow_resources_free(uflow->resources);
 	}
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 08348e53082c..24d6ec38feea 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2081,6 +2081,7 @@ struct ib_flow_attr {
 
 struct ib_flow {
 	struct ib_qp		*qp;
+	struct ib_device	*device;
 	struct ib_uobject	*uobject;
 };
 
-- 
cgit v1.2.3


From e5cda42c16d89720c29678f51d95a119490ef7d8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 23 Jul 2018 19:52:57 +0200
Subject: ARM: exynos: Define EINT_WAKEUP_MASK registers for S5Pv210 and
 Exynos5433

S5Pv210 and Exynos5433/Exynos7 have different address of
EINT_WAKEUP_MASK register.  Rename existing S5P_EINT_WAKEUP_MASK to
avoid confusion and add new ones.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Tomasz Figa <tomasz.figa@gmail.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mach-exynos/suspend.c              | 2 +-
 include/linux/soc/samsung/exynos-regs-pmu.h | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index d3db306a5a70..f3384e3a675d 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -272,7 +272,7 @@ static int exynos5420_cpu_suspend(unsigned long arg)
 static void exynos_pm_set_wakeup_mask(void)
 {
 	/* Set wake-up mask registers */
-	pmu_raw_writel(exynos_get_eint_wake_mask(), S5P_EINT_WAKEUP_MASK);
+	pmu_raw_writel(exynos_get_eint_wake_mask(), EXYNOS_EINT_WAKEUP_MASK);
 	pmu_raw_writel(exynos_irqwake_intmask & ~(1 << 31), S5P_WAKEUP_MASK);
 }
 
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index 66dcb9ec273a..eb0d240df7a7 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -42,7 +42,7 @@
 #define EXYNOS_SWRESET				0x0400
 
 #define S5P_WAKEUP_STAT				0x0600
-#define S5P_EINT_WAKEUP_MASK			0x0604
+#define EXYNOS_EINT_WAKEUP_MASK			0x0604
 #define S5P_WAKEUP_MASK				0x0608
 #define S5P_WAKEUP_MASK2				0x0614
 
@@ -180,6 +180,9 @@
 #define S5P_CORE_WAKEUP_FROM_LOCAL_CFG		(0x3 << 8)
 #define S5P_CORE_AUTOWAKEUP_EN			(1 << 31)
 
+/* Only for S5Pv210 */
+#define S5PV210_EINT_WAKEUP_MASK	0xC004
+
 /* Only for EXYNOS4210 */
 #define S5P_CMU_CLKSTOP_LCD1_LOWPWR	0x1154
 #define S5P_CMU_RESET_LCD1_LOWPWR	0x1174
@@ -641,6 +644,7 @@
 					 | EXYNOS5420_KFC_USE_STANDBY_WFI3)
 
 /* For EXYNOS5433 */
+#define EXYNOS5433_EINT_WAKEUP_MASK				(0x060C)
 #define EXYNOS5433_USBHOST30_PHY_CONTROL			(0x0728)
 #define EXYNOS5433_PAD_RETENTION_AUD_OPTION			(0x3028)
 #define EXYNOS5433_PAD_RETENTION_MMC2_OPTION			(0x30C8)
-- 
cgit v1.2.3


From a8be2af0218cf037704dc2e733bf56d6560fa324 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Mon, 23 Jul 2018 19:52:58 +0200
Subject: pinctrl: samsung: Write external wakeup interrupt mask

The pinctrl driver defines an IRQ chip which handles external wakeup
interrupts, therefore from logical point of view, it is the owner of
external interrupt mask.  The register controlling the mask belongs to
Power Management Unit address space so it has to be accessed with PMU
syscon regmap handle.

This mask should be written to hardware during system suspend.  Till now
ARMv7 machine code was responsible for this which created a dependency
between pin controller driver and arch/arm/mach code.

Try to rework this dependency so the pinctrl driver will write external
wakeup interrupt mask during late suspend.

Impact on ARMv7 designs (S5Pv210 and Exynos)
============================================
This duplicates setting mask with existing machine code
arch/arm/mach-exynos/suspend.c and arch/arm/mach-s5pv210/pm.c but it is
not a problem - the wakeup mask register will be written twice.  The
machine code will be cleaned up later.

The difference between implementation here and ARMv7 machine code
(arch/arm/mach-*) is the time of writing the mask:
1. The machine code is writing the mask quite late during system suspend
   path, after offlining secondary CPUs and just before doing actual
   suspend.
2. The implementation in pinctrl driver uses late suspend ops, therefore it
   will write the mask much earlier.  Hopefully late enough, after all
   drivers will enable or disable their interrupt wakeups
   (enable_irq_wake() etc).

Impact on ARMv8 designs (Exynos5433 and Exynos7)
================================================
The Suspend to RAM was not supported and external wakeup interrupt mask
was not written to HW.  This change brings us one step closer to
supporting Suspend to RAM.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Tomasz Figa <tomasz.figa@gmail.com>
Cc: Sylwester Nawrocki <snawrocki@kernel.org>
Acked-by: Tomasz Figa <tomasz.figa@gmail.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 drivers/pinctrl/samsung/pinctrl-exynos.c    | 50 ++++++++++++++++++++++++++++-
 drivers/pinctrl/samsung/pinctrl-samsung.h   |  3 ++
 include/linux/soc/samsung/exynos-regs-pmu.h |  2 ++
 3 files changed, 54 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index 29d86d704b0c..40ef14956876 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -25,6 +25,7 @@
 #include <linux/regmap.h>
 #include <linux/err.h>
 #include <linux/soc/samsung/exynos-pmu.h>
+#include <linux/soc/samsung/exynos-regs-pmu.h>
 
 #include <dt-bindings/pinctrl/samsung.h>
 
@@ -37,6 +38,8 @@ struct exynos_irq_chip {
 	u32 eint_con;
 	u32 eint_mask;
 	u32 eint_pend;
+	u32 eint_wake_mask_value;
+	u32 eint_wake_mask_reg;
 };
 
 static inline struct exynos_irq_chip *to_exynos_irq_chip(struct irq_chip *chip)
@@ -215,6 +218,7 @@ static struct exynos_irq_chip exynos_gpio_irq_chip = {
 	.eint_con = EXYNOS_GPIO_ECON_OFFSET,
 	.eint_mask = EXYNOS_GPIO_EMASK_OFFSET,
 	.eint_pend = EXYNOS_GPIO_EPEND_OFFSET,
+	/* eint_wake_mask_value not used */
 };
 
 static int exynos_eint_irq_map(struct irq_domain *h, unsigned int virq,
@@ -330,6 +334,8 @@ u32 exynos_get_eint_wake_mask(void)
 
 static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on)
 {
+	struct irq_chip *chip = irq_data_get_irq_chip(irqd);
+	struct exynos_irq_chip *our_chip = to_exynos_irq_chip(chip);
 	struct samsung_pin_bank *bank = irq_data_get_irq_chip_data(irqd);
 	unsigned long bit = 1UL << (2 * bank->eint_offset + irqd->hwirq);
 
@@ -339,6 +345,7 @@ static int exynos_wkup_irq_set_wake(struct irq_data *irqd, unsigned int on)
 		exynos_eint_wake_mask |= bit;
 	else
 		exynos_eint_wake_mask &= ~bit;
+	our_chip->eint_wake_mask_value = exynos_eint_wake_mask;
 
 	return 0;
 }
@@ -360,6 +367,9 @@ static const struct exynos_irq_chip s5pv210_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	/* Only difference with exynos4210_wkup_irq_chip: */
+	.eint_wake_mask_reg = S5PV210_EINT_WAKEUP_MASK,
 };
 
 static const struct exynos_irq_chip exynos4210_wkup_irq_chip __initconst = {
@@ -376,6 +386,8 @@ static const struct exynos_irq_chip exynos4210_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	.eint_wake_mask_reg = EXYNOS_EINT_WAKEUP_MASK,
 };
 
 static const struct exynos_irq_chip exynos7_wkup_irq_chip __initconst = {
@@ -392,6 +404,8 @@ static const struct exynos_irq_chip exynos7_wkup_irq_chip __initconst = {
 	.eint_con = EXYNOS7_WKUP_ECON_OFFSET,
 	.eint_mask = EXYNOS7_WKUP_EMASK_OFFSET,
 	.eint_pend = EXYNOS7_WKUP_EPEND_OFFSET,
+	.eint_wake_mask_value = EXYNOS_EINT_WAKEUP_MASK_DISABLED,
+	.eint_wake_mask_reg = EXYNOS5433_EINT_WAKEUP_MASK,
 };
 
 /* list of external wakeup controllers supported */
@@ -560,6 +574,27 @@ int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d)
 	return 0;
 }
 
+static void
+exynos_pinctrl_set_eint_wakeup_mask(struct samsung_pinctrl_drv_data *drvdata,
+				    struct exynos_irq_chip *irq_chip)
+{
+	struct regmap *pmu_regs;
+
+	if (!drvdata->retention_ctrl || !drvdata->retention_ctrl->priv) {
+		dev_warn(drvdata->dev,
+			 "No retention data configured bank with external wakeup interrupt. Wake-up mask will not be set.\n");
+		return;
+	}
+
+	pmu_regs = drvdata->retention_ctrl->priv;
+	dev_info(drvdata->dev,
+		 "Setting external wakeup interrupt wakeup mask: 0x%x\n",
+		 irq_chip->eint_wake_mask_value);
+
+	regmap_write(pmu_regs, irq_chip->eint_wake_mask_reg,
+		     irq_chip->eint_wake_mask_value);
+}
+
 static void exynos_pinctrl_suspend_bank(
 				struct samsung_pinctrl_drv_data *drvdata,
 				struct samsung_pin_bank *bank)
@@ -582,11 +617,24 @@ static void exynos_pinctrl_suspend_bank(
 void exynos_pinctrl_suspend(struct samsung_pinctrl_drv_data *drvdata)
 {
 	struct samsung_pin_bank *bank = drvdata->pin_banks;
+	struct exynos_irq_chip *irq_chip = NULL;
 	int i;
 
-	for (i = 0; i < drvdata->nr_banks; ++i, ++bank)
+	for (i = 0; i < drvdata->nr_banks; ++i, ++bank) {
 		if (bank->eint_type == EINT_TYPE_GPIO)
 			exynos_pinctrl_suspend_bank(drvdata, bank);
+		else if (bank->eint_type == EINT_TYPE_WKUP) {
+			if (!irq_chip) {
+				irq_chip = bank->irq_chip;
+				exynos_pinctrl_set_eint_wakeup_mask(drvdata,
+								    irq_chip);
+			} else if (bank->irq_chip != irq_chip) {
+				dev_warn(drvdata->dev,
+					 "More than one external wakeup interrupt chip configured (bank: %s). This is not supported by hardware nor by driver.\n",
+					 bank->name);
+			}
+		}
+	}
 }
 
 static void exynos_pinctrl_resume_bank(
diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.h b/drivers/pinctrl/samsung/pinctrl-samsung.h
index aac16cc8362a..e571bbd7139b 100644
--- a/drivers/pinctrl/samsung/pinctrl-samsung.h
+++ b/drivers/pinctrl/samsung/pinctrl-samsung.h
@@ -227,6 +227,9 @@ struct samsung_retention_data {
  *	device suspend, see samsung_pinctrl_suspend()
  * @resume: platform specific resume callback, executed during pin controller
  *	device suspend, see samsung_pinctrl_resume()
+ *
+ * External wakeup interrupts must define at least eint_wkup_init,
+ * retention_data and suspend in order for proper suspend/resume to work.
  */
 struct samsung_pin_ctrl {
 	const struct samsung_pin_bank_data *pin_banks;
diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h
index eb0d240df7a7..5addaf5ccbce 100644
--- a/include/linux/soc/samsung/exynos-regs-pmu.h
+++ b/include/linux/soc/samsung/exynos-regs-pmu.h
@@ -42,6 +42,8 @@
 #define EXYNOS_SWRESET				0x0400
 
 #define S5P_WAKEUP_STAT				0x0600
+/* Value for EXYNOS_EINT_WAKEUP_MASK disabling all external wakeup interrupts */
+#define EXYNOS_EINT_WAKEUP_MASK_DISABLED	0xffffffff
 #define EXYNOS_EINT_WAKEUP_MASK			0x0604
 #define S5P_WAKEUP_MASK				0x0608
 #define S5P_WAKEUP_MASK2				0x0614
-- 
cgit v1.2.3


From b3793159249bb091efce4b1515ce18cdfc4c6e41 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 18 Jul 2018 22:03:50 +0200
Subject: dt-bindings: pinctrl: samsung: Add SPDX license identifier

Replace GPL license statement with SPDX license identifier (GPL-2.0).

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 include/dt-bindings/pinctrl/samsung.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/samsung.h b/include/dt-bindings/pinctrl/samsung.h
index ceb672305f59..b1832506b923 100644
--- a/include/dt-bindings/pinctrl/samsung.h
+++ b/include/dt-bindings/pinctrl/samsung.h
@@ -1,14 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Samsung's Exynos pinctrl bindings
  *
  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  * Author: Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
+ */
 
 #ifndef __DT_BINDINGS_PINCTRL_SAMSUNG_H__
 #define __DT_BINDINGS_PINCTRL_SAMSUNG_H__
-- 
cgit v1.2.3


From 32269441240064c7475241ae28fee787fcdf55b9 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Mon, 23 Jul 2018 15:25:09 +0300
Subject: IB/mlx5: Introduce driver create and destroy flow methods

Introduce driver create and destroy flow methods on the uverbs flow
object.

This allows the driver to get its specific device attributes to match the
underlay specification while still using the generic ib_flow object for
cleanup and code sharing.

The IB object's attributes are set via the ib_set_flow() helper function.

The specific implementation for the given specification is added in
downstream patches.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c        |  22 +++++++
 drivers/infiniband/hw/mlx5/flow.c        | 110 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/mlx5/main.c        |   9 +++
 drivers/infiniband/hw/mlx5/mlx5_ib.h     |  15 +++++
 include/rdma/ib_verbs.h                  |  14 ++++
 include/rdma/uverbs_named_ioctl.h        |  29 ++++----
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  17 +++++
 7 files changed, 204 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 7f9d73b03421..270452c9e673 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -87,6 +87,28 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+	struct devx_obj *devx_obj = obj;
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_DESTROY_TIR:
+		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+				    obj_id);
+		return true;
+
+	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+				    table_id);
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
 {
 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index ab4bc3778edd..b4422e4def17 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -38,6 +38,82 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 	},
 };
 
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+	struct ib_device *ib_dev, struct ib_uverbs_file *file,
+	struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_flow_handler *flow_handler;
+	struct mlx5_ib_flow_matcher *fs_matcher;
+	void *devx_obj;
+	int dest_id, dest_type;
+	void *cmd_in;
+	int inlen;
+	bool dest_devx, dest_qp;
+	struct ib_qp *qp = NULL;
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+
+	dest_devx =
+		uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+	dest_qp = uverbs_attr_is_valid(attrs,
+				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+	if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
+		return -EINVAL;
+
+	if (dest_devx) {
+		devx_obj = uverbs_attr_get_obj(
+			attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+		if (IS_ERR(devx_obj))
+			return PTR_ERR(devx_obj);
+
+		/* Verify that the given DEVX object is a flow
+		 * steering destination.
+		 */
+		if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
+			return -EINVAL;
+	} else {
+		struct mlx5_ib_qp *mqp;
+
+		qp = uverbs_attr_get_obj(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+		if (IS_ERR(qp))
+			return PTR_ERR(qp);
+
+		if (qp->qp_type != IB_QPT_RAW_PACKET)
+			return -EINVAL;
+
+		mqp = to_mqp(qp);
+		if (mqp->flags & MLX5_IB_QP_RSS)
+			dest_id = mqp->rss_qp.tirn;
+		else
+			dest_id = mqp->raw_packet_qp.rq.tirn;
+		dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	}
+
+	if (dev->rep)
+		return -ENOTSUPP;
+
+	cmd_in = uverbs_attr_get_alloced_ptr(
+		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+	inlen = uverbs_attr_get_len(attrs,
+				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+	fs_matcher = uverbs_attr_get_obj(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+	flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
+					       dest_id, dest_type);
+	if (IS_ERR(flow_handler))
+		return PTR_ERR(flow_handler);
+
+	ib_set_flow(uobj, &flow_handler->ibflow, qp, ib_dev);
+
+	return 0;
+}
+
 static int flow_matcher_cleanup(struct ib_uobject *uobject,
 				enum rdma_remove_reason why)
 {
@@ -101,6 +177,40 @@ end:
 	return err;
 }
 
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_CREATE_FLOW,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+			UVERBS_OBJECT_FLOW,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+			UVERBS_OBJECT_QP,
+			UVERBS_ACCESS_READ),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_READ));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_DESTROY_FLOW,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+			UVERBS_OBJECT_FLOW,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+		   UVERBS_OBJECT_FLOW,
+		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d4d894e9f942..6c8b4745fb0b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3641,6 +3641,15 @@ unlock:
 	return ERR_PTR(err);
 }
 
+struct mlx5_ib_flow_handler *
+mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
+			struct mlx5_ib_flow_matcher *fs_matcher,
+			void *cmd_in, int inlen, int dest_id,
+			int dest_type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
 {
 	u32 flags = 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c556b00bf4f7..324f4ea5fce6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1230,6 +1230,10 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 			  struct mlx5_ib_ucontext *context);
 const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
+	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+	void *cmd_in, int inlen, int dest_id, int dest_type);
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
 #else
 static inline int
 mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
@@ -1238,6 +1242,17 @@ static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
 					struct mlx5_ib_ucontext *context) {}
 static inline const struct uverbs_object_tree_def *
 mlx5_ib_get_devx_tree(void) { return NULL; }
+static inline struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
+	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+	void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
+					     int *dest_type)
+{
+	return false;
+}
 #endif
 static inline void init_query_mad(struct ib_smp *mad)
 {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 24d6ec38feea..b626aa2310c5 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4134,6 +4134,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 
 }
 
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+			       struct ib_qp *qp, struct ib_device *device)
+{
+	uobj->object = ibflow;
+	ibflow->uobject = uobj;
+
+	if (qp) {
+		atomic_inc(&qp->usecnt);
+		ibflow->qp = qp;
+	}
+
+	ibflow->device = device;
+}
+
 /**
  * rdma_roce_rescan_device - Rescan all of the network devices in the system
  * and add their gids, as needed, to the relevant RoCE devices.
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index 2eb1767042af..b3b21733cc55 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -97,22 +97,14 @@
 		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
 	}
 
-/* Used by drivers to declare a complete parsing tree for a single method that
- * differs only in having additional driver specific attributes.
+/* Used by drivers to declare a complete parsing tree for new methods
  */
-#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...)       \
-	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
-		_method_id)[] = { __VA_ARGS__ };                               \
-	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
-		.id = _method_id,                                              \
-		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
-		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
-	};                                                                     \
+#define ADD_UVERBS_METHODS(_name, _object_id, ...)                             \
 	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
-		_object_id)[] = { &UVERBS_METHOD(_method_id) };                \
+		_object_id)[] = { __VA_ARGS__ };                               \
 	static const struct uverbs_object_def _name##_struct = {               \
 		.id = _object_id,                                              \
-		.num_methods = 1,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
 		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
 	};                                                                     \
 	static const struct uverbs_object_def *const _name##_ptrs[] = {        \
@@ -123,4 +115,17 @@
 		.objects = &_name##_ptrs,                                      \
 	}
 
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver specific attributes.
+ */
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...)       \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	};                                                                     \
+	ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))
+
 #endif
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 233d5d140179..9c51801b9e64 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -149,4 +149,21 @@ enum mlx5_ib_flow_type {
 	MLX5_IB_FLOW_TYPE_MC_DEFAULT,
 };
 
+enum mlx5_ib_create_flow_attrs {
+	MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+	MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+};
+
+enum mlx5_ib_destoy_flow_attrs {
+	MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_methods {
+	MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DESTROY_FLOW,
+};
+
 #endif
-- 
cgit v1.2.3


From 0fc09f920983f61be625658c62cc40ac25a7b3a5 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 23 Jul 2018 08:37:50 -0600
Subject: blk-mq: export setting request completion state

This is preparing for drivers that want to directly alter the state of
their requests. No functional change here.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         |  4 +---
 include/linux/blk-mq.h | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d394cdd8d8c6..5291a95ba362 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -558,10 +558,8 @@ static void __blk_mq_complete_request(struct request *rq)
 	bool shared = false;
 	int cpu;
 
-	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
-			MQ_RQ_IN_FLIGHT)
+	if (!blk_mq_mark_complete(rq))
 		return;
-
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb74222..ca3f2c2edd85 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -287,6 +287,20 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
 
 void blk_mq_quiesce_queue_nowait(struct request_queue *q);
 
+/**
+ * blk_mq_mark_complete() - Set request state to complete
+ * @rq: request to set to complete state
+ *
+ * Returns true if request state was successfully set to complete. If
+ * successful, the caller is responsibile for seeing this request is ended, as
+ * blk_mq_complete_request will not work again.
+ */
+static inline bool blk_mq_mark_complete(struct request *rq)
+{
+	return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
+			MQ_RQ_IN_FLIGHT;
+}
+
 /*
  * Driver command data is immediately after the request. So subtract request
  * size to get back to the original request, add request size to get the PDU.
-- 
cgit v1.2.3


From 071f52fbce6161706d070ceada5accb81630bf02 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 09:52:32 +0200
Subject: block: remove bio_clone_kmalloc

Unused now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ab221c517f4e..b861baa59454 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -443,12 +443,6 @@ static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
 }
 
-static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
-{
-	return bio_clone_bioset(bio, gfp_mask, NULL);
-
-}
-
 extern blk_qc_t submit_bio(struct bio *);
 
 extern void bio_endio(struct bio *);
-- 
cgit v1.2.3


From c55183c9aaa00d2bbb578169a480e31aff3d397c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 24 Jul 2018 09:52:34 +0200
Subject: block: unexport bio_clone_bioset

Now only used by the bounce code, so move it there and mark the function
static.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 77 -----------------------------------------------------
 block/bounce.c      | 69 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/bio.h |  1 -
 3 files changed, 68 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 07d3ffd95989..b832151cd0bf 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -646,83 +646,6 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 }
 EXPORT_SYMBOL(bio_clone_fast);
 
-/**
- * 	bio_clone_bioset - clone a bio
- * 	@bio_src: bio to clone
- *	@gfp_mask: allocation priority
- *	@bs: bio_set to allocate from
- *
- *	Clone bio. Caller will own the returned bio, but not the actual data it
- *	points to. Reference count of returned bio will be one.
- */
-struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
-			     struct bio_set *bs)
-{
-	struct bvec_iter iter;
-	struct bio_vec bv;
-	struct bio *bio;
-
-	/*
-	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
-	 * bio_src->bi_io_vec to bio->bi_io_vec.
-	 *
-	 * We can't do that anymore, because:
-	 *
-	 *  - The point of cloning the biovec is to produce a bio with a biovec
-	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
-	 *
-	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
-	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
-	 *    But the clone should succeed as long as the number of biovecs we
-	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
-	 *
-	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
-	 *    that does not own the bio - reason being drivers don't use it for
-	 *    iterating over the biovec anymore, so expecting it to be kept up
-	 *    to date (i.e. for clones that share the parent biovec) is just
-	 *    asking for trouble and would force extra work on
-	 *    __bio_clone_fast() anyways.
-	 */
-
-	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
-	if (!bio)
-		return NULL;
-	bio->bi_disk		= bio_src->bi_disk;
-	bio->bi_opf		= bio_src->bi_opf;
-	bio->bi_write_hint	= bio_src->bi_write_hint;
-	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
-	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
-
-	switch (bio_op(bio)) {
-	case REQ_OP_DISCARD:
-	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_ZEROES:
-		break;
-	case REQ_OP_WRITE_SAME:
-		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
-		break;
-	default:
-		bio_for_each_segment(bv, bio_src, iter)
-			bio->bi_io_vec[bio->bi_vcnt++] = bv;
-		break;
-	}
-
-	if (bio_integrity(bio_src)) {
-		int ret;
-
-		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
-		if (ret < 0) {
-			bio_put(bio);
-			return NULL;
-		}
-	}
-
-	bio_clone_blkcg_association(bio, bio_src);
-
-	return bio;
-}
-EXPORT_SYMBOL(bio_clone_bioset);
-
 /**
  *	bio_add_pc_page	-	attempt to add page to bio
  *	@q: the target queue
diff --git a/block/bounce.c b/block/bounce.c
index fd31347b7836..bc63b3a2d18c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -195,6 +195,73 @@ static void bounce_end_io_read_isa(struct bio *bio)
 	__bounce_end_io_read(bio, &isa_page_pool);
 }
 
+static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
+		struct bio_set *bs)
+{
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	struct bio *bio;
+
+	/*
+	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
+	 * bio_src->bi_io_vec to bio->bi_io_vec.
+	 *
+	 * We can't do that anymore, because:
+	 *
+	 *  - The point of cloning the biovec is to produce a bio with a biovec
+	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
+	 *
+	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
+	 *    we tried to clone the whole thing bio_alloc_bioset() would fail.
+	 *    But the clone should succeed as long as the number of biovecs we
+	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
+	 *
+	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
+	 *    that does not own the bio - reason being drivers don't use it for
+	 *    iterating over the biovec anymore, so expecting it to be kept up
+	 *    to date (i.e. for clones that share the parent biovec) is just
+	 *    asking for trouble and would force extra work on
+	 *    __bio_clone_fast() anyways.
+	 */
+
+	bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
+	if (!bio)
+		return NULL;
+	bio->bi_disk		= bio_src->bi_disk;
+	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
+	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
+	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+	case REQ_OP_WRITE_ZEROES:
+		break;
+	case REQ_OP_WRITE_SAME:
+		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
+		break;
+	default:
+		bio_for_each_segment(bv, bio_src, iter)
+			bio->bi_io_vec[bio->bi_vcnt++] = bv;
+		break;
+	}
+
+	if (bio_integrity(bio_src)) {
+		int ret;
+
+		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
+		if (ret < 0) {
+			bio_put(bio);
+			return NULL;
+		}
+	}
+
+	bio_clone_blkcg_association(bio, bio_src);
+
+	return bio;
+}
+
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 			       mempool_t *pool)
 {
@@ -222,7 +289,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
-	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
 			&bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b861baa59454..51371740d2a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -429,7 +429,6 @@ extern void bio_put(struct bio *);
 
 extern void __bio_clone_fast(struct bio *, struct bio *);
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
-extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set fs_bio_set;
 
-- 
cgit v1.2.3


From c89105c9b39037bbf2aca0614e39afe176e867c5 Mon Sep 17 00:00:00 2001
From: Roy Pledge <roy.pledge@nxp.com>
Date: Tue, 24 Jul 2018 09:21:29 -0500
Subject: staging: fsl-mc: Move DPIO from staging to drivers/soc/fsl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the NXP DPIO (Datapath I/O Driver) out of the
drivers/staging directory and into the drivers/soc/fsl directory.

The DPIO driver enables access to Queue and Buffer Manager (QBMAN)
hardware on NXP DPAA2 devices. This is a prerequisite to moving the
DPAA2 Ethernet driver out of staging.

Signed-off-by: Roy Pledge <roy.pledge@nxp.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Reviewed-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 MAINTAINERS                                     |    2 +-
 drivers/crypto/caam/sg_sw_qm2.h                 |    2 +-
 drivers/crypto/caam/sg_sw_sec4.h                |    2 +-
 drivers/soc/fsl/Kconfig                         |   10 +
 drivers/soc/fsl/Makefile                        |    1 +
 drivers/soc/fsl/dpio/Makefile                   |    8 +
 drivers/soc/fsl/dpio/dpio-cmd.h                 |   49 ++
 drivers/soc/fsl/dpio/dpio-driver.c              |  281 +++++++
 drivers/soc/fsl/dpio/dpio-driver.txt            |  135 +++
 drivers/soc/fsl/dpio/dpio-service.c             |  545 ++++++++++++
 drivers/soc/fsl/dpio/dpio.c                     |  198 +++++
 drivers/soc/fsl/dpio/dpio.h                     |   83 ++
 drivers/soc/fsl/dpio/qbman-portal.c             | 1005 +++++++++++++++++++++++
 drivers/soc/fsl/dpio/qbman-portal.h             |  444 ++++++++++
 drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h  |    4 +-
 drivers/staging/fsl-mc/bus/Kconfig              |    9 -
 drivers/staging/fsl-mc/bus/Makefile             |    2 -
 drivers/staging/fsl-mc/bus/dpio/Makefile        |    8 -
 drivers/staging/fsl-mc/bus/dpio/dpio-cmd.h      |   49 --
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c   |  281 -------
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.txt |  135 ---
 drivers/staging/fsl-mc/bus/dpio/dpio-service.c  |  545 ------------
 drivers/staging/fsl-mc/bus/dpio/dpio.c          |  198 -----
 drivers/staging/fsl-mc/bus/dpio/dpio.h          |   83 --
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c  | 1005 -----------------------
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.h  |  444 ----------
 drivers/staging/fsl-mc/include/dpaa2-fd.h       |  438 ----------
 drivers/staging/fsl-mc/include/dpaa2-global.h   |  177 ----
 drivers/staging/fsl-mc/include/dpaa2-io.h       |  115 ---
 include/soc/fsl/dpaa2-fd.h                      |  438 ++++++++++
 include/soc/fsl/dpaa2-global.h                  |  177 ++++
 include/soc/fsl/dpaa2-io.h                      |  115 +++
 32 files changed, 3494 insertions(+), 3494 deletions(-)
 create mode 100644 drivers/soc/fsl/dpio/Makefile
 create mode 100644 drivers/soc/fsl/dpio/dpio-cmd.h
 create mode 100644 drivers/soc/fsl/dpio/dpio-driver.c
 create mode 100644 drivers/soc/fsl/dpio/dpio-driver.txt
 create mode 100644 drivers/soc/fsl/dpio/dpio-service.c
 create mode 100644 drivers/soc/fsl/dpio/dpio.c
 create mode 100644 drivers/soc/fsl/dpio/dpio.h
 create mode 100644 drivers/soc/fsl/dpio/qbman-portal.c
 create mode 100644 drivers/soc/fsl/dpio/qbman-portal.h
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/Makefile
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio-cmd.h
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio-driver.txt
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio-service.c
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio.c
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/dpio.h
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
 delete mode 100644 drivers/staging/fsl-mc/bus/dpio/qbman-portal.h
 delete mode 100644 drivers/staging/fsl-mc/include/dpaa2-fd.h
 delete mode 100644 drivers/staging/fsl-mc/include/dpaa2-global.h
 delete mode 100644 drivers/staging/fsl-mc/include/dpaa2-io.h
 create mode 100644 include/soc/fsl/dpaa2-fd.h
 create mode 100644 include/soc/fsl/dpaa2-global.h
 create mode 100644 include/soc/fsl/dpaa2-io.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d5eeff51b5f..c48dda4f0107 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4419,7 +4419,7 @@ DPAA2 DATAPATH I/O (DPIO) DRIVER
 M:	Roy Pledge <Roy.Pledge@nxp.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
-F:	drivers/staging/fsl-mc/bus/dpio
+F:	drivers/soc/fsl/dpio
 
 DPAA2 ETHERNET DRIVER
 M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
diff --git a/drivers/crypto/caam/sg_sw_qm2.h b/drivers/crypto/caam/sg_sw_qm2.h
index 31b440757146..b5b4c12179df 100644
--- a/drivers/crypto/caam/sg_sw_qm2.h
+++ b/drivers/crypto/caam/sg_sw_qm2.h
@@ -35,7 +35,7 @@
 #ifndef _SG_SW_QM2_H_
 #define _SG_SW_QM2_H_
 
-#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
+#include <soc/fsl/dpaa2-fd.h>
 
 static inline void dma_to_qm_sg_one(struct dpaa2_sg_entry *qm_sg_ptr,
 				    dma_addr_t dma, u32 len, u16 offset)
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index e586ffab8358..dbfa9fce33e0 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -12,7 +12,7 @@
 #include "ctrl.h"
 #include "regs.h"
 #include "sg_sw_qm2.h"
-#include "../../../drivers/staging/fsl-mc/include/dpaa2-fd.h"
+#include <soc/fsl/dpaa2-fd.h>
 
 struct sec4_sg_entry {
 	u64 ptr;
diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig
index 7a9fb9baa66d..c17bf388b0b1 100644
--- a/drivers/soc/fsl/Kconfig
+++ b/drivers/soc/fsl/Kconfig
@@ -16,3 +16,13 @@ config FSL_GUTS
 	  Initially only reading SVR and registering soc device are supported.
 	  Other guts accesses, such as reading RCW, should eventually be moved
 	  into this driver as well.
+
+config FSL_MC_DPIO
+        tristate "QorIQ DPAA2 DPIO driver"
+        depends on FSL_MC_BUS
+        help
+	  Driver for the DPAA2 DPIO object.  A DPIO provides queue and
+	  buffer management facilities for software to interact with
+	  other DPAA2 objects. This driver does not expose the DPIO
+	  objects individually, but groups them under a service layer
+	  API.
diff --git a/drivers/soc/fsl/Makefile b/drivers/soc/fsl/Makefile
index 44b3bebef24a..803ef1bfb5ff 100644
--- a/drivers/soc/fsl/Makefile
+++ b/drivers/soc/fsl/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_FSL_DPAA)                 += qbman/
 obj-$(CONFIG_QUICC_ENGINE)		+= qe/
 obj-$(CONFIG_CPM)			+= qe/
 obj-$(CONFIG_FSL_GUTS)			+= guts.o
+obj-$(CONFIG_FSL_MC_DPIO) 		+= dpio/
diff --git a/drivers/soc/fsl/dpio/Makefile b/drivers/soc/fsl/dpio/Makefile
new file mode 100644
index 000000000000..b9ff24c76582
--- /dev/null
+++ b/drivers/soc/fsl/dpio/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# QorIQ DPAA2 DPIO driver
+#
+
+obj-$(CONFIG_FSL_MC_DPIO) += fsl-mc-dpio.o
+
+fsl-mc-dpio-objs := dpio.o qbman-portal.o dpio-service.o dpio-driver.o
diff --git a/drivers/soc/fsl/dpio/dpio-cmd.h b/drivers/soc/fsl/dpio/dpio-cmd.h
new file mode 100644
index 000000000000..ab8f82ee7ee5
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio-cmd.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#ifndef _FSL_DPIO_CMD_H
+#define _FSL_DPIO_CMD_H
+
+/* DPIO Version */
+#define DPIO_VER_MAJOR			4
+#define DPIO_VER_MINOR			2
+
+/* Command Versioning */
+
+#define DPIO_CMD_ID_OFFSET		4
+#define DPIO_CMD_BASE_VERSION		1
+
+#define DPIO_CMD(id)	(((id) << DPIO_CMD_ID_OFFSET) | DPIO_CMD_BASE_VERSION)
+
+/* Command IDs */
+#define DPIO_CMDID_CLOSE				DPIO_CMD(0x800)
+#define DPIO_CMDID_OPEN					DPIO_CMD(0x803)
+#define DPIO_CMDID_GET_API_VERSION			DPIO_CMD(0xa03)
+#define DPIO_CMDID_ENABLE				DPIO_CMD(0x002)
+#define DPIO_CMDID_DISABLE				DPIO_CMD(0x003)
+#define DPIO_CMDID_GET_ATTR				DPIO_CMD(0x004)
+
+struct dpio_cmd_open {
+	__le32 dpio_id;
+};
+
+#define DPIO_CHANNEL_MODE_MASK		0x3
+
+struct dpio_rsp_get_attr {
+	/* cmd word 0 */
+	__le32 id;
+	__le16 qbman_portal_id;
+	u8 num_priorities;
+	u8 channel_mode;
+	/* cmd word 1 */
+	__le64 qbman_portal_ce_addr;
+	/* cmd word 2 */
+	__le64 qbman_portal_ci_addr;
+	/* cmd word 3 */
+	__le32 qbman_version;
+};
+
+#endif /* _FSL_DPIO_CMD_H */
diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
new file mode 100644
index 000000000000..b60b77bfaffa
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright NXP 2016
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+
+#include "qbman-portal.h"
+#include "dpio.h"
+#include "dpio-cmd.h"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Freescale Semiconductor, Inc");
+MODULE_DESCRIPTION("DPIO Driver");
+
+struct dpio_priv {
+	struct dpaa2_io *io;
+};
+
+static irqreturn_t dpio_irq_handler(int irq_num, void *arg)
+{
+	struct device *dev = (struct device *)arg;
+	struct dpio_priv *priv = dev_get_drvdata(dev);
+
+	return dpaa2_io_irq(priv->io);
+}
+
+static void unregister_dpio_irq_handlers(struct fsl_mc_device *dpio_dev)
+{
+	struct fsl_mc_device_irq *irq;
+
+	irq = dpio_dev->irqs[0];
+
+	/* clear the affinity hint */
+	irq_set_affinity_hint(irq->msi_desc->irq, NULL);
+}
+
+static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
+{
+	struct dpio_priv *priv;
+	int error;
+	struct fsl_mc_device_irq *irq;
+	cpumask_t mask;
+
+	priv = dev_get_drvdata(&dpio_dev->dev);
+
+	irq = dpio_dev->irqs[0];
+	error = devm_request_irq(&dpio_dev->dev,
+				 irq->msi_desc->irq,
+				 dpio_irq_handler,
+				 0,
+				 dev_name(&dpio_dev->dev),
+				 &dpio_dev->dev);
+	if (error < 0) {
+		dev_err(&dpio_dev->dev,
+			"devm_request_irq() failed: %d\n",
+			error);
+		return error;
+	}
+
+	/* set the affinity hint */
+	cpumask_clear(&mask);
+	cpumask_set_cpu(cpu, &mask);
+	if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
+		dev_err(&dpio_dev->dev,
+			"irq_set_affinity failed irq %d cpu %d\n",
+			irq->msi_desc->irq, cpu);
+
+	return 0;
+}
+
+static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
+{
+	struct dpio_attr dpio_attrs;
+	struct dpaa2_io_desc desc;
+	struct dpio_priv *priv;
+	int err = -ENOMEM;
+	struct device *dev = &dpio_dev->dev;
+	static int next_cpu = -1;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		goto err_priv_alloc;
+
+	dev_set_drvdata(dev, priv);
+
+	err = fsl_mc_portal_allocate(dpio_dev, 0, &dpio_dev->mc_io);
+	if (err) {
+		dev_dbg(dev, "MC portal allocation failed\n");
+		err = -EPROBE_DEFER;
+		goto err_priv_alloc;
+	}
+
+	err = dpio_open(dpio_dev->mc_io, 0, dpio_dev->obj_desc.id,
+			&dpio_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpio_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpio_get_attributes(dpio_dev->mc_io, 0, dpio_dev->mc_handle,
+				  &dpio_attrs);
+	if (err) {
+		dev_err(dev, "dpio_get_attributes() failed %d\n", err);
+		goto err_get_attr;
+	}
+	desc.qman_version = dpio_attrs.qbman_version;
+
+	err = dpio_enable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpio_enable() failed %d\n", err);
+		goto err_get_attr;
+	}
+
+	/* initialize DPIO descriptor */
+	desc.receives_notifications = dpio_attrs.num_priorities ? 1 : 0;
+	desc.has_8prio = dpio_attrs.num_priorities == 8 ? 1 : 0;
+	desc.dpio_id = dpio_dev->obj_desc.id;
+
+	/* get the cpu to use for the affinity hint */
+	if (next_cpu == -1)
+		next_cpu = cpumask_first(cpu_online_mask);
+	else
+		next_cpu = cpumask_next(next_cpu, cpu_online_mask);
+
+	if (!cpu_possible(next_cpu)) {
+		dev_err(dev, "probe failed. Number of DPIOs exceeds NR_CPUS.\n");
+		err = -ERANGE;
+		goto err_allocate_irqs;
+	}
+	desc.cpu = next_cpu;
+
+	/*
+	 * Set the CENA regs to be the cache inhibited area of the portal to
+	 * avoid coherency issues if a user migrates to another core.
+	 */
+	desc.regs_cena = devm_memremap(dev, dpio_dev->regions[1].start,
+				       resource_size(&dpio_dev->regions[1]),
+				       MEMREMAP_WC);
+	if (IS_ERR(desc.regs_cena)) {
+		dev_err(dev, "devm_memremap failed\n");
+		err = PTR_ERR(desc.regs_cena);
+		goto err_allocate_irqs;
+	}
+
+	desc.regs_cinh = devm_ioremap(dev, dpio_dev->regions[1].start,
+				      resource_size(&dpio_dev->regions[1]));
+	if (!desc.regs_cinh) {
+		err = -ENOMEM;
+		dev_err(dev, "devm_ioremap failed\n");
+		goto err_allocate_irqs;
+	}
+
+	err = fsl_mc_allocate_irqs(dpio_dev);
+	if (err) {
+		dev_err(dev, "fsl_mc_allocate_irqs failed. err=%d\n", err);
+		goto err_allocate_irqs;
+	}
+
+	err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
+	if (err)
+		goto err_register_dpio_irq;
+
+	priv->io = dpaa2_io_create(&desc);
+	if (!priv->io) {
+		dev_err(dev, "dpaa2_io_create failed\n");
+		err = -ENOMEM;
+		goto err_dpaa2_io_create;
+	}
+
+	dev_info(dev, "probed\n");
+	dev_dbg(dev, "   receives_notifications = %d\n",
+		desc.receives_notifications);
+	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+	fsl_mc_portal_free(dpio_dev->mc_io);
+
+	return 0;
+
+err_dpaa2_io_create:
+	unregister_dpio_irq_handlers(dpio_dev);
+err_register_dpio_irq:
+	fsl_mc_free_irqs(dpio_dev);
+err_allocate_irqs:
+	dpio_disable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+err_get_attr:
+	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+err_open:
+	fsl_mc_portal_free(dpio_dev->mc_io);
+err_priv_alloc:
+	return err;
+}
+
+/* Tear down interrupts for a given DPIO object */
+static void dpio_teardown_irqs(struct fsl_mc_device *dpio_dev)
+{
+	unregister_dpio_irq_handlers(dpio_dev);
+	fsl_mc_free_irqs(dpio_dev);
+}
+
+static int dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev)
+{
+	struct device *dev;
+	struct dpio_priv *priv;
+	int err;
+
+	dev = &dpio_dev->dev;
+	priv = dev_get_drvdata(dev);
+
+	dpaa2_io_down(priv->io);
+
+	dpio_teardown_irqs(dpio_dev);
+
+	err = fsl_mc_portal_allocate(dpio_dev, 0, &dpio_dev->mc_io);
+	if (err) {
+		dev_err(dev, "MC portal allocation failed\n");
+		goto err_mcportal;
+	}
+
+	err = dpio_open(dpio_dev->mc_io, 0, dpio_dev->obj_desc.id,
+			&dpio_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpio_open() failed\n");
+		goto err_open;
+	}
+
+	dpio_disable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+
+	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
+
+	fsl_mc_portal_free(dpio_dev->mc_io);
+
+	return 0;
+
+err_open:
+	fsl_mc_portal_free(dpio_dev->mc_io);
+err_mcportal:
+	return err;
+}
+
+static const struct fsl_mc_device_id dpaa2_dpio_match_id_table[] = {
+	{
+		.vendor = FSL_MC_VENDOR_FREESCALE,
+		.obj_type = "dpio",
+	},
+	{ .vendor = 0x0 }
+};
+
+static struct fsl_mc_driver dpaa2_dpio_driver = {
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.owner		= THIS_MODULE,
+	},
+	.probe		= dpaa2_dpio_probe,
+	.remove		= dpaa2_dpio_remove,
+	.match_id_table = dpaa2_dpio_match_id_table
+};
+
+static int dpio_driver_init(void)
+{
+	return fsl_mc_driver_register(&dpaa2_dpio_driver);
+}
+
+static void dpio_driver_exit(void)
+{
+	fsl_mc_driver_unregister(&dpaa2_dpio_driver);
+}
+module_init(dpio_driver_init);
+module_exit(dpio_driver_exit);
diff --git a/drivers/soc/fsl/dpio/dpio-driver.txt b/drivers/soc/fsl/dpio/dpio-driver.txt
new file mode 100644
index 000000000000..72ba9da3d179
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio-driver.txt
@@ -0,0 +1,135 @@
+Copyright 2016 NXP
+
+Introduction
+------------
+
+A DPAA2 DPIO (Data Path I/O) is a hardware object that provides
+interfaces to enqueue and dequeue frames to/from network interfaces
+and other accelerators.  A DPIO also provides hardware buffer
+pool management for network interfaces.
+
+This document provides an overview the Linux DPIO driver, its
+subcomponents, and its APIs.
+
+See Documentation/networking/dpaa2/overview.rst for a general overview of DPAA2
+and the general DPAA2 driver architecture in Linux.
+
+Driver Overview
+---------------
+
+The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and
+provides services that:
+  A) allow other drivers, such as the Ethernet driver, to enqueue and dequeue
+     frames for their respective objects
+  B) allow drivers to register callbacks for data availability notifications
+     when data becomes available on a queue or channel
+  C) allow drivers to manage hardware buffer pools
+
+The Linux DPIO driver consists of 3 primary components--
+   DPIO object driver-- fsl-mc driver that manages the DPIO object
+   DPIO service-- provides APIs to other Linux drivers for services
+   QBman portal interface-- sends portal commands, gets responses
+
+          fsl-mc          other
+           bus           drivers
+            |               |
+        +---+----+   +------+-----+
+        |DPIO obj|   |DPIO service|
+        | driver |---|  (DPIO)    |
+        +--------+   +------+-----+
+                            |
+                     +------+-----+
+                     |    QBman   |
+                     | portal i/f |
+                     +------------+
+                            |
+                         hardware
+
+The diagram below shows how the DPIO driver components fit with the other
+DPAA2 Linux driver components:
+                                                   +------------+
+                                                   | OS Network |
+                                                   |   Stack    |
+                 +------------+                    +------------+
+                 | Allocator  |. . . . . . .       |  Ethernet  |
+                 |(DPMCP,DPBP)|                    |   (DPNI)   |
+                 +-.----------+                    +---+---+----+
+                  .          .                         ^   |
+                 .            .           <data avail, |   |<enqueue,
+                .              .           tx confirm> |   | dequeue>
+    +-------------+             .                      |   |
+    | DPRC driver |              .    +--------+ +------------+
+    |   (DPRC)    |               . . |DPIO obj| |DPIO service|
+    +----------+--+                   | driver |-|  (DPIO)    |
+               |                      +--------+ +------+-----+
+               |<dev add/remove>                 +------|-----+
+               |                                 |   QBman    |
+          +----+--------------+                  | portal i/f |
+          |   MC-bus driver   |                  +------------+
+          |                   |                     |
+          | /soc/fsl-mc       |                     |
+          +-------------------+                     |
+                                                    |
+ =========================================|=========|========================
+                                        +-+--DPIO---|-----------+
+                                        |           |           |
+                                        |        QBman Portal   |
+                                        +-----------------------+
+
+ ============================================================================
+
+
+DPIO Object Driver (dpio-driver.c)
+----------------------------------
+
+   The dpio-driver component registers with the fsl-mc bus to handle objects of
+   type "dpio".  The implementation of probe() handles basic initialization
+   of the DPIO including mapping of the DPIO regions (the QBman SW portal)
+   and initializing interrupts and registering irq handlers.  The dpio-driver
+   registers the probed DPIO with dpio-service.
+
+DPIO service  (dpio-service.c, dpaa2-io.h)
+------------------------------------------
+
+   The dpio service component provides queuing, notification, and buffers
+   management services to DPAA2 drivers, such as the Ethernet driver.  A system
+   will typically allocate 1 DPIO object per CPU to allow queuing operations
+   to happen simultaneously across all CPUs.
+
+   Notification handling
+      dpaa2_io_service_register()
+      dpaa2_io_service_deregister()
+      dpaa2_io_service_rearm()
+
+   Queuing
+      dpaa2_io_service_pull_fq()
+      dpaa2_io_service_pull_channel()
+      dpaa2_io_service_enqueue_fq()
+      dpaa2_io_service_enqueue_qd()
+      dpaa2_io_store_create()
+      dpaa2_io_store_destroy()
+      dpaa2_io_store_next()
+
+   Buffer pool management
+      dpaa2_io_service_release()
+      dpaa2_io_service_acquire()
+
+QBman portal interface (qbman-portal.c)
+---------------------------------------
+
+   The qbman-portal component provides APIs to do the low level hardware
+   bit twiddling for operations such as:
+      -initializing Qman software portals
+      -building and sending portal commands
+      -portal interrupt configuration and processing
+
+   The qbman-portal APIs are not public to other drivers, and are
+   only used by dpio-service.
+
+Other (dpaa2-fd.h, dpaa2-global.h)
+----------------------------------
+
+   Frame descriptor and scatter-gather definitions and the APIs used to
+   manipulate them are defined in dpaa2-fd.h.
+
+   Dequeue result struct and parsing APIs are defined in dpaa2-global.h.
diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
new file mode 100644
index 000000000000..9b17f72349ed
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#include <linux/types.h>
+#include <linux/fsl/mc.h>
+#include <soc/fsl/dpaa2-io.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include "dpio.h"
+#include "qbman-portal.h"
+
+struct dpaa2_io {
+	struct dpaa2_io_desc dpio_desc;
+	struct qbman_swp_desc swp_desc;
+	struct qbman_swp *swp;
+	struct list_head node;
+	/* protect against multiple management commands */
+	spinlock_t lock_mgmt_cmd;
+	/* protect notifications list */
+	spinlock_t lock_notifications;
+	struct list_head notifications;
+};
+
+struct dpaa2_io_store {
+	unsigned int max;
+	dma_addr_t paddr;
+	struct dpaa2_dq *vaddr;
+	void *alloced_addr;    /* unaligned value from kmalloc() */
+	unsigned int idx;      /* position of the next-to-be-returned entry */
+	struct qbman_swp *swp; /* portal used to issue VDQCR */
+	struct device *dev;    /* device used for DMA mapping */
+};
+
+/* keep a per cpu array of DPIOs for fast access */
+static struct dpaa2_io *dpio_by_cpu[NR_CPUS];
+static struct list_head dpio_list = LIST_HEAD_INIT(dpio_list);
+static DEFINE_SPINLOCK(dpio_list_lock);
+
+static inline struct dpaa2_io *service_select_by_cpu(struct dpaa2_io *d,
+						     int cpu)
+{
+	if (d)
+		return d;
+
+	if (cpu != DPAA2_IO_ANY_CPU && cpu >= num_possible_cpus())
+		return NULL;
+
+	/*
+	 * If cpu == -1, choose the current cpu, with no guarantees about
+	 * potentially being migrated away.
+	 */
+	if (unlikely(cpu < 0))
+		cpu = smp_processor_id();
+
+	/* If a specific cpu was requested, pick it up immediately */
+	return dpio_by_cpu[cpu];
+}
+
+static inline struct dpaa2_io *service_select(struct dpaa2_io *d)
+{
+	if (d)
+		return d;
+
+	spin_lock(&dpio_list_lock);
+	d = list_entry(dpio_list.next, struct dpaa2_io, node);
+	list_del(&d->node);
+	list_add_tail(&d->node, &dpio_list);
+	spin_unlock(&dpio_list_lock);
+
+	return d;
+}
+
+/**
+ * dpaa2_io_service_select() - return a dpaa2_io service affined to this cpu
+ * @cpu: the cpu id
+ *
+ * Return the affine dpaa2_io service, or NULL if there is no service affined
+ * to the specified cpu. If DPAA2_IO_ANY_CPU is used, return the next available
+ * service.
+ */
+struct dpaa2_io *dpaa2_io_service_select(int cpu)
+{
+	if (cpu == DPAA2_IO_ANY_CPU)
+		return service_select(NULL);
+
+	return service_select_by_cpu(NULL, cpu);
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_select);
+
+/**
+ * dpaa2_io_create() - create a dpaa2_io object.
+ * @desc: the dpaa2_io descriptor
+ *
+ * Activates a "struct dpaa2_io" corresponding to the given config of an actual
+ * DPIO object.
+ *
+ * Return a valid dpaa2_io object for success, or NULL for failure.
+ */
+struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc)
+{
+	struct dpaa2_io *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
+
+	if (!obj)
+		return NULL;
+
+	/* check if CPU is out of range (-1 means any cpu) */
+	if (desc->cpu != DPAA2_IO_ANY_CPU && desc->cpu >= num_possible_cpus()) {
+		kfree(obj);
+		return NULL;
+	}
+
+	obj->dpio_desc = *desc;
+	obj->swp_desc.cena_bar = obj->dpio_desc.regs_cena;
+	obj->swp_desc.cinh_bar = obj->dpio_desc.regs_cinh;
+	obj->swp_desc.qman_version = obj->dpio_desc.qman_version;
+	obj->swp = qbman_swp_init(&obj->swp_desc);
+
+	if (!obj->swp) {
+		kfree(obj);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&obj->node);
+	spin_lock_init(&obj->lock_mgmt_cmd);
+	spin_lock_init(&obj->lock_notifications);
+	INIT_LIST_HEAD(&obj->notifications);
+
+	/* For now only enable DQRR interrupts */
+	qbman_swp_interrupt_set_trigger(obj->swp,
+					QBMAN_SWP_INTERRUPT_DQRI);
+	qbman_swp_interrupt_clear_status(obj->swp, 0xffffffff);
+	if (obj->dpio_desc.receives_notifications)
+		qbman_swp_push_set(obj->swp, 0, 1);
+
+	spin_lock(&dpio_list_lock);
+	list_add_tail(&obj->node, &dpio_list);
+	if (desc->cpu >= 0 && !dpio_by_cpu[desc->cpu])
+		dpio_by_cpu[desc->cpu] = obj;
+	spin_unlock(&dpio_list_lock);
+
+	return obj;
+}
+
+/**
+ * dpaa2_io_down() - release the dpaa2_io object.
+ * @d: the dpaa2_io object to be released.
+ *
+ * The "struct dpaa2_io" type can represent an individual DPIO object (as
+ * described by "struct dpaa2_io_desc") or an instance of a "DPIO service",
+ * which can be used to group/encapsulate multiple DPIO objects. In all cases,
+ * each handle obtained should be released using this function.
+ */
+void dpaa2_io_down(struct dpaa2_io *d)
+{
+	kfree(d);
+}
+
+#define DPAA_POLL_MAX 32
+
+/**
+ * dpaa2_io_irq() - ISR for DPIO interrupts
+ *
+ * @obj: the given DPIO object.
+ *
+ * Return IRQ_HANDLED for success or IRQ_NONE if there
+ * were no pending interrupts.
+ */
+irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj)
+{
+	const struct dpaa2_dq *dq;
+	int max = 0;
+	struct qbman_swp *swp;
+	u32 status;
+
+	swp = obj->swp;
+	status = qbman_swp_interrupt_read_status(swp);
+	if (!status)
+		return IRQ_NONE;
+
+	dq = qbman_swp_dqrr_next(swp);
+	while (dq) {
+		if (qbman_result_is_SCN(dq)) {
+			struct dpaa2_io_notification_ctx *ctx;
+			u64 q64;
+
+			q64 = qbman_result_SCN_ctx(dq);
+			ctx = (void *)(uintptr_t)q64;
+			ctx->cb(ctx);
+		} else {
+			pr_crit("fsl-mc-dpio: Unrecognised/ignored DQRR entry\n");
+		}
+		qbman_swp_dqrr_consume(swp, dq);
+		++max;
+		if (max > DPAA_POLL_MAX)
+			goto done;
+		dq = qbman_swp_dqrr_next(swp);
+	}
+done:
+	qbman_swp_interrupt_clear_status(swp, status);
+	qbman_swp_interrupt_set_inhibit(swp, 0);
+	return IRQ_HANDLED;
+}
+
+/**
+ * dpaa2_io_service_register() - Prepare for servicing of FQDAN or CDAN
+ *                               notifications on the given DPIO service.
+ * @d:   the given DPIO service.
+ * @ctx: the notification context.
+ *
+ * The caller should make the MC command to attach a DPAA2 object to
+ * a DPIO after this function completes successfully.  In that way:
+ *    (a) The DPIO service is "ready" to handle a notification arrival
+ *        (which might happen before the "attach" command to MC has
+ *        returned control of execution back to the caller)
+ *    (b) The DPIO service can provide back to the caller the 'dpio_id' and
+ *        'qman64' parameters that it should pass along in the MC command
+ *        in order for the object to be configured to produce the right
+ *        notification fields to the DPIO service.
+ *
+ * Return 0 for success, or -ENODEV for failure.
+ */
+int dpaa2_io_service_register(struct dpaa2_io *d,
+			      struct dpaa2_io_notification_ctx *ctx)
+{
+	unsigned long irqflags;
+
+	d = service_select_by_cpu(d, ctx->desired_cpu);
+	if (!d)
+		return -ENODEV;
+
+	ctx->dpio_id = d->dpio_desc.dpio_id;
+	ctx->qman64 = (u64)(uintptr_t)ctx;
+	ctx->dpio_private = d;
+	spin_lock_irqsave(&d->lock_notifications, irqflags);
+	list_add(&ctx->node, &d->notifications);
+	spin_unlock_irqrestore(&d->lock_notifications, irqflags);
+
+	/* Enable the generation of CDAN notifications */
+	if (ctx->is_cdan)
+		return qbman_swp_CDAN_set_context_enable(d->swp,
+							 (u16)ctx->id,
+							 ctx->qman64);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_register);
+
+/**
+ * dpaa2_io_service_deregister - The opposite of 'register'.
+ * @service: the given DPIO service.
+ * @ctx: the notification context.
+ *
+ * This function should be called only after sending the MC command to
+ * to detach the notification-producing device from the DPIO.
+ */
+void dpaa2_io_service_deregister(struct dpaa2_io *service,
+				 struct dpaa2_io_notification_ctx *ctx)
+{
+	struct dpaa2_io *d = ctx->dpio_private;
+	unsigned long irqflags;
+
+	if (ctx->is_cdan)
+		qbman_swp_CDAN_disable(d->swp, (u16)ctx->id);
+
+	spin_lock_irqsave(&d->lock_notifications, irqflags);
+	list_del(&ctx->node);
+	spin_unlock_irqrestore(&d->lock_notifications, irqflags);
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_deregister);
+
+/**
+ * dpaa2_io_service_rearm() - Rearm the notification for the given DPIO service.
+ * @d: the given DPIO service.
+ * @ctx: the notification context.
+ *
+ * Once a FQDAN/CDAN has been produced, the corresponding FQ/channel is
+ * considered "disarmed". Ie. the user can issue pull dequeue operations on that
+ * traffic source for as long as it likes. Eventually it may wish to "rearm"
+ * that source to allow it to produce another FQDAN/CDAN, that's what this
+ * function achieves.
+ *
+ * Return 0 for success.
+ */
+int dpaa2_io_service_rearm(struct dpaa2_io *d,
+			   struct dpaa2_io_notification_ctx *ctx)
+{
+	unsigned long irqflags;
+	int err;
+
+	d = service_select_by_cpu(d, ctx->desired_cpu);
+	if (!unlikely(d))
+		return -ENODEV;
+
+	spin_lock_irqsave(&d->lock_mgmt_cmd, irqflags);
+	if (ctx->is_cdan)
+		err = qbman_swp_CDAN_enable(d->swp, (u16)ctx->id);
+	else
+		err = qbman_swp_fq_schedule(d->swp, ctx->id);
+	spin_unlock_irqrestore(&d->lock_mgmt_cmd, irqflags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_rearm);
+
+/**
+ * dpaa2_io_service_pull_channel() - pull dequeue functions from a channel.
+ * @d: the given DPIO service.
+ * @channelid: the given channel id.
+ * @s: the dpaa2_io_store object for the result.
+ *
+ * Return 0 for success, or error code for failure.
+ */
+int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
+				  struct dpaa2_io_store *s)
+{
+	struct qbman_pull_desc pd;
+	int err;
+
+	qbman_pull_desc_clear(&pd);
+	qbman_pull_desc_set_storage(&pd, s->vaddr, s->paddr, 1);
+	qbman_pull_desc_set_numframes(&pd, (u8)s->max);
+	qbman_pull_desc_set_channel(&pd, channelid, qbman_pull_type_prio);
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+
+	s->swp = d->swp;
+	err = qbman_swp_pull(d->swp, &pd);
+	if (err)
+		s->swp = NULL;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_pull_channel);
+
+/**
+ * dpaa2_io_service_enqueue_qd() - Enqueue a frame to a QD.
+ * @d: the given DPIO service.
+ * @qdid: the given queuing destination id.
+ * @prio: the given queuing priority.
+ * @qdbin: the given queuing destination bin.
+ * @fd: the frame descriptor which is enqueued.
+ *
+ * Return 0 for successful enqueue, or -EBUSY if the enqueue ring is not ready,
+ * or -ENODEV if there is no dpio service.
+ */
+int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d,
+				u32 qdid, u8 prio, u16 qdbin,
+				const struct dpaa2_fd *fd)
+{
+	struct qbman_eq_desc ed;
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+
+	qbman_eq_desc_clear(&ed);
+	qbman_eq_desc_set_no_orp(&ed, 0);
+	qbman_eq_desc_set_qd(&ed, qdid, qdbin, prio);
+
+	return qbman_swp_enqueue(d->swp, &ed, fd);
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_enqueue_qd);
+
+/**
+ * dpaa2_io_service_release() - Release buffers to a buffer pool.
+ * @d: the given DPIO object.
+ * @bpid: the buffer pool id.
+ * @buffers: the buffers to be released.
+ * @num_buffers: the number of the buffers to be released.
+ *
+ * Return 0 for success, and negative error code for failure.
+ */
+int dpaa2_io_service_release(struct dpaa2_io *d,
+			     u32 bpid,
+			     const u64 *buffers,
+			     unsigned int num_buffers)
+{
+	struct qbman_release_desc rd;
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+
+	qbman_release_desc_clear(&rd);
+	qbman_release_desc_set_bpid(&rd, bpid);
+
+	return qbman_swp_release(d->swp, &rd, buffers, num_buffers);
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_release);
+
+/**
+ * dpaa2_io_service_acquire() - Acquire buffers from a buffer pool.
+ * @d: the given DPIO object.
+ * @bpid: the buffer pool id.
+ * @buffers: the buffer addresses for acquired buffers.
+ * @num_buffers: the expected number of the buffers to acquire.
+ *
+ * Return a negative error code if the command failed, otherwise it returns
+ * the number of buffers acquired, which may be less than the number requested.
+ * Eg. if the buffer pool is empty, this will return zero.
+ */
+int dpaa2_io_service_acquire(struct dpaa2_io *d,
+			     u32 bpid,
+			     u64 *buffers,
+			     unsigned int num_buffers)
+{
+	unsigned long irqflags;
+	int err;
+
+	d = service_select(d);
+	if (!d)
+		return -ENODEV;
+
+	spin_lock_irqsave(&d->lock_mgmt_cmd, irqflags);
+	err = qbman_swp_acquire(d->swp, bpid, buffers, num_buffers);
+	spin_unlock_irqrestore(&d->lock_mgmt_cmd, irqflags);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_service_acquire);
+
+/*
+ * 'Stores' are reusable memory blocks for holding dequeue results, and to
+ * assist with parsing those results.
+ */
+
+/**
+ * dpaa2_io_store_create() - Create the dma memory storage for dequeue result.
+ * @max_frames: the maximum number of dequeued result for frames, must be <= 16.
+ * @dev:        the device to allow mapping/unmapping the DMAable region.
+ *
+ * The size of the storage is "max_frames*sizeof(struct dpaa2_dq)".
+ * The 'dpaa2_io_store' returned is a DPIO service managed object.
+ *
+ * Return pointer to dpaa2_io_store struct for successfully created storage
+ * memory, or NULL on error.
+ */
+struct dpaa2_io_store *dpaa2_io_store_create(unsigned int max_frames,
+					     struct device *dev)
+{
+	struct dpaa2_io_store *ret;
+	size_t size;
+
+	if (!max_frames || (max_frames > 16))
+		return NULL;
+
+	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return NULL;
+
+	ret->max = max_frames;
+	size = max_frames * sizeof(struct dpaa2_dq) + 64;
+	ret->alloced_addr = kzalloc(size, GFP_KERNEL);
+	if (!ret->alloced_addr) {
+		kfree(ret);
+		return NULL;
+	}
+
+	ret->vaddr = PTR_ALIGN(ret->alloced_addr, 64);
+	ret->paddr = dma_map_single(dev, ret->vaddr,
+				    sizeof(struct dpaa2_dq) * max_frames,
+				    DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, ret->paddr)) {
+		kfree(ret->alloced_addr);
+		kfree(ret);
+		return NULL;
+	}
+
+	ret->idx = 0;
+	ret->dev = dev;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_store_create);
+
+/**
+ * dpaa2_io_store_destroy() - Frees the dma memory storage for dequeue
+ *                            result.
+ * @s: the storage memory to be destroyed.
+ */
+void dpaa2_io_store_destroy(struct dpaa2_io_store *s)
+{
+	dma_unmap_single(s->dev, s->paddr, sizeof(struct dpaa2_dq) * s->max,
+			 DMA_FROM_DEVICE);
+	kfree(s->alloced_addr);
+	kfree(s);
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_store_destroy);
+
+/**
+ * dpaa2_io_store_next() - Determine when the next dequeue result is available.
+ * @s: the dpaa2_io_store object.
+ * @is_last: indicate whether this is the last frame in the pull command.
+ *
+ * When an object driver performs dequeues to a dpaa2_io_store, this function
+ * can be used to determine when the next frame result is available. Once
+ * this function returns non-NULL, a subsequent call to it will try to find
+ * the next dequeue result.
+ *
+ * Note that if a pull-dequeue has a NULL result because the target FQ/channel
+ * was empty, then this function will also return NULL (rather than expecting
+ * the caller to always check for this. As such, "is_last" can be used to
+ * differentiate between "end-of-empty-dequeue" and "still-waiting".
+ *
+ * Return dequeue result for a valid dequeue result, or NULL for empty dequeue.
+ */
+struct dpaa2_dq *dpaa2_io_store_next(struct dpaa2_io_store *s, int *is_last)
+{
+	int match;
+	struct dpaa2_dq *ret = &s->vaddr[s->idx];
+
+	match = qbman_result_has_new_result(s->swp, ret);
+	if (!match) {
+		*is_last = 0;
+		return NULL;
+	}
+
+	s->idx++;
+
+	if (dpaa2_dq_is_pull_complete(ret)) {
+		*is_last = 1;
+		s->idx = 0;
+		/*
+		 * If we get an empty dequeue result to terminate a zero-results
+		 * vdqcr, return NULL to the caller rather than expecting him to
+		 * check non-NULL results every time.
+		 */
+		if (!(dpaa2_dq_flags(ret) & DPAA2_DQ_STAT_VALIDFRAME))
+			ret = NULL;
+	} else {
+		*is_last = 0;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dpaa2_io_store_next);
diff --git a/drivers/soc/fsl/dpio/dpio.c b/drivers/soc/fsl/dpio/dpio.c
new file mode 100644
index 000000000000..ff37c80e11a0
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/fsl/mc.h>
+
+#include "dpio.h"
+#include "dpio-cmd.h"
+
+/*
+ * Data Path I/O Portal API
+ * Contains initialization APIs and runtime control APIs for DPIO
+ */
+
+/**
+ * dpio_open() - Open a control session for the specified object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @dpio_id:	DPIO unique ID
+ * @token:	Returned token; use in subsequent API calls
+ *
+ * This function can be used to open a control session for an
+ * already created object; an object may have been declared in
+ * the DPL or by calling the dpio_create() function.
+ * This function returns a unique authentication token,
+ * associated with the specific object ID and the specific MC
+ * portal; this token must be used in all subsequent commands for
+ * this specific object.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpio_open(struct fsl_mc_io *mc_io,
+	      u32 cmd_flags,
+	      int dpio_id,
+	      u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpio_cmd_open *dpio_cmd;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_OPEN,
+					  cmd_flags,
+					  0);
+	dpio_cmd = (struct dpio_cmd_open *)cmd.params;
+	dpio_cmd->dpio_id = cpu_to_le32(dpio_id);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return 0;
+}
+
+/**
+ * dpio_close() - Close the control session of the object
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPIO object
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpio_close(struct fsl_mc_io *mc_io,
+	       u32 cmd_flags,
+	       u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_CLOSE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpio_enable() - Enable the DPIO, allow I/O portal operations.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPIO object
+ *
+ * Return:	'0' on Success; Error code otherwise
+ */
+int dpio_enable(struct fsl_mc_io *mc_io,
+		u32 cmd_flags,
+		u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_ENABLE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpio_disable() - Disable the DPIO, stop any I/O portal operation.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPIO object
+ *
+ * Return:	'0' on Success; Error code otherwise
+ */
+int dpio_disable(struct fsl_mc_io *mc_io,
+		 u32 cmd_flags,
+		 u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_DISABLE,
+					  cmd_flags,
+					  token);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpio_get_attributes() - Retrieve DPIO attributes
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPIO object
+ * @attr:	Returned object's attributes
+ *
+ * Return:	'0' on Success; Error code otherwise
+ */
+int dpio_get_attributes(struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			struct dpio_attr *attr)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct dpio_rsp_get_attr *dpio_rsp;
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_ATTR,
+					  cmd_flags,
+					  token);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	dpio_rsp = (struct dpio_rsp_get_attr *)cmd.params;
+	attr->id = le32_to_cpu(dpio_rsp->id);
+	attr->qbman_portal_id = le16_to_cpu(dpio_rsp->qbman_portal_id);
+	attr->num_priorities = dpio_rsp->num_priorities;
+	attr->channel_mode = dpio_rsp->channel_mode & DPIO_CHANNEL_MODE_MASK;
+	attr->qbman_portal_ce_offset =
+		le64_to_cpu(dpio_rsp->qbman_portal_ce_addr);
+	attr->qbman_portal_ci_offset =
+		le64_to_cpu(dpio_rsp->qbman_portal_ci_addr);
+	attr->qbman_version = le32_to_cpu(dpio_rsp->qbman_version);
+
+	return 0;
+}
+
+/**
+ * dpio_get_api_version - Get Data Path I/O API version
+ * @mc_io:	Pointer to MC portal's DPIO object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @major_ver:	Major version of DPIO API
+ * @minor_ver:	Minor version of DPIO API
+ *
+ * Return:	'0' on Success; Error code otherwise
+ */
+int dpio_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver)
+{
+	struct fsl_mc_command cmd = { 0 };
+	int err;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_API_VERSION,
+					  cmd_flags, 0);
+
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	mc_cmd_read_api_version(&cmd, major_ver, minor_ver);
+
+	return 0;
+}
diff --git a/drivers/soc/fsl/dpio/dpio.h b/drivers/soc/fsl/dpio/dpio.h
new file mode 100644
index 000000000000..49194c8e45f1
--- /dev/null
+++ b/drivers/soc/fsl/dpio/dpio.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#ifndef __FSL_DPIO_H
+#define __FSL_DPIO_H
+
+struct fsl_mc_io;
+
+int dpio_open(struct fsl_mc_io	*mc_io,
+	      u32		cmd_flags,
+	      int		dpio_id,
+	      u16		*token);
+
+int dpio_close(struct fsl_mc_io	*mc_io,
+	       u32		cmd_flags,
+	       u16		token);
+
+/**
+ * enum dpio_channel_mode - DPIO notification channel mode
+ * @DPIO_NO_CHANNEL: No support for notification channel
+ * @DPIO_LOCAL_CHANNEL: Notifications on data availability can be received by a
+ *	dedicated channel in the DPIO; user should point the queue's
+ *	destination in the relevant interface to this DPIO
+ */
+enum dpio_channel_mode {
+	DPIO_NO_CHANNEL = 0,
+	DPIO_LOCAL_CHANNEL = 1,
+};
+
+/**
+ * struct dpio_cfg - Structure representing DPIO configuration
+ * @channel_mode: Notification channel mode
+ * @num_priorities: Number of priorities for the notification channel (1-8);
+ *			relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL'
+ */
+struct dpio_cfg {
+	enum dpio_channel_mode	channel_mode;
+	u8		num_priorities;
+};
+
+int dpio_enable(struct fsl_mc_io	*mc_io,
+		u32		cmd_flags,
+		u16		token);
+
+int dpio_disable(struct fsl_mc_io	*mc_io,
+		 u32		cmd_flags,
+		 u16		token);
+
+/**
+ * struct dpio_attr - Structure representing DPIO attributes
+ * @id: DPIO object ID
+ * @qbman_portal_ce_offset: offset of the software portal cache-enabled area
+ * @qbman_portal_ci_offset: offset of the software portal cache-inhibited area
+ * @qbman_portal_id: Software portal ID
+ * @channel_mode: Notification channel mode
+ * @num_priorities: Number of priorities for the notification channel (1-8);
+ *			relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL'
+ * @qbman_version: QBMAN version
+ */
+struct dpio_attr {
+	int			id;
+	u64		qbman_portal_ce_offset;
+	u64		qbman_portal_ci_offset;
+	u16		qbman_portal_id;
+	enum dpio_channel_mode	channel_mode;
+	u8			num_priorities;
+	u32		qbman_version;
+};
+
+int dpio_get_attributes(struct fsl_mc_io	*mc_io,
+			u32		cmd_flags,
+			u16		token,
+			struct dpio_attr	*attr);
+
+int dpio_get_api_version(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 *major_ver,
+			 u16 *minor_ver);
+
+#endif /* __FSL_DPIO_H */
diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c
new file mode 100644
index 000000000000..cf1d448ea468
--- /dev/null
+++ b/drivers/soc/fsl/dpio/qbman-portal.c
@@ -0,0 +1,1005 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016 NXP
+ *
+ */
+
+#include <asm/cacheflush.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <soc/fsl/dpaa2-global.h>
+
+#include "qbman-portal.h"
+
+#define QMAN_REV_4000   0x04000000
+#define QMAN_REV_4100   0x04010000
+#define QMAN_REV_4101   0x04010001
+#define QMAN_REV_MASK   0xffff0000
+
+/* All QBMan command and result structures use this "valid bit" encoding */
+#define QB_VALID_BIT ((u32)0x80)
+
+/* QBMan portal management command codes */
+#define QBMAN_MC_ACQUIRE       0x30
+#define QBMAN_WQCHAN_CONFIGURE 0x46
+
+/* CINH register offsets */
+#define QBMAN_CINH_SWP_EQAR    0x8c0
+#define QBMAN_CINH_SWP_DQPI    0xa00
+#define QBMAN_CINH_SWP_DCAP    0xac0
+#define QBMAN_CINH_SWP_SDQCR   0xb00
+#define QBMAN_CINH_SWP_RAR     0xcc0
+#define QBMAN_CINH_SWP_ISR     0xe00
+#define QBMAN_CINH_SWP_IER     0xe40
+#define QBMAN_CINH_SWP_ISDR    0xe80
+#define QBMAN_CINH_SWP_IIR     0xec0
+
+/* CENA register offsets */
+#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((u32)(n) << 6))
+#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((u32)(n) << 6))
+#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((u32)(n) << 6))
+#define QBMAN_CENA_SWP_CR      0x600
+#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((u32)(vb) >> 1))
+#define QBMAN_CENA_SWP_VDQCR   0x780
+
+/* Reverse mapping of QBMAN_CENA_SWP_DQRR() */
+#define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)(p) & 0x1ff) >> 6)
+
+/* Define token used to determine if response written to memory is valid */
+#define QMAN_DQ_TOKEN_VALID 1
+
+/* SDQCR attribute codes */
+#define QB_SDQCR_FC_SHIFT   29
+#define QB_SDQCR_FC_MASK    0x1
+#define QB_SDQCR_DCT_SHIFT  24
+#define QB_SDQCR_DCT_MASK   0x3
+#define QB_SDQCR_TOK_SHIFT  16
+#define QB_SDQCR_TOK_MASK   0xff
+#define QB_SDQCR_SRC_SHIFT  0
+#define QB_SDQCR_SRC_MASK   0xffff
+
+/* opaque token for static dequeues */
+#define QMAN_SDQCR_TOKEN    0xbb
+
+enum qbman_sdqcr_dct {
+	qbman_sdqcr_dct_null = 0,
+	qbman_sdqcr_dct_prio_ics,
+	qbman_sdqcr_dct_active_ics,
+	qbman_sdqcr_dct_active
+};
+
+enum qbman_sdqcr_fc {
+	qbman_sdqcr_fc_one = 0,
+	qbman_sdqcr_fc_up_to_3 = 1
+};
+
+/* Portal Access */
+
+static inline u32 qbman_read_register(struct qbman_swp *p, u32 offset)
+{
+	return readl_relaxed(p->addr_cinh + offset);
+}
+
+static inline void qbman_write_register(struct qbman_swp *p, u32 offset,
+					u32 value)
+{
+	writel_relaxed(value, p->addr_cinh + offset);
+}
+
+static inline void *qbman_get_cmd(struct qbman_swp *p, u32 offset)
+{
+	return p->addr_cena + offset;
+}
+
+#define QBMAN_CINH_SWP_CFG   0xd00
+
+#define SWP_CFG_DQRR_MF_SHIFT 20
+#define SWP_CFG_EST_SHIFT     16
+#define SWP_CFG_WN_SHIFT      14
+#define SWP_CFG_RPM_SHIFT     12
+#define SWP_CFG_DCM_SHIFT     10
+#define SWP_CFG_EPM_SHIFT     8
+#define SWP_CFG_SD_SHIFT      5
+#define SWP_CFG_SP_SHIFT      4
+#define SWP_CFG_SE_SHIFT      3
+#define SWP_CFG_DP_SHIFT      2
+#define SWP_CFG_DE_SHIFT      1
+#define SWP_CFG_EP_SHIFT      0
+
+static inline u32 qbman_set_swp_cfg(u8 max_fill, u8 wn,	u8 est, u8 rpm, u8 dcm,
+				    u8 epm, int sd, int sp, int se,
+				    int dp, int de, int ep)
+{
+	return (max_fill << SWP_CFG_DQRR_MF_SHIFT |
+		est << SWP_CFG_EST_SHIFT |
+		wn << SWP_CFG_WN_SHIFT |
+		rpm << SWP_CFG_RPM_SHIFT |
+		dcm << SWP_CFG_DCM_SHIFT |
+		epm << SWP_CFG_EPM_SHIFT |
+		sd << SWP_CFG_SD_SHIFT |
+		sp << SWP_CFG_SP_SHIFT |
+		se << SWP_CFG_SE_SHIFT |
+		dp << SWP_CFG_DP_SHIFT |
+		de << SWP_CFG_DE_SHIFT |
+		ep << SWP_CFG_EP_SHIFT);
+}
+
+/**
+ * qbman_swp_init() - Create a functional object representing the given
+ *                    QBMan portal descriptor.
+ * @d: the given qbman swp descriptor
+ *
+ * Return qbman_swp portal for success, NULL if the object cannot
+ * be created.
+ */
+struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
+{
+	struct qbman_swp *p = kmalloc(sizeof(*p), GFP_KERNEL);
+	u32 reg;
+
+	if (!p)
+		return NULL;
+	p->desc = d;
+	p->mc.valid_bit = QB_VALID_BIT;
+	p->sdq = 0;
+	p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
+	p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
+	p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;
+
+	atomic_set(&p->vdq.available, 1);
+	p->vdq.valid_bit = QB_VALID_BIT;
+	p->dqrr.next_idx = 0;
+	p->dqrr.valid_bit = QB_VALID_BIT;
+
+	if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_4100) {
+		p->dqrr.dqrr_size = 4;
+		p->dqrr.reset_bug = 1;
+	} else {
+		p->dqrr.dqrr_size = 8;
+		p->dqrr.reset_bug = 0;
+	}
+
+	p->addr_cena = d->cena_bar;
+	p->addr_cinh = d->cinh_bar;
+
+	reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
+				1, /* Writes Non-cacheable */
+				0, /* EQCR_CI stashing threshold */
+				3, /* RPM: Valid bit mode, RCR in array mode */
+				2, /* DCM: Discrete consumption ack mode */
+				3, /* EPM: Valid bit mode, EQCR in array mode */
+				0, /* mem stashing drop enable == FALSE */
+				1, /* mem stashing priority == TRUE */
+				0, /* mem stashing enable == FALSE */
+				1, /* dequeue stashing priority == TRUE */
+				0, /* dequeue stashing enable == FALSE */
+				0); /* EQCR_CI stashing priority == FALSE */
+
+	qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg);
+	reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG);
+	if (!reg) {
+		pr_err("qbman: the portal is not enabled!\n");
+		return NULL;
+	}
+
+	/*
+	 * SDQCR needs to be initialized to 0 when no channels are
+	 * being dequeued from or else the QMan HW will indicate an
+	 * error.  The values that were calculated above will be
+	 * applied when dequeues from a specific channel are enabled.
+	 */
+	qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0);
+	return p;
+}
+
+/**
+ * qbman_swp_finish() - Create and destroy a functional object representing
+ *                      the given QBMan portal descriptor.
+ * @p: the qbman_swp object to be destroyed
+ */
+void qbman_swp_finish(struct qbman_swp *p)
+{
+	kfree(p);
+}
+
+/**
+ * qbman_swp_interrupt_read_status()
+ * @p: the given software portal
+ *
+ * Return the value in the SWP_ISR register.
+ */
+u32 qbman_swp_interrupt_read_status(struct qbman_swp *p)
+{
+	return qbman_read_register(p, QBMAN_CINH_SWP_ISR);
+}
+
+/**
+ * qbman_swp_interrupt_clear_status()
+ * @p: the given software portal
+ * @mask: The mask to clear in SWP_ISR register
+ */
+void qbman_swp_interrupt_clear_status(struct qbman_swp *p, u32 mask)
+{
+	qbman_write_register(p, QBMAN_CINH_SWP_ISR, mask);
+}
+
+/**
+ * qbman_swp_interrupt_get_trigger() - read interrupt enable register
+ * @p: the given software portal
+ *
+ * Return the value in the SWP_IER register.
+ */
+u32 qbman_swp_interrupt_get_trigger(struct qbman_swp *p)
+{
+	return qbman_read_register(p, QBMAN_CINH_SWP_IER);
+}
+
+/**
+ * qbman_swp_interrupt_set_trigger() - enable interrupts for a swp
+ * @p: the given software portal
+ * @mask: The mask of bits to enable in SWP_IER
+ */
+void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, u32 mask)
+{
+	qbman_write_register(p, QBMAN_CINH_SWP_IER, mask);
+}
+
+/**
+ * qbman_swp_interrupt_get_inhibit() - read interrupt mask register
+ * @p: the given software portal object
+ *
+ * Return the value in the SWP_IIR register.
+ */
+int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p)
+{
+	return qbman_read_register(p, QBMAN_CINH_SWP_IIR);
+}
+
+/**
+ * qbman_swp_interrupt_set_inhibit() - write interrupt mask register
+ * @p: the given software portal object
+ * @mask: The mask to set in SWP_IIR register
+ */
+void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit)
+{
+	qbman_write_register(p, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0);
+}
+
+/*
+ * Different management commands all use this common base layer of code to issue
+ * commands and poll for results.
+ */
+
+/*
+ * Returns a pointer to where the caller should fill in their management command
+ * (caller should ignore the verb byte)
+ */
+void *qbman_swp_mc_start(struct qbman_swp *p)
+{
+	return qbman_get_cmd(p, QBMAN_CENA_SWP_CR);
+}
+
+/*
+ * Commits merges in the caller-supplied command verb (which should not include
+ * the valid-bit) and submits the command to hardware
+ */
+void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 cmd_verb)
+{
+	u8 *v = cmd;
+
+	dma_wmb();
+	*v = cmd_verb | p->mc.valid_bit;
+}
+
+/*
+ * Checks for a completed response (returns non-NULL if only if the response
+ * is complete).
+ */
+void *qbman_swp_mc_result(struct qbman_swp *p)
+{
+	u32 *ret, verb;
+
+	ret = qbman_get_cmd(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+
+	/* Remove the valid-bit - command completed if the rest is non-zero */
+	verb = ret[0] & ~QB_VALID_BIT;
+	if (!verb)
+		return NULL;
+	p->mc.valid_bit ^= QB_VALID_BIT;
+	return ret;
+}
+
+#define QB_ENQUEUE_CMD_OPTIONS_SHIFT    0
+enum qb_enqueue_commands {
+	enqueue_empty = 0,
+	enqueue_response_always = 1,
+	enqueue_rejects_to_fq = 2
+};
+
+#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT      2
+#define QB_ENQUEUE_CMD_IRQ_ON_DISPATCH_SHIFT 3
+#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT     4
+
+/**
+ * qbman_eq_desc_clear() - Clear the contents of a descriptor to
+ *                         default/starting state.
+ */
+void qbman_eq_desc_clear(struct qbman_eq_desc *d)
+{
+	memset(d, 0, sizeof(*d));
+}
+
+/**
+ * qbman_eq_desc_set_no_orp() - Set enqueue descriptor without orp
+ * @d:                the enqueue descriptor.
+ * @response_success: 1 = enqueue with response always; 0 = enqueue with
+ *                    rejections returned on a FQ.
+ */
+void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success)
+{
+	d->verb &= ~(1 << QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT);
+	if (respond_success)
+		d->verb |= enqueue_response_always;
+	else
+		d->verb |= enqueue_rejects_to_fq;
+}
+
+/*
+ * Exactly one of the following descriptor "targets" should be set. (Calling any
+ * one of these will replace the effect of any prior call to one of these.)
+ *   -enqueue to a frame queue
+ *   -enqueue to a queuing destination
+ */
+
+/**
+ * qbman_eq_desc_set_fq() - set the FQ for the enqueue command
+ * @d:    the enqueue descriptor
+ * @fqid: the id of the frame queue to be enqueued
+ */
+void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, u32 fqid)
+{
+	d->verb &= ~(1 << QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT);
+	d->tgtid = cpu_to_le32(fqid);
+}
+
+/**
+ * qbman_eq_desc_set_qd() - Set Queuing Destination for the enqueue command
+ * @d:       the enqueue descriptor
+ * @qdid:    the id of the queuing destination to be enqueued
+ * @qd_bin:  the queuing destination bin
+ * @qd_prio: the queuing destination priority
+ */
+void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, u32 qdid,
+			  u32 qd_bin, u32 qd_prio)
+{
+	d->verb |= 1 << QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT;
+	d->tgtid = cpu_to_le32(qdid);
+	d->qdbin = cpu_to_le16(qd_bin);
+	d->qpri = qd_prio;
+}
+
+#define EQAR_IDX(eqar)     ((eqar) & 0x7)
+#define EQAR_VB(eqar)      ((eqar) & 0x80)
+#define EQAR_SUCCESS(eqar) ((eqar) & 0x100)
+
+/**
+ * qbman_swp_enqueue() - Issue an enqueue command
+ * @s:  the software portal used for enqueue
+ * @d:  the enqueue descriptor
+ * @fd: the frame descriptor to be enqueued
+ *
+ * Please note that 'fd' should only be NULL if the "action" of the
+ * descriptor is "orp_hole" or "orp_nesn".
+ *
+ * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
+ */
+int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
+		      const struct dpaa2_fd *fd)
+{
+	struct qbman_eq_desc *p;
+	u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
+
+	if (!EQAR_SUCCESS(eqar))
+		return -EBUSY;
+
+	p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+	memcpy(&p->dca, &d->dca, 31);
+	memcpy(&p->fd, fd, sizeof(*fd));
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	dma_wmb();
+	p->verb = d->verb | EQAR_VB(eqar);
+
+	return 0;
+}
+
+/* Static (push) dequeue */
+
+/**
+ * qbman_swp_push_get() - Get the push dequeue setup
+ * @p:           the software portal object
+ * @channel_idx: the channel index to query
+ * @enabled:     returned boolean to show whether the push dequeue is enabled
+ *               for the given channel
+ */
+void qbman_swp_push_get(struct qbman_swp *s, u8 channel_idx, int *enabled)
+{
+	u16 src = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK;
+
+	WARN_ON(channel_idx > 15);
+	*enabled = src | (1 << channel_idx);
+}
+
+/**
+ * qbman_swp_push_set() - Enable or disable push dequeue
+ * @p:           the software portal object
+ * @channel_idx: the channel index (0 to 15)
+ * @enable:      enable or disable push dequeue
+ */
+void qbman_swp_push_set(struct qbman_swp *s, u8 channel_idx, int enable)
+{
+	u16 dqsrc;
+
+	WARN_ON(channel_idx > 15);
+	if (enable)
+		s->sdq |= 1 << channel_idx;
+	else
+		s->sdq &= ~(1 << channel_idx);
+
+	/* Read make the complete src map.  If no channels are enabled
+	 * the SDQCR must be 0 or else QMan will assert errors
+	 */
+	dqsrc = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK;
+	if (dqsrc != 0)
+		qbman_write_register(s, QBMAN_CINH_SWP_SDQCR, s->sdq);
+	else
+		qbman_write_register(s, QBMAN_CINH_SWP_SDQCR, 0);
+}
+
+#define QB_VDQCR_VERB_DCT_SHIFT    0
+#define QB_VDQCR_VERB_DT_SHIFT     2
+#define QB_VDQCR_VERB_RLS_SHIFT    4
+#define QB_VDQCR_VERB_WAE_SHIFT    5
+
+enum qb_pull_dt_e {
+	qb_pull_dt_channel,
+	qb_pull_dt_workqueue,
+	qb_pull_dt_framequeue
+};
+
+/**
+ * qbman_pull_desc_clear() - Clear the contents of a descriptor to
+ *                           default/starting state
+ * @d: the pull dequeue descriptor to be cleared
+ */
+void qbman_pull_desc_clear(struct qbman_pull_desc *d)
+{
+	memset(d, 0, sizeof(*d));
+}
+
+/**
+ * qbman_pull_desc_set_storage()- Set the pull dequeue storage
+ * @d:            the pull dequeue descriptor to be set
+ * @storage:      the pointer of the memory to store the dequeue result
+ * @storage_phys: the physical address of the storage memory
+ * @stash:        to indicate whether write allocate is enabled
+ *
+ * If not called, or if called with 'storage' as NULL, the result pull dequeues
+ * will produce results to DQRR. If 'storage' is non-NULL, then results are
+ * produced to the given memory location (using the DMA address which
+ * the caller provides in 'storage_phys'), and 'stash' controls whether or not
+ * those writes to main-memory express a cache-warming attribute.
+ */
+void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
+				 struct dpaa2_dq *storage,
+				 dma_addr_t storage_phys,
+				 int stash)
+{
+	/* save the virtual address */
+	d->rsp_addr_virt = (u64)(uintptr_t)storage;
+
+	if (!storage) {
+		d->verb &= ~(1 << QB_VDQCR_VERB_RLS_SHIFT);
+		return;
+	}
+	d->verb |= 1 << QB_VDQCR_VERB_RLS_SHIFT;
+	if (stash)
+		d->verb |= 1 << QB_VDQCR_VERB_WAE_SHIFT;
+	else
+		d->verb &= ~(1 << QB_VDQCR_VERB_WAE_SHIFT);
+
+	d->rsp_addr = cpu_to_le64(storage_phys);
+}
+
+/**
+ * qbman_pull_desc_set_numframes() - Set the number of frames to be dequeued
+ * @d:         the pull dequeue descriptor to be set
+ * @numframes: number of frames to be set, must be between 1 and 16, inclusive
+ */
+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, u8 numframes)
+{
+	d->numf = numframes - 1;
+}
+
+/*
+ * Exactly one of the following descriptor "actions" should be set. (Calling any
+ * one of these will replace the effect of any prior call to one of these.)
+ * - pull dequeue from the given frame queue (FQ)
+ * - pull dequeue from any FQ in the given work queue (WQ)
+ * - pull dequeue from any FQ in any WQ in the given channel
+ */
+
+/**
+ * qbman_pull_desc_set_fq() - Set fqid from which the dequeue command dequeues
+ * @fqid: the frame queue index of the given FQ
+ */
+void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, u32 fqid)
+{
+	d->verb |= 1 << QB_VDQCR_VERB_DCT_SHIFT;
+	d->verb |= qb_pull_dt_framequeue << QB_VDQCR_VERB_DT_SHIFT;
+	d->dq_src = cpu_to_le32(fqid);
+}
+
+/**
+ * qbman_pull_desc_set_wq() - Set wqid from which the dequeue command dequeues
+ * @wqid: composed of channel id and wqid within the channel
+ * @dct:  the dequeue command type
+ */
+void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, u32 wqid,
+			    enum qbman_pull_type_e dct)
+{
+	d->verb |= dct << QB_VDQCR_VERB_DCT_SHIFT;
+	d->verb |= qb_pull_dt_workqueue << QB_VDQCR_VERB_DT_SHIFT;
+	d->dq_src = cpu_to_le32(wqid);
+}
+
+/**
+ * qbman_pull_desc_set_channel() - Set channelid from which the dequeue command
+ *                                 dequeues
+ * @chid: the channel id to be dequeued
+ * @dct:  the dequeue command type
+ */
+void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid,
+				 enum qbman_pull_type_e dct)
+{
+	d->verb |= dct << QB_VDQCR_VERB_DCT_SHIFT;
+	d->verb |= qb_pull_dt_channel << QB_VDQCR_VERB_DT_SHIFT;
+	d->dq_src = cpu_to_le32(chid);
+}
+
+/**
+ * qbman_swp_pull() - Issue the pull dequeue command
+ * @s: the software portal object
+ * @d: the software portal descriptor which has been configured with
+ *     the set of qbman_pull_desc_set_*() calls
+ *
+ * Return 0 for success, and -EBUSY if the software portal is not ready
+ * to do pull dequeue.
+ */
+int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+{
+	struct qbman_pull_desc *p;
+
+	if (!atomic_dec_and_test(&s->vdq.available)) {
+		atomic_inc(&s->vdq.available);
+		return -EBUSY;
+	}
+	s->vdq.storage = (void *)(uintptr_t)d->rsp_addr_virt;
+	p = qbman_get_cmd(s, QBMAN_CENA_SWP_VDQCR);
+	p->numf = d->numf;
+	p->tok = QMAN_DQ_TOKEN_VALID;
+	p->dq_src = d->dq_src;
+	p->rsp_addr = d->rsp_addr;
+	p->rsp_addr_virt = d->rsp_addr_virt;
+	dma_wmb();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p->verb = d->verb | s->vdq.valid_bit;
+	s->vdq.valid_bit ^= QB_VALID_BIT;
+
+	return 0;
+}
+
+#define QMAN_DQRR_PI_MASK   0xf
+
+/**
+ * qbman_swp_dqrr_next() - Get an valid DQRR entry
+ * @s: the software portal object
+ *
+ * Return NULL if there are no unconsumed DQRR entries. Return a DQRR entry
+ * only once, so repeated calls can return a sequence of DQRR entries, without
+ * requiring they be consumed immediately or in any particular order.
+ */
+const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s)
+{
+	u32 verb;
+	u32 response_verb;
+	u32 flags;
+	struct dpaa2_dq *p;
+
+	/* Before using valid-bit to detect if something is there, we have to
+	 * handle the case of the DQRR reset bug...
+	 */
+	if (unlikely(s->dqrr.reset_bug)) {
+		/*
+		 * We pick up new entries by cache-inhibited producer index,
+		 * which means that a non-coherent mapping would require us to
+		 * invalidate and read *only* once that PI has indicated that
+		 * there's an entry here. The first trip around the DQRR ring
+		 * will be much less efficient than all subsequent trips around
+		 * it...
+		 */
+		u8 pi = qbman_read_register(s, QBMAN_CINH_SWP_DQPI) &
+			QMAN_DQRR_PI_MASK;
+
+		/* there are new entries if pi != next_idx */
+		if (pi == s->dqrr.next_idx)
+			return NULL;
+
+		/*
+		 * if next_idx is/was the last ring index, and 'pi' is
+		 * different, we can disable the workaround as all the ring
+		 * entries have now been DMA'd to so valid-bit checking is
+		 * repaired. Note: this logic needs to be based on next_idx
+		 * (which increments one at a time), rather than on pi (which
+		 * can burst and wrap-around between our snapshots of it).
+		 */
+		if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1)) {
+			pr_debug("next_idx=%d, pi=%d, clear reset bug\n",
+				 s->dqrr.next_idx, pi);
+			s->dqrr.reset_bug = 0;
+		}
+		prefetch(qbman_get_cmd(s,
+				       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+	}
+
+	p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+	verb = p->dq.verb;
+
+	/*
+	 * If the valid-bit isn't of the expected polarity, nothing there. Note,
+	 * in the DQRR reset bug workaround, we shouldn't need to skip these
+	 * check, because we've already determined that a new entry is available
+	 * and we've invalidated the cacheline before reading it, so the
+	 * valid-bit behaviour is repaired and should tell us what we already
+	 * knew from reading PI.
+	 */
+	if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) {
+		prefetch(qbman_get_cmd(s,
+				       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+		return NULL;
+	}
+	/*
+	 * There's something there. Move "next_idx" attention to the next ring
+	 * entry (and prefetch it) before returning what we found.
+	 */
+	s->dqrr.next_idx++;
+	s->dqrr.next_idx &= s->dqrr.dqrr_size - 1; /* Wrap around */
+	if (!s->dqrr.next_idx)
+		s->dqrr.valid_bit ^= QB_VALID_BIT;
+
+	/*
+	 * If this is the final response to a volatile dequeue command
+	 * indicate that the vdq is available
+	 */
+	flags = p->dq.stat;
+	response_verb = verb & QBMAN_RESULT_MASK;
+	if ((response_verb == QBMAN_RESULT_DQ) &&
+	    (flags & DPAA2_DQ_STAT_VOLATILE) &&
+	    (flags & DPAA2_DQ_STAT_EXPIRED))
+		atomic_inc(&s->vdq.available);
+
+	prefetch(qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+
+	return p;
+}
+
+/**
+ * qbman_swp_dqrr_consume() -  Consume DQRR entries previously returned from
+ *                             qbman_swp_dqrr_next().
+ * @s: the software portal object
+ * @dq: the DQRR entry to be consumed
+ */
+void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct dpaa2_dq *dq)
+{
+	qbman_write_register(s, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
+}
+
+/**
+ * qbman_result_has_new_result() - Check and get the dequeue response from the
+ *                                 dq storage memory set in pull dequeue command
+ * @s: the software portal object
+ * @dq: the dequeue result read from the memory
+ *
+ * Return 1 for getting a valid dequeue result, or 0 for not getting a valid
+ * dequeue result.
+ *
+ * Only used for user-provided storage of dequeue results, not DQRR. For
+ * efficiency purposes, the driver will perform any required endianness
+ * conversion to ensure that the user's dequeue result storage is in host-endian
+ * format. As such, once the user has called qbman_result_has_new_result() and
+ * been returned a valid dequeue result, they should not call it again on
+ * the same memory location (except of course if another dequeue command has
+ * been executed to produce a new result to that location).
+ */
+int qbman_result_has_new_result(struct qbman_swp *s, const struct dpaa2_dq *dq)
+{
+	if (dq->dq.tok != QMAN_DQ_TOKEN_VALID)
+		return 0;
+
+	/*
+	 * Set token to be 0 so we will detect change back to 1
+	 * next time the looping is traversed. Const is cast away here
+	 * as we want users to treat the dequeue responses as read only.
+	 */
+	((struct dpaa2_dq *)dq)->dq.tok = 0;
+
+	/*
+	 * Determine whether VDQCR is available based on whether the
+	 * current result is sitting in the first storage location of
+	 * the busy command.
+	 */
+	if (s->vdq.storage == dq) {
+		s->vdq.storage = NULL;
+		atomic_inc(&s->vdq.available);
+	}
+
+	return 1;
+}
+
+/**
+ * qbman_release_desc_clear() - Clear the contents of a descriptor to
+ *                              default/starting state.
+ */
+void qbman_release_desc_clear(struct qbman_release_desc *d)
+{
+	memset(d, 0, sizeof(*d));
+	d->verb = 1 << 5; /* Release Command Valid */
+}
+
+/**
+ * qbman_release_desc_set_bpid() - Set the ID of the buffer pool to release to
+ */
+void qbman_release_desc_set_bpid(struct qbman_release_desc *d, u16 bpid)
+{
+	d->bpid = cpu_to_le16(bpid);
+}
+
+/**
+ * qbman_release_desc_set_rcdi() - Determines whether or not the portal's RCDI
+ * interrupt source should be asserted after the release command is completed.
+ */
+void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable)
+{
+	if (enable)
+		d->verb |= 1 << 6;
+	else
+		d->verb &= ~(1 << 6);
+}
+
+#define RAR_IDX(rar)     ((rar) & 0x7)
+#define RAR_VB(rar)      ((rar) & 0x80)
+#define RAR_SUCCESS(rar) ((rar) & 0x100)
+
+/**
+ * qbman_swp_release() - Issue a buffer release command
+ * @s:           the software portal object
+ * @d:           the release descriptor
+ * @buffers:     a pointer pointing to the buffer address to be released
+ * @num_buffers: number of buffers to be released,  must be less than 8
+ *
+ * Return 0 for success, -EBUSY if the release command ring is not ready.
+ */
+int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
+		      const u64 *buffers, unsigned int num_buffers)
+{
+	int i;
+	struct qbman_release_desc *p;
+	u32 rar;
+
+	if (!num_buffers || (num_buffers > 7))
+		return -EINVAL;
+
+	rar = qbman_read_register(s, QBMAN_CINH_SWP_RAR);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	/* Start the release command */
+	p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+	/* Copy the caller's buffer pointers to the command */
+	for (i = 0; i < num_buffers; i++)
+		p->buf[i] = cpu_to_le64(buffers[i]);
+	p->bpid = d->bpid;
+
+	/*
+	 * Set the verb byte, have to substitute in the valid-bit and the number
+	 * of buffers.
+	 */
+	dma_wmb();
+	p->verb = d->verb | RAR_VB(rar) | num_buffers;
+
+	return 0;
+}
+
+struct qbman_acquire_desc {
+	u8 verb;
+	u8 reserved;
+	__le16 bpid;
+	u8 num;
+	u8 reserved2[59];
+};
+
+struct qbman_acquire_rslt {
+	u8 verb;
+	u8 rslt;
+	__le16 reserved;
+	u8 num;
+	u8 reserved2[3];
+	__le64 buf[7];
+};
+
+/**
+ * qbman_swp_acquire() - Issue a buffer acquire command
+ * @s:           the software portal object
+ * @bpid:        the buffer pool index
+ * @buffers:     a pointer pointing to the acquired buffer addresses
+ * @num_buffers: number of buffers to be acquired, must be less than 8
+ *
+ * Return 0 for success, or negative error code if the acquire command
+ * fails.
+ */
+int qbman_swp_acquire(struct qbman_swp *s, u16 bpid, u64 *buffers,
+		      unsigned int num_buffers)
+{
+	struct qbman_acquire_desc *p;
+	struct qbman_acquire_rslt *r;
+	int i;
+
+	if (!num_buffers || (num_buffers > 7))
+		return -EINVAL;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+
+	if (!p)
+		return -EBUSY;
+
+	/* Encode the caller-provided attributes */
+	p->bpid = cpu_to_le16(bpid);
+	p->num = num_buffers;
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete(s, p, QBMAN_MC_ACQUIRE);
+	if (unlikely(!r)) {
+		pr_err("qbman: acquire from BPID %d failed, no response\n",
+		       bpid);
+		return -EIO;
+	}
+
+	/* Decode the outcome */
+	WARN_ON((r->verb & 0x7f) != QBMAN_MC_ACQUIRE);
+
+	/* Determine success or failure */
+	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+		pr_err("qbman: acquire from BPID 0x%x failed, code=0x%02x\n",
+		       bpid, r->rslt);
+		return -EIO;
+	}
+
+	WARN_ON(r->num > num_buffers);
+
+	/* Copy the acquired buffers to the caller's array */
+	for (i = 0; i < r->num; i++)
+		buffers[i] = le64_to_cpu(r->buf[i]);
+
+	return (int)r->num;
+}
+
+struct qbman_alt_fq_state_desc {
+	u8 verb;
+	u8 reserved[3];
+	__le32 fqid;
+	u8 reserved2[56];
+};
+
+struct qbman_alt_fq_state_rslt {
+	u8 verb;
+	u8 rslt;
+	u8 reserved[62];
+};
+
+#define ALT_FQ_FQID_MASK 0x00FFFFFF
+
+int qbman_swp_alt_fq_state(struct qbman_swp *s, u32 fqid,
+			   u8 alt_fq_verb)
+{
+	struct qbman_alt_fq_state_desc *p;
+	struct qbman_alt_fq_state_rslt *r;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+	if (!p)
+		return -EBUSY;
+
+	p->fqid = cpu_to_le32(fqid & ALT_FQ_FQID_MASK);
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete(s, p, alt_fq_verb);
+	if (unlikely(!r)) {
+		pr_err("qbman: mgmt cmd failed, no response (verb=0x%x)\n",
+		       alt_fq_verb);
+		return -EIO;
+	}
+
+	/* Decode the outcome */
+	WARN_ON((r->verb & QBMAN_RESULT_MASK) != alt_fq_verb);
+
+	/* Determine success or failure */
+	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+		pr_err("qbman: ALT FQID %d failed: verb = 0x%08x code = 0x%02x\n",
+		       fqid, r->verb, r->rslt);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+struct qbman_cdan_ctrl_desc {
+	u8 verb;
+	u8 reserved;
+	__le16 ch;
+	u8 we;
+	u8 ctrl;
+	__le16 reserved2;
+	__le64 cdan_ctx;
+	u8 reserved3[48];
+
+};
+
+struct qbman_cdan_ctrl_rslt {
+	u8 verb;
+	u8 rslt;
+	__le16 ch;
+	u8 reserved[60];
+};
+
+int qbman_swp_CDAN_set(struct qbman_swp *s, u16 channelid,
+		       u8 we_mask, u8 cdan_en,
+		       u64 ctx)
+{
+	struct qbman_cdan_ctrl_desc *p = NULL;
+	struct qbman_cdan_ctrl_rslt *r = NULL;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+	if (!p)
+		return -EBUSY;
+
+	/* Encode the caller-provided attributes */
+	p->ch = cpu_to_le16(channelid);
+	p->we = we_mask;
+	if (cdan_en)
+		p->ctrl = 1;
+	else
+		p->ctrl = 0;
+	p->cdan_ctx = cpu_to_le64(ctx);
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete(s, p, QBMAN_WQCHAN_CONFIGURE);
+	if (unlikely(!r)) {
+		pr_err("qbman: wqchan config failed, no response\n");
+		return -EIO;
+	}
+
+	WARN_ON((r->verb & 0x7f) != QBMAN_WQCHAN_CONFIGURE);
+
+	/* Determine success or failure */
+	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+		pr_err("qbman: CDAN cQID %d failed: code = 0x%02x\n",
+		       channelid, r->rslt);
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/soc/fsl/dpio/qbman-portal.h b/drivers/soc/fsl/dpio/qbman-portal.h
new file mode 100644
index 000000000000..89d1dd9969b6
--- /dev/null
+++ b/drivers/soc/fsl/dpio/qbman-portal.h
@@ -0,0 +1,444 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#ifndef __FSL_QBMAN_PORTAL_H
+#define __FSL_QBMAN_PORTAL_H
+
+#include <soc/fsl/dpaa2-fd.h>
+
+struct dpaa2_dq;
+struct qbman_swp;
+
+/* qbman software portal descriptor structure */
+struct qbman_swp_desc {
+	void *cena_bar; /* Cache-enabled portal base address */
+	void __iomem *cinh_bar; /* Cache-inhibited portal base address */
+	u32 qman_version;
+};
+
+#define QBMAN_SWP_INTERRUPT_EQRI 0x01
+#define QBMAN_SWP_INTERRUPT_EQDI 0x02
+#define QBMAN_SWP_INTERRUPT_DQRI 0x04
+#define QBMAN_SWP_INTERRUPT_RCRI 0x08
+#define QBMAN_SWP_INTERRUPT_RCDI 0x10
+#define QBMAN_SWP_INTERRUPT_VDCI 0x20
+
+/* the structure for pull dequeue descriptor */
+struct qbman_pull_desc {
+	u8 verb;
+	u8 numf;
+	u8 tok;
+	u8 reserved;
+	__le32 dq_src;
+	__le64 rsp_addr;
+	u64 rsp_addr_virt;
+	u8 padding[40];
+};
+
+enum qbman_pull_type_e {
+	/* dequeue with priority precedence, respect intra-class scheduling */
+	qbman_pull_type_prio = 1,
+	/* dequeue with active FQ precedence, respect ICS */
+	qbman_pull_type_active,
+	/* dequeue with active FQ precedence, no ICS */
+	qbman_pull_type_active_noics
+};
+
+/* Definitions for parsing dequeue entries */
+#define QBMAN_RESULT_MASK      0x7f
+#define QBMAN_RESULT_DQ        0x60
+#define QBMAN_RESULT_FQRN      0x21
+#define QBMAN_RESULT_FQRNI     0x22
+#define QBMAN_RESULT_FQPN      0x24
+#define QBMAN_RESULT_FQDAN     0x25
+#define QBMAN_RESULT_CDAN      0x26
+#define QBMAN_RESULT_CSCN_MEM  0x27
+#define QBMAN_RESULT_CGCU      0x28
+#define QBMAN_RESULT_BPSCN     0x29
+#define QBMAN_RESULT_CSCN_WQ   0x2a
+
+/* QBMan FQ management command codes */
+#define QBMAN_FQ_SCHEDULE	0x48
+#define QBMAN_FQ_FORCE		0x49
+#define QBMAN_FQ_XON		0x4d
+#define QBMAN_FQ_XOFF		0x4e
+
+/* structure of enqueue descriptor */
+struct qbman_eq_desc {
+	u8 verb;
+	u8 dca;
+	__le16 seqnum;
+	__le16 orpid;
+	__le16 reserved1;
+	__le32 tgtid;
+	__le32 tag;
+	__le16 qdbin;
+	u8 qpri;
+	u8 reserved[3];
+	u8 wae;
+	u8 rspid;
+	__le64 rsp_addr;
+	u8 fd[32];
+};
+
+/* buffer release descriptor */
+struct qbman_release_desc {
+	u8 verb;
+	u8 reserved;
+	__le16 bpid;
+	__le32 reserved2;
+	__le64 buf[7];
+};
+
+/* Management command result codes */
+#define QBMAN_MC_RSLT_OK      0xf0
+
+#define CODE_CDAN_WE_EN    0x1
+#define CODE_CDAN_WE_CTX   0x4
+
+/* portal data structure */
+struct qbman_swp {
+	const struct qbman_swp_desc *desc;
+	void *addr_cena;
+	void __iomem *addr_cinh;
+
+	/* Management commands */
+	struct {
+		u32 valid_bit; /* 0x00 or 0x80 */
+	} mc;
+
+	/* Push dequeues */
+	u32 sdq;
+
+	/* Volatile dequeues */
+	struct {
+		atomic_t available; /* indicates if a command can be sent */
+		u32 valid_bit; /* 0x00 or 0x80 */
+		struct dpaa2_dq *storage; /* NULL if DQRR */
+	} vdq;
+
+	/* DQRR */
+	struct {
+		u32 next_idx;
+		u32 valid_bit;
+		u8 dqrr_size;
+		int reset_bug; /* indicates dqrr reset workaround is needed */
+	} dqrr;
+};
+
+struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
+void qbman_swp_finish(struct qbman_swp *p);
+u32 qbman_swp_interrupt_read_status(struct qbman_swp *p);
+void qbman_swp_interrupt_clear_status(struct qbman_swp *p, u32 mask);
+u32 qbman_swp_interrupt_get_trigger(struct qbman_swp *p);
+void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, u32 mask);
+int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p);
+void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit);
+
+void qbman_swp_push_get(struct qbman_swp *p, u8 channel_idx, int *enabled);
+void qbman_swp_push_set(struct qbman_swp *p, u8 channel_idx, int enable);
+
+void qbman_pull_desc_clear(struct qbman_pull_desc *d);
+void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
+				 struct dpaa2_dq *storage,
+				 dma_addr_t storage_phys,
+				 int stash);
+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, u8 numframes);
+void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, u32 fqid);
+void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, u32 wqid,
+			    enum qbman_pull_type_e dct);
+void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid,
+				 enum qbman_pull_type_e dct);
+
+int qbman_swp_pull(struct qbman_swp *p, struct qbman_pull_desc *d);
+
+const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s);
+void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct dpaa2_dq *dq);
+
+int qbman_result_has_new_result(struct qbman_swp *p, const struct dpaa2_dq *dq);
+
+void qbman_eq_desc_clear(struct qbman_eq_desc *d);
+void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success);
+void qbman_eq_desc_set_token(struct qbman_eq_desc *d, u8 token);
+void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, u32 fqid);
+void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, u32 qdid,
+			  u32 qd_bin, u32 qd_prio);
+
+int qbman_swp_enqueue(struct qbman_swp *p, const struct qbman_eq_desc *d,
+		      const struct dpaa2_fd *fd);
+
+void qbman_release_desc_clear(struct qbman_release_desc *d);
+void qbman_release_desc_set_bpid(struct qbman_release_desc *d, u16 bpid);
+void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable);
+
+int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
+		      const u64 *buffers, unsigned int num_buffers);
+int qbman_swp_acquire(struct qbman_swp *s, u16 bpid, u64 *buffers,
+		      unsigned int num_buffers);
+int qbman_swp_alt_fq_state(struct qbman_swp *s, u32 fqid,
+			   u8 alt_fq_verb);
+int qbman_swp_CDAN_set(struct qbman_swp *s, u16 channelid,
+		       u8 we_mask, u8 cdan_en,
+		       u64 ctx);
+
+void *qbman_swp_mc_start(struct qbman_swp *p);
+void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 cmd_verb);
+void *qbman_swp_mc_result(struct qbman_swp *p);
+
+/**
+ * qbman_result_is_DQ() - check if the dequeue result is a dequeue response
+ * @dq: the dequeue result to be checked
+ *
+ * DQRR entries may contain non-dequeue results, ie. notifications
+ */
+static inline int qbman_result_is_DQ(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_DQ);
+}
+
+/**
+ * qbman_result_is_SCN() - Check the dequeue result is notification or not
+ * @dq: the dequeue result to be checked
+ *
+ */
+static inline int qbman_result_is_SCN(const struct dpaa2_dq *dq)
+{
+	return !qbman_result_is_DQ(dq);
+}
+
+/* FQ Data Availability */
+static inline int qbman_result_is_FQDAN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQDAN);
+}
+
+/* Channel Data Availability */
+static inline int qbman_result_is_CDAN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CDAN);
+}
+
+/* Congestion State Change */
+static inline int qbman_result_is_CSCN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CSCN_WQ);
+}
+
+/* Buffer Pool State Change */
+static inline int qbman_result_is_BPSCN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_BPSCN);
+}
+
+/* Congestion Group Count Update */
+static inline int qbman_result_is_CGCU(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CGCU);
+}
+
+/* Retirement */
+static inline int qbman_result_is_FQRN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQRN);
+}
+
+/* Retirement Immediate */
+static inline int qbman_result_is_FQRNI(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQRNI);
+}
+
+ /* Park */
+static inline int qbman_result_is_FQPN(const struct dpaa2_dq *dq)
+{
+	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQPN);
+}
+
+/**
+ * qbman_result_SCN_state() - Get the state field in State-change notification
+ */
+static inline u8 qbman_result_SCN_state(const struct dpaa2_dq *scn)
+{
+	return scn->scn.state;
+}
+
+#define SCN_RID_MASK 0x00FFFFFF
+
+/**
+ * qbman_result_SCN_rid() - Get the resource id in State-change notification
+ */
+static inline u32 qbman_result_SCN_rid(const struct dpaa2_dq *scn)
+{
+	return le32_to_cpu(scn->scn.rid_tok) & SCN_RID_MASK;
+}
+
+/**
+ * qbman_result_SCN_ctx() - Get the context data in State-change notification
+ */
+static inline u64 qbman_result_SCN_ctx(const struct dpaa2_dq *scn)
+{
+	return le64_to_cpu(scn->scn.ctx);
+}
+
+/**
+ * qbman_swp_fq_schedule() - Move the fq to the scheduled state
+ * @s:    the software portal object
+ * @fqid: the index of frame queue to be scheduled
+ *
+ * There are a couple of different ways that a FQ can end up parked state,
+ * This schedules it.
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_fq_schedule(struct qbman_swp *s, u32 fqid)
+{
+	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_SCHEDULE);
+}
+
+/**
+ * qbman_swp_fq_force() - Force the FQ to fully scheduled state
+ * @s:    the software portal object
+ * @fqid: the index of frame queue to be forced
+ *
+ * Force eligible will force a tentatively-scheduled FQ to be fully-scheduled
+ * and thus be available for selection by any channel-dequeuing behaviour (push
+ * or pull). If the FQ is subsequently "dequeued" from the channel and is still
+ * empty at the time this happens, the resulting dq_entry will have no FD.
+ * (qbman_result_DQ_fd() will return NULL.)
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_fq_force(struct qbman_swp *s, u32 fqid)
+{
+	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_FORCE);
+}
+
+/**
+ * qbman_swp_fq_xon() - sets FQ flow-control to XON
+ * @s:    the software portal object
+ * @fqid: the index of frame queue
+ *
+ * This setting doesn't affect enqueues to the FQ, just dequeues.
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_fq_xon(struct qbman_swp *s, u32 fqid)
+{
+	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XON);
+}
+
+/**
+ * qbman_swp_fq_xoff() - sets FQ flow-control to XOFF
+ * @s:    the software portal object
+ * @fqid: the index of frame queue
+ *
+ * This setting doesn't affect enqueues to the FQ, just dequeues.
+ * XOFF FQs will remain in the tenatively-scheduled state, even when
+ * non-empty, meaning they won't be selected for scheduled dequeuing.
+ * If a FQ is changed to XOFF after it had already become truly-scheduled
+ * to a channel, and a pull dequeue of that channel occurs that selects
+ * that FQ for dequeuing, then the resulting dq_entry will have no FD.
+ * (qbman_result_DQ_fd() will return NULL.)
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_fq_xoff(struct qbman_swp *s, u32 fqid)
+{
+	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XOFF);
+}
+
+/* If the user has been allocated a channel object that is going to generate
+ * CDANs to another channel, then the qbman_swp_CDAN* functions will be
+ * necessary.
+ *
+ * CDAN-enabled channels only generate a single CDAN notification, after which
+ * they need to be reenabled before they'll generate another. The idea is
+ * that pull dequeuing will occur in reaction to the CDAN, followed by a
+ * reenable step. Each function generates a distinct command to hardware, so a
+ * combination function is provided if the user wishes to modify the "context"
+ * (which shows up in each CDAN message) each time they reenable, as a single
+ * command to hardware.
+ */
+
+/**
+ * qbman_swp_CDAN_set_context() - Set CDAN context
+ * @s:         the software portal object
+ * @channelid: the channel index
+ * @ctx:       the context to be set in CDAN
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_CDAN_set_context(struct qbman_swp *s, u16 channelid,
+					     u64 ctx)
+{
+	return qbman_swp_CDAN_set(s, channelid,
+				  CODE_CDAN_WE_CTX,
+				  0, ctx);
+}
+
+/**
+ * qbman_swp_CDAN_enable() - Enable CDAN for the channel
+ * @s:         the software portal object
+ * @channelid: the index of the channel to generate CDAN
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_CDAN_enable(struct qbman_swp *s, u16 channelid)
+{
+	return qbman_swp_CDAN_set(s, channelid,
+				  CODE_CDAN_WE_EN,
+				  1, 0);
+}
+
+/**
+ * qbman_swp_CDAN_disable() - disable CDAN for the channel
+ * @s:         the software portal object
+ * @channelid: the index of the channel to generate CDAN
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_CDAN_disable(struct qbman_swp *s, u16 channelid)
+{
+	return qbman_swp_CDAN_set(s, channelid,
+				  CODE_CDAN_WE_EN,
+				  0, 0);
+}
+
+/**
+ * qbman_swp_CDAN_set_context_enable() - Set CDAN contest and enable CDAN
+ * @s:         the software portal object
+ * @channelid: the index of the channel to generate CDAN
+ * @ctx:i      the context set in CDAN
+ *
+ * Return 0 for success, or negative error code for failure.
+ */
+static inline int qbman_swp_CDAN_set_context_enable(struct qbman_swp *s,
+						    u16 channelid,
+						    u64 ctx)
+{
+	return qbman_swp_CDAN_set(s, channelid,
+				  CODE_CDAN_WE_EN | CODE_CDAN_WE_CTX,
+				  1, ctx);
+}
+
+/* Wraps up submit + poll-for-result */
+static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
+					  u8 cmd_verb)
+{
+	int loopvar = 1000;
+
+	qbman_swp_mc_submit(swp, cmd, cmd_verb);
+
+	do {
+		cmd = qbman_swp_mc_result(swp);
+	} while (!cmd && loopvar--);
+
+	WARN_ON(!loopvar);
+
+	return cmd;
+}
+
+#endif /* __FSL_QBMAN_PORTAL_H */
diff --git a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
index 9269cb05a84b..f2917b55b85a 100644
--- a/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
+++ b/drivers/staging/fsl-dpaa2/ethernet/dpaa2-eth.h
@@ -37,8 +37,8 @@
 #include <linux/if_vlan.h>
 #include <linux/fsl/mc.h>
 
-#include "../../fsl-mc/include/dpaa2-io.h"
-#include "../../fsl-mc/include/dpaa2-fd.h"
+#include <soc/fsl/dpaa2-io.h>
+#include <soc/fsl/dpaa2-fd.h>
 #include "dpni.h"
 #include "dpni-cmd.h"
 
diff --git a/drivers/staging/fsl-mc/bus/Kconfig b/drivers/staging/fsl-mc/bus/Kconfig
index 342453035269..90f234deb1de 100644
--- a/drivers/staging/fsl-mc/bus/Kconfig
+++ b/drivers/staging/fsl-mc/bus/Kconfig
@@ -5,12 +5,3 @@
 # Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
 #
 
-config FSL_MC_DPIO
-        tristate "QorIQ DPAA2 DPIO driver"
-        depends on FSL_MC_BUS
-        help
-	  Driver for the DPAA2 DPIO object.  A DPIO provides queue and
-	  buffer management facilities for software to interact with
-	  other DPAA2 objects. This driver does not expose the DPIO
-	  objects individually, but groups them under a service layer
-	  API.
diff --git a/drivers/staging/fsl-mc/bus/Makefile b/drivers/staging/fsl-mc/bus/Makefile
index 21d8ebc8ce21..2141e4b590b2 100644
--- a/drivers/staging/fsl-mc/bus/Makefile
+++ b/drivers/staging/fsl-mc/bus/Makefile
@@ -5,5 +5,3 @@
 # Copyright (C) 2014 Freescale Semiconductor, Inc.
 #
 
-# MC DPIO driver
-obj-$(CONFIG_FSL_MC_DPIO) += dpio/
diff --git a/drivers/staging/fsl-mc/bus/dpio/Makefile b/drivers/staging/fsl-mc/bus/dpio/Makefile
deleted file mode 100644
index b9ff24c76582..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# QorIQ DPAA2 DPIO driver
-#
-
-obj-$(CONFIG_FSL_MC_DPIO) += fsl-mc-dpio.o
-
-fsl-mc-dpio-objs := dpio.o qbman-portal.o dpio-service.o dpio-driver.o
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-cmd.h b/drivers/staging/fsl-mc/bus/dpio/dpio-cmd.h
deleted file mode 100644
index ab8f82ee7ee5..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-cmd.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#ifndef _FSL_DPIO_CMD_H
-#define _FSL_DPIO_CMD_H
-
-/* DPIO Version */
-#define DPIO_VER_MAJOR			4
-#define DPIO_VER_MINOR			2
-
-/* Command Versioning */
-
-#define DPIO_CMD_ID_OFFSET		4
-#define DPIO_CMD_BASE_VERSION		1
-
-#define DPIO_CMD(id)	(((id) << DPIO_CMD_ID_OFFSET) | DPIO_CMD_BASE_VERSION)
-
-/* Command IDs */
-#define DPIO_CMDID_CLOSE				DPIO_CMD(0x800)
-#define DPIO_CMDID_OPEN					DPIO_CMD(0x803)
-#define DPIO_CMDID_GET_API_VERSION			DPIO_CMD(0xa03)
-#define DPIO_CMDID_ENABLE				DPIO_CMD(0x002)
-#define DPIO_CMDID_DISABLE				DPIO_CMD(0x003)
-#define DPIO_CMDID_GET_ATTR				DPIO_CMD(0x004)
-
-struct dpio_cmd_open {
-	__le32 dpio_id;
-};
-
-#define DPIO_CHANNEL_MODE_MASK		0x3
-
-struct dpio_rsp_get_attr {
-	/* cmd word 0 */
-	__le32 id;
-	__le16 qbman_portal_id;
-	u8 num_priorities;
-	u8 channel_mode;
-	/* cmd word 1 */
-	__le64 qbman_portal_ce_addr;
-	/* cmd word 2 */
-	__le64 qbman_portal_ci_addr;
-	/* cmd word 3 */
-	__le32 qbman_version;
-};
-
-#endif /* _FSL_DPIO_CMD_H */
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
deleted file mode 100644
index 11a90a90d827..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
+++ /dev/null
@@ -1,281 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright NXP 2016
- *
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/msi.h>
-#include <linux/dma-mapping.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include <linux/fsl/mc.h>
-#include "../../include/dpaa2-io.h"
-
-#include "qbman-portal.h"
-#include "dpio.h"
-#include "dpio-cmd.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Freescale Semiconductor, Inc");
-MODULE_DESCRIPTION("DPIO Driver");
-
-struct dpio_priv {
-	struct dpaa2_io *io;
-};
-
-static irqreturn_t dpio_irq_handler(int irq_num, void *arg)
-{
-	struct device *dev = (struct device *)arg;
-	struct dpio_priv *priv = dev_get_drvdata(dev);
-
-	return dpaa2_io_irq(priv->io);
-}
-
-static void unregister_dpio_irq_handlers(struct fsl_mc_device *dpio_dev)
-{
-	struct fsl_mc_device_irq *irq;
-
-	irq = dpio_dev->irqs[0];
-
-	/* clear the affinity hint */
-	irq_set_affinity_hint(irq->msi_desc->irq, NULL);
-}
-
-static int register_dpio_irq_handlers(struct fsl_mc_device *dpio_dev, int cpu)
-{
-	struct dpio_priv *priv;
-	int error;
-	struct fsl_mc_device_irq *irq;
-	cpumask_t mask;
-
-	priv = dev_get_drvdata(&dpio_dev->dev);
-
-	irq = dpio_dev->irqs[0];
-	error = devm_request_irq(&dpio_dev->dev,
-				 irq->msi_desc->irq,
-				 dpio_irq_handler,
-				 0,
-				 dev_name(&dpio_dev->dev),
-				 &dpio_dev->dev);
-	if (error < 0) {
-		dev_err(&dpio_dev->dev,
-			"devm_request_irq() failed: %d\n",
-			error);
-		return error;
-	}
-
-	/* set the affinity hint */
-	cpumask_clear(&mask);
-	cpumask_set_cpu(cpu, &mask);
-	if (irq_set_affinity_hint(irq->msi_desc->irq, &mask))
-		dev_err(&dpio_dev->dev,
-			"irq_set_affinity failed irq %d cpu %d\n",
-			irq->msi_desc->irq, cpu);
-
-	return 0;
-}
-
-static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
-{
-	struct dpio_attr dpio_attrs;
-	struct dpaa2_io_desc desc;
-	struct dpio_priv *priv;
-	int err = -ENOMEM;
-	struct device *dev = &dpio_dev->dev;
-	static int next_cpu = -1;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		goto err_priv_alloc;
-
-	dev_set_drvdata(dev, priv);
-
-	err = fsl_mc_portal_allocate(dpio_dev, 0, &dpio_dev->mc_io);
-	if (err) {
-		dev_dbg(dev, "MC portal allocation failed\n");
-		err = -EPROBE_DEFER;
-		goto err_priv_alloc;
-	}
-
-	err = dpio_open(dpio_dev->mc_io, 0, dpio_dev->obj_desc.id,
-			&dpio_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpio_open() failed\n");
-		goto err_open;
-	}
-
-	err = dpio_get_attributes(dpio_dev->mc_io, 0, dpio_dev->mc_handle,
-				  &dpio_attrs);
-	if (err) {
-		dev_err(dev, "dpio_get_attributes() failed %d\n", err);
-		goto err_get_attr;
-	}
-	desc.qman_version = dpio_attrs.qbman_version;
-
-	err = dpio_enable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpio_enable() failed %d\n", err);
-		goto err_get_attr;
-	}
-
-	/* initialize DPIO descriptor */
-	desc.receives_notifications = dpio_attrs.num_priorities ? 1 : 0;
-	desc.has_8prio = dpio_attrs.num_priorities == 8 ? 1 : 0;
-	desc.dpio_id = dpio_dev->obj_desc.id;
-
-	/* get the cpu to use for the affinity hint */
-	if (next_cpu == -1)
-		next_cpu = cpumask_first(cpu_online_mask);
-	else
-		next_cpu = cpumask_next(next_cpu, cpu_online_mask);
-
-	if (!cpu_possible(next_cpu)) {
-		dev_err(dev, "probe failed. Number of DPIOs exceeds NR_CPUS.\n");
-		err = -ERANGE;
-		goto err_allocate_irqs;
-	}
-	desc.cpu = next_cpu;
-
-	/*
-	 * Set the CENA regs to be the cache inhibited area of the portal to
-	 * avoid coherency issues if a user migrates to another core.
-	 */
-	desc.regs_cena = devm_memremap(dev, dpio_dev->regions[1].start,
-				       resource_size(&dpio_dev->regions[1]),
-				       MEMREMAP_WC);
-	if (IS_ERR(desc.regs_cena)) {
-		dev_err(dev, "devm_memremap failed\n");
-		err = PTR_ERR(desc.regs_cena);
-		goto err_allocate_irqs;
-	}
-
-	desc.regs_cinh = devm_ioremap(dev, dpio_dev->regions[1].start,
-				      resource_size(&dpio_dev->regions[1]));
-	if (!desc.regs_cinh) {
-		err = -ENOMEM;
-		dev_err(dev, "devm_ioremap failed\n");
-		goto err_allocate_irqs;
-	}
-
-	err = fsl_mc_allocate_irqs(dpio_dev);
-	if (err) {
-		dev_err(dev, "fsl_mc_allocate_irqs failed. err=%d\n", err);
-		goto err_allocate_irqs;
-	}
-
-	err = register_dpio_irq_handlers(dpio_dev, desc.cpu);
-	if (err)
-		goto err_register_dpio_irq;
-
-	priv->io = dpaa2_io_create(&desc);
-	if (!priv->io) {
-		dev_err(dev, "dpaa2_io_create failed\n");
-		err = -ENOMEM;
-		goto err_dpaa2_io_create;
-	}
-
-	dev_info(dev, "probed\n");
-	dev_dbg(dev, "   receives_notifications = %d\n",
-		desc.receives_notifications);
-	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-	fsl_mc_portal_free(dpio_dev->mc_io);
-
-	return 0;
-
-err_dpaa2_io_create:
-	unregister_dpio_irq_handlers(dpio_dev);
-err_register_dpio_irq:
-	fsl_mc_free_irqs(dpio_dev);
-err_allocate_irqs:
-	dpio_disable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-err_get_attr:
-	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-err_open:
-	fsl_mc_portal_free(dpio_dev->mc_io);
-err_priv_alloc:
-	return err;
-}
-
-/* Tear down interrupts for a given DPIO object */
-static void dpio_teardown_irqs(struct fsl_mc_device *dpio_dev)
-{
-	unregister_dpio_irq_handlers(dpio_dev);
-	fsl_mc_free_irqs(dpio_dev);
-}
-
-static int dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev)
-{
-	struct device *dev;
-	struct dpio_priv *priv;
-	int err;
-
-	dev = &dpio_dev->dev;
-	priv = dev_get_drvdata(dev);
-
-	dpaa2_io_down(priv->io);
-
-	dpio_teardown_irqs(dpio_dev);
-
-	err = fsl_mc_portal_allocate(dpio_dev, 0, &dpio_dev->mc_io);
-	if (err) {
-		dev_err(dev, "MC portal allocation failed\n");
-		goto err_mcportal;
-	}
-
-	err = dpio_open(dpio_dev->mc_io, 0, dpio_dev->obj_desc.id,
-			&dpio_dev->mc_handle);
-	if (err) {
-		dev_err(dev, "dpio_open() failed\n");
-		goto err_open;
-	}
-
-	dpio_disable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-
-	dpio_close(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
-
-	fsl_mc_portal_free(dpio_dev->mc_io);
-
-	return 0;
-
-err_open:
-	fsl_mc_portal_free(dpio_dev->mc_io);
-err_mcportal:
-	return err;
-}
-
-static const struct fsl_mc_device_id dpaa2_dpio_match_id_table[] = {
-	{
-		.vendor = FSL_MC_VENDOR_FREESCALE,
-		.obj_type = "dpio",
-	},
-	{ .vendor = 0x0 }
-};
-
-static struct fsl_mc_driver dpaa2_dpio_driver = {
-	.driver = {
-		.name		= KBUILD_MODNAME,
-		.owner		= THIS_MODULE,
-	},
-	.probe		= dpaa2_dpio_probe,
-	.remove		= dpaa2_dpio_remove,
-	.match_id_table = dpaa2_dpio_match_id_table
-};
-
-static int dpio_driver_init(void)
-{
-	return fsl_mc_driver_register(&dpaa2_dpio_driver);
-}
-
-static void dpio_driver_exit(void)
-{
-	fsl_mc_driver_unregister(&dpaa2_dpio_driver);
-}
-module_init(dpio_driver_init);
-module_exit(dpio_driver_exit);
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.txt b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.txt
deleted file mode 100644
index 72ba9da3d179..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.txt
+++ /dev/null
@@ -1,135 +0,0 @@
-Copyright 2016 NXP
-
-Introduction
-------------
-
-A DPAA2 DPIO (Data Path I/O) is a hardware object that provides
-interfaces to enqueue and dequeue frames to/from network interfaces
-and other accelerators.  A DPIO also provides hardware buffer
-pool management for network interfaces.
-
-This document provides an overview the Linux DPIO driver, its
-subcomponents, and its APIs.
-
-See Documentation/networking/dpaa2/overview.rst for a general overview of DPAA2
-and the general DPAA2 driver architecture in Linux.
-
-Driver Overview
----------------
-
-The DPIO driver is bound to DPIO objects discovered on the fsl-mc bus and
-provides services that:
-  A) allow other drivers, such as the Ethernet driver, to enqueue and dequeue
-     frames for their respective objects
-  B) allow drivers to register callbacks for data availability notifications
-     when data becomes available on a queue or channel
-  C) allow drivers to manage hardware buffer pools
-
-The Linux DPIO driver consists of 3 primary components--
-   DPIO object driver-- fsl-mc driver that manages the DPIO object
-   DPIO service-- provides APIs to other Linux drivers for services
-   QBman portal interface-- sends portal commands, gets responses
-
-          fsl-mc          other
-           bus           drivers
-            |               |
-        +---+----+   +------+-----+
-        |DPIO obj|   |DPIO service|
-        | driver |---|  (DPIO)    |
-        +--------+   +------+-----+
-                            |
-                     +------+-----+
-                     |    QBman   |
-                     | portal i/f |
-                     +------------+
-                            |
-                         hardware
-
-The diagram below shows how the DPIO driver components fit with the other
-DPAA2 Linux driver components:
-                                                   +------------+
-                                                   | OS Network |
-                                                   |   Stack    |
-                 +------------+                    +------------+
-                 | Allocator  |. . . . . . .       |  Ethernet  |
-                 |(DPMCP,DPBP)|                    |   (DPNI)   |
-                 +-.----------+                    +---+---+----+
-                  .          .                         ^   |
-                 .            .           <data avail, |   |<enqueue,
-                .              .           tx confirm> |   | dequeue>
-    +-------------+             .                      |   |
-    | DPRC driver |              .    +--------+ +------------+
-    |   (DPRC)    |               . . |DPIO obj| |DPIO service|
-    +----------+--+                   | driver |-|  (DPIO)    |
-               |                      +--------+ +------+-----+
-               |<dev add/remove>                 +------|-----+
-               |                                 |   QBman    |
-          +----+--------------+                  | portal i/f |
-          |   MC-bus driver   |                  +------------+
-          |                   |                     |
-          | /soc/fsl-mc       |                     |
-          +-------------------+                     |
-                                                    |
- =========================================|=========|========================
-                                        +-+--DPIO---|-----------+
-                                        |           |           |
-                                        |        QBman Portal   |
-                                        +-----------------------+
-
- ============================================================================
-
-
-DPIO Object Driver (dpio-driver.c)
-----------------------------------
-
-   The dpio-driver component registers with the fsl-mc bus to handle objects of
-   type "dpio".  The implementation of probe() handles basic initialization
-   of the DPIO including mapping of the DPIO regions (the QBman SW portal)
-   and initializing interrupts and registering irq handlers.  The dpio-driver
-   registers the probed DPIO with dpio-service.
-
-DPIO service  (dpio-service.c, dpaa2-io.h)
-------------------------------------------
-
-   The dpio service component provides queuing, notification, and buffers
-   management services to DPAA2 drivers, such as the Ethernet driver.  A system
-   will typically allocate 1 DPIO object per CPU to allow queuing operations
-   to happen simultaneously across all CPUs.
-
-   Notification handling
-      dpaa2_io_service_register()
-      dpaa2_io_service_deregister()
-      dpaa2_io_service_rearm()
-
-   Queuing
-      dpaa2_io_service_pull_fq()
-      dpaa2_io_service_pull_channel()
-      dpaa2_io_service_enqueue_fq()
-      dpaa2_io_service_enqueue_qd()
-      dpaa2_io_store_create()
-      dpaa2_io_store_destroy()
-      dpaa2_io_store_next()
-
-   Buffer pool management
-      dpaa2_io_service_release()
-      dpaa2_io_service_acquire()
-
-QBman portal interface (qbman-portal.c)
----------------------------------------
-
-   The qbman-portal component provides APIs to do the low level hardware
-   bit twiddling for operations such as:
-      -initializing Qman software portals
-      -building and sending portal commands
-      -portal interrupt configuration and processing
-
-   The qbman-portal APIs are not public to other drivers, and are
-   only used by dpio-service.
-
-Other (dpaa2-fd.h, dpaa2-global.h)
-----------------------------------
-
-   Frame descriptor and scatter-gather definitions and the APIs used to
-   manipulate them are defined in dpaa2-fd.h.
-
-   Dequeue result struct and parsing APIs are defined in dpaa2-global.h.
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-service.c b/drivers/staging/fsl-mc/bus/dpio/dpio-service.c
deleted file mode 100644
index 14ed2beb7432..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-service.c
+++ /dev/null
@@ -1,545 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#include <linux/types.h>
-#include <linux/fsl/mc.h>
-#include "../../include/dpaa2-io.h"
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-
-#include "dpio.h"
-#include "qbman-portal.h"
-
-struct dpaa2_io {
-	struct dpaa2_io_desc dpio_desc;
-	struct qbman_swp_desc swp_desc;
-	struct qbman_swp *swp;
-	struct list_head node;
-	/* protect against multiple management commands */
-	spinlock_t lock_mgmt_cmd;
-	/* protect notifications list */
-	spinlock_t lock_notifications;
-	struct list_head notifications;
-};
-
-struct dpaa2_io_store {
-	unsigned int max;
-	dma_addr_t paddr;
-	struct dpaa2_dq *vaddr;
-	void *alloced_addr;    /* unaligned value from kmalloc() */
-	unsigned int idx;      /* position of the next-to-be-returned entry */
-	struct qbman_swp *swp; /* portal used to issue VDQCR */
-	struct device *dev;    /* device used for DMA mapping */
-};
-
-/* keep a per cpu array of DPIOs for fast access */
-static struct dpaa2_io *dpio_by_cpu[NR_CPUS];
-static struct list_head dpio_list = LIST_HEAD_INIT(dpio_list);
-static DEFINE_SPINLOCK(dpio_list_lock);
-
-static inline struct dpaa2_io *service_select_by_cpu(struct dpaa2_io *d,
-						     int cpu)
-{
-	if (d)
-		return d;
-
-	if (cpu != DPAA2_IO_ANY_CPU && cpu >= num_possible_cpus())
-		return NULL;
-
-	/*
-	 * If cpu == -1, choose the current cpu, with no guarantees about
-	 * potentially being migrated away.
-	 */
-	if (unlikely(cpu < 0))
-		cpu = smp_processor_id();
-
-	/* If a specific cpu was requested, pick it up immediately */
-	return dpio_by_cpu[cpu];
-}
-
-static inline struct dpaa2_io *service_select(struct dpaa2_io *d)
-{
-	if (d)
-		return d;
-
-	spin_lock(&dpio_list_lock);
-	d = list_entry(dpio_list.next, struct dpaa2_io, node);
-	list_del(&d->node);
-	list_add_tail(&d->node, &dpio_list);
-	spin_unlock(&dpio_list_lock);
-
-	return d;
-}
-
-/**
- * dpaa2_io_service_select() - return a dpaa2_io service affined to this cpu
- * @cpu: the cpu id
- *
- * Return the affine dpaa2_io service, or NULL if there is no service affined
- * to the specified cpu. If DPAA2_IO_ANY_CPU is used, return the next available
- * service.
- */
-struct dpaa2_io *dpaa2_io_service_select(int cpu)
-{
-	if (cpu == DPAA2_IO_ANY_CPU)
-		return service_select(NULL);
-
-	return service_select_by_cpu(NULL, cpu);
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_select);
-
-/**
- * dpaa2_io_create() - create a dpaa2_io object.
- * @desc: the dpaa2_io descriptor
- *
- * Activates a "struct dpaa2_io" corresponding to the given config of an actual
- * DPIO object.
- *
- * Return a valid dpaa2_io object for success, or NULL for failure.
- */
-struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc)
-{
-	struct dpaa2_io *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
-
-	if (!obj)
-		return NULL;
-
-	/* check if CPU is out of range (-1 means any cpu) */
-	if (desc->cpu != DPAA2_IO_ANY_CPU && desc->cpu >= num_possible_cpus()) {
-		kfree(obj);
-		return NULL;
-	}
-
-	obj->dpio_desc = *desc;
-	obj->swp_desc.cena_bar = obj->dpio_desc.regs_cena;
-	obj->swp_desc.cinh_bar = obj->dpio_desc.regs_cinh;
-	obj->swp_desc.qman_version = obj->dpio_desc.qman_version;
-	obj->swp = qbman_swp_init(&obj->swp_desc);
-
-	if (!obj->swp) {
-		kfree(obj);
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&obj->node);
-	spin_lock_init(&obj->lock_mgmt_cmd);
-	spin_lock_init(&obj->lock_notifications);
-	INIT_LIST_HEAD(&obj->notifications);
-
-	/* For now only enable DQRR interrupts */
-	qbman_swp_interrupt_set_trigger(obj->swp,
-					QBMAN_SWP_INTERRUPT_DQRI);
-	qbman_swp_interrupt_clear_status(obj->swp, 0xffffffff);
-	if (obj->dpio_desc.receives_notifications)
-		qbman_swp_push_set(obj->swp, 0, 1);
-
-	spin_lock(&dpio_list_lock);
-	list_add_tail(&obj->node, &dpio_list);
-	if (desc->cpu >= 0 && !dpio_by_cpu[desc->cpu])
-		dpio_by_cpu[desc->cpu] = obj;
-	spin_unlock(&dpio_list_lock);
-
-	return obj;
-}
-
-/**
- * dpaa2_io_down() - release the dpaa2_io object.
- * @d: the dpaa2_io object to be released.
- *
- * The "struct dpaa2_io" type can represent an individual DPIO object (as
- * described by "struct dpaa2_io_desc") or an instance of a "DPIO service",
- * which can be used to group/encapsulate multiple DPIO objects. In all cases,
- * each handle obtained should be released using this function.
- */
-void dpaa2_io_down(struct dpaa2_io *d)
-{
-	kfree(d);
-}
-
-#define DPAA_POLL_MAX 32
-
-/**
- * dpaa2_io_irq() - ISR for DPIO interrupts
- *
- * @obj: the given DPIO object.
- *
- * Return IRQ_HANDLED for success or IRQ_NONE if there
- * were no pending interrupts.
- */
-irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj)
-{
-	const struct dpaa2_dq *dq;
-	int max = 0;
-	struct qbman_swp *swp;
-	u32 status;
-
-	swp = obj->swp;
-	status = qbman_swp_interrupt_read_status(swp);
-	if (!status)
-		return IRQ_NONE;
-
-	dq = qbman_swp_dqrr_next(swp);
-	while (dq) {
-		if (qbman_result_is_SCN(dq)) {
-			struct dpaa2_io_notification_ctx *ctx;
-			u64 q64;
-
-			q64 = qbman_result_SCN_ctx(dq);
-			ctx = (void *)(uintptr_t)q64;
-			ctx->cb(ctx);
-		} else {
-			pr_crit("fsl-mc-dpio: Unrecognised/ignored DQRR entry\n");
-		}
-		qbman_swp_dqrr_consume(swp, dq);
-		++max;
-		if (max > DPAA_POLL_MAX)
-			goto done;
-		dq = qbman_swp_dqrr_next(swp);
-	}
-done:
-	qbman_swp_interrupt_clear_status(swp, status);
-	qbman_swp_interrupt_set_inhibit(swp, 0);
-	return IRQ_HANDLED;
-}
-
-/**
- * dpaa2_io_service_register() - Prepare for servicing of FQDAN or CDAN
- *                               notifications on the given DPIO service.
- * @d:   the given DPIO service.
- * @ctx: the notification context.
- *
- * The caller should make the MC command to attach a DPAA2 object to
- * a DPIO after this function completes successfully.  In that way:
- *    (a) The DPIO service is "ready" to handle a notification arrival
- *        (which might happen before the "attach" command to MC has
- *        returned control of execution back to the caller)
- *    (b) The DPIO service can provide back to the caller the 'dpio_id' and
- *        'qman64' parameters that it should pass along in the MC command
- *        in order for the object to be configured to produce the right
- *        notification fields to the DPIO service.
- *
- * Return 0 for success, or -ENODEV for failure.
- */
-int dpaa2_io_service_register(struct dpaa2_io *d,
-			      struct dpaa2_io_notification_ctx *ctx)
-{
-	unsigned long irqflags;
-
-	d = service_select_by_cpu(d, ctx->desired_cpu);
-	if (!d)
-		return -ENODEV;
-
-	ctx->dpio_id = d->dpio_desc.dpio_id;
-	ctx->qman64 = (u64)(uintptr_t)ctx;
-	ctx->dpio_private = d;
-	spin_lock_irqsave(&d->lock_notifications, irqflags);
-	list_add(&ctx->node, &d->notifications);
-	spin_unlock_irqrestore(&d->lock_notifications, irqflags);
-
-	/* Enable the generation of CDAN notifications */
-	if (ctx->is_cdan)
-		return qbman_swp_CDAN_set_context_enable(d->swp,
-							 (u16)ctx->id,
-							 ctx->qman64);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_register);
-
-/**
- * dpaa2_io_service_deregister - The opposite of 'register'.
- * @service: the given DPIO service.
- * @ctx: the notification context.
- *
- * This function should be called only after sending the MC command to
- * to detach the notification-producing device from the DPIO.
- */
-void dpaa2_io_service_deregister(struct dpaa2_io *service,
-				 struct dpaa2_io_notification_ctx *ctx)
-{
-	struct dpaa2_io *d = ctx->dpio_private;
-	unsigned long irqflags;
-
-	if (ctx->is_cdan)
-		qbman_swp_CDAN_disable(d->swp, (u16)ctx->id);
-
-	spin_lock_irqsave(&d->lock_notifications, irqflags);
-	list_del(&ctx->node);
-	spin_unlock_irqrestore(&d->lock_notifications, irqflags);
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_deregister);
-
-/**
- * dpaa2_io_service_rearm() - Rearm the notification for the given DPIO service.
- * @d: the given DPIO service.
- * @ctx: the notification context.
- *
- * Once a FQDAN/CDAN has been produced, the corresponding FQ/channel is
- * considered "disarmed". Ie. the user can issue pull dequeue operations on that
- * traffic source for as long as it likes. Eventually it may wish to "rearm"
- * that source to allow it to produce another FQDAN/CDAN, that's what this
- * function achieves.
- *
- * Return 0 for success.
- */
-int dpaa2_io_service_rearm(struct dpaa2_io *d,
-			   struct dpaa2_io_notification_ctx *ctx)
-{
-	unsigned long irqflags;
-	int err;
-
-	d = service_select_by_cpu(d, ctx->desired_cpu);
-	if (!unlikely(d))
-		return -ENODEV;
-
-	spin_lock_irqsave(&d->lock_mgmt_cmd, irqflags);
-	if (ctx->is_cdan)
-		err = qbman_swp_CDAN_enable(d->swp, (u16)ctx->id);
-	else
-		err = qbman_swp_fq_schedule(d->swp, ctx->id);
-	spin_unlock_irqrestore(&d->lock_mgmt_cmd, irqflags);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_rearm);
-
-/**
- * dpaa2_io_service_pull_channel() - pull dequeue functions from a channel.
- * @d: the given DPIO service.
- * @channelid: the given channel id.
- * @s: the dpaa2_io_store object for the result.
- *
- * Return 0 for success, or error code for failure.
- */
-int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
-				  struct dpaa2_io_store *s)
-{
-	struct qbman_pull_desc pd;
-	int err;
-
-	qbman_pull_desc_clear(&pd);
-	qbman_pull_desc_set_storage(&pd, s->vaddr, s->paddr, 1);
-	qbman_pull_desc_set_numframes(&pd, (u8)s->max);
-	qbman_pull_desc_set_channel(&pd, channelid, qbman_pull_type_prio);
-
-	d = service_select(d);
-	if (!d)
-		return -ENODEV;
-
-	s->swp = d->swp;
-	err = qbman_swp_pull(d->swp, &pd);
-	if (err)
-		s->swp = NULL;
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_pull_channel);
-
-/**
- * dpaa2_io_service_enqueue_qd() - Enqueue a frame to a QD.
- * @d: the given DPIO service.
- * @qdid: the given queuing destination id.
- * @prio: the given queuing priority.
- * @qdbin: the given queuing destination bin.
- * @fd: the frame descriptor which is enqueued.
- *
- * Return 0 for successful enqueue, or -EBUSY if the enqueue ring is not ready,
- * or -ENODEV if there is no dpio service.
- */
-int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d,
-				u32 qdid, u8 prio, u16 qdbin,
-				const struct dpaa2_fd *fd)
-{
-	struct qbman_eq_desc ed;
-
-	d = service_select(d);
-	if (!d)
-		return -ENODEV;
-
-	qbman_eq_desc_clear(&ed);
-	qbman_eq_desc_set_no_orp(&ed, 0);
-	qbman_eq_desc_set_qd(&ed, qdid, qdbin, prio);
-
-	return qbman_swp_enqueue(d->swp, &ed, fd);
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_enqueue_qd);
-
-/**
- * dpaa2_io_service_release() - Release buffers to a buffer pool.
- * @d: the given DPIO object.
- * @bpid: the buffer pool id.
- * @buffers: the buffers to be released.
- * @num_buffers: the number of the buffers to be released.
- *
- * Return 0 for success, and negative error code for failure.
- */
-int dpaa2_io_service_release(struct dpaa2_io *d,
-			     u32 bpid,
-			     const u64 *buffers,
-			     unsigned int num_buffers)
-{
-	struct qbman_release_desc rd;
-
-	d = service_select(d);
-	if (!d)
-		return -ENODEV;
-
-	qbman_release_desc_clear(&rd);
-	qbman_release_desc_set_bpid(&rd, bpid);
-
-	return qbman_swp_release(d->swp, &rd, buffers, num_buffers);
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_release);
-
-/**
- * dpaa2_io_service_acquire() - Acquire buffers from a buffer pool.
- * @d: the given DPIO object.
- * @bpid: the buffer pool id.
- * @buffers: the buffer addresses for acquired buffers.
- * @num_buffers: the expected number of the buffers to acquire.
- *
- * Return a negative error code if the command failed, otherwise it returns
- * the number of buffers acquired, which may be less than the number requested.
- * Eg. if the buffer pool is empty, this will return zero.
- */
-int dpaa2_io_service_acquire(struct dpaa2_io *d,
-			     u32 bpid,
-			     u64 *buffers,
-			     unsigned int num_buffers)
-{
-	unsigned long irqflags;
-	int err;
-
-	d = service_select(d);
-	if (!d)
-		return -ENODEV;
-
-	spin_lock_irqsave(&d->lock_mgmt_cmd, irqflags);
-	err = qbman_swp_acquire(d->swp, bpid, buffers, num_buffers);
-	spin_unlock_irqrestore(&d->lock_mgmt_cmd, irqflags);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_service_acquire);
-
-/*
- * 'Stores' are reusable memory blocks for holding dequeue results, and to
- * assist with parsing those results.
- */
-
-/**
- * dpaa2_io_store_create() - Create the dma memory storage for dequeue result.
- * @max_frames: the maximum number of dequeued result for frames, must be <= 16.
- * @dev:        the device to allow mapping/unmapping the DMAable region.
- *
- * The size of the storage is "max_frames*sizeof(struct dpaa2_dq)".
- * The 'dpaa2_io_store' returned is a DPIO service managed object.
- *
- * Return pointer to dpaa2_io_store struct for successfully created storage
- * memory, or NULL on error.
- */
-struct dpaa2_io_store *dpaa2_io_store_create(unsigned int max_frames,
-					     struct device *dev)
-{
-	struct dpaa2_io_store *ret;
-	size_t size;
-
-	if (!max_frames || (max_frames > 16))
-		return NULL;
-
-	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
-	if (!ret)
-		return NULL;
-
-	ret->max = max_frames;
-	size = max_frames * sizeof(struct dpaa2_dq) + 64;
-	ret->alloced_addr = kzalloc(size, GFP_KERNEL);
-	if (!ret->alloced_addr) {
-		kfree(ret);
-		return NULL;
-	}
-
-	ret->vaddr = PTR_ALIGN(ret->alloced_addr, 64);
-	ret->paddr = dma_map_single(dev, ret->vaddr,
-				    sizeof(struct dpaa2_dq) * max_frames,
-				    DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, ret->paddr)) {
-		kfree(ret->alloced_addr);
-		kfree(ret);
-		return NULL;
-	}
-
-	ret->idx = 0;
-	ret->dev = dev;
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_store_create);
-
-/**
- * dpaa2_io_store_destroy() - Frees the dma memory storage for dequeue
- *                            result.
- * @s: the storage memory to be destroyed.
- */
-void dpaa2_io_store_destroy(struct dpaa2_io_store *s)
-{
-	dma_unmap_single(s->dev, s->paddr, sizeof(struct dpaa2_dq) * s->max,
-			 DMA_FROM_DEVICE);
-	kfree(s->alloced_addr);
-	kfree(s);
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_store_destroy);
-
-/**
- * dpaa2_io_store_next() - Determine when the next dequeue result is available.
- * @s: the dpaa2_io_store object.
- * @is_last: indicate whether this is the last frame in the pull command.
- *
- * When an object driver performs dequeues to a dpaa2_io_store, this function
- * can be used to determine when the next frame result is available. Once
- * this function returns non-NULL, a subsequent call to it will try to find
- * the next dequeue result.
- *
- * Note that if a pull-dequeue has a NULL result because the target FQ/channel
- * was empty, then this function will also return NULL (rather than expecting
- * the caller to always check for this. As such, "is_last" can be used to
- * differentiate between "end-of-empty-dequeue" and "still-waiting".
- *
- * Return dequeue result for a valid dequeue result, or NULL for empty dequeue.
- */
-struct dpaa2_dq *dpaa2_io_store_next(struct dpaa2_io_store *s, int *is_last)
-{
-	int match;
-	struct dpaa2_dq *ret = &s->vaddr[s->idx];
-
-	match = qbman_result_has_new_result(s->swp, ret);
-	if (!match) {
-		*is_last = 0;
-		return NULL;
-	}
-
-	s->idx++;
-
-	if (dpaa2_dq_is_pull_complete(ret)) {
-		*is_last = 1;
-		s->idx = 0;
-		/*
-		 * If we get an empty dequeue result to terminate a zero-results
-		 * vdqcr, return NULL to the caller rather than expecting him to
-		 * check non-NULL results every time.
-		 */
-		if (!(dpaa2_dq_flags(ret) & DPAA2_DQ_STAT_VALIDFRAME))
-			ret = NULL;
-	} else {
-		*is_last = 0;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dpaa2_io_store_next);
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio.c b/drivers/staging/fsl-mc/bus/dpio/dpio.c
deleted file mode 100644
index ff37c80e11a0..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/*
- * Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#include <linux/kernel.h>
-#include <linux/fsl/mc.h>
-
-#include "dpio.h"
-#include "dpio-cmd.h"
-
-/*
- * Data Path I/O Portal API
- * Contains initialization APIs and runtime control APIs for DPIO
- */
-
-/**
- * dpio_open() - Open a control session for the specified object
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @dpio_id:	DPIO unique ID
- * @token:	Returned token; use in subsequent API calls
- *
- * This function can be used to open a control session for an
- * already created object; an object may have been declared in
- * the DPL or by calling the dpio_create() function.
- * This function returns a unique authentication token,
- * associated with the specific object ID and the specific MC
- * portal; this token must be used in all subsequent commands for
- * this specific object.
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpio_open(struct fsl_mc_io *mc_io,
-	      u32 cmd_flags,
-	      int dpio_id,
-	      u16 *token)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpio_cmd_open *dpio_cmd;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_OPEN,
-					  cmd_flags,
-					  0);
-	dpio_cmd = (struct dpio_cmd_open *)cmd.params;
-	dpio_cmd->dpio_id = cpu_to_le32(dpio_id);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	*token = mc_cmd_hdr_read_token(&cmd);
-
-	return 0;
-}
-
-/**
- * dpio_close() - Close the control session of the object
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPIO object
- *
- * Return:	'0' on Success; Error code otherwise.
- */
-int dpio_close(struct fsl_mc_io *mc_io,
-	       u32 cmd_flags,
-	       u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_CLOSE,
-					  cmd_flags,
-					  token);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpio_enable() - Enable the DPIO, allow I/O portal operations.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPIO object
- *
- * Return:	'0' on Success; Error code otherwise
- */
-int dpio_enable(struct fsl_mc_io *mc_io,
-		u32 cmd_flags,
-		u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_ENABLE,
-					  cmd_flags,
-					  token);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpio_disable() - Disable the DPIO, stop any I/O portal operation.
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPIO object
- *
- * Return:	'0' on Success; Error code otherwise
- */
-int dpio_disable(struct fsl_mc_io *mc_io,
-		 u32 cmd_flags,
-		 u16 token)
-{
-	struct fsl_mc_command cmd = { 0 };
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_DISABLE,
-					  cmd_flags,
-					  token);
-
-	return mc_send_command(mc_io, &cmd);
-}
-
-/**
- * dpio_get_attributes() - Retrieve DPIO attributes
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPIO object
- * @attr:	Returned object's attributes
- *
- * Return:	'0' on Success; Error code otherwise
- */
-int dpio_get_attributes(struct fsl_mc_io *mc_io,
-			u32 cmd_flags,
-			u16 token,
-			struct dpio_attr *attr)
-{
-	struct fsl_mc_command cmd = { 0 };
-	struct dpio_rsp_get_attr *dpio_rsp;
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_ATTR,
-					  cmd_flags,
-					  token);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	dpio_rsp = (struct dpio_rsp_get_attr *)cmd.params;
-	attr->id = le32_to_cpu(dpio_rsp->id);
-	attr->qbman_portal_id = le16_to_cpu(dpio_rsp->qbman_portal_id);
-	attr->num_priorities = dpio_rsp->num_priorities;
-	attr->channel_mode = dpio_rsp->channel_mode & DPIO_CHANNEL_MODE_MASK;
-	attr->qbman_portal_ce_offset =
-		le64_to_cpu(dpio_rsp->qbman_portal_ce_addr);
-	attr->qbman_portal_ci_offset =
-		le64_to_cpu(dpio_rsp->qbman_portal_ci_addr);
-	attr->qbman_version = le32_to_cpu(dpio_rsp->qbman_version);
-
-	return 0;
-}
-
-/**
- * dpio_get_api_version - Get Data Path I/O API version
- * @mc_io:	Pointer to MC portal's DPIO object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @major_ver:	Major version of DPIO API
- * @minor_ver:	Minor version of DPIO API
- *
- * Return:	'0' on Success; Error code otherwise
- */
-int dpio_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver)
-{
-	struct fsl_mc_command cmd = { 0 };
-	int err;
-
-	/* prepare command */
-	cmd.header = mc_encode_cmd_header(DPIO_CMDID_GET_API_VERSION,
-					  cmd_flags, 0);
-
-	err = mc_send_command(mc_io, &cmd);
-	if (err)
-		return err;
-
-	/* retrieve response parameters */
-	mc_cmd_read_api_version(&cmd, major_ver, minor_ver);
-
-	return 0;
-}
diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio.h b/drivers/staging/fsl-mc/bus/dpio/dpio.h
deleted file mode 100644
index 49194c8e45f1..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/dpio.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright 2013-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#ifndef __FSL_DPIO_H
-#define __FSL_DPIO_H
-
-struct fsl_mc_io;
-
-int dpio_open(struct fsl_mc_io	*mc_io,
-	      u32		cmd_flags,
-	      int		dpio_id,
-	      u16		*token);
-
-int dpio_close(struct fsl_mc_io	*mc_io,
-	       u32		cmd_flags,
-	       u16		token);
-
-/**
- * enum dpio_channel_mode - DPIO notification channel mode
- * @DPIO_NO_CHANNEL: No support for notification channel
- * @DPIO_LOCAL_CHANNEL: Notifications on data availability can be received by a
- *	dedicated channel in the DPIO; user should point the queue's
- *	destination in the relevant interface to this DPIO
- */
-enum dpio_channel_mode {
-	DPIO_NO_CHANNEL = 0,
-	DPIO_LOCAL_CHANNEL = 1,
-};
-
-/**
- * struct dpio_cfg - Structure representing DPIO configuration
- * @channel_mode: Notification channel mode
- * @num_priorities: Number of priorities for the notification channel (1-8);
- *			relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL'
- */
-struct dpio_cfg {
-	enum dpio_channel_mode	channel_mode;
-	u8		num_priorities;
-};
-
-int dpio_enable(struct fsl_mc_io	*mc_io,
-		u32		cmd_flags,
-		u16		token);
-
-int dpio_disable(struct fsl_mc_io	*mc_io,
-		 u32		cmd_flags,
-		 u16		token);
-
-/**
- * struct dpio_attr - Structure representing DPIO attributes
- * @id: DPIO object ID
- * @qbman_portal_ce_offset: offset of the software portal cache-enabled area
- * @qbman_portal_ci_offset: offset of the software portal cache-inhibited area
- * @qbman_portal_id: Software portal ID
- * @channel_mode: Notification channel mode
- * @num_priorities: Number of priorities for the notification channel (1-8);
- *			relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL'
- * @qbman_version: QBMAN version
- */
-struct dpio_attr {
-	int			id;
-	u64		qbman_portal_ce_offset;
-	u64		qbman_portal_ci_offset;
-	u16		qbman_portal_id;
-	enum dpio_channel_mode	channel_mode;
-	u8			num_priorities;
-	u32		qbman_version;
-};
-
-int dpio_get_attributes(struct fsl_mc_io	*mc_io,
-			u32		cmd_flags,
-			u16		token,
-			struct dpio_attr	*attr);
-
-int dpio_get_api_version(struct fsl_mc_io *mc_io,
-			 u32 cmd_flags,
-			 u16 *major_ver,
-			 u16 *minor_ver);
-
-#endif /* __FSL_DPIO_H */
diff --git a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
deleted file mode 100644
index 116fafb28640..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
+++ /dev/null
@@ -1,1005 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/*
- * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2016 NXP
- *
- */
-
-#include <asm/cacheflush.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include "../../include/dpaa2-global.h"
-
-#include "qbman-portal.h"
-
-#define QMAN_REV_4000   0x04000000
-#define QMAN_REV_4100   0x04010000
-#define QMAN_REV_4101   0x04010001
-#define QMAN_REV_MASK   0xffff0000
-
-/* All QBMan command and result structures use this "valid bit" encoding */
-#define QB_VALID_BIT ((u32)0x80)
-
-/* QBMan portal management command codes */
-#define QBMAN_MC_ACQUIRE       0x30
-#define QBMAN_WQCHAN_CONFIGURE 0x46
-
-/* CINH register offsets */
-#define QBMAN_CINH_SWP_EQAR    0x8c0
-#define QBMAN_CINH_SWP_DQPI    0xa00
-#define QBMAN_CINH_SWP_DCAP    0xac0
-#define QBMAN_CINH_SWP_SDQCR   0xb00
-#define QBMAN_CINH_SWP_RAR     0xcc0
-#define QBMAN_CINH_SWP_ISR     0xe00
-#define QBMAN_CINH_SWP_IER     0xe40
-#define QBMAN_CINH_SWP_ISDR    0xe80
-#define QBMAN_CINH_SWP_IIR     0xec0
-
-/* CENA register offsets */
-#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((u32)(n) << 6))
-#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((u32)(n) << 6))
-#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((u32)(n) << 6))
-#define QBMAN_CENA_SWP_CR      0x600
-#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((u32)(vb) >> 1))
-#define QBMAN_CENA_SWP_VDQCR   0x780
-
-/* Reverse mapping of QBMAN_CENA_SWP_DQRR() */
-#define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)(p) & 0x1ff) >> 6)
-
-/* Define token used to determine if response written to memory is valid */
-#define QMAN_DQ_TOKEN_VALID 1
-
-/* SDQCR attribute codes */
-#define QB_SDQCR_FC_SHIFT   29
-#define QB_SDQCR_FC_MASK    0x1
-#define QB_SDQCR_DCT_SHIFT  24
-#define QB_SDQCR_DCT_MASK   0x3
-#define QB_SDQCR_TOK_SHIFT  16
-#define QB_SDQCR_TOK_MASK   0xff
-#define QB_SDQCR_SRC_SHIFT  0
-#define QB_SDQCR_SRC_MASK   0xffff
-
-/* opaque token for static dequeues */
-#define QMAN_SDQCR_TOKEN    0xbb
-
-enum qbman_sdqcr_dct {
-	qbman_sdqcr_dct_null = 0,
-	qbman_sdqcr_dct_prio_ics,
-	qbman_sdqcr_dct_active_ics,
-	qbman_sdqcr_dct_active
-};
-
-enum qbman_sdqcr_fc {
-	qbman_sdqcr_fc_one = 0,
-	qbman_sdqcr_fc_up_to_3 = 1
-};
-
-/* Portal Access */
-
-static inline u32 qbman_read_register(struct qbman_swp *p, u32 offset)
-{
-	return readl_relaxed(p->addr_cinh + offset);
-}
-
-static inline void qbman_write_register(struct qbman_swp *p, u32 offset,
-					u32 value)
-{
-	writel_relaxed(value, p->addr_cinh + offset);
-}
-
-static inline void *qbman_get_cmd(struct qbman_swp *p, u32 offset)
-{
-	return p->addr_cena + offset;
-}
-
-#define QBMAN_CINH_SWP_CFG   0xd00
-
-#define SWP_CFG_DQRR_MF_SHIFT 20
-#define SWP_CFG_EST_SHIFT     16
-#define SWP_CFG_WN_SHIFT      14
-#define SWP_CFG_RPM_SHIFT     12
-#define SWP_CFG_DCM_SHIFT     10
-#define SWP_CFG_EPM_SHIFT     8
-#define SWP_CFG_SD_SHIFT      5
-#define SWP_CFG_SP_SHIFT      4
-#define SWP_CFG_SE_SHIFT      3
-#define SWP_CFG_DP_SHIFT      2
-#define SWP_CFG_DE_SHIFT      1
-#define SWP_CFG_EP_SHIFT      0
-
-static inline u32 qbman_set_swp_cfg(u8 max_fill, u8 wn,	u8 est, u8 rpm, u8 dcm,
-				    u8 epm, int sd, int sp, int se,
-				    int dp, int de, int ep)
-{
-	return (max_fill << SWP_CFG_DQRR_MF_SHIFT |
-		est << SWP_CFG_EST_SHIFT |
-		wn << SWP_CFG_WN_SHIFT |
-		rpm << SWP_CFG_RPM_SHIFT |
-		dcm << SWP_CFG_DCM_SHIFT |
-		epm << SWP_CFG_EPM_SHIFT |
-		sd << SWP_CFG_SD_SHIFT |
-		sp << SWP_CFG_SP_SHIFT |
-		se << SWP_CFG_SE_SHIFT |
-		dp << SWP_CFG_DP_SHIFT |
-		de << SWP_CFG_DE_SHIFT |
-		ep << SWP_CFG_EP_SHIFT);
-}
-
-/**
- * qbman_swp_init() - Create a functional object representing the given
- *                    QBMan portal descriptor.
- * @d: the given qbman swp descriptor
- *
- * Return qbman_swp portal for success, NULL if the object cannot
- * be created.
- */
-struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
-{
-	struct qbman_swp *p = kmalloc(sizeof(*p), GFP_KERNEL);
-	u32 reg;
-
-	if (!p)
-		return NULL;
-	p->desc = d;
-	p->mc.valid_bit = QB_VALID_BIT;
-	p->sdq = 0;
-	p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
-	p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
-	p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;
-
-	atomic_set(&p->vdq.available, 1);
-	p->vdq.valid_bit = QB_VALID_BIT;
-	p->dqrr.next_idx = 0;
-	p->dqrr.valid_bit = QB_VALID_BIT;
-
-	if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_4100) {
-		p->dqrr.dqrr_size = 4;
-		p->dqrr.reset_bug = 1;
-	} else {
-		p->dqrr.dqrr_size = 8;
-		p->dqrr.reset_bug = 0;
-	}
-
-	p->addr_cena = d->cena_bar;
-	p->addr_cinh = d->cinh_bar;
-
-	reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
-				1, /* Writes Non-cacheable */
-				0, /* EQCR_CI stashing threshold */
-				3, /* RPM: Valid bit mode, RCR in array mode */
-				2, /* DCM: Discrete consumption ack mode */
-				3, /* EPM: Valid bit mode, EQCR in array mode */
-				0, /* mem stashing drop enable == FALSE */
-				1, /* mem stashing priority == TRUE */
-				0, /* mem stashing enable == FALSE */
-				1, /* dequeue stashing priority == TRUE */
-				0, /* dequeue stashing enable == FALSE */
-				0); /* EQCR_CI stashing priority == FALSE */
-
-	qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg);
-	reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG);
-	if (!reg) {
-		pr_err("qbman: the portal is not enabled!\n");
-		return NULL;
-	}
-
-	/*
-	 * SDQCR needs to be initialized to 0 when no channels are
-	 * being dequeued from or else the QMan HW will indicate an
-	 * error.  The values that were calculated above will be
-	 * applied when dequeues from a specific channel are enabled.
-	 */
-	qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0);
-	return p;
-}
-
-/**
- * qbman_swp_finish() - Create and destroy a functional object representing
- *                      the given QBMan portal descriptor.
- * @p: the qbman_swp object to be destroyed
- */
-void qbman_swp_finish(struct qbman_swp *p)
-{
-	kfree(p);
-}
-
-/**
- * qbman_swp_interrupt_read_status()
- * @p: the given software portal
- *
- * Return the value in the SWP_ISR register.
- */
-u32 qbman_swp_interrupt_read_status(struct qbman_swp *p)
-{
-	return qbman_read_register(p, QBMAN_CINH_SWP_ISR);
-}
-
-/**
- * qbman_swp_interrupt_clear_status()
- * @p: the given software portal
- * @mask: The mask to clear in SWP_ISR register
- */
-void qbman_swp_interrupt_clear_status(struct qbman_swp *p, u32 mask)
-{
-	qbman_write_register(p, QBMAN_CINH_SWP_ISR, mask);
-}
-
-/**
- * qbman_swp_interrupt_get_trigger() - read interrupt enable register
- * @p: the given software portal
- *
- * Return the value in the SWP_IER register.
- */
-u32 qbman_swp_interrupt_get_trigger(struct qbman_swp *p)
-{
-	return qbman_read_register(p, QBMAN_CINH_SWP_IER);
-}
-
-/**
- * qbman_swp_interrupt_set_trigger() - enable interrupts for a swp
- * @p: the given software portal
- * @mask: The mask of bits to enable in SWP_IER
- */
-void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, u32 mask)
-{
-	qbman_write_register(p, QBMAN_CINH_SWP_IER, mask);
-}
-
-/**
- * qbman_swp_interrupt_get_inhibit() - read interrupt mask register
- * @p: the given software portal object
- *
- * Return the value in the SWP_IIR register.
- */
-int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p)
-{
-	return qbman_read_register(p, QBMAN_CINH_SWP_IIR);
-}
-
-/**
- * qbman_swp_interrupt_set_inhibit() - write interrupt mask register
- * @p: the given software portal object
- * @mask: The mask to set in SWP_IIR register
- */
-void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit)
-{
-	qbman_write_register(p, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0);
-}
-
-/*
- * Different management commands all use this common base layer of code to issue
- * commands and poll for results.
- */
-
-/*
- * Returns a pointer to where the caller should fill in their management command
- * (caller should ignore the verb byte)
- */
-void *qbman_swp_mc_start(struct qbman_swp *p)
-{
-	return qbman_get_cmd(p, QBMAN_CENA_SWP_CR);
-}
-
-/*
- * Commits merges in the caller-supplied command verb (which should not include
- * the valid-bit) and submits the command to hardware
- */
-void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 cmd_verb)
-{
-	u8 *v = cmd;
-
-	dma_wmb();
-	*v = cmd_verb | p->mc.valid_bit;
-}
-
-/*
- * Checks for a completed response (returns non-NULL if only if the response
- * is complete).
- */
-void *qbman_swp_mc_result(struct qbman_swp *p)
-{
-	u32 *ret, verb;
-
-	ret = qbman_get_cmd(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-
-	/* Remove the valid-bit - command completed if the rest is non-zero */
-	verb = ret[0] & ~QB_VALID_BIT;
-	if (!verb)
-		return NULL;
-	p->mc.valid_bit ^= QB_VALID_BIT;
-	return ret;
-}
-
-#define QB_ENQUEUE_CMD_OPTIONS_SHIFT    0
-enum qb_enqueue_commands {
-	enqueue_empty = 0,
-	enqueue_response_always = 1,
-	enqueue_rejects_to_fq = 2
-};
-
-#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT      2
-#define QB_ENQUEUE_CMD_IRQ_ON_DISPATCH_SHIFT 3
-#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT     4
-
-/**
- * qbman_eq_desc_clear() - Clear the contents of a descriptor to
- *                         default/starting state.
- */
-void qbman_eq_desc_clear(struct qbman_eq_desc *d)
-{
-	memset(d, 0, sizeof(*d));
-}
-
-/**
- * qbman_eq_desc_set_no_orp() - Set enqueue descriptor without orp
- * @d:                the enqueue descriptor.
- * @response_success: 1 = enqueue with response always; 0 = enqueue with
- *                    rejections returned on a FQ.
- */
-void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success)
-{
-	d->verb &= ~(1 << QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT);
-	if (respond_success)
-		d->verb |= enqueue_response_always;
-	else
-		d->verb |= enqueue_rejects_to_fq;
-}
-
-/*
- * Exactly one of the following descriptor "targets" should be set. (Calling any
- * one of these will replace the effect of any prior call to one of these.)
- *   -enqueue to a frame queue
- *   -enqueue to a queuing destination
- */
-
-/**
- * qbman_eq_desc_set_fq() - set the FQ for the enqueue command
- * @d:    the enqueue descriptor
- * @fqid: the id of the frame queue to be enqueued
- */
-void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, u32 fqid)
-{
-	d->verb &= ~(1 << QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT);
-	d->tgtid = cpu_to_le32(fqid);
-}
-
-/**
- * qbman_eq_desc_set_qd() - Set Queuing Destination for the enqueue command
- * @d:       the enqueue descriptor
- * @qdid:    the id of the queuing destination to be enqueued
- * @qd_bin:  the queuing destination bin
- * @qd_prio: the queuing destination priority
- */
-void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, u32 qdid,
-			  u32 qd_bin, u32 qd_prio)
-{
-	d->verb |= 1 << QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT;
-	d->tgtid = cpu_to_le32(qdid);
-	d->qdbin = cpu_to_le16(qd_bin);
-	d->qpri = qd_prio;
-}
-
-#define EQAR_IDX(eqar)     ((eqar) & 0x7)
-#define EQAR_VB(eqar)      ((eqar) & 0x80)
-#define EQAR_SUCCESS(eqar) ((eqar) & 0x100)
-
-/**
- * qbman_swp_enqueue() - Issue an enqueue command
- * @s:  the software portal used for enqueue
- * @d:  the enqueue descriptor
- * @fd: the frame descriptor to be enqueued
- *
- * Please note that 'fd' should only be NULL if the "action" of the
- * descriptor is "orp_hole" or "orp_nesn".
- *
- * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
- */
-int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
-		      const struct dpaa2_fd *fd)
-{
-	struct qbman_eq_desc *p;
-	u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
-
-	if (!EQAR_SUCCESS(eqar))
-		return -EBUSY;
-
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
-	memcpy(&p->dca, &d->dca, 31);
-	memcpy(&p->fd, fd, sizeof(*fd));
-
-	/* Set the verb byte, have to substitute in the valid-bit */
-	dma_wmb();
-	p->verb = d->verb | EQAR_VB(eqar);
-
-	return 0;
-}
-
-/* Static (push) dequeue */
-
-/**
- * qbman_swp_push_get() - Get the push dequeue setup
- * @p:           the software portal object
- * @channel_idx: the channel index to query
- * @enabled:     returned boolean to show whether the push dequeue is enabled
- *               for the given channel
- */
-void qbman_swp_push_get(struct qbman_swp *s, u8 channel_idx, int *enabled)
-{
-	u16 src = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK;
-
-	WARN_ON(channel_idx > 15);
-	*enabled = src | (1 << channel_idx);
-}
-
-/**
- * qbman_swp_push_set() - Enable or disable push dequeue
- * @p:           the software portal object
- * @channel_idx: the channel index (0 to 15)
- * @enable:      enable or disable push dequeue
- */
-void qbman_swp_push_set(struct qbman_swp *s, u8 channel_idx, int enable)
-{
-	u16 dqsrc;
-
-	WARN_ON(channel_idx > 15);
-	if (enable)
-		s->sdq |= 1 << channel_idx;
-	else
-		s->sdq &= ~(1 << channel_idx);
-
-	/* Read make the complete src map.  If no channels are enabled
-	 * the SDQCR must be 0 or else QMan will assert errors
-	 */
-	dqsrc = (s->sdq >> QB_SDQCR_SRC_SHIFT) & QB_SDQCR_SRC_MASK;
-	if (dqsrc != 0)
-		qbman_write_register(s, QBMAN_CINH_SWP_SDQCR, s->sdq);
-	else
-		qbman_write_register(s, QBMAN_CINH_SWP_SDQCR, 0);
-}
-
-#define QB_VDQCR_VERB_DCT_SHIFT    0
-#define QB_VDQCR_VERB_DT_SHIFT     2
-#define QB_VDQCR_VERB_RLS_SHIFT    4
-#define QB_VDQCR_VERB_WAE_SHIFT    5
-
-enum qb_pull_dt_e {
-	qb_pull_dt_channel,
-	qb_pull_dt_workqueue,
-	qb_pull_dt_framequeue
-};
-
-/**
- * qbman_pull_desc_clear() - Clear the contents of a descriptor to
- *                           default/starting state
- * @d: the pull dequeue descriptor to be cleared
- */
-void qbman_pull_desc_clear(struct qbman_pull_desc *d)
-{
-	memset(d, 0, sizeof(*d));
-}
-
-/**
- * qbman_pull_desc_set_storage()- Set the pull dequeue storage
- * @d:            the pull dequeue descriptor to be set
- * @storage:      the pointer of the memory to store the dequeue result
- * @storage_phys: the physical address of the storage memory
- * @stash:        to indicate whether write allocate is enabled
- *
- * If not called, or if called with 'storage' as NULL, the result pull dequeues
- * will produce results to DQRR. If 'storage' is non-NULL, then results are
- * produced to the given memory location (using the DMA address which
- * the caller provides in 'storage_phys'), and 'stash' controls whether or not
- * those writes to main-memory express a cache-warming attribute.
- */
-void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
-				 struct dpaa2_dq *storage,
-				 dma_addr_t storage_phys,
-				 int stash)
-{
-	/* save the virtual address */
-	d->rsp_addr_virt = (u64)(uintptr_t)storage;
-
-	if (!storage) {
-		d->verb &= ~(1 << QB_VDQCR_VERB_RLS_SHIFT);
-		return;
-	}
-	d->verb |= 1 << QB_VDQCR_VERB_RLS_SHIFT;
-	if (stash)
-		d->verb |= 1 << QB_VDQCR_VERB_WAE_SHIFT;
-	else
-		d->verb &= ~(1 << QB_VDQCR_VERB_WAE_SHIFT);
-
-	d->rsp_addr = cpu_to_le64(storage_phys);
-}
-
-/**
- * qbman_pull_desc_set_numframes() - Set the number of frames to be dequeued
- * @d:         the pull dequeue descriptor to be set
- * @numframes: number of frames to be set, must be between 1 and 16, inclusive
- */
-void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, u8 numframes)
-{
-	d->numf = numframes - 1;
-}
-
-/*
- * Exactly one of the following descriptor "actions" should be set. (Calling any
- * one of these will replace the effect of any prior call to one of these.)
- * - pull dequeue from the given frame queue (FQ)
- * - pull dequeue from any FQ in the given work queue (WQ)
- * - pull dequeue from any FQ in any WQ in the given channel
- */
-
-/**
- * qbman_pull_desc_set_fq() - Set fqid from which the dequeue command dequeues
- * @fqid: the frame queue index of the given FQ
- */
-void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, u32 fqid)
-{
-	d->verb |= 1 << QB_VDQCR_VERB_DCT_SHIFT;
-	d->verb |= qb_pull_dt_framequeue << QB_VDQCR_VERB_DT_SHIFT;
-	d->dq_src = cpu_to_le32(fqid);
-}
-
-/**
- * qbman_pull_desc_set_wq() - Set wqid from which the dequeue command dequeues
- * @wqid: composed of channel id and wqid within the channel
- * @dct:  the dequeue command type
- */
-void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, u32 wqid,
-			    enum qbman_pull_type_e dct)
-{
-	d->verb |= dct << QB_VDQCR_VERB_DCT_SHIFT;
-	d->verb |= qb_pull_dt_workqueue << QB_VDQCR_VERB_DT_SHIFT;
-	d->dq_src = cpu_to_le32(wqid);
-}
-
-/**
- * qbman_pull_desc_set_channel() - Set channelid from which the dequeue command
- *                                 dequeues
- * @chid: the channel id to be dequeued
- * @dct:  the dequeue command type
- */
-void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid,
-				 enum qbman_pull_type_e dct)
-{
-	d->verb |= dct << QB_VDQCR_VERB_DCT_SHIFT;
-	d->verb |= qb_pull_dt_channel << QB_VDQCR_VERB_DT_SHIFT;
-	d->dq_src = cpu_to_le32(chid);
-}
-
-/**
- * qbman_swp_pull() - Issue the pull dequeue command
- * @s: the software portal object
- * @d: the software portal descriptor which has been configured with
- *     the set of qbman_pull_desc_set_*() calls
- *
- * Return 0 for success, and -EBUSY if the software portal is not ready
- * to do pull dequeue.
- */
-int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
-{
-	struct qbman_pull_desc *p;
-
-	if (!atomic_dec_and_test(&s->vdq.available)) {
-		atomic_inc(&s->vdq.available);
-		return -EBUSY;
-	}
-	s->vdq.storage = (void *)(uintptr_t)d->rsp_addr_virt;
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_VDQCR);
-	p->numf = d->numf;
-	p->tok = QMAN_DQ_TOKEN_VALID;
-	p->dq_src = d->dq_src;
-	p->rsp_addr = d->rsp_addr;
-	p->rsp_addr_virt = d->rsp_addr_virt;
-	dma_wmb();
-
-	/* Set the verb byte, have to substitute in the valid-bit */
-	p->verb = d->verb | s->vdq.valid_bit;
-	s->vdq.valid_bit ^= QB_VALID_BIT;
-
-	return 0;
-}
-
-#define QMAN_DQRR_PI_MASK   0xf
-
-/**
- * qbman_swp_dqrr_next() - Get an valid DQRR entry
- * @s: the software portal object
- *
- * Return NULL if there are no unconsumed DQRR entries. Return a DQRR entry
- * only once, so repeated calls can return a sequence of DQRR entries, without
- * requiring they be consumed immediately or in any particular order.
- */
-const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s)
-{
-	u32 verb;
-	u32 response_verb;
-	u32 flags;
-	struct dpaa2_dq *p;
-
-	/* Before using valid-bit to detect if something is there, we have to
-	 * handle the case of the DQRR reset bug...
-	 */
-	if (unlikely(s->dqrr.reset_bug)) {
-		/*
-		 * We pick up new entries by cache-inhibited producer index,
-		 * which means that a non-coherent mapping would require us to
-		 * invalidate and read *only* once that PI has indicated that
-		 * there's an entry here. The first trip around the DQRR ring
-		 * will be much less efficient than all subsequent trips around
-		 * it...
-		 */
-		u8 pi = qbman_read_register(s, QBMAN_CINH_SWP_DQPI) &
-			QMAN_DQRR_PI_MASK;
-
-		/* there are new entries if pi != next_idx */
-		if (pi == s->dqrr.next_idx)
-			return NULL;
-
-		/*
-		 * if next_idx is/was the last ring index, and 'pi' is
-		 * different, we can disable the workaround as all the ring
-		 * entries have now been DMA'd to so valid-bit checking is
-		 * repaired. Note: this logic needs to be based on next_idx
-		 * (which increments one at a time), rather than on pi (which
-		 * can burst and wrap-around between our snapshots of it).
-		 */
-		if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1)) {
-			pr_debug("next_idx=%d, pi=%d, clear reset bug\n",
-				 s->dqrr.next_idx, pi);
-			s->dqrr.reset_bug = 0;
-		}
-		prefetch(qbman_get_cmd(s,
-				       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
-	}
-
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
-	verb = p->dq.verb;
-
-	/*
-	 * If the valid-bit isn't of the expected polarity, nothing there. Note,
-	 * in the DQRR reset bug workaround, we shouldn't need to skip these
-	 * check, because we've already determined that a new entry is available
-	 * and we've invalidated the cacheline before reading it, so the
-	 * valid-bit behaviour is repaired and should tell us what we already
-	 * knew from reading PI.
-	 */
-	if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) {
-		prefetch(qbman_get_cmd(s,
-				       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
-		return NULL;
-	}
-	/*
-	 * There's something there. Move "next_idx" attention to the next ring
-	 * entry (and prefetch it) before returning what we found.
-	 */
-	s->dqrr.next_idx++;
-	s->dqrr.next_idx &= s->dqrr.dqrr_size - 1; /* Wrap around */
-	if (!s->dqrr.next_idx)
-		s->dqrr.valid_bit ^= QB_VALID_BIT;
-
-	/*
-	 * If this is the final response to a volatile dequeue command
-	 * indicate that the vdq is available
-	 */
-	flags = p->dq.stat;
-	response_verb = verb & QBMAN_RESULT_MASK;
-	if ((response_verb == QBMAN_RESULT_DQ) &&
-	    (flags & DPAA2_DQ_STAT_VOLATILE) &&
-	    (flags & DPAA2_DQ_STAT_EXPIRED))
-		atomic_inc(&s->vdq.available);
-
-	prefetch(qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
-
-	return p;
-}
-
-/**
- * qbman_swp_dqrr_consume() -  Consume DQRR entries previously returned from
- *                             qbman_swp_dqrr_next().
- * @s: the software portal object
- * @dq: the DQRR entry to be consumed
- */
-void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct dpaa2_dq *dq)
-{
-	qbman_write_register(s, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
-}
-
-/**
- * qbman_result_has_new_result() - Check and get the dequeue response from the
- *                                 dq storage memory set in pull dequeue command
- * @s: the software portal object
- * @dq: the dequeue result read from the memory
- *
- * Return 1 for getting a valid dequeue result, or 0 for not getting a valid
- * dequeue result.
- *
- * Only used for user-provided storage of dequeue results, not DQRR. For
- * efficiency purposes, the driver will perform any required endianness
- * conversion to ensure that the user's dequeue result storage is in host-endian
- * format. As such, once the user has called qbman_result_has_new_result() and
- * been returned a valid dequeue result, they should not call it again on
- * the same memory location (except of course if another dequeue command has
- * been executed to produce a new result to that location).
- */
-int qbman_result_has_new_result(struct qbman_swp *s, const struct dpaa2_dq *dq)
-{
-	if (dq->dq.tok != QMAN_DQ_TOKEN_VALID)
-		return 0;
-
-	/*
-	 * Set token to be 0 so we will detect change back to 1
-	 * next time the looping is traversed. Const is cast away here
-	 * as we want users to treat the dequeue responses as read only.
-	 */
-	((struct dpaa2_dq *)dq)->dq.tok = 0;
-
-	/*
-	 * Determine whether VDQCR is available based on whether the
-	 * current result is sitting in the first storage location of
-	 * the busy command.
-	 */
-	if (s->vdq.storage == dq) {
-		s->vdq.storage = NULL;
-		atomic_inc(&s->vdq.available);
-	}
-
-	return 1;
-}
-
-/**
- * qbman_release_desc_clear() - Clear the contents of a descriptor to
- *                              default/starting state.
- */
-void qbman_release_desc_clear(struct qbman_release_desc *d)
-{
-	memset(d, 0, sizeof(*d));
-	d->verb = 1 << 5; /* Release Command Valid */
-}
-
-/**
- * qbman_release_desc_set_bpid() - Set the ID of the buffer pool to release to
- */
-void qbman_release_desc_set_bpid(struct qbman_release_desc *d, u16 bpid)
-{
-	d->bpid = cpu_to_le16(bpid);
-}
-
-/**
- * qbman_release_desc_set_rcdi() - Determines whether or not the portal's RCDI
- * interrupt source should be asserted after the release command is completed.
- */
-void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable)
-{
-	if (enable)
-		d->verb |= 1 << 6;
-	else
-		d->verb &= ~(1 << 6);
-}
-
-#define RAR_IDX(rar)     ((rar) & 0x7)
-#define RAR_VB(rar)      ((rar) & 0x80)
-#define RAR_SUCCESS(rar) ((rar) & 0x100)
-
-/**
- * qbman_swp_release() - Issue a buffer release command
- * @s:           the software portal object
- * @d:           the release descriptor
- * @buffers:     a pointer pointing to the buffer address to be released
- * @num_buffers: number of buffers to be released,  must be less than 8
- *
- * Return 0 for success, -EBUSY if the release command ring is not ready.
- */
-int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-		      const u64 *buffers, unsigned int num_buffers)
-{
-	int i;
-	struct qbman_release_desc *p;
-	u32 rar;
-
-	if (!num_buffers || (num_buffers > 7))
-		return -EINVAL;
-
-	rar = qbman_read_register(s, QBMAN_CINH_SWP_RAR);
-	if (!RAR_SUCCESS(rar))
-		return -EBUSY;
-
-	/* Start the release command */
-	p = qbman_get_cmd(s, QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
-	/* Copy the caller's buffer pointers to the command */
-	for (i = 0; i < num_buffers; i++)
-		p->buf[i] = cpu_to_le64(buffers[i]);
-	p->bpid = d->bpid;
-
-	/*
-	 * Set the verb byte, have to substitute in the valid-bit and the number
-	 * of buffers.
-	 */
-	dma_wmb();
-	p->verb = d->verb | RAR_VB(rar) | num_buffers;
-
-	return 0;
-}
-
-struct qbman_acquire_desc {
-	u8 verb;
-	u8 reserved;
-	__le16 bpid;
-	u8 num;
-	u8 reserved2[59];
-};
-
-struct qbman_acquire_rslt {
-	u8 verb;
-	u8 rslt;
-	__le16 reserved;
-	u8 num;
-	u8 reserved2[3];
-	__le64 buf[7];
-};
-
-/**
- * qbman_swp_acquire() - Issue a buffer acquire command
- * @s:           the software portal object
- * @bpid:        the buffer pool index
- * @buffers:     a pointer pointing to the acquired buffer addresses
- * @num_buffers: number of buffers to be acquired, must be less than 8
- *
- * Return 0 for success, or negative error code if the acquire command
- * fails.
- */
-int qbman_swp_acquire(struct qbman_swp *s, u16 bpid, u64 *buffers,
-		      unsigned int num_buffers)
-{
-	struct qbman_acquire_desc *p;
-	struct qbman_acquire_rslt *r;
-	int i;
-
-	if (!num_buffers || (num_buffers > 7))
-		return -EINVAL;
-
-	/* Start the management command */
-	p = qbman_swp_mc_start(s);
-
-	if (!p)
-		return -EBUSY;
-
-	/* Encode the caller-provided attributes */
-	p->bpid = cpu_to_le16(bpid);
-	p->num = num_buffers;
-
-	/* Complete the management command */
-	r = qbman_swp_mc_complete(s, p, QBMAN_MC_ACQUIRE);
-	if (unlikely(!r)) {
-		pr_err("qbman: acquire from BPID %d failed, no response\n",
-		       bpid);
-		return -EIO;
-	}
-
-	/* Decode the outcome */
-	WARN_ON((r->verb & 0x7f) != QBMAN_MC_ACQUIRE);
-
-	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
-		pr_err("qbman: acquire from BPID 0x%x failed, code=0x%02x\n",
-		       bpid, r->rslt);
-		return -EIO;
-	}
-
-	WARN_ON(r->num > num_buffers);
-
-	/* Copy the acquired buffers to the caller's array */
-	for (i = 0; i < r->num; i++)
-		buffers[i] = le64_to_cpu(r->buf[i]);
-
-	return (int)r->num;
-}
-
-struct qbman_alt_fq_state_desc {
-	u8 verb;
-	u8 reserved[3];
-	__le32 fqid;
-	u8 reserved2[56];
-};
-
-struct qbman_alt_fq_state_rslt {
-	u8 verb;
-	u8 rslt;
-	u8 reserved[62];
-};
-
-#define ALT_FQ_FQID_MASK 0x00FFFFFF
-
-int qbman_swp_alt_fq_state(struct qbman_swp *s, u32 fqid,
-			   u8 alt_fq_verb)
-{
-	struct qbman_alt_fq_state_desc *p;
-	struct qbman_alt_fq_state_rslt *r;
-
-	/* Start the management command */
-	p = qbman_swp_mc_start(s);
-	if (!p)
-		return -EBUSY;
-
-	p->fqid = cpu_to_le32(fqid & ALT_FQ_FQID_MASK);
-
-	/* Complete the management command */
-	r = qbman_swp_mc_complete(s, p, alt_fq_verb);
-	if (unlikely(!r)) {
-		pr_err("qbman: mgmt cmd failed, no response (verb=0x%x)\n",
-		       alt_fq_verb);
-		return -EIO;
-	}
-
-	/* Decode the outcome */
-	WARN_ON((r->verb & QBMAN_RESULT_MASK) != alt_fq_verb);
-
-	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
-		pr_err("qbman: ALT FQID %d failed: verb = 0x%08x code = 0x%02x\n",
-		       fqid, r->verb, r->rslt);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-struct qbman_cdan_ctrl_desc {
-	u8 verb;
-	u8 reserved;
-	__le16 ch;
-	u8 we;
-	u8 ctrl;
-	__le16 reserved2;
-	__le64 cdan_ctx;
-	u8 reserved3[48];
-
-};
-
-struct qbman_cdan_ctrl_rslt {
-	u8 verb;
-	u8 rslt;
-	__le16 ch;
-	u8 reserved[60];
-};
-
-int qbman_swp_CDAN_set(struct qbman_swp *s, u16 channelid,
-		       u8 we_mask, u8 cdan_en,
-		       u64 ctx)
-{
-	struct qbman_cdan_ctrl_desc *p = NULL;
-	struct qbman_cdan_ctrl_rslt *r = NULL;
-
-	/* Start the management command */
-	p = qbman_swp_mc_start(s);
-	if (!p)
-		return -EBUSY;
-
-	/* Encode the caller-provided attributes */
-	p->ch = cpu_to_le16(channelid);
-	p->we = we_mask;
-	if (cdan_en)
-		p->ctrl = 1;
-	else
-		p->ctrl = 0;
-	p->cdan_ctx = cpu_to_le64(ctx);
-
-	/* Complete the management command */
-	r = qbman_swp_mc_complete(s, p, QBMAN_WQCHAN_CONFIGURE);
-	if (unlikely(!r)) {
-		pr_err("qbman: wqchan config failed, no response\n");
-		return -EIO;
-	}
-
-	WARN_ON((r->verb & 0x7f) != QBMAN_WQCHAN_CONFIGURE);
-
-	/* Determine success or failure */
-	if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
-		pr_err("qbman: CDAN cQID %d failed: code = 0x%02x\n",
-		       channelid, r->rslt);
-		return -EIO;
-	}
-
-	return 0;
-}
diff --git a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.h b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.h
deleted file mode 100644
index 69db3c818742..000000000000
--- a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.h
+++ /dev/null
@@ -1,444 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2016 NXP
- *
- */
-#ifndef __FSL_QBMAN_PORTAL_H
-#define __FSL_QBMAN_PORTAL_H
-
-#include "../../include/dpaa2-fd.h"
-
-struct dpaa2_dq;
-struct qbman_swp;
-
-/* qbman software portal descriptor structure */
-struct qbman_swp_desc {
-	void *cena_bar; /* Cache-enabled portal base address */
-	void __iomem *cinh_bar; /* Cache-inhibited portal base address */
-	u32 qman_version;
-};
-
-#define QBMAN_SWP_INTERRUPT_EQRI 0x01
-#define QBMAN_SWP_INTERRUPT_EQDI 0x02
-#define QBMAN_SWP_INTERRUPT_DQRI 0x04
-#define QBMAN_SWP_INTERRUPT_RCRI 0x08
-#define QBMAN_SWP_INTERRUPT_RCDI 0x10
-#define QBMAN_SWP_INTERRUPT_VDCI 0x20
-
-/* the structure for pull dequeue descriptor */
-struct qbman_pull_desc {
-	u8 verb;
-	u8 numf;
-	u8 tok;
-	u8 reserved;
-	__le32 dq_src;
-	__le64 rsp_addr;
-	u64 rsp_addr_virt;
-	u8 padding[40];
-};
-
-enum qbman_pull_type_e {
-	/* dequeue with priority precedence, respect intra-class scheduling */
-	qbman_pull_type_prio = 1,
-	/* dequeue with active FQ precedence, respect ICS */
-	qbman_pull_type_active,
-	/* dequeue with active FQ precedence, no ICS */
-	qbman_pull_type_active_noics
-};
-
-/* Definitions for parsing dequeue entries */
-#define QBMAN_RESULT_MASK      0x7f
-#define QBMAN_RESULT_DQ        0x60
-#define QBMAN_RESULT_FQRN      0x21
-#define QBMAN_RESULT_FQRNI     0x22
-#define QBMAN_RESULT_FQPN      0x24
-#define QBMAN_RESULT_FQDAN     0x25
-#define QBMAN_RESULT_CDAN      0x26
-#define QBMAN_RESULT_CSCN_MEM  0x27
-#define QBMAN_RESULT_CGCU      0x28
-#define QBMAN_RESULT_BPSCN     0x29
-#define QBMAN_RESULT_CSCN_WQ   0x2a
-
-/* QBMan FQ management command codes */
-#define QBMAN_FQ_SCHEDULE	0x48
-#define QBMAN_FQ_FORCE		0x49
-#define QBMAN_FQ_XON		0x4d
-#define QBMAN_FQ_XOFF		0x4e
-
-/* structure of enqueue descriptor */
-struct qbman_eq_desc {
-	u8 verb;
-	u8 dca;
-	__le16 seqnum;
-	__le16 orpid;
-	__le16 reserved1;
-	__le32 tgtid;
-	__le32 tag;
-	__le16 qdbin;
-	u8 qpri;
-	u8 reserved[3];
-	u8 wae;
-	u8 rspid;
-	__le64 rsp_addr;
-	u8 fd[32];
-};
-
-/* buffer release descriptor */
-struct qbman_release_desc {
-	u8 verb;
-	u8 reserved;
-	__le16 bpid;
-	__le32 reserved2;
-	__le64 buf[7];
-};
-
-/* Management command result codes */
-#define QBMAN_MC_RSLT_OK      0xf0
-
-#define CODE_CDAN_WE_EN    0x1
-#define CODE_CDAN_WE_CTX   0x4
-
-/* portal data structure */
-struct qbman_swp {
-	const struct qbman_swp_desc *desc;
-	void *addr_cena;
-	void __iomem *addr_cinh;
-
-	/* Management commands */
-	struct {
-		u32 valid_bit; /* 0x00 or 0x80 */
-	} mc;
-
-	/* Push dequeues */
-	u32 sdq;
-
-	/* Volatile dequeues */
-	struct {
-		atomic_t available; /* indicates if a command can be sent */
-		u32 valid_bit; /* 0x00 or 0x80 */
-		struct dpaa2_dq *storage; /* NULL if DQRR */
-	} vdq;
-
-	/* DQRR */
-	struct {
-		u32 next_idx;
-		u32 valid_bit;
-		u8 dqrr_size;
-		int reset_bug; /* indicates dqrr reset workaround is needed */
-	} dqrr;
-};
-
-struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
-void qbman_swp_finish(struct qbman_swp *p);
-u32 qbman_swp_interrupt_read_status(struct qbman_swp *p);
-void qbman_swp_interrupt_clear_status(struct qbman_swp *p, u32 mask);
-u32 qbman_swp_interrupt_get_trigger(struct qbman_swp *p);
-void qbman_swp_interrupt_set_trigger(struct qbman_swp *p, u32 mask);
-int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p);
-void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit);
-
-void qbman_swp_push_get(struct qbman_swp *p, u8 channel_idx, int *enabled);
-void qbman_swp_push_set(struct qbman_swp *p, u8 channel_idx, int enable);
-
-void qbman_pull_desc_clear(struct qbman_pull_desc *d);
-void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
-				 struct dpaa2_dq *storage,
-				 dma_addr_t storage_phys,
-				 int stash);
-void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, u8 numframes);
-void qbman_pull_desc_set_fq(struct qbman_pull_desc *d, u32 fqid);
-void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, u32 wqid,
-			    enum qbman_pull_type_e dct);
-void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, u32 chid,
-				 enum qbman_pull_type_e dct);
-
-int qbman_swp_pull(struct qbman_swp *p, struct qbman_pull_desc *d);
-
-const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s);
-void qbman_swp_dqrr_consume(struct qbman_swp *s, const struct dpaa2_dq *dq);
-
-int qbman_result_has_new_result(struct qbman_swp *p, const struct dpaa2_dq *dq);
-
-void qbman_eq_desc_clear(struct qbman_eq_desc *d);
-void qbman_eq_desc_set_no_orp(struct qbman_eq_desc *d, int respond_success);
-void qbman_eq_desc_set_token(struct qbman_eq_desc *d, u8 token);
-void qbman_eq_desc_set_fq(struct qbman_eq_desc *d, u32 fqid);
-void qbman_eq_desc_set_qd(struct qbman_eq_desc *d, u32 qdid,
-			  u32 qd_bin, u32 qd_prio);
-
-int qbman_swp_enqueue(struct qbman_swp *p, const struct qbman_eq_desc *d,
-		      const struct dpaa2_fd *fd);
-
-void qbman_release_desc_clear(struct qbman_release_desc *d);
-void qbman_release_desc_set_bpid(struct qbman_release_desc *d, u16 bpid);
-void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable);
-
-int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-		      const u64 *buffers, unsigned int num_buffers);
-int qbman_swp_acquire(struct qbman_swp *s, u16 bpid, u64 *buffers,
-		      unsigned int num_buffers);
-int qbman_swp_alt_fq_state(struct qbman_swp *s, u32 fqid,
-			   u8 alt_fq_verb);
-int qbman_swp_CDAN_set(struct qbman_swp *s, u16 channelid,
-		       u8 we_mask, u8 cdan_en,
-		       u64 ctx);
-
-void *qbman_swp_mc_start(struct qbman_swp *p);
-void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 cmd_verb);
-void *qbman_swp_mc_result(struct qbman_swp *p);
-
-/**
- * qbman_result_is_DQ() - check if the dequeue result is a dequeue response
- * @dq: the dequeue result to be checked
- *
- * DQRR entries may contain non-dequeue results, ie. notifications
- */
-static inline int qbman_result_is_DQ(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_DQ);
-}
-
-/**
- * qbman_result_is_SCN() - Check the dequeue result is notification or not
- * @dq: the dequeue result to be checked
- *
- */
-static inline int qbman_result_is_SCN(const struct dpaa2_dq *dq)
-{
-	return !qbman_result_is_DQ(dq);
-}
-
-/* FQ Data Availability */
-static inline int qbman_result_is_FQDAN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQDAN);
-}
-
-/* Channel Data Availability */
-static inline int qbman_result_is_CDAN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CDAN);
-}
-
-/* Congestion State Change */
-static inline int qbman_result_is_CSCN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CSCN_WQ);
-}
-
-/* Buffer Pool State Change */
-static inline int qbman_result_is_BPSCN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_BPSCN);
-}
-
-/* Congestion Group Count Update */
-static inline int qbman_result_is_CGCU(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_CGCU);
-}
-
-/* Retirement */
-static inline int qbman_result_is_FQRN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQRN);
-}
-
-/* Retirement Immediate */
-static inline int qbman_result_is_FQRNI(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQRNI);
-}
-
- /* Park */
-static inline int qbman_result_is_FQPN(const struct dpaa2_dq *dq)
-{
-	return ((dq->dq.verb & QBMAN_RESULT_MASK) == QBMAN_RESULT_FQPN);
-}
-
-/**
- * qbman_result_SCN_state() - Get the state field in State-change notification
- */
-static inline u8 qbman_result_SCN_state(const struct dpaa2_dq *scn)
-{
-	return scn->scn.state;
-}
-
-#define SCN_RID_MASK 0x00FFFFFF
-
-/**
- * qbman_result_SCN_rid() - Get the resource id in State-change notification
- */
-static inline u32 qbman_result_SCN_rid(const struct dpaa2_dq *scn)
-{
-	return le32_to_cpu(scn->scn.rid_tok) & SCN_RID_MASK;
-}
-
-/**
- * qbman_result_SCN_ctx() - Get the context data in State-change notification
- */
-static inline u64 qbman_result_SCN_ctx(const struct dpaa2_dq *scn)
-{
-	return le64_to_cpu(scn->scn.ctx);
-}
-
-/**
- * qbman_swp_fq_schedule() - Move the fq to the scheduled state
- * @s:    the software portal object
- * @fqid: the index of frame queue to be scheduled
- *
- * There are a couple of different ways that a FQ can end up parked state,
- * This schedules it.
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_fq_schedule(struct qbman_swp *s, u32 fqid)
-{
-	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_SCHEDULE);
-}
-
-/**
- * qbman_swp_fq_force() - Force the FQ to fully scheduled state
- * @s:    the software portal object
- * @fqid: the index of frame queue to be forced
- *
- * Force eligible will force a tentatively-scheduled FQ to be fully-scheduled
- * and thus be available for selection by any channel-dequeuing behaviour (push
- * or pull). If the FQ is subsequently "dequeued" from the channel and is still
- * empty at the time this happens, the resulting dq_entry will have no FD.
- * (qbman_result_DQ_fd() will return NULL.)
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_fq_force(struct qbman_swp *s, u32 fqid)
-{
-	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_FORCE);
-}
-
-/**
- * qbman_swp_fq_xon() - sets FQ flow-control to XON
- * @s:    the software portal object
- * @fqid: the index of frame queue
- *
- * This setting doesn't affect enqueues to the FQ, just dequeues.
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_fq_xon(struct qbman_swp *s, u32 fqid)
-{
-	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XON);
-}
-
-/**
- * qbman_swp_fq_xoff() - sets FQ flow-control to XOFF
- * @s:    the software portal object
- * @fqid: the index of frame queue
- *
- * This setting doesn't affect enqueues to the FQ, just dequeues.
- * XOFF FQs will remain in the tenatively-scheduled state, even when
- * non-empty, meaning they won't be selected for scheduled dequeuing.
- * If a FQ is changed to XOFF after it had already become truly-scheduled
- * to a channel, and a pull dequeue of that channel occurs that selects
- * that FQ for dequeuing, then the resulting dq_entry will have no FD.
- * (qbman_result_DQ_fd() will return NULL.)
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_fq_xoff(struct qbman_swp *s, u32 fqid)
-{
-	return qbman_swp_alt_fq_state(s, fqid, QBMAN_FQ_XOFF);
-}
-
-/* If the user has been allocated a channel object that is going to generate
- * CDANs to another channel, then the qbman_swp_CDAN* functions will be
- * necessary.
- *
- * CDAN-enabled channels only generate a single CDAN notification, after which
- * they need to be reenabled before they'll generate another. The idea is
- * that pull dequeuing will occur in reaction to the CDAN, followed by a
- * reenable step. Each function generates a distinct command to hardware, so a
- * combination function is provided if the user wishes to modify the "context"
- * (which shows up in each CDAN message) each time they reenable, as a single
- * command to hardware.
- */
-
-/**
- * qbman_swp_CDAN_set_context() - Set CDAN context
- * @s:         the software portal object
- * @channelid: the channel index
- * @ctx:       the context to be set in CDAN
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_CDAN_set_context(struct qbman_swp *s, u16 channelid,
-					     u64 ctx)
-{
-	return qbman_swp_CDAN_set(s, channelid,
-				  CODE_CDAN_WE_CTX,
-				  0, ctx);
-}
-
-/**
- * qbman_swp_CDAN_enable() - Enable CDAN for the channel
- * @s:         the software portal object
- * @channelid: the index of the channel to generate CDAN
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_CDAN_enable(struct qbman_swp *s, u16 channelid)
-{
-	return qbman_swp_CDAN_set(s, channelid,
-				  CODE_CDAN_WE_EN,
-				  1, 0);
-}
-
-/**
- * qbman_swp_CDAN_disable() - disable CDAN for the channel
- * @s:         the software portal object
- * @channelid: the index of the channel to generate CDAN
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_CDAN_disable(struct qbman_swp *s, u16 channelid)
-{
-	return qbman_swp_CDAN_set(s, channelid,
-				  CODE_CDAN_WE_EN,
-				  0, 0);
-}
-
-/**
- * qbman_swp_CDAN_set_context_enable() - Set CDAN contest and enable CDAN
- * @s:         the software portal object
- * @channelid: the index of the channel to generate CDAN
- * @ctx:i      the context set in CDAN
- *
- * Return 0 for success, or negative error code for failure.
- */
-static inline int qbman_swp_CDAN_set_context_enable(struct qbman_swp *s,
-						    u16 channelid,
-						    u64 ctx)
-{
-	return qbman_swp_CDAN_set(s, channelid,
-				  CODE_CDAN_WE_EN | CODE_CDAN_WE_CTX,
-				  1, ctx);
-}
-
-/* Wraps up submit + poll-for-result */
-static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
-					  u8 cmd_verb)
-{
-	int loopvar = 1000;
-
-	qbman_swp_mc_submit(swp, cmd, cmd_verb);
-
-	do {
-		cmd = qbman_swp_mc_result(swp);
-	} while (!cmd && loopvar--);
-
-	WARN_ON(!loopvar);
-
-	return cmd;
-}
-
-#endif /* __FSL_QBMAN_PORTAL_H */
diff --git a/drivers/staging/fsl-mc/include/dpaa2-fd.h b/drivers/staging/fsl-mc/include/dpaa2-fd.h
deleted file mode 100644
index 2576abaa7779..000000000000
--- a/drivers/staging/fsl-mc/include/dpaa2-fd.h
+++ /dev/null
@@ -1,438 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#ifndef __FSL_DPAA2_FD_H
-#define __FSL_DPAA2_FD_H
-
-#include <linux/kernel.h>
-
-/**
- * DOC: DPAA2 FD - Frame Descriptor APIs for DPAA2
- *
- * Frame Descriptors (FDs) are used to describe frame data in the DPAA2.
- * Frames can be enqueued and dequeued to Frame Queues (FQs) which are consumed
- * by the various DPAA accelerators (WRIOP, SEC, PME, DCE)
- *
- * There are three types of frames: single, scatter gather, and frame lists.
- *
- * The set of APIs in this file must be used to create, manipulate and
- * query Frame Descriptors.
- */
-
-/**
- * struct dpaa2_fd - Struct describing FDs
- * @words:         for easier/faster copying the whole FD structure
- * @addr:          address in the FD
- * @len:           length in the FD
- * @bpid:          buffer pool ID
- * @format_offset: format, offset, and short-length fields
- * @frc:           frame context
- * @ctrl:          control bits...including dd, sc, va, err, etc
- * @flc:           flow context address
- *
- * This structure represents the basic Frame Descriptor used in the system.
- */
-struct dpaa2_fd {
-	union {
-		u32 words[8];
-		struct dpaa2_fd_simple {
-			__le64 addr;
-			__le32 len;
-			__le16 bpid;
-			__le16 format_offset;
-			__le32 frc;
-			__le32 ctrl;
-			__le64 flc;
-		} simple;
-	};
-};
-
-#define FD_SHORT_LEN_FLAG_MASK	0x1
-#define FD_SHORT_LEN_FLAG_SHIFT	14
-#define FD_SHORT_LEN_MASK	0x3FFFF
-#define FD_OFFSET_MASK		0x0FFF
-#define FD_FORMAT_MASK		0x3
-#define FD_FORMAT_SHIFT		12
-#define FD_BPID_MASK		0x3FFF
-#define SG_SHORT_LEN_FLAG_MASK	0x1
-#define SG_SHORT_LEN_FLAG_SHIFT	14
-#define SG_SHORT_LEN_MASK	0x1FFFF
-#define SG_OFFSET_MASK		0x0FFF
-#define SG_FORMAT_MASK		0x3
-#define SG_FORMAT_SHIFT		12
-#define SG_BPID_MASK		0x3FFF
-#define SG_FINAL_FLAG_MASK	0x1
-#define SG_FINAL_FLAG_SHIFT	15
-
-/* Error bits in FD CTRL */
-#define FD_CTRL_ERR_MASK	0x000000FF
-#define FD_CTRL_UFD		0x00000004
-#define FD_CTRL_SBE		0x00000008
-#define FD_CTRL_FLC		0x00000010
-#define FD_CTRL_FSE		0x00000020
-#define FD_CTRL_FAERR		0x00000040
-
-/* Annotation bits in FD CTRL */
-#define FD_CTRL_PTA		0x00800000
-#define FD_CTRL_PTV1		0x00400000
-
-enum dpaa2_fd_format {
-	dpaa2_fd_single = 0,
-	dpaa2_fd_list,
-	dpaa2_fd_sg
-};
-
-/**
- * dpaa2_fd_get_addr() - get the addr field of frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the address in the frame descriptor.
- */
-static inline dma_addr_t dpaa2_fd_get_addr(const struct dpaa2_fd *fd)
-{
-	return (dma_addr_t)le64_to_cpu(fd->simple.addr);
-}
-
-/**
- * dpaa2_fd_set_addr() - Set the addr field of frame descriptor
- * @fd: the given frame descriptor
- * @addr: the address needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_addr(struct dpaa2_fd *fd, dma_addr_t addr)
-{
-	fd->simple.addr = cpu_to_le64(addr);
-}
-
-/**
- * dpaa2_fd_get_frc() - Get the frame context in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the frame context field in the frame descriptor.
- */
-static inline u32 dpaa2_fd_get_frc(const struct dpaa2_fd *fd)
-{
-	return le32_to_cpu(fd->simple.frc);
-}
-
-/**
- * dpaa2_fd_set_frc() - Set the frame context in the frame descriptor
- * @fd: the given frame descriptor
- * @frc: the frame context needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_frc(struct dpaa2_fd *fd, u32 frc)
-{
-	fd->simple.frc = cpu_to_le32(frc);
-}
-
-/**
- * dpaa2_fd_get_ctrl() - Get the control bits in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the control bits field in the frame descriptor.
- */
-static inline u32 dpaa2_fd_get_ctrl(const struct dpaa2_fd *fd)
-{
-	return le32_to_cpu(fd->simple.ctrl);
-}
-
-/**
- * dpaa2_fd_set_ctrl() - Set the control bits in the frame descriptor
- * @fd: the given frame descriptor
- * @ctrl: the control bits to be set in the frame descriptor
- */
-static inline void dpaa2_fd_set_ctrl(struct dpaa2_fd *fd, u32 ctrl)
-{
-	fd->simple.ctrl = cpu_to_le32(ctrl);
-}
-
-/**
- * dpaa2_fd_get_flc() - Get the flow context in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the flow context in the frame descriptor.
- */
-static inline dma_addr_t dpaa2_fd_get_flc(const struct dpaa2_fd *fd)
-{
-	return (dma_addr_t)le64_to_cpu(fd->simple.flc);
-}
-
-/**
- * dpaa2_fd_set_flc() - Set the flow context field of frame descriptor
- * @fd: the given frame descriptor
- * @flc_addr: the flow context needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_flc(struct dpaa2_fd *fd,  dma_addr_t flc_addr)
-{
-	fd->simple.flc = cpu_to_le64(flc_addr);
-}
-
-static inline bool dpaa2_fd_short_len(const struct dpaa2_fd *fd)
-{
-	return !!((le16_to_cpu(fd->simple.format_offset) >>
-		  FD_SHORT_LEN_FLAG_SHIFT) & FD_SHORT_LEN_FLAG_MASK);
-}
-
-/**
- * dpaa2_fd_get_len() - Get the length in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the length field in the frame descriptor.
- */
-static inline u32 dpaa2_fd_get_len(const struct dpaa2_fd *fd)
-{
-	if (dpaa2_fd_short_len(fd))
-		return le32_to_cpu(fd->simple.len) & FD_SHORT_LEN_MASK;
-
-	return le32_to_cpu(fd->simple.len);
-}
-
-/**
- * dpaa2_fd_set_len() - Set the length field of frame descriptor
- * @fd: the given frame descriptor
- * @len: the length needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_len(struct dpaa2_fd *fd, u32 len)
-{
-	fd->simple.len = cpu_to_le32(len);
-}
-
-/**
- * dpaa2_fd_get_offset() - Get the offset field in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the offset.
- */
-static inline uint16_t dpaa2_fd_get_offset(const struct dpaa2_fd *fd)
-{
-	return le16_to_cpu(fd->simple.format_offset) & FD_OFFSET_MASK;
-}
-
-/**
- * dpaa2_fd_set_offset() - Set the offset field of frame descriptor
- * @fd: the given frame descriptor
- * @offset: the offset needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_offset(struct dpaa2_fd *fd, uint16_t offset)
-{
-	fd->simple.format_offset &= cpu_to_le16(~FD_OFFSET_MASK);
-	fd->simple.format_offset |= cpu_to_le16(offset);
-}
-
-/**
- * dpaa2_fd_get_format() - Get the format field in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the format.
- */
-static inline enum dpaa2_fd_format dpaa2_fd_get_format(
-						const struct dpaa2_fd *fd)
-{
-	return (enum dpaa2_fd_format)((le16_to_cpu(fd->simple.format_offset)
-				      >> FD_FORMAT_SHIFT) & FD_FORMAT_MASK);
-}
-
-/**
- * dpaa2_fd_set_format() - Set the format field of frame descriptor
- * @fd: the given frame descriptor
- * @format: the format needs to be set in frame descriptor
- */
-static inline void dpaa2_fd_set_format(struct dpaa2_fd *fd,
-				       enum dpaa2_fd_format format)
-{
-	fd->simple.format_offset &=
-		cpu_to_le16(~(FD_FORMAT_MASK << FD_FORMAT_SHIFT));
-	fd->simple.format_offset |= cpu_to_le16(format << FD_FORMAT_SHIFT);
-}
-
-/**
- * dpaa2_fd_get_bpid() - Get the bpid field in the frame descriptor
- * @fd: the given frame descriptor
- *
- * Return the buffer pool id.
- */
-static inline uint16_t dpaa2_fd_get_bpid(const struct dpaa2_fd *fd)
-{
-	return le16_to_cpu(fd->simple.bpid) & FD_BPID_MASK;
-}
-
-/**
- * dpaa2_fd_set_bpid() - Set the bpid field of frame descriptor
- * @fd: the given frame descriptor
- * @bpid: buffer pool id to be set
- */
-static inline void dpaa2_fd_set_bpid(struct dpaa2_fd *fd, uint16_t bpid)
-{
-	fd->simple.bpid &= cpu_to_le16(~(FD_BPID_MASK));
-	fd->simple.bpid |= cpu_to_le16(bpid);
-}
-
-/**
- * struct dpaa2_sg_entry - the scatter-gathering structure
- * @addr: address of the sg entry
- * @len: length in this sg entry
- * @bpid: buffer pool id
- * @format_offset: format and offset fields
- */
-struct dpaa2_sg_entry {
-	__le64 addr;
-	__le32 len;
-	__le16 bpid;
-	__le16 format_offset;
-};
-
-enum dpaa2_sg_format {
-	dpaa2_sg_single = 0,
-	dpaa2_sg_frame_data,
-	dpaa2_sg_sgt_ext
-};
-
-/* Accessors for SG entry fields */
-
-/**
- * dpaa2_sg_get_addr() - Get the address from SG entry
- * @sg: the given scatter-gathering object
- *
- * Return the address.
- */
-static inline dma_addr_t dpaa2_sg_get_addr(const struct dpaa2_sg_entry *sg)
-{
-	return (dma_addr_t)le64_to_cpu(sg->addr);
-}
-
-/**
- * dpaa2_sg_set_addr() - Set the address in SG entry
- * @sg: the given scatter-gathering object
- * @addr: the address to be set
- */
-static inline void dpaa2_sg_set_addr(struct dpaa2_sg_entry *sg, dma_addr_t addr)
-{
-	sg->addr = cpu_to_le64(addr);
-}
-
-static inline bool dpaa2_sg_short_len(const struct dpaa2_sg_entry *sg)
-{
-	return !!((le16_to_cpu(sg->format_offset) >> SG_SHORT_LEN_FLAG_SHIFT)
-		& SG_SHORT_LEN_FLAG_MASK);
-}
-
-/**
- * dpaa2_sg_get_len() - Get the length in SG entry
- * @sg: the given scatter-gathering object
- *
- * Return the length.
- */
-static inline u32 dpaa2_sg_get_len(const struct dpaa2_sg_entry *sg)
-{
-	if (dpaa2_sg_short_len(sg))
-		return le32_to_cpu(sg->len) & SG_SHORT_LEN_MASK;
-
-	return le32_to_cpu(sg->len);
-}
-
-/**
- * dpaa2_sg_set_len() - Set the length in SG entry
- * @sg: the given scatter-gathering object
- * @len: the length to be set
- */
-static inline void dpaa2_sg_set_len(struct dpaa2_sg_entry *sg, u32 len)
-{
-	sg->len = cpu_to_le32(len);
-}
-
-/**
- * dpaa2_sg_get_offset() - Get the offset in SG entry
- * @sg: the given scatter-gathering object
- *
- * Return the offset.
- */
-static inline u16 dpaa2_sg_get_offset(const struct dpaa2_sg_entry *sg)
-{
-	return le16_to_cpu(sg->format_offset) & SG_OFFSET_MASK;
-}
-
-/**
- * dpaa2_sg_set_offset() - Set the offset in SG entry
- * @sg: the given scatter-gathering object
- * @offset: the offset to be set
- */
-static inline void dpaa2_sg_set_offset(struct dpaa2_sg_entry *sg,
-				       u16 offset)
-{
-	sg->format_offset &= cpu_to_le16(~SG_OFFSET_MASK);
-	sg->format_offset |= cpu_to_le16(offset);
-}
-
-/**
- * dpaa2_sg_get_format() - Get the SG format in SG entry
- * @sg: the given scatter-gathering object
- *
- * Return the format.
- */
-static inline enum dpaa2_sg_format
-	dpaa2_sg_get_format(const struct dpaa2_sg_entry *sg)
-{
-	return (enum dpaa2_sg_format)((le16_to_cpu(sg->format_offset)
-				       >> SG_FORMAT_SHIFT) & SG_FORMAT_MASK);
-}
-
-/**
- * dpaa2_sg_set_format() - Set the SG format in SG entry
- * @sg: the given scatter-gathering object
- * @format: the format to be set
- */
-static inline void dpaa2_sg_set_format(struct dpaa2_sg_entry *sg,
-				       enum dpaa2_sg_format format)
-{
-	sg->format_offset &= cpu_to_le16(~(SG_FORMAT_MASK << SG_FORMAT_SHIFT));
-	sg->format_offset |= cpu_to_le16(format << SG_FORMAT_SHIFT);
-}
-
-/**
- * dpaa2_sg_get_bpid() - Get the buffer pool id in SG entry
- * @sg: the given scatter-gathering object
- *
- * Return the bpid.
- */
-static inline u16 dpaa2_sg_get_bpid(const struct dpaa2_sg_entry *sg)
-{
-	return le16_to_cpu(sg->bpid) & SG_BPID_MASK;
-}
-
-/**
- * dpaa2_sg_set_bpid() - Set the buffer pool id in SG entry
- * @sg: the given scatter-gathering object
- * @bpid: the bpid to be set
- */
-static inline void dpaa2_sg_set_bpid(struct dpaa2_sg_entry *sg, u16 bpid)
-{
-	sg->bpid &= cpu_to_le16(~(SG_BPID_MASK));
-	sg->bpid |= cpu_to_le16(bpid);
-}
-
-/**
- * dpaa2_sg_is_final() - Check final bit in SG entry
- * @sg: the given scatter-gathering object
- *
- * Return bool.
- */
-static inline bool dpaa2_sg_is_final(const struct dpaa2_sg_entry *sg)
-{
-	return !!(le16_to_cpu(sg->format_offset) >> SG_FINAL_FLAG_SHIFT);
-}
-
-/**
- * dpaa2_sg_set_final() - Set the final bit in SG entry
- * @sg: the given scatter-gathering object
- * @final: the final boolean to be set
- */
-static inline void dpaa2_sg_set_final(struct dpaa2_sg_entry *sg, bool final)
-{
-	sg->format_offset &= cpu_to_le16((~(SG_FINAL_FLAG_MASK
-					 << SG_FINAL_FLAG_SHIFT)) & 0xFFFF);
-	sg->format_offset |= cpu_to_le16(final << SG_FINAL_FLAG_SHIFT);
-}
-
-#endif /* __FSL_DPAA2_FD_H */
diff --git a/drivers/staging/fsl-mc/include/dpaa2-global.h b/drivers/staging/fsl-mc/include/dpaa2-global.h
deleted file mode 100644
index 9bc0713346a8..000000000000
--- a/drivers/staging/fsl-mc/include/dpaa2-global.h
+++ /dev/null
@@ -1,177 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
- *
- */
-#ifndef __FSL_DPAA2_GLOBAL_H
-#define __FSL_DPAA2_GLOBAL_H
-
-#include <linux/types.h>
-#include <linux/cpumask.h>
-#include "dpaa2-fd.h"
-
-struct dpaa2_dq {
-	union {
-		struct common {
-			u8 verb;
-			u8 reserved[63];
-		} common;
-		struct dq {
-			u8 verb;
-			u8 stat;
-			__le16 seqnum;
-			__le16 oprid;
-			u8 reserved;
-			u8 tok;
-			__le32 fqid;
-			u32 reserved2;
-			__le32 fq_byte_cnt;
-			__le32 fq_frm_cnt;
-			__le64 fqd_ctx;
-			u8 fd[32];
-		} dq;
-		struct scn {
-			u8 verb;
-			u8 stat;
-			u8 state;
-			u8 reserved;
-			__le32 rid_tok;
-			__le64 ctx;
-		} scn;
-	};
-};
-
-/* Parsing frame dequeue results */
-/* FQ empty */
-#define DPAA2_DQ_STAT_FQEMPTY       0x80
-/* FQ held active */
-#define DPAA2_DQ_STAT_HELDACTIVE    0x40
-/* FQ force eligible */
-#define DPAA2_DQ_STAT_FORCEELIGIBLE 0x20
-/* valid frame */
-#define DPAA2_DQ_STAT_VALIDFRAME    0x10
-/* FQ ODP enable */
-#define DPAA2_DQ_STAT_ODPVALID      0x04
-/* volatile dequeue */
-#define DPAA2_DQ_STAT_VOLATILE      0x02
-/* volatile dequeue command is expired */
-#define DPAA2_DQ_STAT_EXPIRED       0x01
-
-#define DQ_FQID_MASK		0x00FFFFFF
-#define DQ_FRAME_COUNT_MASK	0x00FFFFFF
-
-/**
- * dpaa2_dq_flags() - Get the stat field of dequeue response
- * @dq: the dequeue result.
- */
-static inline u32 dpaa2_dq_flags(const struct dpaa2_dq *dq)
-{
-	return dq->dq.stat;
-}
-
-/**
- * dpaa2_dq_is_pull() - Check whether the dq response is from a pull
- *                      command.
- * @dq: the dequeue result
- *
- * Return 1 for volatile(pull) dequeue, 0 for static dequeue.
- */
-static inline int dpaa2_dq_is_pull(const struct dpaa2_dq *dq)
-{
-	return (int)(dpaa2_dq_flags(dq) & DPAA2_DQ_STAT_VOLATILE);
-}
-
-/**
- * dpaa2_dq_is_pull_complete() - Check whether the pull command is completed.
- * @dq: the dequeue result
- *
- * Return boolean.
- */
-static inline bool dpaa2_dq_is_pull_complete(const struct dpaa2_dq *dq)
-{
-	return !!(dpaa2_dq_flags(dq) & DPAA2_DQ_STAT_EXPIRED);
-}
-
-/**
- * dpaa2_dq_seqnum() - Get the seqnum field in dequeue response
- * @dq: the dequeue result
- *
- * seqnum is valid only if VALIDFRAME flag is TRUE
- *
- * Return seqnum.
- */
-static inline u16 dpaa2_dq_seqnum(const struct dpaa2_dq *dq)
-{
-	return le16_to_cpu(dq->dq.seqnum);
-}
-
-/**
- * dpaa2_dq_odpid() - Get the odpid field in dequeue response
- * @dq: the dequeue result
- *
- * odpid is valid only if ODPVALID flag is TRUE.
- *
- * Return odpid.
- */
-static inline u16 dpaa2_dq_odpid(const struct dpaa2_dq *dq)
-{
-	return le16_to_cpu(dq->dq.oprid);
-}
-
-/**
- * dpaa2_dq_fqid() - Get the fqid in dequeue response
- * @dq: the dequeue result
- *
- * Return fqid.
- */
-static inline u32 dpaa2_dq_fqid(const struct dpaa2_dq *dq)
-{
-	return le32_to_cpu(dq->dq.fqid) & DQ_FQID_MASK;
-}
-
-/**
- * dpaa2_dq_byte_count() - Get the byte count in dequeue response
- * @dq: the dequeue result
- *
- * Return the byte count remaining in the FQ.
- */
-static inline u32 dpaa2_dq_byte_count(const struct dpaa2_dq *dq)
-{
-	return le32_to_cpu(dq->dq.fq_byte_cnt);
-}
-
-/**
- * dpaa2_dq_frame_count() - Get the frame count in dequeue response
- * @dq: the dequeue result
- *
- * Return the frame count remaining in the FQ.
- */
-static inline u32 dpaa2_dq_frame_count(const struct dpaa2_dq *dq)
-{
-	return le32_to_cpu(dq->dq.fq_frm_cnt) & DQ_FRAME_COUNT_MASK;
-}
-
-/**
- * dpaa2_dq_fd_ctx() - Get the frame queue context in dequeue response
- * @dq: the dequeue result
- *
- * Return the frame queue context.
- */
-static inline u64 dpaa2_dq_fqd_ctx(const struct dpaa2_dq *dq)
-{
-	return le64_to_cpu(dq->dq.fqd_ctx);
-}
-
-/**
- * dpaa2_dq_fd() - Get the frame descriptor in dequeue response
- * @dq: the dequeue result
- *
- * Return the frame descriptor.
- */
-static inline const struct dpaa2_fd *dpaa2_dq_fd(const struct dpaa2_dq *dq)
-{
-	return (const struct dpaa2_fd *)&dq->dq.fd[0];
-}
-
-#endif /* __FSL_DPAA2_GLOBAL_H */
diff --git a/drivers/staging/fsl-mc/include/dpaa2-io.h b/drivers/staging/fsl-mc/include/dpaa2-io.h
deleted file mode 100644
index ab51e40d11db..000000000000
--- a/drivers/staging/fsl-mc/include/dpaa2-io.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/*
- * Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright NXP
- *
- */
-#ifndef __FSL_DPAA2_IO_H
-#define __FSL_DPAA2_IO_H
-
-#include <linux/types.h>
-#include <linux/cpumask.h>
-#include <linux/irqreturn.h>
-
-#include "dpaa2-fd.h"
-#include "dpaa2-global.h"
-
-struct dpaa2_io;
-struct dpaa2_io_store;
-struct device;
-
-/**
- * DOC: DPIO Service
- *
- * The DPIO service provides APIs for users to interact with the datapath
- * by enqueueing and dequeing frame descriptors.
- *
- * The following set of APIs can be used to enqueue and dequeue frames
- * as well as producing notification callbacks when data is available
- * for dequeue.
- */
-
-#define DPAA2_IO_ANY_CPU	-1
-
-/**
- * struct dpaa2_io_desc - The DPIO descriptor
- * @receives_notifications: Use notificaton mode. Non-zero if the DPIO
- *                  has a channel.
- * @has_8prio:      Set to non-zero for channel with 8 priority WQs.  Ignored
- *                  unless receives_notification is TRUE.
- * @cpu:            The cpu index that at least interrupt handlers will
- *                  execute on.
- * @stash_affinity: The stash affinity for this portal favour 'cpu'
- * @regs_cena:      The cache enabled regs.
- * @regs_cinh:      The cache inhibited regs
- * @dpio_id:        The dpio index
- * @qman_version:   The qman version
- *
- * Describes the attributes and features of the DPIO object.
- */
-struct dpaa2_io_desc {
-	int receives_notifications;
-	int has_8prio;
-	int cpu;
-	void *regs_cena;
-	void __iomem *regs_cinh;
-	int dpio_id;
-	u32 qman_version;
-};
-
-struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc);
-
-void dpaa2_io_down(struct dpaa2_io *d);
-
-irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj);
-
-struct dpaa2_io *dpaa2_io_service_select(int cpu);
-
-/**
- * struct dpaa2_io_notification_ctx - The DPIO notification context structure
- * @cb:           The callback to be invoked when the notification arrives
- * @is_cdan:      Zero for FQDAN, non-zero for CDAN
- * @id:           FQID or channel ID, needed for rearm
- * @desired_cpu:  The cpu on which the notifications will show up. Use
- *                DPAA2_IO_ANY_CPU if don't care
- * @dpio_id:      The dpio index
- * @qman64:       The 64-bit context value shows up in the FQDAN/CDAN.
- * @node:         The list node
- * @dpio_private: The dpio object internal to dpio_service
- *
- * Used when a FQDAN/CDAN registration is made by drivers.
- */
-struct dpaa2_io_notification_ctx {
-	void (*cb)(struct dpaa2_io_notification_ctx *ctx);
-	int is_cdan;
-	u32 id;
-	int desired_cpu;
-	int dpio_id;
-	u64 qman64;
-	struct list_head node;
-	void *dpio_private;
-};
-
-int dpaa2_io_service_register(struct dpaa2_io *service,
-			      struct dpaa2_io_notification_ctx *ctx);
-void dpaa2_io_service_deregister(struct dpaa2_io *service,
-				 struct dpaa2_io_notification_ctx *ctx);
-int dpaa2_io_service_rearm(struct dpaa2_io *service,
-			   struct dpaa2_io_notification_ctx *ctx);
-
-int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
-				  struct dpaa2_io_store *s);
-
-int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d, u32 qdid, u8 prio,
-				u16 qdbin, const struct dpaa2_fd *fd);
-int dpaa2_io_service_release(struct dpaa2_io *d, u32 bpid,
-			     const u64 *buffers, unsigned int num_buffers);
-int dpaa2_io_service_acquire(struct dpaa2_io *d, u32 bpid,
-			     u64 *buffers, unsigned int num_buffers);
-
-struct dpaa2_io_store *dpaa2_io_store_create(unsigned int max_frames,
-					     struct device *dev);
-void dpaa2_io_store_destroy(struct dpaa2_io_store *s);
-struct dpaa2_dq *dpaa2_io_store_next(struct dpaa2_io_store *s, int *is_last);
-
-#endif /* __FSL_DPAA2_IO_H */
diff --git a/include/soc/fsl/dpaa2-fd.h b/include/soc/fsl/dpaa2-fd.h
new file mode 100644
index 000000000000..2576abaa7779
--- /dev/null
+++ b/include/soc/fsl/dpaa2-fd.h
@@ -0,0 +1,438 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#ifndef __FSL_DPAA2_FD_H
+#define __FSL_DPAA2_FD_H
+
+#include <linux/kernel.h>
+
+/**
+ * DOC: DPAA2 FD - Frame Descriptor APIs for DPAA2
+ *
+ * Frame Descriptors (FDs) are used to describe frame data in the DPAA2.
+ * Frames can be enqueued and dequeued to Frame Queues (FQs) which are consumed
+ * by the various DPAA accelerators (WRIOP, SEC, PME, DCE)
+ *
+ * There are three types of frames: single, scatter gather, and frame lists.
+ *
+ * The set of APIs in this file must be used to create, manipulate and
+ * query Frame Descriptors.
+ */
+
+/**
+ * struct dpaa2_fd - Struct describing FDs
+ * @words:         for easier/faster copying the whole FD structure
+ * @addr:          address in the FD
+ * @len:           length in the FD
+ * @bpid:          buffer pool ID
+ * @format_offset: format, offset, and short-length fields
+ * @frc:           frame context
+ * @ctrl:          control bits...including dd, sc, va, err, etc
+ * @flc:           flow context address
+ *
+ * This structure represents the basic Frame Descriptor used in the system.
+ */
+struct dpaa2_fd {
+	union {
+		u32 words[8];
+		struct dpaa2_fd_simple {
+			__le64 addr;
+			__le32 len;
+			__le16 bpid;
+			__le16 format_offset;
+			__le32 frc;
+			__le32 ctrl;
+			__le64 flc;
+		} simple;
+	};
+};
+
+#define FD_SHORT_LEN_FLAG_MASK	0x1
+#define FD_SHORT_LEN_FLAG_SHIFT	14
+#define FD_SHORT_LEN_MASK	0x3FFFF
+#define FD_OFFSET_MASK		0x0FFF
+#define FD_FORMAT_MASK		0x3
+#define FD_FORMAT_SHIFT		12
+#define FD_BPID_MASK		0x3FFF
+#define SG_SHORT_LEN_FLAG_MASK	0x1
+#define SG_SHORT_LEN_FLAG_SHIFT	14
+#define SG_SHORT_LEN_MASK	0x1FFFF
+#define SG_OFFSET_MASK		0x0FFF
+#define SG_FORMAT_MASK		0x3
+#define SG_FORMAT_SHIFT		12
+#define SG_BPID_MASK		0x3FFF
+#define SG_FINAL_FLAG_MASK	0x1
+#define SG_FINAL_FLAG_SHIFT	15
+
+/* Error bits in FD CTRL */
+#define FD_CTRL_ERR_MASK	0x000000FF
+#define FD_CTRL_UFD		0x00000004
+#define FD_CTRL_SBE		0x00000008
+#define FD_CTRL_FLC		0x00000010
+#define FD_CTRL_FSE		0x00000020
+#define FD_CTRL_FAERR		0x00000040
+
+/* Annotation bits in FD CTRL */
+#define FD_CTRL_PTA		0x00800000
+#define FD_CTRL_PTV1		0x00400000
+
+enum dpaa2_fd_format {
+	dpaa2_fd_single = 0,
+	dpaa2_fd_list,
+	dpaa2_fd_sg
+};
+
+/**
+ * dpaa2_fd_get_addr() - get the addr field of frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the address in the frame descriptor.
+ */
+static inline dma_addr_t dpaa2_fd_get_addr(const struct dpaa2_fd *fd)
+{
+	return (dma_addr_t)le64_to_cpu(fd->simple.addr);
+}
+
+/**
+ * dpaa2_fd_set_addr() - Set the addr field of frame descriptor
+ * @fd: the given frame descriptor
+ * @addr: the address needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_addr(struct dpaa2_fd *fd, dma_addr_t addr)
+{
+	fd->simple.addr = cpu_to_le64(addr);
+}
+
+/**
+ * dpaa2_fd_get_frc() - Get the frame context in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the frame context field in the frame descriptor.
+ */
+static inline u32 dpaa2_fd_get_frc(const struct dpaa2_fd *fd)
+{
+	return le32_to_cpu(fd->simple.frc);
+}
+
+/**
+ * dpaa2_fd_set_frc() - Set the frame context in the frame descriptor
+ * @fd: the given frame descriptor
+ * @frc: the frame context needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_frc(struct dpaa2_fd *fd, u32 frc)
+{
+	fd->simple.frc = cpu_to_le32(frc);
+}
+
+/**
+ * dpaa2_fd_get_ctrl() - Get the control bits in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the control bits field in the frame descriptor.
+ */
+static inline u32 dpaa2_fd_get_ctrl(const struct dpaa2_fd *fd)
+{
+	return le32_to_cpu(fd->simple.ctrl);
+}
+
+/**
+ * dpaa2_fd_set_ctrl() - Set the control bits in the frame descriptor
+ * @fd: the given frame descriptor
+ * @ctrl: the control bits to be set in the frame descriptor
+ */
+static inline void dpaa2_fd_set_ctrl(struct dpaa2_fd *fd, u32 ctrl)
+{
+	fd->simple.ctrl = cpu_to_le32(ctrl);
+}
+
+/**
+ * dpaa2_fd_get_flc() - Get the flow context in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the flow context in the frame descriptor.
+ */
+static inline dma_addr_t dpaa2_fd_get_flc(const struct dpaa2_fd *fd)
+{
+	return (dma_addr_t)le64_to_cpu(fd->simple.flc);
+}
+
+/**
+ * dpaa2_fd_set_flc() - Set the flow context field of frame descriptor
+ * @fd: the given frame descriptor
+ * @flc_addr: the flow context needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_flc(struct dpaa2_fd *fd,  dma_addr_t flc_addr)
+{
+	fd->simple.flc = cpu_to_le64(flc_addr);
+}
+
+static inline bool dpaa2_fd_short_len(const struct dpaa2_fd *fd)
+{
+	return !!((le16_to_cpu(fd->simple.format_offset) >>
+		  FD_SHORT_LEN_FLAG_SHIFT) & FD_SHORT_LEN_FLAG_MASK);
+}
+
+/**
+ * dpaa2_fd_get_len() - Get the length in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the length field in the frame descriptor.
+ */
+static inline u32 dpaa2_fd_get_len(const struct dpaa2_fd *fd)
+{
+	if (dpaa2_fd_short_len(fd))
+		return le32_to_cpu(fd->simple.len) & FD_SHORT_LEN_MASK;
+
+	return le32_to_cpu(fd->simple.len);
+}
+
+/**
+ * dpaa2_fd_set_len() - Set the length field of frame descriptor
+ * @fd: the given frame descriptor
+ * @len: the length needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_len(struct dpaa2_fd *fd, u32 len)
+{
+	fd->simple.len = cpu_to_le32(len);
+}
+
+/**
+ * dpaa2_fd_get_offset() - Get the offset field in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the offset.
+ */
+static inline uint16_t dpaa2_fd_get_offset(const struct dpaa2_fd *fd)
+{
+	return le16_to_cpu(fd->simple.format_offset) & FD_OFFSET_MASK;
+}
+
+/**
+ * dpaa2_fd_set_offset() - Set the offset field of frame descriptor
+ * @fd: the given frame descriptor
+ * @offset: the offset needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_offset(struct dpaa2_fd *fd, uint16_t offset)
+{
+	fd->simple.format_offset &= cpu_to_le16(~FD_OFFSET_MASK);
+	fd->simple.format_offset |= cpu_to_le16(offset);
+}
+
+/**
+ * dpaa2_fd_get_format() - Get the format field in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the format.
+ */
+static inline enum dpaa2_fd_format dpaa2_fd_get_format(
+						const struct dpaa2_fd *fd)
+{
+	return (enum dpaa2_fd_format)((le16_to_cpu(fd->simple.format_offset)
+				      >> FD_FORMAT_SHIFT) & FD_FORMAT_MASK);
+}
+
+/**
+ * dpaa2_fd_set_format() - Set the format field of frame descriptor
+ * @fd: the given frame descriptor
+ * @format: the format needs to be set in frame descriptor
+ */
+static inline void dpaa2_fd_set_format(struct dpaa2_fd *fd,
+				       enum dpaa2_fd_format format)
+{
+	fd->simple.format_offset &=
+		cpu_to_le16(~(FD_FORMAT_MASK << FD_FORMAT_SHIFT));
+	fd->simple.format_offset |= cpu_to_le16(format << FD_FORMAT_SHIFT);
+}
+
+/**
+ * dpaa2_fd_get_bpid() - Get the bpid field in the frame descriptor
+ * @fd: the given frame descriptor
+ *
+ * Return the buffer pool id.
+ */
+static inline uint16_t dpaa2_fd_get_bpid(const struct dpaa2_fd *fd)
+{
+	return le16_to_cpu(fd->simple.bpid) & FD_BPID_MASK;
+}
+
+/**
+ * dpaa2_fd_set_bpid() - Set the bpid field of frame descriptor
+ * @fd: the given frame descriptor
+ * @bpid: buffer pool id to be set
+ */
+static inline void dpaa2_fd_set_bpid(struct dpaa2_fd *fd, uint16_t bpid)
+{
+	fd->simple.bpid &= cpu_to_le16(~(FD_BPID_MASK));
+	fd->simple.bpid |= cpu_to_le16(bpid);
+}
+
+/**
+ * struct dpaa2_sg_entry - the scatter-gathering structure
+ * @addr: address of the sg entry
+ * @len: length in this sg entry
+ * @bpid: buffer pool id
+ * @format_offset: format and offset fields
+ */
+struct dpaa2_sg_entry {
+	__le64 addr;
+	__le32 len;
+	__le16 bpid;
+	__le16 format_offset;
+};
+
+enum dpaa2_sg_format {
+	dpaa2_sg_single = 0,
+	dpaa2_sg_frame_data,
+	dpaa2_sg_sgt_ext
+};
+
+/* Accessors for SG entry fields */
+
+/**
+ * dpaa2_sg_get_addr() - Get the address from SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return the address.
+ */
+static inline dma_addr_t dpaa2_sg_get_addr(const struct dpaa2_sg_entry *sg)
+{
+	return (dma_addr_t)le64_to_cpu(sg->addr);
+}
+
+/**
+ * dpaa2_sg_set_addr() - Set the address in SG entry
+ * @sg: the given scatter-gathering object
+ * @addr: the address to be set
+ */
+static inline void dpaa2_sg_set_addr(struct dpaa2_sg_entry *sg, dma_addr_t addr)
+{
+	sg->addr = cpu_to_le64(addr);
+}
+
+static inline bool dpaa2_sg_short_len(const struct dpaa2_sg_entry *sg)
+{
+	return !!((le16_to_cpu(sg->format_offset) >> SG_SHORT_LEN_FLAG_SHIFT)
+		& SG_SHORT_LEN_FLAG_MASK);
+}
+
+/**
+ * dpaa2_sg_get_len() - Get the length in SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return the length.
+ */
+static inline u32 dpaa2_sg_get_len(const struct dpaa2_sg_entry *sg)
+{
+	if (dpaa2_sg_short_len(sg))
+		return le32_to_cpu(sg->len) & SG_SHORT_LEN_MASK;
+
+	return le32_to_cpu(sg->len);
+}
+
+/**
+ * dpaa2_sg_set_len() - Set the length in SG entry
+ * @sg: the given scatter-gathering object
+ * @len: the length to be set
+ */
+static inline void dpaa2_sg_set_len(struct dpaa2_sg_entry *sg, u32 len)
+{
+	sg->len = cpu_to_le32(len);
+}
+
+/**
+ * dpaa2_sg_get_offset() - Get the offset in SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return the offset.
+ */
+static inline u16 dpaa2_sg_get_offset(const struct dpaa2_sg_entry *sg)
+{
+	return le16_to_cpu(sg->format_offset) & SG_OFFSET_MASK;
+}
+
+/**
+ * dpaa2_sg_set_offset() - Set the offset in SG entry
+ * @sg: the given scatter-gathering object
+ * @offset: the offset to be set
+ */
+static inline void dpaa2_sg_set_offset(struct dpaa2_sg_entry *sg,
+				       u16 offset)
+{
+	sg->format_offset &= cpu_to_le16(~SG_OFFSET_MASK);
+	sg->format_offset |= cpu_to_le16(offset);
+}
+
+/**
+ * dpaa2_sg_get_format() - Get the SG format in SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return the format.
+ */
+static inline enum dpaa2_sg_format
+	dpaa2_sg_get_format(const struct dpaa2_sg_entry *sg)
+{
+	return (enum dpaa2_sg_format)((le16_to_cpu(sg->format_offset)
+				       >> SG_FORMAT_SHIFT) & SG_FORMAT_MASK);
+}
+
+/**
+ * dpaa2_sg_set_format() - Set the SG format in SG entry
+ * @sg: the given scatter-gathering object
+ * @format: the format to be set
+ */
+static inline void dpaa2_sg_set_format(struct dpaa2_sg_entry *sg,
+				       enum dpaa2_sg_format format)
+{
+	sg->format_offset &= cpu_to_le16(~(SG_FORMAT_MASK << SG_FORMAT_SHIFT));
+	sg->format_offset |= cpu_to_le16(format << SG_FORMAT_SHIFT);
+}
+
+/**
+ * dpaa2_sg_get_bpid() - Get the buffer pool id in SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return the bpid.
+ */
+static inline u16 dpaa2_sg_get_bpid(const struct dpaa2_sg_entry *sg)
+{
+	return le16_to_cpu(sg->bpid) & SG_BPID_MASK;
+}
+
+/**
+ * dpaa2_sg_set_bpid() - Set the buffer pool id in SG entry
+ * @sg: the given scatter-gathering object
+ * @bpid: the bpid to be set
+ */
+static inline void dpaa2_sg_set_bpid(struct dpaa2_sg_entry *sg, u16 bpid)
+{
+	sg->bpid &= cpu_to_le16(~(SG_BPID_MASK));
+	sg->bpid |= cpu_to_le16(bpid);
+}
+
+/**
+ * dpaa2_sg_is_final() - Check final bit in SG entry
+ * @sg: the given scatter-gathering object
+ *
+ * Return bool.
+ */
+static inline bool dpaa2_sg_is_final(const struct dpaa2_sg_entry *sg)
+{
+	return !!(le16_to_cpu(sg->format_offset) >> SG_FINAL_FLAG_SHIFT);
+}
+
+/**
+ * dpaa2_sg_set_final() - Set the final bit in SG entry
+ * @sg: the given scatter-gathering object
+ * @final: the final boolean to be set
+ */
+static inline void dpaa2_sg_set_final(struct dpaa2_sg_entry *sg, bool final)
+{
+	sg->format_offset &= cpu_to_le16((~(SG_FINAL_FLAG_MASK
+					 << SG_FINAL_FLAG_SHIFT)) & 0xFFFF);
+	sg->format_offset |= cpu_to_le16(final << SG_FINAL_FLAG_SHIFT);
+}
+
+#endif /* __FSL_DPAA2_FD_H */
diff --git a/include/soc/fsl/dpaa2-global.h b/include/soc/fsl/dpaa2-global.h
new file mode 100644
index 000000000000..9bc0713346a8
--- /dev/null
+++ b/include/soc/fsl/dpaa2-global.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2016 NXP
+ *
+ */
+#ifndef __FSL_DPAA2_GLOBAL_H
+#define __FSL_DPAA2_GLOBAL_H
+
+#include <linux/types.h>
+#include <linux/cpumask.h>
+#include "dpaa2-fd.h"
+
+struct dpaa2_dq {
+	union {
+		struct common {
+			u8 verb;
+			u8 reserved[63];
+		} common;
+		struct dq {
+			u8 verb;
+			u8 stat;
+			__le16 seqnum;
+			__le16 oprid;
+			u8 reserved;
+			u8 tok;
+			__le32 fqid;
+			u32 reserved2;
+			__le32 fq_byte_cnt;
+			__le32 fq_frm_cnt;
+			__le64 fqd_ctx;
+			u8 fd[32];
+		} dq;
+		struct scn {
+			u8 verb;
+			u8 stat;
+			u8 state;
+			u8 reserved;
+			__le32 rid_tok;
+			__le64 ctx;
+		} scn;
+	};
+};
+
+/* Parsing frame dequeue results */
+/* FQ empty */
+#define DPAA2_DQ_STAT_FQEMPTY       0x80
+/* FQ held active */
+#define DPAA2_DQ_STAT_HELDACTIVE    0x40
+/* FQ force eligible */
+#define DPAA2_DQ_STAT_FORCEELIGIBLE 0x20
+/* valid frame */
+#define DPAA2_DQ_STAT_VALIDFRAME    0x10
+/* FQ ODP enable */
+#define DPAA2_DQ_STAT_ODPVALID      0x04
+/* volatile dequeue */
+#define DPAA2_DQ_STAT_VOLATILE      0x02
+/* volatile dequeue command is expired */
+#define DPAA2_DQ_STAT_EXPIRED       0x01
+
+#define DQ_FQID_MASK		0x00FFFFFF
+#define DQ_FRAME_COUNT_MASK	0x00FFFFFF
+
+/**
+ * dpaa2_dq_flags() - Get the stat field of dequeue response
+ * @dq: the dequeue result.
+ */
+static inline u32 dpaa2_dq_flags(const struct dpaa2_dq *dq)
+{
+	return dq->dq.stat;
+}
+
+/**
+ * dpaa2_dq_is_pull() - Check whether the dq response is from a pull
+ *                      command.
+ * @dq: the dequeue result
+ *
+ * Return 1 for volatile(pull) dequeue, 0 for static dequeue.
+ */
+static inline int dpaa2_dq_is_pull(const struct dpaa2_dq *dq)
+{
+	return (int)(dpaa2_dq_flags(dq) & DPAA2_DQ_STAT_VOLATILE);
+}
+
+/**
+ * dpaa2_dq_is_pull_complete() - Check whether the pull command is completed.
+ * @dq: the dequeue result
+ *
+ * Return boolean.
+ */
+static inline bool dpaa2_dq_is_pull_complete(const struct dpaa2_dq *dq)
+{
+	return !!(dpaa2_dq_flags(dq) & DPAA2_DQ_STAT_EXPIRED);
+}
+
+/**
+ * dpaa2_dq_seqnum() - Get the seqnum field in dequeue response
+ * @dq: the dequeue result
+ *
+ * seqnum is valid only if VALIDFRAME flag is TRUE
+ *
+ * Return seqnum.
+ */
+static inline u16 dpaa2_dq_seqnum(const struct dpaa2_dq *dq)
+{
+	return le16_to_cpu(dq->dq.seqnum);
+}
+
+/**
+ * dpaa2_dq_odpid() - Get the odpid field in dequeue response
+ * @dq: the dequeue result
+ *
+ * odpid is valid only if ODPVALID flag is TRUE.
+ *
+ * Return odpid.
+ */
+static inline u16 dpaa2_dq_odpid(const struct dpaa2_dq *dq)
+{
+	return le16_to_cpu(dq->dq.oprid);
+}
+
+/**
+ * dpaa2_dq_fqid() - Get the fqid in dequeue response
+ * @dq: the dequeue result
+ *
+ * Return fqid.
+ */
+static inline u32 dpaa2_dq_fqid(const struct dpaa2_dq *dq)
+{
+	return le32_to_cpu(dq->dq.fqid) & DQ_FQID_MASK;
+}
+
+/**
+ * dpaa2_dq_byte_count() - Get the byte count in dequeue response
+ * @dq: the dequeue result
+ *
+ * Return the byte count remaining in the FQ.
+ */
+static inline u32 dpaa2_dq_byte_count(const struct dpaa2_dq *dq)
+{
+	return le32_to_cpu(dq->dq.fq_byte_cnt);
+}
+
+/**
+ * dpaa2_dq_frame_count() - Get the frame count in dequeue response
+ * @dq: the dequeue result
+ *
+ * Return the frame count remaining in the FQ.
+ */
+static inline u32 dpaa2_dq_frame_count(const struct dpaa2_dq *dq)
+{
+	return le32_to_cpu(dq->dq.fq_frm_cnt) & DQ_FRAME_COUNT_MASK;
+}
+
+/**
+ * dpaa2_dq_fd_ctx() - Get the frame queue context in dequeue response
+ * @dq: the dequeue result
+ *
+ * Return the frame queue context.
+ */
+static inline u64 dpaa2_dq_fqd_ctx(const struct dpaa2_dq *dq)
+{
+	return le64_to_cpu(dq->dq.fqd_ctx);
+}
+
+/**
+ * dpaa2_dq_fd() - Get the frame descriptor in dequeue response
+ * @dq: the dequeue result
+ *
+ * Return the frame descriptor.
+ */
+static inline const struct dpaa2_fd *dpaa2_dq_fd(const struct dpaa2_dq *dq)
+{
+	return (const struct dpaa2_fd *)&dq->dq.fd[0];
+}
+
+#endif /* __FSL_DPAA2_GLOBAL_H */
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
new file mode 100644
index 000000000000..ab51e40d11db
--- /dev/null
+++ b/include/soc/fsl/dpaa2-io.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright NXP
+ *
+ */
+#ifndef __FSL_DPAA2_IO_H
+#define __FSL_DPAA2_IO_H
+
+#include <linux/types.h>
+#include <linux/cpumask.h>
+#include <linux/irqreturn.h>
+
+#include "dpaa2-fd.h"
+#include "dpaa2-global.h"
+
+struct dpaa2_io;
+struct dpaa2_io_store;
+struct device;
+
+/**
+ * DOC: DPIO Service
+ *
+ * The DPIO service provides APIs for users to interact with the datapath
+ * by enqueueing and dequeing frame descriptors.
+ *
+ * The following set of APIs can be used to enqueue and dequeue frames
+ * as well as producing notification callbacks when data is available
+ * for dequeue.
+ */
+
+#define DPAA2_IO_ANY_CPU	-1
+
+/**
+ * struct dpaa2_io_desc - The DPIO descriptor
+ * @receives_notifications: Use notificaton mode. Non-zero if the DPIO
+ *                  has a channel.
+ * @has_8prio:      Set to non-zero for channel with 8 priority WQs.  Ignored
+ *                  unless receives_notification is TRUE.
+ * @cpu:            The cpu index that at least interrupt handlers will
+ *                  execute on.
+ * @stash_affinity: The stash affinity for this portal favour 'cpu'
+ * @regs_cena:      The cache enabled regs.
+ * @regs_cinh:      The cache inhibited regs
+ * @dpio_id:        The dpio index
+ * @qman_version:   The qman version
+ *
+ * Describes the attributes and features of the DPIO object.
+ */
+struct dpaa2_io_desc {
+	int receives_notifications;
+	int has_8prio;
+	int cpu;
+	void *regs_cena;
+	void __iomem *regs_cinh;
+	int dpio_id;
+	u32 qman_version;
+};
+
+struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc);
+
+void dpaa2_io_down(struct dpaa2_io *d);
+
+irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj);
+
+struct dpaa2_io *dpaa2_io_service_select(int cpu);
+
+/**
+ * struct dpaa2_io_notification_ctx - The DPIO notification context structure
+ * @cb:           The callback to be invoked when the notification arrives
+ * @is_cdan:      Zero for FQDAN, non-zero for CDAN
+ * @id:           FQID or channel ID, needed for rearm
+ * @desired_cpu:  The cpu on which the notifications will show up. Use
+ *                DPAA2_IO_ANY_CPU if don't care
+ * @dpio_id:      The dpio index
+ * @qman64:       The 64-bit context value shows up in the FQDAN/CDAN.
+ * @node:         The list node
+ * @dpio_private: The dpio object internal to dpio_service
+ *
+ * Used when a FQDAN/CDAN registration is made by drivers.
+ */
+struct dpaa2_io_notification_ctx {
+	void (*cb)(struct dpaa2_io_notification_ctx *ctx);
+	int is_cdan;
+	u32 id;
+	int desired_cpu;
+	int dpio_id;
+	u64 qman64;
+	struct list_head node;
+	void *dpio_private;
+};
+
+int dpaa2_io_service_register(struct dpaa2_io *service,
+			      struct dpaa2_io_notification_ctx *ctx);
+void dpaa2_io_service_deregister(struct dpaa2_io *service,
+				 struct dpaa2_io_notification_ctx *ctx);
+int dpaa2_io_service_rearm(struct dpaa2_io *service,
+			   struct dpaa2_io_notification_ctx *ctx);
+
+int dpaa2_io_service_pull_channel(struct dpaa2_io *d, u32 channelid,
+				  struct dpaa2_io_store *s);
+
+int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d, u32 qdid, u8 prio,
+				u16 qdbin, const struct dpaa2_fd *fd);
+int dpaa2_io_service_release(struct dpaa2_io *d, u32 bpid,
+			     const u64 *buffers, unsigned int num_buffers);
+int dpaa2_io_service_acquire(struct dpaa2_io *d, u32 bpid,
+			     u64 *buffers, unsigned int num_buffers);
+
+struct dpaa2_io_store *dpaa2_io_store_create(unsigned int max_frames,
+					     struct device *dev);
+void dpaa2_io_store_destroy(struct dpaa2_io_store *s);
+struct dpaa2_dq *dpaa2_io_store_next(struct dpaa2_io_store *s, int *is_last);
+
+#endif /* __FSL_DPAA2_IO_H */
-- 
cgit v1.2.3


From 56ab8cdbc1438507d79085fcc7e511327d84aeb8 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 17 May 2018 10:30:16 -0400
Subject: media: v4l: Add support for STD ioctls on subdev nodes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no way to control the standard of subdevices which are part of
a media device. The ioctls which exists all target video devices
explicitly and the idea is that the video device should talk to the
subdevice. For subdevices part of a media graph this is not possible and
the standard must be controlled on the subdev device directly.

Add four new ioctls to be able to directly interact with subdevices and
control the video standard; VIDIOC_SUBDEV_ENUMSTD, VIDIOC_SUBDEV_G_STD,
VIDIOC_SUBDEV_S_STD and VIDIOC_SUBDEV_QUERYSTD.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/vidioc-enumstd.rst  | 11 +++++++----
 Documentation/media/uapi/v4l/vidioc-g-std.rst    | 14 ++++++++++----
 Documentation/media/uapi/v4l/vidioc-querystd.rst | 11 +++++++----
 drivers/media/v4l2-core/v4l2-subdev.c            | 22 ++++++++++++++++++++++
 include/uapi/linux/v4l2-subdev.h                 |  4 ++++
 5 files changed, 50 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/vidioc-enumstd.rst b/Documentation/media/uapi/v4l/vidioc-enumstd.rst
index b7fda29f46a1..2644a62acd4b 100644
--- a/Documentation/media/uapi/v4l/vidioc-enumstd.rst
+++ b/Documentation/media/uapi/v4l/vidioc-enumstd.rst
@@ -2,14 +2,14 @@
 
 .. _VIDIOC_ENUMSTD:
 
-********************
-ioctl VIDIOC_ENUMSTD
-********************
+*******************************************
+ioctl VIDIOC_ENUMSTD, VIDIOC_SUBDEV_ENUMSTD
+*******************************************
 
 Name
 ====
 
-VIDIOC_ENUMSTD - Enumerate supported video standards
+VIDIOC_ENUMSTD - VIDIOC_SUBDEV_ENUMSTD - Enumerate supported video standards
 
 
 Synopsis
@@ -18,6 +18,9 @@ Synopsis
 .. c:function:: int ioctl( int fd, VIDIOC_ENUMSTD, struct v4l2_standard *argp )
     :name: VIDIOC_ENUMSTD
 
+.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_ENUMSTD, struct v4l2_standard *argp )
+    :name: VIDIOC_SUBDEV_ENUMSTD
+
 
 Arguments
 =========
diff --git a/Documentation/media/uapi/v4l/vidioc-g-std.rst b/Documentation/media/uapi/v4l/vidioc-g-std.rst
index 90791ab51a53..8d94f0404df2 100644
--- a/Documentation/media/uapi/v4l/vidioc-g-std.rst
+++ b/Documentation/media/uapi/v4l/vidioc-g-std.rst
@@ -2,14 +2,14 @@
 
 .. _VIDIOC_G_STD:
 
-********************************
-ioctl VIDIOC_G_STD, VIDIOC_S_STD
-********************************
+**************************************************************************
+ioctl VIDIOC_G_STD, VIDIOC_S_STD, VIDIOC_SUBDEV_G_STD, VIDIOC_SUBDEV_S_STD
+**************************************************************************
 
 Name
 ====
 
-VIDIOC_G_STD - VIDIOC_S_STD - Query or select the video standard of the current input
+VIDIOC_G_STD - VIDIOC_S_STD - VIDIOC_SUBDEV_G_STD - VIDIOC_SUBDEV_S_STD - Query or select the video standard of the current input
 
 
 Synopsis
@@ -21,6 +21,12 @@ Synopsis
 .. c:function:: int ioctl( int fd, VIDIOC_S_STD, const v4l2_std_id *argp )
     :name: VIDIOC_S_STD
 
+.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_G_STD, v4l2_std_id *argp )
+    :name: VIDIOC_SUBDEV_G_STD
+
+.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_S_STD, const v4l2_std_id *argp )
+    :name: VIDIOC_SUBDEV_S_STD
+
 
 Arguments
 =========
diff --git a/Documentation/media/uapi/v4l/vidioc-querystd.rst b/Documentation/media/uapi/v4l/vidioc-querystd.rst
index cf40bca19b9f..a8385cc74818 100644
--- a/Documentation/media/uapi/v4l/vidioc-querystd.rst
+++ b/Documentation/media/uapi/v4l/vidioc-querystd.rst
@@ -2,14 +2,14 @@
 
 .. _VIDIOC_QUERYSTD:
 
-*********************
-ioctl VIDIOC_QUERYSTD
-*********************
+*********************************************
+ioctl VIDIOC_QUERYSTD, VIDIOC_SUBDEV_QUERYSTD
+*********************************************
 
 Name
 ====
 
-VIDIOC_QUERYSTD - Sense the video standard received by the current input
+VIDIOC_QUERYSTD - VIDIOC_SUBDEV_QUERYSTD - Sense the video standard received by the current input
 
 
 Synopsis
@@ -18,6 +18,9 @@ Synopsis
 .. c:function:: int ioctl( int fd, VIDIOC_QUERYSTD, v4l2_std_id *argp )
     :name: VIDIOC_QUERYSTD
 
+.. c:function:: int ioctl( int fd, VIDIOC_SUBDEV_QUERYSTD, v4l2_std_id *argp )
+    :name: VIDIOC_SUBDEV_QUERYSTD
+
 
 Arguments
 =========
diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 6a7f7f75dfd7..2b63fa6b6fc9 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -494,6 +494,28 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 	case VIDIOC_SUBDEV_S_DV_TIMINGS:
 		return v4l2_subdev_call(sd, video, s_dv_timings, arg);
+
+	case VIDIOC_SUBDEV_G_STD:
+		return v4l2_subdev_call(sd, video, g_std, arg);
+
+	case VIDIOC_SUBDEV_S_STD: {
+		v4l2_std_id *std = arg;
+
+		return v4l2_subdev_call(sd, video, s_std, *std);
+	}
+
+	case VIDIOC_SUBDEV_ENUMSTD: {
+		struct v4l2_standard *p = arg;
+		v4l2_std_id id;
+
+		if (v4l2_subdev_call(sd, video, g_tvnorms, &id))
+			return -EINVAL;
+
+		return v4l_video_std_enumstd(p, id);
+	}
+
+	case VIDIOC_SUBDEV_QUERYSTD:
+		return v4l2_subdev_call(sd, video, querystd, arg);
 #endif
 	default:
 		return v4l2_subdev_call(sd, core, ioctl, cmd, arg);
diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index c95a53e6743c..03970ce30741 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -170,8 +170,12 @@ struct v4l2_subdev_selection {
 #define VIDIOC_SUBDEV_G_SELECTION		_IOWR('V', 61, struct v4l2_subdev_selection)
 #define VIDIOC_SUBDEV_S_SELECTION		_IOWR('V', 62, struct v4l2_subdev_selection)
 /* The following ioctls are identical to the ioctls in videodev2.h */
+#define VIDIOC_SUBDEV_G_STD			_IOR('V', 23, v4l2_std_id)
+#define VIDIOC_SUBDEV_S_STD			_IOW('V', 24, v4l2_std_id)
+#define VIDIOC_SUBDEV_ENUMSTD			_IOWR('V', 25, struct v4l2_standard)
 #define VIDIOC_SUBDEV_G_EDID			_IOWR('V', 40, struct v4l2_edid)
 #define VIDIOC_SUBDEV_S_EDID			_IOWR('V', 41, struct v4l2_edid)
+#define VIDIOC_SUBDEV_QUERYSTD			_IOR('V', 63, v4l2_std_id)
 #define VIDIOC_SUBDEV_S_DV_TIMINGS		_IOWR('V', 87, struct v4l2_dv_timings)
 #define VIDIOC_SUBDEV_G_DV_TIMINGS		_IOWR('V', 88, struct v4l2_dv_timings)
 #define VIDIOC_SUBDEV_ENUM_DV_TIMINGS		_IOWR('V', 98, struct v4l2_enum_dv_timings)
-- 
cgit v1.2.3


From b8f8c8eb408b36ad55dd41a616b3f51998880fb6 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 21 Jul 2018 15:48:47 +0200
Subject: net: phy: add GBit master / slave error detection

Certain PHY's have issues when operating in GBit slave mode and can
be forced to master mode. Examples are RTL8211C, also the Micrel PHY
driver has a DT setting to force master mode.
If two such chips are link partners the autonegotiation will fail.
Standard defines a self-clearing on read, latched-high bit to
indicate this error. Check this bit to inform the user.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 8 ++++++++
 include/uapi/linux/mii.h     | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b9f5f40a7ac1..db1172db1e7c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1555,6 +1555,14 @@ int genphy_read_status(struct phy_device *phydev)
 			if (adv < 0)
 				return adv;
 
+			if (lpagb & LPA_1000MSFAIL) {
+				if (adv & CTL1000_ENABLE_MASTER)
+					phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n");
+				else
+					phydev_err(phydev, "Master/Slave resolution failed\n");
+				return -ENOLINK;
+			}
+
 			phydev->lp_advertising =
 				mii_stat1000_to_ethtool_lpa_t(lpagb);
 			common_adv_gb = lpagb & adv << 2;
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index b5c2fdcf23fd..a506216591d6 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -136,6 +136,7 @@
 #define CTL1000_ENABLE_MASTER	0x1000
 
 /* 1000BASE-T Status register */
+#define LPA_1000MSFAIL		0x8000	/* Master/Slave resolution failure */
 #define LPA_1000LOCALRXOK	0x2000	/* Link partner local receiver status */
 #define LPA_1000REMRXOK		0x1000	/* Link partner remote receiver status */
 #define LPA_1000FULL		0x0800	/* Link partner 1000BASE-T full duplex */
-- 
cgit v1.2.3


From aea5f654e6b78a0c976f7a25950155932c77a53f Mon Sep 17 00:00:00 2001
From: Nishanth Devarajan <ndev2021@gmail.com>
Date: Mon, 23 Jul 2018 19:37:41 +0530
Subject: net/sched: add skbprio scheduler

Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes packets
according to their skb->priority field. Under congestion, already-enqueued lower
priority packets will be dropped to make space available for higher priority
packets. Skbprio was conceived as a solution for denial-of-service defenses that
need to route packets with different priorities as a means to overcome DoS
attacks.

v5
*Do not reference qdisc_dev(sch)->tx_queue_len for setting limit. Instead set
default sch->limit to 64.

v4
*Drop Documentation/networking/sch_skbprio.txt doc file to move it to tc man
page for Skbprio, in iproute2.

v3
*Drop max_limit parameter in struct skbprio_sched_data and instead use
sch->limit.

*Reference qdisc_dev(sch)->tx_queue_len only once, during initialisation for
qdisc (previously being referenced every time qdisc changes).

*Move qdisc's detailed description from in-code to Documentation/networking.

*When qdisc is saturated, enqueue incoming packet first before dequeueing
lowest priority packet in queue - improves usage of call stack registers.

*Introduce and use overlimit stat to keep track of number of dropped packets.

v2
*Use skb->priority field rather than DS field. Rename queueing discipline as
SKB Priority Queue (previously Gatekeeper Priority Queue).

*Queueing discipline is made classful to expose Skbprio's internal priority
queues.

Signed-off-by: Nishanth Devarajan <ndev2021@gmail.com>
Reviewed-by: Sachin Paryani <sachin.paryani@gmail.com>
Reviewed-by: Cody Doucette <doucette@bu.edu>
Reviewed-by: Michel Machado <michel@digirati.com.br>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  15 ++
 net/sched/Kconfig              |  13 ++
 net/sched/Makefile             |   1 +
 net/sched/sch_skbprio.c        | 320 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 349 insertions(+)
 create mode 100644 net/sched/sch_skbprio.c

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index d9cc9dc4f547..8975fd1a1421 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -124,6 +124,21 @@ struct tc_fifo_qopt {
 	__u32	limit;	/* Queue length: bytes for bfifo, packets for pfifo */
 };
 
+/* SKBPRIO section */
+
+/*
+ * Priorities go from zero to (SKBPRIO_MAX_PRIORITY - 1).
+ * SKBPRIO_MAX_PRIORITY should be at least 64 in order for skbprio to be able
+ * to map one to one the DS field of IPV4 and IPV6 headers.
+ * Memory allocation grows linearly with SKBPRIO_MAX_PRIORITY.
+ */
+
+#define SKBPRIO_MAX_PRIORITY 64
+
+struct tc_skbprio_qopt {
+	__u32	limit;		/* Queue length in packets. */
+};
+
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS	16
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bba71225adbd..e95741388311 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -251,6 +251,19 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_SKBPRIO
+	tristate "SKB priority queue scheduler (SKBPRIO)"
+	help
+	  Say Y here if you want to use the SKB priority queue
+	  scheduler. This schedules packets according to skb->priority,
+	  which is useful for request packets in DoS mitigation systems such
+	  as Gatekeeper.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_skbprio.
+
+	  If unsure, say N.
+
 config NET_SCH_CHOKE
 	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 910ec7463a36..f0403f49edcb 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_PLUG)	+= sch_plug.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_SKBPRIO)	+= sch_skbprio.o
 obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c
new file mode 100644
index 000000000000..52c0b6d8f1d7
--- /dev/null
+++ b/net/sched/sch_skbprio.c
@@ -0,0 +1,320 @@
+/*
+ * net/sched/sch_skbprio.c  SKB Priority Queue.
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Nishanth Devarajan, <ndev2021@gmail.com>
+ *		Cody Doucette, <doucette@bu.edu>
+ *	        original idea by Michel Machado, Cody Doucette, and Qiaobin Fu
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/inet_ecn.h>
+
+/*		SKB Priority Queue
+ *	=================================
+ *
+ * Skbprio (SKB Priority Queue) is a queueing discipline that prioritizes
+ * packets according to their skb->priority field. Under congestion,
+ * Skbprio drops already-enqueued lower priority packets to make space
+ * available for higher priority packets; it was conceived as a solution
+ * for denial-of-service defenses that need to route packets with different
+ * priorities as a mean to overcome DoS attacks.
+ */
+
+struct skbprio_sched_data {
+	/* Queue state. */
+	struct sk_buff_head qdiscs[SKBPRIO_MAX_PRIORITY];
+	struct gnet_stats_queue qstats[SKBPRIO_MAX_PRIORITY];
+	u16 highest_prio;
+	u16 lowest_prio;
+};
+
+static u16 calc_new_high_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->highest_prio - 1; prio >= q->lowest_prio; prio--) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return 0 (default highest priority setting). */
+	return 0;
+}
+
+static u16 calc_new_low_prio(const struct skbprio_sched_data *q)
+{
+	int prio;
+
+	for (prio = q->lowest_prio + 1; prio <= q->highest_prio; prio++) {
+		if (!skb_queue_empty(&q->qdiscs[prio]))
+			return prio;
+	}
+
+	/* SKB queue is empty, return SKBPRIO_MAX_PRIORITY - 1
+	 * (default lowest priority setting).
+	 */
+	return SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static int skbprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			  struct sk_buff **to_free)
+{
+	const unsigned int max_priority = SKBPRIO_MAX_PRIORITY - 1;
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *qdisc;
+	struct sk_buff_head *lp_qdisc;
+	struct sk_buff *to_drop;
+	u16 prio, lp;
+
+	/* Obtain the priority of @skb. */
+	prio = min(skb->priority, max_priority);
+
+	qdisc = &q->qdiscs[prio];
+	if (sch->q.qlen < sch->limit) {
+		__skb_queue_tail(qdisc, skb);
+		qdisc_qstats_backlog_inc(sch, skb);
+		q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+		/* Check to update highest and lowest priorities. */
+		if (prio > q->highest_prio)
+			q->highest_prio = prio;
+
+		if (prio < q->lowest_prio)
+			q->lowest_prio = prio;
+
+		sch->q.qlen++;
+		return NET_XMIT_SUCCESS;
+	}
+
+	/* If this packet has the lowest priority, drop it. */
+	lp = q->lowest_prio;
+	if (prio <= lp) {
+		q->qstats[prio].drops++;
+		q->qstats[prio].overlimits++;
+		return qdisc_drop(skb, sch, to_free);
+	}
+
+	__skb_queue_tail(qdisc, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
+	q->qstats[prio].backlog += qdisc_pkt_len(skb);
+
+	/* Drop the packet at the tail of the lowest priority qdisc. */
+	lp_qdisc = &q->qdiscs[lp];
+	to_drop = __skb_dequeue_tail(lp_qdisc);
+	BUG_ON(!to_drop);
+	qdisc_qstats_backlog_dec(sch, to_drop);
+	qdisc_drop(to_drop, sch, to_free);
+
+	q->qstats[lp].backlog -= qdisc_pkt_len(to_drop);
+	q->qstats[lp].drops++;
+	q->qstats[lp].overlimits++;
+
+	/* Check to update highest and lowest priorities. */
+	if (skb_queue_empty(lp_qdisc)) {
+		if (q->lowest_prio == q->highest_prio) {
+			/* The incoming packet is the only packet in queue. */
+			BUG_ON(sch->q.qlen != 1);
+			q->lowest_prio = prio;
+			q->highest_prio = prio;
+		} else {
+			q->lowest_prio = calc_new_low_prio(q);
+		}
+	}
+
+	if (prio > q->highest_prio)
+		q->highest_prio = prio;
+
+	return NET_XMIT_CN;
+}
+
+static struct sk_buff *skbprio_dequeue(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *hpq = &q->qdiscs[q->highest_prio];
+	struct sk_buff *skb = __skb_dequeue(hpq);
+
+	if (unlikely(!skb))
+		return NULL;
+
+	sch->q.qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_bstats_update(sch, skb);
+
+	q->qstats[q->highest_prio].backlog -= qdisc_pkt_len(skb);
+
+	/* Update highest priority field. */
+	if (skb_queue_empty(hpq)) {
+		if (q->lowest_prio == q->highest_prio) {
+			BUG_ON(sch->q.qlen);
+			q->highest_prio = 0;
+			q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+		} else {
+			q->highest_prio = calc_new_high_prio(q);
+		}
+	}
+	return skb;
+}
+
+static int skbprio_change(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct tc_skbprio_qopt *ctl = nla_data(opt);
+
+	sch->limit = ctl->limit;
+	return 0;
+}
+
+static int skbprio_init(struct Qdisc *sch, struct nlattr *opt,
+			struct netlink_ext_ack *extack)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	/* Initialise all queues, one for each possible priority. */
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_head_init(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+	sch->limit = 64;
+	if (!opt)
+		return 0;
+
+	return skbprio_change(sch, opt, extack);
+}
+
+static int skbprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct tc_skbprio_qopt opt;
+
+	opt.limit = sch->limit;
+
+	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+		return -1;
+
+	return skb->len;
+}
+
+static void skbprio_reset(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+
+	memset(&q->qstats, 0, sizeof(q->qstats));
+	q->highest_prio = 0;
+	q->lowest_prio = SKBPRIO_MAX_PRIORITY - 1;
+}
+
+static void skbprio_destroy(struct Qdisc *sch)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < SKBPRIO_MAX_PRIORITY; prio++)
+		__skb_queue_purge(&q->qdiscs[prio]);
+}
+
+static struct Qdisc *skbprio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long skbprio_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static int skbprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			     struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int skbprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				   struct gnet_dump *d)
+{
+	struct skbprio_sched_data *q = qdisc_priv(sch);
+	if (gnet_stats_copy_queue(d, NULL, &q->qstats[cl - 1],
+		q->qstats[cl - 1].qlen) < 0)
+		return -1;
+	return 0;
+}
+
+static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) {
+		if (arg->count < arg->skip) {
+			arg->count++;
+			continue;
+		}
+		if (arg->fn(sch, i + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops skbprio_class_ops = {
+	.leaf		=	skbprio_leaf,
+	.find		=	skbprio_find,
+	.dump		=	skbprio_dump_class,
+	.dump_stats	=	skbprio_dump_class_stats,
+	.walk		=	skbprio_walk,
+};
+
+static struct Qdisc_ops skbprio_qdisc_ops __read_mostly = {
+	.cl_ops		=	&skbprio_class_ops,
+	.id		=	"skbprio",
+	.priv_size	=	sizeof(struct skbprio_sched_data),
+	.enqueue	=	skbprio_enqueue,
+	.dequeue	=	skbprio_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	skbprio_init,
+	.reset		=	skbprio_reset,
+	.change		=	skbprio_change,
+	.dump		=	skbprio_dump,
+	.destroy	=	skbprio_destroy,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init skbprio_module_init(void)
+{
+	return register_qdisc(&skbprio_qdisc_ops);
+}
+
+static void __exit skbprio_module_exit(void)
+{
+	unregister_qdisc(&skbprio_qdisc_ops);
+}
+
+module_init(skbprio_module_init)
+module_exit(skbprio_module_exit)
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 5525b8314389a0c558d15464e86f438974b94e32 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Mon, 18 Jun 2018 00:38:52 -0400
Subject: media: mem2mem: Make .job_abort optional

Implementing job_abort() does not make sense on some drivers.
This is not a problem, as the abort is not required to
wait for the job to finish. Quite the opposite, drivers
are encouraged not to wait.

Demote v4l2_m2m_ops.job_abort from required to optional, and
clean all drivers with dummy implementations.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 5 -----
 drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c    | 5 -----
 drivers/media/platform/rcar_jpu.c               | 5 -----
 drivers/media/platform/rockchip/rga/rga.c       | 6 ------
 drivers/media/platform/s5p-g2d/g2d.c            | 5 -----
 drivers/media/platform/s5p-jpeg/jpeg-core.c     | 7 -------
 drivers/media/v4l2-core/v4l2-mem2mem.c          | 6 +++---
 include/media/v4l2-mem2mem.h                    | 2 +-
 8 files changed, 4 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
index 328e8f650d9b..4f24da8afecc 100644
--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
@@ -861,14 +861,9 @@ static int mtk_jpeg_job_ready(void *priv)
 	return (ctx->state == MTK_JPEG_RUNNING) ? 1 : 0;
 }
 
-static void mtk_jpeg_job_abort(void *priv)
-{
-}
-
 static const struct v4l2_m2m_ops mtk_jpeg_m2m_ops = {
 	.device_run = mtk_jpeg_device_run,
 	.job_ready  = mtk_jpeg_job_ready,
-	.job_abort  = mtk_jpeg_job_abort,
 };
 
 static int mtk_jpeg_queue_init(void *priv, struct vb2_queue *src_vq,
diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
index c2e2f5f1ebf1..ceffc31cc6eb 100644
--- a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
+++ b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
@@ -441,10 +441,6 @@ static void mtk_mdp_m2m_stop_streaming(struct vb2_queue *q)
 	pm_runtime_put(&ctx->mdp_dev->pdev->dev);
 }
 
-static void mtk_mdp_m2m_job_abort(void *priv)
-{
-}
-
 /* The color format (num_planes) must be already configured. */
 static void mtk_mdp_prepare_addr(struct mtk_mdp_ctx *ctx,
 				 struct vb2_buffer *vb,
@@ -1215,7 +1211,6 @@ static const struct v4l2_file_operations mtk_mdp_m2m_fops = {
 
 static const struct v4l2_m2m_ops mtk_mdp_m2m_ops = {
 	.device_run	= mtk_mdp_m2m_device_run,
-	.job_abort	= mtk_mdp_m2m_job_abort,
 };
 
 int mtk_mdp_register_m2m_device(struct mtk_mdp_dev *mdp)
diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c
index 90d40b2d6259..a808258a4180 100644
--- a/drivers/media/platform/rcar_jpu.c
+++ b/drivers/media/platform/rcar_jpu.c
@@ -1492,13 +1492,8 @@ static void jpu_device_run(void *priv)
 	spin_unlock_irqrestore(&ctx->jpu->lock, flags);
 }
 
-static void jpu_job_abort(void *priv)
-{
-}
-
 static const struct v4l2_m2m_ops jpu_m2m_ops = {
 	.device_run	= jpu_device_run,
-	.job_abort	= jpu_job_abort,
 };
 
 /*
diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
index 8ace1873202a..69a2797d7bbe 100644
--- a/drivers/media/platform/rockchip/rga/rga.c
+++ b/drivers/media/platform/rockchip/rga/rga.c
@@ -39,11 +39,6 @@
 static int debug;
 module_param(debug, int, 0644);
 
-static void job_abort(void *prv)
-{
-	/* Can't do anything rational here */
-}
-
 static void device_run(void *prv)
 {
 	struct rga_ctx *ctx = prv;
@@ -104,7 +99,6 @@ static irqreturn_t rga_isr(int irq, void *prv)
 
 static struct v4l2_m2m_ops rga_m2m_ops = {
 	.device_run = device_run,
-	.job_abort = job_abort,
 };
 
 static int
diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c
index ee7322b33666..e901201b6fcc 100644
--- a/drivers/media/platform/s5p-g2d/g2d.c
+++ b/drivers/media/platform/s5p-g2d/g2d.c
@@ -483,10 +483,6 @@ static int vidioc_s_crop(struct file *file, void *prv, const struct v4l2_crop *c
 	return 0;
 }
 
-static void job_abort(void *prv)
-{
-}
-
 static void device_run(void *prv)
 {
 	struct g2d_ctx *ctx = prv;
@@ -605,7 +601,6 @@ static const struct video_device g2d_videodev = {
 
 static const struct v4l2_m2m_ops g2d_m2m_ops = {
 	.device_run	= device_run,
-	.job_abort	= job_abort,
 };
 
 static const struct of_device_id exynos_g2d_match[];
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c
index 79b63da27f53..04fd2e0493c0 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c
@@ -2467,26 +2467,19 @@ static int s5p_jpeg_job_ready(void *priv)
 	return 1;
 }
 
-static void s5p_jpeg_job_abort(void *priv)
-{
-}
-
 static struct v4l2_m2m_ops s5p_jpeg_m2m_ops = {
 	.device_run	= s5p_jpeg_device_run,
 	.job_ready	= s5p_jpeg_job_ready,
-	.job_abort	= s5p_jpeg_job_abort,
 };
 
 static struct v4l2_m2m_ops exynos3250_jpeg_m2m_ops = {
 	.device_run	= exynos3250_jpeg_device_run,
 	.job_ready	= s5p_jpeg_job_ready,
-	.job_abort	= s5p_jpeg_job_abort,
 };
 
 static struct v4l2_m2m_ops exynos4_jpeg_m2m_ops = {
 	.device_run	= exynos4_jpeg_device_run,
 	.job_ready	= s5p_jpeg_job_ready,
-	.job_abort	= s5p_jpeg_job_abort,
 };
 
 /*
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 725da74d15d8..4aeedb91594a 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -322,7 +322,8 @@ static void v4l2_m2m_cancel_job(struct v4l2_m2m_ctx *m2m_ctx)
 	m2m_ctx->job_flags |= TRANS_ABORT;
 	if (m2m_ctx->job_flags & TRANS_RUNNING) {
 		spin_unlock_irqrestore(&m2m_dev->job_spinlock, flags);
-		m2m_dev->m2m_ops->job_abort(m2m_ctx->priv);
+		if (m2m_dev->m2m_ops->job_abort)
+			m2m_dev->m2m_ops->job_abort(m2m_ctx->priv);
 		dprintk("m2m_ctx %p running, will wait to complete", m2m_ctx);
 		wait_event(m2m_ctx->finished,
 				!(m2m_ctx->job_flags & TRANS_RUNNING));
@@ -788,8 +789,7 @@ struct v4l2_m2m_dev *v4l2_m2m_init(const struct v4l2_m2m_ops *m2m_ops)
 {
 	struct v4l2_m2m_dev *m2m_dev;
 
-	if (!m2m_ops || WARN_ON(!m2m_ops->device_run) ||
-			WARN_ON(!m2m_ops->job_abort))
+	if (!m2m_ops || WARN_ON(!m2m_ops->device_run))
 		return ERR_PTR(-EINVAL);
 
 	m2m_dev = kzalloc(sizeof *m2m_dev, GFP_KERNEL);
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index af48b1eca025..d60d3060f762 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -32,7 +32,7 @@
  *		assumed that one source and one destination buffer are all
  *		that is required for the driver to perform one full transaction.
  *		This method may not sleep.
- * @job_abort:	required. Informs the driver that it has to abort the currently
+ * @job_abort:	optional. Informs the driver that it has to abort the currently
  *		running transaction as soon as possible (i.e. as soon as it can
  *		stop the device safely; e.g. in the next interrupt handler),
  *		even if the transaction would not have been finished by then.
-- 
cgit v1.2.3


From bb039a870c0593a4deaa72c2693d02a87723305c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 18 Jul 2018 09:25:16 -0700
Subject: IB/core: Allow ULPs to specify NULL as the third
 ib_post_(send|recv|srq_recv)() argument

This patch does not change the behavior of the modified functions.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_verbs.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b626aa2310c5..99bcf64a4762 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3278,7 +3278,9 @@ static inline int ib_post_srq_recv(struct ib_srq *srq,
 				   struct ib_recv_wr *recv_wr,
 				   struct ib_recv_wr **bad_recv_wr)
 {
-	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+	struct ib_recv_wr *dummy;
+
+	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
 }
 
 /**
@@ -3379,7 +3381,9 @@ static inline int ib_post_send(struct ib_qp *qp,
 			       struct ib_send_wr *send_wr,
 			       struct ib_send_wr **bad_send_wr)
 {
-	return qp->device->post_send(qp, send_wr, bad_send_wr);
+	struct ib_send_wr *dummy;
+
+	return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
 }
 
 /**
@@ -3394,7 +3398,9 @@ static inline int ib_post_recv(struct ib_qp *qp,
 			       struct ib_recv_wr *recv_wr,
 			       struct ib_recv_wr **bad_recv_wr)
 {
-	return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
+	struct ib_recv_wr *dummy;
+
+	return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
 
 struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
-- 
cgit v1.2.3


From 62cedf3e60af03e47849fe2bd6a03ec179422a8a Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Fri, 20 Jul 2018 10:39:13 +0200
Subject: locking/rtmutex: Allow specifying a subclass for nested locking

Needed for annotating rt_mutex locks.

Tested-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Deepa Dinamani <deepadinamani@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Chang <dpf@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
Link: http://lkml.kernel.org/r/20180720083914.1950-2-peda@axentia.se
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rtmutex.h  |  7 +++++++
 kernel/locking/rtmutex.c | 29 +++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h
index 1b92a28dd672..6fd615a0eea9 100644
--- a/include/linux/rtmutex.h
+++ b/include/linux/rtmutex.h
@@ -106,7 +106,14 @@ static inline int rt_mutex_is_locked(struct rt_mutex *lock)
 extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
 extern void rt_mutex_destroy(struct rt_mutex *lock);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
+#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
+#else
 extern void rt_mutex_lock(struct rt_mutex *lock);
+#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
+#endif
+
 extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
 extern int rt_mutex_timed_lock(struct rt_mutex *lock,
 			       struct hrtimer_sleeper *timeout);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4f014be7a4b8..2823d4163a37 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1465,6 +1465,29 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 		rt_mutex_postunlock(&wake_q);
 }
 
+static inline void __rt_mutex_lock(struct rt_mutex *lock, unsigned int subclass)
+{
+	might_sleep();
+
+	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/**
+ * rt_mutex_lock_nested - lock a rt_mutex
+ *
+ * @lock: the rt_mutex to be locked
+ * @subclass: the lockdep subclass
+ */
+void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
+{
+	__rt_mutex_lock(lock, subclass);
+}
+EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
+#endif
+
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /**
  * rt_mutex_lock - lock a rt_mutex
  *
@@ -1472,12 +1495,10 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
  */
 void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
-	might_sleep();
-
-	mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+	__rt_mutex_lock(lock, 0);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
+#endif
 
 /**
  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
-- 
cgit v1.2.3


From 6e30396767508101eacec8b93b068e8905e660dc Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Wed, 20 Jun 2018 22:32:42 +0530
Subject: sched/numa: Remove redundant field

'numa_entry' is a struct list_head defined in task_struct, but never used.

No functional change.

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1529514181-9842-2-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43731fe51c97..e0f4f56c9310 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1017,7 +1017,6 @@ struct task_struct {
 	u64				last_sum_exec_runtime;
 	struct callback_head		numa_work;
 
-	struct list_head		numa_entry;
 	struct numa_group		*numa_group;
 
 	/*
-- 
cgit v1.2.3


From 6cbc304f2f360f25cc8607817239d6f4a2fd3dc5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 May 2018 15:48:41 +0200
Subject: perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)

Vince reported the perf_fuzzer giving various unwinder warnings and
Josh reported:

> Deja vu.  Most of these are related to perf PEBS, similar to the
> following issue:
>
>   b8000586c90b ("perf/x86/intel: Cure bogus unwind from PEBS entries")
>
> This is basically the ORC version of that.  setup_pebs_sample_data() is
> assembling a franken-pt_regs which ORC isn't happy about.  RIP is
> inconsistent with some of the other registers (like RSP and RBP).

And where the previous unwinder only needed BP,SP ORC also requires
IP. But we cannot spoof IP because then the sample will get displaced,
entirely negating the point of PEBS.

So cure the whole thing differently by doing the unwind early; this
does however require a means to communicate we did the unwind early.
We (ab)use an unused sample_type bit for this, which we set on events
that fill out the data->callchain before the normal
perf_prepare_sample().

Debugged-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Tested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c    |  3 +++
 arch/x86/events/intel/ds.c      | 25 +++++++++++--------------
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h |  2 ++
 kernel/events/core.c            |  6 ++++--
 5 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..86f0c15dcc2d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2997,6 +2997,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
+
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
 	}
 
 	if (needs_branch_stack(event)) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8cf03f101938..8dbba77e0518 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1185,17 +1185,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		data->data_src.val = val;
 	}
 
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happend between the record and
+	 * PMI.
+	 */
+	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+		data->callchain = perf_callchain(event, iregs);
+
 	/*
 	 * We use the interrupt regs as a base because the PEBS record does not
 	 * contain a full regs set, specifically it seems to lack segment
 	 * descriptors, which get used by things like user_mode().
 	 *
 	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
-	 *
-	 * We must however always use BP,SP from iregs for the unwinder to stay
-	 * sane; the record BP,SP can point into thin air when the record is
-	 * from a previous PMI context or an (I)RET happend between the record
-	 * and PMI.
 	 */
 	*regs = *iregs;
 
@@ -1214,15 +1218,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->si = pebs->si;
 		regs->di = pebs->di;
 
-		/*
-		 * Per the above; only set BP,SP if we don't need callchains.
-		 *
-		 * XXX: does this make sense?
-		 */
-		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-			regs->bp = pebs->bp;
-			regs->sp = pebs->sp;
-		}
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1fa12887ec02..87f6db437e4a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1130,6 +1130,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 		   u32 max_stack, bool crosstask, bool add_mark);
+extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b8e288a1f740..eeb787b1c53c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -143,6 +143,8 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
 	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
+
+	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63,
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f0434a9951a..cdb32cf8e33c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6343,7 +6343,7 @@ static u64 perf_virt_to_phys(u64 virt)
 
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
-static struct perf_callchain_entry *
+struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
 	bool kernel = !event->attr.exclude_callchain_kernel;
@@ -6382,7 +6382,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int size = 1;
 
-		data->callchain = perf_callchain(event, regs);
+		if (!(sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
+			data->callchain = perf_callchain(event, regs);
+
 		size += data->callchain->nr;
 
 		header->size += size * sizeof(u64);
-- 
cgit v1.2.3


From df79ed2c064363cdc7d2d896923c1885d4e30520 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 16 Jul 2018 12:30:08 +0100
Subject: locking/atomics: Simplify cmpxchg() instrumentation

Currently we define some fairly verbose wrappers for the cmpxchg()
family so that we can pass a pointer and size into kasan_check_write().

The wrappers duplicate the size-switching logic necessary in arch code,
and only work for scalar types. On some architectures, (cmp)xchg are
used on non-scalar types, and thus the instrumented wrappers need to be
able to handle this.

We could take the type-punning logic from {READ,WRITE}_ONCE(), but this
makes the wrappers even more verbose, and requires several local
variables in the macros.

Instead, let's simplify the wrappers into simple macros which:

* snapshot the pointer into a single local variable, called __ai_ptr to
  avoid conflicts with variables in the scope of the caller.

* call kasan_check_write() on __ai_ptr.

* invoke the relevant arch_*() function, passing the original arguments,
  bar __ai_ptr being substituted for ptr.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andy.shevchenko@gmail.com
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: catalin.marinas@arm.com
Cc: glider@google.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: parri.andrea@gmail.com
Cc: peter@hurleysoftware.com
Link: http://lkml.kernel.org/r/20180716113017.3909-4-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h | 100 +++++-------------------------
 1 file changed, 15 insertions(+), 85 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 3c64e95d5ed0..c7c3e4cdd942 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -408,109 +408,39 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
 }
 #endif
 
-static __always_inline unsigned long
-cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size)
-{
-	kasan_check_write(ptr, size);
-	switch (size) {
-	case 1:
-		return arch_cmpxchg((u8 *)ptr, (u8)old, (u8)new);
-	case 2:
-		return arch_cmpxchg((u16 *)ptr, (u16)old, (u16)new);
-	case 4:
-		return arch_cmpxchg((u32 *)ptr, (u32)old, (u32)new);
-	case 8:
-		BUILD_BUG_ON(sizeof(unsigned long) != 8);
-		return arch_cmpxchg((u64 *)ptr, (u64)old, (u64)new);
-	}
-	BUILD_BUG();
-	return 0;
-}
-
 #define cmpxchg(ptr, old, new)						\
 ({									\
-	((__typeof__(*(ptr)))cmpxchg_size((ptr), (unsigned long)(old),	\
-		(unsigned long)(new), sizeof(*(ptr))));			\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_cmpxchg(__ai_ptr, (old), (new));				\
 })
 
-static __always_inline unsigned long
-sync_cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new,
-		  int size)
-{
-	kasan_check_write(ptr, size);
-	switch (size) {
-	case 1:
-		return arch_sync_cmpxchg((u8 *)ptr, (u8)old, (u8)new);
-	case 2:
-		return arch_sync_cmpxchg((u16 *)ptr, (u16)old, (u16)new);
-	case 4:
-		return arch_sync_cmpxchg((u32 *)ptr, (u32)old, (u32)new);
-	case 8:
-		BUILD_BUG_ON(sizeof(unsigned long) != 8);
-		return arch_sync_cmpxchg((u64 *)ptr, (u64)old, (u64)new);
-	}
-	BUILD_BUG();
-	return 0;
-}
-
 #define sync_cmpxchg(ptr, old, new)					\
 ({									\
-	((__typeof__(*(ptr)))sync_cmpxchg_size((ptr),			\
-		(unsigned long)(old), (unsigned long)(new),		\
-		sizeof(*(ptr))));					\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_sync_cmpxchg(__ai_ptr, (old), (new));			\
 })
 
-static __always_inline unsigned long
-cmpxchg_local_size(volatile void *ptr, unsigned long old, unsigned long new,
-		   int size)
-{
-	kasan_check_write(ptr, size);
-	switch (size) {
-	case 1:
-		return arch_cmpxchg_local((u8 *)ptr, (u8)old, (u8)new);
-	case 2:
-		return arch_cmpxchg_local((u16 *)ptr, (u16)old, (u16)new);
-	case 4:
-		return arch_cmpxchg_local((u32 *)ptr, (u32)old, (u32)new);
-	case 8:
-		BUILD_BUG_ON(sizeof(unsigned long) != 8);
-		return arch_cmpxchg_local((u64 *)ptr, (u64)old, (u64)new);
-	}
-	BUILD_BUG();
-	return 0;
-}
-
 #define cmpxchg_local(ptr, old, new)					\
 ({									\
-	((__typeof__(*(ptr)))cmpxchg_local_size((ptr),			\
-		(unsigned long)(old), (unsigned long)(new),		\
-		sizeof(*(ptr))));					\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_cmpxchg_local(__ai_ptr, (old), (new));			\
 })
 
-static __always_inline u64
-cmpxchg64_size(volatile u64 *ptr, u64 old, u64 new)
-{
-	kasan_check_write(ptr, sizeof(*ptr));
-	return arch_cmpxchg64(ptr, old, new);
-}
-
 #define cmpxchg64(ptr, old, new)					\
 ({									\
-	((__typeof__(*(ptr)))cmpxchg64_size((ptr), (u64)(old),		\
-		(u64)(new)));						\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_cmpxchg64(__ai_ptr, (old), (new));				\
 })
 
-static __always_inline u64
-cmpxchg64_local_size(volatile u64 *ptr, u64 old, u64 new)
-{
-	kasan_check_write(ptr, sizeof(*ptr));
-	return arch_cmpxchg64_local(ptr, old, new);
-}
-
 #define cmpxchg64_local(ptr, old, new)					\
 ({									\
-	((__typeof__(*(ptr)))cmpxchg64_local_size((ptr), (u64)(old),	\
-		(u64)(new)));						\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_cmpxchg64_local(__ai_ptr, (old), (new));			\
 })
 
 /*
-- 
cgit v1.2.3


From f9881cc43b118efc6f82fef2d121166113ee9f8e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 16 Jul 2018 12:30:09 +0100
Subject: locking/atomics: Instrument xchg()

While we instrument all of the (non-relaxed) atomic_*() functions and
cmpxchg(), we missed xchg().

Let's add instrumentation for xchg(), fixing up x86 to implement
arch_xchg().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andy.shevchenko@gmail.com
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: catalin.marinas@arm.com
Cc: glider@google.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: parri.andrea@gmail.com
Cc: peter@hurleysoftware.com
Link: http://lkml.kernel.org/r/20180716113017.3909-5-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/atomic.h             | 2 +-
 arch/x86/include/asm/atomic64_64.h        | 2 +-
 arch/x86/include/asm/cmpxchg.h            | 2 +-
 include/asm-generic/atomic-instrumented.h | 7 +++++++
 4 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 823fd2f320cf..b143717b92b3 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -202,7 +202,7 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n
 
 static inline int arch_atomic_xchg(atomic_t *v, int new)
 {
-	return xchg(&v->counter, new);
+	return arch_xchg(&v->counter, new);
 }
 
 static inline void arch_atomic_and(int i, atomic_t *v)
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 849f1c566a11..4343d9b4f30e 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -188,7 +188,7 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, l
 
 static inline long arch_atomic64_xchg(atomic64_t *v, long new)
 {
-	return xchg(&v->counter, new);
+	return arch_xchg(&v->counter, new);
 }
 
 static inline void arch_atomic64_and(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index e3efd8a06066..a55d79b233d3 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -75,7 +75,7 @@ extern void __add_wrong_size(void)
  * use "asm volatile" and "memory" clobbers to prevent gcc from moving
  * information around.
  */
-#define xchg(ptr, v)	__xchg_op((ptr), (v), xchg, "")
+#define arch_xchg(ptr, v)	__xchg_op((ptr), (v), xchg, "")
 
 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index c7c3e4cdd942..53481b6eacdf 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -408,6 +408,13 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
 }
 #endif
 
+#define xchg(ptr, new)							\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
+	arch_xchg(__ai_ptr, (new));					\
+})
+
 #define cmpxchg(ptr, old, new)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-- 
cgit v1.2.3


From 4d2b25f630c731218d04f72580b4de68cb7a6e00 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 16 Jul 2018 12:30:10 +0100
Subject: locking/atomics: Instrument cmpxchg_double*()

We currently don't instrument cmpxchg_double() and
cmpxchg_double_local() due to compilation issues reported in the past,
which are supposedly related to GCC bug 72873 [1], reported when GCC 7
was not yet released. This bug only applies to x86-64, and does not
apply to other architectures.

While the test case for GCC bug 72873 triggers issues with released
versions of GCC, the instrumented kernel code compiles fine for all
configurations I have tried, and it is unclear how the two cases
are/were related.

As we can't reproduce the kernel build failures, let's instrument
cmpxchg_double*() again. We can revisit the issue if build failures
reappear.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andy.shevchenko@gmail.com
Cc: aryabinin@virtuozzo.com
Cc: catalin.marinas@arm.com
Cc: glider@google.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: parri.andrea@gmail.com
Cc: peter@hurleysoftware.com
Link: http://lkml.kernel.org/r/20180716113017.3909-6-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 53481b6eacdf..0d4b1d3dbc1e 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -450,23 +450,18 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
 	arch_cmpxchg64_local(__ai_ptr, (old), (new));			\
 })
 
-/*
- * Originally we had the following code here:
- *     __typeof__(p1) ____p1 = (p1);
- *     kasan_check_write(____p1, 2 * sizeof(*____p1));
- *     arch_cmpxchg_double(____p1, (p2), (o1), (o2), (n1), (n2));
- * But it leads to compilation failures (see gcc issue 72873).
- * So for now it's left non-instrumented.
- * There are few callers of cmpxchg_double(), so it's not critical.
- */
 #define cmpxchg_double(p1, p2, o1, o2, n1, n2)				\
 ({									\
-	arch_cmpxchg_double((p1), (p2), (o1), (o2), (n1), (n2));	\
+	typeof(p1) __ai_p1 = (p1);					\
+	kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1));		\
+	arch_cmpxchg_double(__ai_p1, (p2), (o1), (o2), (n1), (n2));	\
 })
 
-#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2)			\
-({									\
-	arch_cmpxchg_double_local((p1), (p2), (o1), (o2), (n1), (n2));	\
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2)				\
+({										\
+	typeof(p1) __ai_p1 = (p1);						\
+	kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1));			\
+	arch_cmpxchg_double_local(__ai_p1, (p2), (o1), (o2), (n1), (n2));	\
 })
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-- 
cgit v1.2.3


From fd2efaa4eb5317c3a86357a83a7d456a1b86a0ac Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 16 Jul 2018 12:30:11 +0100
Subject: locking/atomics: Rework ordering barriers

Currently architectures can override __atomic_op_*() to define the barriers
used before/after a relaxed atomic when used to build acquire/release/fence
variants.

This has the unfortunate property of requiring the architecture to define the
full wrapper for the atomics, rather than just the barriers they care about,
and gets in the way of generating atomics which can be easily read.

Instead, this patch has architectures define an optional set of barriers:

* __atomic_acquire_fence()
* __atomic_release_fence()
* __atomic_pre_full_fence()
* __atomic_post_full_fence()

... which <linux/atomic.h> uses to build the wrappers.

It would be nice if we could undef these, along with the __atomic_op_*()
wrappers, but that would break the cmpxchg() wrappers, which are written
in preprocessor. Undefs would have been nice, but alas.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: andy.shevchenko@gmail.com
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: catalin.marinas@arm.com
Cc: dvyukov@google.com
Cc: glider@google.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: peter@hurleysoftware.com
Link: http://lkml.kernel.org/r/20180716113017.3909-7-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/alpha/include/asm/atomic.h   |  8 ++++----
 arch/powerpc/include/asm/atomic.h | 17 +++++------------
 arch/riscv/include/asm/atomic.h   | 17 +++++------------
 include/linux/atomic.h            | 38 ++++++++++++++++++++++----------------
 4 files changed, 36 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 4a6a8f58c9c9..150a1c5d6a2c 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -18,11 +18,11 @@
  * To ensure dependency ordering is preserved for the _relaxed and
  * _release atomics, an smp_read_barrier_depends() is unconditionally
  * inserted into the _relaxed variants, which are used to build the
- * barriered versions. To avoid redundant back-to-back fences, we can
- * define the _acquire and _fence versions explicitly.
+ * barriered versions. Avoid redundant back-to-back fences in the
+ * _acquire and _fence versions.
  */
-#define __atomic_op_acquire(op, args...)	op##_relaxed(args)
-#define __atomic_op_fence			__atomic_op_release
+#define __atomic_acquire_fence()
+#define __atomic_post_full_fence()
 
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index a0156cb43d1f..963abf8bf1c0 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -18,18 +18,11 @@
  * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
  * on the platform without lwsync.
  */
-#define __atomic_op_acquire(op, args...)				\
-({									\
-	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory");	\
-	__ret;								\
-})
-
-#define __atomic_op_release(op, args...)				\
-({									\
-	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory");	\
-	op##_relaxed(args);						\
-})
+#define __atomic_acquire_fence()					\
+	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
+
+#define __atomic_release_fence()					\
+	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
 
 static __inline__ int atomic_read(const atomic_t *v)
 {
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 512b89485790..c452359c9cb8 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -25,18 +25,11 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define __atomic_op_acquire(op, args...)				\
-({									\
-	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory");	\
-	__ret;								\
-})
-
-#define __atomic_op_release(op, args...)				\
-({									\
-	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");	\
-	op##_relaxed(args);						\
-})
+#define __atomic_acquire_fence()					\
+	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
+
+#define __atomic_release_fence()					\
+	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
 
 static __always_inline int atomic_read(const atomic_t *v)
 {
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8e04f1f69bd9..1e8e88bdaf09 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -38,40 +38,46 @@
  * barriers on top of the relaxed variant. In the case where the relaxed
  * variant is already fully ordered, no additional barriers are needed.
  *
- * Besides, if an arch has a special barrier for acquire/release, it could
- * implement its own __atomic_op_* and use the same framework for building
- * variants
- *
- * If an architecture overrides __atomic_op_acquire() it will probably want
- * to define smp_mb__after_spinlock().
+ * If an architecture overrides __atomic_acquire_fence() it will probably
+ * want to define smp_mb__after_spinlock().
  */
-#ifndef __atomic_op_acquire
+#ifndef __atomic_acquire_fence
+#define __atomic_acquire_fence		smp_mb__after_atomic
+#endif
+
+#ifndef __atomic_release_fence
+#define __atomic_release_fence		smp_mb__before_atomic
+#endif
+
+#ifndef __atomic_pre_full_fence
+#define __atomic_pre_full_fence		smp_mb__before_atomic
+#endif
+
+#ifndef __atomic_post_full_fence
+#define __atomic_post_full_fence	smp_mb__after_atomic
+#endif
+
 #define __atomic_op_acquire(op, args...)				\
 ({									\
 	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
-	smp_mb__after_atomic();						\
+	__atomic_acquire_fence();					\
 	__ret;								\
 })
-#endif
 
-#ifndef __atomic_op_release
 #define __atomic_op_release(op, args...)				\
 ({									\
-	smp_mb__before_atomic();					\
+	__atomic_release_fence();					\
 	op##_relaxed(args);						\
 })
-#endif
 
-#ifndef __atomic_op_fence
 #define __atomic_op_fence(op, args...)					\
 ({									\
 	typeof(op##_relaxed(args)) __ret;				\
-	smp_mb__before_atomic();					\
+	__atomic_pre_full_fence();					\
 	__ret = op##_relaxed(args);					\
-	smp_mb__after_atomic();						\
+	__atomic_post_full_fence();					\
 	__ret;								\
 })
-#endif
 
 /* atomic_add_return_relaxed */
 #ifndef atomic_add_return_relaxed
-- 
cgit v1.2.3


From f48a534adbd321362c3199140008c9e2f2a81b3a Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 28 Jun 2018 07:43:26 -0400
Subject: media: uapi/linux/cec.h: add 5V events

Add two new events to signal when the 5V line goes high or low.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/cec.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h
index 20fe091b7e96..097fcd812471 100644
--- a/include/uapi/linux/cec.h
+++ b/include/uapi/linux/cec.h
@@ -384,6 +384,8 @@ struct cec_log_addrs {
 #define CEC_EVENT_PIN_CEC_HIGH		4
 #define CEC_EVENT_PIN_HPD_LOW		5
 #define CEC_EVENT_PIN_HPD_HIGH		6
+#define CEC_EVENT_PIN_5V_LOW		7
+#define CEC_EVENT_PIN_5V_HIGH		8
 
 #define CEC_EVENT_FL_INITIAL_STATE	(1 << 0)
 #define CEC_EVENT_FL_DROPPED_EVENTS	(1 << 1)
-- 
cgit v1.2.3


From 4786b0d6f3ca6da8db02b0a58cd2b785cb0b80ab Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 28 Jun 2018 07:43:46 -0400
Subject: media: cec: add support for 5V signal testing

Add support for the new 5V CEC events

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/cec/cec-adap.c | 18 +++++++++++++++++-
 drivers/media/cec/cec-api.c  |  8 ++++++++
 include/media/cec-pin.h      |  4 ++++
 include/media/cec.h          | 12 +++++++++++-
 4 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index b7fad0ec5710..030b2602faf0 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -74,7 +74,7 @@ void cec_queue_event_fh(struct cec_fh *fh,
 			const struct cec_event *new_ev, u64 ts)
 {
 	static const u16 max_events[CEC_NUM_EVENTS] = {
-		1, 1, 800, 800, 8, 8,
+		1, 1, 800, 800, 8, 8, 8, 8
 	};
 	struct cec_event_entry *entry;
 	unsigned int ev_idx = new_ev->event - 1;
@@ -176,6 +176,22 @@ void cec_queue_pin_hpd_event(struct cec_adapter *adap, bool is_high, ktime_t ts)
 }
 EXPORT_SYMBOL_GPL(cec_queue_pin_hpd_event);
 
+/* Notify userspace that the 5V pin changed state at the given time. */
+void cec_queue_pin_5v_event(struct cec_adapter *adap, bool is_high, ktime_t ts)
+{
+	struct cec_event ev = {
+		.event = is_high ? CEC_EVENT_PIN_5V_HIGH :
+				   CEC_EVENT_PIN_5V_LOW,
+	};
+	struct cec_fh *fh;
+
+	mutex_lock(&adap->devnode.lock);
+	list_for_each_entry(fh, &adap->devnode.fhs, list)
+		cec_queue_event_fh(fh, &ev, ktime_to_ns(ts));
+	mutex_unlock(&adap->devnode.lock);
+}
+EXPORT_SYMBOL_GPL(cec_queue_pin_5v_event);
+
 /*
  * Queue a new message for this filehandle.
  *
diff --git a/drivers/media/cec/cec-api.c b/drivers/media/cec/cec-api.c
index 10b67fc40318..b6536bbad530 100644
--- a/drivers/media/cec/cec-api.c
+++ b/drivers/media/cec/cec-api.c
@@ -579,6 +579,14 @@ static int cec_open(struct inode *inode, struct file *filp)
 			cec_queue_event_fh(fh, &ev, 0);
 		}
 	}
+	if (adap->pin && adap->pin->ops->read_5v) {
+		err = adap->pin->ops->read_5v(adap);
+		if (err >= 0) {
+			ev.event = err ? CEC_EVENT_PIN_5V_HIGH :
+					 CEC_EVENT_PIN_5V_LOW;
+			cec_queue_event_fh(fh, &ev, 0);
+		}
+	}
 #endif
 
 	list_add(&fh->list, &devnode->fhs);
diff --git a/include/media/cec-pin.h b/include/media/cec-pin.h
index ed16c6dde0ba..604e79cb6cbf 100644
--- a/include/media/cec-pin.h
+++ b/include/media/cec-pin.h
@@ -25,6 +25,9 @@
  * @read_hpd:	read the HPD pin. Return true if high, false if low or
  *		an error if negative. If NULL or -ENOTTY is returned,
  *		then this is not supported.
+ * @read_5v:	read the 5V pin. Return true if high, false if low or
+ *		an error if negative. If NULL or -ENOTTY is returned,
+ *		then this is not supported.
  *
  * These operations are used by the cec pin framework to manipulate
  * the CEC pin.
@@ -38,6 +41,7 @@ struct cec_pin_ops {
 	void (*free)(struct cec_adapter *adap);
 	void (*status)(struct cec_adapter *adap, struct seq_file *file);
 	int  (*read_hpd)(struct cec_adapter *adap);
+	int  (*read_5v)(struct cec_adapter *adap);
 };
 
 /**
diff --git a/include/media/cec.h b/include/media/cec.h
index 580ab1042898..ff9847f7f99d 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -79,7 +79,7 @@ struct cec_event_entry {
 };
 
 #define CEC_NUM_CORE_EVENTS 2
-#define CEC_NUM_EVENTS CEC_EVENT_PIN_HPD_HIGH
+#define CEC_NUM_EVENTS CEC_EVENT_PIN_5V_HIGH
 
 struct cec_fh {
 	struct list_head	list;
@@ -308,6 +308,16 @@ void cec_queue_pin_cec_event(struct cec_adapter *adap, bool is_high,
  */
 void cec_queue_pin_hpd_event(struct cec_adapter *adap, bool is_high, ktime_t ts);
 
+/**
+ * cec_queue_pin_5v_event() - queue a pin event with a given timestamp.
+ *
+ * @adap:	pointer to the cec adapter
+ * @is_high:	when true the 5V pin is high, otherwise it is low
+ * @ts:		the timestamp for this event
+ *
+ */
+void cec_queue_pin_5v_event(struct cec_adapter *adap, bool is_high, ktime_t ts);
+
 /**
  * cec_get_edid_phys_addr() - find and return the physical address
  *
-- 
cgit v1.2.3


From d27fb99f62af7b79c542d161aa5155ed57271ddc Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 23 Jul 2018 22:42:48 +0100
Subject: dma-mapping: relax warning for per-device areas

The reasons why dma_free_attrs() should not be called from IRQ context
are not necessarily obvious and somewhat buried in the development
history, so let's start by documenting the warning itself to help anyone
who does happen to hit it and wonder what the deal is.

However, this check turns out to be slightly over-restrictive for the
way that per-device memory has been spliced into the general API, since
for that case we know that dma_declare_coherent_memory() has created an
appropriate CPU mapping for the entire area and nothing dynamic should
be happening. Given that the usage model for per-device memory is often
more akin to streaming DMA than 'real' coherent DMA (e.g. allocating and
freeing space to copy short-lived packets in and out), it is also
somewhat more reasonable for those operations to happen in IRQ handlers
for such devices.

Therefore, let's move the irqs_disabled() check down past the per-device
area hook, so that that gets a chance to resolve the request before we
reach definite "you're doing it wrong" territory.

Reported-by: Fredrik Noring <noring@nocrew.org>
Tested-by: Fredrik Noring <noring@nocrew.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f9cc309507d9..1db6a6b46d0d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -538,10 +538,17 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!ops);
-	WARN_ON(irqs_disabled());
 
 	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
 		return;
+	/*
+	 * On non-coherent platforms which implement DMA-coherent buffers via
+	 * non-cacheable remaps, ops->free() may call vunmap(). Thus getting
+	 * this far in IRQ context is a) at risk of a BUG_ON() or trying to
+	 * sleep on some machines, and b) an indication that the driver is
+	 * probably misusing the coherent API anyway.
+	 */
+	WARN_ON(irqs_disabled());
 
 	if (!ops->free || !cpu_addr)
 		return;
-- 
cgit v1.2.3


From 30b914c8d825da7d4c651ade34667cef05e3ee27 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Tue, 27 Feb 2018 07:24:09 -0500
Subject: media: add 'index' to struct media_v2_pad

The v2 pad structure never exposed the pad index, which made it impossible
to call the MEDIA_IOC_SETUP_LINK ioctl, which needs that information.

It is really trivial to just expose this information, so implement this.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/media-device.c |  1 +
 include/uapi/linux/media.h   | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 47bb2254fbfd..047d38372a27 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -331,6 +331,7 @@ static long media_device_get_topology(struct media_device *mdev, void *arg)
 		kpad.id = pad->graph_obj.id;
 		kpad.entity_id = pad->entity->graph_obj.id;
 		kpad.flags = pad->flags;
+		kpad.index = pad->index;
 
 		if (copy_to_user(upad, &kpad, sizeof(kpad)))
 			ret = -EFAULT;
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 86c7dcc9cba3..f6338bd57929 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -305,11 +305,21 @@ struct media_v2_interface {
 	};
 } __attribute__ ((packed));
 
+/*
+ * Appeared in 4.19.0.
+ *
+ * The media_version argument comes from the media_version field in
+ * struct media_device_info.
+ */
+#define MEDIA_V2_PAD_HAS_INDEX(media_version) \
+	((media_version) >= ((4 << 16) | (19 << 8) | 0))
+
 struct media_v2_pad {
 	__u32 id;
 	__u32 entity_id;
 	__u32 flags;
-	__u32 reserved[5];
+	__u32 index;
+	__u32 reserved[4];
 } __attribute__ ((packed));
 
 struct media_v2_link {
-- 
cgit v1.2.3


From 588f4ee7e6fc5c9a0fb07c7051cdd341949e0feb Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Wed, 28 Feb 2018 05:41:11 -0500
Subject: media: add flags field to struct media_v2_entity

The v2 entity structure never exposed the entity flags, which made it
impossible to detect connector or default entities.

It is really trivial to just expose this information, so implement this.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/media-device.c |  1 +
 include/uapi/linux/media.h   | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 047d38372a27..14959b19a342 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -266,6 +266,7 @@ static long media_device_get_topology(struct media_device *mdev, void *arg)
 		memset(&kentity, 0, sizeof(kentity));
 		kentity.id = entity->graph_obj.id;
 		kentity.function = entity->function;
+		kentity.flags = entity->flags;
 		strlcpy(kentity.name, entity->name,
 			sizeof(kentity.name));
 
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index f6338bd57929..ebd2cda67833 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -280,11 +280,21 @@ struct media_links_enum {
  * MC next gen API definitions
  */
 
+/*
+ * Appeared in 4.19.0.
+ *
+ * The media_version argument comes from the media_version field in
+ * struct media_device_info.
+ */
+#define MEDIA_V2_ENTITY_HAS_FLAGS(media_version) \
+	((media_version) >= ((4 << 16) | (19 << 8) | 0))
+
 struct media_v2_entity {
 	__u32 id;
 	char name[64];
 	__u32 function;		/* Main function of the entity */
-	__u32 reserved[6];
+	__u32 flags;
+	__u32 reserved[5];
 } __attribute__ ((packed));
 
 /* Should match the specific fields at media_intf_devnode */
-- 
cgit v1.2.3


From d272bc92c4a4fcec4102c011eaa85630bd2d8d38 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Thu, 28 Jun 2018 08:56:02 -0400
Subject: media: rename MEDIA_ENT_F_DTV_DECODER to MEDIA_ENT_F_DV_DECODER

The use of 'DTV' is very confusing since it normally refers to Digital
TV e.g. DVB etc.

Instead use 'DV' (Digital Video), which nicely corresponds to the
DV Timings API used to configure such receivers and transmitters.

We keep an alias to avoid breaking userspace applications.

Since this alias is only available if __KERNEL__ is *not* defined
(i.e. it is only available for userspace, not kernelspace), any
drivers that use it also have to be converted to the new define.
These drivers are adv7604, adv7842 and tda1997x.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/mediactl/media-types.rst | 2 +-
 drivers/media/i2c/adv7604.c                       | 1 +
 drivers/media/i2c/adv7842.c                       | 1 +
 drivers/media/i2c/tda1997x.c                      | 2 +-
 include/uapi/linux/media.h                        | 4 +++-
 5 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst
index 96910cf2eaaa..c11b0c7e890b 100644
--- a/Documentation/media/uapi/mediactl/media-types.rst
+++ b/Documentation/media/uapi/mediactl/media-types.rst
@@ -200,7 +200,7 @@ Types and flags used to represent the media graph elements
          MIPI CSI-2, etc.), and outputs them on its source pad to an output
          video bus of another type (eDP, MIPI CSI-2, parallel, etc.).
 
-    *  -  ``MEDIA_ENT_F_DTV_DECODER``
+    *  -  ``MEDIA_ENT_F_DV_DECODER``
        -  Digital video decoder. The basic function of the video decoder is
 	  to accept digital video from a wide variety of sources
 	  and output it in some digital video standard, with appropriate
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index 1a3b2c04d9f9..668be2bca57a 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -3499,6 +3499,7 @@ static int adv76xx_probe(struct i2c_client *client,
 	for (i = 0; i < state->source_pad; ++i)
 		state->pads[i].flags = MEDIA_PAD_FL_SINK;
 	state->pads[state->source_pad].flags = MEDIA_PAD_FL_SOURCE;
+	sd->entity.function = MEDIA_ENT_F_DV_DECODER;
 
 	err = media_entity_pads_init(&sd->entity, state->source_pad + 1,
 				state->pads);
diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c
index fddac32e5051..99d781343fb1 100644
--- a/drivers/media/i2c/adv7842.c
+++ b/drivers/media/i2c/adv7842.c
@@ -3541,6 +3541,7 @@ static int adv7842_probe(struct i2c_client *client,
 	INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug,
 			adv7842_delayed_work_enable_hotplug);
 
+	sd->entity.function = MEDIA_ENT_F_DV_DECODER;
 	state->pad.flags = MEDIA_PAD_FL_SOURCE;
 	err = media_entity_pads_init(&sd->entity, 1, &state->pad);
 	if (err)
diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c
index 039a92c3294a..d114ac5243ec 100644
--- a/drivers/media/i2c/tda1997x.c
+++ b/drivers/media/i2c/tda1997x.c
@@ -2570,7 +2570,7 @@ static int tda1997x_probe(struct i2c_client *client,
 		 id->name, i2c_adapter_id(client->adapter),
 		 client->addr);
 	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS;
-	sd->entity.function = MEDIA_ENT_F_DTV_DECODER;
+	sd->entity.function = MEDIA_ENT_F_DV_DECODER;
 	sd->entity.ops = &tda1997x_media_ops;
 
 	/* set allowed mbus modes based on chip, bus-type, and bus-width */
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index ebd2cda67833..99f5e0978ebb 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -93,7 +93,7 @@ struct media_device_info {
  * Video decoder functions
  */
 #define MEDIA_ENT_F_ATV_DECODER			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 4)
-#define MEDIA_ENT_F_DTV_DECODER			(MEDIA_ENT_F_BASE + 0x6001)
+#define MEDIA_ENT_F_DV_DECODER			(MEDIA_ENT_F_BASE + 0x6001)
 
 /*
  * Digital TV, analog TV, radio and/or software defined radio tuner functions.
@@ -400,6 +400,8 @@ struct media_v2_topology {
 #define MEDIA_ENT_T_V4L2_SUBDEV_DECODER		MEDIA_ENT_F_ATV_DECODER
 #define MEDIA_ENT_T_V4L2_SUBDEV_TUNER		MEDIA_ENT_F_TUNER
 
+#define MEDIA_ENT_F_DTV_DECODER			MEDIA_ENT_F_DV_DECODER
+
 /*
  * There is still no ALSA support in the media controller. These
  * defines should not have been added and we leave them here only
-- 
cgit v1.2.3


From 7c8362c4b9eb7e4e9cbebc71ab529dc455c21b6b Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 18 Jun 2018 05:07:43 -0400
Subject: media: media.h: add MEDIA_ENT_F_DV_ENCODER

Add a new function for digital video encoders such as HDMI transmitters.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/mediactl/media-types.rst | 7 +++++++
 include/uapi/linux/media.h                        | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst
index c11b0c7e890b..e90d4d0a7f8b 100644
--- a/Documentation/media/uapi/mediactl/media-types.rst
+++ b/Documentation/media/uapi/mediactl/media-types.rst
@@ -206,6 +206,13 @@ Types and flags used to represent the media graph elements
 	  and output it in some digital video standard, with appropriate
 	  timing signals.
 
+    *  -  ``MEDIA_ENT_F_DV_ENCODER``
+       -  Digital video encoder. The basic function of the video encoder is
+	  to accept digital video from some digital video standard with
+	  appropriate timing signals (usually a parallel video bus with sync
+	  signals) and output this to a digital video output connector such
+	  as HDMI or DisplayPort.
+
 ..  tabularcolumns:: |p{5.5cm}|p{12.0cm}|
 
 .. _media-entity-flag:
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 99f5e0978ebb..6f594fa238c2 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -90,10 +90,11 @@ struct media_device_info {
 #define MEDIA_ENT_F_LENS			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 3)
 
 /*
- * Video decoder functions
+ * Video decoder/encoder functions
  */
 #define MEDIA_ENT_F_ATV_DECODER			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 4)
 #define MEDIA_ENT_F_DV_DECODER			(MEDIA_ENT_F_BASE + 0x6001)
+#define MEDIA_ENT_F_DV_ENCODER			(MEDIA_ENT_F_BASE + 0x6002)
 
 /*
  * Digital TV, analog TV, radio and/or software defined radio tuner functions.
-- 
cgit v1.2.3


From f2399f7522ac2a255f6502537609a51974be84be Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Thu, 28 Jun 2018 09:03:42 -0400
Subject: media: media.h: reorder video en/decoder functions

Keep the function defines in numerical order: 0x6000 comes after
0x2000, so move it back.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/media.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 6f594fa238c2..76d9bd64c116 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -89,13 +89,6 @@ struct media_device_info {
 #define MEDIA_ENT_F_FLASH			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 2)
 #define MEDIA_ENT_F_LENS			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 3)
 
-/*
- * Video decoder/encoder functions
- */
-#define MEDIA_ENT_F_ATV_DECODER			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 4)
-#define MEDIA_ENT_F_DV_DECODER			(MEDIA_ENT_F_BASE + 0x6001)
-#define MEDIA_ENT_F_DV_ENCODER			(MEDIA_ENT_F_BASE + 0x6002)
-
 /*
  * Digital TV, analog TV, radio and/or software defined radio tuner functions.
  *
@@ -140,6 +133,13 @@ struct media_device_info {
 #define MEDIA_ENT_F_VID_MUX			(MEDIA_ENT_F_BASE + 0x5001)
 #define MEDIA_ENT_F_VID_IF_BRIDGE		(MEDIA_ENT_F_BASE + 0x5002)
 
+/*
+ * Video decoder/encoder functions
+ */
+#define MEDIA_ENT_F_ATV_DECODER			(MEDIA_ENT_F_OLD_SUBDEV_BASE + 4)
+#define MEDIA_ENT_F_DV_DECODER			(MEDIA_ENT_F_BASE + 0x6001)
+#define MEDIA_ENT_F_DV_ENCODER			(MEDIA_ENT_F_BASE + 0x6002)
+
 /* Entity flags */
 #define MEDIA_ENT_FL_DEFAULT			(1 << 0)
 #define MEDIA_ENT_FL_CONNECTOR			(1 << 1)
-- 
cgit v1.2.3


From ea107a183be1d6e69448d7100121c479e24b7c1d Mon Sep 17 00:00:00 2001
From: vkorjani <vikas.korjani@intel.com>
Date: Tue, 10 Apr 2018 10:12:45 -0400
Subject: drm: Add support for pps and compression mode command packet

After enabling DSC we need to send compression mode command packet
and pps data packet, for which 2 new data types are added
07h  Compression Mode Data Type Write , short write, 2 parameters
0Ah  PPS Long Write (word count determines number of bytes)
This patch adds support to send these packets.

Cc: David Airlie <airlied@linux.ie>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-fbdev@vger.kernel.org

Changes in v3:
- None

Signed-off-by: vkorjani <vikas.korjani@intel.com>
[seanpaul removed pps_write_buffer fn, added types to packet_format helpers]
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_mipi_dsi.c | 2 ++
 include/video/mipi_display.h   | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index bc73b7f5b9fc..80b75501f5c6 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -392,6 +392,7 @@ bool mipi_dsi_packet_format_is_short(u8 type)
 	case MIPI_DSI_DCS_SHORT_WRITE:
 	case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
 	case MIPI_DSI_DCS_READ:
+	case MIPI_DSI_DCS_COMPRESSION_MODE:
 	case MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE:
 		return true;
 	}
@@ -410,6 +411,7 @@ EXPORT_SYMBOL(mipi_dsi_packet_format_is_short);
 bool mipi_dsi_packet_format_is_long(u8 type)
 {
 	switch (type) {
+	case MIPI_DSI_PPS_LONG_WRITE:
 	case MIPI_DSI_NULL_PACKET:
 	case MIPI_DSI_BLANKING_PACKET:
 	case MIPI_DSI_GENERIC_LONG_WRITE:
diff --git a/include/video/mipi_display.h b/include/video/mipi_display.h
index 19aa65a35546..49a53ef8da96 100644
--- a/include/video/mipi_display.h
+++ b/include/video/mipi_display.h
@@ -38,6 +38,9 @@ enum {
 
 	MIPI_DSI_DCS_READ				= 0x06,
 
+	MIPI_DSI_DCS_COMPRESSION_MODE                   = 0x07,
+	MIPI_DSI_PPS_LONG_WRITE                         = 0x0A,
+
 	MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE		= 0x37,
 
 	MIPI_DSI_END_OF_TRANSMISSION			= 0x08,
-- 
cgit v1.2.3


From 45841a977391f24b9bf713548c588d148a576d22 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 4 Jul 2018 10:13:47 -0400
Subject: media: media.h: add encoder/decoder functions for codecs

Add MEDIA_ENT_F_PROC_VIDEO_EN/DECODER to be used for the encoder
and decoder entities of codec hardware.

[mchehab+samsung@kernel.org: split description on two senteces by adding dots]
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/mediactl/media-types.rst | 11 +++++++++++
 include/uapi/linux/media.h                        |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst
index e90d4d0a7f8b..7b17acc049cf 100644
--- a/Documentation/media/uapi/mediactl/media-types.rst
+++ b/Documentation/media/uapi/mediactl/media-types.rst
@@ -37,6 +37,8 @@ Types and flags used to represent the media graph elements
 .. _MEDIA-ENT-F-PROC-VIDEO-LUT:
 .. _MEDIA-ENT-F-PROC-VIDEO-SCALER:
 .. _MEDIA-ENT-F-PROC-VIDEO-STATISTICS:
+.. _MEDIA-ENT-F-PROC-VIDEO-ENCODER:
+.. _MEDIA-ENT-F-PROC-VIDEO-DECODER:
 .. _MEDIA-ENT-F-VID-MUX:
 .. _MEDIA-ENT-F-VID-IF-BRIDGE:
 .. _MEDIA-ENT-F-DTV-DECODER:
@@ -188,6 +190,15 @@ Types and flags used to represent the media graph elements
 	  received on its sink pad and outputs the statistics data on
 	  its source pad.
 
+    *  -  ``MEDIA_ENT_F_PROC_VIDEO_ENCODER``
+       -  Video (MPEG, HEVC, VPx, etc.) encoder. An entity capable of
+          compressing video frames. Must have one sink pad and one source pad.
+
+    *  -  ``MEDIA_ENT_F_PROC_VIDEO_DECODER``
+       -  Video (MPEG, HEVC, VPx, etc.) decoder. An entity capable of
+          decompressing a compressed video stream into uncompressed video
+	  frames. Must have one sink pad and one source pad.
+
     *  -  ``MEDIA_ENT_F_VID_MUX``
        - Video multiplexer. An entity capable of multiplexing must have at
          least two sink pads and one source pad, and must pass the video
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 76d9bd64c116..82ec9f132a53 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -126,6 +126,8 @@ struct media_device_info {
 #define MEDIA_ENT_F_PROC_VIDEO_LUT		(MEDIA_ENT_F_BASE + 0x4004)
 #define MEDIA_ENT_F_PROC_VIDEO_SCALER		(MEDIA_ENT_F_BASE + 0x4005)
 #define MEDIA_ENT_F_PROC_VIDEO_STATISTICS	(MEDIA_ENT_F_BASE + 0x4006)
+#define MEDIA_ENT_F_PROC_VIDEO_ENCODER		(MEDIA_ENT_F_BASE + 0x4007)
+#define MEDIA_ENT_F_PROC_VIDEO_DECODER		(MEDIA_ENT_F_BASE + 0x4008)
 
 /*
  * Switch and bridge entity functions
-- 
cgit v1.2.3


From 62c3fce04154777e6a3ce3a27f123b645d36dcff Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 4 Jul 2018 10:13:47 -0400
Subject: media: videodev.h: add PIX_FMT_FWHT for use with vicodec

Add a new pixelformat for the vicodec software codec using the
Fast Walsh Hadamard Transform.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-compressed.rst | 7 +++++++
 drivers/media/v4l2-core/v4l2-ioctl.c               | 1 +
 include/uapi/linux/videodev2.h                     | 1 +
 3 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index abec03937bb3..d382e7a5c38e 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -95,3 +95,10 @@ Compressed Formats
       - ``V4L2_PIX_FMT_HEVC``
       - 'HEVC'
       - HEVC/H.265 video elementary stream.
+    * .. _V4L2-PIX-FMT-FWHT:
+
+      - ``V4L2_PIX_FMT_FWHT``
+      - 'FWHT'
+      - Video elementary stream using a codec based on the Fast Walsh Hadamard
+        Transform. This codec is implemented by the vicodec ('Virtual Codec')
+	driver. See the vicodec-codec.h header for more details.
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 01670567641a..26d9702069fd 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1311,6 +1311,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 		case V4L2_PIX_FMT_VP8:		descr = "VP8"; break;
 		case V4L2_PIX_FMT_VP9:		descr = "VP9"; break;
 		case V4L2_PIX_FMT_HEVC:		descr = "HEVC"; break; /* aka H.265 */
+		case V4L2_PIX_FMT_FWHT:		descr = "FWHT"; break; /* used in vicodec */
 		case V4L2_PIX_FMT_CPIA1:	descr = "GSPCA CPiA YUV"; break;
 		case V4L2_PIX_FMT_WNVA:		descr = "WNVA"; break;
 		case V4L2_PIX_FMT_SN9C10X:	descr = "GSPCA SN9C10X"; break;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 600877be5c22..3ea8097c2470 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -636,6 +636,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_VP8      v4l2_fourcc('V', 'P', '8', '0') /* VP8 */
 #define V4L2_PIX_FMT_VP9      v4l2_fourcc('V', 'P', '9', '0') /* VP9 */
 #define V4L2_PIX_FMT_HEVC     v4l2_fourcc('H', 'E', 'V', 'C') /* HEVC aka H.265 */
+#define V4L2_PIX_FMT_FWHT     v4l2_fourcc('F', 'W', 'H', 'T') /* Fast Walsh Hadamard Transform (vicodec) */
 
 /*  Vendor-specific formats   */
 #define V4L2_PIX_FMT_CPIA1    v4l2_fourcc('C', 'P', 'I', 'A') /* cpia1 YUV */
-- 
cgit v1.2.3


From ee1228cca15ce097b7badebfdd0fef23a2cca9e1 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 26 Jun 2018 09:26:52 -0400
Subject: media: v4l2-mem2mem: add v4l2_m2m_last_buf()

This can be used to mark the last queued source buffer as the last
buffer.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-mem2mem.c | 18 ++++++++++++++++++
 include/media/v4l2-mem2mem.h           | 29 +++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 4aeedb91594a..d9565ee44578 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -129,6 +129,24 @@ void *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx)
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_next_buf);
 
+void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx)
+{
+	struct v4l2_m2m_buffer *b;
+	unsigned long flags;
+
+	spin_lock_irqsave(&q_ctx->rdy_spinlock, flags);
+
+	if (list_empty(&q_ctx->rdy_queue)) {
+		spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
+		return NULL;
+	}
+
+	b = list_last_entry(&q_ctx->rdy_queue, struct v4l2_m2m_buffer, list);
+	spin_unlock_irqrestore(&q_ctx->rdy_spinlock, flags);
+	return &b->vb;
+}
+EXPORT_SYMBOL_GPL(v4l2_m2m_last_buf);
+
 void *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx)
 {
 	struct v4l2_m2m_buffer *b;
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index d60d3060f762..d655720e16a1 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -449,6 +449,35 @@ static inline void *v4l2_m2m_next_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
 	return v4l2_m2m_next_buf(&m2m_ctx->cap_q_ctx);
 }
 
+/**
+ * v4l2_m2m_last_buf() - return last buffer from the list of ready buffers
+ *
+ * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx
+ */
+void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx);
+
+/**
+ * v4l2_m2m_last_src_buf() - return last destination buffer from the list of
+ * ready buffers
+ *
+ * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
+ */
+static inline void *v4l2_m2m_last_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
+{
+	return v4l2_m2m_last_buf(&m2m_ctx->out_q_ctx);
+}
+
+/**
+ * v4l2_m2m_last_dst_buf() - return last destination buffer from the list of
+ * ready buffers
+ *
+ * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
+ */
+static inline void *v4l2_m2m_last_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
+{
+	return v4l2_m2m_last_buf(&m2m_ctx->cap_q_ctx);
+}
+
 /**
  * v4l2_m2m_for_each_dst_buf() - iterate over a list of destination ready
  * buffers
-- 
cgit v1.2.3


From bf7b70482704bb0785e6b65ee30a6383ab22bbc6 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 5 Jul 2018 05:38:00 -0400
Subject: media: v4l2-ctrls.h: fix v4l2_ctrl field description typos

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/v4l2-ctrls.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 5b445b5654f7..f615ba1b29dd 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -181,10 +181,10 @@ typedef void (*v4l2_ctrl_notify_fnc)(struct v4l2_ctrl *ctrl, void *priv);
  *		not freed when the control is deleted. Should this be needed
  *		then a new internal bitfield can be added to tell the framework
  *		to free this pointer.
- * @p_cur:	The control's current value represented via a union with
+ * @p_cur:	The control's current value represented via a union which
  *		provides a standard way of accessing control types
  *		through a pointer.
- * @p_new:	The control's new value represented via a union with provides
+ * @p_new:	The control's new value represented via a union which provides
  *		a standard way of accessing control types
  *		through a pointer.
  */
-- 
cgit v1.2.3


From 9d0aa601e4cd9c0892f90d36e8488d79b72f4073 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Jul 2018 15:41:54 +0200
Subject: udlfb: fix semaphore value leak

I observed that the performance of the udl fb driver degrades over time.
On a freshly booted machine, it takes 6 seconds to do "ls -la /usr/bin";
after some time of use, the same operation takes 14 seconds.

The reason is that the value of "limit_sem" decays over time.

The udl driver uses a semaphore "limit_set" to specify how many free urbs
are there on dlfb->urbs.list. If the count is zero, the "down" operation
will sleep until some urbs are added to the freelist.

In order to avoid some hypothetical deadlock, the driver will not call
"up" immediately, but it will offload it to a workqueue. The problem is
that if we call "schedule_delayed_work" on the same work item multiple
times, the work item may only be executed once.

This is happening:
* some urb completes
* dlfb_urb_completion adds it to the free list
* dlfb_urb_completion calls schedule_delayed_work to schedule the function
  dlfb_release_urb_work to increase the semaphore count
* as the urb is on the free list, some other task grabs it and submits it
* the submitted urb completes, dlfb_urb_completion is called again
* dlfb_urb_completion calls schedule_delayed_work, but the work is already
  scheduled, so it does nothing
* finally, dlfb_release_urb_work is called, it increases the semaphore
  count by 1, although it should increase it by 2

So, the semaphore count is decreasing over time, and this causes gradual
performance degradation.

Note that in the current kernel, the "up" function may be called from
interrupt and it may race with the "down" function called by another
thread, so we don't have to offload the call of "up" to a workqueue at
all. This patch removes the workqueue code. The patch also changes
"down_interruptible" to "down" in dlfb_free_urb_list, so that we will
clean up the driver properly even if a signal arrives.

With this patch, the performance of udlfb no longer degrades.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
[b.zolnierkie: fix immediatelly -> immediately typo]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/udlfb.c | 27 ++-------------------------
 include/video/udlfb.h       |  1 -
 2 files changed, 2 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index f365d4862015..c1b1bb8fddcd 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -922,14 +922,6 @@ static void dlfb_free(struct kref *kref)
 	kfree(dlfb);
 }
 
-static void dlfb_release_urb_work(struct work_struct *work)
-{
-	struct urb_node *unode = container_of(work, struct urb_node,
-					      release_urb_work.work);
-
-	up(&unode->dlfb->urbs.limit_sem);
-}
-
 static void dlfb_free_framebuffer(struct dlfb_data *dlfb)
 {
 	struct fb_info *info = dlfb->info;
@@ -1789,14 +1781,7 @@ static void dlfb_urb_completion(struct urb *urb)
 	dlfb->urbs.available++;
 	spin_unlock_irqrestore(&dlfb->urbs.lock, flags);
 
-	/*
-	 * When using fb_defio, we deadlock if up() is called
-	 * while another is waiting. So queue to another process.
-	 */
-	if (fb_defio)
-		schedule_delayed_work(&unode->release_urb_work, 0);
-	else
-		up(&dlfb->urbs.limit_sem);
+	up(&dlfb->urbs.limit_sem);
 }
 
 static void dlfb_free_urb_list(struct dlfb_data *dlfb)
@@ -1805,16 +1790,11 @@ static void dlfb_free_urb_list(struct dlfb_data *dlfb)
 	struct list_head *node;
 	struct urb_node *unode;
 	struct urb *urb;
-	int ret;
 	unsigned long flags;
 
 	/* keep waiting and freeing, until we've got 'em all */
 	while (count--) {
-
-		/* Getting interrupted means a leak, but ok at disconnect */
-		ret = down_interruptible(&dlfb->urbs.limit_sem);
-		if (ret)
-			break;
+		down(&dlfb->urbs.limit_sem);
 
 		spin_lock_irqsave(&dlfb->urbs.lock, flags);
 
@@ -1854,9 +1834,6 @@ static int dlfb_alloc_urb_list(struct dlfb_data *dlfb, int count, size_t size)
 			break;
 		unode->dlfb = dlfb;
 
-		INIT_DELAYED_WORK(&unode->release_urb_work,
-			  dlfb_release_urb_work);
-
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
 			kfree(unode);
diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index 0cabe6b09095..8a1948f3c07f 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -20,7 +20,6 @@ struct dloarea {
 struct urb_node {
 	struct list_head entry;
 	struct dlfb_data *dlfb;
-	struct delayed_work release_urb_work;
 	struct urb *urb;
 };
 
-- 
cgit v1.2.3


From 564f1807379298dfdb12ed0d5b25fcb89c238527 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Jul 2018 15:41:55 +0200
Subject: udlfb: don't switch if we are switching to the same videomode

The udlfb driver reprograms the hardware everytime the user switches the
console, that makes quite unusable when working on the console.

This patch makes the driver remember the videomode we are in and avoid
reprogramming the hardware if we switch to the same videomode.

We mask the "activate" field and the "FB_VMODE_SMOOTH_XPAN" flag when
comparing the videomode, because they cause spurious switches when
switching to and from the Xserver.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/udlfb.c | 18 ++++++++++++++++--
 include/video/udlfb.h       |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 864e2917c276..fa63a2e359d6 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1041,10 +1041,24 @@ static int dlfb_ops_set_par(struct fb_info *info)
 	int result;
 	u16 *pix_framebuffer;
 	int i;
+	struct fb_var_screeninfo fvs;
+
+	/* clear the activate field because it causes spurious miscompares */
+	fvs = info->var;
+	fvs.activate = 0;
+	fvs.vmode &= ~FB_VMODE_SMOOTH_XPAN;
+
+	if (!memcmp(&dlfb->current_mode, &fvs, sizeof(struct fb_var_screeninfo)))
+		return 0;
 
 	result = dlfb_set_video_mode(dlfb, &info->var);
 
-	if ((result == 0) && (dlfb->fb_count == 0)) {
+	if (result)
+		return result;
+
+	dlfb->current_mode = fvs;
+
+	if (dlfb->fb_count == 0) {
 
 		/* paint greenscreen */
 
@@ -1056,7 +1070,7 @@ static int dlfb_ops_set_par(struct fb_info *info)
 				   info->screen_base);
 	}
 
-	return result;
+	return 0;
 }
 
 /* To fonzi the jukebox (e.g. make blanking changes take effect) */
diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index 8a1948f3c07f..bc2e9cf34aa5 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -56,6 +56,7 @@ struct dlfb_data {
 	atomic_t bytes_identical; /* saved effort with backbuffer comparison */
 	atomic_t bytes_sent; /* to usb, after compression including overhead */
 	atomic_t cpu_kcycles_used; /* transpired during pixel processing */
+	struct fb_var_screeninfo current_mode;
 };
 
 #define NR_USB_REQUEST_I2C_SUB_IO 0x02
-- 
cgit v1.2.3


From 2c29cfc3eaf11779176bf41475cfca49bccba11c Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Jul 2018 15:41:55 +0200
Subject: udlfb: make a local copy of fb_ops

The defio subsystem overwrites the method fb_osp->mmap. That method is
stored in module's static data - and that means that if we have multiple
diplaylink adapters, they will over write each other's method.

In order to avoid interference between multiple adapters, we copy the
fb_ops structure to a device-local memory.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/udlfb.c | 3 ++-
 include/video/udlfb.h       | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index fa63a2e359d6..770c1c8cfbd0 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -1665,7 +1665,8 @@ static void dlfb_init_framebuffer_work(struct work_struct *work)
 	dlfb->info = info;
 	info->par = dlfb;
 	info->pseudo_palette = dlfb->pseudo_palette;
-	info->fbops = &dlfb_ops;
+	dlfb->ops = dlfb_ops;
+	info->fbops = &dlfb->ops;
 
 	retval = fb_alloc_cmap(&info->cmap, 256, 0);
 	if (retval < 0) {
diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index bc2e9cf34aa5..b4f43d3ac1af 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -51,6 +51,7 @@ struct dlfb_data {
 	int base8;
 	u32 pseudo_palette[256];
 	int blank_mode; /*one of FB_BLANK_ */
+	struct fb_ops ops;
 	/* blit-only rendering path metrics, exposed through sysfs */
 	atomic_t bytes_rendered; /* raw pixel-bytes driver asked to render */
 	atomic_t bytes_identical; /* saved effort with backbuffer comparison */
-- 
cgit v1.2.3


From bb24153a3f13dd0dbc1f8055ad97fe346d598f66 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Jul 2018 15:41:55 +0200
Subject: udlfb: set optimal write delay

The default delay 5 jiffies is too much when the kernel is compiled with
HZ=100 - it results in jumpy cursor in Xwindow.

In order to find out the optimal delay, I benchmarked the driver on
1280x720x30fps video. I found out that with HZ=1000, 10ms is acceptable,
but with HZ=250 or HZ=300, we need 4ms, so that the video is played
without any frame skips.

This patch changes the delay to this value.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/video/udlfb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index b4f43d3ac1af..6e1a2e790b1b 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -88,7 +88,7 @@ struct dlfb_data {
 #define MIN_RAW_PIX_BYTES	2
 #define MIN_RAW_CMD_BYTES	(RAW_HEADER_BYTES + MIN_RAW_PIX_BYTES)
 
-#define DL_DEFIO_WRITE_DELAY    5 /* fb_deferred_io.delay in jiffies */
+#define DL_DEFIO_WRITE_DELAY    msecs_to_jiffies(HZ <= 300 ? 4 : 10) /* optimal value for 720p video */
 #define DL_DEFIO_WRITE_DISABLE  (HZ*60) /* "disable" with long delay */
 
 /* remove these once align.h patch is taken into kernel */
-- 
cgit v1.2.3


From 7433914efd584b22bb49d3e1eee001f5d0525ecd Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 25 Jul 2018 15:41:56 +0200
Subject: udlfb: allow reallocating the framebuffer

This patch changes udlfb so that it may reallocate the framebuffer when
setting higher-resolution mode. If we boot the system without monitor
attached, udlfb creates a framebuffer with the size 800x600. This patch
makes it possible to select higher videomode with the fbset command when
a monitor is attached.

Note that there is no reliable way to prevent the system from touching the
old framebuffer, so we must not free it. We add it to the list
dlfb->deferred_free and free it when the driver is unloaded.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
[b.zolnierkie: sparse fixes]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/udlfb.c | 74 ++++++++++++++++++++++++++++++---------------
 include/video/udlfb.h       |  1 +
 2 files changed, 50 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 862e8027acf6..bdca60f92098 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -73,6 +73,13 @@ static bool fb_defio = 1;  /* Detect mmap writes using page faults */
 static bool shadow = 1; /* Optionally disable shadow framebuffer */
 static int pixel_limit; /* Optionally force a pixel resolution limit */
 
+struct dlfb_deferred_free {
+	struct list_head list;
+	void *mem;
+};
+
+static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info, u32 new_len);
+
 /* dlfb keeps a list of urbs for efficient bulk transfers */
 static void dlfb_urb_completion(struct urb *urb);
 static struct urb *dlfb_get_urb(struct dlfb_data *dlfb);
@@ -927,6 +934,12 @@ static void dlfb_free(struct kref *kref)
 {
 	struct dlfb_data *dlfb = container_of(kref, struct dlfb_data, kref);
 
+	while (!list_empty(&dlfb->deferred_free)) {
+		struct dlfb_deferred_free *d = list_entry(dlfb->deferred_free.next, struct dlfb_deferred_free, list);
+		list_del(&d->list);
+		vfree(d->mem);
+		kfree(d);
+	}
 	vfree(dlfb->backing_buffer);
 	kfree(dlfb->edid);
 	kfree(dlfb);
@@ -1020,10 +1033,6 @@ static int dlfb_ops_check_var(struct fb_var_screeninfo *var,
 	struct fb_videomode mode;
 	struct dlfb_data *dlfb = info->par;
 
-	/* TODO: support dynamically changing framebuffer size */
-	if ((var->xres * var->yres * 2) > info->fix.smem_len)
-		return -EINVAL;
-
 	/* set device-specific elements of var unrelated to mode */
 	dlfb_var_color_format(var);
 
@@ -1042,6 +1051,7 @@ static int dlfb_ops_set_par(struct fb_info *info)
 	u16 *pix_framebuffer;
 	int i;
 	struct fb_var_screeninfo fvs;
+	u32 line_length = info->var.xres * (info->var.bits_per_pixel / 8);
 
 	/* clear the activate field because it causes spurious miscompares */
 	fvs = info->var;
@@ -1051,13 +1061,17 @@ static int dlfb_ops_set_par(struct fb_info *info)
 	if (!memcmp(&dlfb->current_mode, &fvs, sizeof(struct fb_var_screeninfo)))
 		return 0;
 
+	result = dlfb_realloc_framebuffer(dlfb, info, info->var.yres * line_length);
+	if (result)
+		return result;
+
 	result = dlfb_set_video_mode(dlfb, &info->var);
 
 	if (result)
 		return result;
 
 	dlfb->current_mode = fvs;
-	info->fix.line_length = info->var.xres * (info->var.bits_per_pixel / 8);
+	info->fix.line_length = line_length;
 
 	if (dlfb->fb_count == 0) {
 
@@ -1066,11 +1080,11 @@ static int dlfb_ops_set_par(struct fb_info *info)
 		pix_framebuffer = (u16 *) info->screen_base;
 		for (i = 0; i < info->fix.smem_len / 2; i++)
 			pix_framebuffer[i] = 0x37e6;
-
-		dlfb_handle_damage(dlfb, 0, 0, info->var.xres, info->var.yres,
-				   info->screen_base);
 	}
 
+	dlfb_handle_damage(dlfb, 0, 0, info->var.xres, info->var.yres,
+			   info->screen_base);
+
 	return 0;
 }
 
@@ -1146,21 +1160,29 @@ static struct fb_ops dlfb_ops = {
 };
 
 
+static void dlfb_deferred_vfree(struct dlfb_data *dlfb, void *mem)
+{
+	struct dlfb_deferred_free *d = kmalloc(sizeof(struct dlfb_deferred_free), GFP_KERNEL);
+	if (!d)
+		return;
+	d->mem = mem;
+	list_add(&d->list, &dlfb->deferred_free);
+}
+
 /*
  * Assumes &info->lock held by caller
  * Assumes no active clients have framebuffer open
  */
-static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info)
+static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info, u32 new_len)
 {
-	int old_len = info->fix.smem_len;
-	int new_len;
-	unsigned char *old_fb = info->screen_base;
+	u32 old_len = info->fix.smem_len;
+	const void *old_fb = (const void __force *)info->screen_base;
 	unsigned char *new_fb;
 	unsigned char *new_back = NULL;
 
-	new_len = info->fix.line_length * info->var.yres;
+	new_len = PAGE_ALIGN(new_len);
 
-	if (PAGE_ALIGN(new_len) > old_len) {
+	if (new_len > old_len) {
 		/*
 		 * Alloc system memory for virtual framebuffer
 		 */
@@ -1169,14 +1191,15 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 			dev_err(info->dev, "Virtual framebuffer alloc failed\n");
 			return -ENOMEM;
 		}
+		memset(new_fb, 0xff, new_len);
 
 		if (info->screen_base) {
 			memcpy(new_fb, old_fb, old_len);
-			vfree(info->screen_base);
+			dlfb_deferred_vfree(dlfb, (void __force *)info->screen_base);
 		}
 
-		info->screen_base = new_fb;
-		info->fix.smem_len = PAGE_ALIGN(new_len);
+		info->screen_base = (char __iomem *)new_fb;
+		info->fix.smem_len = new_len;
 		info->fix.smem_start = (unsigned long) new_fb;
 		info->flags = udlfb_info_flags;
 
@@ -1192,7 +1215,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 			dev_info(info->dev,
 				 "No shadow/backing buffer allocated\n");
 		else {
-			vfree(dlfb->backing_buffer);
+			dlfb_deferred_vfree(dlfb, dlfb->backing_buffer);
 			dlfb->backing_buffer = new_back;
 		}
 	}
@@ -1344,11 +1367,6 @@ static int dlfb_setup_modes(struct dlfb_data *dlfb,
 		 * with mode size info, we can now alloc our framebuffer.
 		 */
 		memcpy(&info->fix, &dlfb_fix, sizeof(dlfb_fix));
-		info->fix.line_length = info->var.xres *
-			(info->var.bits_per_pixel / 8);
-
-		result = dlfb_realloc_framebuffer(dlfb, info);
-
 	} else
 		result = -EINVAL;
 
@@ -1436,7 +1454,10 @@ static ssize_t edid_store(
 	if (!dlfb->edid || memcmp(src, dlfb->edid, src_size))
 		return -EINVAL;
 
-	dlfb_ops_set_par(fb_info);
+	ret = dlfb_ops_set_par(fb_info);
+	if (ret)
+		return ret;
+
 	return src_size;
 }
 
@@ -1596,6 +1617,7 @@ static int dlfb_usb_probe(struct usb_interface *intf,
 	}
 
 	kref_init(&dlfb->kref); /* matching kref_put in usb .disconnect fn */
+	INIT_LIST_HEAD(&dlfb->deferred_free);
 
 	dlfb->udev = usbdev;
 	usb_set_intfdata(intf, dlfb);
@@ -1693,7 +1715,9 @@ static void dlfb_init_framebuffer_work(struct work_struct *work)
 	dlfb_select_std_channel(dlfb);
 
 	dlfb_ops_check_var(&info->var, info);
-	dlfb_ops_set_par(info);
+	retval = dlfb_ops_set_par(info);
+	if (retval)
+		goto error;
 
 	retval = register_framebuffer(info);
 	if (retval < 0) {
diff --git a/include/video/udlfb.h b/include/video/udlfb.h
index 6e1a2e790b1b..3abd327bada6 100644
--- a/include/video/udlfb.h
+++ b/include/video/udlfb.h
@@ -58,6 +58,7 @@ struct dlfb_data {
 	atomic_t bytes_sent; /* to usb, after compression including overhead */
 	atomic_t cpu_kcycles_used; /* transpired during pixel processing */
 	struct fb_var_screeninfo current_mode;
+	struct list_head deferred_free;
 };
 
 #define NR_USB_REQUEST_I2C_SUB_IO 0x02
-- 
cgit v1.2.3


From 73c8d8945505acdcbae137c2e00a1232e0be709f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 14 Jul 2018 01:28:15 +0900
Subject: ring_buffer: tracing: Inherit the tracing setting to next ring buffer

Maintain the tracing on/off setting of the ring_buffer when switching
to the trace buffer snapshot.

Taking a snapshot is done by swapping the backup ring buffer
(max_tr_buffer). But since the tracing on/off setting is defined
by the ring buffer, when swapping it, the tracing on/off setting
can also be changed. This causes a strange result like below:

  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 0 > tracing_on
  /sys/kernel/debug/tracing # cat tracing_on
  0
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  1
  /sys/kernel/debug/tracing # echo 1 > snapshot
  /sys/kernel/debug/tracing # cat tracing_on
  0

We don't touch tracing_on, but snapshot changes tracing_on
setting each time. This is an anomaly, because user doesn't know
that each "ring_buffer" stores its own tracing-enable state and
the snapshot is done by swapping ring buffers.

Link: http://lkml.kernel.org/r/153149929558.11274.11730609978254724394.stgit@devbox

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Hiraku Toyooka <hiraku.toyooka@cybertrust.co.jp>
Cc: stable@vger.kernel.org
Fixes: debdd57f5145 ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
[ Updated commit log and comment in the code ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  1 +
 kernel/trace/ring_buffer.c  | 16 ++++++++++++++++
 kernel/trace/trace.c        |  6 ++++++
 3 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b72ebdff0b77..003d09ab308d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,6 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_on(struct ring_buffer *buffer);
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6a46af21765c..0b0b688ea166 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3226,6 +3226,22 @@ int ring_buffer_record_is_on(struct ring_buffer *buffer)
 	return !atomic_read(&buffer->record_disabled);
 }
 
+/**
+ * ring_buffer_record_is_set_on - return true if the ring buffer is set writable
+ * @buffer: The ring buffer to see if write is set enabled
+ *
+ * Returns true if the ring buffer is set writable by ring_buffer_record_on().
+ * Note that this does NOT mean it is in a writable state.
+ *
+ * It may return true when the ring buffer has been disabled by
+ * ring_buffer_record_disable(), as that is a temporary disabling of
+ * the ring buffer.
+ */
+int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+{
+	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
+}
+
 /**
  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
  * @buffer: The ring buffer to stop writes to.
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87cf25171fb8..823687997b01 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1373,6 +1373,12 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	arch_spin_lock(&tr->max_lock);
 
+	/* Inherit the recordable setting from trace_buffer */
+	if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
+		ring_buffer_record_on(tr->max_buffer.buffer);
+	else
+		ring_buffer_record_off(tr->max_buffer.buffer);
+
 	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
 
 	__update_max_tr(tr, tsk, cpu);
-- 
cgit v1.2.3


From cdc50176597cb44ce25eb7331c450058775b8d2a Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Fri, 20 Jul 2018 17:51:05 +0530
Subject: drm/scheduler: modify API to avoid redundancy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

entity has a scheduler field and we don't need the sched argument
in any of the functions where entity is provided.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c   | 13 +++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  3 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  4 ++--
 drivers/gpu/drm/etnaviv/etnaviv_drv.c     |  3 +--
 drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  4 ++--
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 20 +++++++++++---------
 drivers/gpu/drm/v3d/v3d_drv.c             |  4 +---
 drivers/gpu/drm/v3d/v3d_gem.c             |  2 --
 include/drm/gpu_scheduler.h               | 10 +++-------
 13 files changed, 30 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c5d81d6a90e0..4d4575b3bba7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1232,7 +1232,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	job = p->job;
 	p->job = NULL;
 
-	r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
+	r = drm_sched_job_init(&job->base, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		amdgpu_mn_unlock(p->mn);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 83e3b320a793..df6965761046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -104,8 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 failed:
 	for (j = 0; j < i; j++)
-		drm_sched_entity_destroy(&adev->rings[j]->sched,
-				      &ctx->rings[j].entity);
+		drm_sched_entity_destroy(&ctx->rings[j].entity);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
@@ -178,8 +177,7 @@ static void amdgpu_ctx_do_release(struct kref *ref)
 		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
 			continue;
 
-		drm_sched_entity_destroy(&ctx->adev->rings[i]->sched,
-			&ctx->rings[i].entity);
+		drm_sched_entity_destroy(&ctx->rings[i].entity);
 	}
 
 	amdgpu_ctx_fini(ref);
@@ -466,8 +464,8 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
 				continue;
 
-			max_wait = drm_sched_entity_flush(&ctx->adev->rings[i]->sched,
-					  &ctx->rings[i].entity, max_wait);
+			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
+							  max_wait);
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -492,8 +490,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 				continue;
 
 			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
-					&ctx->rings[i].entity);
+				drm_sched_entity_fini(&ctx->rings[i].entity);
 			else
 				DRM_ERROR("ctx %p is still alive\n", ctx);
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 5a2c26a85984..631481a730e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -133,7 +133,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	if (!f)
 		return -EINVAL;
 
-	r = drm_sched_job_init(&job->base, entity->sched, entity, owner);
+	r = drm_sched_job_init(&job->base, entity, owner);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6039f8e21358..8c4358e36c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1925,8 +1925,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 			return;
 		}
 	} else {
-		drm_sched_entity_destroy(adev->mman.entity.sched,
-					 &adev->mman.entity);
+		drm_sched_entity_destroy(&adev->mman.entity);
 		dma_fence_put(man->move);
 		man->move = NULL;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e1e4810b9d9e..fca86d71fafc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -305,8 +305,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
 	int i, j;
 
-	drm_sched_entity_destroy(&adev->uvd.inst->ring.sched,
-				 &adev->uvd.entity);
+	drm_sched_entity_destroy(&adev->uvd.entity);
 
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 		kfree(adev->uvd.inst[j].saved_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 86182c966ed6..b6ab4f5350c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -221,7 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	if (adev->vce.vcpu_bo == NULL)
 		return 0;
 
-	drm_sched_entity_destroy(&adev->vce.ring[0].sched, &adev->vce.entity);
+	drm_sched_entity_destroy(&adev->vce.entity);
 
 	amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
 		(void **)&adev->vce.cpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 098dd1ba751a..74b4a28a41d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2642,7 +2642,7 @@ error_free_root:
 	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
-	drm_sched_entity_destroy(&ring->sched, &vm->entity);
+	drm_sched_entity_destroy(&vm->entity);
 
 	return r;
 }
@@ -2779,7 +2779,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
-	drm_sched_entity_destroy(vm->entity.sched, &vm->entity);
+	drm_sched_entity_destroy(&vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 36414ba56b22..207532c05eb8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -78,8 +78,7 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 				gpu->lastctx = NULL;
 			mutex_unlock(&gpu->lock);
 
-			drm_sched_entity_destroy(&gpu->sched,
-						&ctx->sched_entity[i]);
+			drm_sched_entity_destroy(&ctx->sched_entity[i]);
 		}
 	}
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index a74eb57af15b..590e44b0d963 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -118,8 +118,8 @@ int etnaviv_sched_push_job(struct drm_sched_entity *sched_entity,
 {
 	int ret;
 
-	ret = drm_sched_job_init(&submit->sched_job, &submit->gpu->sched,
-				 sched_entity, submit->cmdbuf.ctx);
+	ret = drm_sched_job_init(&submit->sched_job, sched_entity,
+				 submit->cmdbuf.ctx);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index dac71e3b4514..a3b55c542025 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -273,11 +273,12 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
  *
  * Returns the remaining time in jiffies left from the input timeout
  */
-long drm_sched_entity_flush(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity, long timeout)
+long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 {
+	struct drm_gpu_scheduler *sched;
 	long ret = timeout;
 
+	sched = entity->sched;
 	if (!drm_sched_entity_is_initialized(sched, entity))
 		return ret;
 	/**
@@ -312,10 +313,11 @@ EXPORT_SYMBOL(drm_sched_entity_flush);
  * entity and signals all jobs with an error code if the process was killed.
  *
  */
-void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity)
+void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
+	struct drm_gpu_scheduler *sched;
 
+	sched = entity->sched;
 	drm_sched_entity_set_rq(entity, NULL);
 
 	/* Consumption of existing IBs wasn't completed. Forcefully
@@ -373,11 +375,10 @@ EXPORT_SYMBOL(drm_sched_entity_fini);
  *
  * Calls drm_sched_entity_do_release() and drm_sched_entity_cleanup()
  */
-void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched,
-				struct drm_sched_entity *entity)
+void drm_sched_entity_destroy(struct drm_sched_entity *entity)
 {
-	drm_sched_entity_flush(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
-	drm_sched_entity_fini(sched, entity);
+	drm_sched_entity_flush(entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY);
+	drm_sched_entity_fini(entity);
 }
 EXPORT_SYMBOL(drm_sched_entity_destroy);
 
@@ -740,10 +741,11 @@ EXPORT_SYMBOL(drm_sched_job_recovery);
  * Returns 0 for success, negative error code otherwise.
  */
 int drm_sched_job_init(struct drm_sched_job *job,
-		       struct drm_gpu_scheduler *sched,
 		       struct drm_sched_entity *entity,
 		       void *owner)
 {
+	struct drm_gpu_scheduler *sched = entity->sched;
+
 	job->sched = sched;
 	job->entity = entity;
 	job->s_priority = entity->rq - sched->sched_rq;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 1dceba2b42fd..2a85fa68ffea 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -145,13 +145,11 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 static void
 v3d_postclose(struct drm_device *dev, struct drm_file *file)
 {
-	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv = file->driver_priv;
 	enum v3d_queue q;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		drm_sched_entity_destroy(&v3d->queue[q].sched,
-				      &v3d_priv->sched_entity[q]);
+		drm_sched_entity_destroy(&v3d_priv->sched_entity[q]);
 	}
 
 	kfree(v3d_priv);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index e1fcbb4cd0ae..5ce24098a5fd 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -553,7 +553,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	mutex_lock(&v3d->sched_lock);
 	if (exec->bin.start != exec->bin.end) {
 		ret = drm_sched_job_init(&exec->bin.base,
-					 &v3d->queue[V3D_BIN].sched,
 					 &v3d_priv->sched_entity[V3D_BIN],
 					 v3d_priv);
 		if (ret)
@@ -568,7 +567,6 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	}
 
 	ret = drm_sched_job_init(&exec->render.base,
-				 &v3d->queue[V3D_RENDER].sched,
 				 &v3d_priv->sched_entity[V3D_RENDER],
 				 v3d_priv);
 	if (ret)
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 2205e89722f6..728346abcc81 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -286,12 +286,9 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  struct drm_sched_rq **rq_list,
 			  unsigned int num_rq_list,
 			  atomic_t *guilty);
-long drm_sched_entity_flush(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity, long timeout);
-void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity);
-void drm_sched_entity_destroy(struct drm_gpu_scheduler *sched,
-			   struct drm_sched_entity *entity);
+long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
+void drm_sched_entity_fini(struct drm_sched_entity *entity);
+void drm_sched_entity_destroy(struct drm_sched_entity *entity);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 			       struct drm_sched_entity *entity);
 void drm_sched_entity_set_rq(struct drm_sched_entity *entity,
@@ -302,7 +299,6 @@ struct drm_sched_fence *drm_sched_fence_create(
 void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
 void drm_sched_fence_finished(struct drm_sched_fence *fence);
 int drm_sched_job_init(struct drm_sched_job *job,
-		       struct drm_gpu_scheduler *sched,
 		       struct drm_sched_entity *entity,
 		       void *owner);
 void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
-- 
cgit v1.2.3


From 068c330419ffb3422a43cb7d34351f1ef033950f Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Fri, 20 Jul 2018 17:51:06 +0530
Subject: drm/scheduler: remove sched field from the entity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The scheduler of the entity is decided by the run queue on which
it is queued. This patch avoids us the effort required to maintain
a sync between rq and sched field when we start shifting entites
among different rqs.

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  6 +++---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 19 +++++++++----------
 drivers/gpu/drm/scheduler/sched_fence.c   |  2 +-
 include/drm/gpu_scheduler.h               |  2 --
 6 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4d4575b3bba7..178d9ce4eba1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1262,7 +1262,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->sched);
+	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 631481a730e0..391e2f7c03aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -143,7 +143,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->sched);
+	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
 	return 0;
@@ -167,7 +167,7 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
 					       struct drm_sched_entity *s_entity)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->sched);
+	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
 	struct amdgpu_vm *vm = job->vm;
 	struct dma_fence *fence;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 74b4a28a41d6..5d7d7900ccab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -387,7 +387,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 		ats_entries = 0;
 	}
 
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
 
 	r = reservation_object_reserve_shared(bo->tbo.resv);
 	if (r)
@@ -1113,7 +1113,7 @@ restart:
 		struct amdgpu_ring *ring;
 		struct dma_fence *fence;
 
-		ring = container_of(vm->entity.sched, struct amdgpu_ring,
+		ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
 				    sched);
 
 		amdgpu_ring_pad_ib(ring, params.ib);
@@ -1403,7 +1403,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 					   addr, flags);
 	}
 
-	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
 
 	nptes = last - start + 1;
 
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index a3b55c542025..3f2fc5e8242a 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -185,7 +185,6 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 	memset(entity, 0, sizeof(struct drm_sched_entity));
 	INIT_LIST_HEAD(&entity->list);
 	entity->rq = rq_list[0];
-	entity->sched = rq_list[0]->sched;
 	entity->guilty = guilty;
 	entity->last_scheduled = NULL;
 
@@ -210,8 +209,8 @@ EXPORT_SYMBOL(drm_sched_entity_init);
 static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched,
 					    struct drm_sched_entity *entity)
 {
-	return entity->sched == sched &&
-		entity->rq != NULL;
+	return entity->rq != NULL &&
+		entity->rq->sched == sched;
 }
 
 /**
@@ -278,7 +277,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 	struct drm_gpu_scheduler *sched;
 	long ret = timeout;
 
-	sched = entity->sched;
+	sched = entity->rq->sched;
 	if (!drm_sched_entity_is_initialized(sched, entity))
 		return ret;
 	/**
@@ -317,7 +316,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
 	struct drm_gpu_scheduler *sched;
 
-	sched = entity->sched;
+	sched = entity->rq->sched;
 	drm_sched_entity_set_rq(entity, NULL);
 
 	/* Consumption of existing IBs wasn't completed. Forcefully
@@ -388,7 +387,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb
 		container_of(cb, struct drm_sched_entity, cb);
 	entity->dependency = NULL;
 	dma_fence_put(f);
-	drm_sched_wakeup(entity->sched);
+	drm_sched_wakeup(entity->rq->sched);
 }
 
 static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb *cb)
@@ -438,7 +437,7 @@ EXPORT_SYMBOL(drm_sched_entity_set_rq);
 bool drm_sched_dependency_optimized(struct dma_fence* fence,
 				    struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
 	struct drm_sched_fence *s_fence;
 
 	if (!fence || dma_fence_is_signaled(fence))
@@ -455,7 +454,7 @@ EXPORT_SYMBOL(drm_sched_dependency_optimized);
 
 static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
 	struct dma_fence * fence = entity->dependency;
 	struct drm_sched_fence *s_fence;
 
@@ -500,7 +499,7 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 static struct drm_sched_job *
 drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
 	struct drm_sched_job *sched_job = to_drm_sched_job(
 						spsc_queue_peek(&entity->job_queue));
 
@@ -744,7 +743,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
 		       void *owner)
 {
-	struct drm_gpu_scheduler *sched = entity->sched;
+	struct drm_gpu_scheduler *sched = entity->rq->sched;
 
 	job->sched = sched;
 	job->entity = entity;
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index 45d9c3affbea..d8d2dff9ea2f 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -161,7 +161,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity,
 		return NULL;
 
 	fence->owner = owner;
-	fence->sched = entity->sched;
+	fence->sched = entity->rq->sched;
 	spin_lock_init(&fence->lock);
 
 	seq = atomic_inc_return(&entity->fence_seq);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 728346abcc81..091b9afcd184 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -52,7 +52,6 @@ enum drm_sched_priority {
  *        runqueue.
  * @rq: runqueue to which this entity belongs.
  * @rq_lock: lock to modify the runqueue to which this entity belongs.
- * @sched: the scheduler instance to which this entity is enqueued.
  * @job_queue: the list of jobs of this entity.
  * @fence_seq: a linearly increasing seqno incremented with each
  *             new &drm_sched_fence which is part of the entity.
@@ -76,7 +75,6 @@ struct drm_sched_entity {
 	struct list_head		list;
 	struct drm_sched_rq		*rq;
 	spinlock_t			rq_lock;
-	struct drm_gpu_scheduler	*sched;
 
 	struct spsc_queue		job_queue;
 
-- 
cgit v1.2.3


From 1250c3048cf1632f5dbb99a0242410baff67955d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 10 Jul 2018 20:55:14 -0600
Subject: IB/uverbs: Handle IDR and FD types without truncation

Our ABI for write() uses a s32 for FDs and a u32 for IDRs, but internally
we ended up implicitly casting these ABI values into an 'int'. For ioctl()
we use a s64 for FDs and a u64 for IDRs, again casting to an int.

The various casts to int are all missing range checks which can cause
userspace values that should be considered invalid to be accepted.

Fix this by making the generic lookup routine accept a s64, which does not
truncate the write API's u32/s32 or the ioctl API's s64. Then push the
detailed range checking down to the actual type implementations to be
shared by both interfaces.

Finally, change the copy of the uobj->id to sign extend into a s64, so eg,
if we ever wish to return a negative value for a FD it is carried
properly.

This ensures that userspace values are never weirdly interpreted due to
the various trunctations and everything that is really out of range gets
an EINVAL.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c      | 22 ++++++++++++------
 drivers/infiniband/core/rdma_core.h      |  2 +-
 drivers/infiniband/core/uverbs_cmd.c     |  8 ++++---
 drivers/infiniband/core/uverbs_ioctl.c   | 16 ++++++++------
 include/rdma/uverbs_std_types.h          | 38 ++++++++++++++++++--------------
 include/rdma/uverbs_types.h              |  4 ++--
 include/uapi/rdma/rdma_user_ioctl_cmds.h |  7 +++++-
 7 files changed, 59 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index aed7cc2a9e86..c63583dbc6b9 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -133,7 +133,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
  * returns success_res on success (negative errno on failure). For use by
  * callers that do not need the uobj.
  */
-int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 			   struct ib_uverbs_file *ufile, int success_res)
 {
 	struct ib_uobject *uobj;
@@ -212,13 +212,17 @@ static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
 static struct ib_uobject *
 lookup_get_idr_uobject(const struct uverbs_obj_type *type,
-		       struct ib_uverbs_file *ufile, int id, bool exclusive)
+		       struct ib_uverbs_file *ufile, s64 id, bool exclusive)
 {
 	struct ib_uobject *uobj;
+	unsigned long idrno = id;
+
+	if (id < 0 || id > ULONG_MAX)
+		return ERR_PTR(-EINVAL);
 
 	rcu_read_lock();
 	/* object won't be released as we're protected in rcu */
-	uobj = idr_find(&ufile->idr, id);
+	uobj = idr_find(&ufile->idr, idrno);
 	if (!uobj) {
 		uobj = ERR_PTR(-ENOENT);
 		goto free;
@@ -240,17 +244,21 @@ free:
 
 static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
 						struct ib_uverbs_file *ufile,
-						int id, bool exclusive)
+						s64 id, bool exclusive)
 {
 	struct file *f;
 	struct ib_uobject *uobject;
+	int fdno = id;
 	const struct uverbs_obj_fd_type *fd_type =
 		container_of(type, struct uverbs_obj_fd_type, type);
 
+	if (fdno != id)
+		return ERR_PTR(-EINVAL);
+
 	if (exclusive)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	f = fget(id);
+	f = fget(fdno);
 	if (!f)
 		return ERR_PTR(-EBADF);
 
@@ -270,7 +278,7 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
 }
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_uverbs_file *ufile, int id,
+					   struct ib_uverbs_file *ufile, s64 id,
 					   bool exclusive)
 {
 	struct ib_uobject *uobj;
@@ -725,7 +733,7 @@ EXPORT_SYMBOL(uverbs_fd_class);
 struct ib_uobject *
 uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
 			     struct ib_uverbs_file *ufile,
-			     enum uverbs_obj_access access, int id)
+			     enum uverbs_obj_access access, s64 id)
 {
 	switch (access) {
 	case UVERBS_ACCESS_READ:
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1bba60e960c1..db2339330f6f 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -86,7 +86,7 @@ void uverbs_close_fd(struct file *f);
 struct ib_uobject *
 uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
 			     struct ib_uverbs_file *ufile,
-			     enum uverbs_obj_access access, int id);
+			     enum uverbs_obj_access access, s64 id);
 
 /*
  * Note that certain finalize stages could return a status:
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 987ee38ab4b3..409fd46a2a99 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,10 +48,10 @@
 #include "core_priv.h"
 
 static struct ib_uverbs_completion_event_file *
-ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
+_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
 {
-	struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
-						fd, ufile);
+	struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
+					       fd, ufile);
 
 	if (IS_ERR(uobj))
 		return (void *)uobj;
@@ -62,6 +62,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
 	return container_of(uobj, struct ib_uverbs_completion_event_file,
 			    uobj);
 }
+#define ib_uverbs_lookup_comp_file(_fd, _ufile)                                \
+	_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      struct ib_device *ib_dev,
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index d3bf82cfaa2b..26ddc5cadcdb 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -136,15 +136,11 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		break;
 
 	case UVERBS_ATTR_TYPE_IDR:
-		if (uattr->data >> 32)
-			return -EINVAL;
-	/* fall through */
 	case UVERBS_ATTR_TYPE_FD:
 		if (uattr->attr_data.reserved)
 			return -EINVAL;
 
-		if (uattr->len != 0 || !ufile->ucontext ||
-		    uattr->data > INT_MAX)
+		if (uattr->len != 0 || !ufile->ucontext)
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
@@ -152,17 +148,23 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		if (!object)
 			return -EINVAL;
 
+		/*
+		 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
+		 * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
+		 * here without caring about truncation as we know that the
+		 * IDR implementation today rejects negative IDs
+		 */
 		o_attr->uobject = uverbs_get_uobject_from_file(
 					object->type_attrs,
 					ufile,
 					spec->u.obj.access,
-					(int)uattr->data);
+					uattr->data_s64);
 
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
 
 		if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
-			u64 id = o_attr->uobject->id;
+			s64 id = o_attr->uobject->id;
 
 			/* Copy the allocated id to the user-space */
 			if (put_user(id, &e->uattr->data)) {
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 3e3f108f0912..4f32eab8b7a4 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -46,39 +46,43 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
 }
 #endif
 
-static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
-					    bool write,
-					    struct ib_uverbs_file *ufile,
-					    int id)
-{
-	return rdma_lookup_get_uobject(type, ufile, id, write);
-}
+/* Returns _id, or causes a compile error if _id is not a u32.
+ *
+ * The uobj APIs should only be used with the write based uAPI to access
+ * object IDs. The write API must use a u32 for the object handle, which is
+ * checked by this macro.
+ */
+#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
 
 #define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
 
 #define uobj_get_read(_type, _id, _ufile)                                      \
-	__uobj_get(uobj_get_type(_type), false, _ufile, _id)
+	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+				_uobj_check_id(_id), false)
 
-static inline void *_uobj_get_obj_read(const struct uverbs_obj_type *type,
-				       int id, struct ib_uverbs_file *ufile)
-{
-	struct ib_uobject *uobj = __uobj_get(type, false, ufile, id);
+#define ufd_get_read(_type, _fdnum, _ufile)                                    \
+	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+				(_fdnum)*typecheck(s32, _fdnum), false)
 
+static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
+{
 	if (IS_ERR(uobj))
 		return NULL;
 	return uobj->object;
 }
 #define uobj_get_obj_read(_object, _type, _id, _ufile)                         \
-	((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id,  \
-						   _ufile))
+	((struct ib_##_object *)_uobj_get_obj_read(                            \
+		uobj_get_read(_type, _id, _ufile)))
 
 #define uobj_get_write(_type, _id, _ufile)                                     \
-	__uobj_get(uobj_get_type(_type), true, _ufile, _id)
+	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+				_uobj_check_id(_id), true)
 
-int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 			   struct ib_uverbs_file *ufile, int success_res);
 #define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
-	__uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res)
+	__uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id),      \
+			       _ufile, _success_res)
 
 static inline void uobj_put_read(struct ib_uobject *uobj)
 {
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index e2fc9db466d3..2f50cc6def3c 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -77,7 +77,7 @@ struct uverbs_obj_type_class {
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
-					 struct ib_uverbs_file *ufile, int id,
+					 struct ib_uverbs_file *ufile, s64 id,
 					 bool exclusive);
 	void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
 	/*
@@ -121,7 +121,7 @@ struct uverbs_obj_idr_type {
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 					   struct ib_uverbs_file *ufile,
-					   int id, bool exclusive);
+					   s64 id, bool exclusive);
 void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 					    struct ib_uverbs_file *ufile);
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 1da5a1e1f3a8..24800c6c1f32 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -62,7 +62,12 @@ struct ib_uverbs_attr {
 		} enum_data;
 		__u16 reserved;
 	} attr_data;
-	__aligned_u64 data;	/* ptr to command, inline data or idr/fd */
+	union {
+		/* Used by PTR_IN/OUT, ENUM_IN and IDR */
+		__aligned_u64 data;
+		/* Used by FD_IN and FD_OUT */
+		__s64 data_s64;
+	};
 };
 
 struct ib_uverbs_ioctl_hdr {
-- 
cgit v1.2.3


From e951747a087a8655f467833bb367ebf53d57527c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 10 Jul 2018 20:55:19 -0600
Subject: IB/uverbs: Rework the locking for cleaning up the ucontext

The locking here has always been a bit crazy and spread out, upon some
careful analysis we can simplify things.

Create a single function uverbs_destroy_ufile_hw() that internally handles
all locking. This pulls together pieces of this process that were
sprinkled all over the places into one place, and covers them with one
lock.

This eliminates several duplicate/confusing locks and makes the control
flow in ib_uverbs_close() and ib_uverbs_free_hw_resources() extremely
simple.

Unfortunately we have to keep an extra mutex, ucontext_lock.  This lock is
logically part of the rwsem and provides the 'down write, fail if write
locked, wait if read locked' semantic we require.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c   | 117 ++++++++++++++++++++++++++++++----
 drivers/infiniband/core/rdma_core.h   |   3 +-
 drivers/infiniband/core/uverbs.h      |   6 +-
 drivers/infiniband/core/uverbs_cmd.c  |   6 +-
 drivers/infiniband/core/uverbs_main.c |  98 ++++------------------------
 include/rdma/ib_verbs.h               |   5 ++
 6 files changed, 127 insertions(+), 108 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 4545c661acaa..eeed6374134c 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/file.h>
 #include <linux/anon_inodes.h>
+#include <linux/sched/mm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/uverbs_types.h>
 #include <linux/rcupdate.h>
@@ -284,11 +285,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 	}
 
 	ret = uverbs_try_lock_object(uobj, exclusive);
-	if (ret) {
-		WARN(uobj->ufile->cleanup_reason,
-		     "ib_uverbs: Trying to lookup_get while cleanup context\n");
+	if (ret)
 		goto free;
-	}
 
 	return uobj;
 free:
@@ -694,6 +692,71 @@ void uverbs_close_fd(struct file *f)
 	uverbs_uobject_put(uobj);
 }
 
+static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct ib_device *ib_dev = ibcontext->device;
+	struct task_struct *owning_process  = NULL;
+	struct mm_struct   *owning_mm       = NULL;
+
+	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+	if (!owning_process)
+		return;
+
+	owning_mm = get_task_mm(owning_process);
+	if (!owning_mm) {
+		pr_info("no mm, disassociate ucontext is pending task termination\n");
+		while (1) {
+			put_task_struct(owning_process);
+			usleep_range(1000, 2000);
+			owning_process = get_pid_task(ibcontext->tgid,
+						      PIDTYPE_PID);
+			if (!owning_process ||
+			    owning_process->state == TASK_DEAD) {
+				pr_info("disassociate ucontext done, task was terminated\n");
+				/* in case task was dead need to release the
+				 * task struct.
+				 */
+				if (owning_process)
+					put_task_struct(owning_process);
+				return;
+			}
+		}
+	}
+
+	down_write(&owning_mm->mmap_sem);
+	ib_dev->disassociate_ucontext(ibcontext);
+	up_write(&owning_mm->mmap_sem);
+	mmput(owning_mm);
+	put_task_struct(owning_process);
+}
+
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+				   enum rdma_remove_reason reason)
+{
+	struct ib_ucontext *ucontext = ufile->ucontext;
+	int ret;
+
+	if (reason == RDMA_REMOVE_DRIVER_REMOVE)
+		ufile_disassociate_ucontext(ucontext);
+
+	put_pid(ucontext->tgid);
+	ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+			   RDMACG_RESOURCE_HCA_HANDLE);
+
+	/*
+	 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
+	 * the error return.
+	 */
+	ret = ucontext->device->dealloc_ucontext(ufile->ucontext);
+	WARN_ON(ret);
+
+	ufile->ucontext = NULL;
+}
+
 static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 				  enum rdma_remove_reason reason)
 {
@@ -710,7 +773,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 	 * We take and release the lock per traversal in order to let
 	 * other threads (which might still use the FDs) chance to run.
 	 */
-	ufile->cleanup_reason = reason;
 	list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
 		/*
 		 * if we hit this WARN_ON, that means we are
@@ -738,18 +800,43 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 	return ret;
 }
 
-void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
+/*
+ * Destroy the uncontext and every uobject associated with it. If called with
+ * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
+ * been completed and ufile->ucontext is NULL.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+			     enum rdma_remove_reason reason)
 {
-	enum rdma_remove_reason reason = device_removed ?
-					RDMA_REMOVE_DRIVER_REMOVE :
-					RDMA_REMOVE_CLOSE;
+	if (reason == RDMA_REMOVE_CLOSE) {
+		/*
+		 * During destruction we might trigger something that
+		 * synchronously calls release on any file descriptor. For
+		 * this reason all paths that come from file_operations
+		 * release must use try_lock. They can progress knowing that
+		 * there is an ongoing uverbs_destroy_ufile_hw that will clean
+		 * up the driver resources.
+		 */
+		if (!mutex_trylock(&ufile->ucontext_lock))
+			return;
+
+	} else {
+		mutex_lock(&ufile->ucontext_lock);
+	}
+
+	down_write(&ufile->hw_destroy_rwsem);
 
 	/*
-	 * Waits for all remove_commit and alloc_commit to finish. Logically, We
-	 * want to hold this forever as the context is going to be destroyed,
-	 * but we'll release it since it causes a "held lock freed" BUG message.
+	 * If a ucontext was never created then we can't have any uobjects to
+	 * cleanup, nothing to do.
 	 */
-	down_write(&ufile->hw_destroy_rwsem);
+	if (!ufile->ucontext)
+		goto done;
+
+	ufile->ucontext->closing = true;
 	ufile->ucontext->cleanup_retryable = true;
 	while (!list_empty(&ufile->uobjects))
 		if (__uverbs_cleanup_ufile(ufile, reason)) {
@@ -764,7 +851,11 @@ void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
 	if (!list_empty(&ufile->uobjects))
 		__uverbs_cleanup_ufile(ufile, reason);
 
+	ufile_destroy_ucontext(ufile, reason);
+
+done:
 	up_write(&ufile->hw_destroy_rwsem);
+	mutex_unlock(&ufile->ucontext_lock);
 }
 
 const struct uverbs_obj_type_class uverbs_fd_class = {
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index db2339330f6f..a736b46d18e3 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -49,7 +49,8 @@ const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
 const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
 						   uint16_t method);
 
-void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed);
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+			     enum rdma_remove_reason reason);
 
 /*
  * uverbs_uobject_get is called in order to increase the reference count on
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 58b16e840e56..ca9b0450d3f9 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -136,9 +136,9 @@ struct ib_uverbs_completion_event_file {
 
 struct ib_uverbs_file {
 	struct kref				ref;
-	struct mutex				mutex;
-	struct mutex                            cleanup_mutex; /* protect cleanup */
 	struct ib_uverbs_device		       *device;
+	/* Protects writing to ucontext */
+	struct mutex				ucontext_lock;
 	struct ib_ucontext		       *ucontext;
 	struct ib_event_handler			event_handler;
 	struct ib_uverbs_async_event_file       *async_file;
@@ -155,8 +155,6 @@ struct ib_uverbs_file {
 	spinlock_t		uobjects_lock;
 	struct list_head	uobjects;
 
-	enum rdma_remove_reason cleanup_reason;
-
 	struct idr		idr;
 	/* spinlock protects write access to idr */
 	spinlock_t		idr_lock;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 409fd46a2a99..f2611c760184 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -84,7 +84,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&file->mutex);
+	mutex_lock(&file->ucontext_lock);
 
 	if (file->ucontext) {
 		ret = -EINVAL;
@@ -150,7 +150,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 
 	fd_install(resp.async_fd, filp);
 
-	mutex_unlock(&file->mutex);
+	mutex_unlock(&file->ucontext_lock);
 
 	return in_len;
 
@@ -169,7 +169,7 @@ err_alloc:
 	ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
 
 err:
-	mutex_unlock(&file->mutex);
+	mutex_unlock(&file->ucontext_lock);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 77faf32fc997..78d79020ea5c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,8 +41,6 @@
 #include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
 #include <linux/file.h>
 #include <linux/cdev.h>
 #include <linux/anon_inodes.h>
@@ -227,21 +225,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 	}
 }
 
-static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
-				   bool device_removed)
-{
-	struct ib_ucontext *context = file->ucontext;
-
-	context->closing = 1;
-	uverbs_cleanup_ufile(file, device_removed);
-	put_pid(context->tgid);
-
-	ib_rdmacg_uncharge(&context->cg_obj, context->device,
-			   RDMACG_RESOURCE_HCA_HANDLE);
-
-	return context->device->dealloc_ucontext(context);
-}
-
 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
 {
 	complete(&dev->comp);
@@ -886,8 +869,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	spin_lock_init(&file->idr_lock);
 	idr_init(&file->idr);
 	kref_init(&file->ref);
-	mutex_init(&file->mutex);
-	mutex_init(&file->cleanup_mutex);
+	mutex_init(&file->ucontext_lock);
 
 	spin_lock_init(&file->uobjects_lock);
 	INIT_LIST_HEAD(&file->uobjects);
@@ -917,12 +899,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_file *file = filp->private_data;
 
-	mutex_lock(&file->cleanup_mutex);
-	if (file->ucontext) {
-		ib_uverbs_cleanup_ufile(file, false);
-		file->ucontext = NULL;
-	}
-	mutex_unlock(&file->cleanup_mutex);
+	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
 	idr_destroy(&file->idr);
 
 	mutex_lock(&file->device->lists_mutex);
@@ -1109,44 +1086,6 @@ err:
 	return;
 }
 
-static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
-{
-	struct ib_device *ib_dev = ibcontext->device;
-	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
-
-	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-	if (!owning_process)
-		return;
-
-	owning_mm = get_task_mm(owning_process);
-	if (!owning_mm) {
-		pr_info("no mm, disassociate ucontext is pending task termination\n");
-		while (1) {
-			put_task_struct(owning_process);
-			usleep_range(1000, 2000);
-			owning_process = get_pid_task(ibcontext->tgid,
-						      PIDTYPE_PID);
-			if (!owning_process ||
-			    owning_process->state == TASK_DEAD) {
-				pr_info("disassociate ucontext done, task was terminated\n");
-				/* in case task was dead need to release the
-				 * task struct.
-				 */
-				if (owning_process)
-					put_task_struct(owning_process);
-				return;
-			}
-		}
-	}
-
-	down_write(&owning_mm->mmap_sem);
-	ib_dev->disassociate_ucontext(ibcontext);
-	up_write(&owning_mm->mmap_sem);
-	mmput(owning_mm);
-	put_task_struct(owning_process);
-}
-
 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 					struct ib_device *ib_dev)
 {
@@ -1162,39 +1101,24 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 
 	mutex_lock(&uverbs_dev->lists_mutex);
 	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
-		struct ib_ucontext *ucontext;
 		file = list_first_entry(&uverbs_dev->uverbs_file_list,
 					struct ib_uverbs_file, list);
 		file->is_closed = 1;
 		list_del(&file->list);
 		kref_get(&file->ref);
-		mutex_unlock(&uverbs_dev->lists_mutex);
-
-
-		mutex_lock(&file->cleanup_mutex);
-		ucontext = file->ucontext;
-		file->ucontext = NULL;
-		mutex_unlock(&file->cleanup_mutex);
 
-		/* At this point ib_uverbs_close cannot be running
-		 * ib_uverbs_cleanup_ufile
+		/* We must release the mutex before going ahead and calling
+		 * uverbs_cleanup_ufile, as it might end up indirectly calling
+		 * uverbs_close, for example due to freeing the resources (e.g
+		 * mmput).
 		 */
-		if (ucontext) {
-			/* We must release the mutex before going ahead and
-			 * calling disassociate_ucontext. disassociate_ucontext
-			 * might end up indirectly calling uverbs_close,
-			 * for example due to freeing the resources
-			 * (e.g mmput).
-			 */
-			ib_uverbs_event_handler(&file->event_handler, &event);
-			ib_uverbs_disassociate_ucontext(ucontext);
-			mutex_lock(&file->cleanup_mutex);
-			ib_uverbs_cleanup_ufile(file, true);
-			mutex_unlock(&file->cleanup_mutex);
-		}
+		mutex_unlock(&uverbs_dev->lists_mutex);
 
-		mutex_lock(&uverbs_dev->lists_mutex);
+		ib_uverbs_event_handler(&file->event_handler, &event);
+		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
 		kref_put(&file->ref, ib_uverbs_release_file);
+
+		mutex_lock(&uverbs_dev->lists_mutex);
 	}
 
 	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 99bcf64a4762..42cbf8eabe9d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1479,6 +1479,11 @@ struct ib_rdmacg_object {
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct ib_uverbs_file  *ufile;
+	/*
+	 * 'closing' can be read by the driver only during a destroy callback,
+	 * it is set when we are closing the file descriptor and indicates
+	 * that mm_sem may be locked.
+	 */
 	int			closing;
 
 	bool cleanup_retryable;
-- 
cgit v1.2.3


From 2c96eb7d62de5048aa08e9ee4fbb607f29e2638c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 10 Jul 2018 20:55:20 -0600
Subject: IB/uverbs: Always propagate errors from rdma_alloc_commit_uobject()

The ioctl framework already does this correctly, but the write path did
not. This is trivially fixed by simply using a standard pattern to return
uobj_alloc_commit() as the last statement in every function.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c  |  5 ++--
 drivers/infiniband/core/uverbs_cmd.c | 49 +++++++++++-------------------------
 include/rdma/uverbs_std_types.h      |  9 +++++--
 include/rdma/uverbs_types.h          |  2 +-
 4 files changed, 26 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index eeed6374134c..2aab8cd2ca6b 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -532,9 +532,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 
 /*
  * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
- * caller can no longer assume uobj is valid.
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uboject, including the attached HW object.
  */
-int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index f2611c760184..73b563edb587 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -372,9 +372,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 		goto err_copy;
 	}
 
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	ib_dealloc_pd(pd);
@@ -579,9 +577,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 
 	mutex_unlock(&file->device->xrcd_tree_mutex);
 
-	uobj_alloc_commit(&obj->uobject);
-
-	return in_len;
+	return uobj_alloc_commit(&obj->uobject, in_len);
 
 err_copy:
 	if (inode) {
@@ -723,9 +719,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
 	uobj_put_obj_read(pd);
 
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	ib_dereg_mr(mr);
@@ -901,9 +895,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	}
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	uverbs_dealloc_mw(mw);
@@ -959,8 +951,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 		return -EFAULT;
 	}
 
-	uobj_alloc_commit(uobj);
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 }
 
 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
@@ -1041,7 +1032,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (ret)
 		goto err_cb;
 
-	uobj_alloc_commit(&obj->uobject);
+	ret = uobj_alloc_commit(&obj->uobject, 0);
+	if (ret)
+		return ERR_PTR(ret);
 	return obj;
 
 err_cb:
@@ -1596,9 +1589,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	if (ind_tbl)
 		uobj_put_obj_read(ind_tbl);
 
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 err_cb:
 	ib_destroy_qp(qp);
 
@@ -1801,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 	qp->uobject = &obj->uevent.uobject;
 	uobj_put_read(xrcd_uobj);
 
-
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return in_len;
+	return uobj_alloc_commit(&obj->uevent.uobject, in_len);
 
 err_destroy:
 	ib_destroy_qp(qp);
@@ -2607,9 +2595,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	}
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	rdma_destroy_ah(ah);
@@ -3155,8 +3141,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 
 	uobj_put_obj_read(pd);
 	uobj_put_obj_read(cq);
-	uobj_alloc_commit(&obj->uevent.uobject);
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 
 err_copy:
 	ib_destroy_wq(wq);
@@ -3403,8 +3388,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 	for (j = 0; j < num_read_wqs; j++)
 		uobj_put_obj_read(wqs[j]);
 
-	uobj_alloc_commit(uobj);
-	return 0;
+	return uobj_alloc_commit(uobj, 0);
 
 err_copy:
 	ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3605,11 +3589,10 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		goto err_copy;
 
 	uobj_put_obj_read(qp);
-	uobj_alloc_commit(uobj);
 	kfree(flow_attr);
 	if (cmd.flow_attr.num_of_specs)
 		kfree(kern_flow_attr);
-	return 0;
+	return uobj_alloc_commit(uobj, 0);
 err_copy:
 	if (!qp->device->destroy_flow(flow_id))
 		atomic_dec(&qp->usecnt);
@@ -3761,9 +3744,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 		uobj_put_obj_read(attr.ext.cq);
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 
 err_copy:
 	ib_destroy_srq(srq);
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 4f32eab8b7a4..076f085d2dcf 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -102,9 +102,14 @@ static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
 	return rdma_remove_commit_uobject(uobj);
 }
 
-static inline void uobj_alloc_commit(struct ib_uobject *uobj)
+static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
+						 int success_res)
 {
-	rdma_alloc_commit_uobject(uobj);
+	int ret = rdma_alloc_commit_uobject(uobj);
+
+	if (ret)
+		return ret;
+	return success_res;
 }
 
 static inline void uobj_alloc_abort(struct ib_uobject *uobj)
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 2f50cc6def3c..9b82e36128aa 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -127,7 +127,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
-int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
 int rdma_explicit_destroy(struct ib_uobject *uobject);
 
 struct uverbs_obj_fd_type {
-- 
cgit v1.2.3


From aba94548c9e49939fafc92bb406a7f8e7ed87643 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 10 Jul 2018 20:55:21 -0600
Subject: IB/uverbs: Move the FD uobj type struct file allocation to
 alloc_commit

Allocating the struct file during alloc_begin creates this strange
asymmetry with IDR, where the FD has two krefs pointing at it during the
pre-commit phase. In particular this makes the abort process for FD very
strange and confusing.

For instance abort currently calls the type's destroy_object twice, and
the fops release once if abort is done. This is very counter intuitive. No
fops should be called until alloc_commit succeeds, and destroy_object
should only ever be called once.

Moving the struct file allocation to the alloc_commit is now simple, as we
already support failure of rdma_alloc_commit_uobject, with all the
required rollback pieces.

This creates an understandable symmetry with IDR and simplifies/fixes the
abort handling for FD types.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c | 83 ++++++++++++++++++++-----------------
 include/rdma/uverbs_types.h         |  2 +-
 2 files changed, 47 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 2aab8cd2ca6b..8a6ce66d4726 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -328,11 +328,8 @@ uobj_put:
 static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
 						 struct ib_uverbs_file *ufile)
 {
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(type, struct uverbs_obj_fd_type, type);
 	int new_fd;
 	struct ib_uobject *uobj;
-	struct file *filp;
 
 	new_fd = get_unused_fd_flags(O_CLOEXEC);
 	if (new_fd < 0)
@@ -344,28 +341,8 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 		return uobj;
 	}
 
-	/*
-	 * The kref for uobj is moved into filp->private data and put in
-	 * uverbs_close_fd(). Once anon_inode_getfile() succeeds
-	 * uverbs_close_fd() must be guaranteed to be called from the provided
-	 * fops release callback. We piggyback our kref of uobj on the stack
-	 * with the lifetime of the struct file.
-	 */
-	filp = anon_inode_getfile(fd_type->name,
-				  fd_type->fops,
-				  uobj,
-				  fd_type->flags);
-	if (IS_ERR(filp)) {
-		put_unused_fd(new_fd);
-		uverbs_uobject_put(uobj);
-		return (void *)filp;
-	}
-
 	uobj->id = new_fd;
-	uobj->object = filp;
 	uobj->ufile = ufile;
-	/* Matching put will be done in uverbs_close_fd() */
-	kref_get(&ufile->ref);
 
 	return uobj;
 }
@@ -407,12 +384,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 
 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 {
-	struct file *filp = uobj->object;
-	int id = uobj->id;
+	put_unused_fd(uobj->id);
 
-	/* Unsuccessful NEW */
-	fput(filp);
-	put_unused_fd(id);
+	/* Pairs with the kref from alloc_begin_idr_uobject */
+	uverbs_uobject_put(uobj);
 }
 
 static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
@@ -500,7 +475,7 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
 	return ret;
 }
 
-static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
 
@@ -514,11 +489,34 @@ static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
 	 */
 	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
 	spin_unlock(&ufile->idr_lock);
+
+	return 0;
 }
 
-static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
 {
+	const struct uverbs_obj_fd_type *fd_type =
+		container_of(uobj->type, struct uverbs_obj_fd_type, type);
 	int fd = uobj->id;
+	struct file *filp;
+
+	/*
+	 * The kref for uobj is moved into filp->private data and put in
+	 * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
+	 * must be guaranteed to be called from the provided fops release
+	 * callback.
+	 */
+	filp = anon_inode_getfile(fd_type->name,
+				  fd_type->fops,
+				  uobj,
+				  fd_type->flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	uobj->object = filp;
+
+	/* Matching put will be done in uverbs_close_fd() */
+	kref_get(&uobj->ufile->ref);
 
 	/* This shouldn't be used anymore. Use the file object instead */
 	uobj->id = 0;
@@ -527,7 +525,9 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 	 * NOTE: Once we install the file we loose ownership of our kref on
 	 * uobj. It will be put by uverbs_close_fd()
 	 */
-	fd_install(fd, uobj->object);
+	fd_install(fd, filp);
+
+	return 0;
 }
 
 /*
@@ -538,11 +538,10 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
 int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
+	int ret;
 
 	/* Cleanup is running. Calling this should have been impossible */
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem)) {
-		int ret;
-
 		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
 		ret = uobj->type->type_class->remove_commit(uobj,
 							    RDMA_REMOVE_DURING_CLEANUP);
@@ -552,9 +551,18 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 		return ret;
 	}
 
-	/* matches atomic_set(-1) in alloc_uobj */
 	assert_uverbs_usecnt(uobj, true);
-	atomic_set(&uobj->usecnt, 0);
+
+	/* alloc_commit consumes the uobj kref */
+	ret = uobj->type->type_class->alloc_commit(uobj);
+	if (ret) {
+		if (uobj->type->type_class->remove_commit(
+			    uobj, RDMA_REMOVE_DURING_CLEANUP))
+			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
+				uobj->id);
+		up_read(&ufile->hw_destroy_rwsem);
+		return ret;
+	}
 
 	/* kref is held so long as the uobj is on the uobj list. */
 	uverbs_uobject_get(uobj);
@@ -562,8 +570,9 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	list_add(&uobj->list, &ufile->uobjects);
 	spin_unlock_irq(&ufile->uobjects_lock);
 
-	/* alloc_commit consumes the uobj kref */
-	uobj->type->type_class->alloc_commit(uobj);
+	/* matches atomic_set(-1) in alloc_uobj */
+	atomic_set(&uobj->usecnt, 0);
+
 	up_read(&ufile->hw_destroy_rwsem);
 
 	return 0;
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 9b82e36128aa..cfc50fcdbff6 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -73,7 +73,7 @@ struct uverbs_obj_type_class {
 	 */
 	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
 					  struct ib_uverbs_file *ufile);
-	void (*alloc_commit)(struct ib_uobject *uobj);
+	int (*alloc_commit)(struct ib_uobject *uobj);
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
-- 
cgit v1.2.3


From 22fa27fbc64d01cbbe1e4da751e64cc22d24a6e4 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 10 Jul 2018 13:43:06 -0600
Subject: IB/uverbs: Fix locking around struct ib_uverbs_file ucontext

We have a parallel unlocked reader and writer with ib_uverbs_get_context()
vs everything else, and nothing guarantees this works properly.

Audit and fix all of the places that access ucontext to use one of the
following locking schemes:
- Call ib_uverbs_get_ucontext() under SRCU and check for failure
- Access the ucontext through an struct ib_uobject context member
  while holding a READ or WRITE lock on the uobject.
  This value cannot be NULL and has no race.
- Hold the ucontext_lock and check for ufile->ucontext !NULL

This also re-implements ib_uverbs_get_ucontext() in a way that is safe
against concurrent ib_uverbs_get_context() and disassociation.

As a side effect, every access to ucontext in the commands is via
ib_uverbs_get_context() with an error check, or via the uobject, so there
is no longer any need for the core code to check ucontext on every command
call. These checks are also removed.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c           | 14 +++++++---
 drivers/infiniband/core/uverbs.h              |  5 +++-
 drivers/infiniband/core/uverbs_cmd.c          | 14 ++++++----
 drivers/infiniband/core/uverbs_ioctl.c        |  5 +---
 drivers/infiniband/core/uverbs_main.c         | 38 +++++++++++++++++----------
 drivers/infiniband/core/uverbs_std_types_cq.c |  2 +-
 drivers/infiniband/core/uverbs_std_types_dm.c |  2 +-
 drivers/infiniband/hw/mlx5/devx.c             | 20 ++++++++++----
 include/rdma/uverbs_ioctl.h                   |  8 ------
 9 files changed, 65 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 8a6ce66d4726..a63844ba8414 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -154,8 +154,14 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 				     const struct uverbs_obj_type *type)
 {
-	struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+	struct ib_uobject *uobj;
+	struct ib_ucontext *ucontext;
+
+	ucontext = ib_uverbs_get_ucontext(ufile);
+	if (IS_ERR(ucontext))
+		return ERR_CAST(ucontext);
 
+	uobj = kzalloc(type->obj_size, GFP_KERNEL);
 	if (!uobj)
 		return ERR_PTR(-ENOMEM);
 	/*
@@ -163,7 +169,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 	 * The object is added to the list in the commit stage.
 	 */
 	uobj->ufile = ufile;
-	uobj->context = ufile->ucontext;
+	uobj->context = ucontext;
 	INIT_LIST_HEAD(&uobj->list);
 	uobj->type = type;
 	/*
@@ -309,7 +315,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
 	if (ret)
 		goto uobj_put;
 
-	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device,
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
 				   RDMACG_RESOURCE_HCA_OBJECT);
 	if (ret)
 		goto idr_remove;
@@ -761,7 +767,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 	 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
 	 * the error return.
 	 */
-	ret = ucontext->device->dealloc_ucontext(ufile->ucontext);
+	ret = ucontext->device->dealloc_ucontext(ucontext);
 	WARN_ON(ret);
 
 	ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index ca9b0450d3f9..cf02b433000c 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -137,8 +137,11 @@ struct ib_uverbs_completion_event_file {
 struct ib_uverbs_file {
 	struct kref				ref;
 	struct ib_uverbs_device		       *device;
-	/* Protects writing to ucontext */
 	struct mutex				ucontext_lock;
+	/*
+	 * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+	 * ucontext_lock held
+	 */
 	struct ib_ucontext		       *ucontext;
 	struct ib_event_handler			event_handler;
 	struct ib_uverbs_async_event_file       *async_file;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 73b563edb587..38d7de3f9b2f 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -146,10 +146,14 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		goto err_file;
 	}
 
-	file->ucontext = ucontext;
-
 	fd_install(resp.async_fd, filp);
 
+	/*
+	 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
+	 * only after all writes to setup the ucontext have completed
+	 */
+	smp_store_release(&file->ucontext, ucontext);
+
 	mutex_unlock(&file->ucontext_lock);
 
 	return in_len;
@@ -350,7 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
+	pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
 	if (IS_ERR(pd)) {
 		ret = PTR_ERR(pd);
 		goto err;
@@ -538,7 +542,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	}
 
 	if (!xrcd) {
-		xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
+		xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
 		if (IS_ERR(xrcd)) {
 			ret = PTR_ERR(xrcd);
 			goto err;
@@ -1004,7 +1008,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
 		attr.flags = cmd->flags;
 
-	cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
+	cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
 	if (IS_ERR(cq)) {
 		ret = PTR_ERR(cq);
 		goto err_file;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 26ddc5cadcdb..db7a92ea5dbe 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -140,7 +140,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		if (uattr->attr_data.reserved)
 			return -EINVAL;
 
-		if (uattr->len != 0 || !ufile->ucontext)
+		if (uattr->len != 0)
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
@@ -373,9 +373,6 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 	if (!method_spec)
 		return -EPROTONOSUPPORT;
 
-	if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
-		return -EINVAL;
-
 	ctx_size = sizeof(*ctx) +
 		   sizeof(struct uverbs_attr_bundle) +
 		   sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 78d79020ea5c..34df04ed142b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -136,9 +136,27 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+/*
+ * Must be called with the ufile->device->disassociate_srcu held, and the lock
+ * must be held until use of the ucontext is finished.
+ */
 struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
 {
-	return ufile->ucontext;
+	/*
+	 * We do not hold the hw_destroy_rwsem lock for this flow, instead
+	 * srcu is used. It does not matter if someone races this with
+	 * get_context, we get NULL or valid ucontext.
+	 */
+	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
+
+	if (!srcu_dereference(ufile->device->ib_dev,
+			      &ufile->device->disassociate_srcu))
+		return ERR_PTR(-EIO);
+
+	if (!ucontext)
+		return ERR_PTR(-EINVAL);
+
+	return ucontext;
 }
 EXPORT_SYMBOL(ib_uverbs_get_ucontext);
 
@@ -729,10 +747,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	if (ret)
 		return ret;
 
-	if (!file->ucontext &&
-	    (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
-		return -EINVAL;
-
 	if (extended) {
 		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
 			return -EINVAL;
@@ -791,22 +805,18 @@ out:
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct ib_uverbs_file *file = filp->private_data;
-	struct ib_device *ib_dev;
+	struct ib_ucontext *ucontext;
 	int ret = 0;
 	int srcu_key;
 
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
-	ib_dev = srcu_dereference(file->device->ib_dev,
-				  &file->device->disassociate_srcu);
-	if (!ib_dev) {
-		ret = -EIO;
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext)) {
+		ret = PTR_ERR(ucontext);
 		goto out;
 	}
 
-	if (!file->ucontext)
-		ret = -ENODEV;
-	else
-		ret = ib_dev->mmap(file->ucontext, vma);
+	ret = ucontext->device->mmap(ucontext, vma);
 out:
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 	return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 5a6154345fa0..c71305fc0433 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -113,7 +113,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	/* Temporary, only until drivers get the new uverbs_attr_bundle */
 	create_udata(attrs, &uhw);
 
-	cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, &uhw);
+	cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
 	if (IS_ERR(cq)) {
 		ret = PTR_ERR(cq);
 		goto err_event_file;
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 9e148e322523..c90efa4b99f4 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -70,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 
 	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
 
-	dm = ib_dev->alloc_dm(ib_dev, file->ucontext, &attr, attrs);
+	dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
 	if (IS_ERR(dm))
 		return PTR_ERR(dm);
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 98b1575226c1..fee800f2fdec 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -458,16 +458,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_de
 				  struct ib_uverbs_file *file,
 				  struct uverbs_attr_bundle *attrs)
 {
-	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
 	u32 user_idx;
 	s32 dev_idx;
 
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
 	if (uverbs_copy_from(&user_idx, attrs,
 			     MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
 		return -EFAULT;
 
-	dev_idx = bfregn_to_uar_index(to_mdev(ib_dev),
-				      &c->bfregi, user_idx, true);
+	dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
 	if (dev_idx < 0)
 		return dev_idx;
 
@@ -482,8 +487,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 				  struct ib_uverbs_file *file,
 				  struct uverbs_attr_bundle *attrs)
 {
-	struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
-	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
 	void *cmd_in = uverbs_attr_get_alloced_ptr(
 		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
 	int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -491,6 +496,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
 	void *cmd_out;
 	int err;
 
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
 	if (!c->devx_uid)
 		return -EPERM;
 
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 7f230d1ec2b8..d16d31d4322d 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -123,14 +123,6 @@ struct uverbs_attr_spec_hash {
 struct uverbs_attr_bundle;
 struct ib_uverbs_file;
 
-enum {
-	/*
-	 * Action marked with this flag creates a context (or root for all
-	 * objects).
-	 */
-	UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0,
-};
-
 struct uverbs_method_spec {
 	/* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
 	u32						flags;
-- 
cgit v1.2.3


From 3c507b8af638c67d4a80d70091d2057ecb01e8a6 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 23 Jul 2018 21:40:07 +0200
Subject: net: phy: add helper phy_polling_mode

Add a helper for checking whether polling is used to detect PHY status
changes.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c |  8 ++++----
 include/linux/phy.h   | 10 ++++++++++
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 04780db4c963..1ee25877c4d1 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -525,7 +525,7 @@ static int phy_start_aneg_priv(struct phy_device *phydev, bool sync)
 	 * negotiation may already be done and aneg interrupt may not be
 	 * generated.
 	 */
-	if (phydev->irq != PHY_POLL && phydev->state == PHY_AN) {
+	if (!phy_polling_mode(phydev) && phydev->state == PHY_AN) {
 		err = phy_aneg_done(phydev);
 		if (err > 0) {
 			trigger = true;
@@ -983,7 +983,7 @@ void phy_state_machine(struct work_struct *work)
 			needs_aneg = true;
 		break;
 	case PHY_NOLINK:
-		if (phydev->irq != PHY_POLL)
+		if (!phy_polling_mode(phydev))
 			break;
 
 		err = phy_read_status(phydev);
@@ -1024,7 +1024,7 @@ void phy_state_machine(struct work_struct *work)
 		/* Only register a CHANGE if we are polling and link changed
 		 * since latest checking.
 		 */
-		if (phydev->irq == PHY_POLL) {
+		if (phy_polling_mode(phydev)) {
 			old_link = phydev->link;
 			err = phy_read_status(phydev);
 			if (err)
@@ -1123,7 +1123,7 @@ void phy_state_machine(struct work_struct *work)
 	 * PHY, if PHY_IGNORE_INTERRUPT is set, then we will be moving
 	 * between states from phy_mac_interrupt()
 	 */
-	if (phydev->irq == PHY_POLL)
+	if (phy_polling_mode(phydev))
 		queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
 				   PHY_STATE_TIME * HZ);
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 075c2f770d3e..cd6f637cbbfb 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -824,6 +824,16 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev)
 	return phydev->irq != PHY_POLL && phydev->irq != PHY_IGNORE_INTERRUPT;
 }
 
+/**
+ * phy_polling_mode - Convenience function for testing whether polling is
+ * used to detect PHY status changes
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_polling_mode(struct phy_device *phydev)
+{
+	return phydev->irq == PHY_POLL;
+}
+
 /**
  * phy_is_internal - Convenience function for testing if a PHY is internal
  * @phydev: the phy_device struct
-- 
cgit v1.2.3


From d0e45d686a3ef18a20d4eac6675b76d3656c4727 Mon Sep 17 00:00:00 2001
From: Saravanan Sekar <sravanhome@gmail.com>
Date: Thu, 19 Jul 2018 11:06:46 +0200
Subject: dt-bindings: clock: Add S700 support for Actions Semi Soc's

Add clock bindings constants for action S700
Maintain common clock dt-bindings for Actions Semi SoC's
S700 and S900.

Signed-off-by: Parthiban Nallathambi <pn@denx.de>
Signed-off-by: Saravanan Sekar <sravanhome@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/actions,owl-cmu.txt  |  49 +++++++++
 .../devicetree/bindings/clock/actions,s900-cmu.txt |  47 --------
 include/dt-bindings/clock/actions,s700-cmu.h       | 118 +++++++++++++++++++++
 3 files changed, 167 insertions(+), 47 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
 delete mode 100644 Documentation/devicetree/bindings/clock/actions,s900-cmu.txt
 create mode 100644 include/dt-bindings/clock/actions,s700-cmu.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt b/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
new file mode 100644
index 000000000000..d1e60d297387
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
@@ -0,0 +1,49 @@
+* Actions Semi Owl Clock Management Unit (CMU)
+
+The Actions Semi Owl Clock Management Unit generates and supplies clock
+to various controllers within the SoC. The clock binding described here is
+applicable to S900 and S700 SoC's.
+
+Required Properties:
+
+- compatible: should be one of the following,
+	"actions,s900-cmu"
+	"actions,s700-cmu"
+- reg: physical base address of the controller and length of memory mapped
+  region.
+- clocks: Reference to the parent clocks ("hosc", "losc")
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier, and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in corresponding
+dt-bindings/clock/actions,s900-cmu.h or actions,s700-cmu.h header and can be
+used in device tree sources.
+
+External clocks:
+
+The hosc clock used as input for the plls is generated outside the SoC. It is
+expected that it is defined using standard clock bindings as "hosc".
+
+Actions Semi S900 CMU also requires one more clock:
+ - "losc" - internal low frequency oscillator
+
+Example: Clock Management Unit node:
+
+        cmu: clock-controller@e0160000 {
+                compatible = "actions,s900-cmu";
+                reg = <0x0 0xe0160000 0x0 0x1000>;
+                clocks = <&hosc>, <&losc>;
+                #clock-cells = <1>;
+        };
+
+Example: UART controller node that consumes clock generated by the clock
+management unit:
+
+        uart: serial@e012a000 {
+                compatible = "actions,s900-uart", "actions,owl-uart";
+                reg = <0x0 0xe012a000 0x0 0x2000>;
+                interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+                clocks = <&cmu CLK_UART5>;
+        };
diff --git a/Documentation/devicetree/bindings/clock/actions,s900-cmu.txt b/Documentation/devicetree/bindings/clock/actions,s900-cmu.txt
deleted file mode 100644
index 93e4fb827cd6..000000000000
--- a/Documentation/devicetree/bindings/clock/actions,s900-cmu.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* Actions S900 Clock Management Unit (CMU)
-
-The Actions S900 clock management unit generates and supplies clock to various
-controllers within the SoC. The clock binding described here is applicable to
-S900 SoC.
-
-Required Properties:
-
-- compatible: should be "actions,s900-cmu"
-- reg: physical base address of the controller and length of memory mapped
-  region.
-- clocks: Reference to the parent clocks ("hosc", "losc")
-- #clock-cells: should be 1.
-
-Each clock is assigned an identifier, and client nodes can use this identifier
-to specify the clock which they consume.
-
-All available clocks are defined as preprocessor macros in
-dt-bindings/clock/actions,s900-cmu.h header and can be used in device
-tree sources.
-
-External clocks:
-
-The hosc clock used as input for the plls is generated outside the SoC. It is
-expected that it is defined using standard clock bindings as "hosc".
-
-Actions S900 CMU also requires one more clock:
- - "losc" - internal low frequency oscillator
-
-Example: Clock Management Unit node:
-
-        cmu: clock-controller@e0160000 {
-                compatible = "actions,s900-cmu";
-                reg = <0x0 0xe0160000 0x0 0x1000>;
-                clocks = <&hosc>, <&losc>;
-                #clock-cells = <1>;
-        };
-
-Example: UART controller node that consumes clock generated by the clock
-management unit:
-
-        uart: serial@e012a000 {
-                compatible = "actions,s900-uart", "actions,owl-uart";
-                reg = <0x0 0xe012a000 0x0 0x2000>;
-                interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
-                clocks = <&cmu CLK_UART5>;
-        };
diff --git a/include/dt-bindings/clock/actions,s700-cmu.h b/include/dt-bindings/clock/actions,s700-cmu.h
new file mode 100644
index 000000000000..3e1942996724
--- /dev/null
+++ b/include/dt-bindings/clock/actions,s700-cmu.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Device Tree binding constants for Actions Semi S700 Clock Management Unit
+ *
+ * Copyright (c) 2014 Actions Semi Inc.
+ * Author: David Liu <liuwei@actions-semi.com>
+ *
+ * Author: Pathiban Nallathambi <pn@denx.de>
+ * Author: Saravanan Sekar <sravanhome@gmail.com>
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_S700_H
+#define __DT_BINDINGS_CLOCK_S700_H
+
+#define CLK_NONE			0
+
+/* pll clocks */
+#define CLK_CORE_PLL			1
+#define CLK_DEV_PLL			2
+#define CLK_DDR_PLL			3
+#define CLK_NAND_PLL			4
+#define CLK_DISPLAY_PLL			5
+#define CLK_TVOUT_PLL			6
+#define CLK_CVBS_PLL			7
+#define CLK_AUDIO_PLL			8
+#define CLK_ETHERNET_PLL		9
+
+/* system clock */
+#define CLK_CPU				10
+#define CLK_DEV				11
+#define CLK_AHB				12
+#define CLK_APB				13
+#define CLK_DMAC			14
+#define CLK_NOC0_CLK_MUX		15
+#define CLK_NOC1_CLK_MUX		16
+#define CLK_HP_CLK_MUX			17
+#define CLK_HP_CLK_DIV			18
+#define CLK_NOC1_CLK_DIV		19
+#define CLK_NOC0			20
+#define CLK_NOC1			21
+#define CLK_SENOR_SRC			22
+
+/* peripheral device clock */
+#define CLK_GPIO			23
+#define CLK_TIMER			24
+#define CLK_DSI				25
+#define CLK_CSI				26
+#define CLK_SI				27
+#define CLK_DE				28
+#define CLK_HDE				29
+#define CLK_VDE				30
+#define CLK_VCE				31
+#define CLK_NAND			32
+#define CLK_SD0				33
+#define CLK_SD1				34
+#define CLK_SD2				35
+
+#define CLK_UART0			36
+#define CLK_UART1			37
+#define CLK_UART2			38
+#define CLK_UART3			39
+#define CLK_UART4			40
+#define CLK_UART5			41
+#define CLK_UART6			42
+
+#define CLK_PWM0			43
+#define CLK_PWM1			44
+#define CLK_PWM2			45
+#define CLK_PWM3			46
+#define CLK_PWM4			47
+#define CLK_PWM5			48
+#define CLK_GPU3D			49
+
+#define CLK_I2C0			50
+#define CLK_I2C1			51
+#define CLK_I2C2			52
+#define CLK_I2C3			53
+
+#define CLK_SPI0			54
+#define CLK_SPI1			55
+#define CLK_SPI2			56
+#define CLK_SPI3			57
+
+#define CLK_USB3_480MPLL0		58
+#define CLK_USB3_480MPHY0		59
+#define CLK_USB3_5GPHY			60
+#define CLK_USB3_CCE			61
+#define CLK_USB3_MAC			62
+
+#define CLK_LCD				63
+#define CLK_HDMI_AUDIO			64
+#define CLK_I2SRX			65
+#define CLK_I2STX			66
+
+#define CLK_SENSOR0			67
+#define CLK_SENSOR1			68
+
+#define CLK_HDMI_DEV			69
+
+#define CLK_ETHERNET			70
+#define CLK_RMII_REF			71
+
+#define CLK_USB2H0_PLLEN		72
+#define CLK_USB2H0_PHY			73
+#define CLK_USB2H0_CCE			74
+#define CLK_USB2H1_PLLEN		75
+#define CLK_USB2H1_PHY			76
+#define CLK_USB2H1_CCE			77
+
+#define CLK_TVOUT			78
+
+#define CLK_THERMAL_SENSOR		79
+
+#define CLK_IRC_SWITCH			80
+#define CLK_PCM1			81
+#define CLK_NR_CLKS			(CLK_PCM1 + 1)
+
+#endif /* __DT_BINDINGS_CLOCK_S700_H */
-- 
cgit v1.2.3


From 7fc7a7cffab6b94cb5e47148e6852ba633078ea1 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Wed, 25 Jul 2018 21:22:13 +0530
Subject: rdma/cxgb4: Add support for srq functions & structs

This patch adds kernel mode t4_srq structures and support functions,
uapi structures and defines, as well as firmware work request structures.

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h    |  38 ++++++++++
 drivers/infiniband/hw/cxgb4/t4.h          | 117 +++++++++++++++++++++++++++++-
 drivers/infiniband/hw/cxgb4/t4fw_ri_api.h |  19 +++++
 include/uapi/rdma/cxgb4-abi.h             |  17 +++++
 4 files changed, 190 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 8866bf992316..1d567aaf88e3 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -97,6 +97,7 @@ struct c4iw_resource {
 	struct c4iw_id_table tpt_table;
 	struct c4iw_id_table qid_table;
 	struct c4iw_id_table pdid_table;
+	struct c4iw_id_table srq_table;
 };
 
 struct c4iw_qid_list {
@@ -130,6 +131,8 @@ struct c4iw_stats {
 	struct c4iw_stat stag;
 	struct c4iw_stat pbl;
 	struct c4iw_stat rqt;
+	struct c4iw_stat srqt;
+	struct c4iw_stat srq;
 	struct c4iw_stat ocqp;
 	u64  db_full;
 	u64  db_empty;
@@ -549,6 +552,7 @@ struct c4iw_qp {
 	struct kref kref;
 	wait_queue_head_t wait;
 	int sq_sig_all;
+	struct c4iw_srq *srq;
 	struct work_struct free_work;
 	struct c4iw_ucontext *ucontext;
 	struct c4iw_wr_wait *wr_waitp;
@@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
 	return container_of(ibqp, struct c4iw_qp, ibqp);
 }
 
+struct c4iw_srq {
+	struct ib_srq ibsrq;
+	struct list_head db_fc_entry;
+	struct c4iw_dev *rhp;
+	struct t4_srq wq;
+	struct sk_buff *destroy_skb;
+	u32 srq_limit;
+	u32 pdid;
+	int idx;
+	u32 flags;
+	spinlock_t lock; /* protects srq */
+	struct c4iw_wr_wait *wr_waitp;
+	bool armed;
+};
+
+static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct c4iw_srq, ibsrq);
+}
+
 struct c4iw_ucontext {
 	struct ib_ucontext ibucontext;
 	struct c4iw_dev_ucontext uctx;
@@ -1040,6 +1064,13 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 			     struct ib_udata *udata);
 int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask srq_attr_mask,
+		    struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq);
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *attrs,
+			       struct ib_udata *udata);
 int c4iw_destroy_qp(struct ib_qp *ib_qp);
 struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
 			     struct ib_qp_init_attr *attrs,
@@ -1076,12 +1107,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
 void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 			      enum cxgb4_bar2_qtype qtype,
 			      unsigned int *pbar2_qid, u64 *pbar2_pa);
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev);
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx);
 extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe);
 extern int c4iw_wr_log;
 extern int db_fc_threshold;
 extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq);
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16);
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx);
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+		       struct ib_recv_wr **bad_wr);
 struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
 
 typedef int c4iw_restrack_func(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 838a7dee48bd..29a4dd5053f2 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -52,12 +52,16 @@ struct t4_status_page {
 	__be16 pidx;
 	u8 qp_err;	/* flit 1 - sw owns */
 	u8 db_off;
-	u8 pad;
+	u8 pad[2];
 	u16 host_wq_pidx;
 	u16 host_cidx;
 	u16 host_pidx;
+	u16 pad2;
+	u32 srqidx;
 };
 
+#define T4_RQT_ENTRY_SHIFT 6
+#define T4_RQT_ENTRY_SIZE  BIT(T4_RQT_ENTRY_SHIFT)
 #define T4_EQ_ENTRY_SIZE 64
 
 #define T4_SQ_NUM_SLOTS 5
@@ -248,6 +252,7 @@ struct t4_cqe {
 /* used for RQ completion processing */
 #define CQE_WRID_STAG(x)  (be32_to_cpu((x)->u.rcqe.stag))
 #define CQE_WRID_MSN(x)   (be32_to_cpu((x)->u.rcqe.msn))
+#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx))
 
 /* used for SQ completion processing */
 #define CQE_WRID_SQ_IDX(x)	((x)->u.scqe.cidx)
@@ -331,6 +336,7 @@ struct t4_swrqe {
 	u64 wr_id;
 	ktime_t	host_time;
 	u64 sge_ts;
+	int valid;
 };
 
 struct t4_rq {
@@ -360,8 +366,98 @@ struct t4_wq {
 	void __iomem *db;
 	struct c4iw_rdev *rdev;
 	int flushed;
+	u8 *qp_errp;
+	u32 *srqidxp;
+};
+
+struct t4_srq_pending_wr {
+	u64 wr_id;
+	union t4_recv_wr wqe;
+	u8 len16;
+};
+
+struct t4_srq {
+	union t4_recv_wr *queue;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	struct t4_swrqe *sw_rq;
+	void __iomem *bar2_va;
+	u64 bar2_pa;
+	size_t memsize;
+	u32 bar2_qid;
+	u32 qid;
+	u32 msn;
+	u32 rqt_hwaddr;
+	u32 rqt_abs_idx;
+	u16 rqt_size;
+	u16 size;
+	u16 cidx;
+	u16 pidx;
+	u16 wq_pidx;
+	u16 wq_pidx_inc;
+	u16 in_use;
+	struct t4_srq_pending_wr *pending_wrs;
+	u16 pending_cidx;
+	u16 pending_pidx;
+	u16 pending_in_use;
+	u16 ooo_count;
 };
 
+static inline u32 t4_srq_avail(struct t4_srq *srq)
+{
+	return srq->size - 1 - srq->in_use;
+}
+
+static inline void t4_srq_produce(struct t4_srq *srq, u8 len16)
+{
+	srq->in_use++;
+	if (++srq->pidx == srq->size)
+		srq->pidx = 0;
+	srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+	if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS)
+		srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS;
+	srq->queue[srq->size].status.host_pidx = srq->pidx;
+}
+
+static inline void t4_srq_produce_pending_wr(struct t4_srq *srq)
+{
+	srq->pending_in_use++;
+	srq->in_use++;
+	if (++srq->pending_pidx == srq->size)
+		srq->pending_pidx = 0;
+}
+
+static inline void t4_srq_consume_pending_wr(struct t4_srq *srq)
+{
+	srq->pending_in_use--;
+	srq->in_use--;
+	if (++srq->pending_cidx == srq->size)
+		srq->pending_cidx = 0;
+}
+
+static inline void t4_srq_produce_ooo(struct t4_srq *srq)
+{
+	srq->in_use--;
+	srq->ooo_count++;
+}
+
+static inline void t4_srq_consume_ooo(struct t4_srq *srq)
+{
+	srq->cidx++;
+	if (srq->cidx == srq->size)
+		srq->cidx  = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+	srq->ooo_count--;
+}
+
+static inline void t4_srq_consume(struct t4_srq *srq)
+{
+	srq->in_use--;
+	if (++srq->cidx == srq->size)
+		srq->cidx = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+}
+
 static inline int t4_rqes_posted(struct t4_wq *wq)
 {
 	return wq->rq.in_use;
@@ -475,6 +571,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
 	}
 }
 
+static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16,
+				  union t4_recv_wr *wqe)
+{
+	/* Flush host queue memory writes. */
+	wmb();
+	if (inc == 1 && srq->bar2_qid == 0 && wqe) {
+		pr_debug("%s : WC srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe);
+	} else {
+		pr_debug("%s: DB srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid),
+		       srq->bar2_va + SGE_UDB_KDOORBELL);
+	}
+	/* Flush user doorbell area writes. */
+	wmb();
+}
+
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
 {
 
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 58c531db4f4a..0f4f86b004d6 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -263,6 +263,7 @@ enum fw_ri_res_type {
 	FW_RI_RES_TYPE_SQ,
 	FW_RI_RES_TYPE_RQ,
 	FW_RI_RES_TYPE_CQ,
+	FW_RI_RES_TYPE_SRQ,
 };
 
 enum fw_ri_res_op {
@@ -296,6 +297,20 @@ struct fw_ri_res {
 			__be32 r6_lo;
 			__be64 r7;
 		} cq;
+		struct fw_ri_res_srq {
+			__u8   restype;
+			__u8   op;
+			__be16 r3;
+			__be32 eqid;
+			__be32 r4[2];
+			__be32 fetchszm_to_iqid;
+			__be32 dcaen_to_eqsize;
+			__be64 eqaddr;
+			__be32 srqid;
+			__be32 pdid;
+			__be32 hwsrqsize;
+			__be32 hwsrqaddr;
+		} srq;
 	} u;
 };
 
@@ -707,6 +722,10 @@ enum fw_ri_init_p2ptype {
 	FW_RI_INIT_P2PTYPE_DISABLED		= 0xf,
 };
 
+enum fw_ri_init_rqeqid_srq {
+	FW_RI_INIT_RQEQID_SRQ			= 1 << 31,
+};
+
 struct fw_ri_wr {
 	__be32 op_compl;
 	__be32 flowid_len16;
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index 65c9eacd3ffb..d0b2d829471a 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -84,6 +84,23 @@ struct c4iw_create_qp_resp {
 	__u32 flags;
 };
 
+struct c4iw_create_srq_resp {
+	__aligned_u64 srq_key;
+	__aligned_u64 srq_db_gts_key;
+	__aligned_u64 srq_memsize;
+	__u32 srqid;
+	__u32 srq_size;
+	__u32 rqt_abs_idx;
+	__u32 qid_mask;
+	__u32 flags;
+	__u32 reserved; /* explicit padding */
+};
+
+enum {
+	/* HW supports SRQ_LIMIT_REACHED event */
+	T4_SRQ_LIMIT_SUPPORT = 1 << 0,
+};
+
 struct c4iw_alloc_ucontext_resp {
 	__aligned_u64 status_page_key;
 	__u32 status_page_size;
-- 
cgit v1.2.3


From 603cc1498455cf57f5ca4483b600efb37ea2c56c Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 25 Jul 2018 16:35:32 +0200
Subject: net/smc: provide fallback reason code

Remember the fallback reason code and the peer diagnosis code for
smc sockets, and provide them in smc_diag.c to the netlink interface.
And add more detailed reason codes.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/smc_diag.h |  6 +++++
 net/smc/af_smc.c              | 52 +++++++++++++++++++++++++------------------
 net/smc/smc.h                 |  2 ++
 net/smc/smc_clc.c             |  6 ++++-
 net/smc/smc_clc.h             | 18 ++++++++++-----
 net/smc/smc_diag.c            |  6 +++++
 6 files changed, 61 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h
index 48ae3ee22b2d..ac9e8c96d9bd 100644
--- a/include/uapi/linux/smc_diag.h
+++ b/include/uapi/linux/smc_diag.h
@@ -43,6 +43,7 @@ enum {
 	SMC_DIAG_LGRINFO,
 	SMC_DIAG_SHUTDOWN,
 	SMC_DIAG_DMBINFO,
+	SMC_DIAG_FALLBACK,
 	__SMC_DIAG_MAX,
 };
 
@@ -92,6 +93,11 @@ struct smc_diag_lgrinfo {
 	__u8				role;
 };
 
+struct smc_diag_fallback {
+	__u32 reason;
+	__u32 peer_diagnosis;
+};
+
 struct smcd_diag_dmbinfo {		/* SMC-D Socket internals */
 	__u32 linkid;			/* Link identifier */
 	__u64 peer_gid;			/* Peer GID */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b81797103260..fce7e4751151 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -344,17 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
 	rc = smc_ib_modify_qp_rts(link);
 	if (rc)
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_RDYLNK;
 
 	smc_wr_remember_qp_attr(link);
 
 	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK response over RoCE fabric */
 	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	/* receive ADD LINK request from server over RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
@@ -372,7 +372,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 				   link->smcibdev->mac[link->ibport - 1],
 				   link->gid, SMC_LLC_RESP);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_AL;
 
 	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
@@ -424,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,
 }
 
 /* fall back during connect */
-static int smc_connect_fallback(struct smc_sock *smc)
+static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
 	smc->use_fallback = true;
+	smc->fallback_rsn = reason_code;
 	smc_copy_sock_settings_to_clc(smc);
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
@@ -443,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 			sock_put(&smc->sk); /* passive closing */
 		return reason_code;
 	}
-	if (reason_code != SMC_CLC_DECL_REPLY) {
+	if (reason_code != SMC_CLC_DECL_PEERDECL) {
 		rc = smc_clc_send_decline(smc, reason_code);
 		if (rc < 0) {
 			if (smc->sk.sk_state == SMC_INIT)
@@ -451,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 			return rc;
 		}
 	}
-	return smc_connect_fallback(smc);
+	return smc_connect_fallback(smc, reason_code);
 }
 
 /* abort connecting */
@@ -568,7 +569,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 		smc_link_save_peer_info(link, aclc);
 
 	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
-		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 					 local_contact);
 
 	smc_close_init(smc);
@@ -576,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		if (smc_ib_ready_link(link))
-			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 						 local_contact);
 	} else {
 		if (!smc->conn.rmb_desc->reused &&
 		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
-			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 						 local_contact);
 	}
 	smc_rmb_sync_sg_for_device(&smc->conn);
@@ -659,11 +660,11 @@ static int __smc_connect(struct smc_sock *smc)
 	sock_hold(&smc->sk); /* sock put in passive closing */
 
 	if (smc->use_fallback)
-		return smc_connect_fallback(smc);
+		return smc_connect_fallback(smc, smc->fallback_rsn);
 
 	/* if peer has not signalled SMC-capability, fall back */
 	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
-		return smc_connect_fallback(smc);
+		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 
 	/* IPSec connections opt out of SMC-R optimizations */
 	if (using_ipsec(smc))
@@ -693,7 +694,7 @@ static int __smc_connect(struct smc_sock *smc)
 
 	/* if neither ISM nor RDMA are supported, fallback */
 	if (!rdma_supported && !ism_supported)
-		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 
 	/* perform CLC handshake */
 	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
@@ -708,7 +709,7 @@ static int __smc_connect(struct smc_sock *smc)
 	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
 		rc = smc_connect_ism(smc, &aclc, ismdev);
 	else
-		rc = SMC_CLC_DECL_CNFERR;
+		rc = SMC_CLC_DECL_MODEUNSUPP;
 	if (rc) {
 		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
@@ -946,12 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	link = &lgr->lnk[SMC_SINGLE_LINK];
 
 	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK request to client over the RoCE fabric */
 	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	/* receive CONFIRM LINK response from client over the RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(
@@ -973,7 +974,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 				   link->smcibdev->mac[link->ibport - 1],
 				   link->gid, SMC_LLC_REQ);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_AL;
 
 	/* receive ADD LINK response from client over the RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
@@ -1048,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
 	}
 	smc_conn_free(&new_smc->conn);
 	new_smc->use_fallback = true;
-	if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
+	new_smc->fallback_rsn = reason_code;
+	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
 		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
 			smc_listen_out_err(new_smc);
 			return;
@@ -1139,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 	if (local_contact != SMC_FIRST_CONTACT) {
 		if (!new_smc->conn.rmb_desc->reused) {
 			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
-				return SMC_CLC_DECL_INTERR;
+				return SMC_CLC_DECL_ERR_REGRMB;
 		}
 	}
 	smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1159,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,
 		smc_link_save_peer_info(link, cclc);
 
 	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
-		reason_code = SMC_CLC_DECL_INTERR;
+		reason_code = SMC_CLC_DECL_ERR_RTOK;
 		goto decline;
 	}
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		if (smc_ib_ready_link(link)) {
-			reason_code = SMC_CLC_DECL_INTERR;
+			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
 			goto decline;
 		}
 		/* QP confirmation over RoCE fabric */
@@ -1206,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)
 	/* check if peer is smc capable */
 	if (!tcp_sk(newclcsock->sk)->syn_smc) {
 		new_smc->use_fallback = true;
+		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
 		smc_listen_out_connected(new_smc);
 		return;
 	}
@@ -1250,7 +1253,8 @@ static void smc_listen_work(struct work_struct *work)
 	     smc_listen_rdma_reg(new_smc, local_contact))) {
 		/* SMC not supported, decline */
 		mutex_unlock(&smc_create_lgr_pending);
-		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
+		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
+				   local_contact);
 		return;
 	}
 
@@ -1297,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
 
 		new_smc->listen_smc = lsmc;
 		new_smc->use_fallback = lsmc->use_fallback;
+		new_smc->fallback_rsn = lsmc->fallback_rsn;
 		sock_hold(lsk); /* sock_put in smc_listen_work */
 		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
 		smc_copy_sock_settings_to_smc(new_smc);
@@ -1451,6 +1456,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & MSG_FASTOPEN) {
 		if (sk->sk_state == SMC_INIT) {
 			smc->use_fallback = true;
+			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			rc = -EINVAL;
 			goto out;
@@ -1648,6 +1654,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 		/* option not supported by SMC */
 		if (sk->sk_state == SMC_INIT) {
 			smc->use_fallback = true;
+			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			if (!smc->use_fallback)
 				rc = -EINVAL;
@@ -1885,6 +1892,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 	/* create internal TCP socket for CLC handshake and fallback */
 	smc = smc_sk(sk);
 	smc->use_fallback = false; /* assume rdma capability first */
+	smc->fallback_rsn = 0;
 	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
 			      &smc->clcsock);
 	if (rc) {
diff --git a/net/smc/smc.h b/net/smc/smc.h
index be20acd7b5ab..08786ace6010 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -208,6 +208,8 @@ struct smc_sock {				/* smc sock container */
 	struct list_head	accept_q;	/* sockets to be accepted */
 	spinlock_t		accept_q_lock;	/* protects accept_q */
 	bool			use_fallback;	/* fallback to tcp */
+	int			fallback_rsn;	/* reason for fallback */
+	u32			peer_diagnosis; /* decline reason from peer */
 	int			sockopt_defer_accept;
 						/* sockopt TCP_DEFER_ACCEPT
 						 * value
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 78d74938a9d9..83aba9ade060 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -334,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		goto out;
 	}
 	if (clcm->type == SMC_CLC_DECLINE) {
-		reason_code = SMC_CLC_DECL_REPLY;
+		struct smc_clc_msg_decline *dclc;
+
+		dclc = (struct smc_clc_msg_decline *)clcm;
+		reason_code = SMC_CLC_DECL_PEERDECL;
+		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
 		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 			smc->conn.lgr->sync_err = 1;
 			smc_lgr_terminate(smc->conn.lgr);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 6bdc63352d6a..18da89b681c2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -28,15 +28,21 @@
 #define SMC_TYPE_B		3		/* SMC-R and SMC-D	      */
 #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
-#define SMC_CLC_DECL_TIMEOUT	0x02000000  /* timeout                        */
+#define SMC_CLC_DECL_TIMEOUT_CL	0x02010000  /* timeout w4 QP confirm link     */
+#define SMC_CLC_DECL_TIMEOUT_AL	0x02020000  /* timeout w4 QP add link	      */
 #define SMC_CLC_DECL_CNFERR	0x03000000  /* configuration error            */
-#define SMC_CLC_DECL_IPSEC	0x03030000  /* IPsec usage                    */
+#define SMC_CLC_DECL_PEERNOSMC	0x03010000  /* peer did not indicate SMC      */
+#define SMC_CLC_DECL_IPSEC	0x03020000  /* IPsec usage		      */
+#define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found	      */
+#define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/
+#define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */
 #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
-#define SMC_CLC_DECL_REPLY	0x06000000  /* reply to a received decline    */
+#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
 #define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */
-#define SMC_CLC_DECL_TCL	0x02040000  /* timeout w4 QP confirm          */
-#define SMC_CLC_DECL_SEND	0x07000000  /* sending problem                */
-#define SMC_CLC_DECL_RMBE_EC	0x08000000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_ERR_RTOK	0x99990001  /*	 rtoken handling failed       */
+#define SMC_CLC_DECL_ERR_RDYLNK	0x99990002  /*	 ib ready link failed	      */
+#define SMC_CLC_DECL_ERR_REGRMB	0x99990003  /*	 reg rmb failed		      */
 
 struct smc_clc_msg_hdr {	/* header1 of clc messages */
 	u8 eyecatcher[4];	/* eye catcher */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index a3cf7313a2d3..dbf64a93d68a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			   struct nlattr *bc)
 {
 	struct smc_sock *smc = smc_sk(sk);
+	struct smc_diag_fallback fallback;
 	struct user_namespace *user_ns;
 	struct smc_diag_msg *r;
 	struct nlmsghdr *nlh;
@@ -101,6 +102,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
 		goto errout;
 
+	fallback.reason = smc->fallback_rsn;
+	fallback.peer_diagnosis = smc->peer_diagnosis;
+	if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0)
+		goto errout;
+
 	if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
 	    smc->conn.alert_token_local) {
 		struct smc_connection *conn = &smc->conn;
-- 
cgit v1.2.3


From 95a48b7d4459948b6bacf809809cf01a7dc06d1d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 23:00:54 +0200
Subject: ALSA: pcm: Add __force to cast in snd_pcm_lib_read/write()

The snd_pcm_lib_read() and snd_pcm_lib_write() inline functions have
the explicit cast from a user pointer to a kernel pointer, but they
lacks of __force prefix.

This fixes sparse warnings like:
  ./include/sound/pcm.h:1093:47: warning: cast removes address space of expression

Fixes: 68541213720d ("ALSA: pcm: Direct in-kernel read/write support")
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 1206045ccf03..d6bd3caf6878 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1090,14 +1090,14 @@ static inline snd_pcm_sframes_t
 snd_pcm_lib_write(struct snd_pcm_substream *substream,
 		  const void __user *buf, snd_pcm_uframes_t frames)
 {
-	return __snd_pcm_lib_xfer(substream, (void *)buf, true, frames, false);
+	return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false);
 }
 
 static inline snd_pcm_sframes_t
 snd_pcm_lib_read(struct snd_pcm_substream *substream,
 		 void __user *buf, snd_pcm_uframes_t frames)
 {
-	return __snd_pcm_lib_xfer(substream, (void *)buf, true, frames, false);
+	return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false);
 }
 
 static inline snd_pcm_sframes_t
-- 
cgit v1.2.3


From 601b218568a107370086dc5c7a1b283f8d463268 Mon Sep 17 00:00:00 2001
From: Ruchi Kandoi <kandoiruchi@google.com>
Date: Tue, 24 Jul 2018 10:35:44 -0700
Subject: cpufreq: trace frequency limits change

systrace used for tracing for Android systems has carried a patch for
many years in the Android tree that traces when the cpufreq limits
change.  With the help of this information, systrace can know when the
policy limits change and can visually display the data. Lets add
upstream support for the same.

Signed-off-by: Ruchi Kandoi <kandoiruchi@google.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/trace/events-power.rst |  1 +
 drivers/cpufreq/cpufreq.c            |  1 +
 include/trace/events/power.h         | 25 +++++++++++++++++++++++++
 3 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/Documentation/trace/events-power.rst b/Documentation/trace/events-power.rst
index a77daca75e30..2ef318962e29 100644
--- a/Documentation/trace/events-power.rst
+++ b/Documentation/trace/events-power.rst
@@ -27,6 +27,7 @@ cpufreq.
 
   cpu_idle		"state=%lu cpu_id=%lu"
   cpu_frequency		"state=%lu cpu_id=%lu"
+  cpu_frequency_limits	"min=%lu max=%lu cpu_id=%lu"
 
 A suspend event is used to indicate the system going in and out of the
 suspend mode:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b0dfd3222013..52566f1f1050 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2236,6 +2236,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 
 	policy->min = new_policy->min;
 	policy->max = new_policy->max;
+	trace_cpu_frequency_limits(policy);
 
 	policy->cached_target_freq = UINT_MAX;
 
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 908977d69783..f7aece721aed 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -5,6 +5,7 @@
 #if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_POWER_H
 
+#include <linux/cpufreq.h>
 #include <linux/ktime.h>
 #include <linux/pm_qos.h>
 #include <linux/tracepoint.h>
@@ -148,6 +149,30 @@ DEFINE_EVENT(cpu, cpu_frequency,
 	TP_ARGS(frequency, cpu_id)
 );
 
+TRACE_EVENT(cpu_frequency_limits,
+
+	TP_PROTO(struct cpufreq_policy *policy),
+
+	TP_ARGS(policy),
+
+	TP_STRUCT__entry(
+		__field(u32, min_freq)
+		__field(u32, max_freq)
+		__field(u32, cpu_id)
+	),
+
+	TP_fast_assign(
+		__entry->min_freq = policy->min;
+		__entry->max_freq = policy->max;
+		__entry->cpu_id = policy->cpu;
+	),
+
+	TP_printk("min=%lu max=%lu cpu_id=%lu",
+		  (unsigned long)__entry->min_freq,
+		  (unsigned long)__entry->max_freq,
+		  (unsigned long)__entry->cpu_id)
+);
+
 TRACE_EVENT(device_pm_callback_start,
 
 	TP_PROTO(struct device *dev, const char *pm_ops, int event),
-- 
cgit v1.2.3


From 6f4ceee9305dc3fe74099159b460f4b56b506f1d Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 24 Jul 2018 14:26:04 -0400
Subject: cpu/hotplug: Add a cpus_read_trylock() function

There are use cases where it can be useful to have a cpus_read_trylock()
function to work around circular lock dependency problem involving
the cpu_hotplug_lock.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpu.h | 2 ++
 kernel/cpu.c        | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..e850bfea3e84 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -103,6 +103,7 @@ extern void cpus_write_lock(void);
 extern void cpus_write_unlock(void);
 extern void cpus_read_lock(void);
 extern void cpus_read_unlock(void);
+extern int  cpus_read_trylock(void);
 extern void lockdep_assert_cpus_held(void);
 extern void cpu_hotplug_disable(void);
 extern void cpu_hotplug_enable(void);
@@ -115,6 +116,7 @@ static inline void cpus_write_lock(void) { }
 static inline void cpus_write_unlock(void) { }
 static inline void cpus_read_lock(void) { }
 static inline void cpus_read_unlock(void) { }
+static inline int  cpus_read_trylock(void) { return true; }
 static inline void lockdep_assert_cpus_held(void) { }
 static inline void cpu_hotplug_disable(void) { }
 static inline void cpu_hotplug_enable(void) { }
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..307486baa477 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -290,6 +290,12 @@ void cpus_read_lock(void)
 }
 EXPORT_SYMBOL_GPL(cpus_read_lock);
 
+int cpus_read_trylock(void)
+{
+	return percpu_down_read_trylock(&cpu_hotplug_lock);
+}
+EXPORT_SYMBOL_GPL(cpus_read_trylock);
+
 void cpus_read_unlock(void)
 {
 	percpu_up_read(&cpu_hotplug_lock);
-- 
cgit v1.2.3


From d396e47fb558a819226955ce5db0149fde88da0f Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Mon, 21 May 2018 11:28:51 +0300
Subject: usb: gadget: uvc: Move userspace API definition to public header

The UVC gadget userspace API (V4L2 events and custom ioctls) is defined
in a header internal to the kernel. Move it to a new public header to
make it accessible to userspace.

The UVC_INTF_CONTROL and UVC_INTF_STREAMING macros are not used, so
remove them in the process.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 MAINTAINERS                       |  1 +
 drivers/usb/gadget/function/uvc.h | 45 +++++----------------------------------
 include/uapi/linux/usb/g_uvc.h    | 39 +++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 40 deletions(-)
 create mode 100644 include/uapi/linux/usb/g_uvc.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 0fe4228f78cb..37035a0c7522 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14996,6 +14996,7 @@ L:	linux-usb@vger.kernel.org
 S:	Maintained
 F:	drivers/usb/gadget/function/*uvc*
 F:	drivers/usb/gadget/legacy/webcam.c
+F:	include/uapi/linux/usb/g_uvc.h
 
 USB WIRELESS RNDIS DRIVER (rndis_wlan)
 M:	Jussi Kivilinna <jussi.kivilinna@iki.fi>
diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
index a64e07e61f8c..053e4b72039d 100644
--- a/drivers/usb/gadget/function/uvc.h
+++ b/drivers/usb/gadget/function/uvc.h
@@ -9,52 +9,20 @@
 #ifndef _UVC_GADGET_H_
 #define _UVC_GADGET_H_
 
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include <linux/usb/ch9.h>
-
-#define UVC_EVENT_FIRST			(V4L2_EVENT_PRIVATE_START + 0)
-#define UVC_EVENT_CONNECT		(V4L2_EVENT_PRIVATE_START + 0)
-#define UVC_EVENT_DISCONNECT		(V4L2_EVENT_PRIVATE_START + 1)
-#define UVC_EVENT_STREAMON		(V4L2_EVENT_PRIVATE_START + 2)
-#define UVC_EVENT_STREAMOFF		(V4L2_EVENT_PRIVATE_START + 3)
-#define UVC_EVENT_SETUP			(V4L2_EVENT_PRIVATE_START + 4)
-#define UVC_EVENT_DATA			(V4L2_EVENT_PRIVATE_START + 5)
-#define UVC_EVENT_LAST			(V4L2_EVENT_PRIVATE_START + 5)
-
-struct uvc_request_data {
-	__s32 length;
-	__u8 data[60];
-};
-
-struct uvc_event {
-	union {
-		enum usb_device_speed speed;
-		struct usb_ctrlrequest req;
-		struct uvc_request_data data;
-	};
-};
-
-#define UVCIOC_SEND_RESPONSE		_IOW('U', 1, struct uvc_request_data)
-
-#define UVC_INTF_CONTROL		0
-#define UVC_INTF_STREAMING		1
-
-/* ------------------------------------------------------------------------
- * Debugging, printing and logging
- */
-
-#ifdef __KERNEL__
-
 #include <linux/usb.h>	/* For usb_endpoint_* */
 #include <linux/usb/composite.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb/g_uvc.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-fh.h>
 #include <media/v4l2-device.h>
 
 #include "uvc_queue.h"
 
+/* ------------------------------------------------------------------------
+ * Debugging, printing and logging
+ */
+
 #define UVC_TRACE_PROBE				(1 << 0)
 #define UVC_TRACE_DESCR				(1 << 1)
 #define UVC_TRACE_CONTROL			(1 << 2)
@@ -184,7 +152,4 @@ extern void uvc_endpoint_stream(struct uvc_device *dev);
 extern void uvc_function_connect(struct uvc_device *uvc);
 extern void uvc_function_disconnect(struct uvc_device *uvc);
 
-#endif /* __KERNEL__ */
-
 #endif /* _UVC_GADGET_H_ */
-
diff --git a/include/uapi/linux/usb/g_uvc.h b/include/uapi/linux/usb/g_uvc.h
new file mode 100644
index 000000000000..3c9ee3020cbb
--- /dev/null
+++ b/include/uapi/linux/usb/g_uvc.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * g_uvc.h  --  USB Video Class Gadget driver API
+ *
+ * Copyright (C) 2009-2010 Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ */
+
+#ifndef __LINUX_USB_G_UVC_H
+#define __LINUX_USB_G_UVC_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <linux/usb/ch9.h>
+
+#define UVC_EVENT_FIRST			(V4L2_EVENT_PRIVATE_START + 0)
+#define UVC_EVENT_CONNECT		(V4L2_EVENT_PRIVATE_START + 0)
+#define UVC_EVENT_DISCONNECT		(V4L2_EVENT_PRIVATE_START + 1)
+#define UVC_EVENT_STREAMON		(V4L2_EVENT_PRIVATE_START + 2)
+#define UVC_EVENT_STREAMOFF		(V4L2_EVENT_PRIVATE_START + 3)
+#define UVC_EVENT_SETUP			(V4L2_EVENT_PRIVATE_START + 4)
+#define UVC_EVENT_DATA			(V4L2_EVENT_PRIVATE_START + 5)
+#define UVC_EVENT_LAST			(V4L2_EVENT_PRIVATE_START + 5)
+
+struct uvc_request_data {
+	__s32 length;
+	__u8 data[60];
+};
+
+struct uvc_event {
+	union {
+		enum usb_device_speed speed;
+		struct usb_ctrlrequest req;
+		struct uvc_request_data data;
+	};
+};
+
+#define UVCIOC_SEND_RESPONSE		_IOW('U', 1, struct uvc_request_data)
+
+#endif /* __LINUX_USB_G_UVC_H */
-- 
cgit v1.2.3


From 1560d0848a1a84db6c1d9b17c14273c0dae41828 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:34:29 +0200
Subject: rtc: remove rtc_irq_register/rtc_irq_unregister

The rtc_irq_* interface is not used from outside the RTC subsytem since
2016.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 33 ---------------------------------
 include/linux/rtc.h     |  4 ----
 2 files changed, 37 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 6d4012dd6922..2b1b9a0b9f8a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -719,39 +719,6 @@ void rtc_class_close(struct rtc_device *rtc)
 }
 EXPORT_SYMBOL_GPL(rtc_class_close);
 
-int rtc_irq_register(struct rtc_device *rtc, struct rtc_task *task)
-{
-	int retval = -EBUSY;
-
-	if (task == NULL || task->func == NULL)
-		return -EINVAL;
-
-	/* Cannot register while the char dev is in use */
-	if (test_and_set_bit_lock(RTC_DEV_BUSY, &rtc->flags))
-		return -EBUSY;
-
-	spin_lock_irq(&rtc->irq_task_lock);
-	if (rtc->irq_task == NULL) {
-		rtc->irq_task = task;
-		retval = 0;
-	}
-	spin_unlock_irq(&rtc->irq_task_lock);
-
-	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(rtc_irq_register);
-
-void rtc_irq_unregister(struct rtc_device *rtc, struct rtc_task *task)
-{
-	spin_lock_irq(&rtc->irq_task_lock);
-	if (rtc->irq_task == task)
-		rtc->irq_task = NULL;
-	spin_unlock_irq(&rtc->irq_task_lock);
-}
-EXPORT_SYMBOL_GPL(rtc_irq_unregister);
-
 static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
 {
 	/*
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 6268208760e9..f868d6b619ab 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -204,10 +204,6 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_register(struct rtc_device *rtc,
-				struct rtc_task *task);
-extern void rtc_irq_unregister(struct rtc_device *rtc,
-				struct rtc_task *task);
 extern int rtc_irq_set_state(struct rtc_device *rtc,
 				struct rtc_task *task, int enabled);
 extern int rtc_irq_set_freq(struct rtc_device *rtc,
-- 
cgit v1.2.3


From acecb3ad8b21a519ce4ad728106d45d4e978bb56 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 14:58:10 +0200
Subject: rtc: remove irq_task and irq_task_lock

There is no way to set a periodic task anymore, remove task pointer and
lock.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/class.c     |  1 -
 drivers/rtc/interface.c | 50 ++++++++++---------------------------------------
 include/linux/rtc.h     |  2 --
 3 files changed, 10 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 7fa32c922617..0fca4d74c76b 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -172,7 +172,6 @@ static struct rtc_device *rtc_allocate_device(void)
 
 	mutex_init(&rtc->ops_lock);
 	spin_lock_init(&rtc->irq_lock);
-	spin_lock_init(&rtc->irq_task_lock);
 	init_waitqueue_head(&rtc->irq_queue);
 
 	/* Init timerqueue */
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 2b1b9a0b9f8a..ae0d67610c7b 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -605,12 +605,6 @@ void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
 	rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF|mode);
 	spin_unlock_irqrestore(&rtc->irq_lock, flags);
 
-	/* call the task func */
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task)
-		rtc->irq_task->func(rtc->irq_task->private_data);
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-
 	wake_up_interruptible(&rtc->irq_queue);
 	kill_fasync(&rtc->async_queue, SIGIO, POLL_IN);
 }
@@ -750,28 +744,16 @@ static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
  * Context: any
  *
  * Note that rtc_irq_set_freq() should previously have been used to
- * specify the desired frequency of periodic IRQ task->func() callbacks.
+ * specify the desired frequency of periodic IRQ.
  */
 int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled)
 {
 	int err = 0;
-	unsigned long flags;
 
-retry:
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != NULL && task == NULL)
-		err = -EBUSY;
-	else if (rtc->irq_task != task)
-		err = -EACCES;
-	else {
-		if (rtc_update_hrtimer(rtc, enabled) < 0) {
-			spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-			cpu_relax();
-			goto retry;
-		}
-		rtc->pie_enabled = enabled;
-	}
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+	while (rtc_update_hrtimer(rtc, enabled) < 0)
+		cpu_relax();
+
+	rtc->pie_enabled = enabled;
 
 	trace_rtc_irq_set_state(enabled, err);
 	return err;
@@ -782,7 +764,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
  * rtc_irq_set_freq - set 2^N Hz periodic IRQ frequency for IRQ
  * @rtc: the rtc device
  * @task: currently registered with rtc_irq_register()
- * @freq: positive frequency with which task->func() will be called
+ * @freq: positive frequency
  * Context: any
  *
  * Note that rtc_irq_set_state() is used to enable or disable the
@@ -791,25 +773,13 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
 int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 {
 	int err = 0;
-	unsigned long flags;
 
 	if (freq <= 0 || freq > RTC_MAX_FREQ)
 		return -EINVAL;
-retry:
-	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != NULL && task == NULL)
-		err = -EBUSY;
-	else if (rtc->irq_task != task)
-		err = -EACCES;
-	else {
-		rtc->irq_freq = freq;
-		if (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) {
-			spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
-			cpu_relax();
-			goto retry;
-		}
-	}
-	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	rtc->irq_freq = freq;
+	while (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0)
+		cpu_relax();
 
 	trace_rtc_irq_set_freq(freq, err);
 	return err;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index f868d6b619ab..8cc23fdfd4ee 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -121,8 +121,6 @@ struct rtc_device {
 	wait_queue_head_t irq_queue;
 	struct fasync_struct *async_queue;
 
-	struct rtc_task *irq_task;
-	spinlock_t irq_task_lock;
 	int irq_freq;
 	int max_user_freq;
 
-- 
cgit v1.2.3


From 8719d3c9188b38db462a77ecd8c7a8e25e7e8c4c Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 25 Jul 2018 15:07:09 +0200
Subject: rtc: simplify rtc_irq_set_state/rtc_irq_set_freq

The PIE doesn't handle tasks anymore, remove the pointer from the
interface.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 4 ++--
 drivers/rtc/rtc-dev.c   | 8 ++++----
 include/linux/rtc.h     | 6 ++----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index ae0d67610c7b..76eb3a2957cc 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -746,7 +746,7 @@ static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
  * Note that rtc_irq_set_freq() should previously have been used to
  * specify the desired frequency of periodic IRQ.
  */
-int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled)
+int rtc_irq_set_state(struct rtc_device *rtc, int enabled)
 {
 	int err = 0;
 
@@ -770,7 +770,7 @@ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
  * Note that rtc_irq_set_state() is used to enable or disable the
  * periodic IRQs.
  */
-int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
+int rtc_irq_set_freq(struct rtc_device *rtc, int freq)
 {
 	int err = 0;
 
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index efa221e8bc22..43d962a9c210 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -341,11 +341,11 @@ static long rtc_dev_ioctl(struct file *file,
 		return rtc_set_time(rtc, &tm);
 
 	case RTC_PIE_ON:
-		err = rtc_irq_set_state(rtc, NULL, 1);
+		err = rtc_irq_set_state(rtc, 1);
 		break;
 
 	case RTC_PIE_OFF:
-		err = rtc_irq_set_state(rtc, NULL, 0);
+		err = rtc_irq_set_state(rtc, 0);
 		break;
 
 	case RTC_AIE_ON:
@@ -365,7 +365,7 @@ static long rtc_dev_ioctl(struct file *file,
 		return rtc_update_irq_enable(rtc, 0);
 
 	case RTC_IRQP_SET:
-		err = rtc_irq_set_freq(rtc, NULL, arg);
+		err = rtc_irq_set_freq(rtc, arg);
 		break;
 
 	case RTC_IRQP_READ:
@@ -427,7 +427,7 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 	/* Keep ioctl until all drivers are converted */
 	rtc_dev_ioctl(file, RTC_UIE_OFF, 0);
 	rtc_update_irq_enable(rtc, 0);
-	rtc_irq_set_state(rtc, NULL, 0);
+	rtc_irq_set_state(rtc, 0);
 
 	clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags);
 	return 0;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 8cc23fdfd4ee..bf4d375025d1 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -202,10 +202,8 @@ extern void rtc_update_irq(struct rtc_device *rtc,
 extern struct rtc_device *rtc_class_open(const char *name);
 extern void rtc_class_close(struct rtc_device *rtc);
 
-extern int rtc_irq_set_state(struct rtc_device *rtc,
-				struct rtc_task *task, int enabled);
-extern int rtc_irq_set_freq(struct rtc_device *rtc,
-				struct rtc_task *task, int freq);
+extern int rtc_irq_set_state(struct rtc_device *rtc, int enabled);
+extern int rtc_irq_set_freq(struct rtc_device *rtc, int freq);
 extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
 extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
-- 
cgit v1.2.3


From 2a8536f6e8464b8988baa7db881f30721616b0da Mon Sep 17 00:00:00 2001
From: Jeykumar Sankaran <jsanka@codeaurora.org>
Date: Tue, 5 Jun 2018 19:00:54 -0700
Subject: drm: add msm compressed format modifiers

Qualcomm Snapdragon chipsets uses compressed format
to optimize BW across multiple IP's. This change adds
needed modifier support in drm for a simple 4x4 tile
based compressed variants of base formats.

Changes in v3:
- Removed duplicate entry for DRM_FORMAT_MOD_QCOM_COMPRESSED (Rob Clark)
Changes in v4:
- Remove all modifiers aside from COMPRESSED, this includes tiled and
  10-bit

Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/drm_fourcc.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index d43949b5bb3e..721ab7e54d96 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -299,6 +299,19 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE	fourcc_mod_code(SAMSUNG, 1)
 
+/*
+ * Qualcomm Compressed Format
+ *
+ * Refers to a compressed variant of the base format that is compressed.
+ * Implementation may be platform and base-format specific.
+ *
+ * Each macrotile consists of m x n (mostly 4 x 4) tiles.
+ * Pixel data pitch/stride is aligned with macrotile width.
+ * Pixel data height is aligned with macrotile height.
+ * Entire pixel data buffer is aligned with 4k(bytes).
+ */
+#define DRM_FORMAT_MOD_QCOM_COMPRESSED	fourcc_mod_code(QCOM, 1)
+
 /* Vivante framebuffer modifiers */
 
 /*
-- 
cgit v1.2.3


From 1f45a4db3657c5ecafb7d3331536c987e6d18311 Mon Sep 17 00:00:00 2001
From: Paul McKenney <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 11:21:43 -0700
Subject: srcu: Add notrace variants of srcu_read_{lock,unlock}

This is needed for a future tracepoint patch that uses srcu, and to make
sure it doesn't call into lockdep.

tracepoint code already calls notrace variants for rcu_read_lock_sched
so this patch does the same for srcu which will be used in a later
patch. Keeps it consistent with rcu-sched.

[Joel: Added commit message]
Link: http://lkml.kernel.org/r/20180628182149.226164-2-joel@joelfernandes.org

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Paul McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 91494d7e8e41..3e72a291c401 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -195,6 +195,16 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 	return retval;
 }
 
+/* Used by tracing, cannot be traced and cannot invoke lockdep. */
+static inline notrace int
+srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp)
+{
+	int retval;
+
+	retval = __srcu_read_lock(sp);
+	return retval;
+}
+
 /**
  * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
  * @sp: srcu_struct in which to unregister the old reader.
@@ -209,6 +219,13 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__srcu_read_unlock(sp, idx);
 }
 
+/* Used by tracing, cannot be traced and cannot call lockdep. */
+static inline notrace void
+srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp)
+{
+	__srcu_read_unlock(sp, idx);
+}
+
 /**
  * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
  *
-- 
cgit v1.2.3


From 0b764a6e4e19dc254caca636002eaa47b1d0b0af Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 28 Jun 2018 11:21:44 -0700
Subject: srcu: Add notrace variant of srcu_dereference

In the last patch in this series, we are making lockdep register hooks
onto the irq_{disable,enable} tracepoints. These tracepoints use the
_rcuidle tracepoint variant. In this series we switch the _rcuidle
tracepoint callers to use SRCU instead of sched-RCU. Inorder to
dereference the pointer to the probe functions, we could call
srcu_dereference, however this API will call back into lockdep to check
if the lock is held *before* the lockdep probe hooks have a chance to
run and annotate the IRQ enabled/disabled state.

For this reason we need a notrace variant of srcu_dereference since
otherwise we get lockdep splats. This patch adds the needed
srcu_dereference_notrace variant.

Link: http://lkml.kernel.org/r/20180628182149.226164-3-joel@joelfernandes.org

Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 3e72a291c401..67135d4a8a30 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -169,6 +169,11 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp)
  */
 #define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
 
+/**
+ * srcu_dereference_notrace - no tracing and no lockdep calls from here
+ */
+#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1)
+
 /**
  * srcu_read_lock - register a new reader for an SRCU-protected structure.
  * @sp: srcu_struct in which to register the new reader.
-- 
cgit v1.2.3


From 72809cbf6748830ae4a59a45bcb2367a6c24d74d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 26 Jul 2018 21:44:32 +0900
Subject: tracing: Remove orphaned function ftrace_nr_registered_ops()

Remove ftrace_nr_registered_ops() because it is no longer used.

ftrace_nr_registered_ops() has been introduced by commit ea701f11da44
("ftrace: Add selftest to test function trace recursion protection"), but
its caller has been removed by commit 05cbbf643b8e ("tracing: Fix selftest
function recursion accounting"). So it is not called anymore.

Link: http://lkml.kernel.org/r/153260907227.12474.5234899025934963683.stgit@devbox

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  6 ------
 kernel/trace/ftrace.c  | 24 ------------------------
 2 files changed, 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ebb77674be90..63af5eb0ff46 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -234,10 +234,6 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1,
  */
 #define register_ftrace_function(ops) ({ 0; })
 #define unregister_ftrace_function(ops) ({ 0; })
-static inline int ftrace_nr_registered_ops(void)
-{
-	return 0;
-}
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
@@ -328,8 +324,6 @@ struct seq_file;
 
 extern int ftrace_text_reserved(const void *start, const void *end);
 
-extern int ftrace_nr_registered_ops(void);
-
 struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
 
 bool is_ftrace_trampoline(unsigned long addr);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b8b3324ca1c8..0d380a98a880 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -157,30 +157,6 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
 #endif
 }
 
-/**
- * ftrace_nr_registered_ops - return number of ops registered
- *
- * Returns the number of ftrace_ops registered and tracing functions
- */
-int ftrace_nr_registered_ops(void)
-{
-	struct ftrace_ops *ops;
-	int cnt = 0;
-
-	mutex_lock(&ftrace_lock);
-
-	for (ops = rcu_dereference_protected(ftrace_ops_list,
-					     lockdep_is_held(&ftrace_lock));
-	     ops != &ftrace_list_end;
-	     ops = rcu_dereference_protected(ops->next,
-					     lockdep_is_held(&ftrace_lock)))
-		cnt++;
-
-	mutex_unlock(&ftrace_lock);
-
-	return cnt;
-}
-
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
-- 
cgit v1.2.3


From cee104334c98dd04e9dd4d9a4fa4784f7f6aada9 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Mon, 16 Jul 2018 11:50:11 +0300
Subject: IB/core: Introduce and use sgid_attr in CM requests

For RoCE, when CM requests are received for RC and UD connections,
netdevice of the incoming request is unavailable. Because of that CM
requests are always forwarded to init_net namespace.

Now that we have the GID attribute available, introduce SGID attribute in
incoming CM requests and refer to the netdevice of it.  This is similar to
existing SGID attribute field in outgoing CM requests for RC and UD
transports.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cm.c  |  5 ++++-
 drivers/infiniband/core/cma.c | 28 ++++++++++++++++++++++------
 include/rdma/ib_cm.h          | 13 +++++++++++++
 3 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4724cb09b69d..6e39c27dca8e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1716,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work,
 	param->retry_count = cm_req_get_retry_count(req_msg);
 	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
 	param->srq = cm_req_get_srq(req_msg);
+	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &req_msg->private_data;
 }
 
@@ -3532,6 +3533,7 @@ out:
 EXPORT_SYMBOL(ib_send_cm_sidr_req);
 
 static void cm_format_sidr_req_event(struct cm_work *work,
+				     const struct cm_id_private *rx_cm_id,
 				     struct ib_cm_id *listen_id)
 {
 	struct cm_sidr_req_msg *sidr_req_msg;
@@ -3545,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work,
 	param->service_id = sidr_req_msg->service_id;
 	param->bth_pkey = cm_get_bth_pkey(work);
 	param->port = work->port->port_num;
+	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &sidr_req_msg->private_data;
 }
 
@@ -3602,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cm_id_priv->id.service_id = sidr_req_msg->service_id;
 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
 
-	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
+	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
 	cm_process_work(cm_id_priv, work);
 	cm_deref_id(cur_cm_id_priv);
 	return 0;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 338df1789884..693e025a1585 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1371,6 +1371,22 @@ static bool validate_net_dev(struct net_device *net_dev,
 	}
 }
 
+static struct net_device *
+roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
+{
+	const struct ib_gid_attr *sgid_attr = NULL;
+
+	if (ib_event->event == IB_CM_REQ_RECEIVED)
+		sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
+	else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+		sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
+
+	if (!sgid_attr)
+		return NULL;
+	dev_hold(sgid_attr->ndev);
+	return sgid_attr->ndev;
+}
+
 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
 					  struct cma_req_info *req)
 {
@@ -1386,8 +1402,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
 	if (err)
 		return ERR_PTR(err);
 
-	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
-					   gid, listen_addr);
+	if (rdma_protocol_roce(req->device, req->port))
+		net_dev = roce_get_net_dev_by_cm_event(ib_event);
+	else
+		net_dev = ib_get_net_dev_by_params(req->device, req->port,
+						   req->pkey,
+						   gid, listen_addr);
 	if (!net_dev)
 		return ERR_PTR(-ENODEV);
 
@@ -1508,10 +1528,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
 		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
 			/* Assuming the protocol is AF_IB */
 			*net_dev = NULL;
-		} else if (rdma_protocol_roce(req.device, req.port)) {
-			/* TODO find the net dev matching the request parameters
-			 * through the RoCE GID table */
-			*net_dev = NULL;
 		} else {
 			return ERR_CAST(*net_dev);
 		}
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index c98d603c0b63..568708a87239 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -120,6 +120,13 @@ struct ib_cm_req_event_param {
 	struct sa_path_rec	*primary_path;
 	struct sa_path_rec	*alternate_path;
 
+	/*
+	 * SGID attribute of the primary path. Currently only
+	 * useful for RoCE. Alternate path GID attributes
+	 * are not yet supported.
+	 */
+	const struct ib_gid_attr *ppath_sgid_attr;
+
 	__be64			remote_ca_guid;
 	u32			remote_qkey;
 	u32			remote_qpn;
@@ -226,6 +233,12 @@ struct ib_cm_apr_event_param {
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id		*listen_id;
 	__be64			service_id;
+
+	/*
+	 * SGID attribute of the request. Currently only
+	 * useful for RoCE.
+	 */
+	const struct ib_gid_attr *sgid_attr;
 	/* P_Key that was used by the GMP's BTH header */
 	u16			bth_pkey;
 	u8			port;
-- 
cgit v1.2.3


From 4cae99d9b5305ab8cccc839fccceb81ec9e5abda Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 25 Jul 2018 15:15:56 -0500
Subject: ALSA: memalloc: declare snd_sgbuf_aligned_pages() unconditionally

Make this helper inline function available for all platforms. This
helps solve 0-day compilation issues when CONFIG_SND_DMA_SGBUF is not
defined.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/memalloc.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index 9c3db3dce32b..c669900e6cbe 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -67,6 +67,14 @@ struct snd_dma_buffer {
 	void *private_data;	/* private for allocator; don't touch */
 };
 
+/*
+ * return the pages matching with the given byte size
+ */
+static inline unsigned int snd_sgbuf_aligned_pages(size_t size)
+{
+	return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
 #ifdef CONFIG_SND_DMA_SGBUF
 /*
  * Scatter-Gather generic device pages
@@ -90,14 +98,6 @@ struct snd_sg_buf {
 	struct device *dev;
 };
 
-/*
- * return the pages matching with the given byte size
- */
-static inline unsigned int snd_sgbuf_aligned_pages(size_t size)
-{
-	return (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-}
-
 /*
  * return the physical address at the corresponding offset
  */
-- 
cgit v1.2.3


From 0b62834e73e332fea76a340d62aaf50c732b17e0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 23:17:17 +0200
Subject: ALSA: pcm: Add snd_mask_set_format() helper for standard usages

Many drivers calling snd_mask_set() need to do ugly cast with __force
for shutting up the sparse warnings.  Actually almost all of them are
about setting the format, so it's far better to provide a common
helper snd_mask_set_format() to pass SNDRV_PCM_FORMAT_* directly
without the cast.

There are a few other calls of snd_mask_set(), but they are in the PCM
core code, so we leave them for now.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/pcm_params.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h
index c704357775fc..2dd37cada7c0 100644
--- a/include/sound/pcm_params.h
+++ b/include/sound/pcm_params.h
@@ -87,6 +87,13 @@ static inline void snd_mask_set(struct snd_mask *mask, unsigned int val)
 	mask->bits[MASK_OFS(val)] |= MASK_BIT(val);
 }
 
+/* Most of drivers need only this one */
+static inline void snd_mask_set_format(struct snd_mask *mask,
+				       snd_pcm_format_t format)
+{
+	snd_mask_set(mask, (__force unsigned int)format);
+}
+
 static inline void snd_mask_reset(struct snd_mask *mask, unsigned int val)
 {
 	mask->bits[MASK_OFS(val)] &= ~MASK_BIT(val);
@@ -369,8 +376,7 @@ static inline int params_physical_width(const struct snd_pcm_hw_params *p)
 static inline void
 params_set_format(struct snd_pcm_hw_params *p, snd_pcm_format_t fmt)
 {
-	snd_mask_set(hw_param_mask(p, SNDRV_PCM_HW_PARAM_FORMAT),
-		(__force int)fmt);
+	snd_mask_set_format(hw_param_mask(p, SNDRV_PCM_HW_PARAM_FORMAT), fmt);
 }
 
 #endif /* __SOUND_PCM_PARAMS_H */
-- 
cgit v1.2.3


From af9b6d7570ca9afbbc6076ab7920d8f00f7e55c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 29 Jun 2018 12:45:53 -0400
Subject: pNFS: Parse the results of layoutget on open even if permissions
 checks fail

Even if the results of the permissions checks failed, we should parse
the results of the layout on open call so that we can return the
layout if required.
Note that we also want to ignore the sequence counter for whether or not
a layout recall occurred. If the recall pertained to our OPEN, then the
callback will know, and will attempt to wait for us to finih processing
anyway.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/callback_proc.c    | 2 --
 fs/nfs/nfs4proc.c         | 5 +++--
 fs/nfs/pnfs.c             | 4 ----
 include/linux/nfs_fs_sb.h | 1 -
 include/linux/nfs_xdr.h   | 1 -
 5 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index efca3d6c89f2..43ba390bb653 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -331,8 +331,6 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
 				    struct cb_layoutrecallargs *args)
 {
-	write_seqcount_begin(&clp->cl_callback_count);
-	write_seqcount_end(&clp->cl_callback_count);
 	if (args->cbl_recall_type == RETURN_FILE)
 		return initiate_file_draining(clp, args);
 	return initiate_bulk_draining(clp, args);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6dd146885da9..5a8190ec31a2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2843,6 +2843,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 				nfs_save_change_attribute(d_inode(opendata->dir)));
 	}
 
+	/* Parse layoutget results before we check for access */
+	pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
+
 	ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
 	if (ret != 0)
 		goto out;
@@ -2851,8 +2854,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 		nfs_inode_attach_open_context(ctx);
 		if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
 			nfs4_schedule_stateid_recovery(server, state);
-		else
-			pnfs_parse_lgopen(state->inode, opendata->lgp, ctx);
 	}
 
 out:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 030c39c107c2..7fdac8b504dd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1018,7 +1018,6 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
 	nfs4_stateid_copy(&lgp->args.stateid, stateid);
 	lgp->gfp_flags = gfp_flags;
 	lgp->cred = get_rpccred(ctx->cred);
-	lgp->callback_count = raw_seqcount_begin(&server->nfs_client->cl_callback_count);
 	return lgp;
 }
 
@@ -2181,9 +2180,6 @@ void pnfs_parse_lgopen(struct inode *ino, struct nfs4_layoutget *lgp,
 	} else
 		lo = NFS_I(lgp->args.inode)->layout;
 
-	if (read_seqcount_retry(&srv->nfs_client->cl_callback_count,
-				lgp->callback_count))
-		return;
 	lseg = pnfs_layout_process(lgp);
 	if (!IS_ERR(lseg)) {
 		iomode = lgp->args.range.iomode;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 74ae3e1d19a0..2c18d618604e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -28,7 +28,6 @@ struct nfs41_impl_id;
 struct nfs_client {
 	refcount_t		cl_count;
 	atomic_t		cl_mds_count;
-	seqcount_t		cl_callback_count;
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 712eed156d09..3b7325cfb291 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -271,7 +271,6 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_args args;
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
-	unsigned callback_count;
 	gfp_t gfp_flags;
 };
 
-- 
cgit v1.2.3


From 359f642700f2ff05d9c94cd9216c97af7b8e9553 Mon Sep 17 00:00:00 2001
From: Greg Edwards <gedwards@ddn.com>
Date: Wed, 25 Jul 2018 10:22:58 -0400
Subject: block: move bio_integrity_{intervals,bytes} into blkdev.h

This allows bio_integrity_bytes() to be called from drivers instead of
open coding it.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Edwards <gedwards@ddn.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c  | 22 ----------------------
 include/linux/blkdev.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index add7c7c85335..67b5fb861a51 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -159,28 +159,6 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
-/**
- * bio_integrity_intervals - Return number of integrity intervals for a bio
- * @bi:		blk_integrity profile for device
- * @sectors:	Size of the bio in 512-byte sectors
- *
- * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the data integrity
- * interval size of the storage device.  Convert the block layer sectors
- * to the appropriate number of integrity intervals.
- */
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
-						   unsigned int sectors)
-{
-	return sectors >> (bi->interval_exp - 9);
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
-					       unsigned int sectors)
-{
-	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
-}
-
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 331a6cb8805f..050d599f5ea9 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1865,6 +1865,28 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 				bip_next->bip_vec[0].bv_offset);
 }
 
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi:		blk_integrity profile for device
+ * @sectors:	Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device.  Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct bio;
@@ -1938,6 +1960,18 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
 	return false;
 }
 
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return 0;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 struct block_device_operations {
-- 
cgit v1.2.3


From 0ab5fe5374743d5a279b1ff6297ef2c54d06cd5f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 20 Jun 2018 15:22:52 +1000
Subject: fsi: Add new central chardev support

The various FSI devices (sbefifo, occ, scom, more to come)
currently use misc devices.

This is problematic as the minor device space for misc is
limited and there can be a lot of them. Also it limits our
ability to move them to a dedicated /dev/fsi directory or
to be smart about device naming and numbering.

It also means we have IDAs on every single of these drivers

This creates a common fsi "device_type" for the optional
/dev/fsi grouping and a dev_t allocator for all FSI devices.

"Legacy" devices get to use a backward compatible numbering
scheme (as long as chip id <16 and there's only one copy
of a given unit type per chip).

A single major number and a single IDA are shared for all
FSI devices.

This doesn't convert the FSI device drivers to use the new
scheme yet, they will be converted individually.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/fsi/Kconfig    | 15 ++++++++
 drivers/fsi/fsi-core.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fsi.h    | 12 ++++++-
 3 files changed, 119 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index 8d82b1e60514..af3a20dd5aa4 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -12,6 +12,21 @@ menuconfig FSI
 
 if FSI
 
+config FSI_NEW_DEV_NODE
+	bool "Create '/dev/fsi' directory for char devices"
+	default n
+	---help---
+	This option causes char devices created for FSI devices to be
+	located under a common /dev/fsi/ directory. Set to N unless your
+	userspace has been updated to handle the new location.
+
+	Additionally, it also causes the char device names to be offset
+	by one so that chip 0 will have /dev/scom1 and chip1 /dev/scom2
+	to match old userspace expectations.
+
+	New userspace will use udev rules to generate predictable access
+	symlinks in /dev/fsi/by-path when this option is enabled.
+
 config FSI_MASTER_GPIO
 	tristate "GPIO-based FSI master"
 	depends on GPIOLIB
diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index eab6c5c4990e..faa1760a5a40 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -92,6 +92,13 @@ struct fsi_slave {
 static const int slave_retries = 2;
 static int discard_errors;
 
+static dev_t fsi_base_dev;
+static DEFINE_IDA(fsi_minor_ida);
+#define FSI_CHAR_MAX_DEVICES	0x1000
+
+/* Legacy /dev numbering: 4 devices per chip, 16 chips */
+#define FSI_CHAR_LEGACY_TOP	64
+
 static int fsi_master_read(struct fsi_master *master, int link,
 		uint8_t slave_id, uint32_t addr, void *val, size_t size);
 static int fsi_master_write(struct fsi_master *master, int link,
@@ -627,6 +634,7 @@ static void fsi_slave_release(struct device *dev)
 {
 	struct fsi_slave *slave = to_fsi_slave(dev);
 
+	fsi_free_minor(slave->dev.devt);
 	of_node_put(dev->of_node);
 	kfree(slave);
 }
@@ -729,6 +737,75 @@ static ssize_t chip_id_show(struct device *dev,
 
 static DEVICE_ATTR_RO(chip_id);
 
+static char *fsi_cdev_devnode(struct device *dev, umode_t *mode,
+			      kuid_t *uid, kgid_t *gid)
+{
+#ifdef CONFIG_FSI_NEW_DEV_NODE
+	return kasprintf(GFP_KERNEL, "fsi/%s", dev_name(dev));
+#else
+	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+#endif
+}
+
+const struct device_type fsi_cdev_type = {
+	.name = "fsi-cdev",
+	.devnode = fsi_cdev_devnode,
+};
+EXPORT_SYMBOL_GPL(fsi_cdev_type);
+
+/* Backward compatible /dev/ numbering in "old style" mode */
+static int fsi_adjust_index(int index)
+{
+#ifdef CONFIG_FSI_NEW_DEV_NODE
+	return index;
+#else
+	return index + 1;
+#endif
+}
+
+static int __fsi_get_new_minor(struct fsi_slave *slave, enum fsi_dev_type type,
+			       dev_t *out_dev, int *out_index)
+{
+	int cid = slave->chip_id;
+	int id;
+
+	/* Check if we qualify for legacy numbering */
+	if (cid >= 0 && cid < 16 && type < 4) {
+		/* Try reserving the legacy number */
+		id = (cid << 4) | type;
+		id = ida_simple_get(&fsi_minor_ida, id, id + 1, GFP_KERNEL);
+		if (id >= 0) {
+			*out_index = fsi_adjust_index(cid);
+			*out_dev = fsi_base_dev + id;
+			return 0;
+		}
+		/* Other failure */
+		if (id != -ENOSPC)
+			return id;
+		/* Fallback to non-legacy allocation */
+	}
+	id = ida_simple_get(&fsi_minor_ida, FSI_CHAR_LEGACY_TOP,
+			    FSI_CHAR_MAX_DEVICES, GFP_KERNEL);
+	if (id < 0)
+		return id;
+	*out_index = fsi_adjust_index(id);
+	*out_dev = fsi_base_dev + id;
+	return 0;
+}
+
+int fsi_get_new_minor(struct fsi_device *fdev, enum fsi_dev_type type,
+		      dev_t *out_dev, int *out_index)
+{
+	return __fsi_get_new_minor(fdev->slave, type, out_dev, out_index);
+}
+EXPORT_SYMBOL_GPL(fsi_get_new_minor);
+
+void fsi_free_minor(dev_t dev)
+{
+	ida_simple_remove(&fsi_minor_ida, MINOR(dev));
+}
+EXPORT_SYMBOL_GPL(fsi_free_minor);
+
 static int fsi_slave_init(struct fsi_master *master, int link, uint8_t id)
 {
 	uint32_t chip_id;
@@ -953,7 +1030,7 @@ static int fsi_slave_remove_device(struct device *dev, void *arg)
 static int fsi_master_remove_slave(struct device *dev, void *arg)
 {
 	device_for_each_child(dev, NULL, fsi_slave_remove_device);
-	device_unregister(dev);
+	put_device(dev);
 	return 0;
 }
 
@@ -1091,13 +1168,27 @@ EXPORT_SYMBOL_GPL(fsi_bus_type);
 
 static int __init fsi_init(void)
 {
-	return bus_register(&fsi_bus_type);
+	int rc;
+
+	rc = alloc_chrdev_region(&fsi_base_dev, 0, FSI_CHAR_MAX_DEVICES, "fsi");
+	if (rc)
+		return rc;
+	rc = bus_register(&fsi_bus_type);
+	if (rc)
+		goto fail_bus;
+	return 0;
+
+ fail_bus:
+	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
+	return rc;
 }
 postcore_initcall(fsi_init);
 
 static void fsi_exit(void)
 {
 	bus_unregister(&fsi_bus_type);
+	unregister_chrdev_region(fsi_base_dev, FSI_CHAR_MAX_DEVICES);
+	ida_destroy(&fsi_minor_ida);
 }
 module_exit(fsi_exit);
 module_param(discard_errors, int, 0664);
diff --git a/include/linux/fsi.h b/include/linux/fsi.h
index 141fd38d061f..ec3be0d5b786 100644
--- a/include/linux/fsi.h
+++ b/include/linux/fsi.h
@@ -76,8 +76,18 @@ extern int fsi_slave_read(struct fsi_slave *slave, uint32_t addr,
 extern int fsi_slave_write(struct fsi_slave *slave, uint32_t addr,
 		const void *val, size_t size);
 
+extern struct bus_type fsi_bus_type;
+extern const struct device_type fsi_cdev_type;
 
+enum fsi_dev_type {
+	fsi_dev_cfam,
+	fsi_dev_sbefifo,
+	fsi_dev_scom,
+	fsi_dev_occ
+};
 
-extern struct bus_type fsi_bus_type;
+extern int fsi_get_new_minor(struct fsi_device *fdev, enum fsi_dev_type type,
+			     dev_t *out_dev, int *out_index);
+extern void fsi_free_minor(dev_t dev);
 
 #endif /* LINUX_FSI_H */
-- 
cgit v1.2.3


From 179909ecafc3bae1f34289e88bacd45e391f0554 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 24 Jul 2018 11:38:14 -0700
Subject: Input: stop telling users to snail-mail Vojtech

I do not think Vojtech wants snail mail these days (and he mentioned that
nobody has ever sent him snail mail), and the address is not even valid
anymore, so let's remove snail-mail instructions from the sources.

Acked-by: Vojtech Pavlik <vojtech@suse.cz>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/evbug.c                          | 4 ----
 drivers/input/gameport/emu10k1-gp.c            | 4 ----
 drivers/input/gameport/lightning.c             | 4 ----
 drivers/input/gameport/ns558.c                 | 4 ----
 drivers/input/joystick/a3d.c                   | 4 ----
 drivers/input/joystick/adi.c                   | 4 ----
 drivers/input/joystick/amijoy.c                | 4 ----
 drivers/input/joystick/analog.c                | 4 ----
 drivers/input/joystick/cobra.c                 | 4 ----
 drivers/input/joystick/db9.c                   | 4 ----
 drivers/input/joystick/gamecon.c               | 4 ----
 drivers/input/joystick/gf2k.c                  | 4 ----
 drivers/input/joystick/grip.c                  | 4 ----
 drivers/input/joystick/guillemot.c             | 4 ----
 drivers/input/joystick/iforce/iforce-ff.c      | 4 ----
 drivers/input/joystick/iforce/iforce-main.c    | 4 ----
 drivers/input/joystick/iforce/iforce-packets.c | 4 ----
 drivers/input/joystick/iforce/iforce-serio.c   | 4 ----
 drivers/input/joystick/iforce/iforce-usb.c     | 4 ----
 drivers/input/joystick/iforce/iforce.h         | 4 ----
 drivers/input/joystick/interact.c              | 4 ----
 drivers/input/joystick/joydump.c               | 4 ----
 drivers/input/joystick/magellan.c              | 4 ----
 drivers/input/joystick/sidewinder.c            | 4 ----
 drivers/input/joystick/spaceball.c             | 4 ----
 drivers/input/joystick/spaceorb.c              | 4 ----
 drivers/input/joystick/stinger.c               | 4 ----
 drivers/input/joystick/tmdc.c                  | 4 ----
 drivers/input/joystick/turbografx.c            | 4 ----
 drivers/input/joystick/warrior.c               | 4 ----
 drivers/input/keyboard/amikbd.c                | 4 ----
 drivers/input/keyboard/atakbd.c                | 4 ----
 drivers/input/keyboard/newtonkbd.c             | 4 ----
 drivers/input/keyboard/stowaway.c              | 4 ----
 drivers/input/keyboard/sunkbd.c                | 4 ----
 drivers/input/keyboard/xtkbd.c                 | 4 ----
 drivers/input/mouse/inport.c                   | 4 ----
 drivers/input/mouse/logibm.c                   | 4 ----
 drivers/input/mouse/pc110pad.c                 | 4 ----
 drivers/input/mouse/sermouse.c                 | 4 ----
 drivers/input/serio/ct82c710.c                 | 4 ----
 drivers/input/serio/q40kbd.c                   | 4 ----
 drivers/input/serio/rpckbd.c                   | 4 ----
 drivers/input/serio/serio.c                    | 4 ----
 drivers/input/touchscreen/gunze.c              | 4 ----
 include/linux/joystick.h                       | 4 ----
 include/uapi/linux/joystick.h                  | 4 ----
 47 files changed, 188 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evbug.c b/drivers/input/evbug.c
index cd4e6679d61a..5419c1c1f621 100644
--- a/drivers/input/evbug.c
+++ b/drivers/input/evbug.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/gameport/emu10k1-gp.c b/drivers/input/gameport/emu10k1-gp.c
index 2909e9561cf3..afdc20ca0e24 100644
--- a/drivers/input/gameport/emu10k1-gp.c
+++ b/drivers/input/gameport/emu10k1-gp.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/gameport/lightning.c b/drivers/input/gameport/lightning.c
index 85d6ee09f11f..c6e74c7945cb 100644
--- a/drivers/input/gameport/lightning.c
+++ b/drivers/input/gameport/lightning.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/gameport/ns558.c b/drivers/input/gameport/ns558.c
index 7c217848613e..6437645858f9 100644
--- a/drivers/input/gameport/ns558.c
+++ b/drivers/input/gameport/ns558.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <asm/io.h>
diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c
index 55efdfc7eb62..98307039a534 100644
--- a/drivers/input/joystick/a3d.c
+++ b/drivers/input/joystick/a3d.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c
index 15a71acb6997..f466c0d34247 100644
--- a/drivers/input/joystick/adi.c
+++ b/drivers/input/joystick/adi.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index c65b5fa69f1e..2b82a838c511 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/types.h>
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index c79dbcb4d146..2b445c8d3fcd 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c
index ae3ee24a2368..14cb956beac4 100644
--- a/drivers/input/joystick/cobra.c
+++ b/drivers/input/joystick/cobra.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index de0dd4756c84..7721cc0bf569 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 4e10ffdf8a36..d62e73dd9f7f 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -24,10 +24,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c
index 0f519db64748..50a60065ab14 100644
--- a/drivers/input/joystick/gf2k.c
+++ b/drivers/input/joystick/gf2k.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c
index eac9c5b8d73e..e10395ba62bc 100644
--- a/drivers/input/joystick/grip.c
+++ b/drivers/input/joystick/grip.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c
index a9ac2f9cfce0..43ff817d80ac 100644
--- a/drivers/input/joystick/guillemot.c
+++ b/drivers/input/joystick/guillemot.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/iforce/iforce-ff.c b/drivers/input/joystick/iforce/iforce-ff.c
index 0de9a0943a9e..2ed923874bbc 100644
--- a/drivers/input/joystick/iforce/iforce-ff.c
+++ b/drivers/input/joystick/iforce/iforce-ff.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index daeeb4c7e3b0..95bbd61d3fbe 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-packets.c b/drivers/input/joystick/iforce/iforce-packets.c
index fedaaea5c16d..1cea950276dd 100644
--- a/drivers/input/joystick/iforce/iforce-packets.c
+++ b/drivers/input/joystick/iforce/iforce-packets.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-serio.c b/drivers/input/joystick/iforce/iforce-serio.c
index 154e827b559b..f4ba4a751fe0 100644
--- a/drivers/input/joystick/iforce/iforce-serio.c
+++ b/drivers/input/joystick/iforce/iforce-serio.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index e8724f1a4a25..78073259c9a1 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include "iforce.h"
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 96ae4f5bd0eb..aee43d14f699 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -19,10 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c
index 17c2c800743c..598788b3da62 100644
--- a/drivers/input/joystick/interact.c
+++ b/drivers/input/joystick/interact.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/joydump.c b/drivers/input/joystick/joydump.c
index d1c6e4846a4a..2ea05ade4d4e 100644
--- a/drivers/input/joystick/joydump.c
+++ b/drivers/input/joystick/joydump.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c
index a9d0e3edca94..95a34ab34fc3 100644
--- a/drivers/input/joystick/magellan.c
+++ b/drivers/input/joystick/magellan.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c
index 5e602a6852b7..f46bf4d41972 100644
--- a/drivers/input/joystick/sidewinder.c
+++ b/drivers/input/joystick/sidewinder.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index bb3faeff8cac..ffb9c1f495b6 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -24,10 +24,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/spaceorb.c b/drivers/input/joystick/spaceorb.c
index 05da0ed514e2..20540ee71d7f 100644
--- a/drivers/input/joystick/spaceorb.c
+++ b/drivers/input/joystick/spaceorb.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/stinger.c b/drivers/input/joystick/stinger.c
index cb10e7b097ae..ba8579435d6c 100644
--- a/drivers/input/joystick/stinger.c
+++ b/drivers/input/joystick/stinger.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index 7e17cde464f0..6f4a01cfe79f 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index e2685753e460..bf2f9925e416 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/joystick/warrior.c b/drivers/input/joystick/warrior.c
index ef5391ba4470..b60cab168e2a 100644
--- a/drivers/input/joystick/warrior.c
+++ b/drivers/input/joystick/warrior.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *  Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index e04a3b4e55d6..420e33c49e58 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index f1235831283d..6f62da2909ec 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -34,10 +34,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/keyboard/newtonkbd.c b/drivers/input/keyboard/newtonkbd.c
index fb9b8e23ab93..de26e2df0ad5 100644
--- a/drivers/input/keyboard/newtonkbd.c
+++ b/drivers/input/keyboard/newtonkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <j.cormack@doc.ic.ac.uk>, or by paper mail:
- * Justin Cormack, 68 Dartmouth Park Road, London NW5 1SN, UK.
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/keyboard/stowaway.c b/drivers/input/keyboard/stowaway.c
index 8b6de9a692dc..15a5e74dbe91 100644
--- a/drivers/input/keyboard/stowaway.c
+++ b/drivers/input/keyboard/stowaway.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <marek.vasut@gmail.com>, or by paper mail:
- * Marek Vasut, Liskovecka 559, Frydek-Mistek, 738 01 Czech Republic
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index c95707ea2656..ad5d7f94f95a 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/keyboard/xtkbd.c b/drivers/input/keyboard/xtkbd.c
index 8f64b9ded8d0..f7598114b962 100644
--- a/drivers/input/keyboard/xtkbd.c
+++ b/drivers/input/keyboard/xtkbd.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/slab.h>
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 9ce71dfa0de1..b9e68606c44a 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -26,10 +26,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index 6f165e053f4d..2fd6c84cd5b7 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -27,10 +27,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 7b02b652e267..b8965e6bc890 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/mouse/sermouse.c b/drivers/input/mouse/sermouse.c
index 8df526620ebf..d27816ac038f 100644
--- a/drivers/input/mouse/sermouse.c
+++ b/drivers/input/mouse/sermouse.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/serio/ct82c710.c b/drivers/input/serio/ct82c710.c
index 9c54c43c9749..2d1e2993b5a8 100644
--- a/drivers/input/serio/ct82c710.c
+++ b/drivers/input/serio/ct82c710.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/delay.h>
diff --git a/drivers/input/serio/q40kbd.c b/drivers/input/serio/q40kbd.c
index d0fccc8ec259..fbb6b33845fa 100644
--- a/drivers/input/serio/q40kbd.c
+++ b/drivers/input/serio/q40kbd.c
@@ -23,10 +23,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/serio/rpckbd.c b/drivers/input/serio/rpckbd.c
index 8cf964736902..a308d7811427 100644
--- a/drivers/input/serio/rpckbd.c
+++ b/drivers/input/serio/rpckbd.c
@@ -21,10 +21,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/module.h>
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 24a90c8db5b3..2e1fb0649260 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/input/touchscreen/gunze.c b/drivers/input/touchscreen/gunze.c
index 481586909d28..054c2537b392 100644
--- a/drivers/input/touchscreen/gunze.c
+++ b/drivers/input/touchscreen/gunze.c
@@ -20,10 +20,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
  */
 
 #include <linux/errno.h>
diff --git a/include/linux/joystick.h b/include/linux/joystick.h
index cbf2aa9e93b9..5153f5b9294c 100644
--- a/include/linux/joystick.h
+++ b/include/linux/joystick.h
@@ -17,10 +17,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * 
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
- * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic
  */
 #ifndef _LINUX_JOYSTICK_H
 #define _LINUX_JOYSTICK_H
diff --git a/include/uapi/linux/joystick.h b/include/uapi/linux/joystick.h
index 64aabb84a66d..192bf2cf182d 100644
--- a/include/uapi/linux/joystick.h
+++ b/include/uapi/linux/joystick.h
@@ -18,10 +18,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- * 
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@suse.cz>, or by paper mail:
- * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic
  */
 #ifndef _UAPI_LINUX_JOYSTICK_H
 #define _UAPI_LINUX_JOYSTICK_H
-- 
cgit v1.2.3


From b512719f771a82180211c9a315b8a7f628832b3d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 26 Jul 2018 16:37:08 -0700
Subject: delayacct: fix crash in delayacct_blkio_end() after delayacct init
 failure

While forking, if delayacct init fails due to memory shortage, it
continues expecting all delayacct users to check task->delays pointer
against NULL before dereferencing it, which all of them used to do.

Commit c96f5471ce7d ("delayacct: Account blkio completion on the correct
task"), while updating delayacct_blkio_end() to take the target task
instead of always using %current, made the function test NULL on
%current->delays and then continue to operated on @p->delays.  If
%current succeeded init while @p didn't, it leads to the following
crash.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
 IP: __delayacct_blkio_end+0xc/0x40
 PGD 8000001fd07e1067 P4D 8000001fd07e1067 PUD 1fcffbb067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 4 PID: 25774 Comm: QIOThread0 Not tainted 4.16.0-9_fbk1_rc2_1180_g6b593215b4d7 #9
 RIP: 0010:__delayacct_blkio_end+0xc/0x40
 Call Trace:
  try_to_wake_up+0x2c0/0x600
  autoremove_wake_function+0xe/0x30
  __wake_up_common+0x74/0x120
  wake_up_page_bit+0x9c/0xe0
  mpage_end_io+0x27/0x70
  blk_update_request+0x78/0x2c0
  scsi_end_request+0x2c/0x1e0
  scsi_io_completion+0x20b/0x5f0
  blk_mq_complete_request+0xa2/0x100
  ata_scsi_qc_complete+0x79/0x400
  ata_qc_complete_multiple+0x86/0xd0
  ahci_handle_port_interrupt+0xc9/0x5c0
  ahci_handle_port_intr+0x54/0xb0
  ahci_single_level_irq_intr+0x3b/0x60
  __handle_irq_event_percpu+0x43/0x190
  handle_irq_event_percpu+0x20/0x50
  handle_irq_event+0x2a/0x50
  handle_edge_irq+0x80/0x1c0
  handle_irq+0xaf/0x120
  do_IRQ+0x41/0xc0
  common_interrupt+0xf/0xf

Fix it by updating delayacct_blkio_end() check @p->delays instead.

Link: http://lkml.kernel.org/r/20180724175542.GP1934745@devbig577.frc2.facebook.com
Fixes: c96f5471ce7d ("delayacct: Account blkio completion on the correct task")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dave Jones <dsj@fb.com>
Debugged-by: Dave Jones <dsj@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Josh Snyder <joshs@netflix.com>
Cc: <stable@vger.kernel.org>	[4.15+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index e6c0448ebcc7..31c865d1842e 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -124,7 +124,7 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
-	if (current->delays)
+	if (p->delays)
 		__delayacct_blkio_end(p);
 	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
-- 
cgit v1.2.3


From 027232da7c7c1c7f04383f93bd798e475dde5285 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 26 Jul 2018 16:37:25 -0700
Subject: mm: introduce vma_init()

Not all VMAs allocated with vm_area_alloc().  Some of them allocated on
stack or in data segment.

The new helper can be use to initialize VMA properly regardless where it
was allocated.

Link: http://lkml.kernel.org/r/20180724121139.62570-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 ++++++
 kernel/fork.c      | 6 ++----
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d3a3842316b8..31540f166987 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -452,6 +452,12 @@ struct vm_operations_struct {
 					  unsigned long addr);
 };
 
+static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
+{
+	vma->vm_mm = mm;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
+}
+
 struct mmu_gather;
 struct inode;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index a191c05e757d..1b27babc4c78 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -312,10 +312,8 @@ struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 
-	if (vma) {
-		vma->vm_mm = mm;
-		INIT_LIST_HEAD(&vma->anon_vma_chain);
-	}
+	if (vma)
+		vma_init(vma, mm);
 	return vma;
 }
 
-- 
cgit v1.2.3


From bfd40eaff5abb9f62c8ef94ca13ed0d94a560f10 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 26 Jul 2018 16:37:35 -0700
Subject: mm: fix vma_is_anonymous() false-positives

vma_is_anonymous() relies on ->vm_ops being NULL to detect anonymous
VMA.  This is unreliable as ->mmap may not set ->vm_ops.

False-positive vma_is_anonymous() may lead to crashes:

	next ffff8801ce5e7040 prev ffff8801d20eca50 mm ffff88019c1e13c0
	prot 27 anon_vma ffff88019680cdd8 vm_ops 0000000000000000
	pgoff 0 file ffff8801b2ec2d00 private_data 0000000000000000
	flags: 0xff(read|write|exec|shared|mayread|maywrite|mayexec|mayshare)
	------------[ cut here ]------------
	kernel BUG at mm/memory.c:1422!
	invalid opcode: 0000 [#1] SMP KASAN
	CPU: 0 PID: 18486 Comm: syz-executor3 Not tainted 4.18.0-rc3+ #136
	Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
	01/01/2011
	RIP: 0010:zap_pmd_range mm/memory.c:1421 [inline]
	RIP: 0010:zap_pud_range mm/memory.c:1466 [inline]
	RIP: 0010:zap_p4d_range mm/memory.c:1487 [inline]
	RIP: 0010:unmap_page_range+0x1c18/0x2220 mm/memory.c:1508
	Call Trace:
	 unmap_single_vma+0x1a0/0x310 mm/memory.c:1553
	 zap_page_range_single+0x3cc/0x580 mm/memory.c:1644
	 unmap_mapping_range_vma mm/memory.c:2792 [inline]
	 unmap_mapping_range_tree mm/memory.c:2813 [inline]
	 unmap_mapping_pages+0x3a7/0x5b0 mm/memory.c:2845
	 unmap_mapping_range+0x48/0x60 mm/memory.c:2880
	 truncate_pagecache+0x54/0x90 mm/truncate.c:800
	 truncate_setsize+0x70/0xb0 mm/truncate.c:826
	 simple_setattr+0xe9/0x110 fs/libfs.c:409
	 notify_change+0xf13/0x10f0 fs/attr.c:335
	 do_truncate+0x1ac/0x2b0 fs/open.c:63
	 do_sys_ftruncate+0x492/0x560 fs/open.c:205
	 __do_sys_ftruncate fs/open.c:215 [inline]
	 __se_sys_ftruncate fs/open.c:213 [inline]
	 __x64_sys_ftruncate+0x59/0x80 fs/open.c:213
	 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
	 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Reproducer:

	#include <stdio.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include <fcntl.h>

	#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
	#define KCOV_ENABLE			_IO('c', 100)
	#define KCOV_DISABLE			_IO('c', 101)
	#define COVER_SIZE			(1024<<10)

	#define KCOV_TRACE_PC  0
	#define KCOV_TRACE_CMP 1

	int main(int argc, char **argv)
	{
		int fd;
		unsigned long *cover;

		system("mount -t debugfs none /sys/kernel/debug");
		fd = open("/sys/kernel/debug/kcov", O_RDWR);
		ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE);
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
				PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		munmap(cover, COVER_SIZE * sizeof(unsigned long));
		cover = mmap(NULL, COVER_SIZE * sizeof(unsigned long),
				PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
		memset(cover, 0, COVER_SIZE * sizeof(unsigned long));
		ftruncate(fd, 3UL << 20);
		return 0;
	}

This can be fixed by assigning anonymous VMAs own vm_ops and not relying
on it being NULL.

If ->mmap() failed to set ->vm_ops, mmap_region() will set it to
dummy_vm_ops.  This way we will have non-NULL ->vm_ops for all VMAs.

Link: http://lkml.kernel.org/r/20180724121139.62570-4-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: syzbot+3f84280d52be9b7083cc@syzkaller.appspotmail.com
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/mem.c | 1 +
 fs/exec.c          | 1 +
 include/linux/mm.h | 8 ++++++++
 mm/mmap.c          | 3 +++
 mm/nommu.c         | 2 ++
 5 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index ffeb60d3434c..df66a9dd0aae 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 #endif
 	if (vma->vm_flags & VM_SHARED)
 		return shmem_zero_setup(vma);
+	vma_set_anonymous(vma);
 	return 0;
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index 72e961a62adb..bdd0eacefdf5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -293,6 +293,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	bprm->vma = vma = vm_area_alloc(mm);
 	if (!vma)
 		return -ENOMEM;
+	vma_set_anonymous(vma);
 
 	if (down_write_killable(&mm->mmap_sem)) {
 		err = -EINTR;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 31540f166987..7ba6d356d18f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -454,10 +454,18 @@ struct vm_operations_struct {
 
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
+	static const struct vm_operations_struct dummy_vm_ops = {};
+
 	vma->vm_mm = mm;
+	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 }
 
+static inline void vma_set_anonymous(struct vm_area_struct *vma)
+{
+	vma->vm_ops = NULL;
+}
+
 struct mmu_gather;
 struct inode;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index ff1944d8d458..17bbf4d3e24f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1778,6 +1778,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
+	} else {
+		vma_set_anonymous(vma);
 	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -2983,6 +2985,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 		return -ENOMEM;
 	}
 
+	vma_set_anonymous(vma);
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
diff --git a/mm/nommu.c b/mm/nommu.c
index 1d22fdbf7d7c..9fc9e43335b6 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1145,6 +1145,8 @@ static int do_mmap_private(struct vm_area_struct *vma,
 		if (ret < len)
 			memset(base + ret, 0, len - ret);
 
+	} else {
+		vma_set_anonymous(vma);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From fa3fc2ad99b4f025446d1cff589a8d2dd7db92f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 16:37:38 -0700
Subject: include/linux/eventfd.h: include linux/errno.h

The new gasket staging driver ran into a randconfig build failure when
CONFIG_EVENTFD is disabled:

  In file included from drivers/staging/gasket/gasket_interrupt.h:11,
                   from drivers/staging/gasket/gasket_interrupt.c:4:
  include/linux/eventfd.h: In function 'eventfd_ctx_fdget':
  include/linux/eventfd.h:51:9: error: implicit declaration of function 'ERR_PTR' [-Werror=implicit-function-declaration]

I can't see anything wrong with including eventfd.h before err.h, so the
easiest fix is to make it possible to do this by including the file
where it is needed.

Link: http://lkml.kernel.org/r/20180724110737.3985088-1-arnd@arndb.de
Fixes: 9a69f5087ccc ("drivers/staging: Gasket driver framework + Apex driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventfd.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 7094718b653b..ffcc7724ca21 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -11,6 +11,7 @@
 
 #include <linux/fcntl.h>
 #include <linux/wait.h>
+#include <linux/err.h>
 
 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
-- 
cgit v1.2.3


From 8c3799ee25e1fda159099af09f5f2e86091e41d4 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 20 Jul 2018 12:01:43 +0300
Subject: xen/grant-table: Make set/clear page private code shared

Make set/clear page private code shared and accessible to
other kernel modules which can re-use these instead of open-coding.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/grant-table.c | 54 ++++++++++++++++++++++++++++++-----------------
 include/xen/grant_table.h |  3 +++
 2 files changed, 38 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index ba9f3eec2bd0..bb4840653bf2 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -769,29 +769,18 @@ void gnttab_free_auto_xlat_frames(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
 
-/**
- * gnttab_alloc_pages - alloc pages suitable for grant mapping into
- * @nr_pages: number of pages to alloc
- * @pages: returns the pages
- */
-int gnttab_alloc_pages(int nr_pages, struct page **pages)
+int gnttab_pages_set_private(int nr_pages, struct page **pages)
 {
 	int i;
-	int ret;
-
-	ret = alloc_xenballooned_pages(nr_pages, pages);
-	if (ret < 0)
-		return ret;
 
 	for (i = 0; i < nr_pages; i++) {
 #if BITS_PER_LONG < 64
 		struct xen_page_foreign *foreign;
 
 		foreign = kzalloc(sizeof(*foreign), GFP_KERNEL);
-		if (!foreign) {
-			gnttab_free_pages(nr_pages, pages);
+		if (!foreign)
 			return -ENOMEM;
-		}
+
 		set_page_private(pages[i], (unsigned long)foreign);
 #endif
 		SetPagePrivate(pages[i]);
@@ -799,14 +788,30 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
+EXPORT_SYMBOL_GPL(gnttab_pages_set_private);
 
 /**
- * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
- * @nr_pages; number of pages to free
- * @pages: the pages
+ * gnttab_alloc_pages - alloc pages suitable for grant mapping into
+ * @nr_pages: number of pages to alloc
+ * @pages: returns the pages
  */
-void gnttab_free_pages(int nr_pages, struct page **pages)
+int gnttab_alloc_pages(int nr_pages, struct page **pages)
+{
+	int ret;
+
+	ret = alloc_xenballooned_pages(nr_pages, pages);
+	if (ret < 0)
+		return ret;
+
+	ret = gnttab_pages_set_private(nr_pages, pages);
+	if (ret < 0)
+		gnttab_free_pages(nr_pages, pages);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
+
+void gnttab_pages_clear_private(int nr_pages, struct page **pages)
 {
 	int i;
 
@@ -818,6 +823,17 @@ void gnttab_free_pages(int nr_pages, struct page **pages)
 			ClearPagePrivate(pages[i]);
 		}
 	}
+}
+EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
+
+/**
+ * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
+ * @nr_pages; number of pages to free
+ * @pages: the pages
+ */
+void gnttab_free_pages(int nr_pages, struct page **pages)
+{
+	gnttab_pages_clear_private(nr_pages, pages);
 	free_xenballooned_pages(nr_pages, pages);
 }
 EXPORT_SYMBOL_GPL(gnttab_free_pages);
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 2e37741f6b8d..de03f2542bb7 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -198,6 +198,9 @@ void gnttab_free_auto_xlat_frames(void);
 int gnttab_alloc_pages(int nr_pages, struct page **pages);
 void gnttab_free_pages(int nr_pages, struct page **pages);
 
+int gnttab_pages_set_private(int nr_pages, struct page **pages);
+void gnttab_pages_clear_private(int nr_pages, struct page **pages);
+
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
-- 
cgit v1.2.3


From ae4c51a50c990d6feba7058c181dc8f22ca5f1d8 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 20 Jul 2018 12:01:44 +0300
Subject: xen/balloon: Share common memory reservation routines

Memory {increase|decrease}_reservation and VA mappings update/reset
code used in balloon driver can be made common, so other drivers can
also re-use the same functionality without open-coding.
Create a dedicated file for the shared code and export corresponding
symbols for other kernel modules.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/Makefile          |   1 +
 drivers/xen/balloon.c         |  75 +++------------------------
 drivers/xen/mem-reservation.c | 118 ++++++++++++++++++++++++++++++++++++++++++
 include/xen/mem-reservation.h |  59 +++++++++++++++++++++
 4 files changed, 184 insertions(+), 69 deletions(-)
 create mode 100644 drivers/xen/mem-reservation.c
 create mode 100644 include/xen/mem-reservation.h

(limited to 'include')

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 48b154276179..129dd1cc1b83 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 obj-$(CONFIG_X86)			+= fallback.o
 obj-y	+= grant-table.o features.o balloon.o manage.o preempt.o time.o
+obj-y	+= mem-reservation.o
 obj-y	+= events/
 obj-y	+= xenbus/
 
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 065f0b607373..e12bb256036f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -71,6 +71,7 @@
 #include <xen/balloon.h>
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/mem-reservation.h>
 
 static int xen_hotplug_unpopulated;
 
@@ -157,13 +158,6 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 #define GFP_BALLOON \
 	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
 
-static void scrub_page(struct page *page)
-{
-#ifdef CONFIG_XEN_SCRUB_PAGES
-	clear_highpage(page);
-#endif
-}
-
 /* balloon_append: add the given page to the balloon. */
 static void __balloon_append(struct page *page)
 {
@@ -463,11 +457,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 	int rc;
 	unsigned long i;
 	struct page   *page;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = EXTENT_ORDER,
-		.domid        = DOMID_SELF
-	};
 
 	if (nr_pages > ARRAY_SIZE(frame_list))
 		nr_pages = ARRAY_SIZE(frame_list);
@@ -479,16 +468,11 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 			break;
 		}
 
-		/* XENMEM_populate_physmap requires a PFN based on Xen
-		 * granularity.
-		 */
 		frame_list[i] = page_to_xen_pfn(page);
 		page = balloon_next_page(page);
 	}
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
-	reservation.nr_extents = nr_pages;
-	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+	rc = xenmem_reservation_increase(nr_pages, frame_list);
 	if (rc <= 0)
 		return BP_EAGAIN;
 
@@ -496,29 +480,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 		page = balloon_retrieve(false);
 		BUG_ON(page == NULL);
 
-#ifdef CONFIG_XEN_HAVE_PVMMU
-		/*
-		 * We don't support PV MMU when Linux and Xen is using
-		 * different page granularity.
-		 */
-		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
-
-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long pfn = page_to_pfn(page);
-
-			set_phys_to_machine(pfn, frame_list[i]);
-
-			/* Link back into the page tables if not highmem. */
-			if (!PageHighMem(page)) {
-				int ret;
-				ret = HYPERVISOR_update_va_mapping(
-						(unsigned long)__va(pfn << PAGE_SHIFT),
-						mfn_pte(frame_list[i], PAGE_KERNEL),
-						0);
-				BUG_ON(ret);
-			}
-		}
-#endif
+		xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
 
 		/* Relinquish the page back to the allocator. */
 		free_reserved_page(page);
@@ -535,11 +497,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	unsigned long i;
 	struct page *page, *tmp;
 	int ret;
-	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
-		.extent_order = EXTENT_ORDER,
-		.domid        = DOMID_SELF
-	};
 	LIST_HEAD(pages);
 
 	if (nr_pages > ARRAY_SIZE(frame_list))
@@ -553,7 +510,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 			break;
 		}
 		adjust_managed_page_count(page, -1);
-		scrub_page(page);
+		xenmem_reservation_scrub_page(page);
 		list_add(&page->lru, &pages);
 	}
 
@@ -572,28 +529,10 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	 */
 	i = 0;
 	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		/* XENMEM_decrease_reservation requires a GFN */
 		frame_list[i++] = xen_page_to_gfn(page);
 
-#ifdef CONFIG_XEN_HAVE_PVMMU
-		/*
-		 * We don't support PV MMU when Linux and Xen is using
-		 * different page granularity.
-		 */
-		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
-
-		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long pfn = page_to_pfn(page);
+		xenmem_reservation_va_mapping_reset(1, &page);
 
-			if (!PageHighMem(page)) {
-				ret = HYPERVISOR_update_va_mapping(
-						(unsigned long)__va(pfn << PAGE_SHIFT),
-						__pte_ma(0), 0);
-				BUG_ON(ret);
-			}
-			__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-		}
-#endif
 		list_del(&page->lru);
 
 		balloon_append(page);
@@ -601,9 +540,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 
 	flush_tlb_all();
 
-	set_xen_guest_handle(reservation.extent_start, frame_list);
-	reservation.nr_extents   = nr_pages;
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	ret = xenmem_reservation_decrease(nr_pages, frame_list);
 	BUG_ON(ret != nr_pages);
 
 	balloon_stats.current_pages -= nr_pages;
diff --git a/drivers/xen/mem-reservation.c b/drivers/xen/mem-reservation.c
new file mode 100644
index 000000000000..084799c6180e
--- /dev/null
+++ b/drivers/xen/mem-reservation.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/******************************************************************************
+ * Xen memory reservation utilities.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ * Copyright (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#include <asm/xen/hypercall.h>
+
+#include <xen/interface/memory.h>
+#include <xen/mem-reservation.h>
+
+/*
+ * Use one extent per PAGE_SIZE to avoid to break down the page into
+ * multiple frame.
+ */
+#define EXTENT_ORDER (fls(XEN_PFN_PER_PAGE) - 1)
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+void __xenmem_reservation_va_mapping_update(unsigned long count,
+					    struct page **pages,
+					    xen_pfn_t *frames)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct page *page = pages[i];
+		unsigned long pfn = page_to_pfn(page);
+
+		BUG_ON(!page);
+
+		/*
+		 * We don't support PV MMU when Linux and Xen is using
+		 * different page granularity.
+		 */
+		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
+
+		set_phys_to_machine(pfn, frames[i]);
+
+		/* Link back into the page tables if not highmem. */
+		if (!PageHighMem(page)) {
+			int ret;
+
+			ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					mfn_pte(frames[i], PAGE_KERNEL),
+					0);
+			BUG_ON(ret);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(__xenmem_reservation_va_mapping_update);
+
+void __xenmem_reservation_va_mapping_reset(unsigned long count,
+					   struct page **pages)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct page *page = pages[i];
+		unsigned long pfn = page_to_pfn(page);
+
+		/*
+		 * We don't support PV MMU when Linux and Xen are using
+		 * different page granularity.
+		 */
+		BUILD_BUG_ON(XEN_PAGE_SIZE != PAGE_SIZE);
+
+		if (!PageHighMem(page)) {
+			int ret;
+
+			ret = HYPERVISOR_update_va_mapping(
+					(unsigned long)__va(pfn << PAGE_SHIFT),
+					__pte_ma(0), 0);
+			BUG_ON(ret);
+		}
+		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+	}
+}
+EXPORT_SYMBOL_GPL(__xenmem_reservation_va_mapping_reset);
+#endif /* CONFIG_XEN_HAVE_PVMMU */
+
+/* @frames is an array of PFNs */
+int xenmem_reservation_increase(int count, xen_pfn_t *frames)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = EXTENT_ORDER,
+		.domid        = DOMID_SELF
+	};
+
+	/* XENMEM_populate_physmap requires a PFN based on Xen granularity. */
+	set_xen_guest_handle(reservation.extent_start, frames);
+	reservation.nr_extents = count;
+	return HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+}
+EXPORT_SYMBOL_GPL(xenmem_reservation_increase);
+
+/* @frames is an array of GFNs */
+int xenmem_reservation_decrease(int count, xen_pfn_t *frames)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = EXTENT_ORDER,
+		.domid        = DOMID_SELF
+	};
+
+	/* XENMEM_decrease_reservation requires a GFN */
+	set_xen_guest_handle(reservation.extent_start, frames);
+	reservation.nr_extents = count;
+	return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+}
+EXPORT_SYMBOL_GPL(xenmem_reservation_decrease);
diff --git a/include/xen/mem-reservation.h b/include/xen/mem-reservation.h
new file mode 100644
index 000000000000..80b52b4945e9
--- /dev/null
+++ b/include/xen/mem-reservation.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Xen memory reservation utilities.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * Copyright (c) 2010 Daniel Kiper
+ * Copyright (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#ifndef _XENMEM_RESERVATION_H
+#define _XENMEM_RESERVATION_H
+
+#include <linux/highmem.h>
+
+#include <xen/page.h>
+
+static inline void xenmem_reservation_scrub_page(struct page *page)
+{
+#ifdef CONFIG_XEN_SCRUB_PAGES
+	clear_highpage(page);
+#endif
+}
+
+#ifdef CONFIG_XEN_HAVE_PVMMU
+void __xenmem_reservation_va_mapping_update(unsigned long count,
+					    struct page **pages,
+					    xen_pfn_t *frames);
+
+void __xenmem_reservation_va_mapping_reset(unsigned long count,
+					   struct page **pages);
+#endif
+
+static inline void xenmem_reservation_va_mapping_update(unsigned long count,
+							struct page **pages,
+							xen_pfn_t *frames)
+{
+#ifdef CONFIG_XEN_HAVE_PVMMU
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		__xenmem_reservation_va_mapping_update(count, pages, frames);
+#endif
+}
+
+static inline void xenmem_reservation_va_mapping_reset(unsigned long count,
+						       struct page **pages)
+{
+#ifdef CONFIG_XEN_HAVE_PVMMU
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		__xenmem_reservation_va_mapping_reset(count, pages);
+#endif
+}
+
+int xenmem_reservation_increase(int count, xen_pfn_t *frames);
+
+int xenmem_reservation_decrease(int count, xen_pfn_t *frames);
+
+#endif
-- 
cgit v1.2.3


From 9bdc7304f536f3f77f0a69e7c3a8f5afda561a68 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 20 Jul 2018 12:01:45 +0300
Subject: xen/grant-table: Allow allocating buffers suitable for DMA

Extend grant table module API to allow allocating buffers that can
be used for DMA operations and mapping foreign grant references
on top of those.
The resulting buffer is similar to the one allocated by the balloon
driver in that proper memory reservation is made by
({increase|decrease}_reservation and VA mappings are updated if
needed).
This is useful for sharing foreign buffers with HW drivers which
cannot work with scattered buffers provided by the balloon driver,
but require DMAable memory instead.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/Kconfig       | 14 +++++++
 drivers/xen/grant-table.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/grant_table.h | 18 +++++++++
 3 files changed, 129 insertions(+)

(limited to 'include')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index e5d0c28372ea..75e5c40f80a5 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -161,6 +161,20 @@ config XEN_GRANT_DEV_ALLOC
 	  to other domains. This can be used to implement frontend drivers
 	  or as part of an inter-domain shared memory channel.
 
+config XEN_GRANT_DMA_ALLOC
+	bool "Allow allocating DMA capable buffers with grant reference module"
+	depends on XEN && HAS_DMA
+	help
+	  Extends grant table module API to allow allocating DMA capable
+	  buffers and mapping foreign grant references on top of it.
+	  The resulting buffer is similar to one allocated by the balloon
+	  driver in that proper memory reservation is made by
+	  ({increase|decrease}_reservation and VA mappings are updated if
+	  needed).
+	  This is useful for sharing foreign buffers with HW drivers which
+	  cannot work with scattered buffers provided by the balloon driver,
+	  but require DMAable memory instead.
+
 config SWIOTLB_XEN
 	def_bool y
 	select SWIOTLB
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bb4840653bf2..7bafa703a992 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -45,6 +45,9 @@
 #include <linux/workqueue.h>
 #include <linux/ratelimit.h>
 #include <linux/moduleparam.h>
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+#include <linux/dma-mapping.h>
+#endif
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -57,6 +60,7 @@
 #ifdef CONFIG_X86
 #include <asm/xen/cpuid.h>
 #endif
+#include <xen/mem-reservation.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/interface.h>
 
@@ -838,6 +842,99 @@ void gnttab_free_pages(int nr_pages, struct page **pages)
 }
 EXPORT_SYMBOL_GPL(gnttab_free_pages);
 
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+/**
+ * gnttab_dma_alloc_pages - alloc DMAable pages suitable for grant mapping into
+ * @args: arguments to the function
+ */
+int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
+{
+	unsigned long pfn, start_pfn;
+	size_t size;
+	int i, ret;
+
+	size = args->nr_pages << PAGE_SHIFT;
+	if (args->coherent)
+		args->vaddr = dma_alloc_coherent(args->dev, size,
+						 &args->dev_bus_addr,
+						 GFP_KERNEL | __GFP_NOWARN);
+	else
+		args->vaddr = dma_alloc_wc(args->dev, size,
+					   &args->dev_bus_addr,
+					   GFP_KERNEL | __GFP_NOWARN);
+	if (!args->vaddr) {
+		pr_debug("Failed to allocate DMA buffer of size %zu\n", size);
+		return -ENOMEM;
+	}
+
+	start_pfn = __phys_to_pfn(args->dev_bus_addr);
+	for (pfn = start_pfn, i = 0; pfn < start_pfn + args->nr_pages;
+			pfn++, i++) {
+		struct page *page = pfn_to_page(pfn);
+
+		args->pages[i] = page;
+		args->frames[i] = xen_page_to_gfn(page);
+		xenmem_reservation_scrub_page(page);
+	}
+
+	xenmem_reservation_va_mapping_reset(args->nr_pages, args->pages);
+
+	ret = xenmem_reservation_decrease(args->nr_pages, args->frames);
+	if (ret != args->nr_pages) {
+		pr_debug("Failed to decrease reservation for DMA buffer\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	ret = gnttab_pages_set_private(args->nr_pages, args->pages);
+	if (ret < 0)
+		goto fail;
+
+	return 0;
+
+fail:
+	gnttab_dma_free_pages(args);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_dma_alloc_pages);
+
+/**
+ * gnttab_dma_free_pages - free DMAable pages
+ * @args: arguments to the function
+ */
+int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
+{
+	size_t size;
+	int i, ret;
+
+	gnttab_pages_clear_private(args->nr_pages, args->pages);
+
+	for (i = 0; i < args->nr_pages; i++)
+		args->frames[i] = page_to_xen_pfn(args->pages[i]);
+
+	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
+	if (ret != args->nr_pages) {
+		pr_debug("Failed to decrease reservation for DMA buffer\n");
+		ret = -EFAULT;
+	} else {
+		ret = 0;
+	}
+
+	xenmem_reservation_va_mapping_update(args->nr_pages, args->pages,
+					     args->frames);
+
+	size = args->nr_pages << PAGE_SHIFT;
+	if (args->coherent)
+		dma_free_coherent(args->dev, size,
+				  args->vaddr, args->dev_bus_addr);
+	else
+		dma_free_wc(args->dev, size,
+			    args->vaddr, args->dev_bus_addr);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gnttab_dma_free_pages);
+#endif
+
 /* Handling of paged out grant targets (GNTST_eagain) */
 #define MAX_DELAY 256
 static inline void
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index de03f2542bb7..9bc5bc07d4d3 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -198,6 +198,24 @@ void gnttab_free_auto_xlat_frames(void);
 int gnttab_alloc_pages(int nr_pages, struct page **pages);
 void gnttab_free_pages(int nr_pages, struct page **pages);
 
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+struct gnttab_dma_alloc_args {
+	/* Device for which DMA memory will be/was allocated. */
+	struct device *dev;
+	/* If set then DMA buffer is coherent and write-combine otherwise. */
+	bool coherent;
+
+	int nr_pages;
+	struct page **pages;
+	xen_pfn_t *frames;
+	void *vaddr;
+	dma_addr_t dev_bus_addr;
+};
+
+int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args);
+int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args);
+#endif
+
 int gnttab_pages_set_private(int nr_pages, struct page **pages);
 void gnttab_pages_clear_private(int nr_pages, struct page **pages);
 
-- 
cgit v1.2.3


From 975ef7ff81bb000af6e6c8e63e81f89f3468dcf7 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 20 Jul 2018 12:01:46 +0300
Subject: xen/gntdev: Allow mappings for DMA buffers

Allow mappings for DMA backed  buffers if grant table module
supports such: this extends grant device to not only map buffers
made of balloon pages, but also from buffers allocated with
dma_alloc_xxx.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/gntdev.c      | 99 ++++++++++++++++++++++++++++++++++++++++++++++-
 include/uapi/xen/gntdev.h | 15 +++++++
 2 files changed, 112 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index bd56653b9bbc..173332f439d8 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -37,6 +37,9 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/refcount.h>
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+#include <linux/of_device.h>
+#endif
 
 #include <xen/xen.h>
 #include <xen/grant_table.h>
@@ -72,6 +75,11 @@ struct gntdev_priv {
 	struct mutex lock;
 	struct mm_struct *mm;
 	struct mmu_notifier mn;
+
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	/* Device for which DMA memory is allocated. */
+	struct device *dma_dev;
+#endif
 };
 
 struct unmap_notify {
@@ -96,10 +104,27 @@ struct grant_map {
 	struct gnttab_unmap_grant_ref *kunmap_ops;
 	struct page **pages;
 	unsigned long pages_vm_start;
+
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	/*
+	 * If dmabuf_vaddr is not NULL then this mapping is backed by DMA
+	 * capable memory.
+	 */
+
+	struct device *dma_dev;
+	/* Flags used to create this DMA buffer: GNTDEV_DMA_FLAG_XXX. */
+	int dma_flags;
+	void *dma_vaddr;
+	dma_addr_t dma_bus_addr;
+	/* Needed to avoid allocation in gnttab_dma_free_pages(). */
+	xen_pfn_t *frames;
+#endif
 };
 
 static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
 
+static struct miscdevice gntdev_miscdev;
+
 /* ------------------------------------------------------------------ */
 
 static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -121,8 +146,27 @@ static void gntdev_free_map(struct grant_map *map)
 	if (map == NULL)
 		return;
 
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	if (map->dma_vaddr) {
+		struct gnttab_dma_alloc_args args;
+
+		args.dev = map->dma_dev;
+		args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT);
+		args.nr_pages = map->count;
+		args.pages = map->pages;
+		args.frames = map->frames;
+		args.vaddr = map->dma_vaddr;
+		args.dev_bus_addr = map->dma_bus_addr;
+
+		gnttab_dma_free_pages(&args);
+	} else
+#endif
 	if (map->pages)
 		gnttab_free_pages(map->count, map->pages);
+
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	kfree(map->frames);
+#endif
 	kfree(map->pages);
 	kfree(map->grants);
 	kfree(map->map_ops);
@@ -132,7 +176,8 @@ static void gntdev_free_map(struct grant_map *map)
 	kfree(map);
 }
 
-static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
+static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
+					  int dma_flags)
 {
 	struct grant_map *add;
 	int i;
@@ -155,6 +200,37 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count)
 	    NULL == add->pages)
 		goto err;
 
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	add->dma_flags = dma_flags;
+
+	/*
+	 * Check if this mapping is requested to be backed
+	 * by a DMA buffer.
+	 */
+	if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
+		struct gnttab_dma_alloc_args args;
+
+		add->frames = kcalloc(count, sizeof(add->frames[0]),
+				      GFP_KERNEL);
+		if (!add->frames)
+			goto err;
+
+		/* Remember the device, so we can free DMA memory. */
+		add->dma_dev = priv->dma_dev;
+
+		args.dev = priv->dma_dev;
+		args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT);
+		args.nr_pages = count;
+		args.pages = add->pages;
+		args.frames = add->frames;
+
+		if (gnttab_dma_alloc_pages(&args))
+			goto err;
+
+		add->dma_vaddr = args.vaddr;
+		add->dma_bus_addr = args.dev_bus_addr;
+	} else
+#endif
 	if (gnttab_alloc_pages(count, add->pages))
 		goto err;
 
@@ -325,6 +401,14 @@ static int map_grant_pages(struct grant_map *map)
 		map->unmap_ops[i].handle = map->map_ops[i].handle;
 		if (use_ptemod)
 			map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+		else if (map->dma_vaddr) {
+			unsigned long bfn;
+
+			bfn = pfn_to_bfn(page_to_pfn(map->pages[i]));
+			map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn);
+		}
+#endif
 	}
 	return err;
 }
@@ -548,6 +632,17 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	}
 
 	flip->private_data = priv;
+#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
+	priv->dma_dev = gntdev_miscdev.this_device;
+
+	/*
+	 * The device is not spawn from a device tree, so arch_setup_dma_ops
+	 * is not called, thus leaving the device with dummy DMA ops.
+	 * Fix this by calling of_dma_configure() with a NULL node to set
+	 * default DMA ops.
+	 */
+	of_dma_configure(priv->dma_dev, NULL, true);
+#endif
 	pr_debug("priv %p\n", priv);
 
 	return 0;
@@ -589,7 +684,7 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
 		return -EINVAL;
 
 	err = -ENOMEM;
-	map = gntdev_alloc_map(priv, op.count);
+	map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */);
 	if (!map)
 		return err;
 
diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h
index 6d1163456c03..4b9d498a31d4 100644
--- a/include/uapi/xen/gntdev.h
+++ b/include/uapi/xen/gntdev.h
@@ -200,4 +200,19 @@ struct ioctl_gntdev_grant_copy {
 /* Send an interrupt on the indicated event channel */
 #define UNMAP_NOTIFY_SEND_EVENT 0x2
 
+/*
+ * Flags to be used while requesting memory mapping's backing storage
+ * to be allocated with DMA API.
+ */
+
+/*
+ * The buffer is backed with memory allocated with dma_alloc_wc.
+ */
+#define GNTDEV_DMA_FLAG_WC		(1 << 0)
+
+/*
+ * The buffer is backed with memory allocated with dma_alloc_coherent.
+ */
+#define GNTDEV_DMA_FLAG_COHERENT	(1 << 1)
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
cgit v1.2.3


From 932d6562179efe8e2460a0343dbe0fcacf288a9e Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 20 Jul 2018 12:01:48 +0300
Subject: xen/gntdev: Add initial support for dma-buf UAPI

Add UAPI and IOCTLs for dma-buf grant device driver extension:
the extension allows userspace processes and kernel modules to
use Xen backed dma-buf implementation. With this extension grant
references to the pages of an imported dma-buf can be exported
for other domain use and grant references coming from a foreign
domain can be converted into a local dma-buf for local export.
Implement basic initialization and stubs for Xen DMA buffers'
support.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/Kconfig         |  10 +++
 drivers/xen/Makefile        |   1 +
 drivers/xen/gntdev-common.h |   6 ++
 drivers/xen/gntdev-dmabuf.c | 177 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/gntdev-dmabuf.h |  33 +++++++++
 drivers/xen/gntdev.c        |  31 ++++++++
 include/uapi/xen/gntdev.h   |  91 +++++++++++++++++++++++
 7 files changed, 349 insertions(+)
 create mode 100644 drivers/xen/gntdev-dmabuf.c
 create mode 100644 drivers/xen/gntdev-dmabuf.h

(limited to 'include')

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 75e5c40f80a5..b459edfacff3 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -152,6 +152,16 @@ config XEN_GNTDEV
 	help
 	  Allows userspace processes to use grants.
 
+config XEN_GNTDEV_DMABUF
+	bool "Add support for dma-buf grant access device driver extension"
+	depends on XEN_GNTDEV && XEN_GRANT_DMA_ALLOC && DMA_SHARED_BUFFER
+	help
+	  Allows userspace processes and kernel modules to use Xen backed
+	  dma-buf implementation. With this extension grant references to
+	  the pages of an imported dma-buf can be exported for other domain
+	  use and grant references coming from a foreign domain can be
+	  converted into a local dma-buf for local export.
+
 config XEN_GRANT_DEV_ALLOC
 	tristate "User-space grant reference allocator driver"
 	depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 129dd1cc1b83..3e542f60f29f 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -41,5 +41,6 @@ obj-$(CONFIG_XEN_PVCALLS_BACKEND)	+= pvcalls-back.o
 obj-$(CONFIG_XEN_PVCALLS_FRONTEND)	+= pvcalls-front.o
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
+xen-gntdev-$(CONFIG_XEN_GNTDEV_DMABUF)	+= gntdev-dmabuf.o
 xen-gntalloc-y				:= gntalloc.o
 xen-privcmd-y				:= privcmd.o privcmd-buf.o
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2346c198f72e..2f8b949c3eeb 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -16,6 +16,8 @@
 #include <linux/mmu_notifier.h>
 #include <linux/types.h>
 
+struct gntdev_dmabuf_priv;
+
 struct gntdev_priv {
 	/* Maps with visible offsets in the file descriptor. */
 	struct list_head maps;
@@ -33,6 +35,10 @@ struct gntdev_priv {
 	/* Device for which DMA memory is allocated. */
 	struct device *dma_dev;
 #endif
+
+#ifdef CONFIG_XEN_GNTDEV_DMABUF
+	struct gntdev_dmabuf_priv *dmabuf_priv;
+#endif
 };
 
 struct gntdev_unmap_notify {
diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
new file mode 100644
index 000000000000..af782c0a8a19
--- /dev/null
+++ b/drivers/xen/gntdev-dmabuf.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Xen dma-buf functionality for gntdev.
+ *
+ * Copyright (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <xen/xen.h>
+#include <xen/grant_table.h>
+
+#include "gntdev-common.h"
+#include "gntdev-dmabuf.h"
+
+struct gntdev_dmabuf_priv {
+	/* List of exported DMA buffers. */
+	struct list_head exp_list;
+	/* List of wait objects. */
+	struct list_head exp_wait_list;
+	/* This is the lock which protects dma_buf_xxx lists. */
+	struct mutex lock;
+};
+
+/* DMA buffer export support. */
+
+/* Implementation of wait for exported DMA buffer to be released. */
+
+static int dmabuf_exp_wait_released(struct gntdev_dmabuf_priv *priv, int fd,
+				    int wait_to_ms)
+{
+	return -EINVAL;
+}
+
+static int dmabuf_exp_from_refs(struct gntdev_priv *priv, int flags,
+				int count, u32 domid, u32 *refs, u32 *fd)
+{
+	*fd = -1;
+	return -EINVAL;
+}
+
+/* DMA buffer import support. */
+
+static struct gntdev_dmabuf *
+dmabuf_imp_to_refs(struct gntdev_dmabuf_priv *priv, struct device *dev,
+		   int fd, int count, int domid)
+{
+	return ERR_PTR(-ENOMEM);
+}
+
+static u32 *dmabuf_imp_get_refs(struct gntdev_dmabuf *gntdev_dmabuf)
+{
+	return NULL;
+}
+
+static int dmabuf_imp_release(struct gntdev_dmabuf_priv *priv, u32 fd)
+{
+	return -EINVAL;
+}
+
+/* DMA buffer IOCTL support. */
+
+long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod,
+				       struct ioctl_gntdev_dmabuf_exp_from_refs __user *u)
+{
+	struct ioctl_gntdev_dmabuf_exp_from_refs op;
+	u32 *refs;
+	long ret;
+
+	if (use_ptemod) {
+		pr_debug("Cannot provide dma-buf: use_ptemode %d\n",
+			 use_ptemod);
+		return -EINVAL;
+	}
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+
+	if (unlikely(op.count <= 0))
+		return -EINVAL;
+
+	refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL);
+	if (!refs)
+		return -ENOMEM;
+
+	if (copy_from_user(refs, u->refs, sizeof(*refs) * op.count) != 0) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = dmabuf_exp_from_refs(priv, op.flags, op.count,
+				   op.domid, refs, &op.fd);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(u, &op, sizeof(op)) != 0)
+		ret = -EFAULT;
+
+out:
+	kfree(refs);
+	return ret;
+}
+
+long gntdev_ioctl_dmabuf_exp_wait_released(struct gntdev_priv *priv,
+					   struct ioctl_gntdev_dmabuf_exp_wait_released __user *u)
+{
+	struct ioctl_gntdev_dmabuf_exp_wait_released op;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+
+	return dmabuf_exp_wait_released(priv->dmabuf_priv, op.fd,
+					op.wait_to_ms);
+}
+
+long gntdev_ioctl_dmabuf_imp_to_refs(struct gntdev_priv *priv,
+				     struct ioctl_gntdev_dmabuf_imp_to_refs __user *u)
+{
+	struct ioctl_gntdev_dmabuf_imp_to_refs op;
+	struct gntdev_dmabuf *gntdev_dmabuf;
+	long ret;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+
+	if (unlikely(op.count <= 0))
+		return -EINVAL;
+
+	gntdev_dmabuf = dmabuf_imp_to_refs(priv->dmabuf_priv,
+					   priv->dma_dev, op.fd,
+					   op.count, op.domid);
+	if (IS_ERR(gntdev_dmabuf))
+		return PTR_ERR(gntdev_dmabuf);
+
+	if (copy_to_user(u->refs, dmabuf_imp_get_refs(gntdev_dmabuf),
+			 sizeof(*u->refs) * op.count) != 0) {
+		ret = -EFAULT;
+		goto out_release;
+	}
+	return 0;
+
+out_release:
+	dmabuf_imp_release(priv->dmabuf_priv, op.fd);
+	return ret;
+}
+
+long gntdev_ioctl_dmabuf_imp_release(struct gntdev_priv *priv,
+				     struct ioctl_gntdev_dmabuf_imp_release __user *u)
+{
+	struct ioctl_gntdev_dmabuf_imp_release op;
+
+	if (copy_from_user(&op, u, sizeof(op)) != 0)
+		return -EFAULT;
+
+	return dmabuf_imp_release(priv->dmabuf_priv, op.fd);
+}
+
+struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void)
+{
+	struct gntdev_dmabuf_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	return priv;
+}
+
+void gntdev_dmabuf_fini(struct gntdev_dmabuf_priv *priv)
+{
+	kfree(priv);
+}
diff --git a/drivers/xen/gntdev-dmabuf.h b/drivers/xen/gntdev-dmabuf.h
new file mode 100644
index 000000000000..7220a53d0fc5
--- /dev/null
+++ b/drivers/xen/gntdev-dmabuf.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Xen dma-buf functionality for gntdev.
+ *
+ * Copyright (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
+ */
+
+#ifndef _GNTDEV_DMABUF_H
+#define _GNTDEV_DMABUF_H
+
+#include <xen/gntdev.h>
+
+struct gntdev_dmabuf_priv;
+struct gntdev_priv;
+
+struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void);
+
+void gntdev_dmabuf_fini(struct gntdev_dmabuf_priv *priv);
+
+long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod,
+				       struct ioctl_gntdev_dmabuf_exp_from_refs __user *u);
+
+long gntdev_ioctl_dmabuf_exp_wait_released(struct gntdev_priv *priv,
+					   struct ioctl_gntdev_dmabuf_exp_wait_released __user *u);
+
+long gntdev_ioctl_dmabuf_imp_to_refs(struct gntdev_priv *priv,
+				     struct ioctl_gntdev_dmabuf_imp_to_refs __user *u);
+
+long gntdev_ioctl_dmabuf_imp_release(struct gntdev_priv *priv,
+				     struct ioctl_gntdev_dmabuf_imp_release __user *u);
+
+#endif
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e03f50052f3e..c866a62f766d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -48,6 +48,9 @@
 #include <asm/xen/hypercall.h>
 
 #include "gntdev-common.h"
+#ifdef CONFIG_XEN_GNTDEV_DMABUF
+#include "gntdev-dmabuf.h"
+#endif
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
@@ -566,6 +569,15 @@ static int gntdev_open(struct inode *inode, struct file *flip)
 	INIT_LIST_HEAD(&priv->freeable_maps);
 	mutex_init(&priv->lock);
 
+#ifdef CONFIG_XEN_GNTDEV_DMABUF
+	priv->dmabuf_priv = gntdev_dmabuf_init();
+	if (IS_ERR(priv->dmabuf_priv)) {
+		ret = PTR_ERR(priv->dmabuf_priv);
+		kfree(priv);
+		return ret;
+	}
+#endif
+
 	if (use_ptemod) {
 		priv->mm = get_task_mm(current);
 		if (!priv->mm) {
@@ -616,8 +628,13 @@ static int gntdev_release(struct inode *inode, struct file *flip)
 	WARN_ON(!list_empty(&priv->freeable_maps));
 	mutex_unlock(&priv->lock);
 
+#ifdef CONFIG_XEN_GNTDEV_DMABUF
+	gntdev_dmabuf_fini(priv->dmabuf_priv);
+#endif
+
 	if (use_ptemod)
 		mmu_notifier_unregister(&priv->mn, priv->mm);
+
 	kfree(priv);
 	return 0;
 }
@@ -1009,6 +1026,20 @@ static long gntdev_ioctl(struct file *flip,
 	case IOCTL_GNTDEV_GRANT_COPY:
 		return gntdev_ioctl_grant_copy(priv, ptr);
 
+#ifdef CONFIG_XEN_GNTDEV_DMABUF
+	case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS:
+		return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr);
+
+	case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED:
+		return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr);
+
+	case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS:
+		return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr);
+
+	case IOCTL_GNTDEV_DMABUF_IMP_RELEASE:
+		return gntdev_ioctl_dmabuf_imp_release(priv, ptr);
+#endif
+
 	default:
 		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
 		return -ENOIOCTLCMD;
diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h
index 4b9d498a31d4..fe4423e518c6 100644
--- a/include/uapi/xen/gntdev.h
+++ b/include/uapi/xen/gntdev.h
@@ -5,6 +5,7 @@
  * Interface to /dev/xen/gntdev.
  * 
  * Copyright (c) 2007, D G Murray
+ * Copyright (c) 2018, Oleksandr Andrushchenko, EPAM Systems Inc.
  * 
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -215,4 +216,94 @@ struct ioctl_gntdev_grant_copy {
  */
 #define GNTDEV_DMA_FLAG_COHERENT	(1 << 1)
 
+/*
+ * Create a dma-buf [1] from grant references @refs of count @count provided
+ * by the foreign domain @domid with flags @flags.
+ *
+ * By default dma-buf is backed by system memory pages, but by providing
+ * one of the GNTDEV_DMA_FLAG_XXX flags it can also be created as
+ * a DMA write-combine or coherent buffer, e.g. allocated with dma_alloc_wc/
+ * dma_alloc_coherent.
+ *
+ * Returns 0 if dma-buf was successfully created and the corresponding
+ * dma-buf's file descriptor is returned in @fd.
+ *
+ * [1] Documentation/driver-api/dma-buf.rst
+ */
+
+#define IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS \
+	_IOC(_IOC_NONE, 'G', 9, \
+	     sizeof(struct ioctl_gntdev_dmabuf_exp_from_refs))
+struct ioctl_gntdev_dmabuf_exp_from_refs {
+	/* IN parameters. */
+	/* Specific options for this dma-buf: see GNTDEV_DMA_FLAG_XXX. */
+	__u32 flags;
+	/* Number of grant references in @refs array. */
+	__u32 count;
+	/* OUT parameters. */
+	/* File descriptor of the dma-buf. */
+	__u32 fd;
+	/* The domain ID of the grant references to be mapped. */
+	__u32 domid;
+	/* Variable IN parameter. */
+	/* Array of grant references of size @count. */
+	__u32 refs[1];
+};
+
+/*
+ * This will block until the dma-buf with the file descriptor @fd is
+ * released. This is only valid for buffers created with
+ * IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS.
+ *
+ * If within @wait_to_ms milliseconds the buffer is not released
+ * then -ETIMEDOUT error is returned.
+ * If the buffer with the file descriptor @fd does not exist or has already
+ * been released, then -ENOENT is returned. For valid file descriptors
+ * this must not be treated as error.
+ */
+#define IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED \
+	_IOC(_IOC_NONE, 'G', 10, \
+	     sizeof(struct ioctl_gntdev_dmabuf_exp_wait_released))
+struct ioctl_gntdev_dmabuf_exp_wait_released {
+	/* IN parameters */
+	__u32 fd;
+	__u32 wait_to_ms;
+};
+
+/*
+ * Import a dma-buf with file descriptor @fd and export granted references
+ * to the pages of that dma-buf into array @refs of size @count.
+ */
+#define IOCTL_GNTDEV_DMABUF_IMP_TO_REFS \
+	_IOC(_IOC_NONE, 'G', 11, \
+	     sizeof(struct ioctl_gntdev_dmabuf_imp_to_refs))
+struct ioctl_gntdev_dmabuf_imp_to_refs {
+	/* IN parameters. */
+	/* File descriptor of the dma-buf. */
+	__u32 fd;
+	/* Number of grant references in @refs array. */
+	__u32 count;
+	/* The domain ID for which references to be granted. */
+	__u32 domid;
+	/* Reserved - must be zero. */
+	__u32 reserved;
+	/* OUT parameters. */
+	/* Array of grant references of size @count. */
+	__u32 refs[1];
+};
+
+/*
+ * This will close all references to the imported buffer with file descriptor
+ * @fd, so it can be released by the owner. This is only valid for buffers
+ * created with IOCTL_GNTDEV_DMABUF_IMP_TO_REFS.
+ */
+#define IOCTL_GNTDEV_DMABUF_IMP_RELEASE \
+	_IOC(_IOC_NONE, 'G', 12, \
+	     sizeof(struct ioctl_gntdev_dmabuf_imp_release))
+struct ioctl_gntdev_dmabuf_imp_release {
+	/* IN parameters */
+	__u32 fd;
+	__u32 reserved;
+};
+
 #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
-- 
cgit v1.2.3


From a6ea5fe95ab4a1a7af6d57429fe3ecde9acf5b5a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 23:19:36 +0200
Subject: ALSA: hda: Fix implicit PCM format type conversion

The PCM format type is defined with __bitwise, hence it can't be
passed as integer but needs an explicit cast.  In this patch, instead
of the messy cast flood, define the format argument of
snd_hdac_calc_stream_format() to be the proper snd_pcm_format_t type.

This fixes sparse warnings like:
  sound/hda/hdac_device.c:760:38: warning: incorrect type in argument 1 (different base types)

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h | 3 ++-
 sound/hda/hdac_device.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 8305e7971035..6f1e1f3b3063 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -11,6 +11,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/timecounter.h>
 #include <sound/core.h>
+#include <sound/pcm.h>
 #include <sound/memalloc.h>
 #include <sound/hda_verbs.h>
 #include <drm/i915_component.h>
@@ -133,7 +134,7 @@ int snd_hdac_get_sub_nodes(struct hdac_device *codec, hda_nid_t nid,
 			   hda_nid_t *start_id);
 unsigned int snd_hdac_calc_stream_format(unsigned int rate,
 					 unsigned int channels,
-					 unsigned int format,
+					 snd_pcm_format_t format,
 					 unsigned int maxbps,
 					 unsigned short spdif_ctls);
 int snd_hdac_query_supported_pcm(struct hdac_device *codec, hda_nid_t nid,
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 7ba100bb1c3f..dbf02a3a8d2f 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -738,7 +738,7 @@ static struct hda_rate_tbl rate_bits[] = {
  */
 unsigned int snd_hdac_calc_stream_format(unsigned int rate,
 					 unsigned int channels,
-					 unsigned int format,
+					 snd_pcm_format_t format,
 					 unsigned int maxbps,
 					 unsigned short spdif_ctls)
 {
-- 
cgit v1.2.3


From e5d3765b6c4cb3ba64295a4205a2f68a4e8fe083 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 25 Jul 2018 23:19:44 +0200
Subject: ALSA: sb: Fix sparse warning wrt PCM format type

The PCM format type is with __bitwise, and it can't be converted from
integer implicitly.  Instead of an ugly cast, declare the function
argument of snd_sb_csp_autoload() with the proper snd_pcm_format_t
type.

This fixes the sparse warnings like:
  sound/isa/sb/sb16_csp.c:743:22: warning: restricted snd_pcm_format_t degrades to integer

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/sb16_csp.h | 2 +-
 sound/isa/sb/sb16_csp.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/sb16_csp.h b/include/sound/sb16_csp.h
index c7c7788005e4..7817e88bd08d 100644
--- a/include/sound/sb16_csp.h
+++ b/include/sound/sb16_csp.h
@@ -46,7 +46,7 @@ enum {
 struct snd_sb_csp_ops {
 	int (*csp_use) (struct snd_sb_csp * p);
 	int (*csp_unuse) (struct snd_sb_csp * p);
-	int (*csp_autoload) (struct snd_sb_csp * p, int pcm_sfmt, int play_rec_mode);
+	int (*csp_autoload) (struct snd_sb_csp * p, snd_pcm_format_t pcm_sfmt, int play_rec_mode);
 	int (*csp_start) (struct snd_sb_csp * p, int sample_width, int channels);
 	int (*csp_stop) (struct snd_sb_csp * p);
 	int (*csp_qsound_transfer) (struct snd_sb_csp * p);
diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c
index fa5780bb0c68..2210e7c72787 100644
--- a/sound/isa/sb/sb16_csp.c
+++ b/sound/isa/sb/sb16_csp.c
@@ -93,7 +93,7 @@ static int snd_sb_csp_riff_load(struct snd_sb_csp * p,
 				struct snd_sb_csp_microcode __user * code);
 static int snd_sb_csp_unload(struct snd_sb_csp * p);
 static int snd_sb_csp_load_user(struct snd_sb_csp * p, const unsigned char __user *buf, int size, int load_flags);
-static int snd_sb_csp_autoload(struct snd_sb_csp * p, int pcm_sfmt, int play_rec_mode);
+static int snd_sb_csp_autoload(struct snd_sb_csp * p, snd_pcm_format_t pcm_sfmt, int play_rec_mode);
 static int snd_sb_csp_check_version(struct snd_sb_csp * p);
 
 static int snd_sb_csp_use(struct snd_sb_csp * p);
@@ -726,7 +726,7 @@ static int snd_sb_csp_firmware_load(struct snd_sb_csp *p, int index, int flags)
  * autoload hardware codec if necessary
  * return 0 if CSP is loaded and ready to run (p->running != 0)
  */
-static int snd_sb_csp_autoload(struct snd_sb_csp * p, int pcm_sfmt, int play_rec_mode)
+static int snd_sb_csp_autoload(struct snd_sb_csp * p, snd_pcm_format_t pcm_sfmt, int play_rec_mode)
 {
 	unsigned long flags;
 	int err = 0;
@@ -736,7 +736,7 @@ static int snd_sb_csp_autoload(struct snd_sb_csp * p, int pcm_sfmt, int play_rec
 		return -EBUSY;
 
 	/* autoload microcode only if requested hardware codec is not already loaded */
-	if (((1 << pcm_sfmt) & p->acc_format) && (play_rec_mode & p->mode)) {
+	if (((1U << (__force int)pcm_sfmt) & p->acc_format) && (play_rec_mode & p->mode)) {
 		p->running = SNDRV_SB_CSP_ST_AUTO;
 	} else {
 		switch (pcm_sfmt) {
-- 
cgit v1.2.3


From 2b8de8a832add91a4e522536078b505283d3979f Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Fri, 6 Jul 2018 19:41:06 -0700
Subject: mfd: rave-sp: Add legacy EEPROM access command translation

This is needed to make rave-sp-eeprom driver work on "legacy"
firmware.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rave-sp.c       | 2 ++
 include/linux/mfd/rave-sp.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/mfd/rave-sp.c b/drivers/mfd/rave-sp.c
index aa75d5841ca0..a999fa721b03 100644
--- a/drivers/mfd/rave-sp.c
+++ b/drivers/mfd/rave-sp.c
@@ -635,6 +635,8 @@ static int rave_sp_default_cmd_translate(enum rave_sp_command command)
 		return 0x1E;
 	case RAVE_SP_CMD_RESET_REASON:
 		return 0x1F;
+	case RAVE_SP_CMD_RMB_EEPROM:
+		return 0x20;
 	default:
 		return -EINVAL;
 	}
diff --git a/include/linux/mfd/rave-sp.h b/include/linux/mfd/rave-sp.h
index fe0ce7bc59cf..11eef77ef976 100644
--- a/include/linux/mfd/rave-sp.h
+++ b/include/linux/mfd/rave-sp.h
@@ -21,6 +21,7 @@ enum rave_sp_command {
 	RAVE_SP_CMD_STATUS			= 0xA0,
 	RAVE_SP_CMD_SW_WDT			= 0xA1,
 	RAVE_SP_CMD_PET_WDT			= 0xA2,
+	RAVE_SP_CMD_RMB_EEPROM			= 0xA4,
 	RAVE_SP_CMD_SET_BACKLIGHT		= 0xA6,
 	RAVE_SP_CMD_RESET			= 0xA7,
 	RAVE_SP_CMD_RESET_REASON		= 0xA8,
-- 
cgit v1.2.3


From 4d3e55bc7690b289eeae0c7e994db965cb2a993d Mon Sep 17 00:00:00 2001
From: Alberto Panizzo <alberto@amarulasolutions.com>
Date: Mon, 9 Jul 2018 19:46:46 +0200
Subject: mfd: wm8994: Allow to configure CS/ADDR Pulldown from dts

For designs where CS/ADDR pin is floating, it is useful to
allow dts to define whether to keep internal pull down or not.

Signed-off-by: Alberto Panizzo <alberto@amarulasolutions.com>
Signed-off-by: Anthony Brandon <anthony@amarulasolutions.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm8994-core.c        | 4 ++++
 include/linux/mfd/wm8994/pdata.h | 6 ++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index fa4b1b7f6db1..22bd6525e09c 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -304,6 +304,8 @@ static int wm8994_set_pdata_from_of(struct wm8994 *wm8994)
 
 	pdata->spkmode_pu = of_property_read_bool(np, "wlf,spkmode-pu");
 
+	pdata->csnaddr_pd = of_property_read_bool(np, "wlf,csnaddr-pd");
+
 	pdata->ldo[0].enable = of_get_named_gpio(np, "wlf,ldo1ena", 0);
 	if (pdata->ldo[0].enable < 0)
 		pdata->ldo[0].enable = 0;
@@ -562,6 +564,8 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
 
 	if (pdata->spkmode_pu)
 		pulls |= WM8994_SPKMODE_PU;
+	if (pdata->csnaddr_pd)
+		pulls |= WM8994_CSNADDR_PD;
 
 	/* Disable unneeded pulls */
 	wm8994_set_bits(wm8994, WM8994_PULL_CONTROL_2,
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 90c60524a496..b19c370fe81a 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -222,6 +222,12 @@ struct wm8994_pdata {
 	 */
 	bool spkmode_pu;
 
+	/*
+	 * CS/ADDR must be pulled internally by the device on this
+	 * system.
+	 */
+	bool csnaddr_pd;
+
 	/**
 	 * Maximum number of channels clocks will be generated for,
 	 * useful for systems where and I2S bus with multiple data
-- 
cgit v1.2.3


From c8fda5bfa9972dc1175b898dfdaa3a375dca6022 Mon Sep 17 00:00:00 2001
From: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Date: Tue, 3 Jul 2018 17:04:11 +0200
Subject: mfd: as3722: Disable auto-power-on when AC OK

On ams AS3722, power on when AC OK is enabled by default.
Making this option as disable by default and enable only
when platform need this explicitly.

Signed-off-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Tested-by: Bibek Basu <bbasu@nvidia.com>
Signed-off-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/as3722.txt |  2 ++
 drivers/mfd/as3722.c                             | 12 ++++++++++++
 include/linux/mfd/as3722.h                       |  3 +++
 3 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mfd/as3722.txt b/Documentation/devicetree/bindings/mfd/as3722.txt
index 5297b2210704..2a665741d7fe 100644
--- a/Documentation/devicetree/bindings/mfd/as3722.txt
+++ b/Documentation/devicetree/bindings/mfd/as3722.txt
@@ -20,6 +20,8 @@ Optional properties:
 - ams,enable-internal-i2c-pullup: Boolean property, to enable internal pullup on
 	i2c scl/sda pins. Missing this will disable internal pullup on i2c
 	scl/sda lines.
+- ams,enable-ac-ok-power-on: Boolean property, to enable exit out of power off
+	mode with AC_OK pin (pin enabled in power off mode).
 
 Optional submodule and their properties:
 =======================================
diff --git a/drivers/mfd/as3722.c b/drivers/mfd/as3722.c
index f87342c211bc..4d069ed21ff6 100644
--- a/drivers/mfd/as3722.c
+++ b/drivers/mfd/as3722.c
@@ -349,6 +349,8 @@ static int as3722_i2c_of_probe(struct i2c_client *i2c,
 					"ams,enable-internal-int-pullup");
 	as3722->en_intern_i2c_pullup = of_property_read_bool(np,
 					"ams,enable-internal-i2c-pullup");
+	as3722->en_ac_ok_pwr_on = of_property_read_bool(np,
+					"ams,enable-ac-ok-power-on");
 	as3722->irq_flags = irqd_get_trigger_type(irq_data);
 	dev_dbg(&i2c->dev, "IRQ flags are 0x%08lx\n", as3722->irq_flags);
 	return 0;
@@ -360,6 +362,7 @@ static int as3722_i2c_probe(struct i2c_client *i2c,
 	struct as3722 *as3722;
 	unsigned long irq_flags;
 	int ret;
+	u8 val = 0;
 
 	as3722 = devm_kzalloc(&i2c->dev, sizeof(struct as3722), GFP_KERNEL);
 	if (!as3722)
@@ -398,6 +401,15 @@ static int as3722_i2c_probe(struct i2c_client *i2c,
 	if (ret < 0)
 		return ret;
 
+	if (as3722->en_ac_ok_pwr_on)
+		val = AS3722_CTRL_SEQU1_AC_OK_PWR_ON;
+	ret = as3722_update_bits(as3722, AS3722_CTRL_SEQU1_REG,
+			AS3722_CTRL_SEQU1_AC_OK_PWR_ON, val);
+	if (ret < 0) {
+		dev_err(as3722->dev, "CTRLsequ1 update failed: %d\n", ret);
+		return ret;
+	}
+
 	ret = devm_mfd_add_devices(&i2c->dev, -1, as3722_devs,
 				   ARRAY_SIZE(as3722_devs), NULL, 0,
 				   regmap_irq_get_domain(as3722->irq_data));
diff --git a/include/linux/mfd/as3722.h b/include/linux/mfd/as3722.h
index 51e6f9414575..b404a5af9bba 100644
--- a/include/linux/mfd/as3722.h
+++ b/include/linux/mfd/as3722.h
@@ -296,6 +296,8 @@
 #define AS3722_ADC1_CONV_NOTREADY			BIT(7)
 #define AS3722_ADC1_SOURCE_SELECT_MASK			0x1F
 
+#define AS3722_CTRL_SEQU1_AC_OK_PWR_ON			BIT(0)
+
 /* GPIO modes */
 #define AS3722_GPIO_MODE_MASK				0x07
 #define AS3722_GPIO_MODE_INPUT				0x00
@@ -391,6 +393,7 @@ struct as3722 {
 	unsigned long irq_flags;
 	bool en_intern_int_pullup;
 	bool en_intern_i2c_pullup;
+	bool en_ac_ok_pwr_on;
 	struct regmap_irq_chip_data *irq_data;
 };
 
-- 
cgit v1.2.3


From 3570a00841fb8a5d2f56ac7c59ccc6c91ea35944 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Tue, 29 May 2018 15:26:12 -0500
Subject: can: uapi: can.h: Fix can error class mask dir path

The CAN error masks header file is in the
include/uapi directory.

Fix the path in the header to the correct location.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index d7f97ac197a9..0afb7d8e867f 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -77,7 +77,7 @@ typedef __u32 canid_t;
 /*
  * Controller Area Network Error Message Frame Mask structure
  *
- * bit 0-28	: error class mask (see include/linux/can/error.h)
+ * bit 0-28	: error class mask (see include/uapi/linux/can/error.h)
  * bit 29-31	: set to zero
  */
 typedef __u32 can_err_mask_t;
-- 
cgit v1.2.3


From 038709071328ff68a936c6b8c33a24a805eea3c5 Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu5@cn.bosch.com>
Date: Wed, 13 Jun 2018 16:37:17 +0200
Subject: can: dev: enable multi-queue for SocketCAN devices

The existing SocketCAN implementation provides alloc_candev() to
allocate a CAN device using a single Tx and Rx queue. This can lead to
priority inversion in case the single Tx queue is already full with low
priority messages and a high priority message needs to be sent while the
bus is fully loaded with medium priority messages.

This problem can be solved by using the existing multi-queue support of
the network subsytem. The commit makes it possible to use multi-queue in
the CAN subsystem in the same way it is used in the Ethernet subsystem
by adding an alloc_candev_mqs() call and accompanying macros. With this
support a CAN device can use multi-queue qdisc (e.g. mqprio) to avoid
the aforementioned priority inversion.

The exisiting functionality of alloc_candev() is the same as before.

CAN devices need to have prioritized multiple hardware queues or are
able to abort waiting for arbitration to make sensible use of
multi-queues.

Signed-off-by: Zhu Yi <yi.zhu5@cn.bosch.com>
Signed-off-by: Mark Jonas <mark.jonas@de.bosch.com>
Reviewed-by: Heiko Schocher <hs@denx.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 8 +++++---
 include/linux/can/dev.h | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 5bb6e8896601..49163570a63a 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -701,7 +701,8 @@ EXPORT_SYMBOL_GPL(alloc_can_err_skb);
 /*
  * Allocate and setup space for the CAN network device
  */
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+				    unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
 	struct can_priv *priv;
@@ -713,7 +714,8 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 	else
 		size = sizeof_priv;
 
-	dev = alloc_netdev(size, "can%d", NET_NAME_UNKNOWN, can_setup);
+	dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup,
+			       txqs, rxqs);
 	if (!dev)
 		return NULL;
 
@@ -732,7 +734,7 @@ struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max)
 
 	return dev;
 }
-EXPORT_SYMBOL_GPL(alloc_candev);
+EXPORT_SYMBOL_GPL(alloc_candev_mqs);
 
 /*
  * Free space of the CAN network device
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 055aaf5ed9af..a83e1f632eb7 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -143,7 +143,12 @@ u8 can_dlc2len(u8 can_dlc);
 /* map the sanitized data length to an appropriate data length code */
 u8 can_len2dlc(u8 len);
 
-struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max);
+struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
+				    unsigned int txqs, unsigned int rxqs);
+#define alloc_candev(sizeof_priv, echo_skb_max) \
+	alloc_candev_mqs(sizeof_priv, echo_skb_max, 1, 1)
+#define alloc_candev_mq(sizeof_priv, echo_skb_max, count) \
+	alloc_candev_mqs(sizeof_priv, echo_skb_max, count, count)
 void free_candev(struct net_device *dev);
 
 /* a candev safe wrapper around netdev_priv */
-- 
cgit v1.2.3


From e5225c820c057537dc780244760e2e24c7d27366 Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <guennadi.liakhovetski@intel.com>
Date: Thu, 26 Jul 2018 04:17:53 -0400
Subject: media: uvcvideo: Send a control event when a Control Change interrupt
 arrives

UVC defines a method of handling asynchronous controls, which sends a
USB packet over the interrupt pipe. This patch implements support for
such packets by sending a control event to the user. Since this can
involve USB traffic and, therefore, scheduling, this has to be done
in a work queue.

Signed-off-by: Guennadi Liakhovetski <guennadi.liakhovetski@intel.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/usb/uvc/uvc_ctrl.c   | 211 ++++++++++++++++++++++++++++---------
 drivers/media/usb/uvc/uvc_status.c | 121 ++++++++++++++++++---
 drivers/media/usb/uvc/uvc_v4l2.c   |   4 +-
 drivers/media/usb/uvc/uvcvideo.h   |  15 ++-
 include/uapi/linux/uvcvideo.h      |   2 +
 5 files changed, 286 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c
index 12b5be66fd2f..c2ad102bd693 100644
--- a/drivers/media/usb/uvc/uvc_ctrl.c
+++ b/drivers/media/usb/uvc/uvc_ctrl.c
@@ -20,6 +20,7 @@
 #include <linux/videodev2.h>
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
+#include <linux/workqueue.h>
 #include <linux/atomic.h>
 #include <media/v4l2-ctrls.h>
 
@@ -971,12 +972,30 @@ static int uvc_ctrl_populate_cache(struct uvc_video_chain *chain,
 	return 0;
 }
 
+static s32 __uvc_ctrl_get_value(struct uvc_control_mapping *mapping,
+				const u8 *data)
+{
+	s32 value = mapping->get(mapping, UVC_GET_CUR, data);
+
+	if (mapping->v4l2_type == V4L2_CTRL_TYPE_MENU) {
+		struct uvc_menu_info *menu = mapping->menu_info;
+		unsigned int i;
+
+		for (i = 0; i < mapping->menu_count; ++i, ++menu) {
+			if (menu->value == value) {
+				value = i;
+				break;
+			}
+		}
+	}
+
+	return value;
+}
+
 static int __uvc_ctrl_get(struct uvc_video_chain *chain,
 	struct uvc_control *ctrl, struct uvc_control_mapping *mapping,
 	s32 *value)
 {
-	struct uvc_menu_info *menu;
-	unsigned int i;
 	int ret;
 
 	if ((ctrl->info.flags & UVC_CTRL_FLAG_GET_CUR) == 0)
@@ -993,18 +1012,8 @@ static int __uvc_ctrl_get(struct uvc_video_chain *chain,
 		ctrl->loaded = 1;
 	}
 
-	*value = mapping->get(mapping, UVC_GET_CUR,
-		uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT));
-
-	if (mapping->v4l2_type == V4L2_CTRL_TYPE_MENU) {
-		menu = mapping->menu_info;
-		for (i = 0; i < mapping->menu_count; ++i, ++menu) {
-			if (menu->value == *value) {
-				*value = i;
-				break;
-			}
-		}
-	}
+	*value = __uvc_ctrl_get_value(mapping,
+				uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT));
 
 	return 0;
 }
@@ -1216,53 +1225,135 @@ static void uvc_ctrl_fill_event(struct uvc_video_chain *chain,
 	ev->u.ctrl.default_value = v4l2_ctrl.default_value;
 }
 
-static void uvc_ctrl_send_event(struct uvc_fh *handle,
-	struct uvc_control *ctrl, struct uvc_control_mapping *mapping,
-	s32 value, u32 changes)
+/*
+ * Send control change events to all subscribers for the @ctrl control. By
+ * default the subscriber that generated the event, as identified by @handle,
+ * is not notified unless it has set the V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK flag.
+ * @handle can be NULL for asynchronous events related to auto-update controls,
+ * in which case all subscribers are notified.
+ */
+static void uvc_ctrl_send_event(struct uvc_video_chain *chain,
+	struct uvc_fh *handle, struct uvc_control *ctrl,
+	struct uvc_control_mapping *mapping, s32 value, u32 changes)
 {
+	struct v4l2_fh *originator = handle ? &handle->vfh : NULL;
 	struct v4l2_subscribed_event *sev;
 	struct v4l2_event ev;
 
 	if (list_empty(&mapping->ev_subs))
 		return;
 
-	uvc_ctrl_fill_event(handle->chain, &ev, ctrl, mapping, value, changes);
+	uvc_ctrl_fill_event(chain, &ev, ctrl, mapping, value, changes);
 
 	list_for_each_entry(sev, &mapping->ev_subs, node) {
-		if (sev->fh != &handle->vfh ||
+		if (sev->fh != originator ||
 		    (sev->flags & V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK) ||
 		    (changes & V4L2_EVENT_CTRL_CH_FLAGS))
 			v4l2_event_queue_fh(sev->fh, &ev);
 	}
 }
 
-static void uvc_ctrl_send_slave_event(struct uvc_fh *handle,
-	struct uvc_control *master, u32 slave_id,
-	const struct v4l2_ext_control *xctrls, unsigned int xctrls_count)
+/*
+ * Send control change events for the slave of the @master control identified
+ * by the V4L2 ID @slave_id. The @handle identifies the event subscriber that
+ * generated the event and may be NULL for auto-update events.
+ */
+static void uvc_ctrl_send_slave_event(struct uvc_video_chain *chain,
+	struct uvc_fh *handle, struct uvc_control *master, u32 slave_id)
 {
 	struct uvc_control_mapping *mapping = NULL;
 	struct uvc_control *ctrl = NULL;
 	u32 changes = V4L2_EVENT_CTRL_CH_FLAGS;
-	unsigned int i;
 	s32 val = 0;
 
-	/*
-	 * We can skip sending an event for the slave if the slave
-	 * is being modified in the same transaction.
-	 */
-	for (i = 0; i < xctrls_count; i++) {
-		if (xctrls[i].id == slave_id)
-			return;
-	}
-
 	__uvc_find_control(master->entity, slave_id, &mapping, &ctrl, 0);
 	if (ctrl == NULL)
 		return;
 
-	if (__uvc_ctrl_get(handle->chain, ctrl, mapping, &val) == 0)
+	if (__uvc_ctrl_get(chain, ctrl, mapping, &val) == 0)
 		changes |= V4L2_EVENT_CTRL_CH_VALUE;
 
-	uvc_ctrl_send_event(handle, ctrl, mapping, val, changes);
+	uvc_ctrl_send_event(chain, handle, ctrl, mapping, val, changes);
+}
+
+static void uvc_ctrl_status_event_work(struct work_struct *work)
+{
+	struct uvc_device *dev = container_of(work, struct uvc_device,
+					      async_ctrl.work);
+	struct uvc_ctrl_work *w = &dev->async_ctrl;
+	struct uvc_video_chain *chain = w->chain;
+	struct uvc_control_mapping *mapping;
+	struct uvc_control *ctrl = w->ctrl;
+	struct uvc_fh *handle;
+	unsigned int i;
+	int ret;
+
+	mutex_lock(&chain->ctrl_mutex);
+
+	handle = ctrl->handle;
+	ctrl->handle = NULL;
+
+	list_for_each_entry(mapping, &ctrl->info.mappings, list) {
+		s32 value = __uvc_ctrl_get_value(mapping, w->data);
+
+		/*
+		 * handle may be NULL here if the device sends auto-update
+		 * events without a prior related control set from userspace.
+		 */
+		for (i = 0; i < ARRAY_SIZE(mapping->slave_ids); ++i) {
+			if (!mapping->slave_ids[i])
+				break;
+
+			uvc_ctrl_send_slave_event(chain, handle, ctrl,
+						  mapping->slave_ids[i]);
+		}
+
+		uvc_ctrl_send_event(chain, handle, ctrl, mapping, value,
+				    V4L2_EVENT_CTRL_CH_VALUE);
+	}
+
+	mutex_unlock(&chain->ctrl_mutex);
+
+	/* Resubmit the URB. */
+	w->urb->interval = dev->int_ep->desc.bInterval;
+	ret = usb_submit_urb(w->urb, GFP_KERNEL);
+	if (ret < 0)
+		uvc_printk(KERN_ERR, "Failed to resubmit status URB (%d).\n",
+			   ret);
+}
+
+bool uvc_ctrl_status_event(struct urb *urb, struct uvc_video_chain *chain,
+			   struct uvc_control *ctrl, const u8 *data)
+{
+	struct uvc_device *dev = chain->dev;
+	struct uvc_ctrl_work *w = &dev->async_ctrl;
+
+	if (list_empty(&ctrl->info.mappings)) {
+		ctrl->handle = NULL;
+		return false;
+	}
+
+	w->data = data;
+	w->urb = urb;
+	w->chain = chain;
+	w->ctrl = ctrl;
+
+	schedule_work(&w->work);
+
+	return true;
+}
+
+static bool uvc_ctrl_xctrls_has_control(const struct v4l2_ext_control *xctrls,
+					unsigned int xctrls_count, u32 id)
+{
+	unsigned int i;
+
+	for (i = 0; i < xctrls_count; ++i) {
+		if (xctrls[i].id == id)
+			return true;
+	}
+
+	return false;
 }
 
 static void uvc_ctrl_send_events(struct uvc_fh *handle,
@@ -1277,29 +1368,39 @@ static void uvc_ctrl_send_events(struct uvc_fh *handle,
 	for (i = 0; i < xctrls_count; ++i) {
 		ctrl = uvc_find_control(handle->chain, xctrls[i].id, &mapping);
 
+		if (ctrl->info.flags & UVC_CTRL_FLAG_ASYNCHRONOUS)
+			/* Notification will be sent from an Interrupt event. */
+			continue;
+
 		for (j = 0; j < ARRAY_SIZE(mapping->slave_ids); ++j) {
-			if (!mapping->slave_ids[j])
+			u32 slave_id = mapping->slave_ids[j];
+
+			if (!slave_id)
 				break;
-			uvc_ctrl_send_slave_event(handle, ctrl,
-						  mapping->slave_ids[j],
-						  xctrls, xctrls_count);
+
+			/*
+			 * We can skip sending an event for the slave if the
+			 * slave is being modified in the same transaction.
+			 */
+			if (uvc_ctrl_xctrls_has_control(xctrls, xctrls_count,
+							slave_id))
+				continue;
+
+			uvc_ctrl_send_slave_event(handle->chain, handle, ctrl,
+						  slave_id);
 		}
 
 		/*
 		 * If the master is being modified in the same transaction
 		 * flags may change too.
 		 */
-		if (mapping->master_id) {
-			for (j = 0; j < xctrls_count; j++) {
-				if (xctrls[j].id == mapping->master_id) {
-					changes |= V4L2_EVENT_CTRL_CH_FLAGS;
-					break;
-				}
-			}
-		}
+		if (mapping->master_id &&
+		    uvc_ctrl_xctrls_has_control(xctrls, xctrls_count,
+						mapping->master_id))
+			changes |= V4L2_EVENT_CTRL_CH_FLAGS;
 
-		uvc_ctrl_send_event(handle, ctrl, mapping, xctrls[i].value,
-				    changes);
+		uvc_ctrl_send_event(handle->chain, handle, ctrl, mapping,
+				    xctrls[i].value, changes);
 	}
 }
 
@@ -1472,9 +1573,10 @@ int uvc_ctrl_get(struct uvc_video_chain *chain,
 	return __uvc_ctrl_get(chain, ctrl, mapping, &xctrl->value);
 }
 
-int uvc_ctrl_set(struct uvc_video_chain *chain,
+int uvc_ctrl_set(struct uvc_fh *handle,
 	struct v4l2_ext_control *xctrl)
 {
+	struct uvc_video_chain *chain = handle->chain;
 	struct uvc_control *ctrl;
 	struct uvc_control_mapping *mapping;
 	s32 value;
@@ -1581,6 +1683,9 @@ int uvc_ctrl_set(struct uvc_video_chain *chain,
 	mapping->set(mapping, value,
 		uvc_ctrl_data(ctrl, UVC_CTRL_DATA_CURRENT));
 
+	if (ctrl->info.flags & UVC_CTRL_FLAG_ASYNCHRONOUS)
+		ctrl->handle = handle;
+
 	ctrl->dirty = 1;
 	ctrl->modified = 1;
 	return 0;
@@ -1612,7 +1717,9 @@ static int uvc_ctrl_get_flags(struct uvc_device *dev,
 			    |  (data[0] & UVC_CONTROL_CAP_SET ?
 				UVC_CTRL_FLAG_SET_CUR : 0)
 			    |  (data[0] & UVC_CONTROL_CAP_AUTOUPDATE ?
-				UVC_CTRL_FLAG_AUTO_UPDATE : 0);
+				UVC_CTRL_FLAG_AUTO_UPDATE : 0)
+			    |  (data[0] & UVC_CONTROL_CAP_ASYNCHRONOUS ?
+				UVC_CTRL_FLAG_ASYNCHRONOUS : 0);
 
 	kfree(data);
 	return ret;
@@ -2173,6 +2280,8 @@ int uvc_ctrl_init_device(struct uvc_device *dev)
 	struct uvc_entity *entity;
 	unsigned int i;
 
+	INIT_WORK(&dev->async_ctrl.work, uvc_ctrl_status_event_work);
+
 	/* Walk the entities list and instantiate controls */
 	list_for_each_entry(entity, &dev->entities, list) {
 		struct uvc_control *ctrl;
@@ -2241,6 +2350,8 @@ void uvc_ctrl_cleanup_device(struct uvc_device *dev)
 	struct uvc_entity *entity;
 	unsigned int i;
 
+	cancel_work_sync(&dev->async_ctrl.work);
+
 	/* Free controls and control mappings for all entities. */
 	list_for_each_entry(entity, &dev->entities, list) {
 		for (i = 0; i < entity->ncontrols; ++i) {
diff --git a/drivers/media/usb/uvc/uvc_status.c b/drivers/media/usb/uvc/uvc_status.c
index 7b710410584a..0722dc684378 100644
--- a/drivers/media/usb/uvc/uvc_status.c
+++ b/drivers/media/usb/uvc/uvc_status.c
@@ -78,7 +78,24 @@ static void uvc_input_report_key(struct uvc_device *dev, unsigned int code,
 /* --------------------------------------------------------------------------
  * Status interrupt endpoint
  */
-static void uvc_event_streaming(struct uvc_device *dev, u8 *data, int len)
+struct uvc_streaming_status {
+	u8	bStatusType;
+	u8	bOriginator;
+	u8	bEvent;
+	u8	bValue[];
+} __packed;
+
+struct uvc_control_status {
+	u8	bStatusType;
+	u8	bOriginator;
+	u8	bEvent;
+	u8	bSelector;
+	u8	bAttribute;
+	u8	bValue[];
+} __packed;
+
+static void uvc_event_streaming(struct uvc_device *dev,
+				struct uvc_streaming_status *status, int len)
 {
 	if (len < 3) {
 		uvc_trace(UVC_TRACE_STATUS, "Invalid streaming status event "
@@ -86,31 +103,97 @@ static void uvc_event_streaming(struct uvc_device *dev, u8 *data, int len)
 		return;
 	}
 
-	if (data[2] == 0) {
+	if (status->bEvent == 0) {
 		if (len < 4)
 			return;
 		uvc_trace(UVC_TRACE_STATUS, "Button (intf %u) %s len %d\n",
-			data[1], data[3] ? "pressed" : "released", len);
-		uvc_input_report_key(dev, KEY_CAMERA, data[3]);
+			  status->bOriginator,
+			  status->bValue[0] ? "pressed" : "released", len);
+		uvc_input_report_key(dev, KEY_CAMERA, status->bValue[0]);
 	} else {
 		uvc_trace(UVC_TRACE_STATUS,
 			  "Stream %u error event %02x len %d.\n",
-			  data[1], data[2], len);
+			  status->bOriginator, status->bEvent, len);
 	}
 }
 
-static void uvc_event_control(struct uvc_device *dev, u8 *data, int len)
+#define UVC_CTRL_VALUE_CHANGE	0
+#define UVC_CTRL_INFO_CHANGE	1
+#define UVC_CTRL_FAILURE_CHANGE	2
+#define UVC_CTRL_MIN_CHANGE	3
+#define UVC_CTRL_MAX_CHANGE	4
+
+static struct uvc_control *uvc_event_entity_find_ctrl(struct uvc_entity *entity,
+						      u8 selector)
 {
-	char *attrs[3] = { "value", "info", "failure" };
+	struct uvc_control *ctrl;
+	unsigned int i;
+
+	for (i = 0, ctrl = entity->controls; i < entity->ncontrols; i++, ctrl++)
+		if (ctrl->info.selector == selector)
+			return ctrl;
+
+	return NULL;
+}
 
-	if (len < 6 || data[2] != 0 || data[4] > 2) {
+static struct uvc_control *uvc_event_find_ctrl(struct uvc_device *dev,
+					const struct uvc_control_status *status,
+					struct uvc_video_chain **chain)
+{
+	list_for_each_entry((*chain), &dev->chains, list) {
+		struct uvc_entity *entity;
+		struct uvc_control *ctrl;
+
+		list_for_each_entry(entity, &(*chain)->entities, chain) {
+			if (entity->id != status->bOriginator)
+				continue;
+
+			ctrl = uvc_event_entity_find_ctrl(entity,
+							  status->bSelector);
+			if (ctrl)
+				return ctrl;
+		}
+	}
+
+	return NULL;
+}
+
+static bool uvc_event_control(struct urb *urb,
+			      const struct uvc_control_status *status, int len)
+{
+	static const char *attrs[] = { "value", "info", "failure", "min", "max" };
+	struct uvc_device *dev = urb->context;
+	struct uvc_video_chain *chain;
+	struct uvc_control *ctrl;
+
+	if (len < 6 || status->bEvent != 0 ||
+	    status->bAttribute >= ARRAY_SIZE(attrs)) {
 		uvc_trace(UVC_TRACE_STATUS, "Invalid control status event "
 				"received.\n");
-		return;
+		return false;
 	}
 
 	uvc_trace(UVC_TRACE_STATUS, "Control %u/%u %s change len %d.\n",
-		data[1], data[3], attrs[data[4]], len);
+		  status->bOriginator, status->bSelector,
+		  attrs[status->bAttribute], len);
+
+	/* Find the control. */
+	ctrl = uvc_event_find_ctrl(dev, status, &chain);
+	if (!ctrl)
+		return false;
+
+	switch (status->bAttribute) {
+	case UVC_CTRL_VALUE_CHANGE:
+		return uvc_ctrl_status_event(urb, chain, ctrl, status->bValue);
+
+	case UVC_CTRL_INFO_CHANGE:
+	case UVC_CTRL_FAILURE_CHANGE:
+	case UVC_CTRL_MIN_CHANGE:
+	case UVC_CTRL_MAX_CHANGE:
+		break;
+	}
+
+	return false;
 }
 
 static void uvc_status_complete(struct urb *urb)
@@ -138,13 +221,23 @@ static void uvc_status_complete(struct urb *urb)
 	len = urb->actual_length;
 	if (len > 0) {
 		switch (dev->status[0] & 0x0f) {
-		case UVC_STATUS_TYPE_CONTROL:
-			uvc_event_control(dev, dev->status, len);
+		case UVC_STATUS_TYPE_CONTROL: {
+			struct uvc_control_status *status =
+				(struct uvc_control_status *)dev->status;
+
+			if (uvc_event_control(urb, status, len))
+				/* The URB will be resubmitted in work context. */
+				return;
 			break;
+		}
 
-		case UVC_STATUS_TYPE_STREAMING:
-			uvc_event_streaming(dev, dev->status, len);
+		case UVC_STATUS_TYPE_STREAMING: {
+			struct uvc_streaming_status *status =
+				(struct uvc_streaming_status *)dev->status;
+
+			uvc_event_streaming(dev, status, len);
 			break;
+		}
 
 		default:
 			uvc_trace(UVC_TRACE_STATUS, "Unknown status event "
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index bd32914259ae..18a7384b50ee 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -994,7 +994,7 @@ static int uvc_ioctl_s_ctrl(struct file *file, void *fh,
 	if (ret < 0)
 		return ret;
 
-	ret = uvc_ctrl_set(chain, &xctrl);
+	ret = uvc_ctrl_set(handle, &xctrl);
 	if (ret < 0) {
 		uvc_ctrl_rollback(handle);
 		return ret;
@@ -1069,7 +1069,7 @@ static int uvc_ioctl_s_try_ext_ctrls(struct uvc_fh *handle,
 		return ret;
 
 	for (i = 0; i < ctrls->count; ++ctrl, ++i) {
-		ret = uvc_ctrl_set(chain, ctrl);
+		ret = uvc_ctrl_set(handle, ctrl);
 		if (ret < 0) {
 			uvc_ctrl_rollback(handle);
 			ctrls->error_idx = commit ? ctrls->count : i;
diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h
index 6b955e0dd956..e5f5d84f1d1d 100644
--- a/drivers/media/usb/uvc/uvcvideo.h
+++ b/drivers/media/usb/uvc/uvcvideo.h
@@ -12,6 +12,7 @@
 #include <linux/usb/video.h>
 #include <linux/uvcvideo.h>
 #include <linux/videodev2.h>
+#include <linux/workqueue.h>
 #include <media/media-device.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-event.h>
@@ -259,6 +260,8 @@ struct uvc_control {
 	   initialized:1;
 
 	u8 *uvc_data;
+
+	struct uvc_fh *handle;	/* File handle that last changed the control. */
 };
 
 struct uvc_format_desc {
@@ -603,6 +606,14 @@ struct uvc_device {
 	u8 *status;
 	struct input_dev *input;
 	char input_phys[64];
+
+	struct uvc_ctrl_work {
+		struct work_struct work;
+		struct urb *urb;
+		struct uvc_video_chain *chain;
+		struct uvc_control *ctrl;
+		const void *data;
+	} async_ctrl;
 };
 
 enum uvc_handle_state {
@@ -756,6 +767,8 @@ int uvc_ctrl_add_mapping(struct uvc_video_chain *chain,
 int uvc_ctrl_init_device(struct uvc_device *dev);
 void uvc_ctrl_cleanup_device(struct uvc_device *dev);
 int uvc_ctrl_restore_values(struct uvc_device *dev);
+bool uvc_ctrl_status_event(struct urb *urb, struct uvc_video_chain *chain,
+			   struct uvc_control *ctrl, const u8 *data);
 
 int uvc_ctrl_begin(struct uvc_video_chain *chain);
 int __uvc_ctrl_commit(struct uvc_fh *handle, int rollback,
@@ -773,7 +786,7 @@ static inline int uvc_ctrl_rollback(struct uvc_fh *handle)
 }
 
 int uvc_ctrl_get(struct uvc_video_chain *chain, struct v4l2_ext_control *xctrl);
-int uvc_ctrl_set(struct uvc_video_chain *chain, struct v4l2_ext_control *xctrl);
+int uvc_ctrl_set(struct uvc_fh *handle, struct v4l2_ext_control *xctrl);
 
 int uvc_xu_ctrl_query(struct uvc_video_chain *chain,
 		      struct uvc_xu_control_query *xqry);
diff --git a/include/uapi/linux/uvcvideo.h b/include/uapi/linux/uvcvideo.h
index 020714d2c5bd..f80f05b3c423 100644
--- a/include/uapi/linux/uvcvideo.h
+++ b/include/uapi/linux/uvcvideo.h
@@ -28,6 +28,8 @@
 #define UVC_CTRL_FLAG_RESTORE		(1 << 6)
 /* Control can be updated by the camera. */
 #define UVC_CTRL_FLAG_AUTO_UPDATE	(1 << 7)
+/* Control supports asynchronous reporting */
+#define UVC_CTRL_FLAG_ASYNCHRONOUS	(1 << 8)
 
 #define UVC_CTRL_FLAG_GET_RANGE \
 	(UVC_CTRL_FLAG_GET_CUR | UVC_CTRL_FLAG_GET_MIN | \
-- 
cgit v1.2.3


From d0dd962d8a4ef4df9b710c4e3a975e6bfd9f0225 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 28 May 2018 10:32:41 -0400
Subject: media: dvb: get rid of VIDEO_SET_SPU_PALETTE

No upstream drivers use it. It doesn't make any sense to have
a compat32 code for something that nobody uses upstream.

Reported-by: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../media/uapi/dvb/video-set-spu-palette.rst       | 82 ----------------------
 .../media/uapi/dvb/video_function_calls.rst        |  1 -
 Documentation/media/uapi/dvb/video_types.rst       | 18 -----
 Documentation/media/video.h.rst.exceptions         |  1 -
 fs/compat_ioctl.c                                  | 30 --------
 include/uapi/linux/dvb/video.h                     |  7 --
 6 files changed, 139 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/video-set-spu-palette.rst

(limited to 'include')

diff --git a/Documentation/media/uapi/dvb/video-set-spu-palette.rst b/Documentation/media/uapi/dvb/video-set-spu-palette.rst
deleted file mode 100644
index 51a1913d21d2..000000000000
--- a/Documentation/media/uapi/dvb/video-set-spu-palette.rst
+++ /dev/null
@@ -1,82 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SPU_PALETTE:
-
-=====================
-VIDEO_SET_SPU_PALETTE
-=====================
-
-Name
-----
-
-VIDEO_SET_SPU_PALETTE
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU_PALETTE, struct video_spu_palette *palette )
-    :name: VIDEO_SET_SPU_PALETTE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_SPU_PALETTE for this command.
-
-    -  .. row 3
-
-       -  video_spu_palette_t \*palette
-
-       -  SPU palette according to section ??.
-
-
-Description
------------
-
-This ioctl sets the SPU color palette.
-
-.. c:type:: video_spu_palette
-
-.. code-block::c
-
-	typedef struct video_spu_palette {      /* SPU Palette information */
-		int length;
-		__u8 __user *palette;
-	} video_spu_palette_t;
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid palette or driver doesn’t handle SPU.
diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst b/Documentation/media/uapi/dvb/video_function_calls.rst
index 68588ac7fecb..8d8383ffaeba 100644
--- a/Documentation/media/uapi/dvb/video_function_calls.rst
+++ b/Documentation/media/uapi/dvb/video_function_calls.rst
@@ -38,6 +38,5 @@ Video Function Calls
     video-set-system
     video-set-highlight
     video-set-spu
-    video-set-spu-palette
     video-get-navi
     video-set-attributes
diff --git a/Documentation/media/uapi/dvb/video_types.rst b/Documentation/media/uapi/dvb/video_types.rst
index 640a21de6b8a..4cfa00e5c934 100644
--- a/Documentation/media/uapi/dvb/video_types.rst
+++ b/Documentation/media/uapi/dvb/video_types.rst
@@ -320,24 +320,6 @@ to the following format:
      } video_spu_t;
 
 
-.. c:type:: video_spu_palette
-
-struct video_spu_palette
-========================
-
-The following structure is used to set the SPU palette by calling
-VIDEO_SPU_PALETTE:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_spu_palette {
-	 int length;
-	 uint8_t *palette;
-     } video_spu_palette_t;
-
-
 .. c:type:: video_navi_pack
 
 struct video_navi_pack
diff --git a/Documentation/media/video.h.rst.exceptions b/Documentation/media/video.h.rst.exceptions
index a91aa884ce0e..89d7c3ef2da7 100644
--- a/Documentation/media/video.h.rst.exceptions
+++ b/Documentation/media/video.h.rst.exceptions
@@ -36,5 +36,4 @@ replace typedef video_stream_source_t :c:type:`video_stream_source`
 replace typedef video_play_state_t :c:type:`video_play_state`
 replace typedef video_highlight_t :c:type:`video_highlight`
 replace typedef video_spu_t :c:type:`video_spu`
-replace typedef video_spu_palette_t :c:type:`video_spu_palette`
 replace typedef video_navi_pack_t :c:type:`video_navi_pack`
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 9907475b4226..fdb5ef9b5d06 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -198,34 +198,6 @@ static int do_video_stillpicture(struct file *file,
 	return err;
 }
 
-struct compat_video_spu_palette {
-	int length;
-	compat_uptr_t palette;
-};
-
-static int do_video_set_spu_palette(struct file *file,
-		unsigned int cmd, struct compat_video_spu_palette __user *up)
-{
-	struct video_spu_palette __user *up_native;
-	compat_uptr_t palp;
-	int length, err;
-
-	err  = get_user(palp, &up->palette);
-	err |= get_user(length, &up->length);
-	if (err)
-		return -EFAULT;
-
-	up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
-	err  = put_user(compat_ptr(palp), &up_native->palette);
-	err |= put_user(length, &up_native->length);
-	if (err)
-		return -EFAULT;
-
-	err = do_ioctl(file, cmd, (unsigned long) up_native);
-
-	return err;
-}
-
 #ifdef CONFIG_BLOCK
 typedef struct sg_io_hdr32 {
 	compat_int_t interface_id;	/* [i] 'S' for SCSI generic (required) */
@@ -1347,8 +1319,6 @@ static long do_ioctl_trans(unsigned int cmd,
 		return do_video_get_event(file, cmd, argp);
 	case VIDEO_STILLPICTURE:
 		return do_video_stillpicture(file, cmd, argp);
-	case VIDEO_SET_SPU_PALETTE:
-		return do_video_set_spu_palette(file, cmd, argp);
 	}
 
 	/*
diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h
index df3d7028c807..6a0c9757b7ba 100644
--- a/include/uapi/linux/dvb/video.h
+++ b/include/uapi/linux/dvb/video.h
@@ -186,12 +186,6 @@ typedef struct video_spu {
 } video_spu_t;
 
 
-typedef struct video_spu_palette {      /* SPU Palette information */
-	int length;
-	__u8 __user *palette;
-} video_spu_palette_t;
-
-
 typedef struct video_navi_pack {
 	int length;          /* 0 ... 1024 */
 	__u8 data[1024];
@@ -248,7 +242,6 @@ typedef __u16 video_attributes_t;
 #define VIDEO_SET_SYSTEM           _IO('o', 38)
 #define VIDEO_SET_HIGHLIGHT        _IOW('o', 39, video_highlight_t)
 #define VIDEO_SET_SPU              _IOW('o', 50, video_spu_t)
-#define VIDEO_SET_SPU_PALETTE      _IOW('o', 51, video_spu_palette_t)
 #define VIDEO_GET_NAVI             _IOR('o', 52, video_navi_pack_t)
 #define VIDEO_SET_ATTRIBUTES       _IO('o', 53)
 #define VIDEO_GET_SIZE             _IOR('o', 55, video_size_t)
-- 
cgit v1.2.3


From 1fb2e3f276ddafee81073d884f599cd2574c31e2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 17 Jul 2018 18:05:36 +0200
Subject: lib/crc: Move polynomial definition to separate header

Allow other drivers and parts of kernel to use the same define for
CRC32 polynomial, instead of duplicating it in many places.  This code
does not bring any functional changes, except moving existing code.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc32poly.h | 20 ++++++++++++++++++++
 lib/crc32.c               |  1 +
 lib/crc32defs.h           | 14 --------------
 lib/gen_crc32table.c      |  1 +
 4 files changed, 22 insertions(+), 14 deletions(-)
 create mode 100644 include/linux/crc32poly.h

(limited to 'include')

diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h
new file mode 100644
index 000000000000..7ad5aa92d3c7
--- /dev/null
+++ b/include/linux/crc32poly.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CRC32_POLY_H
+#define _LINUX_CRC32_POLY_H
+
+/*
+ * There are multiple 16-bit CRC polynomials in common use, but this is
+ * *the* standard CRC-32 polynomial, first popularized by Ethernet.
+ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
+ */
+#define CRCPOLY_LE 0xedb88320
+#define CRCPOLY_BE 0x04c11db7
+
+/*
+ * This is the CRC32c polynomial, as outlined by Castagnoli.
+ * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+
+ * x^8+x^6+x^0
+ */
+#define CRC32C_POLY_LE 0x82F63B78
+
+#endif /* _LINUX_CRC32_POLY_H */
diff --git a/lib/crc32.c b/lib/crc32.c
index 2ef20fe84b69..341c54cb4edf 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -27,6 +27,7 @@
 /* see: Documentation/crc32.txt for a description of algorithms */
 
 #include <linux/crc32.h>
+#include <linux/crc32poly.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/sched.h>
diff --git a/lib/crc32defs.h b/lib/crc32defs.h
index cb275a28a750..0c8fb5923e7e 100644
--- a/lib/crc32defs.h
+++ b/lib/crc32defs.h
@@ -1,18 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * There are multiple 16-bit CRC polynomials in common use, but this is
- * *the* standard CRC-32 polynomial, first popularized by Ethernet.
- * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
- */
-#define CRCPOLY_LE 0xedb88320
-#define CRCPOLY_BE 0x04c11db7
-
-/*
- * This is the CRC32c polynomial, as outlined by Castagnoli.
- * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+
- * x^8+x^6+x^0
- */
-#define CRC32C_POLY_LE 0x82F63B78
 
 /* Try to choose an implementation variant via Kconfig */
 #ifdef CONFIG_CRC32_SLICEBY8
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 8f26660ea10a..34c3bc826f45 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
+#include "../include/linux/crc32poly.h"
 #include "../include/generated/autoconf.h"
 #include "crc32defs.h"
 #include <inttypes.h>
-- 
cgit v1.2.3


From e37f2f93afe594682702439ca34eb8130598cdf2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 17 Jul 2018 18:05:37 +0200
Subject: lib/crc: Use consistent naming for CRC-32 polynomials

Header was defining CRCPOLY_LE/BE and CRC32C_POLY_LE but in fact all of
them are CRC-32 polynomials so use consistent naming.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc32poly.h |  4 ++--
 lib/crc32.c               | 10 +++++-----
 lib/gen_crc32table.c      |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h
index 7ad5aa92d3c7..62c4b7790a28 100644
--- a/include/linux/crc32poly.h
+++ b/include/linux/crc32poly.h
@@ -7,8 +7,8 @@
  * *the* standard CRC-32 polynomial, first popularized by Ethernet.
  * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
  */
-#define CRCPOLY_LE 0xedb88320
-#define CRCPOLY_BE 0x04c11db7
+#define CRC32_POLY_LE 0xedb88320
+#define CRC32_POLY_BE 0x04c11db7
 
 /*
  * This is the CRC32c polynomial, as outlined by Castagnoli.
diff --git a/lib/crc32.c b/lib/crc32.c
index 341c54cb4edf..a6c9afafc8c8 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -185,7 +185,7 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
 #if CRC_LE_BITS == 1
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE);
+	return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
 }
 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
@@ -195,7 +195,7 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_le, CRCPOLY_LE);
+			(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
 }
 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 {
@@ -269,7 +269,7 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
 
 u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
 {
-	return crc32_generic_shift(crc, len, CRCPOLY_LE);
+	return crc32_generic_shift(crc, len, CRC32_POLY_LE);
 }
 
 u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
@@ -331,13 +331,13 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
 #if CRC_LE_BITS == 1
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
-	return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE);
+	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
 }
 #else
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len,
-			(const u32 (*)[256])crc32table_be, CRCPOLY_BE);
+			(const u32 (*)[256])crc32table_be, CRC32_POLY_BE);
 }
 #endif
 EXPORT_SYMBOL(crc32_be);
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
index 34c3bc826f45..f755b997b967 100644
--- a/lib/gen_crc32table.c
+++ b/lib/gen_crc32table.c
@@ -58,7 +58,7 @@ static void crc32init_le_generic(const uint32_t polynomial,
 
 static void crc32init_le(void)
 {
-	crc32init_le_generic(CRCPOLY_LE, crc32table_le);
+	crc32init_le_generic(CRC32_POLY_LE, crc32table_le);
 }
 
 static void crc32cinit_le(void)
@@ -77,7 +77,7 @@ static void crc32init_be(void)
 	crc32table_be[0][0] = 0;
 
 	for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
-		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
+		crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32_POLY_BE : 0);
 		for (j = 0; j < i; j++)
 			crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
 	}
-- 
cgit v1.2.3


From 87e1a881e1fe3bfd89903c54ae0d6d6adedb76f1 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 25 Jul 2018 08:51:39 -0400
Subject: media: media.h: remove linux/version.h include

The media.h public header is one of only three public headers that include
linux/version.h. Drop it from media.h. It was only used for an obsolete
define.

It has to be added to media-device.c, since that source relied on media.h
to include it.

Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/media-device.c | 1 +
 include/uapi/linux/media.h   | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/media-device.c b/drivers/media/media-device.c
index 14959b19a342..fcdf3d5dc4b6 100644
--- a/drivers/media/media-device.c
+++ b/drivers/media/media-device.c
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/usb.h>
+#include <linux/version.h>
 
 #include <media/media-device.h>
 #include <media/media-devnode.h>
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 82ec9f132a53..36f76e777ef9 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -25,7 +25,6 @@
 #endif
 #include <linux/ioctl.h>
 #include <linux/types.h>
-#include <linux/version.h>
 
 struct media_device_info {
 	char driver[16];
@@ -421,7 +420,7 @@ struct media_v2_topology {
 #define MEDIA_INTF_T_ALSA_TIMER			(MEDIA_INTF_T_ALSA_BASE + 7)
 
 /* Obsolete symbol for media_version, no longer used in the kernel */
-#define MEDIA_API_VERSION			KERNEL_VERSION(0, 1, 0)
+#define MEDIA_API_VERSION			((0 << 16) | (1 << 8) | 0)
 
 #endif
 
-- 
cgit v1.2.3


From 75cbb3f1d840429e6aa67b351332f38b29e6292c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 26 Jul 2018 12:17:41 +0800
Subject: bcache: stop using the deprecated get_seconds()

The get_seconds function is deprecated now since it returns a 32-bit
value that will eventually overflow, and we are replacing it throughout
the kernel with ktime_get_seconds() or ktime_get_real_seconds() that
return a time64_t.

bcache uses get_seconds() to read the current system time and store it in
the superblock as well as in uuid_entry structures that are user visible.

Unfortunately, the two structures in are still limited to 32 bits, so this
won't fix any real problems but will still overflow in year 2106. Let's
at least document that properly, in case we get an updated format in the
future it can be fixed. We still have a long time before the overflow
and checking the tools at https://github.com/koverstreet/bcache-tools
reveals no access to any of them.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/super.c   | 12 ++++++------
 include/uapi/linux/bcache.h |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 40fe26fef00f..e0a92104ca23 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -181,7 +181,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 		goto err;
 	}
 
-	sb->last_mount = get_seconds();
+	sb->last_mount = (u32)ktime_get_real_seconds();
 	err = NULL;
 
 	get_page(bh->b_page);
@@ -701,7 +701,7 @@ static void bcache_device_detach(struct bcache_device *d)
 
 		SET_UUID_FLASH_ONLY(u, 0);
 		memcpy(u->uuid, invalid_uuid, 16);
-		u->invalidated = cpu_to_le32(get_seconds());
+		u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
 		bch_uuid_write(d->c);
 	}
 
@@ -1033,7 +1033,7 @@ void bch_cached_dev_detach(struct cached_dev *dc)
 int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 			  uint8_t *set_uuid)
 {
-	uint32_t rtime = cpu_to_le32(get_seconds());
+	uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
 	struct uuid_entry *u;
 	struct cached_dev *exist_dc, *t;
 
@@ -1076,7 +1076,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 	    (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE ||
 	     BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) {
 		memcpy(u->uuid, invalid_uuid, 16);
-		u->invalidated = cpu_to_le32(get_seconds());
+		u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
 		u = NULL;
 	}
 
@@ -1398,7 +1398,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
 
 	get_random_bytes(u->uuid, 16);
 	memset(u->label, 0, 32);
-	u->first_reg = u->last_reg = cpu_to_le32(get_seconds());
+	u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds());
 
 	SET_UUID_FLASH_ONLY(u, 1);
 	u->sectors = size >> 9;
@@ -1902,7 +1902,7 @@ static void run_cache_set(struct cache_set *c)
 		goto err;
 
 	closure_sync(&cl);
-	c->sb.last_mount = get_seconds();
+	c->sb.last_mount = (u32)ktime_get_real_seconds();
 	bcache_write_super(c);
 
 	list_for_each_entry_safe(dc, t, &uncached_devices, list)
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 821f71a2e48f..8d19e02d752a 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -195,7 +195,7 @@ struct cache_sb {
 	};
 	};
 
-	__u32			last_mount;	/* time_t */
+	__u32			last_mount;	/* time overflow in y2106 */
 
 	__u16			first_bucket;
 	union {
@@ -318,7 +318,7 @@ struct uuid_entry {
 		struct {
 			__u8	uuid[16];
 			__u8	label[32];
-			__u32	first_reg;
+			__u32	first_reg; /* time overflow in y2106 */
 			__u32	last_reg;
 			__u32	invalidated;
 
-- 
cgit v1.2.3


From 7aaa1807e698f73094b78f0ef25b1a37a4409a55 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Fri, 27 Jul 2018 09:48:30 -0600
Subject: IB/cache: Restore compatibility for ib_query_gid

Code changes in smc have become so complicated this cycle that the RDMA
patches to remove ib_query_gid in smc create too complex merge conflicts.
Allow those conflicts to be resolved by using the net/smc hunks by
providing a compatibility wrapper. During the second phase of the merge
window this wrapper will be deleted and smc updated to use the new API.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_cache.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 1108d4220276..a4ce441f36f0 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -132,4 +132,28 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
 					    u8 port_num, int index);
 void rdma_put_gid_attr(const struct ib_gid_attr *attr);
 void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+
+/*
+ * This is to be removed. It only exists to make merging rdma and smc simpler.
+ */
+static inline __deprecated int ib_query_gid(struct ib_device *device,
+					    u8 port_num, int index,
+					    union ib_gid *gid,
+					    struct ib_gid_attr *attr_out)
+{
+	const struct ib_gid_attr *attr;
+
+	attr = rdma_get_gid_attr(device, port_num, index);
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	if (attr->ndev)
+		dev_hold(attr->ndev);
+	*attr_out = *attr;
+
+	rdma_put_gid_attr(attr);
+
+	return 0;
+}
+
 #endif /* _IB_CACHE_H */
-- 
cgit v1.2.3


From 1f3ed383fb9a073ae2e408cd7a0717b04c7c3a21 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 27 Jul 2018 09:45:05 +0200
Subject: net: sched: don't dump chains only held by actions

In case a chain is empty and not explicitly created by a user,
such chain should not exist. The only exception is if there is
an action "goto chain" pointing to it. In that case, don't show the
chain in the dump. Track the chain references held by actions and
use them to find out if a chain should or should not be shown
in chain dump.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  3 ++
 include/net/sch_generic.h |  1 +
 net/sched/act_api.c       |  4 +--
 net/sched/cls_api.c       | 70 +++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 64 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3101582f642..6d02f31abba8 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -39,7 +39,10 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create);
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
+				       u32 chain_index);
 void tcf_chain_put(struct tcf_chain *chain);
+void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 085c509c8674..c5432362dc26 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -314,6 +314,7 @@ struct tcf_chain {
 	struct tcf_block *block;
 	u32 index; /* chain index */
 	unsigned int refcnt;
+	unsigned int action_refcnt;
 	bool explicitly_created;
 	const struct tcf_proto_ops *tmplt_ops;
 	void *tmplt_priv;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 148a89ab789b..b43df1e25c6d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -36,7 +36,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
 
 	if (!tp)
 		return -EINVAL;
-	a->goto_chain = tcf_chain_get(tp->chain->block, chain_index, true);
+	a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
 	if (!a->goto_chain)
 		return -ENOMEM;
 	return 0;
@@ -44,7 +44,7 @@ static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
 
 static void tcf_action_goto_chain_fini(struct tc_action *a)
 {
-	tcf_chain_put(a->goto_chain);
+	tcf_chain_put_by_act(a->goto_chain);
 }
 
 static void tcf_action_goto_chain_exec(const struct tc_action *a,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 75cce2819de9..e20aad1987b8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -262,6 +262,25 @@ static void tcf_chain_hold(struct tcf_chain *chain)
 	++chain->refcnt;
 }
 
+static void tcf_chain_hold_by_act(struct tcf_chain *chain)
+{
+	++chain->action_refcnt;
+}
+
+static void tcf_chain_release_by_act(struct tcf_chain *chain)
+{
+	--chain->action_refcnt;
+}
+
+static bool tcf_chain_is_zombie(struct tcf_chain *chain)
+{
+	/* In case all the references are action references, this
+	 * chain is a zombie and should not be listed in the chain
+	 * dump list.
+	 */
+	return chain->refcnt == chain->action_refcnt;
+}
+
 static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
 					  u32 chain_index)
 {
@@ -298,6 +317,15 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 }
 EXPORT_SYMBOL(tcf_chain_get);
 
+struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
+{
+	struct tcf_chain *chain = tcf_chain_get(block, chain_index, true);
+
+	tcf_chain_hold_by_act(chain);
+	return chain;
+}
+EXPORT_SYMBOL(tcf_chain_get_by_act);
+
 static void tc_chain_tmplt_del(struct tcf_chain *chain);
 
 void tcf_chain_put(struct tcf_chain *chain)
@@ -310,6 +338,13 @@ void tcf_chain_put(struct tcf_chain *chain)
 }
 EXPORT_SYMBOL(tcf_chain_put);
 
+void tcf_chain_put_by_act(struct tcf_chain *chain)
+{
+	tcf_chain_release_by_act(chain);
+	tcf_chain_put(chain);
+}
+EXPORT_SYMBOL(tcf_chain_put_by_act);
+
 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
 	if (chain->explicitly_created)
@@ -1803,20 +1838,29 @@ replay:
 	chain = tcf_chain_lookup(block, chain_index);
 	if (n->nlmsg_type == RTM_NEWCHAIN) {
 		if (chain) {
-			NL_SET_ERR_MSG(extack, "Filter chain already exists");
-			return -EEXIST;
-		}
-		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
-			NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
-			return -ENOENT;
-		}
-		chain = tcf_chain_create(block, chain_index);
-		if (!chain) {
-			NL_SET_ERR_MSG(extack, "Failed to create filter chain");
-			return -ENOMEM;
+			if (tcf_chain_is_zombie(chain)) {
+				/* The chain exists only because there is
+				 * some action referencing it, meaning it
+				 * is a zombie.
+				 */
+				tcf_chain_hold(chain);
+			} else {
+				NL_SET_ERR_MSG(extack, "Filter chain already exists");
+				return -EEXIST;
+			}
+		} else {
+			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
+				return -ENOENT;
+			}
+			chain = tcf_chain_create(block, chain_index);
+			if (!chain) {
+				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
+				return -ENOMEM;
+			}
 		}
 	} else {
-		if (!chain) {
+		if (!chain || tcf_chain_is_zombie(chain)) {
 			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
 			return -EINVAL;
 		}
@@ -1944,6 +1988,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 			index++;
 			continue;
 		}
+		if (tcf_chain_is_zombie(chain))
+			continue;
 		err = tc_chain_fill_node(chain, net, skb, block,
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-- 
cgit v1.2.3


From f07d141fe9430cdf9f8a65a87c4136bd83b8ab2e Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 23 Jul 2018 23:16:07 +0100
Subject: dma-mapping: Generalise dma_32bit_limit flag

Whilst the notion of an upstream DMA restriction is most commonly seen
in PCI host bridges saddled with a 32-bit native interface, a more
general version of the same issue can exist on complex SoCs where a bus
or point-to-point interconnect link from a device's DMA master interface
to another component along the path to memory (often an IOMMU) may carry
fewer address bits than the interfaces at both ends nominally support.
In order to properly deal with this, the first step is to expand the
dma_32bit_limit flag into an arbitrary mask.

To minimise the impact on existing code, we'll make sure to only
consider this new mask valid if set. That makes sense anyway, since a
mask of zero would represent DMA not being wired up at all, and that
would be better handled by not providing valid ops in the first place.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kernel/pci-dma.c | 2 +-
 include/linux/device.h    | 6 +++---
 kernel/dma/direct.c       | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index ab5d9dd668d2..80f9fe8d27d0 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -175,7 +175,7 @@ rootfs_initcall(pci_iommu_init);
 
 static int via_no_dac_cb(struct pci_dev *pdev, void *data)
 {
-	pdev->dev.dma_32bit_limit = true;
+	pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
 	return 0;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 055a69dbcd18..6d3b000be57e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -886,6 +886,8 @@ struct dev_links_info {
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
  * 		hardware supports 64-bit addresses for consistent allocations
  * 		such descriptors.
+ * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA
+ *		limit than the device itself supports.
  * @dma_pfn_offset: offset of DMA memory range relatively of RAM
  * @dma_parms:	A low level driver may set these to teach IOMMU code about
  * 		segment limitations.
@@ -912,8 +914,6 @@ struct dev_links_info {
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
- * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself
- *		indicates support for a higher limit in the dma_mask field.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -967,6 +967,7 @@ struct device {
 					     not all hardware supports
 					     64 bit addresses for consistent
 					     allocations such descriptors. */
+	u64		bus_dma_mask;	/* upstream dma_mask constraint */
 	unsigned long	dma_pfn_offset;
 
 	struct device_dma_parameters *dma_parms;
@@ -1002,7 +1003,6 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
-	bool			dma_32bit_limit:1;
 };
 
 static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8be8106270c2..c2860c5a9e96 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -180,10 +180,10 @@ int dma_direct_supported(struct device *dev, u64 mask)
 		return 0;
 #endif
 	/*
-	 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
-	 * if the device itself might support it.
+	 * Upstream PCI/PCIe bridges or SoC interconnects may not carry
+	 * as many DMA address bits as the device itself supports.
 	 */
-	if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+	if (dev->bus_dma_mask && mask > dev->bus_dma_mask)
 		return 0;
 	return 1;
 }
-- 
cgit v1.2.3


From 9b89bc3857a6c0dfda18ddae2a42c114ecc32753 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 12 May 2018 18:18:12 +0200
Subject: nvme.h: add support for the log specific field

NVMe 1.3 added a new log specific field to the get log page CQ
defintion, add it to our get_log_page SQ structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 80dfedcf0bf7..39f05b0b8c7f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -885,7 +885,7 @@ struct nvme_get_log_page_command {
 	__u64			rsvd2[2];
 	union nvme_data_ptr	dptr;
 	__u8			lid;
-	__u8			rsvd10;
+	__u8			lsp; /* upper 4 bits reserved */
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-- 
cgit v1.2.3


From 1a37621658fe06b10cf8bac02c32304d2a1c888c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 13 May 2018 18:53:57 +0200
Subject: nvme.h: add ANA definitions

Add various defintions from NVMe 1.3 TP 4004.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
---
 include/linux/nvme.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 39f05b0b8c7f..64c9175723de 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
 	__le32			sanicap;
 	__le32			hmminds;
 	__le16			hmmaxd;
-	__u8			rsvd338[174];
+	__u8			rsvd338[4];
+	__u8			anatt;
+	__u8			anacap;
+	__le32			anagrpmax;
+	__le32			nanagrpid;
+	__u8			rsvd352[160];
 	__u8			sqes;
 	__u8			cqes;
 	__le16			maxcmd;
@@ -258,7 +263,8 @@ struct nvme_id_ctrl {
 	__le16			acwu;
 	__u8			rsvd534[2];
 	__le32			sgls;
-	__u8			rsvd540[228];
+	__le32			mnan;
+	__u8			rsvd544[224];
 	char			subnqn[256];
 	__u8			rsvd1024[768];
 	__le32			ioccsz;
@@ -312,7 +318,9 @@ struct nvme_id_ns {
 	__le16			nabspf;
 	__le16			noiob;
 	__u8			nvmcap[16];
-	__u8			rsvd64[40];
+	__u8			rsvd64[28];
+	__le32			anagrpid;
+	__u8			rsvd96[8];
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
@@ -425,6 +433,32 @@ struct nvme_effects_log {
 	__u8   resv[2048];
 };
 
+enum nvme_ana_state {
+	NVME_ANA_OPTIMIZED		= 0x01,
+	NVME_ANA_NONOPTIMIZED		= 0x02,
+	NVME_ANA_INACCESSIBLE		= 0x03,
+	NVME_ANA_PERSISTENT_LOSS	= 0x04,
+	NVME_ANA_CHANGE			= 0x0f,
+};
+
+struct nvme_ana_group_desc {
+	__le32	grpid;
+	__le32	nnsids;
+	__le64	chgcnt;
+	__u8	state;
+	__u8	rsvd17[7];
+	__le32	nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO	(1 << 0)
+
+struct nvme_ana_rsp_hdr {
+	__le64	chgcnt;
+	__le16	ngrps;
+	__le16	rsvd10[3];
+};
+
 enum {
 	NVME_SMART_CRIT_SPARE		= 1 << 0,
 	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
@@ -444,11 +478,13 @@ enum {
 enum {
 	NVME_AER_NOTICE_NS_CHANGED	= 0x00,
 	NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+	NVME_AER_NOTICE_ANA		= 0x03,
 };
 
 enum {
 	NVME_AEN_CFG_NS_ATTR		= 1 << 8,
 	NVME_AEN_CFG_FW_ACT		= 1 << 9,
+	NVME_AEN_CFG_ANA_CHANGE		= 1 << 11,
 };
 
 struct nvme_lba_range_type {
@@ -763,6 +799,7 @@ enum {
 	NVME_LOG_FW_SLOT	= 0x03,
 	NVME_LOG_CHANGED_NS	= 0x04,
 	NVME_LOG_CMD_EFFECTS	= 0x05,
+	NVME_LOG_ANA		= 0x0c,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
 	NVME_FWACT_REPL		= (0 << 3),
@@ -1185,6 +1222,13 @@ enum {
 	NVME_SC_ACCESS_DENIED		= 0x286,
 	NVME_SC_UNWRITTEN_BLOCK		= 0x287,
 
+	/*
+	 * Path-related Errors:
+	 */
+	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
+	NVME_SC_ANA_INACCESSIBLE	= 0x302,
+	NVME_SC_ANA_TRANSITION		= 0x303,
+
 	NVME_SC_DNR			= 0x4000,
 };
 
-- 
cgit v1.2.3


From 2ac305b7c8d7f93a6ef0018391e9865ea4ac0d65 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Thu, 26 Jul 2018 16:32:08 +0800
Subject: drm/ttm: add ttm_set_memory header (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch moves all non-x86 abstraction to the ttm_set_memory header.
It is to make function calling more clearly.

(v2): add ttm_ prefix.

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Bas Nieuwenhuizen <basni@chromium.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/ttm/ttm_set_memory.h | 128 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100644 include/drm/ttm/ttm_set_memory.h

(limited to 'include')

diff --git a/include/drm/ttm/ttm_set_memory.h b/include/drm/ttm/ttm_set_memory.h
new file mode 100644
index 000000000000..a70723cf208b
--- /dev/null
+++ b/include/drm/ttm/ttm_set_memory.h
@@ -0,0 +1,128 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+/*
+ * Authors: Huang Rui <ray.huang@amd.com>
+ */
+
+#ifndef TTM_SET_MEMORY
+#define TTM_SET_MEMORY
+
+#include <linux/mm.h>
+
+#ifdef CONFIG_X86
+
+#include <asm/set_memory.h>
+
+static inline int ttm_set_pages_array_wb(struct page **pages, int addrinarray)
+{
+	return set_pages_array_wb(pages, addrinarray);
+}
+
+static inline int ttm_set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	return set_pages_array_wc(pages, addrinarray);
+}
+
+static inline int ttm_set_pages_array_uc(struct page **pages, int addrinarray)
+{
+	return set_pages_array_uc(pages, addrinarray);
+}
+
+static inline int ttm_set_pages_wb(struct page *page, int numpages)
+{
+	return set_pages_wb(page, numpages);
+}
+
+#else /* for CONFIG_X86 */
+
+#if IS_ENABLED(CONFIG_AGP)
+
+#include <asm/agp.h>
+
+static inline int ttm_set_pages_array_wb(struct page **pages, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		unmap_page_from_agp(pages[i]);
+	return 0;
+}
+
+static inline int ttm_set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+	return 0;
+}
+
+static inline int ttm_set_pages_array_uc(struct page **pages, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++)
+		map_page_into_agp(pages[i]);
+	return 0;
+}
+
+static inline int ttm_set_pages_wb(struct page *page, int numpages)
+{
+	int i;
+
+	for (i = 0; i < numpages; i++)
+		unmap_page_from_agp(page++);
+	return 0;
+}
+
+#else /* for CONFIG_AGP */
+
+static inline int ttm_set_pages_array_wb(struct page **pages, int addrinarray)
+{
+	return 0;
+}
+
+static inline int ttm_set_pages_array_wc(struct page **pages, int addrinarray)
+{
+	return 0;
+}
+
+static inline int ttm_set_pages_array_uc(struct page **pages, int addrinarray)
+{
+	return 0;
+}
+
+static inline int ttm_set_pages_wb(struct page *page, int numpages)
+{
+	return 0;
+}
+
+#endif /* for CONFIG_AGP */
+
+#endif /* for CONFIG_X86 */
+
+#endif
-- 
cgit v1.2.3


From b67c540b8a987e365dc548e5b2ddf023946e3d63 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 27 Jul 2018 15:26:56 +0300
Subject: net: dcb: Add priority-to-DSCP map getters

On ingress, a network device such as a switch assigns to packets
priority based on various criteria. Common options include interpreting
PCP and DSCP fields according to user configuration. When a packet
egresses the switch, a reverse process may rewrite PCP and/or DSCP
values according to packet priority.

The following three functions support a) obtaining a DSCP-to-priority
map or vice versa, and b) finding default-priority entries in APP
database.

The DCB subsystem supports for APP entries a very generous M:N mapping
between priorities and protocol identifiers. Understandably,
several (say) DSCP values can map to the same priority. But this
asymmetry holds the other way around as well--one priority can map to
several DSCP values. For this reason, the following functions operate in
terms of bitmaps, with ones in positions that match some APP entry.

- dcb_ieee_getapp_dscp_prio_mask_map() to compute for a given netdevice
  a map of DSCP-to-priority-mask, which gives for each DSCP value a
  bitmap of priorities related to that DSCP value by APP, along the
  lines of dcb_ieee_getapp_mask().

- dcb_ieee_getapp_prio_dscp_mask_map() similarly to compute for a given
  netdevice a map from priorities to a bitmap of DSCPs.

- dcb_ieee_getapp_default_prio_mask() which finds all default-priority
  rules for a given port in APP database, and returns a mask of
  priorities allowed by these default-priority rules.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h | 13 ++++++++
 net/dcb/dcbnl.c     | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

(limited to 'include')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 0e5e91be2d30..e22a8a3c089b 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -34,6 +34,19 @@ int dcb_ieee_setapp(struct net_device *, struct dcb_app *);
 int dcb_ieee_delapp(struct net_device *, struct dcb_app *);
 u8 dcb_ieee_getapp_mask(struct net_device *, struct dcb_app *);
 
+struct dcb_ieee_app_prio_map {
+	u64 map[IEEE_8021QAZ_MAX_TCS];
+};
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_prio_map *p_map);
+
+struct dcb_ieee_app_dscp_map {
+	u8 map[64];
+};
+void dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_dscp_map *p_map);
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev);
+
 int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
 		      u32 seq, u32 pid);
 int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 013fdb6fa07a..a556cd708885 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1958,6 +1958,92 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
 }
 EXPORT_SYMBOL(dcb_ieee_delapp);
 
+/**
+ * dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find mapping from
+ * priorities to the DSCP values assigned to that priority. Initialize p_map
+ * such that each map element holds a bit mask of DSCP values configured for
+ * that priority by APP entries.
+ */
+void dcb_ieee_getapp_prio_dscp_mask_map(const struct net_device *dev,
+					struct dcb_ieee_app_prio_map *p_map)
+{
+	int ifindex = dev->ifindex;
+	struct dcb_app_type *itr;
+	u8 prio;
+
+	memset(p_map->map, 0, sizeof(p_map->map));
+
+	spin_lock_bh(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->ifindex == ifindex &&
+		    itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+		    itr->app.protocol < 64 &&
+		    itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+			prio = itr->app.priority;
+			p_map->map[prio] |= 1ULL << itr->app.protocol;
+		}
+	}
+	spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_prio_dscp_mask_map);
+
+/**
+ * dcb_ieee_getapp_dscp_prio_mask_map - For a given device, find mapping from
+ * DSCP values to the priorities assigned to that DSCP value. Initialize p_map
+ * such that each map element holds a bit mask of priorities configured for a
+ * given DSCP value by APP entries.
+ */
+void
+dcb_ieee_getapp_dscp_prio_mask_map(const struct net_device *dev,
+				   struct dcb_ieee_app_dscp_map *p_map)
+{
+	int ifindex = dev->ifindex;
+	struct dcb_app_type *itr;
+
+	memset(p_map->map, 0, sizeof(p_map->map));
+
+	spin_lock_bh(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->ifindex == ifindex &&
+		    itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+		    itr->app.protocol < 64 &&
+		    itr->app.priority < IEEE_8021QAZ_MAX_TCS)
+			p_map->map[itr->app.protocol] |= 1 << itr->app.priority;
+	}
+	spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_dscp_prio_mask_map);
+
+/**
+ * Per 802.1Q-2014, the selector value of 1 is used for matching on Ethernet
+ * type, with valid PID values >= 1536. A special meaning is then assigned to
+ * protocol value of 0: "default priority. For use when priority is not
+ * otherwise specified".
+ *
+ * dcb_ieee_getapp_default_prio_mask - For a given device, find all APP entries
+ * of the form {$PRIO, ETHERTYPE, 0} and construct a bit mask of all default
+ * priorities set by these entries.
+ */
+u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
+{
+	int ifindex = dev->ifindex;
+	struct dcb_app_type *itr;
+	u8 mask = 0;
+
+	spin_lock_bh(&dcb_lock);
+	list_for_each_entry(itr, &dcb_app_list, list) {
+		if (itr->ifindex == ifindex &&
+		    itr->app.selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+		    itr->app.protocol == 0 &&
+		    itr->app.priority < IEEE_8021QAZ_MAX_TCS)
+			mask |= 1 << itr->app.priority;
+	}
+	spin_unlock_bh(&dcb_lock);
+
+	return mask;
+}
+EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+
 static int __init dcbnl_init(void)
 {
 	INIT_LIST_HEAD(&dcb_app_list);
-- 
cgit v1.2.3


From 3ae5536b808dced0af5b2e6768a41862620c779d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 27 Jul 2018 10:59:57 +0200
Subject: l2tp: ignore L2TP_ATTR_DATA_SEQ netlink attribute

The value of this attribute is never used.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 7 ++++---
 net/l2tp/l2tp_core.h      | 8 --------
 net/l2tp/l2tp_debugfs.c   | 4 +---
 net/l2tp/l2tp_netlink.c   | 6 ------
 4 files changed, 5 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 7d570c7bd117..ae888606b3ec 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -65,9 +65,9 @@ struct sockaddr_l2tpip6 {
  * TUNNEL_MODIFY	- CONN_ID, udpcsum
  * TUNNEL_GETSTATS	- CONN_ID, (stats)
  * TUNNEL_GET		- CONN_ID, (...)
- * SESSION_CREATE	- SESSION_ID, PW_TYPE, data_seq, cookie, peer_cookie, l2spec
+ * SESSION_CREATE	- SESSION_ID, PW_TYPE, cookie, peer_cookie, l2spec
  * SESSION_DELETE	- SESSION_ID
- * SESSION_MODIFY	- SESSION_ID, data_seq
+ * SESSION_MODIFY	- SESSION_ID
  * SESSION_GET		- SESSION_ID, (...)
  * SESSION_GETSTATS	- SESSION_ID, (stats)
  *
@@ -95,7 +95,7 @@ enum {
 	L2TP_ATTR_PW_TYPE,		/* u16, enum l2tp_pwtype */
 	L2TP_ATTR_ENCAP_TYPE,		/* u16, enum l2tp_encap_type */
 	L2TP_ATTR_OFFSET,		/* u16 (not used) */
-	L2TP_ATTR_DATA_SEQ,		/* u16 */
+	L2TP_ATTR_DATA_SEQ,		/* u16 (not used) */
 	L2TP_ATTR_L2SPEC_TYPE,		/* u8, enum l2tp_l2spec_type */
 	L2TP_ATTR_L2SPEC_LEN,		/* u8 (not used) */
 	L2TP_ATTR_PROTO_VERSION,	/* u8 */
@@ -169,6 +169,7 @@ enum l2tp_encap_type {
 	L2TP_ENCAPTYPE_IP,
 };
 
+/* For L2TP_ATTR_DATA_SEQ. Unused. */
 enum l2tp_seqmode {
 	L2TP_SEQ_NONE = 0,
 	L2TP_SEQ_IP = 1,
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index d85fde793a8c..7dbfb55ab3b5 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -45,10 +45,6 @@ struct l2tp_tunnel;
  */
 struct l2tp_session_cfg {
 	enum l2tp_pwtype	pw_type;
-	unsigned int		data_seq:2;	/* data sequencing level
-						 * 0 => none, 1 => IP only,
-						 * 2 => all
-						 */
 	unsigned int		recv_seq:1;	/* expect receive packets with
 						 * sequence numbers? */
 	unsigned int		send_seq:1;	/* send packets with sequence
@@ -99,10 +95,6 @@ struct l2tp_session {
 
 	char			name[32];	/* for logging */
 	char			ifname[IFNAMSIZ];
-	unsigned int		data_seq:2;	/* data sequencing level
-						 * 0 => none, 1 => IP only,
-						 * 2 => all
-						 */
 	unsigned int		recv_seq:1;	/* expect receive packets with
 						 * sequence numbers? */
 	unsigned int		send_seq:1;	/* send packets with sequence
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b5d7dde003ef..91b9248610f0 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -191,12 +191,10 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 	if (session->send_seq || session->recv_seq)
 		seq_printf(m, "   nr %hu, ns %hu\n", session->nr, session->ns);
 	seq_printf(m, "   refcnt %d\n", refcount_read(&session->ref_count));
-	seq_printf(m, "   config %d/%d/%c/%c/%s/%s %08x %u\n",
+	seq_printf(m, "   config %d/%d/%c/%c/-/%s %08x %u\n",
 		   session->mtu, session->mru,
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
-		   session->data_seq == 1 ? "IPSEQ" :
-		   session->data_seq == 2 ? "DATASEQ" : "-",
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 5b9900889e31..e4785f6966f6 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -560,9 +560,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	}
 
 	if (tunnel->version > 2) {
-		if (info->attrs[L2TP_ATTR_DATA_SEQ])
-			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
-
 		if (info->attrs[L2TP_ATTR_L2SPEC_TYPE]) {
 			cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
 			if (cfg.l2specific_type != L2TP_L2SPECTYPE_DEFAULT &&
@@ -693,9 +690,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_DEBUG])
 		session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
 
-	if (info->attrs[L2TP_ATTR_DATA_SEQ])
-		session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
-
 	if (info->attrs[L2TP_ATTR_RECV_SEQ])
 		session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
 
-- 
cgit v1.2.3


From ae51a7c6d54876c47ae53c455434023df2c19801 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 27 Jul 2018 10:59:58 +0200
Subject: l2tp: ignore L2TP_ATTR_VLAN_ID netlink attribute

The value of this attribute is never used.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h | 4 ++--
 net/l2tp/l2tp_core.h      | 1 -
 net/l2tp/l2tp_netlink.c   | 3 ---
 3 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index ae888606b3ec..41bf79a4b165 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -60,7 +60,7 @@ struct sockaddr_l2tpip6 {
 /*
  * Commands.
  * Valid TLVs of each command are:-
- * TUNNEL_CREATE	- CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum, vlanid
+ * TUNNEL_CREATE	- CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum
  * TUNNEL_DELETE	- CONN_ID
  * TUNNEL_MODIFY	- CONN_ID, udpcsum
  * TUNNEL_GETSTATS	- CONN_ID, (stats)
@@ -105,7 +105,7 @@ enum {
 	L2TP_ATTR_SESSION_ID,		/* u32 */
 	L2TP_ATTR_PEER_SESSION_ID,	/* u32 */
 	L2TP_ATTR_UDP_CSUM,		/* u8 */
-	L2TP_ATTR_VLAN_ID,		/* u16 */
+	L2TP_ATTR_VLAN_ID,		/* u16 (not used) */
 	L2TP_ATTR_COOKIE,		/* 0, 4 or 8 bytes */
 	L2TP_ATTR_PEER_COOKIE,		/* 0, 4 or 8 bytes */
 	L2TP_ATTR_DEBUG,		/* u32, enum l2tp_debug_flags */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 7dbfb55ab3b5..49fd5e05538c 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -54,7 +54,6 @@ struct l2tp_session_cfg {
 						 * control of LNS. */
 	int			debug;		/* bitmask of debug message
 						 * categories */
-	u16			vlan_id;	/* VLAN pseudowire only */
 	u16			l2specific_type; /* Layer 2 specific type */
 	u8			cookie[8];	/* optional cookie */
 	int			cookie_len;	/* 0, 4 or 8 bytes */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index e4785f6966f6..8ea1deefbc37 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -591,9 +591,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		}
 		if (info->attrs[L2TP_ATTR_IFNAME])
 			cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-
-		if (info->attrs[L2TP_ATTR_VLAN_ID])
-			cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
 	}
 
 	if (info->attrs[L2TP_ATTR_DEBUG])
-- 
cgit v1.2.3


From 92ea4a7eec7289468ac8de5386f4b13d9c210cb5 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 27 Jul 2018 11:00:00 +0200
Subject: l2tp: drop ->mru from struct l2tp_session

This field is not used.

Treat PPPIOC*MRU the same way as PPPIOC*FLAGS: "get" requests return 0,
while "set" requests vadidate the user supplied pointer but discard its
value.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h |  2 +-
 net/l2tp/l2tp_core.c      |  1 -
 net/l2tp/l2tp_core.h      |  2 --
 net/l2tp/l2tp_debugfs.c   |  4 ++--
 net/l2tp/l2tp_netlink.c   | 10 +---------
 net/l2tp/l2tp_ppp.c       | 41 +++++------------------------------------
 6 files changed, 9 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 41bf79a4b165..8bb8c7cfabe5 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -120,7 +120,7 @@ enum {
 	L2TP_ATTR_UDP_SPORT,		/* u16 */
 	L2TP_ATTR_UDP_DPORT,		/* u16 */
 	L2TP_ATTR_MTU,			/* u16 */
-	L2TP_ATTR_MRU,			/* u16 */
+	L2TP_ATTR_MRU,			/* u16 (not used) */
 	L2TP_ATTR_STATS,		/* nested */
 	L2TP_ATTR_IP6_SADDR,		/* struct in6_addr */
 	L2TP_ATTR_IP6_DADDR,		/* struct in6_addr */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index d10f4ed52d92..c61a467fd9b8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1675,7 +1675,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 			session->pwtype = cfg->pw_type;
 			session->debug = cfg->debug;
 			session->mtu = cfg->mtu;
-			session->mru = cfg->mru;
 			session->send_seq = cfg->send_seq;
 			session->recv_seq = cfg->recv_seq;
 			session->lns_mode = cfg->lns_mode;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 49fd5e05538c..fa5ae9432d38 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -62,7 +62,6 @@ struct l2tp_session_cfg {
 	int			reorder_timeout; /* configured reorder timeout
 						  * (in jiffies) */
 	int			mtu;
-	int			mru;
 	char			*ifname;
 };
 
@@ -107,7 +106,6 @@ struct l2tp_session {
 						  * (in jiffies) */
 	int			reorder_skip;	/* set if skip to next nr */
 	int			mtu;
-	int			mru;
 	enum l2tp_pwtype	pwtype;
 	struct l2tp_stats	stats;
 	struct hlist_node	global_hlist;	/* Global hash list node */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 91b9248610f0..aee271741f5b 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -191,8 +191,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 	if (session->send_seq || session->recv_seq)
 		seq_printf(m, "   nr %hu, ns %hu\n", session->nr, session->ns);
 	seq_printf(m, "   refcnt %d\n", refcount_read(&session->ref_count));
-	seq_printf(m, "   config %d/%d/%c/%c/-/%s %08x %u\n",
-		   session->mtu, session->mru,
+	seq_printf(m, "   config %d/0/%c/%c/-/%s %08x %u\n",
+		   session->mtu,
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
 		   session->lns_mode ? "LNS" : "LAC",
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 8ea1deefbc37..a7c409215336 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -611,9 +611,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_MTU])
 		cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
 
-	if (info->attrs[L2TP_ATTR_MRU])
-		cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
-
 #ifdef CONFIG_MODULES
 	if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
 		genl_unlock();
@@ -704,9 +701,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_MTU])
 		session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
 
-	if (info->attrs[L2TP_ATTR_MRU])
-		session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
-
 	ret = l2tp_session_notify(&l2tp_nl_family, info,
 				  session, L2TP_CMD_SESSION_MODIFY);
 
@@ -737,9 +731,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 			session->peer_session_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
 	    nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype) ||
-	    nla_put_u16(skb, L2TP_ATTR_MTU, session->mtu) ||
-	    (session->mru &&
-	     nla_put_u16(skb, L2TP_ATTR_MRU, session->mru)))
+	    nla_put_u16(skb, L2TP_ATTR_MTU, session->mtu))
 		goto nla_put_failure;
 
 	if ((session->ifname[0] &&
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 759ce8421269..44cac66284a5 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -570,10 +570,9 @@ static void pppol2tp_session_init(struct l2tp_session *session)
 	if (dst) {
 		u32 pmtu = dst_mtu(dst);
 
-		if (pmtu) {
+		if (pmtu)
 			session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
-			session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
-		}
+
 		dst_release(dst);
 	}
 }
@@ -781,7 +780,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	} else {
 		/* Default MTU must allow space for UDP/L2TP/PPP headers */
 		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-		cfg.mru = cfg.mtu;
 		cfg.pw_type = L2TP_PWTYPE_PPP;
 
 		session = l2tp_session_create(sizeof(struct pppol2tp_session),
@@ -885,8 +883,6 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
 	/* Default MTU values. */
 	if (cfg->mtu == 0)
 		cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-	if (cfg->mru == 0)
-		cfg->mru = cfg->mtu;
 
 	/* Allocate and initialize a new session context. */
 	session = l2tp_session_create(sizeof(struct pppol2tp_session),
@@ -1101,34 +1097,6 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
 		break;
 
 	case PPPIOCGMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (put_user(session->mru, (int __user *) arg))
-			break;
-
-		l2tp_info(session, L2TP_MSG_CONTROL, "%s: get mru=%d\n",
-			  session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCSMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (get_user(val, (int __user *) arg))
-			break;
-
-		session->mru = val;
-		l2tp_info(session, L2TP_MSG_CONTROL, "%s: set mru=%d\n",
-			  session->name, session->mru);
-		err = 0;
-		break;
-
 	case PPPIOCGFLAGS:
 		err = -EFAULT;
 		if (put_user(0, (int __user *)arg))
@@ -1136,6 +1104,7 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
 		err = 0;
 		break;
 
+	case PPPIOCSMRU:
 	case PPPIOCSFLAGS:
 		err = -EFAULT;
 		if (get_user(val, (int __user *)arg))
@@ -1723,8 +1692,8 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   tunnel->peer_tunnel_id,
 		   session->peer_session_id,
 		   state, user_data_ok);
-	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
-		   session->mtu, session->mru,
+	seq_printf(m, "   %d/0/%c/%c/%s %08x %u\n",
+		   session->mtu,
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
 		   session->lns_mode ? "LNS" : "LAC",
-- 
cgit v1.2.3


From 22a65aa8b1a84ca429c0fc8415dee5681ab36eb3 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 25 Dec 2017 18:40:52 +0200
Subject: net/mlx5e: Vxlan, check maximum number of UDP ports

The NIC has a limited number of offloaded VXLAN UDP ports (usually 4).
Instead of letting the firmware fail when trying to add more ports than
it can handle, let the driver check it on its own.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h    |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 14 ++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                   |  4 +++-
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index c41cfc2a4b70..c4d4db8722f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -657,6 +657,7 @@ enum {
 struct mlx5e_vxlan_db {
 	spinlock_t			lock; /* protect vxlan table */
 	struct radix_tree_root		tree;
+	int				num_ports;
 };
 
 struct mlx5e_l2_rule {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index 2f699998d13e..e3af2efe18ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -52,6 +52,11 @@ void mlx5e_vxlan_init(struct mlx5e_priv *priv)
 		mlx5e_vxlan_add_port(priv, 4789);
 }
 
+static inline u8 mlx5e_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
 static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
 	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
@@ -98,6 +103,13 @@ static void mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port)
 		return;
 	}
 
+	if (vxlan_db->num_ports >= mlx5e_vxlan_max_udp_ports(priv->mdev)) {
+		netdev_info(priv->netdev,
+			    "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+			    port, mlx5e_vxlan_max_udp_ports(priv->mdev));
+		return;
+	}
+
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		return;
 
@@ -114,6 +126,7 @@ static void mlx5e_vxlan_add_port(struct mlx5e_priv *priv, u16 port)
 	if (err)
 		goto err_free;
 
+	vxlan_db->num_ports++;
 	return;
 
 err_free:
@@ -163,6 +176,7 @@ out_unlock:
 	if (remove) {
 		mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 		kfree(vxlan);
+		vxlan_db->num_ports--;
 	}
 	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 22f54bedfaae..60c2308fe062 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -668,7 +668,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         swp[0x1];
 	u8         swp_csum[0x1];
 	u8         swp_lso[0x1];
-	u8         reserved_at_23[0x1b];
+	u8         reserved_at_23[0xd];
+	u8         max_vxlan_udp_ports[0x8];
+	u8         reserved_at_38[0x6];
 	u8         max_geneve_opt_len[0x1];
 	u8         tunnel_stateless_geneve_rx[0x1];
 
-- 
cgit v1.2.3


From 358aa5ce288aa1085f0f3ef9f315119563fa6541 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 9 May 2018 13:28:00 -0700
Subject: net/mlx5e: Vxlan, move vxlan logic to core driver

Move vxlan logic and objects to mlx5 core dirver.
Since it going to be used from different mlx5 interfaces.
e.g. mlx5e PF NIC netdev and mlx5e E-Switch representors.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   2 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  21 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   6 +-
 .../net/ethernet/mellanox/mlx5/core/lib/vxlan.c    | 230 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/vxlan.h    |  64 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   5 +
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c    | 230 ---------------------
 drivers/net/ethernet/mellanox/mlx5/core/vxlan.h    |  64 ------
 include/linux/mlx5/driver.h                        |   2 +
 10 files changed, 315 insertions(+), 313 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/vxlan.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index ae2bdcb1647c..f20fda1ced4f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,8 +14,8 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
 		fpga/ipsec.o fpga/tls.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
-		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o vxlan.o \
-		en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o
+		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
+		en_arfs.o en_fs_ethtool.o en_selftest.o en/port.o lib/vxlan.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1bd4536b9061..c7ed3d20fd54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -52,7 +52,6 @@
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
-#include "vxlan.h"
 
 struct page_pool;
 
@@ -812,7 +811,6 @@ struct mlx5e_priv {
 	u32                        tx_rates[MLX5E_MAX_NUM_SQS];
 
 	struct mlx5e_flow_steering fs;
-	struct mlx5_vxlan          *vxlan;
 
 	struct workqueue_struct    *wq;
 	struct work_struct         update_carrier_work;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ef4b2f0c427c..fde35021a257 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -45,7 +45,7 @@
 #include "en_accel/tls.h"
 #include "accel/ipsec.h"
 #include "accel/tls.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "en/port.h"
 #include "en/xdp.h"
 
@@ -2974,7 +2974,7 @@ int mlx5e_open(struct net_device *netdev)
 		mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP);
 	mutex_unlock(&priv->state_lock);
 
-	if (mlx5_vxlan_allowed(priv->vxlan))
+	if (mlx5_vxlan_allowed(priv->mdev->vxlan))
 		udp_tunnel_get_rx_info(netdev);
 
 	return err;
@@ -3983,7 +3983,7 @@ static void mlx5e_vxlan_add_work(struct work_struct *work)
 	u16 port = vxlan_work->port;
 
 	mutex_lock(&priv->state_lock);
-	mlx5_vxlan_add_port(priv->vxlan, port);
+	mlx5_vxlan_add_port(priv->mdev->vxlan, port);
 	mutex_unlock(&priv->state_lock);
 
 	kfree(vxlan_work);
@@ -3997,7 +3997,7 @@ static void mlx5e_vxlan_del_work(struct work_struct *work)
 	u16 port = vxlan_work->port;
 
 	mutex_lock(&priv->state_lock);
-	mlx5_vxlan_del_port(priv->vxlan, port);
+	mlx5_vxlan_del_port(priv->mdev->vxlan, port);
 	mutex_unlock(&priv->state_lock);
 	kfree(vxlan_work);
 }
@@ -4028,7 +4028,7 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 		return;
 
-	if (!mlx5_vxlan_allowed(priv->vxlan))
+	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 		return;
 
 	mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
@@ -4042,7 +4042,7 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev,
 	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
 		return;
 
-	if (!mlx5_vxlan_allowed(priv->vxlan))
+	if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
 		return;
 
 	mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 0);
@@ -4076,7 +4076,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 		port = be16_to_cpu(udph->dest);
 
 		/* Verify if UDP port is being offloaded by HW */
-		if (mlx5_vxlan_lookup_port(priv->vxlan, port))
+		if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
 			return features;
 	}
 
@@ -4648,7 +4648,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
 
-	if (mlx5_vxlan_allowed(priv->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
+	if (mlx5_vxlan_allowed(mdev->vxlan) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) {
 		netdev->hw_enc_features |= NETIF_F_IP_CSUM;
 		netdev->hw_enc_features |= NETIF_F_IPV6_CSUM;
 		netdev->hw_enc_features |= NETIF_F_TSO;
@@ -4656,7 +4656,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 		netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
 	}
 
-	if (mlx5_vxlan_allowed(priv->vxlan)) {
+	if (mlx5_vxlan_allowed(mdev->vxlan)) {
 		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
 					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
 		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
@@ -4758,8 +4758,6 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
 
-	priv->vxlan = mlx5_vxlan_create(mdev);
-
 	mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
 	err = mlx5e_ipsec_init(priv);
 	if (err)
@@ -4773,7 +4771,6 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 
 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 {
-	mlx5_vxlan_destroy(priv->vxlan);
 	mlx5e_tls_cleanup(priv);
 	mlx5e_ipsec_cleanup(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1b4931b62094..288a57f76e84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -50,7 +50,7 @@
 #include "en_rep.h"
 #include "en_tc.h"
 #include "eswitch.h"
-#include "vxlan.h"
+#include "lib/vxlan.h"
 #include "fs_core.h"
 #include "en/port.h"
 
@@ -1133,7 +1133,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
 			goto vxlan_match_offload_err;
 
-		if (mlx5_vxlan_lookup_port(up_priv->vxlan, be16_to_cpu(key->dst)) &&
+		if (mlx5_vxlan_lookup_port(up_priv->mdev->vxlan, be16_to_cpu(key->dst)) &&
 		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
 			parse_vxlan_attr(spec, f);
 		else {
@@ -2557,7 +2557,7 @@ vxlan_encap_offload_err:
 		return -EOPNOTSUPP;
 	}
 
-	if (mlx5_vxlan_lookup_port(up_priv->vxlan, be16_to_cpu(key->tp_dst)) &&
+	if (mlx5_vxlan_lookup_port(up_priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
 		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
new file mode 100644
index 000000000000..9a8fd762167b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "vxlan.h"
+
+struct mlx5_vxlan {
+	struct mlx5_core_dev		*mdev;
+	spinlock_t			lock; /* protect vxlan table */
+	/* max_num_ports is usuallly 4, 16 buckets is more than enough */
+	DECLARE_HASHTABLE(htable, 4);
+	int				num_ports;
+	struct mutex                    sync_lock; /* sync add/del port HW operations */
+};
+
+struct mlx5_vxlan_port {
+	struct hlist_node hlist;
+	atomic_t refcount;
+	u16 udp_port;
+};
+
+static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
+}
+
+static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+
+	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
+	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
+{
+	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+
+	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
+		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static struct mlx5_vxlan_port*
+mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+
+	hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
+		if (vxlanp->udp_port == port)
+			return vxlanp;
+	}
+
+	return NULL;
+}
+
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return NULL;
+
+	spin_lock_bh(&vxlan->lock);
+	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+	spin_unlock_bh(&vxlan->lock);
+
+	return vxlanp;
+}
+
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	int ret = -ENOSPC;
+
+	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
+	if (vxlanp) {
+		atomic_inc(&vxlanp->refcount);
+		return 0;
+	}
+
+	mutex_lock(&vxlan->sync_lock);
+	if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
+		mlx5_core_info(vxlan->mdev,
+			       "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
+			       port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
+	ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
+	if (ret)
+		goto unlock;
+
+	vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
+	if (!vxlanp) {
+		ret = -ENOMEM;
+		goto err_delete_port;
+	}
+
+	vxlanp->udp_port = port;
+	atomic_set(&vxlanp->refcount, 1);
+
+	spin_lock_bh(&vxlan->lock);
+	hash_add(vxlan->htable, &vxlanp->hlist, port);
+	spin_unlock_bh(&vxlan->lock);
+
+	vxlan->num_ports++;
+	mutex_unlock(&vxlan->sync_lock);
+	return 0;
+
+err_delete_port:
+	mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+
+unlock:
+	mutex_unlock(&vxlan->sync_lock);
+	return ret;
+}
+
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	bool remove = false;
+	int ret = 0;
+
+	mutex_lock(&vxlan->sync_lock);
+
+	spin_lock_bh(&vxlan->lock);
+	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
+	if (!vxlanp) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	if (atomic_dec_and_test(&vxlanp->refcount)) {
+		hash_del(&vxlanp->hlist);
+		remove = true;
+	}
+
+out_unlock:
+	spin_unlock_bh(&vxlan->lock);
+
+	if (remove) {
+		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
+		kfree(vxlanp);
+		vxlan->num_ports--;
+	}
+
+	mutex_unlock(&vxlan->sync_lock);
+
+	return ret;
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_vxlan *vxlan;
+
+	if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
+		return ERR_PTR(-ENOTSUPP);
+
+	vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
+	if (!vxlan)
+		return ERR_PTR(-ENOMEM);
+
+	vxlan->mdev = mdev;
+	mutex_init(&vxlan->sync_lock);
+	spin_lock_init(&vxlan->lock);
+	hash_init(vxlan->htable);
+
+	/* Hardware adds 4789 by default */
+	mlx5_vxlan_add_port(vxlan, 4789);
+
+	return vxlan;
+}
+
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
+{
+	struct mlx5_vxlan_port *vxlanp;
+	struct hlist_node *tmp;
+	int bkt;
+
+	if (!mlx5_vxlan_allowed(vxlan))
+		return;
+
+	/* Lockless since we are the only hash table consumers*/
+	hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
+		hash_del(&vxlanp->hlist);
+		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
+		kfree(vxlanp);
+	}
+
+	kfree(vxlan);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
new file mode 100644
index 000000000000..fd874a30c4d0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_VXLAN_H__
+#define __MLX5_VXLAN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_vxlan;
+struct mlx5_vxlan_port;
+
+#ifdef CONFIG_MLX5_CORE_EN
+
+static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
+{
+	/* not allowed reason is encoded in vxlan pointer as error,
+	 * on mlx5_vxlan_create
+	 */
+	return !IS_ERR_OR_NULL(vxlan);
+}
+
+struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
+void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
+int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
+int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
+struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
+
+#else
+
+static inline struct mlx5_vxlan*
+mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-ENOTSUPP); }
+static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
+
+#endif
+
+#endif /* __MLX5_VXLAN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6ddbb70e95de..03b9c6733eed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -62,6 +62,7 @@
 #include "accel/ipsec.h"
 #include "accel/tls.h"
 #include "lib/clock.h"
+#include "lib/vxlan.h"
 #include "diag/fw_tracer.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -961,6 +962,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 
 	mlx5_init_clock(dev);
 
+	dev->vxlan = mlx5_vxlan_create(dev);
+
 	err = mlx5_init_rl_table(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -1004,6 +1007,7 @@ err_mpfs_cleanup:
 err_rl_cleanup:
 	mlx5_cleanup_rl_table(dev);
 err_tables_cleanup:
+	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_srq_table(dev);
 	mlx5_cleanup_qp_table(dev);
@@ -1024,6 +1028,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 	mlx5_mpfs_cleanup(dev);
 	mlx5_cleanup_rl_table(dev);
+	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_clock(dev);
 	mlx5_cleanup_reserved_gids(dev);
 	mlx5_cleanup_mkey_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
deleted file mode 100644
index 9a8fd762167b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include "mlx5_core.h"
-#include "vxlan.h"
-
-struct mlx5_vxlan {
-	struct mlx5_core_dev		*mdev;
-	spinlock_t			lock; /* protect vxlan table */
-	/* max_num_ports is usuallly 4, 16 buckets is more than enough */
-	DECLARE_HASHTABLE(htable, 4);
-	int				num_ports;
-	struct mutex                    sync_lock; /* sync add/del port HW operations */
-};
-
-struct mlx5_vxlan_port {
-	struct hlist_node hlist;
-	atomic_t refcount;
-	u16 udp_port;
-};
-
-static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
-{
-	return MLX5_CAP_ETH(mdev, max_vxlan_udp_ports) ?: 4;
-}
-
-static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
-
-	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
-		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
-	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
-{
-	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
-
-	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
-		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
-	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-}
-
-static struct mlx5_vxlan_port*
-mlx5_vxlan_lookup_port_locked(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-
-	hash_for_each_possible(vxlan->htable, vxlanp, hlist, port) {
-		if (vxlanp->udp_port == port)
-			return vxlanp;
-	}
-
-	return NULL;
-}
-
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-
-	if (!mlx5_vxlan_allowed(vxlan))
-		return NULL;
-
-	spin_lock_bh(&vxlan->lock);
-	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
-	spin_unlock_bh(&vxlan->lock);
-
-	return vxlanp;
-}
-
-int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	int ret = -ENOSPC;
-
-	vxlanp = mlx5_vxlan_lookup_port(vxlan, port);
-	if (vxlanp) {
-		atomic_inc(&vxlanp->refcount);
-		return 0;
-	}
-
-	mutex_lock(&vxlan->sync_lock);
-	if (vxlan->num_ports >= mlx5_vxlan_max_udp_ports(vxlan->mdev)) {
-		mlx5_core_info(vxlan->mdev,
-			       "UDP port (%d) not offloaded, max number of UDP ports (%d) are already offloaded\n",
-			       port, mlx5_vxlan_max_udp_ports(vxlan->mdev));
-		ret = -ENOSPC;
-		goto unlock;
-	}
-
-	ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
-	if (ret)
-		goto unlock;
-
-	vxlanp = kzalloc(sizeof(*vxlanp), GFP_KERNEL);
-	if (!vxlanp) {
-		ret = -ENOMEM;
-		goto err_delete_port;
-	}
-
-	vxlanp->udp_port = port;
-	atomic_set(&vxlanp->refcount, 1);
-
-	spin_lock_bh(&vxlan->lock);
-	hash_add(vxlan->htable, &vxlanp->hlist, port);
-	spin_unlock_bh(&vxlan->lock);
-
-	vxlan->num_ports++;
-	mutex_unlock(&vxlan->sync_lock);
-	return 0;
-
-err_delete_port:
-	mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
-
-unlock:
-	mutex_unlock(&vxlan->sync_lock);
-	return ret;
-}
-
-int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	bool remove = false;
-	int ret = 0;
-
-	mutex_lock(&vxlan->sync_lock);
-
-	spin_lock_bh(&vxlan->lock);
-	vxlanp = mlx5_vxlan_lookup_port_locked(vxlan, port);
-	if (!vxlanp) {
-		ret = -ENOENT;
-		goto out_unlock;
-	}
-
-	if (atomic_dec_and_test(&vxlanp->refcount)) {
-		hash_del(&vxlanp->hlist);
-		remove = true;
-	}
-
-out_unlock:
-	spin_unlock_bh(&vxlan->lock);
-
-	if (remove) {
-		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, port);
-		kfree(vxlanp);
-		vxlan->num_ports--;
-	}
-
-	mutex_unlock(&vxlan->sync_lock);
-
-	return ret;
-}
-
-struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev)
-{
-	struct mlx5_vxlan *vxlan;
-
-	if (!MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) || !mlx5_core_is_pf(mdev))
-		return ERR_PTR(-ENOTSUPP);
-
-	vxlan = kzalloc(sizeof(*vxlan), GFP_KERNEL);
-	if (!vxlan)
-		return ERR_PTR(-ENOMEM);
-
-	vxlan->mdev = mdev;
-	mutex_init(&vxlan->sync_lock);
-	spin_lock_init(&vxlan->lock);
-	hash_init(vxlan->htable);
-
-	/* Hardware adds 4789 by default */
-	mlx5_vxlan_add_port(vxlan, 4789);
-
-	return vxlan;
-}
-
-void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan)
-{
-	struct mlx5_vxlan_port *vxlanp;
-	struct hlist_node *tmp;
-	int bkt;
-
-	if (!mlx5_vxlan_allowed(vxlan))
-		return;
-
-	/* Lockless since we are the only hash table consumers*/
-	hash_for_each_safe(vxlan->htable, bkt, tmp, vxlanp, hlist) {
-		hash_del(&vxlanp->hlist);
-		mlx5_vxlan_core_del_port_cmd(vxlan->mdev, vxlanp->udp_port);
-		kfree(vxlanp);
-	}
-
-	kfree(vxlan);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
deleted file mode 100644
index fd874a30c4d0..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __MLX5_VXLAN_H__
-#define __MLX5_VXLAN_H__
-
-#include <linux/mlx5/driver.h>
-
-struct mlx5_vxlan;
-struct mlx5_vxlan_port;
-
-#ifdef CONFIG_MLX5_CORE_EN
-
-static inline bool mlx5_vxlan_allowed(struct mlx5_vxlan *vxlan)
-{
-	/* not allowed reason is encoded in vxlan pointer as error,
-	 * on mlx5_vxlan_create
-	 */
-	return !IS_ERR_OR_NULL(vxlan);
-}
-
-struct mlx5_vxlan *mlx5_vxlan_create(struct mlx5_core_dev *mdev);
-void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan);
-int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port);
-int mlx5_vxlan_del_port(struct mlx5_vxlan *vxlan, u16 port);
-struct mlx5_vxlan_port *mlx5_vxlan_lookup_port(struct mlx5_vxlan *vxlan, u16 port);
-
-#else
-
-static inline struct mlx5_vxlan*
-mlx5_vxlan_create(struct mlx5_core_dev *mdev) { return ERR_PTR(-ENOTSUPP); }
-static inline void mlx5_vxlan_destroy(struct mlx5_vxlan *vxlan) { return; }
-
-#endif
-
-#endif /* __MLX5_VXLAN_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index fd0aaa5568fe..54f385cc8811 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -818,6 +818,7 @@ struct mlx5_clock {
 };
 
 struct mlx5_fw_tracer;
+struct mlx5_vxlan;
 
 struct mlx5_core_dev {
 	struct pci_dev	       *pdev;
@@ -850,6 +851,7 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+	struct mlx5_vxlan       *vxlan;
 	struct {
 		struct mlx5_rsvd_gids	reserved_gids;
 		u32			roce_en;
-- 
cgit v1.2.3


From 627448e85c766587f6fdde1ea3886d6615081c77 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Thu, 28 Jun 2018 18:13:33 +0300
Subject: tpm: separate cmd_ready/go_idle from runtime_pm

Fix tpm ptt initialization error:
tpm tpm0: A TPM error (378) occurred get tpm pcr allocation.

We cannot use go_idle cmd_ready commands via runtime_pm handles
as with the introduction of localities this is no longer an optional
feature, while runtime pm can be not enabled.
Though cmd_ready/go_idle provides a power saving, it's also a part of
TPM2 protocol and should be called explicitly.
This patch exposes cmd_read/go_idle via tpm class ops and removes
runtime pm support as it is not used by any driver.

When calling from nested context always use both flags:
TPM_TRANSMIT_UNLOCKED and TPM_TRANSMIT_RAW. Both are needed to resolve
tpm spaces and locality request recursive calls to tpm_transmit().
TPM_TRANSMIT_RAW should never be used standalone as it will fail
on double locking. While TPM_TRANSMIT_UNLOCKED standalone should be
called from non-recursive locked contexts.

New wrappers are added tpm_cmd_ready() and tpm_go_idle() to
streamline tpm_try_transmit code.

tpm_crb no longer needs own power saving functions and can drop using
tpm_pm_suspend/resume.

This patch cannot be really separated from the locality fix.
Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality)

Cc: stable@vger.kernel.org
Fixes: 888d867df441 (tpm: cmd_ready command can be issued only after granting locality)
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-interface.c |  51 ++++++++++++++++----
 drivers/char/tpm/tpm.h           |  12 ++++-
 drivers/char/tpm/tpm2-space.c    |  16 ++++---
 drivers/char/tpm/tpm_crb.c       | 101 +++++++++++----------------------------
 include/linux/tpm.h              |   2 +
 5 files changed, 90 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index e32f6e85dc6d..31b86e027f9d 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -29,7 +29,6 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
-#include <linux/pm_runtime.h>
 #include <linux/tpm_eventlog.h>
 
 #include "tpm.h"
@@ -369,10 +368,13 @@ err_len:
 	return -EINVAL;
 }
 
-static int tpm_request_locality(struct tpm_chip *chip)
+static int tpm_request_locality(struct tpm_chip *chip, unsigned int flags)
 {
 	int rc;
 
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
 	if (!chip->ops->request_locality)
 		return 0;
 
@@ -385,10 +387,13 @@ static int tpm_request_locality(struct tpm_chip *chip)
 	return 0;
 }
 
-static void tpm_relinquish_locality(struct tpm_chip *chip)
+static void tpm_relinquish_locality(struct tpm_chip *chip, unsigned int flags)
 {
 	int rc;
 
+	if (flags & TPM_TRANSMIT_RAW)
+		return;
+
 	if (!chip->ops->relinquish_locality)
 		return;
 
@@ -399,6 +404,28 @@ static void tpm_relinquish_locality(struct tpm_chip *chip)
 	chip->locality = -1;
 }
 
+static int tpm_cmd_ready(struct tpm_chip *chip, unsigned int flags)
+{
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
+	if (!chip->ops->cmd_ready)
+		return 0;
+
+	return chip->ops->cmd_ready(chip);
+}
+
+static int tpm_go_idle(struct tpm_chip *chip, unsigned int flags)
+{
+	if (flags & TPM_TRANSMIT_RAW)
+		return 0;
+
+	if (!chip->ops->go_idle)
+		return 0;
+
+	return chip->ops->go_idle(chip);
+}
+
 static ssize_t tpm_try_transmit(struct tpm_chip *chip,
 				struct tpm_space *space,
 				u8 *buf, size_t bufsiz,
@@ -449,14 +476,15 @@ static ssize_t tpm_try_transmit(struct tpm_chip *chip,
 	/* Store the decision as chip->locality will be changed. */
 	need_locality = chip->locality == -1;
 
-	if (!(flags & TPM_TRANSMIT_RAW) && need_locality) {
-		rc = tpm_request_locality(chip);
+	if (need_locality) {
+		rc = tpm_request_locality(chip, flags);
 		if (rc < 0)
 			goto out_no_locality;
 	}
 
-	if (chip->dev.parent)
-		pm_runtime_get_sync(chip->dev.parent);
+	rc = tpm_cmd_ready(chip, flags);
+	if (rc)
+		goto out;
 
 	rc = tpm2_prepare_space(chip, space, ordinal, buf);
 	if (rc)
@@ -516,13 +544,16 @@ out_recv:
 	}
 
 	rc = tpm2_commit_space(chip, space, ordinal, buf, &len);
+	if (rc)
+		dev_err(&chip->dev, "tpm2_commit_space: error %d\n", rc);
 
 out:
-	if (chip->dev.parent)
-		pm_runtime_put_sync(chip->dev.parent);
+	rc = tpm_go_idle(chip, flags);
+	if (rc)
+		goto out;
 
 	if (need_locality)
-		tpm_relinquish_locality(chip);
+		tpm_relinquish_locality(chip, flags);
 
 out_no_locality:
 	if (chip->ops->clk_enable != NULL)
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 9824cccb2c76..17833ef63f6c 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -512,9 +512,17 @@ extern const struct file_operations tpm_fops;
 extern const struct file_operations tpmrm_fops;
 extern struct idr dev_nums_idr;
 
+/**
+ * enum tpm_transmit_flags
+ *
+ * @TPM_TRANSMIT_UNLOCKED: used to lock sequence of tpm_transmit calls.
+ * @TPM_TRANSMIT_RAW: prevent recursive calls into setup steps
+ *                    (go idle, locality,..). Always use with UNLOCKED
+ *                    as it will fail on double locking.
+ */
 enum tpm_transmit_flags {
-	TPM_TRANSMIT_UNLOCKED	= BIT(0),
-	TPM_TRANSMIT_RAW	= BIT(1),
+	TPM_TRANSMIT_UNLOCKED = BIT(0),
+	TPM_TRANSMIT_RAW      = BIT(1),
 };
 
 ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c
index 6122d3276f72..11c85ed8c113 100644
--- a/drivers/char/tpm/tpm2-space.c
+++ b/drivers/char/tpm/tpm2-space.c
@@ -39,7 +39,8 @@ static void tpm2_flush_sessions(struct tpm_chip *chip, struct tpm_space *space)
 	for (i = 0; i < ARRAY_SIZE(space->session_tbl); i++) {
 		if (space->session_tbl[i])
 			tpm2_flush_context_cmd(chip, space->session_tbl[i],
-					       TPM_TRANSMIT_UNLOCKED);
+					       TPM_TRANSMIT_UNLOCKED |
+					       TPM_TRANSMIT_RAW);
 	}
 }
 
@@ -84,7 +85,7 @@ static int tpm2_load_context(struct tpm_chip *chip, u8 *buf,
 	tpm_buf_append(&tbuf, &buf[*offset], body_size);
 
 	rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 4,
-			      TPM_TRANSMIT_UNLOCKED, NULL);
+			      TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL);
 	if (rc < 0) {
 		dev_warn(&chip->dev, "%s: failed with a system error %d\n",
 			 __func__, rc);
@@ -133,7 +134,7 @@ static int tpm2_save_context(struct tpm_chip *chip, u32 handle, u8 *buf,
 	tpm_buf_append_u32(&tbuf, handle);
 
 	rc = tpm_transmit_cmd(chip, NULL, tbuf.data, PAGE_SIZE, 0,
-			      TPM_TRANSMIT_UNLOCKED, NULL);
+			      TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW, NULL);
 	if (rc < 0) {
 		dev_warn(&chip->dev, "%s: failed with a system error %d\n",
 			 __func__, rc);
@@ -170,7 +171,8 @@ static void tpm2_flush_space(struct tpm_chip *chip)
 	for (i = 0; i < ARRAY_SIZE(space->context_tbl); i++)
 		if (space->context_tbl[i] && ~space->context_tbl[i])
 			tpm2_flush_context_cmd(chip, space->context_tbl[i],
-					       TPM_TRANSMIT_UNLOCKED);
+					       TPM_TRANSMIT_UNLOCKED |
+					       TPM_TRANSMIT_RAW);
 
 	tpm2_flush_sessions(chip, space);
 }
@@ -377,7 +379,8 @@ static int tpm2_map_response_header(struct tpm_chip *chip, u32 cc, u8 *rsp,
 
 	return 0;
 out_no_slots:
-	tpm2_flush_context_cmd(chip, phandle, TPM_TRANSMIT_UNLOCKED);
+	tpm2_flush_context_cmd(chip, phandle,
+			       TPM_TRANSMIT_UNLOCKED | TPM_TRANSMIT_RAW);
 	dev_warn(&chip->dev, "%s: out of slots for 0x%08X\n", __func__,
 		 phandle);
 	return -ENOMEM;
@@ -465,7 +468,8 @@ static int tpm2_save_space(struct tpm_chip *chip)
 			return rc;
 
 		tpm2_flush_context_cmd(chip, space->context_tbl[i],
-				       TPM_TRANSMIT_UNLOCKED);
+				       TPM_TRANSMIT_UNLOCKED |
+				       TPM_TRANSMIT_RAW);
 		space->context_tbl[i] = ~0;
 	}
 
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 34fbc6cb097b..36952ef98f90 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -132,7 +132,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
 }
 
 /**
- * crb_go_idle - request tpm crb device to go the idle state
+ * __crb_go_idle - request tpm crb device to go the idle state
  *
  * @dev:  crb device
  * @priv: crb private data
@@ -147,7 +147,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
  *
  * Return: 0 always
  */
-static int crb_go_idle(struct device *dev, struct crb_priv *priv)
+static int __crb_go_idle(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -163,11 +163,20 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv)
 		dev_warn(dev, "goIdle timed out\n");
 		return -ETIME;
 	}
+
 	return 0;
 }
 
+static int crb_go_idle(struct tpm_chip *chip)
+{
+	struct device *dev = &chip->dev;
+	struct crb_priv *priv = dev_get_drvdata(dev);
+
+	return __crb_go_idle(dev, priv);
+}
+
 /**
- * crb_cmd_ready - request tpm crb device to enter ready state
+ * __crb_cmd_ready - request tpm crb device to enter ready state
  *
  * @dev:  crb device
  * @priv: crb private data
@@ -181,7 +190,7 @@ static int crb_go_idle(struct device *dev, struct crb_priv *priv)
  *
  * Return: 0 on success -ETIME on timeout;
  */
-static int crb_cmd_ready(struct device *dev, struct crb_priv *priv)
+static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -200,6 +209,14 @@ static int crb_cmd_ready(struct device *dev, struct crb_priv *priv)
 	return 0;
 }
 
+static int crb_cmd_ready(struct tpm_chip *chip)
+{
+	struct device *dev = &chip->dev;
+	struct crb_priv *priv = dev_get_drvdata(dev);
+
+	return __crb_cmd_ready(dev, priv);
+}
+
 static int __crb_request_locality(struct device *dev,
 				  struct crb_priv *priv, int loc)
 {
@@ -401,6 +418,8 @@ static const struct tpm_class_ops tpm_crb = {
 	.send = crb_send,
 	.cancel = crb_cancel,
 	.req_canceled = crb_req_canceled,
+	.go_idle  = crb_go_idle,
+	.cmd_ready = crb_cmd_ready,
 	.request_locality = crb_request_locality,
 	.relinquish_locality = crb_relinquish_locality,
 	.req_complete_mask = CRB_DRV_STS_COMPLETE,
@@ -520,7 +539,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	 * PTT HW bug w/a: wake up the device to access
 	 * possibly not retained registers.
 	 */
-	ret = crb_cmd_ready(dev, priv);
+	ret = __crb_cmd_ready(dev, priv);
 	if (ret)
 		goto out_relinquish_locality;
 
@@ -565,7 +584,7 @@ out:
 	if (!ret)
 		priv->cmd_size = cmd_size;
 
-	crb_go_idle(dev, priv);
+	__crb_go_idle(dev, priv);
 
 out_relinquish_locality:
 
@@ -628,32 +647,7 @@ static int crb_acpi_add(struct acpi_device *device)
 	chip->acpi_dev_handle = device->handle;
 	chip->flags = TPM_CHIP_FLAG_TPM2;
 
-	rc = __crb_request_locality(dev, priv, 0);
-	if (rc)
-		return rc;
-
-	rc  = crb_cmd_ready(dev, priv);
-	if (rc)
-		goto out;
-
-	pm_runtime_get_noresume(dev);
-	pm_runtime_set_active(dev);
-	pm_runtime_enable(dev);
-
-	rc = tpm_chip_register(chip);
-	if (rc) {
-		crb_go_idle(dev, priv);
-		pm_runtime_put_noidle(dev);
-		pm_runtime_disable(dev);
-		goto out;
-	}
-
-	pm_runtime_put_sync(dev);
-
-out:
-	__crb_relinquish_locality(dev, priv, 0);
-
-	return rc;
+	return tpm_chip_register(chip);
 }
 
 static int crb_acpi_remove(struct acpi_device *device)
@@ -663,52 +657,11 @@ static int crb_acpi_remove(struct acpi_device *device)
 
 	tpm_chip_unregister(chip);
 
-	pm_runtime_disable(dev);
-
 	return 0;
 }
 
-static int __maybe_unused crb_pm_runtime_suspend(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
-
-	return crb_go_idle(dev, priv);
-}
-
-static int __maybe_unused crb_pm_runtime_resume(struct device *dev)
-{
-	struct tpm_chip *chip = dev_get_drvdata(dev);
-	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
-
-	return crb_cmd_ready(dev, priv);
-}
-
-static int __maybe_unused crb_pm_suspend(struct device *dev)
-{
-	int ret;
-
-	ret = tpm_pm_suspend(dev);
-	if (ret)
-		return ret;
-
-	return crb_pm_runtime_suspend(dev);
-}
-
-static int __maybe_unused crb_pm_resume(struct device *dev)
-{
-	int ret;
-
-	ret = crb_pm_runtime_resume(dev);
-	if (ret)
-		return ret;
-
-	return tpm_pm_resume(dev);
-}
-
 static const struct dev_pm_ops crb_pm = {
-	SET_SYSTEM_SLEEP_PM_OPS(crb_pm_suspend, crb_pm_resume)
-	SET_RUNTIME_PM_OPS(crb_pm_runtime_suspend, crb_pm_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tpm_pm_suspend, tpm_pm_resume)
 };
 
 static const struct acpi_device_id crb_device_ids[] = {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 06639fb6ab85..8eb5e5ebe136 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -43,6 +43,8 @@ struct tpm_class_ops {
 	u8 (*status) (struct tpm_chip *chip);
 	bool (*update_timeouts)(struct tpm_chip *chip,
 				unsigned long *timeout_cap);
+	int (*go_idle)(struct tpm_chip *chip);
+	int (*cmd_ready)(struct tpm_chip *chip);
 	int (*request_locality)(struct tpm_chip *chip, int loc);
 	int (*relinquish_locality)(struct tpm_chip *chip, int loc);
 	void (*clk_enable)(struct tpm_chip *chip, bool value);
-- 
cgit v1.2.3


From aaae81536351f831fe6719e5b4e6afbadc5e5f85 Mon Sep 17 00:00:00 2001
From: Stefan Berger <stefanb@linux.vnet.ibm.com>
Date: Tue, 26 Jun 2018 15:09:30 -0400
Subject: tpm: Implement tpm_default_chip() to find a TPM chip

Implement tpm_default_chip() to find the first TPM chip and return it to
the caller while increasing the reference count on its device. This
function can be used by other subsystems, such as IMA, to find the system's
default TPM chip and use it for all subsequent TPM operations.

Signed-off-by: Stefan Berger <stefanb@linux.vnet.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-chip.c | 27 +++++++++++++++++++++++++++
 include/linux/tpm.h         |  5 +++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 242b716aed5e..f551061262c9 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -80,6 +80,33 @@ void tpm_put_ops(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL_GPL(tpm_put_ops);
 
+/**
+ * tpm_default_chip() - find a TPM chip and get a reference to it
+ */
+struct tpm_chip *tpm_default_chip(void)
+{
+	struct tpm_chip *chip, *res = NULL;
+	int chip_num = 0;
+	int chip_prev;
+
+	mutex_lock(&idr_lock);
+
+	do {
+		chip_prev = chip_num;
+		chip = idr_get_next(&dev_nums_idr, &chip_num);
+		if (chip) {
+			get_device(&chip->dev);
+			res = chip;
+			break;
+		}
+	} while (chip_prev != chip_num);
+
+	mutex_unlock(&idr_lock);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(tpm_default_chip);
+
 /**
  * tpm_find_get_ops() - find and reserve a TPM chip
  * @chip:	a &struct tpm_chip instance, %NULL for the default chip
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 8eb5e5ebe136..4609b94142d4 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -63,6 +63,7 @@ extern int tpm_seal_trusted(struct tpm_chip *chip,
 extern int tpm_unseal_trusted(struct tpm_chip *chip,
 			      struct trusted_key_payload *payload,
 			      struct trusted_key_options *options);
+extern struct tpm_chip *tpm_default_chip(void);
 #else
 static inline int tpm_is_tpm2(struct tpm_chip *chip)
 {
@@ -98,5 +99,9 @@ static inline int tpm_unseal_trusted(struct tpm_chip *chip,
 {
 	return -ENODEV;
 }
+static inline struct tpm_chip *tpm_default_chip(void)
+{
+	return NULL;
+}
 #endif
 #endif
-- 
cgit v1.2.3


From e15d54d5009688ccb2a5312f3b70d631615329c9 Mon Sep 17 00:00:00 2001
From: Yunlei He <heyunlei@huawei.com>
Date: Wed, 27 Jun 2018 14:46:21 +0800
Subject: f2fs: Allocate and stat mem used by free nid bitmap more accurately

This patch used f2fs_bitmap_size macro to calculate mem used by
free nid bitmap, and stat used mem including aligned part.

Signed-off-by: Yunlei He <heyunlei@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c         | 3 ++-
 fs/f2fs/node.c          | 2 +-
 include/linux/f2fs_fs.h | 5 -----
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 2d65e77ae5cf..214a968962a1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
 	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
-	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+	si->base_mem += NM_I(sbi)->nat_blocks *
+				f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
 	si->base_mem += NM_I(sbi)->nat_blocks / 8;
 	si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8e58990b9120..142b34130749 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2797,7 +2797,7 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
 
 	for (i = 0; i < nm_i->nat_blocks; i++) {
 		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
-				NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
+			f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
 		if (!nm_i->free_nid_bitmap)
 			return -ENOMEM;
 	}
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index aa5db8b5521a..f70f8ac9c4f4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@ struct f2fs_node {
  * For NAT entries
  */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED				\
-	((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) /		\
-	BITS_PER_LONG * BITS_PER_LONG)
-
 
 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */
-- 
cgit v1.2.3


From 5cbf777cfdf6e5a7b7149006e4881a255da78fdd Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 27 Jul 2018 16:37:28 +0800
Subject: route: add support for directed broadcast forwarding

This patch implements the feature described in rfc1812#section-5.3.5.2
and rfc2644. It allows the router to forward directed broadcast when
sysctl bc_forwarding is enabled.

Note that this feature could be done by iptables -j TEE, but it would
cause some problems:
  - target TEE's gateway param has to be set with a specific address,
    and it's not flexible especially when the route wants forward all
    directed broadcasts.
  - this duplicates the directed broadcasts so this may cause side
    effects to applications.

Besides, to keep consistent with other os router like BSD, it's also
necessary to implement it in the route rx path.

Note that route cache needs to be flushed when bc_forwarding is
changed.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h   |  1 +
 include/uapi/linux/ip.h      |  1 +
 include/uapi/linux/netconf.h |  1 +
 net/ipv4/devinet.c           | 11 +++++++++++
 net/ipv4/route.c             |  6 +++++-
 5 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 27650f1bff3d..c759d1cbcedd 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -93,6 +93,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 
 #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
 #define IN_DEV_MFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), MC_FORWARDING)
+#define IN_DEV_BFORWARD(in_dev)		IN_DEV_ANDCONF((in_dev), BC_FORWARDING)
 #define IN_DEV_RPFILTER(in_dev)		IN_DEV_MAXCONF((in_dev), RP_FILTER)
 #define IN_DEV_SRC_VMARK(in_dev)    	IN_DEV_ORCONF((in_dev), SRC_VMARK)
 #define IN_DEV_SOURCE_ROUTE(in_dev)	IN_DEV_ANDCONF((in_dev), \
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index b24a742beae5..e42d13b55cf3 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -168,6 +168,7 @@ enum
 	IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
 	IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
+	IPV4_DEVCONF_BC_FORWARDING,
 	__IPV4_DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h
index c84fcdfca862..fac4edd55379 100644
--- a/include/uapi/linux/netconf.h
+++ b/include/uapi/linux/netconf.h
@@ -18,6 +18,7 @@ enum {
 	NETCONFA_PROXY_NEIGH,
 	NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
 	NETCONFA_INPUT,
+	NETCONFA_BC_FORWARDING,
 	__NETCONFA_MAX
 };
 #define NETCONFA_MAX	(__NETCONFA_MAX - 1)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7585ab1a77a..ea4bd8a52422 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1827,6 +1827,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (all || type == NETCONFA_MC_FORWARDING)
 		size += nla_total_size(4);
+	if (all || type == NETCONFA_BC_FORWARDING)
+		size += nla_total_size(4);
 	if (all || type == NETCONFA_PROXY_NEIGH)
 		size += nla_total_size(4);
 	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
@@ -1873,6 +1875,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
 			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
 		goto nla_put_failure;
+	if ((all || type == NETCONFA_BC_FORWARDING) &&
+	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
+			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
+		goto nla_put_failure;
 	if ((all || type == NETCONFA_PROXY_NEIGH) &&
 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
@@ -2143,6 +2149,10 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 			if ((new_value == 0) && (old_value != 0))
 				rt_cache_flush(net);
 
+		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
+		    new_value != old_value)
+			rt_cache_flush(net);
+
 		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
 		    new_value != old_value) {
 			ifindex = devinet_conf_ifindex(net, cnf);
@@ -2259,6 +2269,7 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
 					     devinet_sysctl_forward),
 		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
+		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
 
 		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
 		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1df6e97106d7..b678466da451 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1996,8 +1996,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto no_route;
 	}
 
-	if (res->type == RTN_BROADCAST)
+	if (res->type == RTN_BROADCAST) {
+		if (IN_DEV_BFORWARD(in_dev))
+			goto make_route;
 		goto brd_input;
+	}
 
 	if (res->type == RTN_LOCAL) {
 		err = fib_validate_source(skb, saddr, daddr, tos,
@@ -2014,6 +2017,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (res->type != RTN_UNICAST)
 		goto martian_destination;
 
+make_route:
 	err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
 out:	return err;
 
-- 
cgit v1.2.3


From 4b09384aaa2a9b2ac09a584d7a9345cf003617f2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 27 Jul 2018 13:11:00 -0700
Subject: net: dcb: add DSCP to comment about priority selector types

Commit ee2059819450 ("net/dcb: Add dscp to priority selector type")
added a define for the new DSCP selector type created by
IEEE 802.1Qcd, but missed the comment enumerating all selector types.
Update the comment.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/dcbnl.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index 60aa2e446698..69df19aa8e72 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -233,7 +233,8 @@ struct cee_pfc {
  *	2	Well known port number over TCP or SCTP
  *	3	Well known port number over UDP or DCCP
  *	4	Well known port number over TCP, SCTP, UDP, or DCCP
- *	5-7	Reserved
+ *	5	Differentiated Services Code Point (DSCP) value
+ *	6-7	Reserved
  *
  *  Selector field values for CEE
  *	0	Ethertype
-- 
cgit v1.2.3


From 3e7a50ceb11ea75c27e944f1a01e478fd62a2d8d Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 27 Jul 2018 13:43:22 -0700
Subject: net: report min and max mtu network device settings

Report the minimum and maximum MTU allowed on a device
via netlink so that it can be displayed by tools like
ip link.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h | 2 ++
 net/core/rtnetlink.c         | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 553c438cabe3..43391e2d1153 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -164,6 +164,8 @@ enum {
 	IFLA_CARRIER_UP_COUNT,
 	IFLA_CARRIER_DOWN_COUNT,
 	IFLA_NEW_IFINDEX,
+	IFLA_MIN_MTU,
+	IFLA_MAX_MTU,
 	__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 92b6fa5d5f6e..510d4f765a13 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1015,6 +1015,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
 	       + nla_total_size(4)  /* IFLA_IF_NETNSID */
 	       + nla_total_size(4)  /* IFLA_CARRIER_UP_COUNT */
 	       + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
+	       + nla_total_size(4)  /* IFLA_MIN_MTU */
+	       + nla_total_size(4)  /* IFLA_MAX_MTU */
 	       + 0;
 }
 
@@ -1601,6 +1603,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
 		       netif_running(dev) ? dev->operstate : IF_OPER_DOWN) ||
 	    nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) ||
 	    nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
+	    nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) ||
+	    nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) ||
 	    nla_put_u32(skb, IFLA_GROUP, dev->group) ||
 	    nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) ||
 	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) ||
@@ -1732,6 +1736,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_IF_NETNSID]	= { .type = NLA_S32 },
 	[IFLA_CARRIER_UP_COUNT]	= { .type = NLA_U32 },
 	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
+	[IFLA_MIN_MTU]		= { .type = NLA_U32 },
+	[IFLA_MAX_MTU]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
-- 
cgit v1.2.3


From 7a4c53bee3324ac00bf964aa2f82d15d279e86e4 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 27 Jul 2018 13:43:23 -0700
Subject: net: report invalid mtu value via netlink extack

If an invalid MTU value is set through rtnetlink return extra error
information instead of putting message in kernel log. For other cases
where there is no visible API, keep the error report in the log.

Example:
	# ip li set dev enp12s0 mtu 10000
	Error: mtu greater than device maximum.

	# ifconfig enp12s0 mtu 10000
	SIOCSIFMTU: Invalid argument
	# dmesg | tail -1
	[ 2047.795467] enp12s0: mtu greater than device maximum

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 23 +++++++++++++++++------
 net/core/rtnetlink.c      |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1295c7a452e..9c917467a2c7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3546,6 +3546,8 @@ int dev_set_alias(struct net_device *, const char *, size_t);
 int dev_get_alias(const struct net_device *, char *, size_t);
 int dev_change_net_namespace(struct net_device *, struct net *, const char *);
 int __dev_set_mtu(struct net_device *, int);
+int dev_set_mtu_ext(struct net_device *dev, int mtu,
+		    struct netlink_ext_ack *extack);
 int dev_set_mtu(struct net_device *, int);
 int dev_change_tx_queue_len(struct net_device *, unsigned long);
 void dev_set_group(struct net_device *, int);
diff --git a/net/core/dev.c b/net/core/dev.c
index 87c42c8249ae..89031b5fef9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7523,13 +7523,15 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu)
 EXPORT_SYMBOL(__dev_set_mtu);
 
 /**
- *	dev_set_mtu - Change maximum transfer unit
+ *	dev_set_mtu_ext - Change maximum transfer unit
  *	@dev: device
  *	@new_mtu: new transfer unit
+ *	@extack: netlink extended ack
  *
  *	Change the maximum transfer size of the network device.
  */
-int dev_set_mtu(struct net_device *dev, int new_mtu)
+int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
+		    struct netlink_ext_ack *extack)
 {
 	int err, orig_mtu;
 
@@ -7538,14 +7540,12 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 
 	/* MTU must be positive, and in range */
 	if (new_mtu < 0 || new_mtu < dev->min_mtu) {
-		net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
-				    dev->name, new_mtu, dev->min_mtu);
+		NL_SET_ERR_MSG(extack, "mtu less than device minimum");
 		return -EINVAL;
 	}
 
 	if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
-		net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
-				    dev->name, new_mtu, dev->max_mtu);
+		NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
 		return -EINVAL;
 	}
 
@@ -7573,6 +7573,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	}
 	return err;
 }
+
+int dev_set_mtu(struct net_device *dev, int new_mtu)
+{
+	struct netlink_ext_ack extack;
+	int err;
+
+	err = dev_set_mtu_ext(dev, new_mtu, &extack);
+	if (err)
+		net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
+	return err;
+}
 EXPORT_SYMBOL(dev_set_mtu);
 
 /**
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 510d4f765a13..24431e578310 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2382,7 +2382,7 @@ static int do_setlink(const struct sk_buff *skb,
 	}
 
 	if (tb[IFLA_MTU]) {
-		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+		err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
 		if (err < 0)
 			goto errout;
 		status |= DO_SETLINK_MODIFIED;
-- 
cgit v1.2.3


From 0969a204bfdaf7470d2666736b90a8595ae671e9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 27 Jul 2018 17:47:01 +0300
Subject: gpiolib: Use GPIOD_OUT_{LOW,HIGH} macros in open drain ones

There should not be anything more than stated by the name of newly
introduced constants, i.e.
	GPIOD_OUT_LOW_OPEN_DRAIN == GPIOD_OUT_LOW + open drain
and nothing more.

Make it better to read and slightly more robust by using GPIOD_OUT_LOW
and GPIOD_OUT_HIGH constants with open drain flag.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index e8aaf34dd65d..21ddbe440030 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -41,11 +41,8 @@ enum gpiod_flags {
 	GPIOD_OUT_LOW	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT,
 	GPIOD_OUT_HIGH	= GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT |
 			  GPIOD_FLAGS_BIT_DIR_VAL,
-	GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET |
-			  GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_OPEN_DRAIN,
-	GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET |
-			  GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL |
-			  GPIOD_FLAGS_BIT_OPEN_DRAIN,
+	GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_OPEN_DRAIN,
+	GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_OPEN_DRAIN,
 };
 
 #ifdef CONFIG_GPIOLIB
-- 
cgit v1.2.3


From f39b948dbeaf9da0dfd17e68704f38fe4237788f Mon Sep 17 00:00:00 2001
From: Huang Shijie <sjhuang@iluvatar.ai>
Date: Thu, 26 Jul 2018 14:45:53 +0800
Subject: dmaengine: add a new helper dmaenginem_async_device_register

This patch adds the dmaenginem_async_device_register for DMA code.
Use the Devres to call the release for the DMA engine driver.

Signed-off-by: Huang Shijie <sjhuang@iluvatar.ai>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/dma/dmaengine.c               | 35 +++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h             |  1 +
 3 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 7c1bb3d0c222..43681ca0837f 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -240,6 +240,7 @@ CLOCK
   devm_of_clk_add_hw_provider()
 
 DMA
+  dmaenginem_async_device_register()
   dmam_alloc_coherent()
   dmam_alloc_attrs()
   dmam_declare_coherent_memory()
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 504420f213ff..272bed6c8ba7 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1141,6 +1141,41 @@ void dma_async_device_unregister(struct dma_device *device)
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
+static void dmam_device_release(struct device *dev, void *res)
+{
+	struct dma_device *device;
+
+	device = *(struct dma_device **)res;
+	dma_async_device_unregister(device);
+}
+
+/**
+ * dmaenginem_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ *
+ * The operation is managed and will be undone on driver detach.
+ */
+int dmaenginem_async_device_register(struct dma_device *device)
+{
+	void *p;
+	int ret;
+
+	p = devres_alloc(dmam_device_release, sizeof(void *), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = dma_async_device_register(device);
+	if (!ret) {
+		*(struct dma_device **)p = device;
+		devres_add(device->dev, p);
+	} else {
+		devres_free(p);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(dmaenginem_async_device_register);
+
 struct dmaengine_unmap_pool {
 	struct kmem_cache *cache;
 	const char *name;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c8c3a7a93802..d49ec5c31944 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1406,6 +1406,7 @@ static inline int dmaengine_desc_free(struct dma_async_tx_descriptor *desc)
 /* --- DMA device --- */
 
 int dma_async_device_register(struct dma_device *device);
+int dmaenginem_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
-- 
cgit v1.2.3


From 222440b4e832059c0ddf18d1e409f0552ab53a7d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 4 Jul 2018 12:48:04 +0200
Subject: netfilter: nf_tables: handle meta/lookup with direct call

Currently nft uses inlined variants for common operations
such as 'ip saddr 1.2.3.4' instead of an indirect call.

Also handle meta get operations and lookups without indirect call,
both are builtin.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h |  7 +++++++
 net/netfilter/nf_tables_core.c         | 16 +++++++++++++++-
 net/netfilter/nft_lookup.c             |  6 +++---
 net/netfilter/nft_meta.c               |  6 +++---
 4 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a05134507e7b..8da837d2aaf9 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -71,4 +71,11 @@ extern struct nft_set_type nft_set_hash_fast_type;
 extern struct nft_set_type nft_set_rbtree_type;
 extern struct nft_set_type nft_set_bitmap_type;
 
+struct nft_expr;
+struct nft_regs;
+struct nft_pktinfo;
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_lookup_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 8de912ca53d3..ffd5c0f9412b 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -120,6 +120,20 @@ struct nft_jumpstack {
 	struct nft_rule	*const *rules;
 };
 
+static void expr_call_ops_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       struct nft_pktinfo *pkt)
+{
+	unsigned long e = (unsigned long)expr->ops->eval;
+
+	if (e == (unsigned long)nft_meta_get_eval)
+		nft_meta_get_eval(expr, regs, pkt);
+	else if (e == (unsigned long)nft_lookup_eval)
+		nft_lookup_eval(expr, regs, pkt);
+	else
+		expr->ops->eval(expr, regs, pkt);
+}
+
 unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 {
@@ -153,7 +167,7 @@ next_rule:
 				nft_cmp_fast_eval(expr, &regs);
 			else if (expr->ops != &nft_payload_fast_ops ||
 				 !nft_payload_fast_eval(expr, &regs, pkt))
-				expr->ops->eval(expr, &regs, pkt);
+				expr_call_ops_eval(expr, &regs, pkt);
 
 			if (regs.verdict.code != NFT_CONTINUE)
 				break;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index c2a1d84cdfc4..ad13e8643599 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -26,9 +26,9 @@ struct nft_lookup {
 	struct nft_set_binding		binding;
 };
 
-static void nft_lookup_eval(const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_pktinfo *pkt)
+void nft_lookup_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs,
+		     const struct nft_pktinfo *pkt)
 {
 	const struct nft_lookup *priv = nft_expr_priv(expr);
 	const struct nft_set *set = priv->set;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 2b94dcc43456..297fe7d97c18 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -41,9 +41,9 @@ static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
 #include "../bridge/br_private.h"
 #endif
 
-static void nft_meta_get_eval(const struct nft_expr *expr,
-			      struct nft_regs *regs,
-			      const struct nft_pktinfo *pkt)
+void nft_meta_get_eval(const struct nft_expr *expr,
+		       struct nft_regs *regs,
+		       const struct nft_pktinfo *pkt)
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-- 
cgit v1.2.3


From 345a9ca37aa69bb3133c1a8390a71f993abcef0c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 30 Jul 2018 08:46:40 +0200
Subject: ALSA: memalloc: Fix missing PAGE_SIZE definition

The recent fix moved the inline snd_sgbuf_aligned_pages() outside the
ifdef, and this triggered a build error on some architectures due to
the undefined PAGE_SIZE, as spotted by 0day bot.
Fix it by adding the missing header inclusion.

Fixes: 4cae99d9b530 ("ALSA: memalloc: declare snd_sgbuf_aligned_pages() unconditionally")
Reported-by: kbuild test robot <lkp@intel.com>
Cc: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/memalloc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index c669900e6cbe..67561b997915 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -24,6 +24,8 @@
 #ifndef __SOUND_MEMALLOC_H
 #define __SOUND_MEMALLOC_H
 
+#include <asm/page.h>
+
 struct device;
 
 /*
-- 
cgit v1.2.3


From 6decb5b45e70d6ffff6488cc8e8bad6b9ac7f99b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:32 +0530
Subject: Bluetooth: Define PHY flags in hdev and set 1M as default

1M is mandatory to be supported by LE controllers and the same
would be set in power on. This patch defines hdev flags for
LE PHYs and set 1M to default.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 4 ++++
 include/net/bluetooth/hci_core.h | 3 +++
 net/bluetooth/hci_core.c         | 9 +++++----
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 73e48be5bbb3..664fe1ebf2c7 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1514,6 +1514,10 @@ struct hci_cp_le_set_default_phy {
 	__u8    rx_phys;
 } __packed;
 
+#define HCI_LE_SET_PHY_1M		0x01
+#define HCI_LE_SET_PHY_2M		0x02
+#define HCI_LE_SET_PHY_CODED		0x04
+
 #define HCI_OP_LE_SET_EXT_SCAN_PARAMS   0x2041
 struct hci_cp_le_set_ext_scan_params {
 	__u8    own_addr_type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a74453571264..71f79df9ee05 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -315,6 +315,9 @@ struct hci_dev {
 	unsigned long	sco_last_tx;
 	unsigned long	le_last_tx;
 
+	__u8		le_tx_def_phys;
+	__u8		le_rx_def_phys;
+
 	struct workqueue_struct	*workqueue;
 	struct workqueue_struct	*req_workqueue;
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index f5c21004186c..432f89f390c0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -830,10 +830,9 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
 	if (hdev->commands[35] & 0x20) {
 		struct hci_cp_le_set_default_phy cp;
 
-		/* No transmitter PHY or receiver PHY preferences */
-		cp.all_phys = 0x03;
-		cp.tx_phys = 0;
-		cp.rx_phys = 0;
+		cp.all_phys = 0x00;
+		cp.tx_phys = hdev->le_tx_def_phys;
+		cp.rx_phys = hdev->le_rx_def_phys;
 
 		hci_req_add(req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp), &cp);
 	}
@@ -3027,6 +3026,8 @@ struct hci_dev *hci_alloc_dev(void)
 	hdev->le_max_tx_time = 0x0148;
 	hdev->le_max_rx_len = 0x001b;
 	hdev->le_max_rx_time = 0x0148;
+	hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
+	hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
 
 	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
 	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
-- 
cgit v1.2.3


From 5075b972f20ddad5bb19542ea4f5794d06673375 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:33 +0530
Subject: Bluetooth: Add defines for BREDR pkt_type and LE PHYs

This also add macros for checking LMP support for different
pkt_types

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 14 ++++++++++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 664fe1ebf2c7..89bf800f6eb1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -291,6 +291,14 @@ enum {
 #define HCI_DH3		0x0800
 #define HCI_DH5		0x8000
 
+/* HCI packet types inverted masks */
+#define HCI_2DH1	0x0002
+#define HCI_3DH1	0x0004
+#define HCI_2DH3	0x0100
+#define HCI_3DH3	0x0200
+#define HCI_2DH5	0x1000
+#define HCI_3DH5	0x2000
+
 #define HCI_HV1		0x0020
 #define HCI_HV2		0x0040
 #define HCI_HV3		0x0080
@@ -354,6 +362,8 @@ enum {
 #define LMP_PCONTROL	0x04
 #define LMP_TRANSPARENT	0x08
 
+#define LMP_EDR_2M		0x02
+#define LMP_EDR_3M		0x04
 #define LMP_RSSI_INQ	0x40
 #define LMP_ESCO	0x80
 
@@ -361,7 +371,9 @@ enum {
 #define LMP_EV5		0x02
 #define LMP_NO_BREDR	0x20
 #define LMP_LE		0x40
+#define LMP_EDR_3SLOT	0x80
 
+#define LMP_EDR_5SLOT	0x01
 #define LMP_SNIFF_SUBR	0x02
 #define LMP_PAUSE_ENC	0x04
 #define LMP_EDR_ESCO_2M	0x20
@@ -399,6 +411,8 @@ enum {
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
 #define HCI_LE_EXT_SCAN_POLICY		0x80
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_CHAN_SEL_ALG2		0x40
 
 /* Connection modes */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 71f79df9ee05..a64d13f91d09 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1141,6 +1141,10 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define lmp_inq_tx_pwr_capable(dev) ((dev)->features[0][7] & LMP_INQ_TX_PWR)
 #define lmp_ext_feat_capable(dev)  ((dev)->features[0][7] & LMP_EXTFEATURES)
 #define lmp_transp_capable(dev)    ((dev)->features[0][2] & LMP_TRANSPARENT)
+#define lmp_edr_2m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_2M)
+#define lmp_edr_3m_capable(dev)    ((dev)->features[0][3] & LMP_EDR_3M)
+#define lmp_edr_3slot_capable(dev) ((dev)->features[0][4] & LMP_EDR_3SLOT)
+#define lmp_edr_5slot_capable(dev) ((dev)->features[0][5] & LMP_EDR_5SLOT)
 
 /* ----- Extended LMP capabilities ----- */
 #define lmp_csb_master_capable(dev) ((dev)->features[2][0] & LMP_CSB_MASTER)
-- 
cgit v1.2.3


From 6244691fec4dd0adebca255e60e0ed7ac8155b2e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:34 +0530
Subject: Bluetooth: Implement Get PHY Configuration mgmt command

This commands basically retrieve the supported packet types of
BREDR and supported PHYs of the controller.

BR_1M_1SLOT, LE_1M_TX and LE_1M_RX would be supported by default.
Other PHYs are supported based on the local features.

Also this sets PHY_CONFIGURATION bit in supported settings.

@ MGMT Command: Get PHY Configuration (0x0044) plen 0
@ MGMT Event: Command Complete (0x0001) plen 15
      Get PHY Configuration (0x0044) plen 12
        Status: Success (0x00)
        Supported PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Configurable PHYs: 0x79fe
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
        Selected PHYs: 0x07ff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h |  25 ++++++++
 net/bluetooth/mgmt.c         | 145 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index e7303eee65cd..1c93d6e83a6c 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -101,6 +101,7 @@ struct mgmt_rp_read_index_list {
 #define MGMT_SETTING_PRIVACY		0x00002000
 #define MGMT_SETTING_CONFIGURATION	0x00004000
 #define MGMT_SETTING_STATIC_ADDRESS	0x00008000
+#define MGMT_SETTING_PHY_CONFIGURATION  0x00010000
 
 #define MGMT_OP_READ_INFO		0x0004
 #define MGMT_READ_INFO_SIZE		0
@@ -604,6 +605,30 @@ struct mgmt_cp_set_appearance {
 } __packed;
 #define MGMT_SET_APPEARANCE_SIZE	2
 
+#define MGMT_OP_GET_PHY_CONFIGURATION	0x0044
+struct mgmt_rp_get_phy_confguration {
+	__le32	supported_phys;
+	__le32	configurable_phys;
+	__le32	selected_phys;
+} __packed;
+#define MGMT_GET_PHY_CONFIGURATION_SIZE	0
+
+#define MGMT_PHY_BR_1M_1SLOT	0x00000001
+#define MGMT_PHY_BR_1M_3SLOT	0x00000002
+#define MGMT_PHY_BR_1M_5SLOT	0x00000004
+#define MGMT_PHY_EDR_2M_1SLOT	0x00000008
+#define MGMT_PHY_EDR_2M_3SLOT	0x00000010
+#define MGMT_PHY_EDR_2M_5SLOT	0x00000020
+#define MGMT_PHY_EDR_3M_1SLOT	0x00000040
+#define MGMT_PHY_EDR_3M_3SLOT	0x00000080
+#define MGMT_PHY_EDR_3M_5SLOT	0x00000100
+#define MGMT_PHY_LE_1M_TX		0x00000200
+#define MGMT_PHY_LE_1M_RX		0x00000400
+#define MGMT_PHY_LE_2M_TX		0x00000800
+#define MGMT_PHY_LE_2M_RX		0x00001000
+#define MGMT_PHY_LE_CODED_TX	0x00002000
+#define MGMT_PHY_LE_CODED_RX	0x00004000
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 8a80d48d89c4..c8c3b39fa9f2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -617,6 +617,127 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev,
 				 &rp, sizeof(rp));
 }
 
+static u32 get_supported_phys(struct hci_dev *hdev)
+{
+	u32 supported_phys = 0;
+
+	if (lmp_bredr_capable(hdev)) {
+		supported_phys |= MGMT_PHY_BR_1M_1SLOT;
+
+		if (hdev->features[0][0] & LMP_3SLOT)
+			supported_phys |= MGMT_PHY_BR_1M_3SLOT;
+
+		if (hdev->features[0][0] & LMP_5SLOT)
+			supported_phys |= MGMT_PHY_BR_1M_5SLOT;
+
+		if (lmp_edr_2m_capable(hdev)) {
+			supported_phys |= MGMT_PHY_EDR_2M_1SLOT;
+
+			if (lmp_edr_3slot_capable(hdev))
+				supported_phys |= MGMT_PHY_EDR_2M_3SLOT;
+
+			if (lmp_edr_5slot_capable(hdev))
+				supported_phys |= MGMT_PHY_EDR_2M_5SLOT;
+
+			if (lmp_edr_3m_capable(hdev)) {
+				supported_phys |= MGMT_PHY_EDR_3M_1SLOT;
+
+				if (lmp_edr_3slot_capable(hdev))
+					supported_phys |= MGMT_PHY_EDR_3M_3SLOT;
+
+				if (lmp_edr_5slot_capable(hdev))
+					supported_phys |= MGMT_PHY_EDR_3M_5SLOT;
+			}
+		}
+	}
+
+	if (lmp_le_capable(hdev)) {
+		supported_phys |= MGMT_PHY_LE_1M_TX;
+		supported_phys |= MGMT_PHY_LE_1M_RX;
+
+		if (hdev->le_features[1] & HCI_LE_PHY_2M) {
+			supported_phys |= MGMT_PHY_LE_2M_TX;
+			supported_phys |= MGMT_PHY_LE_2M_RX;
+		}
+
+		if (hdev->le_features[1] & HCI_LE_PHY_CODED) {
+			supported_phys |= MGMT_PHY_LE_CODED_TX;
+			supported_phys |= MGMT_PHY_LE_CODED_RX;
+		}
+	}
+
+	return supported_phys;
+}
+
+static u32 get_selected_phys(struct hci_dev *hdev)
+{
+	u32 selected_phys = 0;
+
+	if (lmp_bredr_capable(hdev)) {
+		selected_phys |= MGMT_PHY_BR_1M_1SLOT;
+
+		if (hdev->pkt_type & (HCI_DM3 | HCI_DH3))
+			selected_phys |= MGMT_PHY_BR_1M_3SLOT;
+
+		if (hdev->pkt_type & (HCI_DM5 | HCI_DH5))
+			selected_phys |= MGMT_PHY_BR_1M_5SLOT;
+
+		if (lmp_edr_2m_capable(hdev)) {
+			if (!(hdev->pkt_type & HCI_2DH1))
+				selected_phys |= MGMT_PHY_EDR_2M_1SLOT;
+
+			if (lmp_edr_3slot_capable(hdev) &&
+			    !(hdev->pkt_type & HCI_2DH3))
+				selected_phys |= MGMT_PHY_EDR_2M_3SLOT;
+
+			if (lmp_edr_5slot_capable(hdev) &&
+			    !(hdev->pkt_type & HCI_2DH5))
+				selected_phys |= MGMT_PHY_EDR_2M_5SLOT;
+
+			if (lmp_edr_3m_capable(hdev)) {
+				if (!(hdev->pkt_type & HCI_3DH1))
+					selected_phys |= MGMT_PHY_EDR_3M_1SLOT;
+
+				if (lmp_edr_3slot_capable(hdev) &&
+				    !(hdev->pkt_type & HCI_3DH3))
+					selected_phys |= MGMT_PHY_EDR_3M_3SLOT;
+
+				if (lmp_edr_5slot_capable(hdev) &&
+				    !(hdev->pkt_type & HCI_3DH5))
+					selected_phys |= MGMT_PHY_EDR_3M_5SLOT;
+			}
+		}
+	}
+
+	if (lmp_le_capable(hdev)) {
+		if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_1M)
+			selected_phys |= MGMT_PHY_LE_1M_TX;
+
+		if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_1M)
+			selected_phys |= MGMT_PHY_LE_1M_RX;
+
+		if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_2M)
+			selected_phys |= MGMT_PHY_LE_2M_TX;
+
+		if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_2M)
+			selected_phys |= MGMT_PHY_LE_2M_RX;
+
+		if (hdev->le_tx_def_phys & HCI_LE_SET_PHY_CODED)
+			selected_phys |= MGMT_PHY_LE_CODED_TX;
+
+		if (hdev->le_rx_def_phys & HCI_LE_SET_PHY_CODED)
+			selected_phys |= MGMT_PHY_LE_CODED_RX;
+	}
+
+	return selected_phys;
+}
+
+static u32 get_configurable_phys(struct hci_dev *hdev)
+{
+	return (get_supported_phys(hdev) & ~MGMT_PHY_BR_1M_1SLOT &
+		~MGMT_PHY_LE_1M_TX & ~MGMT_PHY_LE_1M_RX);
+}
+
 static u32 get_supported_settings(struct hci_dev *hdev)
 {
 	u32 settings = 0;
@@ -654,6 +775,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
 	    hdev->set_bdaddr)
 		settings |= MGMT_SETTING_CONFIGURATION;
 
+	settings |= MGMT_SETTING_PHY_CONFIGURATION;
+
 	return settings;
 }
 
@@ -3184,6 +3307,27 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data,
 	return err;
 }
 
+static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev,
+				 void *data, u16 len)
+{
+	struct mgmt_rp_get_phy_confguration rp;
+
+	BT_DBG("sock %p %s", sk, hdev->name);
+
+	hci_dev_lock(hdev);
+
+	memset(&rp, 0, sizeof(rp));
+
+	rp.supported_phys = cpu_to_le32(get_supported_phys(hdev));
+	rp.selected_phys = cpu_to_le32(get_selected_phys(hdev));
+	rp.configurable_phys = cpu_to_le32(get_configurable_phys(hdev));
+
+	hci_dev_unlock(hdev);
+
+	return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_PHY_CONFIGURATION, 0,
+				 &rp, sizeof(rp));
+}
+
 static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 				         u16 opcode, struct sk_buff *skb)
 {
@@ -6544,6 +6688,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE,
 						HCI_MGMT_UNTRUSTED },
 	{ set_appearance,	   MGMT_SET_APPEARANCE_SIZE },
+	{ get_phy_configuration,   MGMT_GET_PHY_CONFIGURATION_SIZE },
 };
 
 void mgmt_index_added(struct hci_dev *hdev)
-- 
cgit v1.2.3


From 0314f2867fa0c46d0fc1c23c80e7fab9435079df Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:35 +0530
Subject: Bluetooth: Implement Set PHY Confguration command

This enables user to set phys which will be used in all subsequent
connections. Also host will use the same in LE scanning as well.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
        All PHYs preference: 0x00
        TX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
        RX PHYs preference: 0x07
          LE 1M
          LE 2M
          LE Coded
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Default PHY (0x08|0x0031) ncmd 1
        Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
      Set PHY Configuration (0x0045) plen 0
        Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
        Selected PHYs: 0x7fff
          BR 1M 1SLOT
          BR 1M 3SLOT
          BR 1M 5SLOT
          EDR 2M 1SLOT
          EDR 2M 3SLOT
          EDR 2M 5SLOT
          EDR 3M 1SLOT
          EDR 3M 3SLOT
          EDR 3M 5SLOT
          LE 1M TX
          LE 1M RX
          LE 2M TX
          LE 2M RX
          LE CODED TX
          LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/mgmt.h |  19 +++++
 net/bluetooth/hci_event.c    |  26 +++++++
 net/bluetooth/mgmt.c         | 182 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 227 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 1c93d6e83a6c..0916e203e5d9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -629,6 +629,25 @@ struct mgmt_rp_get_phy_confguration {
 #define MGMT_PHY_LE_CODED_TX	0x00002000
 #define MGMT_PHY_LE_CODED_RX	0x00004000
 
+#define MGMT_PHY_BREDR_MASK (MGMT_PHY_BR_1M_1SLOT | MGMT_PHY_BR_1M_3SLOT | \
+			     MGMT_PHY_BR_1M_5SLOT | MGMT_PHY_EDR_2M_1SLOT | \
+			     MGMT_PHY_EDR_2M_3SLOT | MGMT_PHY_EDR_2M_5SLOT | \
+			     MGMT_PHY_EDR_3M_1SLOT | MGMT_PHY_EDR_3M_3SLOT | \
+			     MGMT_PHY_EDR_3M_5SLOT)
+#define MGMT_PHY_LE_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_1M_RX | \
+			  MGMT_PHY_LE_2M_TX | MGMT_PHY_LE_2M_RX | \
+			  MGMT_PHY_LE_CODED_TX | MGMT_PHY_LE_CODED_RX)
+#define MGMT_PHY_LE_TX_MASK (MGMT_PHY_LE_1M_TX | MGMT_PHY_LE_2M_TX | \
+			     MGMT_PHY_LE_CODED_TX)
+#define MGMT_PHY_LE_RX_MASK (MGMT_PHY_LE_1M_RX | MGMT_PHY_LE_2M_RX | \
+			     MGMT_PHY_LE_CODED_RX)
+
+#define MGMT_OP_SET_PHY_CONFIGURATION	0x0045
+struct mgmt_cp_set_phy_confguration {
+	__le32	selected_phys;
+} __packed;
+#define MGMT_SET_PHY_CONFIGURATION_SIZE	4
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 68192152c23b..694231541a4c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1042,6 +1042,28 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_le_set_default_phy(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+	struct hci_cp_le_set_default_phy *cp;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_DEFAULT_PHY);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	hdev->le_tx_def_phys = cp->tx_phys;
+	hdev->le_rx_def_phys = cp->rx_phys;
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 *sent, status = *((__u8 *) skb->data);
@@ -3163,6 +3185,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_set_ext_scan_enable(hdev, skb);
 		break;
 
+	case HCI_OP_LE_SET_DEFAULT_PHY:
+		hci_cc_le_set_default_phy(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c8c3b39fa9f2..7cd6a37a63ee 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3328,6 +3328,187 @@ static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 				 &rp, sizeof(rp));
 }
 
+static void set_default_phy_complete(struct hci_dev *hdev, u8 status,
+				     u16 opcode, struct sk_buff *skb)
+{
+	struct mgmt_cp_set_phy_confguration *cp;
+	struct mgmt_pending_cmd *cmd;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
+
+	cmd = pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev);
+	if (!cmd)
+		goto unlock;
+
+	cp = cmd->param;
+
+	if (status) {
+		mgmt_cmd_status(cmd->sk, hdev->id,
+				MGMT_OP_SET_PHY_CONFIGURATION,
+				mgmt_status(status));
+	} else {
+		mgmt_cmd_complete(cmd->sk, hdev->id,
+				  MGMT_OP_SET_PHY_CONFIGURATION, 0,
+				  NULL, 0);
+	}
+
+	mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev,
+				 void *data, u16 len)
+{
+	struct mgmt_cp_set_phy_confguration *cp = data;
+	struct hci_cp_le_set_default_phy cp_phy;
+	struct mgmt_pending_cmd *cmd;
+	struct hci_request req;
+	u32 selected_phys, configurable_phys, supported_phys, unconfigure_phys;
+	u16 pkt_type = (HCI_DH1 | HCI_DM1);
+	int err;
+
+	BT_DBG("sock %p %s", sk, hdev->name);
+
+	configurable_phys = get_configurable_phys(hdev);
+	supported_phys = get_supported_phys(hdev);
+	selected_phys = __le32_to_cpu(cp->selected_phys);
+
+	if (selected_phys & ~supported_phys)
+		return mgmt_cmd_status(sk, hdev->id,
+				       MGMT_OP_SET_PHY_CONFIGURATION,
+				       MGMT_STATUS_INVALID_PARAMS);
+
+	unconfigure_phys = supported_phys & ~configurable_phys;
+
+	if ((selected_phys & unconfigure_phys) != unconfigure_phys)
+		return mgmt_cmd_status(sk, hdev->id,
+				       MGMT_OP_SET_PHY_CONFIGURATION,
+				       MGMT_STATUS_INVALID_PARAMS);
+
+	if (selected_phys == get_selected_phys(hdev))
+		return mgmt_cmd_complete(sk, hdev->id,
+					 MGMT_OP_SET_PHY_CONFIGURATION,
+					 0, NULL, 0);
+
+	hci_dev_lock(hdev);
+
+	if (!hdev_is_powered(hdev)) {
+		err = mgmt_cmd_status(sk, hdev->id,
+				      MGMT_OP_SET_PHY_CONFIGURATION,
+				      MGMT_STATUS_REJECTED);
+		goto unlock;
+	}
+
+	if (pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) {
+		err = mgmt_cmd_status(sk, hdev->id,
+				      MGMT_OP_SET_PHY_CONFIGURATION,
+				      MGMT_STATUS_BUSY);
+		goto unlock;
+	}
+
+	if (selected_phys & MGMT_PHY_BR_1M_3SLOT)
+		pkt_type |= (HCI_DH3 | HCI_DM3);
+	else
+		pkt_type &= ~(HCI_DH3 | HCI_DM3);
+
+	if (selected_phys & MGMT_PHY_BR_1M_5SLOT)
+		pkt_type |= (HCI_DH5 | HCI_DM5);
+	else
+		pkt_type &= ~(HCI_DH5 | HCI_DM5);
+
+	if (selected_phys & MGMT_PHY_EDR_2M_1SLOT)
+		pkt_type &= ~HCI_2DH1;
+	else
+		pkt_type |= HCI_2DH1;
+
+	if (selected_phys & MGMT_PHY_EDR_2M_3SLOT)
+		pkt_type &= ~HCI_2DH3;
+	else
+		pkt_type |= HCI_2DH3;
+
+	if (selected_phys & MGMT_PHY_EDR_2M_5SLOT)
+		pkt_type &= ~HCI_2DH5;
+	else
+		pkt_type |= HCI_2DH5;
+
+	if (selected_phys & MGMT_PHY_EDR_3M_1SLOT)
+		pkt_type &= ~HCI_3DH1;
+	else
+		pkt_type |= HCI_3DH1;
+
+	if (selected_phys & MGMT_PHY_EDR_3M_3SLOT)
+		pkt_type &= ~HCI_3DH3;
+	else
+		pkt_type |= HCI_3DH3;
+
+	if (selected_phys & MGMT_PHY_EDR_3M_5SLOT)
+		pkt_type &= ~HCI_3DH5;
+	else
+		pkt_type |= HCI_3DH5;
+
+	if (pkt_type != hdev->pkt_type)
+		hdev->pkt_type = pkt_type;
+
+	if ((selected_phys & MGMT_PHY_LE_MASK) ==
+	    (get_selected_phys(hdev) & MGMT_PHY_LE_MASK)) {
+		err = mgmt_cmd_complete(sk, hdev->id,
+					MGMT_OP_SET_PHY_CONFIGURATION,
+					0, NULL, 0);
+
+		goto unlock;
+	}
+
+	cmd = mgmt_pending_add(sk, MGMT_OP_SET_PHY_CONFIGURATION, hdev, data,
+			       len);
+	if (!cmd) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	hci_req_init(&req, hdev);
+
+	memset(&cp_phy, 0, sizeof(cp_phy));
+
+	if (!(selected_phys & MGMT_PHY_LE_TX_MASK))
+		cp_phy.all_phys |= 0x01;
+
+	if (!(selected_phys & MGMT_PHY_LE_RX_MASK))
+		cp_phy.all_phys |= 0x02;
+
+	if (selected_phys & MGMT_PHY_LE_1M_TX)
+		cp_phy.tx_phys |= HCI_LE_SET_PHY_1M;
+
+	if (selected_phys & MGMT_PHY_LE_2M_TX)
+		cp_phy.tx_phys |= HCI_LE_SET_PHY_2M;
+
+	if (selected_phys & MGMT_PHY_LE_CODED_TX)
+		cp_phy.tx_phys |= HCI_LE_SET_PHY_CODED;
+
+	if (selected_phys & MGMT_PHY_LE_1M_RX)
+		cp_phy.rx_phys |= HCI_LE_SET_PHY_1M;
+
+	if (selected_phys & MGMT_PHY_LE_2M_RX)
+		cp_phy.rx_phys |= HCI_LE_SET_PHY_2M;
+
+	if (selected_phys & MGMT_PHY_LE_CODED_RX)
+		cp_phy.rx_phys |= HCI_LE_SET_PHY_CODED;
+
+	hci_req_add(&req, HCI_OP_LE_SET_DEFAULT_PHY, sizeof(cp_phy), &cp_phy);
+
+	err = hci_req_run_skb(&req, set_default_phy_complete);
+	if (err < 0)
+		mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
+
+	return err;
+}
+
 static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 				         u16 opcode, struct sk_buff *skb)
 {
@@ -6689,6 +6870,7 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 						HCI_MGMT_UNTRUSTED },
 	{ set_appearance,	   MGMT_SET_APPEARANCE_SIZE },
 	{ get_phy_configuration,   MGMT_GET_PHY_CONFIGURATION_SIZE },
+	{ set_phy_configuration,   MGMT_SET_PHY_CONFIGURATION_SIZE },
 };
 
 void mgmt_index_added(struct hci_dev *hdev)
-- 
cgit v1.2.3


From b7c23df85b6a1c3bcfb591cfa938d341fc3a556e Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:36 +0530
Subject: Bluetooth: Implement PHY changed event

This defines and implement phy changed event and send it to user
whenever selected PHYs changes using SET_PHY_CONFIGURATION.

This will be also trigerred when BREDR pkt_type is changed using
the legacy ioctl HCISETPTYPE.

@ MGMT Command: Set PHY Configuration (0x0045) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX
< HCI Command: LE Set Default PHY (0x08|0x0031) plen 3
		All PHYs preference: 0x00
		TX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
		RX PHYs preference: 0x07
		  LE 1M
		  LE 2M
		  LE Coded
> HCI Event: Command Complete (0x0e) plen 4
	  LE Set Default PHY (0x08|0x0031) ncmd 1
		Status: Success (0x00)
@ MGMT Event: Command Complete (0x0001) plen 3
	  Set PHY Configuration (0x0045) plen 0
		Status: Success (0x00)
@ MGMT Event: PHY Configuration Changed (0x0026) plen 4
		Selected PHYs: 0x7fff
		  BR 1M 1SLOT
		  BR 1M 3SLOT
		  BR 1M 5SLOT
		  EDR 2M 1SLOT
		  EDR 2M 3SLOT
		  EDR 2M 5SLOT
		  EDR 3M 1SLOT
		  EDR 3M 3SLOT
		  EDR 3M 5SLOT
		  LE 1M TX
		  LE 1M RX
		  LE 2M TX
		  LE 2M RX
		  LE CODED TX
		  LE CODED RX

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 include/net/bluetooth/mgmt.h     |  5 +++++
 net/bluetooth/hci_core.c         |  4 ++++
 net/bluetooth/mgmt.c             | 22 +++++++++++++++++++++-
 4 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index a64d13f91d09..ab5d494a545a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1544,6 +1544,7 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev,
 			    u8 instance);
 void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev,
 			      u8 instance);
+int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip);
 
 u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency,
 		      u16 to_multiplier);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 0916e203e5d9..7f372e9067c9 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -868,3 +868,8 @@ struct mgmt_ev_ext_info_changed {
 	__le16	eir_len;
 	__u8	eir[0];
 } __packed;
+
+#define MGMT_EV_PHY_CONFIGURATION_CHANGED	0x0026
+struct mgmt_ev_phy_configuration_changed {
+	__le32	selected_phys;
+} __packed;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 432f89f390c0..523e91ad64d0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1924,7 +1924,11 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
 		break;
 
 	case HCISETPTYPE:
+		if (hdev->pkt_type == (__u16) dr.dev_opt)
+			break;
+
 		hdev->pkt_type = (__u16) dr.dev_opt;
+		mgmt_phy_configuration_changed(hdev, NULL);
 		break;
 
 	case HCISETACLMTU:
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7cd6a37a63ee..1867aadc5061 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3328,6 +3328,18 @@ static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 				 &rp, sizeof(rp));
 }
 
+int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip)
+{
+	struct mgmt_ev_phy_configuration_changed ev;
+
+	memset(&ev, 0, sizeof(ev));
+
+	ev.selected_phys = cpu_to_le32(get_selected_phys(hdev));
+
+	return mgmt_event(MGMT_EV_PHY_CONFIGURATION_CHANGED, hdev, &ev,
+			  sizeof(ev), skip);
+}
+
 static void set_default_phy_complete(struct hci_dev *hdev, u8 status,
 				     u16 opcode, struct sk_buff *skb)
 {
@@ -3352,6 +3364,8 @@ static void set_default_phy_complete(struct hci_dev *hdev, u8 status,
 		mgmt_cmd_complete(cmd->sk, hdev->id,
 				  MGMT_OP_SET_PHY_CONFIGURATION, 0,
 				  NULL, 0);
+
+		mgmt_phy_configuration_changed(hdev, cmd->sk);
 	}
 
 	mgmt_pending_remove(cmd);
@@ -3369,6 +3383,7 @@ static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 	struct hci_request req;
 	u32 selected_phys, configurable_phys, supported_phys, unconfigure_phys;
 	u16 pkt_type = (HCI_DH1 | HCI_DM1);
+	bool changed = false;
 	int err;
 
 	BT_DBG("sock %p %s", sk, hdev->name);
@@ -3450,11 +3465,16 @@ static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 	else
 		pkt_type |= HCI_3DH5;
 
-	if (pkt_type != hdev->pkt_type)
+	if (pkt_type != hdev->pkt_type) {
 		hdev->pkt_type = pkt_type;
+		changed = true;
+	}
 
 	if ((selected_phys & MGMT_PHY_LE_MASK) ==
 	    (get_selected_phys(hdev) & MGMT_PHY_LE_MASK)) {
+		if (changed)
+			mgmt_phy_configuration_changed(hdev, sk);
+
 		err = mgmt_cmd_complete(sk, hdev->id,
 					MGMT_OP_SET_PHY_CONFIGURATION,
 					0, NULL, 0);
-- 
cgit v1.2.3


From 45bdd86eafc7d29e0b4b6681bec9c6ab8eddc6bf Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:37 +0530
Subject: Bluetooth: Set Scan PHYs based on selected PHYs by user

Use the PHYs selected in Set Phy Configuration management command
while scanning.

< HCI Command: LE Set Extended Scan Parameters (0x08|0x0041) plen 13
        Own address type: Random (0x01)
        Filter policy: Accept all advertisement (0x00)
        PHYs: 0x05
        Entry 0: LE 1M
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
        Entry 1: LE Coded
          Type: Active (0x01)
          Interval: 11.250 msec (0x0012)
          Window: 11.250 msec (0x0012)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Parameters (0x08|0x0041) ncmd 1
        Status: Success (0x00)
< HCI Command: LE Set Extended Scan Enable (0x08|0x0042) plen 6
        Extended scan: Enabled (0x01)
        Filter duplicates: Enabled (0x01)
        Duration: 0 msec (0x0000)
        Period: 0.00 sec (0x0000)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Scan Enable (0x08|0x0042) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  4 +++-
 include/net/bluetooth/hci_core.h |  9 +++++++++
 net/bluetooth/hci_request.c      | 37 ++++++++++++++++++++++++++++---------
 3 files changed, 40 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 89bf800f6eb1..04211457367a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1540,7 +1540,9 @@ struct hci_cp_le_set_ext_scan_params {
 	__u8    data[0];
 } __packed;
 
-#define LE_SCAN_PHY_1M 0x01
+#define LE_SCAN_PHY_1M		0x01
+#define LE_SCAN_PHY_2M		0x02
+#define LE_SCAN_PHY_CODED	0x04
 
 struct hci_cp_le_scan_phy_params {
 	__u8    type;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab5d494a545a..113c9bb609c7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1165,6 +1165,15 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 #define bredr_sc_enabled(dev)  (lmp_sc_capable(dev) && \
 				hci_dev_test_flag(dev, HCI_SC_ENABLED))
 
+#define scan_1m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_1M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_1M))
+
+#define scan_2m(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_2M) || \
+		      ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_2M))
+
+#define scan_coded(dev) (((dev)->le_tx_def_phys & HCI_LE_SET_PHY_CODED) || \
+			 ((dev)->le_rx_def_phys & HCI_LE_SET_PHY_CODED))
+
 /* Use ext scanning if set ext scan param and ext scan enable is supported */
 #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
 			   ((dev)->commands[37] & 0x40))
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index faf7c711234c..215059a7646e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -790,8 +790,8 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 		struct hci_cp_le_set_ext_scan_params *ext_param_cp;
 		struct hci_cp_le_set_ext_scan_enable ext_enable_cp;
 		struct hci_cp_le_scan_phy_params *phy_params;
-		/* Ony single PHY (1M) is supported as of now */
-		u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 1];
+		u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 2];
+		u32 plen;
 
 		ext_param_cp = (void *)data;
 		phy_params = (void *)ext_param_cp->data;
@@ -799,16 +799,35 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
 		memset(ext_param_cp, 0, sizeof(*ext_param_cp));
 		ext_param_cp->own_addr_type = own_addr_type;
 		ext_param_cp->filter_policy = filter_policy;
-		ext_param_cp->scanning_phys = LE_SCAN_PHY_1M;
 
-		memset(phy_params, 0, sizeof(*phy_params));
-		phy_params->type = type;
-		phy_params->interval = cpu_to_le16(interval);
-		phy_params->window = cpu_to_le16(window);
+		plen = sizeof(*ext_param_cp);
+
+		if (scan_1m(hdev) || scan_2m(hdev)) {
+			ext_param_cp->scanning_phys |= LE_SCAN_PHY_1M;
+
+			memset(phy_params, 0, sizeof(*phy_params));
+			phy_params->type = type;
+			phy_params->interval = cpu_to_le16(interval);
+			phy_params->window = cpu_to_le16(window);
+
+			plen += sizeof(*phy_params);
+			phy_params++;
+		}
+
+		if (scan_coded(hdev)) {
+			ext_param_cp->scanning_phys |= LE_SCAN_PHY_CODED;
+
+			memset(phy_params, 0, sizeof(*phy_params));
+			phy_params->type = type;
+			phy_params->interval = cpu_to_le16(interval);
+			phy_params->window = cpu_to_le16(window);
+
+			plen += sizeof(*phy_params);
+			phy_params++;
+		}
 
 		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS,
-			    sizeof(*ext_param_cp) + sizeof(*phy_params),
-			    ext_param_cp);
+			    plen, ext_param_cp);
 
 		memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
 		ext_enable_cp.enable = LE_SCAN_ENABLE;
-- 
cgit v1.2.3


From b2cc9761f144e8ef714be8c590603073b80ddc13 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:38 +0530
Subject: Bluetooth: Handle extended ADV PDU types

This patch defines the extended ADV types and handle it in ADV report.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  8 ++++++++
 net/bluetooth/hci_event.c   | 50 +++++++++++++++++++++++++++++++++------------
 2 files changed, 45 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 04211457367a..83a1593a128e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1976,6 +1976,14 @@ struct hci_ev_le_conn_complete {
 #define LE_LEGACY_SCAN_RSP_ADV		0x001b
 #define LE_LEGACY_SCAN_RSP_ADV_SCAN	0x001a
 
+/* Extended Advertising event types */
+#define LE_EXT_ADV_NON_CONN_IND		0x0000
+#define LE_EXT_ADV_CONN_IND		0x0001
+#define LE_EXT_ADV_SCAN_IND		0x0002
+#define LE_EXT_ADV_DIRECT_IND		0x0004
+#define LE_EXT_ADV_SCAN_RSP		0x0008
+#define LE_EXT_ADV_LEGACY_PDU		0x0010
+
 #define ADDR_LE_DEV_PUBLIC	0x00
 #define ADDR_LE_DEV_RANDOM	0x01
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 694231541a4c..5fa00f488cfc 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5137,21 +5137,45 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
-static u8 convert_legacy_evt_type(u16 evt_type)
-{
-	switch (evt_type) {
-	case LE_LEGACY_ADV_IND:
+static u8 ext_evt_type_to_legacy(u16 evt_type)
+{
+	if (evt_type & LE_EXT_ADV_LEGACY_PDU) {
+		switch (evt_type) {
+		case LE_LEGACY_ADV_IND:
+			return LE_ADV_IND;
+		case LE_LEGACY_ADV_DIRECT_IND:
+			return LE_ADV_DIRECT_IND;
+		case LE_LEGACY_ADV_SCAN_IND:
+			return LE_ADV_SCAN_IND;
+		case LE_LEGACY_NONCONN_IND:
+			return LE_ADV_NONCONN_IND;
+		case LE_LEGACY_SCAN_RSP_ADV:
+		case LE_LEGACY_SCAN_RSP_ADV_SCAN:
+			return LE_ADV_SCAN_RSP;
+		}
+
+		BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
+				   evt_type);
+
+		return LE_ADV_INVALID;
+	}
+
+	if (evt_type & LE_EXT_ADV_CONN_IND) {
+		if (evt_type & LE_EXT_ADV_DIRECT_IND)
+			return LE_ADV_DIRECT_IND;
+
 		return LE_ADV_IND;
-	case LE_LEGACY_ADV_DIRECT_IND:
-		return LE_ADV_DIRECT_IND;
-	case LE_LEGACY_ADV_SCAN_IND:
+	}
+
+	if (evt_type & LE_EXT_ADV_SCAN_RSP)
+		return LE_ADV_SCAN_RSP;
+
+	if (evt_type & LE_EXT_ADV_SCAN_IND)
 		return LE_ADV_SCAN_IND;
-	case LE_LEGACY_NONCONN_IND:
+
+	if (evt_type == LE_EXT_ADV_NON_CONN_IND ||
+	    evt_type & LE_EXT_ADV_DIRECT_IND)
 		return LE_ADV_NONCONN_IND;
-	case LE_LEGACY_SCAN_RSP_ADV:
-	case LE_LEGACY_SCAN_RSP_ADV_SCAN:
-		return LE_ADV_SCAN_RSP;
-	}
 
 	BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
 				   evt_type);
@@ -5172,7 +5196,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		u16 evt_type;
 
 		evt_type = __le16_to_cpu(ev->evt_type);
-		legacy_evt_type = convert_legacy_evt_type(evt_type);
+		legacy_evt_type = ext_evt_type_to_legacy(evt_type);
 		if (legacy_evt_type != LE_ADV_INVALID) {
 			process_adv_report(hdev, legacy_evt_type, &ev->bdaddr,
 					   ev->bdaddr_type, NULL, 0, ev->rssi,
-- 
cgit v1.2.3


From 6b49bcb4bce2ed0f0aefe8e304a8b9cbaeeaa3f0 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:40 +0530
Subject: Bluetooth: Read no of adv sets during init

This patch reads the number of advertising sets in the controller
during init and save it in hdev.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  7 +++++++
 include/net/bluetooth/hci_core.h |  4 ++++
 net/bluetooth/hci_core.c         | 16 ++++++++++++++--
 net/bluetooth/hci_event.c        | 18 ++++++++++++++++++
 4 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 83a1593a128e..3f93ae9765a4 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,7 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
 #define HCI_LE_PHY_CODED		0x08
@@ -1579,6 +1580,12 @@ struct hci_cp_le_ext_conn_param {
 	__le16 max_ce_len;
 } __packed;
 
+#define HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS	0x203b
+struct hci_rp_le_read_num_supported_adv_sets {
+	__u8  status;
+	__u8  num_of_sets;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 113c9bb609c7..2aad4a863176 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -222,6 +222,7 @@ struct hci_dev {
 	__u8		le_features[8];
 	__u8		le_white_list_size;
 	__u8		le_resolv_list_size;
+	__u8		le_num_of_adv_sets;
 	__u8		le_states[8];
 	__u8		commands[64];
 	__u8		hci_ver;
@@ -1180,6 +1181,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
 /* Use ext create connection if command is supported */
 #define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
 
+/* Extended advertising support */
+#define ext_adv_capable(dev) (((dev)->le_features[1] & HCI_LE_EXT_ADV))
+
 /* ----- HCI protocols ----- */
 #define HCI_PROTO_DEFER             0x01
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 523e91ad64d0..7b08b7f57418 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -715,8 +715,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
 			    events);
 
-		if (hdev->commands[25] & 0x40) {
-			/* Read LE Advertising Channel TX Power */
+		/* Read LE Advertising Channel TX Power */
+		if ((hdev->commands[25] & 0x40) && !ext_adv_capable(hdev)) {
+			/* HCI TS spec forbids mixing of legacy and extended
+			 * advertising commands wherein READ_ADV_TX_POWER is
+			 * also included. So do not call it if extended adv
+			 * is supported otherwise controller will return
+			 * COMMAND_DISALLOWED for extended commands.
+			 */
 			hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
 		}
 
@@ -750,6 +756,12 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 			hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL);
 		}
 
+		if (ext_adv_capable(hdev)) {
+			/* Read LE Number of Supported Advertising Sets */
+			hci_req_add(req, HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS,
+				    0, NULL);
+		}
+
 		hci_set_le_support(req);
 	}
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 5fa00f488cfc..0ceb52edc142 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1267,6 +1267,20 @@ static void hci_cc_le_set_ext_scan_enable(struct hci_dev *hdev,
 	le_set_scan_enable_complete(hdev, cp->enable);
 }
 
+static void hci_cc_le_read_num_adv_sets(struct hci_dev *hdev,
+				      struct sk_buff *skb)
+{
+	struct hci_rp_le_read_num_supported_adv_sets *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x No of Adv sets %u", hdev->name, rp->status,
+	       rp->num_of_sets);
+
+	if (rp->status)
+		return;
+
+	hdev->le_num_of_adv_sets = rp->num_of_sets;
+}
+
 static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
 					   struct sk_buff *skb)
 {
@@ -3189,6 +3203,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_set_default_phy(hdev, skb);
 		break;
 
+	case HCI_OP_LE_READ_NUM_SUPPORTED_ADV_SETS:
+		hci_cc_le_read_num_adv_sets(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
-- 
cgit v1.2.3


From de181e887ac27dadda127c7d4c3e89c6da8fb6d2 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:41 +0530
Subject: Bluetooth: Impmlement extended adv enable

This patch basically replaces legacy adv with extended adv
based on the controller support. Currently there is no
design change. ie only one adv set will be enabled at a time.

This also adds tx_power in instance and store whatever returns
from Set_ext_parameter, use the same in adv data as well.
For instance 0 tx_power is stored in hdev only.

< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0010
          Use legacy advertising PDUs: ADV_NONCONN_IND
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE 1M (0x01)
        Secondary max skip: 0x00
        Secondary PHY: LE 1M (0x01)
        SID: 0x00
        Scan request notifications: Disabled (0x00)
> HCI Event: Command Complete (0x0e) plen 5
      LE Set Extended Advertising Parameters (0x08|0x0036) ncmd 1
        Status: Success (0x00)
        TX power (selected): 7 dbm (0x07)
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 6
        Extended advertising: Enabled (0x01)
        Number of sets: 1 (0x01)
        Entry 0
          Handle: 0x00
          Duration: 0 ms (0x00)
          Max ext adv events: 0
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |  39 +++++++++
 include/net/bluetooth/hci_core.h |   1 +
 net/bluetooth/hci_core.c         |   2 +
 net/bluetooth/hci_event.c        |  72 +++++++++++++++++
 net/bluetooth/hci_request.c      | 171 ++++++++++++++++++++++++++++++++++-----
 net/bluetooth/hci_request.h      |   3 +
 net/bluetooth/mgmt.c             |  22 +++--
 7 files changed, 285 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 3f93ae9765a4..b447b127879e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1586,6 +1586,45 @@ struct hci_rp_le_read_num_supported_adv_sets {
 	__u8  num_of_sets;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_PARAMS		0x2036
+struct hci_cp_le_set_ext_adv_params {
+	__u8      handle;
+	__le16    evt_properties;
+	__u8      min_interval[3];
+	__u8      max_interval[3];
+	__u8      channel_map;
+	__u8      own_addr_type;
+	__u8      peer_addr_type;
+	bdaddr_t  peer_addr;
+	__u8      filter_policy;
+	__u8      tx_power;
+	__u8      primary_phy;
+	__u8      secondary_max_skip;
+	__u8      secondary_phy;
+	__u8      sid;
+	__u8      notif_enable;
+} __packed;
+
+#define HCI_ADV_PHY_1M		0X01
+
+struct hci_rp_le_set_ext_adv_params {
+	__u8  status;
+	__u8  tx_power;
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_ADV_ENABLE		0x2039
+struct hci_cp_le_set_ext_adv_enable {
+	__u8  enable;
+	__u8  num_of_sets;
+	__u8  data[0];
+} __packed;
+
+struct hci_cp_ext_adv_set {
+	__u8  handle;
+	__le16 duration;
+	__u8  max_events;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2aad4a863176..ad3518303a0c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -171,6 +171,7 @@ struct adv_info {
 	__u8	adv_data[HCI_MAX_AD_LENGTH];
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
+	__s8	tx_power;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7b08b7f57418..944d4fedc317 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2779,6 +2779,8 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 	else
 		adv_instance->duration = duration;
 
+	adv_instance->tx_power = HCI_TX_POWER_INVALID;
+
 	BT_DBG("%s for %dMR", hdev->name, instance);
 
 	return 0;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0ceb52edc142..0418a5514819 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1099,6 +1099,41 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
+					 struct sk_buff *skb)
+{
+	struct hci_cp_le_set_ext_adv_enable *cp;
+	struct hci_cp_ext_adv_set *adv_set;
+	__u8 status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_ENABLE);
+	if (!cp)
+		return;
+
+	adv_set = (void *) cp->data;
+
+	hci_dev_lock(hdev);
+
+	if (cp->enable) {
+		struct hci_conn *conn;
+
+		hci_dev_set_flag(hdev, HCI_LE_ADV);
+
+		conn = hci_lookup_le_connect(hdev);
+		if (conn)
+			queue_delayed_work(hdev->workqueue,
+					   &conn->le_conn_timeout,
+					   conn->conn_timeout);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_cp_le_set_scan_param *cp;
@@ -1486,6 +1521,35 @@ static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_rp_le_set_ext_adv_params *rp = (void *) skb->data;
+	struct hci_cp_le_set_ext_adv_params *cp;
+	struct adv_info *adv_instance;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_EXT_ADV_PARAMS);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+	hdev->adv_addr_type = cp->own_addr_type;
+	if (!hdev->cur_adv_instance) {
+		/* Store in hdev for instance 0 */
+		hdev->adv_tx_power = rp->tx_power;
+	} else {
+		adv_instance = hci_find_adv_instance(hdev,
+						     hdev->cur_adv_instance);
+		if (adv_instance)
+			adv_instance->tx_power = rp->tx_power;
+	}
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_read_rssi *rp = (void *) skb->data;
@@ -3207,6 +3271,14 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_read_num_adv_sets(hdev, skb);
 		break;
 
+	case HCI_OP_LE_SET_EXT_ADV_PARAMS:
+		hci_cc_set_ext_adv_param(hdev, skb);
+		break;
+
+	case HCI_OP_LE_SET_EXT_ADV_ENABLE:
+		hci_cc_le_set_ext_adv_enable(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 215059a7646e..2ac9fd67440a 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -895,6 +895,24 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 			   hdev->le_scan_window, own_addr_type, filter_policy);
 }
 
+static u8 get_adv_instance_scan_rsp_len(struct hci_dev *hdev, u8 instance)
+{
+	struct adv_info *adv_instance;
+
+	/* Ignore instance 0 */
+	if (instance == 0x00)
+		return 0;
+
+	adv_instance = hci_find_adv_instance(hdev, instance);
+	if (!adv_instance)
+		return 0;
+
+	/* TODO: Take into account the "appearance" and "local-name" flags here.
+	 * These are currently being ignored as they are not supported.
+	 */
+	return adv_instance->scan_rsp_len;
+}
+
 static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
 {
 	u8 instance = hdev->cur_adv_instance;
@@ -1235,15 +1253,27 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
 		ptr += adv_instance->adv_data_len;
 	}
 
-	/* Provide Tx Power only if we can provide a valid value for it */
-	if (hdev->adv_tx_power != HCI_TX_POWER_INVALID &&
-	    (instance_flags & MGMT_ADV_FLAG_TX_POWER)) {
-		ptr[0] = 0x02;
-		ptr[1] = EIR_TX_POWER;
-		ptr[2] = (u8)hdev->adv_tx_power;
+	if (instance_flags & MGMT_ADV_FLAG_TX_POWER) {
+		s8 adv_tx_power;
 
-		ad_len += 3;
-		ptr += 3;
+		if (ext_adv_capable(hdev)) {
+			if (adv_instance)
+				adv_tx_power = adv_instance->tx_power;
+			else
+				adv_tx_power = hdev->adv_tx_power;
+		} else {
+			adv_tx_power = hdev->adv_tx_power;
+		}
+
+		/* Provide Tx Power only if we can provide a valid value for it */
+		if (adv_tx_power != HCI_TX_POWER_INVALID) {
+			ptr[0] = 0x02;
+			ptr[1] = EIR_TX_POWER;
+			ptr[2] = (u8)adv_tx_power;
+
+			ad_len += 3;
+			ptr += 3;
+		}
 	}
 
 	return ad_len;
@@ -1304,9 +1334,13 @@ void hci_req_reenable_advertising(struct hci_dev *hdev)
 		__hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance,
 						true);
 	} else {
-		__hci_req_update_adv_data(&req, 0x00);
-		__hci_req_update_scan_rsp_data(&req, 0x00);
-		__hci_req_enable_advertising(&req);
+		if (ext_adv_capable(hdev)) {
+			__hci_req_start_ext_adv(&req, 0x00);
+		} else {
+			__hci_req_update_adv_data(&req, 0x00);
+			__hci_req_update_scan_rsp_data(&req, 0x00);
+			__hci_req_enable_advertising(&req);
+		}
 	}
 
 	hci_req_run(&req, adv_enable_complete);
@@ -1343,6 +1377,87 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+static int __hci_req_setup_ext_adv_instance(struct hci_request *req,
+					    u8 instance)
+{
+	struct hci_cp_le_set_ext_adv_params cp;
+	struct hci_dev *hdev = req->hdev;
+	bool connectable;
+	u32 flags;
+	/* In ext adv set param interval is 3 octets */
+	const u8 adv_interval[3] = { 0x00, 0x08, 0x00 };
+
+	flags = get_adv_instance_flags(hdev, instance);
+
+	/* If the "connectable" instance flag was not set, then choose between
+	 * ADV_IND and ADV_NONCONN_IND based on the global connectable setting.
+	 */
+	connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
+		      mgmt_get_connectable(hdev);
+
+	 if (!is_advertising_allowed(hdev, connectable))
+		return -EPERM;
+
+	memset(&cp, 0, sizeof(cp));
+
+	memcpy(cp.min_interval, adv_interval, sizeof(cp.min_interval));
+	memcpy(cp.max_interval, adv_interval, sizeof(cp.max_interval));
+
+	if (connectable)
+		cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
+	else if (get_adv_instance_scan_rsp_len(hdev, instance))
+		cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND);
+	else
+		cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND);
+
+	cp.own_addr_type = BDADDR_LE_PUBLIC;
+	cp.channel_map = hdev->le_adv_channel_map;
+	cp.tx_power = 127;
+	cp.primary_phy = HCI_ADV_PHY_1M;
+	cp.secondary_phy = HCI_ADV_PHY_1M;
+	cp.handle = 0;
+
+	hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
+
+	return 0;
+}
+
+void __hci_req_enable_ext_advertising(struct hci_request *req)
+{
+	struct hci_cp_le_set_ext_adv_enable *cp;
+	struct hci_cp_ext_adv_set *adv_set;
+	u8 data[sizeof(*cp) + sizeof(*adv_set) * 1];
+
+	cp = (void *) data;
+	adv_set = (void *) cp->data;
+
+	memset(cp, 0, sizeof(*cp));
+
+	cp->enable = 0x01;
+	cp->num_of_sets = 0x01;
+
+	memset(adv_set, 0, sizeof(*adv_set));
+
+	adv_set->handle = 0;
+
+	hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE,
+		    sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets,
+		    data);
+}
+
+int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
+{
+	int err;
+
+	err = __hci_req_setup_ext_adv_instance(req, instance);
+	if (err < 0)
+		return err;
+
+	__hci_req_enable_ext_advertising(req);
+
+	return 0;
+}
+
 int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
 				    bool force)
 {
@@ -1396,9 +1511,13 @@ int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
 		return 0;
 
 	hdev->cur_adv_instance = instance;
-	__hci_req_update_adv_data(req, instance);
-	__hci_req_update_scan_rsp_data(req, instance);
-	__hci_req_enable_advertising(req);
+	if (ext_adv_capable(hdev)) {
+		__hci_req_start_ext_adv(req, instance);
+	} else {
+		__hci_req_update_adv_data(req, instance);
+		__hci_req_update_scan_rsp_data(req, instance);
+		__hci_req_enable_advertising(req);
+	}
 
 	return 0;
 }
@@ -1669,8 +1788,12 @@ static int connectable_update(struct hci_request *req, unsigned long opt)
 
 	/* Update the advertising parameters if necessary */
 	if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
-	    !list_empty(&hdev->adv_instances))
-		__hci_req_enable_advertising(req);
+	    !list_empty(&hdev->adv_instances)) {
+		if (ext_adv_capable(hdev))
+			__hci_req_start_ext_adv(req, hdev->cur_adv_instance);
+		else
+			__hci_req_enable_advertising(req);
+	}
 
 	__hci_update_background_scan(req);
 
@@ -1779,8 +1902,12 @@ static int discoverable_update(struct hci_request *req, unsigned long opt)
 		/* Discoverable mode affects the local advertising
 		 * address in limited privacy mode.
 		 */
-		if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY))
-			__hci_req_enable_advertising(req);
+		if (hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) {
+			if (ext_adv_capable(hdev))
+				__hci_req_start_ext_adv(req, 0x00);
+			else
+				__hci_req_enable_advertising(req);
+		}
 	}
 
 	hci_dev_unlock(hdev);
@@ -2376,8 +2503,12 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt)
 			__hci_req_update_adv_data(req, 0x00);
 			__hci_req_update_scan_rsp_data(req, 0x00);
 
-			if (hci_dev_test_flag(hdev, HCI_ADVERTISING))
-				__hci_req_enable_advertising(req);
+			if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
+				if (ext_adv_capable(hdev))
+					__hci_req_start_ext_adv(req, 0x00);
+				else
+					__hci_req_enable_advertising(req);
+			}
 		} else if (!list_empty(&hdev->adv_instances)) {
 			struct adv_info *adv_instance;
 
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 702beb140d9f..9b8c74df6b2b 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -80,6 +80,9 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
 				struct hci_request *req, u8 instance,
 				bool force);
 
+int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
+void __hci_req_enable_ext_advertising(struct hci_request *req);
+
 void __hci_req_update_class(struct hci_request *req);
 
 /* Returns true if HCI commands were queued */
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1867aadc5061..761a9aeaa824 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -940,7 +940,10 @@ static void rpa_expired(struct work_struct *work)
 	 * function.
 	 */
 	hci_req_init(&req, hdev);
-	__hci_req_enable_advertising(&req);
+	if (ext_adv_capable(hdev))
+		__hci_req_start_ext_adv(&req, hdev->cur_adv_instance);
+	else
+		__hci_req_enable_advertising(&req);
 	hci_req_run(&req, NULL);
 }
 
@@ -4382,9 +4385,14 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 		 * HCI_ADVERTISING flag is not yet set.
 		 */
 		hdev->cur_adv_instance = 0x00;
-		__hci_req_update_adv_data(&req, 0x00);
-		__hci_req_update_scan_rsp_data(&req, 0x00);
-		__hci_req_enable_advertising(&req);
+
+		if (ext_adv_capable(hdev)) {
+			__hci_req_start_ext_adv(&req, 0x00);
+		} else {
+			__hci_req_update_adv_data(&req, 0x00);
+			__hci_req_update_scan_rsp_data(&req, 0x00);
+			__hci_req_enable_advertising(&req);
+		}
 	} else {
 		__hci_req_disable_advertising(&req);
 	}
@@ -6312,7 +6320,11 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev)
 	flags |= MGMT_ADV_FLAG_APPEARANCE;
 	flags |= MGMT_ADV_FLAG_LOCAL_NAME;
 
-	if (hdev->adv_tx_power != HCI_TX_POWER_INVALID)
+	/* In extended adv TX_POWER returned from Set Adv Param
+	 * will be always valid.
+	 */
+	if ((hdev->adv_tx_power != HCI_TX_POWER_INVALID) ||
+	    ext_adv_capable(hdev))
 		flags |= MGMT_ADV_FLAG_TX_POWER;
 
 	return flags;
-- 
cgit v1.2.3


From a0fb3726ba55138ef6fdd5dc67da6d9a70360696 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:42 +0530
Subject: Bluetooth: Use Set ext adv/scan rsp data if controller supports

This patch implements Set Ext Adv data and Set Ext Scan rsp data
if controller support extended advertising.

Currently the operation is set as Complete data and fragment
preference is set as no fragment

< HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 35
        Handle: 0x00
        Operation: Complete extended advertising data (0x03)
        Fragment preference: Minimize fragmentation (0x01)
        Data length: 0x15
        16-bit Service UUIDs (complete): 2 entries
          Heart Rate (0x180d)
          Battery Service (0x180f)
        Name (complete): Test LE
        Company: Google (224)
          Data: 0102
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Data (0x08|0x0037) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  22 ++++++++
 net/bluetooth/hci_event.c   |   2 +
 net/bluetooth/hci_request.c | 126 +++++++++++++++++++++++++++++++++-----------
 net/bluetooth/hci_request.h |   1 +
 net/bluetooth/mgmt.c        |  13 +++--
 5 files changed, 130 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index b447b127879e..aace97099ead 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1625,6 +1625,28 @@ struct hci_cp_ext_adv_set {
 	__u8  max_events;
 } __packed;
 
+#define HCI_OP_LE_SET_EXT_ADV_DATA		0x2037
+struct hci_cp_le_set_ext_adv_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define HCI_OP_LE_SET_EXT_SCAN_RSP_DATA		0x2038
+struct hci_cp_le_set_ext_scan_rsp_data {
+	__u8  handle;
+	__u8  operation;
+	__u8  frag_pref;
+	__u8  length;
+	__u8  data[HCI_MAX_AD_LENGTH];
+} __packed;
+
+#define LE_SET_ADV_DATA_OP_COMPLETE	0x03
+
+#define LE_SET_ADV_DATA_NO_FRAG		0x01
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0418a5514819..0a92bf7e3d80 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1547,6 +1547,8 @@ static void hci_cc_set_ext_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
 		if (adv_instance)
 			adv_instance->tx_power = rp->tx_power;
 	}
+	/* Update adv data as tx power is known now */
+	hci_req_update_adv_data(hdev, hdev->cur_adv_instance);
 	hci_dev_unlock(hdev);
 }
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 2ac9fd67440a..c41e9bb7818b 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1174,29 +1174,58 @@ static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
 void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
 {
 	struct hci_dev *hdev = req->hdev;
-	struct hci_cp_le_set_scan_rsp_data cp;
 	u8 len;
 
 	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
-	memset(&cp, 0, sizeof(cp));
+	if (ext_adv_capable(hdev)) {
+		struct hci_cp_le_set_ext_scan_rsp_data cp;
 
-	if (instance)
-		len = create_instance_scan_rsp_data(hdev, instance, cp.data);
-	else
-		len = create_default_scan_rsp_data(hdev, cp.data);
+		memset(&cp, 0, sizeof(cp));
 
-	if (hdev->scan_rsp_data_len == len &&
-	    !memcmp(cp.data, hdev->scan_rsp_data, len))
-		return;
+		if (instance)
+			len = create_instance_scan_rsp_data(hdev, instance,
+							    cp.data);
+		else
+			len = create_default_scan_rsp_data(hdev, cp.data);
+
+		if (hdev->scan_rsp_data_len == len &&
+		    !memcmp(cp.data, hdev->scan_rsp_data, len))
+			return;
+
+		memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
+		hdev->scan_rsp_data_len = len;
+
+		cp.handle = 0;
+		cp.length = len;
+		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+
+		hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, sizeof(cp),
+			    &cp);
+	} else {
+		struct hci_cp_le_set_scan_rsp_data cp;
+
+		memset(&cp, 0, sizeof(cp));
+
+		if (instance)
+			len = create_instance_scan_rsp_data(hdev, instance,
+							    cp.data);
+		else
+			len = create_default_scan_rsp_data(hdev, cp.data);
+
+		if (hdev->scan_rsp_data_len == len &&
+		    !memcmp(cp.data, hdev->scan_rsp_data, len))
+			return;
 
-	memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
-	hdev->scan_rsp_data_len = len;
+		memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
+		hdev->scan_rsp_data_len = len;
 
-	cp.length = len;
+		cp.length = len;
 
-	hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp);
+		hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp);
+	}
 }
 
 static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
@@ -1282,27 +1311,51 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr)
 void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
 {
 	struct hci_dev *hdev = req->hdev;
-	struct hci_cp_le_set_adv_data cp;
 	u8 len;
 
 	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED))
 		return;
 
-	memset(&cp, 0, sizeof(cp));
+	if (ext_adv_capable(hdev)) {
+		struct hci_cp_le_set_ext_adv_data cp;
 
-	len = create_instance_adv_data(hdev, instance, cp.data);
+		memset(&cp, 0, sizeof(cp));
 
-	/* There's nothing to do if the data hasn't changed */
-	if (hdev->adv_data_len == len &&
-	    memcmp(cp.data, hdev->adv_data, len) == 0)
-		return;
+		len = create_instance_adv_data(hdev, instance, cp.data);
+
+		/* There's nothing to do if the data hasn't changed */
+		if (hdev->adv_data_len == len &&
+		    memcmp(cp.data, hdev->adv_data, len) == 0)
+			return;
+
+		memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+		hdev->adv_data_len = len;
+
+		cp.length = len;
+		cp.handle = 0;
+		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
 
-	memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
-	hdev->adv_data_len = len;
+		hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA, sizeof(cp), &cp);
+	} else {
+		struct hci_cp_le_set_adv_data cp;
+
+		memset(&cp, 0, sizeof(cp));
 
-	cp.length = len;
+		len = create_instance_adv_data(hdev, instance, cp.data);
+
+		/* There's nothing to do if the data hasn't changed */
+		if (hdev->adv_data_len == len &&
+		    memcmp(cp.data, hdev->adv_data, len) == 0)
+			return;
 
-	hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
+		memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+		hdev->adv_data_len = len;
+
+		cp.length = len;
+
+		hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
+	}
 }
 
 int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance)
@@ -1377,8 +1430,7 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
-static int __hci_req_setup_ext_adv_instance(struct hci_request *req,
-					    u8 instance)
+int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 {
 	struct hci_cp_le_set_ext_adv_params cp;
 	struct hci_dev *hdev = req->hdev;
@@ -1453,6 +1505,7 @@ int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
 	if (err < 0)
 		return err;
 
+	__hci_req_update_scan_rsp_data(req, instance);
 	__hci_req_enable_ext_advertising(req);
 
 	return 0;
@@ -2500,14 +2553,25 @@ static int powered_update_hci(struct hci_request *req, unsigned long opt)
 		 */
 		if (hci_dev_test_flag(hdev, HCI_ADVERTISING) ||
 		    list_empty(&hdev->adv_instances)) {
-			__hci_req_update_adv_data(req, 0x00);
-			__hci_req_update_scan_rsp_data(req, 0x00);
+			int err;
+
+			if (ext_adv_capable(hdev)) {
+				err = __hci_req_setup_ext_adv_instance(req,
+								       0x00);
+				if (!err)
+					__hci_req_update_scan_rsp_data(req,
+								       0x00);
+			} else {
+				err = 0;
+				__hci_req_update_adv_data(req, 0x00);
+				__hci_req_update_scan_rsp_data(req, 0x00);
+			}
 
 			if (hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
-				if (ext_adv_capable(hdev))
-					__hci_req_start_ext_adv(req, 0x00);
-				else
+				if (!ext_adv_capable(hdev))
 					__hci_req_enable_advertising(req);
+				else if (!err)
+					__hci_req_enable_ext_advertising(req);
 			}
 		} else if (!list_empty(&hdev->adv_instances)) {
 			struct adv_info *adv_instance;
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 9b8c74df6b2b..6afc624605af 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -80,6 +80,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
 				struct hci_request *req, u8 instance,
 				bool force);
 
+int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
 void __hci_req_enable_ext_advertising(struct hci_request *req);
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 761a9aeaa824..142f7e72a9a2 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1847,10 +1847,17 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 	 */
 	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
 		struct hci_request req;
-
 		hci_req_init(&req, hdev);
-		__hci_req_update_adv_data(&req, 0x00);
-		__hci_req_update_scan_rsp_data(&req, 0x00);
+		if (ext_adv_capable(hdev)) {
+			int err;
+
+			err = __hci_req_setup_ext_adv_instance(&req, 0x00);
+			if (!err)
+				__hci_req_update_scan_rsp_data(&req, 0x00);
+		} else {
+			__hci_req_update_adv_data(&req, 0x00);
+			__hci_req_update_scan_rsp_data(&req, 0x00);
+		}
 		hci_req_run(&req, NULL);
 		hci_update_background_scan(hdev);
 	}
-- 
cgit v1.2.3


From 45b7749f16aacd9ffab8e958caa77e2aa2358c0b Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:43 +0530
Subject: Bluetooth: Implement disable and removal of adv instance

If ext adv is enabled then use ext adv to disable as well.
Also remove the adv set during LE disable.

< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  2 ++
 net/bluetooth/hci_event.c   |  2 ++
 net/bluetooth/hci_request.c | 23 +++++++++++++++++++++--
 net/bluetooth/hci_request.h |  1 +
 net/bluetooth/mgmt.c        |  3 +++
 5 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index aace97099ead..faa2922a69fd 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1647,6 +1647,8 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define LE_SET_ADV_DATA_NO_FRAG		0x01
 
+#define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0a92bf7e3d80..a78d1dd2f57b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1129,6 +1129,8 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
 			queue_delayed_work(hdev->workqueue,
 					   &conn->le_conn_timeout,
 					   conn->conn_timeout);
+	} else {
+		hci_dev_clear_flag(hdev, HCI_LE_ADV);
 	}
 
 	hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c41e9bb7818b..96e1e05a92c3 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -934,9 +934,19 @@ static u8 get_cur_adv_instance_scan_rsp_len(struct hci_dev *hdev)
 
 void __hci_req_disable_advertising(struct hci_request *req)
 {
-	u8 enable = 0x00;
+	if (ext_adv_capable(req->hdev)) {
+		struct hci_cp_le_set_ext_adv_enable cp;
 
-	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+		cp.enable = 0x00;
+		/* Disable all sets since we only support one set at the moment */
+		cp.num_of_sets = 0x00;
+
+		hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, sizeof(cp), &cp);
+	} else {
+		u8 enable = 0x00;
+
+		hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
+	}
 }
 
 static u32 get_adv_instance_flags(struct hci_dev *hdev, u8 instance)
@@ -1430,6 +1440,11 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+void __hci_req_clear_ext_adv_sets(struct hci_request *req)
+{
+	hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
+}
+
 int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 {
 	struct hci_cp_le_set_ext_adv_params cp;
@@ -1499,8 +1514,12 @@ void __hci_req_enable_ext_advertising(struct hci_request *req)
 
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance)
 {
+	struct hci_dev *hdev = req->hdev;
 	int err;
 
+	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
+		__hci_req_disable_advertising(req);
+
 	err = __hci_req_setup_ext_adv_instance(req, instance);
 	if (err < 0)
 		return err;
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 6afc624605af..2451861bb4f8 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -83,6 +83,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
 int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
 void __hci_req_enable_ext_advertising(struct hci_request *req);
+void __hci_req_clear_ext_adv_sets(struct hci_request *req);
 
 void __hci_req_update_class(struct hci_request *req);
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 142f7e72a9a2..c283f0364c0f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1956,6 +1956,9 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	} else {
 		if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 			__hci_req_disable_advertising(&req);
+
+		if (ext_adv_capable(hdev))
+			__hci_req_clear_ext_adv_sets(&req);
 	}
 
 	hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(hci_cp),
-- 
cgit v1.2.3


From a73c046a2869048430c332a871a5b169f192c6c3 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:45 +0530
Subject: Bluetooth: Implement Set ADV set random address

This basically sets the random address for the adv instance
Random address can be set only if the instance is created which
is done in Set ext adv param.

Random address and rpa expire timer and flags have been added
to adv instance which will be used when the respective
instance is scheduled.

This introduces a hci_get_random_address() which returns the
own address type and random address (rpa or nrpa) based
on the instance flags and hdev flags. New function is required
since own address type should be known before setting adv params
but address can be set only after setting params.

< HCI Command: LE Set Advertising Set Random Address (0x08|0x0035) plen 7
        Advertising handle: 0x00
        Advertising random address: 3C:8E:56:9B:77:84 (OUI 3C-8E-56)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Advertising Set Random Address (0x08|0x0035) ncmd 1
        Status: Success (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      |   6 ++
 include/net/bluetooth/hci_core.h |   4 ++
 net/bluetooth/hci_conn.c         |  23 +++++++
 net/bluetooth/hci_core.c         |  33 +++++++++-
 net/bluetooth/hci_event.c        |  37 ++++++++++-
 net/bluetooth/hci_request.c      | 128 ++++++++++++++++++++++++++++++++++++++-
 net/bluetooth/hci_request.h      |   3 +
 net/bluetooth/mgmt.c             |   2 +
 8 files changed, 233 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index faa2922a69fd..8d348d0d3eea 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1649,6 +1649,12 @@ struct hci_cp_le_set_ext_scan_rsp_data {
 
 #define HCI_OP_LE_CLEAR_ADV_SETS	0x203d
 
+#define HCI_OP_LE_SET_ADV_SET_RAND_ADDR	0x2035
+struct hci_cp_le_set_adv_set_rand_addr {
+	__u8  handle;
+	bdaddr_t  bdaddr;
+} __packed;
+
 /* ---- HCI Events ---- */
 #define HCI_EV_INQUIRY_COMPLETE		0x01
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ad3518303a0c..0db1b9b428b7 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -172,6 +172,9 @@ struct adv_info {
 	__u16	scan_rsp_len;
 	__u8	scan_rsp_data[HCI_MAX_AD_LENGTH];
 	__s8	tx_power;
+	bdaddr_t	random_addr;
+	bool 		rpa_expired;
+	struct delayed_work	rpa_expired_cb;
 };
 
 #define HCI_MAX_ADV_INSTANCES		5
@@ -1113,6 +1116,7 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 			 u16 scan_rsp_len, u8 *scan_rsp_data,
 			 u16 timeout, u16 duration);
 int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance);
+void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired);
 
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5c37d383caa3..bd4978ce8c45 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -873,6 +873,14 @@ static void hci_req_directed_advertising(struct hci_request *req,
 
 	if (ext_adv_capable(hdev)) {
 		struct hci_cp_le_set_ext_adv_params cp;
+		bdaddr_t random_addr;
+
+		/* Set require_privacy to false so that the remote device has a
+		 * chance of identifying us.
+		 */
+		if (hci_get_random_address(hdev, false, conn_use_rpa(conn), NULL,
+					   &own_addr_type, &random_addr) < 0)
+			return;
 
 		memset(&cp, 0, sizeof(cp));
 
@@ -889,6 +897,21 @@ static void hci_req_directed_advertising(struct hci_request *req,
 
 		hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
 
+		if (own_addr_type == ADDR_LE_DEV_RANDOM &&
+		    bacmp(&random_addr, BDADDR_ANY) &&
+		    bacmp(&random_addr, &hdev->random_addr)) {
+			struct hci_cp_le_set_adv_set_rand_addr cp;
+
+			memset(&cp, 0, sizeof(cp));
+
+			cp.handle = 0;
+			bacpy(&cp.bdaddr, &random_addr);
+
+			hci_req_add(req,
+				    HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
+				    sizeof(cp), &cp);
+		}
+
 		__hci_req_enable_ext_advertising(req);
 	} else {
 		struct hci_cp_le_set_adv_param cp;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 944d4fedc317..840e8fd89fa5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1471,6 +1471,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 	if (!ret) {
 		hci_dev_hold(hdev);
 		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+		hci_adv_instances_set_rpa_expired(hdev, true);
 		set_bit(HCI_UP, &hdev->flags);
 		hci_sock_dev_event(hdev, HCI_DEV_UP);
 		hci_leds_update_powered(hdev, true);
@@ -1626,9 +1627,15 @@ int hci_dev_do_close(struct hci_dev *hdev)
 	if (hci_dev_test_and_clear_flag(hdev, HCI_SERVICE_CACHE))
 		cancel_delayed_work(&hdev->service_cache);
 
-	if (hci_dev_test_flag(hdev, HCI_MGMT))
+	if (hci_dev_test_flag(hdev, HCI_MGMT)) {
+		struct adv_info *adv_instance;
+
 		cancel_delayed_work_sync(&hdev->rpa_expired);
 
+		list_for_each_entry(adv_instance, &hdev->adv_instances, list)
+			cancel_delayed_work_sync(&adv_instance->rpa_expired_cb);
+	}
+
 	/* Avoid potential lockdep warnings from the *_flush() calls by
 	 * ensuring the workqueue is empty up front.
 	 */
@@ -2704,6 +2711,8 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance)
 		hdev->cur_adv_instance = 0x00;
 	}
 
+	cancel_delayed_work_sync(&adv_instance->rpa_expired_cb);
+
 	list_del(&adv_instance->list);
 	kfree(adv_instance);
 
@@ -2712,6 +2721,14 @@ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance)
 	return 0;
 }
 
+void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired)
+{
+	struct adv_info *adv_instance, *n;
+
+	list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list)
+		adv_instance->rpa_expired = rpa_expired;
+}
+
 /* This function requires the caller holds hdev->lock */
 void hci_adv_instances_clear(struct hci_dev *hdev)
 {
@@ -2723,6 +2740,7 @@ void hci_adv_instances_clear(struct hci_dev *hdev)
 	}
 
 	list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) {
+		cancel_delayed_work_sync(&adv_instance->rpa_expired_cb);
 		list_del(&adv_instance->list);
 		kfree(adv_instance);
 	}
@@ -2731,6 +2749,16 @@ void hci_adv_instances_clear(struct hci_dev *hdev)
 	hdev->cur_adv_instance = 0x00;
 }
 
+static void adv_instance_rpa_expired(struct work_struct *work)
+{
+	struct adv_info *adv_instance = container_of(work, struct adv_info,
+						     rpa_expired_cb.work);
+
+	BT_DBG("");
+
+	adv_instance->rpa_expired = true;
+}
+
 /* This function requires the caller holds hdev->lock */
 int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 			 u16 adv_data_len, u8 *adv_data,
@@ -2781,6 +2809,9 @@ int hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags,
 
 	adv_instance->tx_power = HCI_TX_POWER_INVALID;
 
+	INIT_DELAYED_WORK(&adv_instance->rpa_expired_cb,
+			  adv_instance_rpa_expired);
+
 	BT_DBG("%s for %dMR", hdev->name, instance);
 
 	return 0;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a78d1dd2f57b..392c9d8febd0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1064,6 +1064,35 @@ static void hci_cc_le_set_default_phy(struct hci_dev *hdev, struct sk_buff *skb)
 	hci_dev_unlock(hdev);
 }
 
+static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev,
+                                              struct sk_buff *skb)
+{
+	__u8 status = *((__u8 *) skb->data);
+	struct hci_cp_le_set_adv_set_rand_addr *cp;
+	struct adv_info *adv_instance;
+
+	if (status)
+		return;
+
+	cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR);
+	if (!cp)
+		return;
+
+	hci_dev_lock(hdev);
+
+	if (!hdev->cur_adv_instance) {
+		/* Store in hdev for instance 0 (Set adv and Directed advs) */
+		bacpy(&hdev->random_addr, &cp->bdaddr);
+	} else {
+		adv_instance = hci_find_adv_instance(hdev,
+						     hdev->cur_adv_instance);
+		if (adv_instance)
+			bacpy(&adv_instance->random_addr, &cp->bdaddr);
+	}
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	__u8 *sent, status = *((__u8 *) skb->data);
@@ -2830,8 +2859,10 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	/* We should disregard the current RPA and generate a new one
 	 * whenever the encryption procedure fails.
 	 */
-	if (ev->status && conn->type == LE_LINK)
+	if (ev->status && conn->type == LE_LINK) {
 		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+		hci_adv_instances_set_rpa_expired(hdev, true);
+	}
 
 	clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
 
@@ -3283,6 +3314,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_set_ext_adv_enable(hdev, skb);
 		break;
 
+	case HCI_OP_LE_SET_ADV_SET_RAND_ADDR:
+		hci_cc_le_set_adv_set_random_addr(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, *opcode);
 		break;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 96e1e05a92c3..c72fd9202666 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1440,6 +1440,87 @@ unlock:
 	hci_dev_unlock(hdev);
 }
 
+int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
+			   bool use_rpa, struct adv_info *adv_instance,
+			   u8 *own_addr_type, bdaddr_t *rand_addr)
+{
+	int err;
+
+	bacpy(rand_addr, BDADDR_ANY);
+
+	/* If privacy is enabled use a resolvable private address. If
+	 * current RPA has expired then generate a new one.
+	 */
+	if (use_rpa) {
+		int to;
+
+		*own_addr_type = ADDR_LE_DEV_RANDOM;
+
+		if (adv_instance) {
+			if (!adv_instance->rpa_expired &&
+			    !bacmp(&adv_instance->random_addr, &hdev->rpa))
+				return 0;
+
+			adv_instance->rpa_expired = false;
+		} else {
+			if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
+			    !bacmp(&hdev->random_addr, &hdev->rpa))
+				return 0;
+		}
+
+		err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
+		if (err < 0) {
+			BT_ERR("%s failed to generate new RPA", hdev->name);
+			return err;
+		}
+
+		bacpy(rand_addr, &hdev->rpa);
+
+		to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
+		if (adv_instance)
+			queue_delayed_work(hdev->workqueue,
+					   &adv_instance->rpa_expired_cb, to);
+		else
+			queue_delayed_work(hdev->workqueue,
+					   &hdev->rpa_expired, to);
+
+		return 0;
+	}
+
+	/* In case of required privacy without resolvable private address,
+	 * use an non-resolvable private address. This is useful for
+	 * non-connectable advertising.
+	 */
+	if (require_privacy) {
+		bdaddr_t nrpa;
+
+		while (true) {
+			/* The non-resolvable private address is generated
+			 * from random six bytes with the two most significant
+			 * bits cleared.
+			 */
+			get_random_bytes(&nrpa, 6);
+			nrpa.b[5] &= 0x3f;
+
+			/* The non-resolvable private address shall not be
+			 * equal to the public address.
+			 */
+			if (bacmp(&hdev->bdaddr, &nrpa))
+				break;
+		}
+
+		*own_addr_type = ADDR_LE_DEV_RANDOM;
+		bacpy(rand_addr, &nrpa);
+
+		return 0;
+	}
+
+	/* No privacy so use a public address. */
+	*own_addr_type = ADDR_LE_DEV_PUBLIC;
+
+	return 0;
+}
+
 void __hci_req_clear_ext_adv_sets(struct hci_request *req)
 {
 	hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
@@ -1451,9 +1532,21 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 	struct hci_dev *hdev = req->hdev;
 	bool connectable;
 	u32 flags;
+	bdaddr_t random_addr;
+	u8 own_addr_type;
+	int err;
+	struct adv_info *adv_instance;
 	/* In ext adv set param interval is 3 octets */
 	const u8 adv_interval[3] = { 0x00, 0x08, 0x00 };
 
+	if (instance > 0) {
+		adv_instance = hci_find_adv_instance(hdev, instance);
+		if (!adv_instance)
+			return -EINVAL;
+	} else {
+		adv_instance = NULL;
+	}
+
 	flags = get_adv_instance_flags(hdev, instance);
 
 	/* If the "connectable" instance flag was not set, then choose between
@@ -1465,6 +1558,16 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 	 if (!is_advertising_allowed(hdev, connectable))
 		return -EPERM;
 
+	/* Set require_privacy to true only when non-connectable
+	 * advertising is used. In that case it is fine to use a
+	 * non-resolvable private address.
+	 */
+	err = hci_get_random_address(hdev, !connectable,
+				     adv_use_rpa(hdev, flags), adv_instance,
+				     &own_addr_type, &random_addr);
+	if (err < 0)
+		return err;
+
 	memset(&cp, 0, sizeof(cp));
 
 	memcpy(cp.min_interval, adv_interval, sizeof(cp.min_interval));
@@ -1477,7 +1580,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 	else
 		cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND);
 
-	cp.own_addr_type = BDADDR_LE_PUBLIC;
+	cp.own_addr_type = own_addr_type;
 	cp.channel_map = hdev->le_adv_channel_map;
 	cp.tx_power = 127;
 	cp.primary_phy = HCI_ADV_PHY_1M;
@@ -1486,6 +1589,29 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 
 	hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
 
+	if (own_addr_type == ADDR_LE_DEV_RANDOM &&
+	    bacmp(&random_addr, BDADDR_ANY)) {
+		struct hci_cp_le_set_adv_set_rand_addr cp;
+
+		/* Check if random address need to be updated */
+		if (adv_instance) {
+			if (!bacmp(&random_addr, &adv_instance->random_addr))
+				return 0;
+		} else {
+			if (!bacmp(&random_addr, &hdev->random_addr))
+				return 0;
+		}
+
+		memset(&cp, 0, sizeof(cp));
+
+		cp.handle = 0;
+		bacpy(&cp.bdaddr, &random_addr);
+
+		hci_req_add(req,
+			    HCI_OP_LE_SET_ADV_SET_RAND_ADDR,
+			    sizeof(cp), &cp);
+	}
+
 	return 0;
 }
 
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index 2451861bb4f8..692cc8b13368 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -84,6 +84,9 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance);
 int __hci_req_start_ext_adv(struct hci_request *req, u8 instance);
 void __hci_req_enable_ext_advertising(struct hci_request *req);
 void __hci_req_clear_ext_adv_sets(struct hci_request *req);
+int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
+			   bool use_rpa, struct adv_info *adv_instance,
+			   u8 *own_addr_type, bdaddr_t *rand_addr);
 
 void __hci_req_update_class(struct hci_request *req);
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c283f0364c0f..949986727019 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -4972,6 +4972,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 		changed = !hci_dev_test_and_set_flag(hdev, HCI_PRIVACY);
 		memcpy(hdev->irk, cp->irk, sizeof(hdev->irk));
 		hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+		hci_adv_instances_set_rpa_expired(hdev, true);
 		if (cp->privacy == 0x02)
 			hci_dev_set_flag(hdev, HCI_LIMITED_PRIVACY);
 		else
@@ -4980,6 +4981,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 		changed = hci_dev_test_and_clear_flag(hdev, HCI_PRIVACY);
 		memset(hdev->irk, 0, sizeof(hdev->irk));
 		hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
+		hci_adv_instances_set_rpa_expired(hdev, false);
 		hci_dev_clear_flag(hdev, HCI_LIMITED_PRIVACY);
 	}
 
-- 
cgit v1.2.3


From acf0aeae431a0f1723385cd1cb50177e4cc10edd Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:46 +0530
Subject: Bluetooth: Handle ADv set terminated event

This event comes after connection complete event for incoming
connections. Since we now have different random address for
each instance, conn resp address is assigned from this event.

As of now only connection part is handled as we are not
enabling duration or max num of events while starting ext adv.

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  8 ++++++++
 net/bluetooth/hci_core.c    |  8 ++++++++
 net/bluetooth/hci_event.c   | 43 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 56 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8d348d0d3eea..57e3e3675d66 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2155,6 +2155,14 @@ struct hci_ev_le_enh_conn_complete {
 	__u8      clk_accurancy;
 } __packed;
 
+#define HCI_EV_LE_EXT_ADV_SET_TERM	0x12
+struct hci_evt_le_ext_adv_set_term {
+	__u8	status;
+	__u8	handle;
+	__le16	conn_handle;
+	__u8	num_evts;
+} __packed;
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 840e8fd89fa5..79e02d24a215 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -712,6 +712,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 						 * Complete
 						 */
 
+		/* If the controller supports the LE Extended Advertising
+		 * command, enable the corresponding event.
+		 */
+		if (ext_adv_capable(hdev))
+			events[2] |= 0x02;	/* LE Advertising Set
+						 * Terminated
+						 */
+
 		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, sizeof(events),
 			    events);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 392c9d8febd0..754714c8d752 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4798,10 +4798,15 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
 		 * the advertising address type.
 		 */
 		conn->resp_addr_type = hdev->adv_addr_type;
-		if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM)
-			bacpy(&conn->resp_addr, &hdev->random_addr);
-		else
+		if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
+			/* In case of ext adv, resp_addr will be updated in
+			 * Adv Terminated event.
+			 */
+			if (!ext_adv_capable(hdev))
+				bacpy(&conn->resp_addr, &hdev->random_addr);
+		} else {
 			bacpy(&conn->resp_addr, &hdev->bdaddr);
+		}
 
 		conn->init_addr_type = bdaddr_type;
 		bacpy(&conn->init_addr, bdaddr);
@@ -4931,6 +4936,34 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
 			     le16_to_cpu(ev->supervision_timeout));
 }
 
+static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
+
+	if (ev->status)
+		return;
+
+	conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
+	if (conn) {
+		struct adv_info *adv_instance;
+
+		if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
+			return;
+
+		if (!hdev->cur_adv_instance) {
+			bacpy(&conn->resp_addr, &hdev->random_addr);
+			return;
+		}
+
+		adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
+		if (adv_instance)
+			bacpy(&conn->resp_addr, &adv_instance->random_addr);
+	}
+}
+
 static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
 					    struct sk_buff *skb)
 {
@@ -5578,6 +5611,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_le_enh_conn_complete_evt(hdev, skb);
 		break;
 
+	case HCI_EV_LE_EXT_ADV_SET_TERM:
+		hci_le_ext_adv_term_evt(hdev, skb);
+		break;
+
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 85a721a8b0b6880d8cf6b9def70404ade8563225 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k.os@gmail.com>
Date: Thu, 19 Jul 2018 17:09:47 +0530
Subject: Bluetooth: Implement secondary advertising on different PHYs

This patch adds support for advertising in primary and secondary
channel on different PHYs. User can add the phy preference in
the flag based on which phy type will be added in extended
advertising parameter would be set.

@ MGMT Command: Add Advertising (0x003e) plen 11
        Instance: 1
        Flags: 0x00000200
          Advertise in CODED on Secondary channel
        Duration: 0
        Timeout: 0
        Advertising data length: 0
        Scan response length: 0
< HCI Command: LE Set Extended Advertising Enable (0x08|0x0039) plen 2
        Extended advertising: Disabled (0x00)
        Number of sets: Disable all sets (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      LE Set Extended Advertising Enable (0x08|0x0039) ncmd 2
        Status: Success (0x00)
< HCI Command: LE Set Extended Advertising Parameters (0x08|0x0036) plen 25
        Handle: 0x00
        Properties: 0x0000
        Min advertising interval: 1280.000 msec (0x0800)
        Max advertising interval: 1280.000 msec (0x0800)
        Channel map: 37, 38, 39 (0x07)
        Own address type: Random (0x01)
        Peer address type: Public (0x00)
        Peer address: 00:00:00:00:00:00 (OUI 00-00-00)
        Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00)
        TX power: 127 dbm (0x7f)
        Primary PHY: LE Coded (0x03)
        Secondary max skip: 0x00
        Secondary PHY: LE Coded (0x03)
        SID: 0x00
        Scan request notifications: Disabled (0x00)

Signed-off-by: Jaganath Kanakkassery <jaganathx.kanakkassery@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h  |  4 ++++
 include/net/bluetooth/mgmt.h |  6 ++++++
 net/bluetooth/hci_request.c  | 39 +++++++++++++++++++++++++++++++--------
 net/bluetooth/mgmt.c         | 18 +++++++++++++++---
 4 files changed, 56 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 57e3e3675d66..8ff36463719f 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -410,6 +410,8 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_PHY_2M			0x01
+#define HCI_LE_PHY_CODED		0x08
 #define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
@@ -1606,6 +1608,8 @@ struct hci_cp_le_set_ext_adv_params {
 } __packed;
 
 #define HCI_ADV_PHY_1M		0X01
+#define HCI_ADV_PHY_2M		0x02
+#define HCI_ADV_PHY_CODED	0x03
 
 struct hci_rp_le_set_ext_adv_params {
 	__u8  status;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 7f372e9067c9..9cee7ddc6741 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -562,6 +562,12 @@ struct mgmt_rp_add_advertising {
 #define MGMT_ADV_FLAG_TX_POWER		BIT(4)
 #define MGMT_ADV_FLAG_APPEARANCE	BIT(5)
 #define MGMT_ADV_FLAG_LOCAL_NAME	BIT(6)
+#define MGMT_ADV_FLAG_SEC_1M 		BIT(7)
+#define MGMT_ADV_FLAG_SEC_2M 		BIT(8)
+#define MGMT_ADV_FLAG_SEC_CODED 	BIT(9)
+
+#define MGMT_ADV_FLAG_SEC_MASK	(MGMT_ADV_FLAG_SEC_1M | MGMT_ADV_FLAG_SEC_2M | \
+				 MGMT_ADV_FLAG_SEC_CODED)
 
 #define MGMT_OP_REMOVE_ADVERTISING	0x003F
 struct mgmt_cp_remove_advertising {
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c72fd9202666..e8c9ef1e1922 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1536,6 +1536,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 	u8 own_addr_type;
 	int err;
 	struct adv_info *adv_instance;
+	bool secondary_adv;
 	/* In ext adv set param interval is 3 octets */
 	const u8 adv_interval[3] = { 0x00, 0x08, 0x00 };
 
@@ -1573,20 +1574,42 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 	memcpy(cp.min_interval, adv_interval, sizeof(cp.min_interval));
 	memcpy(cp.max_interval, adv_interval, sizeof(cp.max_interval));
 
-	if (connectable)
-		cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
-	else if (get_adv_instance_scan_rsp_len(hdev, instance))
-		cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND);
-	else
-		cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND);
+	secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK);
+
+	if (connectable) {
+		if (secondary_adv)
+			cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND);
+		else
+			cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND);
+	} else if (get_adv_instance_scan_rsp_len(hdev, instance)) {
+		if (secondary_adv)
+			cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND);
+		else
+			cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND);
+	} else {
+		if (secondary_adv)
+			cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND);
+		else
+			cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND);
+	}
 
 	cp.own_addr_type = own_addr_type;
 	cp.channel_map = hdev->le_adv_channel_map;
 	cp.tx_power = 127;
-	cp.primary_phy = HCI_ADV_PHY_1M;
-	cp.secondary_phy = HCI_ADV_PHY_1M;
 	cp.handle = 0;
 
+	if (flags & MGMT_ADV_FLAG_SEC_2M) {
+		cp.primary_phy = HCI_ADV_PHY_1M;
+		cp.secondary_phy = HCI_ADV_PHY_2M;
+	} else if (flags & MGMT_ADV_FLAG_SEC_CODED) {
+		cp.primary_phy = HCI_ADV_PHY_CODED;
+		cp.secondary_phy = HCI_ADV_PHY_CODED;
+	} else {
+		/* In all other cases use 1M */
+		cp.primary_phy = HCI_ADV_PHY_1M;
+		cp.secondary_phy = HCI_ADV_PHY_1M;
+	}
+
 	hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp);
 
 	if (own_addr_type == ADDR_LE_DEV_RANDOM &&
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 949986727019..231602f7cb66 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -6339,6 +6339,16 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev)
 	    ext_adv_capable(hdev))
 		flags |= MGMT_ADV_FLAG_TX_POWER;
 
+	if (ext_adv_capable(hdev)) {
+		flags |= MGMT_ADV_FLAG_SEC_1M;
+
+		if (hdev->le_features[1] & HCI_LE_PHY_2M)
+			flags |= MGMT_ADV_FLAG_SEC_2M;
+
+		if (hdev->le_features[1] & HCI_LE_PHY_CODED)
+			flags |= MGMT_ADV_FLAG_SEC_CODED;
+	}
+
 	return flags;
 }
 
@@ -6544,7 +6554,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_cp_add_advertising *cp = data;
 	struct mgmt_rp_add_advertising rp;
 	u32 flags;
-	u32 supported_flags;
+	u32 supported_flags, phy_flags;
 	u8 status;
 	u16 timeout, duration;
 	unsigned int prev_instance_cnt = hdev->adv_instance_cnt;
@@ -6574,10 +6584,12 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 	duration = __le16_to_cpu(cp->duration);
 
 	/* The current implementation only supports a subset of the specified
-	 * flags.
+	 * flags. Also need to check mutual exclusiveness of sec flags.
 	 */
 	supported_flags = get_supported_adv_flags(hdev);
-	if (flags & ~supported_flags)
+	phy_flags = flags & MGMT_ADV_FLAG_SEC_MASK;
+	if (flags & ~supported_flags ||
+	    ((phy_flags && (phy_flags ^ (phy_flags & -phy_flags)))))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
 				       MGMT_STATUS_INVALID_PARAMS);
 
-- 
cgit v1.2.3


From 740011cfe94859df8d05f5400d589a8693b095e7 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 20 Jul 2018 13:12:28 +0800
Subject: Bluetooth: Add new quirk for non-persistent setup settings

Add a new quirk HCI_QUIRK_NON_PERSISTENT_SETUP allowing that a quirk that
runs setup() after every open() and not just after the first open().

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 9 +++++++++
 net/bluetooth/hci_core.c    | 3 ++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 8ff36463719f..7f008097552e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -183,6 +183,15 @@ enum {
 	 * during the hdev->setup vendor callback.
 	 */
 	HCI_QUIRK_NON_PERSISTENT_DIAG,
+
+	/* When this quirk is set, setup() would be run after every
+	 * open() and not just after the first open().
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 *
+	 */
+	HCI_QUIRK_NON_PERSISTENT_SETUP,
 };
 
 /* HCI device flags */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 79e02d24a215..74b29c7d841c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1415,7 +1415,8 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 	atomic_set(&hdev->cmd_cnt, 1);
 	set_bit(HCI_INIT, &hdev->flags);
 
-	if (hci_dev_test_flag(hdev, HCI_SETUP)) {
+	if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    test_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks)) {
 		hci_sock_dev_event(hdev, HCI_DEV_SETUP);
 
 		if (hdev->setup)
-- 
cgit v1.2.3


From 51c23b47e6b8590ea7a6a6776ffb21810ece73bf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 13 Jul 2018 14:54:45 +0200
Subject: netfilter: nf_osf: add nf_osf_find()

This new function returns the OS genre as a string. Plan is to use to
from the new nft_osf extension.

Note that this doesn't yet support ttl options, but it could be easily
extended to do so.

Tested-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h |  9 +++++++++
 net/netfilter/nf_osf.c           | 30 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
index 0e114c492fb8..aee460fcbd31 100644
--- a/include/linux/netfilter/nf_osf.h
+++ b/include/linux/netfilter/nf_osf.h
@@ -1,3 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NFOSF_H
+#define _NFOSF_H
+
 #include <uapi/linux/netfilter/nf_osf.h>
 
 /* Initial window size option state machine: multiple of mss, mtu or
@@ -31,3 +35,8 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 		  int hooknum, struct net_device *in, struct net_device *out,
 		  const struct nf_osf_info *info, struct net *net,
 		  const struct list_head *nf_osf_fingers);
+
+const char *nf_osf_find(const struct sk_buff *skb,
+                        const struct list_head *nf_osf_fingers);
+
+#endif /* _NFOSF_H */
diff --git a/net/netfilter/nf_osf.c b/net/netfilter/nf_osf.c
index b44d62d5d9a9..f4c75e982902 100644
--- a/net/netfilter/nf_osf.c
+++ b/net/netfilter/nf_osf.c
@@ -249,4 +249,34 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 }
 EXPORT_SYMBOL_GPL(nf_osf_match);
 
+const char *nf_osf_find(const struct sk_buff *skb,
+			const struct list_head *nf_osf_fingers)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct nf_osf_user_finger *f;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct nf_osf_finger *kf;
+	struct nf_osf_hdr_ctx ctx;
+	const struct tcphdr *tcp;
+	const char *genre = NULL;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts);
+	if (!tcp)
+		return false;
+
+	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
+		f = &kf->finger;
+		if (!nf_osf_match_one(skb, f, -1, &ctx))
+			continue;
+
+		genre = f->genre;
+		break;
+	}
+
+	return genre;
+}
+EXPORT_SYMBOL_GPL(nf_osf_find);
+
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From f9324952088f1cd62ea4addf9ff532f1e6452a22 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 25 Jul 2018 01:32:45 +0200
Subject: netfilter: nfnetlink_osf: extract nfnetlink_subsystem code from
 xt_osf.c

Move nfnetlink osf subsystem from xt_osf.c to standalone module so we can
reuse it from the new nft_ost extension.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_osf.h |  11 +++
 include/uapi/linux/netfilter/xt_osf.h |   9 +-
 net/netfilter/nfnetlink_osf.c         | 154 ++++++++++++++++++++++++++++++++++
 net/netfilter/xt_osf.c                | 149 +-------------------------------
 4 files changed, 169 insertions(+), 154 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
index 3738116b2bbe..cc2487ff74f6 100644
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -70,6 +70,8 @@ struct nf_osf_nlmsg {
 	struct tcphdr			tcp;
 };
 
+extern struct list_head nf_osf_fingers[2];
+
 /* Defines for IANA option kinds */
 enum iana_options {
 	OSFOPT_EOL = 0,		/* End of options */
@@ -94,4 +96,13 @@ enum nf_osf_attr_type {
 	OSF_ATTR_MAX,
 };
 
+/*
+ * Add/remove fingerprint from the kernel.
+ */
+enum nf_osf_msg_types {
+	OSF_MSG_ADD,
+	OSF_MSG_REMOVE,
+	OSF_MSG_MAX,
+};
+
 #endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index b189007f4f28..a90e90c27cef 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -47,13 +47,6 @@
 #define xt_osf_nlmsg		nf_osf_nlmsg
 
 #define xt_osf_attr_type	nf_osf_attr_type
-/*
- * Add/remove fingerprint from the kernel.
- */
-enum xt_osf_msg_types {
-	OSF_MSG_ADD,
-	OSF_MSG_REMOVE,
-	OSF_MSG_MAX,
-};
+#define xt_osf_msg_types	nf_osf_msg_types
 
 #endif				/* _XT_OSF_H */
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index f4c75e982902..ba0fa11869ce 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -20,6 +20,13 @@
 #include <net/netfilter/nf_log.h>
 #include <linux/netfilter/nf_osf.h>
 
+/*
+ * Indexed by dont-fragment bit.
+ * It is the only constant value in the fingerprint.
+ */
+struct list_head nf_osf_fingers[2];
+EXPORT_SYMBOL_GPL(nf_osf_fingers);
+
 static inline int nf_osf_ttl(const struct sk_buff *skb,
 			     int ttl_check, unsigned char f_ttl)
 {
@@ -279,4 +286,151 @@ const char *nf_osf_find(const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_osf_find);
 
+static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = {
+	[OSF_ATTR_FINGER]	= { .len = sizeof(struct nf_osf_user_finger) },
+};
+
+static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
+				 struct sk_buff *skb, const struct nlmsghdr *nlh,
+				 const struct nlattr * const osf_attrs[],
+				 struct netlink_ext_ack *extack)
+{
+	struct nf_osf_user_finger *f;
+	struct nf_osf_finger *kf = NULL, *sf;
+	int err = 0;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
+	if (!kf)
+		return -ENOMEM;
+
+	memcpy(&kf->finger, f, sizeof(struct nf_osf_user_finger));
+
+	list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger)))
+			continue;
+
+		kfree(kf);
+		kf = NULL;
+
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			err = -EEXIST;
+		break;
+	}
+
+	/*
+	 * We are protected by nfnl mutex.
+	 */
+	if (kf)
+		list_add_tail_rcu(&kf->finger_entry, &nf_osf_fingers[!!f->df]);
+
+	return err;
+}
+
+static int nfnl_osf_remove_callback(struct net *net, struct sock *ctnl,
+				    struct sk_buff *skb,
+				    const struct nlmsghdr *nlh,
+				    const struct nlattr * const osf_attrs[],
+				    struct netlink_ext_ack *extack)
+{
+	struct nf_osf_user_finger *f;
+	struct nf_osf_finger *sf;
+	int err = -ENOENT;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	list_for_each_entry(sf, &nf_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct nf_osf_user_finger)))
+			continue;
+
+		/*
+		 * We are protected by nfnl mutex.
+		 */
+		list_del_rcu(&sf->finger_entry);
+		kfree_rcu(sf, rcu_head);
+
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static const struct nfnl_callback nfnl_osf_callbacks[OSF_MSG_MAX] = {
+	[OSF_MSG_ADD]	= {
+		.call		= nfnl_osf_add_callback,
+		.attr_count	= OSF_ATTR_MAX,
+		.policy		= nfnl_osf_policy,
+	},
+	[OSF_MSG_REMOVE]	= {
+		.call		= nfnl_osf_remove_callback,
+		.attr_count	= OSF_ATTR_MAX,
+		.policy		= nfnl_osf_policy,
+	},
+};
+
+static const struct nfnetlink_subsystem nfnl_osf_subsys = {
+	.name			= "osf",
+	.subsys_id		= NFNL_SUBSYS_OSF,
+	.cb_count		= OSF_MSG_MAX,
+	.cb			= nfnl_osf_callbacks,
+};
+
+static int __init nfnl_osf_init(void)
+{
+	int err = -EINVAL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i)
+		INIT_LIST_HEAD(&nf_osf_fingers[i]);
+
+	err = nfnetlink_subsys_register(&nfnl_osf_subsys);
+	if (err < 0) {
+		pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err);
+		goto err_out_exit;
+	}
+	return 0;
+
+err_out_exit:
+	return err;
+}
+
+static void __exit nfnl_osf_fini(void)
+{
+	struct nf_osf_finger *f;
+	int i;
+
+	nfnetlink_subsys_unregister(&nfnl_osf_subsys);
+
+	rcu_read_lock();
+	for (i = 0; i < ARRAY_SIZE(nf_osf_fingers); ++i) {
+		list_for_each_entry_rcu(f, &nf_osf_fingers[i], finger_entry) {
+			list_del_rcu(&f->finger_entry);
+			kfree_rcu(f, rcu_head);
+		}
+	}
+	rcu_read_unlock();
+
+	rcu_barrier();
+}
+
+module_init(nfnl_osf_init);
+module_exit(nfnl_osf_fini);
+
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 9cfef73b4107..bf7bba80e24c 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -37,118 +37,6 @@
 #include <net/netfilter/nf_log.h>
 #include <linux/netfilter/xt_osf.h>
 
-/*
- * Indexed by dont-fragment bit.
- * It is the only constant value in the fingerprint.
- */
-static struct list_head xt_osf_fingers[2];
-
-static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
-	[OSF_ATTR_FINGER]	= { .len = sizeof(struct xt_osf_user_finger) },
-};
-
-static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
-			       struct sk_buff *skb, const struct nlmsghdr *nlh,
-			       const struct nlattr * const osf_attrs[],
-			       struct netlink_ext_ack *extack)
-{
-	struct xt_osf_user_finger *f;
-	struct xt_osf_finger *kf = NULL, *sf;
-	int err = 0;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!osf_attrs[OSF_ATTR_FINGER])
-		return -EINVAL;
-
-	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
-		return -EINVAL;
-
-	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
-
-	kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL);
-	if (!kf)
-		return -ENOMEM;
-
-	memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger));
-
-	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
-		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
-			continue;
-
-		kfree(kf);
-		kf = NULL;
-
-		if (nlh->nlmsg_flags & NLM_F_EXCL)
-			err = -EEXIST;
-		break;
-	}
-
-	/*
-	 * We are protected by nfnl mutex.
-	 */
-	if (kf)
-		list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]);
-
-	return err;
-}
-
-static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
-				  struct sk_buff *skb,
-				  const struct nlmsghdr *nlh,
-				  const struct nlattr * const osf_attrs[],
-				  struct netlink_ext_ack *extack)
-{
-	struct xt_osf_user_finger *f;
-	struct xt_osf_finger *sf;
-	int err = -ENOENT;
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!osf_attrs[OSF_ATTR_FINGER])
-		return -EINVAL;
-
-	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
-
-	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
-		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
-			continue;
-
-		/*
-		 * We are protected by nfnl mutex.
-		 */
-		list_del_rcu(&sf->finger_entry);
-		kfree_rcu(sf, rcu_head);
-
-		err = 0;
-		break;
-	}
-
-	return err;
-}
-
-static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = {
-	[OSF_MSG_ADD]	= {
-		.call		= xt_osf_add_callback,
-		.attr_count	= OSF_ATTR_MAX,
-		.policy		= xt_osf_policy,
-	},
-	[OSF_MSG_REMOVE]	= {
-		.call		= xt_osf_remove_callback,
-		.attr_count	= OSF_ATTR_MAX,
-		.policy		= xt_osf_policy,
-	},
-};
-
-static const struct nfnetlink_subsystem xt_osf_nfnetlink = {
-	.name			= "osf",
-	.subsys_id		= NFNL_SUBSYS_OSF,
-	.cb_count		= OSF_MSG_MAX,
-	.cb			= xt_osf_nfnetlink_callbacks,
-};
-
 static bool
 xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
@@ -159,7 +47,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 		return false;
 
 	return nf_osf_match(skb, xt_family(p), xt_hooknum(p), xt_in(p),
-			    xt_out(p), info, net, xt_osf_fingers);
+			    xt_out(p), info, net, nf_osf_fingers);
 }
 
 static struct xt_match xt_osf_match = {
@@ -177,52 +65,21 @@ static struct xt_match xt_osf_match = {
 
 static int __init xt_osf_init(void)
 {
-	int err = -EINVAL;
-	int i;
-
-	for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i)
-		INIT_LIST_HEAD(&xt_osf_fingers[i]);
-
-	err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
-	if (err < 0) {
-		pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err);
-		goto err_out_exit;
-	}
+	int err;
 
 	err = xt_register_match(&xt_osf_match);
 	if (err) {
 		pr_err("Failed to register OS fingerprint "
 		       "matching module (%d)\n", err);
-		goto err_out_remove;
+		return err;
 	}
 
 	return 0;
-
-err_out_remove:
-	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
-err_out_exit:
-	return err;
 }
 
 static void __exit xt_osf_fini(void)
 {
-	struct xt_osf_finger *f;
-	int i;
-
-	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
 	xt_unregister_match(&xt_osf_match);
-
-	rcu_read_lock();
-	for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) {
-
-		list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) {
-			list_del_rcu(&f->finger_entry);
-			kfree_rcu(f, rcu_head);
-		}
-	}
-	rcu_read_unlock();
-
-	rcu_barrier();
 }
 
 module_init(xt_osf_init);
-- 
cgit v1.2.3


From b96af92d6eaf9fadd77aa798c508a8a9d2e60020 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 25 Jul 2018 01:32:46 +0200
Subject: netfilter: nf_tables: implement Passive OS fingerprint module in
 nft_osf

Add basic module functions into nft_osf.[ch] in order to implement OSF
module in nf_tables.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   7 ++
 net/netfilter/Kconfig                    |   7 ++
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_osf.c                  | 106 +++++++++++++++++++++++++++++++
 4 files changed, 121 insertions(+)
 create mode 100644 net/netfilter/nft_osf.c

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index f466860bcf75..382c32d630e9 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1463,6 +1463,13 @@ enum nft_flowtable_hook_attributes {
 };
 #define NFTA_FLOWTABLE_HOOK_MAX	(__NFTA_FLOWTABLE_HOOK_MAX - 1)
 
+enum nft_osf_attributes {
+	NFTA_OSF_UNSPEC,
+	NFTA_OSF_DREG,
+	__NFTA_OSF_MAX,
+};
+#define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1)
+
 /**
  * enum nft_device_attributes - nf_tables device netlink attributes
  *
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 85333431e524..16fdfb75efb5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -627,6 +627,13 @@ config NFT_SOCKET
 	  This option allows matching for the presence or absence of a
 	  corresponding socket and its attributes.
 
+config NFT_OSF
+	tristate "Netfilter nf_tables passive OS fingerprint support"
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK_OSF
+	help
+	  This option allows matching packets from an specific OS.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e684f9b8a9c3..5cbbf6978b55 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -110,6 +110,7 @@ obj-$(CONFIG_NFT_FIB)		+= nft_fib.o
 obj-$(CONFIG_NFT_FIB_INET)	+= nft_fib_inet.o
 obj-$(CONFIG_NFT_FIB_NETDEV)	+= nft_fib_netdev.o
 obj-$(CONFIG_NFT_SOCKET)	+= nft_socket.o
+obj-$(CONFIG_NFT_OSF)		+= nft_osf.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
new file mode 100644
index 000000000000..bdacc4cffba4
--- /dev/null
+++ b/net/netfilter/nft_osf.c
@@ -0,0 +1,106 @@
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_osf.h>
+
+#define OSF_GENRE_SIZE		32
+
+struct nft_osf {
+	enum nft_registers	dreg:8;
+};
+
+static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
+	[NFTA_OSF_DREG]		= { .type = NLA_U32 },
+};
+
+static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
+			 const struct nft_pktinfo *pkt)
+{
+	struct nft_osf *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	struct sk_buff *skb = pkt->skb;
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+	const char *os_name;
+
+	tcp = skb_header_pointer(skb, ip_hdrlen(skb),
+				 sizeof(struct tcphdr), &_tcph);
+	if (!tcp) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+	if (!tcp->syn) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	os_name = nf_osf_find(skb, nf_osf_fingers);
+	if (!os_name)
+		strncpy((char *)dest, "unknown", IFNAMSIZ);
+	else
+		strncpy((char *)dest, os_name, IFNAMSIZ);
+}
+
+static int nft_osf_init(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_osf *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
+	err = nft_validate_register_store(ctx, priv->dreg, NULL,
+					  NFTA_DATA_VALUE, OSF_GENRE_SIZE);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_osf *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_osf_type;
+static const struct nft_expr_ops nft_osf_op = {
+	.eval		= nft_osf_eval,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_osf)),
+	.init		= nft_osf_init,
+	.dump		= nft_osf_dump,
+	.type		= &nft_osf_type,
+};
+
+static struct nft_expr_type nft_osf_type __read_mostly = {
+	.ops		= &nft_osf_op,
+	.name		= "osf",
+	.owner		= THIS_MODULE,
+	.policy		= nft_osf_policy,
+	.maxattr	= NFTA_OSF_MAX,
+};
+
+static int __init nft_osf_module_init(void)
+{
+	return nft_register_expr(&nft_osf_type);
+}
+
+static void __exit nft_osf_module_exit(void)
+{
+	return nft_unregister_expr(&nft_osf_type);
+}
+
+module_init(nft_osf_module_init);
+module_exit(nft_osf_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
+MODULE_ALIAS_NFT_EXPR("osf");
-- 
cgit v1.2.3


From 4ed8eb6570a49931c705512060acd50058d61616 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Mon, 30 Jul 2018 11:07:32 +0200
Subject: netfilter: nf_tables: Add native tproxy support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A great portion of the code is taken from xt_TPROXY.c

There are some changes compared to the iptables implementation:
 - tproxy statement is not terminal here
 - Either address or port has to be specified, but at least one of them
   is necessary. If one of them is not specified, the evaluation will be
   performed with the original attribute of the packet (ie. target port
   is not specified => the packet's dport will be used).

To make this work in inet tables, the tproxy structure has a family
member (typically called priv->family) which is not necessarily equal to
ctx->family.

priv->family can have three values legally:
 - NFPROTO_IPV4 if the table family is ip OR if table family is inet,
   but an ipv4 address is specified as a target address. The rule only
   evaluates ipv4 packets in this case.
 - NFPROTO_IPV6 if the table family is ip6 OR if table family is inet,
   but an ipv6 address is specified as a target address. The rule only
   evaluates ipv6 packets in this case.
 - NFPROTO_UNSPEC if the table family is inet AND if only the port is
   specified. The rule will evaluate both ipv4 and ipv6 packets.

Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  16 ++
 net/netfilter/Kconfig                    |  10 +
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_tproxy.c               | 314 +++++++++++++++++++++++++++++++
 4 files changed, 341 insertions(+)
 create mode 100644 net/netfilter/nft_tproxy.c

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 382c32d630e9..f112ea52dc1a 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1252,6 +1252,22 @@ enum nft_nat_attributes {
 };
 #define NFTA_NAT_MAX		(__NFTA_NAT_MAX - 1)
 
+/**
+ * enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes
+ *
+ * NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers)
+ * NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers)
+ * NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers)
+ */
+enum nft_tproxy_attributes {
+	NFTA_TPROXY_UNSPEC,
+	NFTA_TPROXY_FAMILY,
+	NFTA_TPROXY_REG_ADDR,
+	NFTA_TPROXY_REG_PORT,
+	__NFTA_TPROXY_MAX
+};
+#define NFTA_TPROXY_MAX		(__NFTA_TPROXY_MAX - 1)
+
 /**
  * enum nft_masq_attributes - nf_tables masquerade expression attributes
  *
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 16fdfb75efb5..0febf3e21f91 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -634,6 +634,16 @@ config NFT_OSF
 	help
 	  This option allows matching packets from an specific OS.
 
+config NFT_TPROXY
+	tristate "Netfilter nf_tables tproxy support"
+	depends on IPV6 || IPV6=n
+	select NF_DEFRAG_IPV4
+	select NF_DEFRAG_IPV6 if NF_TABLES_IPV6
+	select NF_TPROXY_IPV4
+	select NF_TPROXY_IPV6 if NF_TABLES_IPV6
+	help
+	  This makes transparent proxy support available in nftables.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 5cbbf6978b55..cf61615cc529 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_NFT_FIB_INET)	+= nft_fib_inet.o
 obj-$(CONFIG_NFT_FIB_NETDEV)	+= nft_fib_netdev.o
 obj-$(CONFIG_NFT_SOCKET)	+= nft_socket.o
 obj-$(CONFIG_NFT_OSF)		+= nft_osf.o
+obj-$(CONFIG_NFT_TPROXY)	+= nft_tproxy.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
new file mode 100644
index 000000000000..c6845f7baa08
--- /dev/null
+++ b/net/netfilter/nft_tproxy.c
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tproxy.h>
+#include <net/inet_sock.h>
+#include <net/tcp.h>
+#include <linux/if_ether.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#endif
+
+struct nft_tproxy {
+	enum nft_registers      sreg_addr:8;
+	enum nft_registers      sreg_port:8;
+	u8			family;
+};
+
+static void nft_tproxy_eval_v4(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_tproxy *priv = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	const struct iphdr *iph = ip_hdr(skb);
+	struct udphdr _hdr, *hp;
+	__be32 taddr = 0;
+	__be16 tport = 0;
+	struct sock *sk;
+
+	hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+	if (!hp) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	/* check if there's an ongoing connection on the packet addresses, this
+	 * happens if the redirect already happened and the current packet
+	 * belongs to an already established connection
+	 */
+	sk = nf_tproxy_get_sock_v4(nft_net(pkt), skb, iph->protocol,
+				   iph->saddr, iph->daddr,
+				   hp->source, hp->dest,
+				   skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
+
+	if (priv->sreg_addr)
+		taddr = regs->data[priv->sreg_addr];
+	taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr);
+
+	if (priv->sreg_port)
+		tport = regs->data[priv->sreg_port];
+	if (!tport)
+		tport = hp->dest;
+
+	/* UDP has no TCP_TIME_WAIT state, so we never enter here */
+	if (sk && sk->sk_state == TCP_TIME_WAIT) {
+		/* reopening a TIME_WAIT connection needs special handling */
+		sk = nf_tproxy_handle_time_wait4(nft_net(pkt), skb, taddr, tport, sk);
+	} else if (!sk) {
+		/* no, there's no established connection, check if
+		 * there's a listener on the redirected addr/port
+		 */
+		sk = nf_tproxy_get_sock_v4(nft_net(pkt), skb, iph->protocol,
+					   iph->saddr, taddr,
+					   hp->source, tport,
+					   skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+	}
+
+	if (sk && nf_tproxy_sk_is_transparent(sk))
+		nf_tproxy_assign_sock(skb, sk);
+	else
+		regs->verdict.code = NFT_BREAK;
+}
+
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+static void nft_tproxy_eval_v6(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_tproxy *priv = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct in6_addr taddr = {0};
+	int thoff = pkt->xt.thoff;
+	struct udphdr _hdr, *hp;
+	__be16 tport = 0;
+	struct sock *sk;
+	int l4proto;
+
+	if (!pkt->tprot_set) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+	l4proto = pkt->tprot;
+
+	hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+	if (hp == NULL) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	/* check if there's an ongoing connection on the packet addresses, this
+	 * happens if the redirect already happened and the current packet
+	 * belongs to an already established connection
+	 */
+	sk = nf_tproxy_get_sock_v6(nft_net(pkt), skb, thoff, l4proto,
+				   &iph->saddr, &iph->daddr,
+				   hp->source, hp->dest,
+				   nft_in(pkt), NF_TPROXY_LOOKUP_ESTABLISHED);
+
+	if (priv->sreg_addr)
+		memcpy(&taddr, &regs->data[priv->sreg_addr], sizeof(taddr));
+	taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr);
+
+	if (priv->sreg_port)
+		tport = regs->data[priv->sreg_port];
+	if (!tport)
+		tport = hp->dest;
+
+	/* UDP has no TCP_TIME_WAIT state, so we never enter here */
+	if (sk && sk->sk_state == TCP_TIME_WAIT) {
+		/* reopening a TIME_WAIT connection needs special handling */
+		sk = nf_tproxy_handle_time_wait6(skb, l4proto, thoff,
+						 nft_net(pkt),
+						 &taddr,
+						 tport,
+						 sk);
+	} else if (!sk) {
+		/* no there's no established connection, check if
+		 * there's a listener on the redirected addr/port
+		 */
+		sk = nf_tproxy_get_sock_v6(nft_net(pkt), skb, thoff,
+					   l4proto, &iph->saddr, &taddr,
+					   hp->source, tport,
+					   nft_in(pkt), NF_TPROXY_LOOKUP_LISTENER);
+	}
+
+	/* NOTE: assign_sock consumes our sk reference */
+	if (sk && nf_tproxy_sk_is_transparent(sk))
+		nf_tproxy_assign_sock(skb, sk);
+	else
+		regs->verdict.code = NFT_BREAK;
+}
+#endif
+
+static void nft_tproxy_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+	switch (nft_pf(pkt)) {
+	case NFPROTO_IPV4:
+		switch (priv->family) {
+		case NFPROTO_IPV4:
+		case NFPROTO_UNSPEC:
+			nft_tproxy_eval_v4(expr, regs, pkt);
+			return;
+		}
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		switch (priv->family) {
+		case NFPROTO_IPV6:
+		case NFPROTO_UNSPEC:
+			nft_tproxy_eval_v6(expr, regs, pkt);
+			return;
+		}
+#endif
+	}
+	regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_tproxy_policy[NFTA_TPROXY_MAX + 1] = {
+	[NFTA_TPROXY_FAMILY]   = { .type = NLA_U32 },
+	[NFTA_TPROXY_REG_ADDR] = { .type = NLA_U32 },
+	[NFTA_TPROXY_REG_PORT] = { .type = NLA_U32 },
+};
+
+static int nft_tproxy_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_tproxy *priv = nft_expr_priv(expr);
+	unsigned int alen = 0;
+	int err;
+
+	if (!tb[NFTA_TPROXY_FAMILY] ||
+	    (!tb[NFTA_TPROXY_REG_ADDR] && !tb[NFTA_TPROXY_REG_PORT]))
+		return -EINVAL;
+
+	priv->family = ntohl(nla_get_be32(tb[NFTA_TPROXY_FAMILY]));
+
+	switch (ctx->family) {
+	case NFPROTO_IPV4:
+		if (priv->family != NFPROTO_IPV4)
+			return -EINVAL;
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		if (priv->family != NFPROTO_IPV6)
+			return -EINVAL;
+		break;
+#endif
+	case NFPROTO_INET:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Address is specified but the rule family is not set accordingly */
+	if (priv->family == NFPROTO_UNSPEC && tb[NFTA_TPROXY_REG_ADDR])
+		return -EINVAL;
+
+	switch (priv->family) {
+	case NFPROTO_IPV4:
+		alen = FIELD_SIZEOF(union nf_inet_addr, in);
+		err = nf_defrag_ipv4_enable(ctx->net);
+		if (err)
+			return err;
+		break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+	case NFPROTO_IPV6:
+		alen = FIELD_SIZEOF(union nf_inet_addr, in6);
+		err = nf_defrag_ipv6_enable(ctx->net);
+		if (err)
+			return err;
+		break;
+#endif
+	case NFPROTO_UNSPEC:
+		/* No address is specified here */
+		err = nf_defrag_ipv4_enable(ctx->net);
+		if (err)
+			return err;
+		err = nf_defrag_ipv6_enable(ctx->net);
+		if (err)
+			return err;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (tb[NFTA_TPROXY_REG_ADDR]) {
+		priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]);
+		err = nft_validate_register_load(priv->sreg_addr, alen);
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[NFTA_TPROXY_REG_PORT]) {
+		priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]);
+		err = nft_validate_register_load(priv->sreg_port, sizeof(u16));
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int nft_tproxy_dump(struct sk_buff *skb,
+			   const struct nft_expr *expr)
+{
+	const struct nft_tproxy *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_TPROXY_FAMILY, htonl(priv->family)))
+		return -1;
+
+	if (priv->sreg_addr &&
+	    nft_dump_register(skb, NFTA_TPROXY_REG_ADDR, priv->sreg_addr))
+		return -1;
+
+	if (priv->sreg_port &&
+	    nft_dump_register(skb, NFTA_TPROXY_REG_PORT, priv->sreg_port))
+			return -1;
+
+	return 0;
+}
+
+static struct nft_expr_type nft_tproxy_type;
+static const struct nft_expr_ops nft_tproxy_ops = {
+	.type		= &nft_tproxy_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_tproxy)),
+	.eval		= nft_tproxy_eval,
+	.init		= nft_tproxy_init,
+	.dump		= nft_tproxy_dump,
+};
+
+static struct nft_expr_type nft_tproxy_type __read_mostly = {
+	.name		= "tproxy",
+	.ops		= &nft_tproxy_ops,
+	.policy		= nft_tproxy_policy,
+	.maxattr	= NFTA_TPROXY_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_tproxy_module_init(void)
+{
+	return nft_register_expr(&nft_tproxy_type);
+}
+
+static void __exit nft_tproxy_module_exit(void)
+{
+	nft_unregister_expr(&nft_tproxy_type);
+}
+
+module_init(nft_tproxy_module_init);
+module_exit(nft_tproxy_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Máté Eckl");
+MODULE_DESCRIPTION("nf_tables tproxy support module");
+MODULE_ALIAS_NFT_EXPR("tproxy");
-- 
cgit v1.2.3


From f609e7a0f3956e3b03af735b83600a4eef2a04ec Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 7 Jun 2018 15:55:25 -0400
Subject: media: sii9234: remove unused header

The include/linux/platform_data/media/sii9234.h header file is unused,
let's remove it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/linux/platform_data/media/sii9234.h | 24 ------------------------
 1 file changed, 24 deletions(-)
 delete mode 100644 include/linux/platform_data/media/sii9234.h

(limited to 'include')

diff --git a/include/linux/platform_data/media/sii9234.h b/include/linux/platform_data/media/sii9234.h
deleted file mode 100644
index 6a4a809fe9a3..000000000000
--- a/include/linux/platform_data/media/sii9234.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Driver header for SII9234 MHL converter chip.
- *
- * Copyright (c) 2011 Samsung Electronics, Co. Ltd
- * Contact: Tomasz Stanislawski <t.stanislaws@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef SII9234_H
-#define SII9234_H
-
-/**
- * @gpio_n_reset: GPIO driving nRESET pin
- */
-
-struct sii9234_platform_data {
-	int gpio_n_reset;
-};
-
-#endif /* SII9234_H */
-- 
cgit v1.2.3


From 489cae632fc04927e8ef36ac8d8847193a41df3b Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:19 -0600
Subject: include: Move ascii85 functions from i915 to linux/ascii85.h

The i915 DRM driver very cleverly used ascii85 encoding for their
GPU state file. Move the encode functions to a general header file to
support other drivers that might be interested in the same
functionality.

v4: Make the return value const char * as suggested by Chris Wilson
v3: Fix error_puts -> err_puts pointed out by the 01.org bot
v2: Update API to be cleaner for the caller as suggested by Chris Wilson

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 34 ++++---------------------------
 include/linux/ascii85.h               | 38 +++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/ascii85.h

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8c81cf3aa182..f7f2aa71d8d9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -31,6 +31,7 @@
 #include <linux/stop_machine.h>
 #include <linux/zlib.h>
 #include <drm/drm_print.h>
+#include <linux/ascii85.h>
 
 #include "i915_gpu_error.h"
 #include "i915_drv.h"
@@ -517,35 +518,12 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
-static int
-ascii85_encode_len(int len)
-{
-	return DIV_ROUND_UP(len, 4);
-}
-
-static bool
-ascii85_encode(u32 in, char *out)
-{
-	int i;
-
-	if (in == 0)
-		return false;
-
-	out[5] = '\0';
-	for (i = 5; i--; ) {
-		out[i] = '!' + in % 85;
-		in /= 85;
-	}
-
-	return true;
-}
-
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct intel_engine_cs *engine,
 			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
-	char out[6];
+	char out[ASCII85_BUFSZ];
 	int page;
 
 	if (!obj)
@@ -567,12 +545,8 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 			len -= obj->unused;
 		len = ascii85_encode_len(len);
 
-		for (i = 0; i < len; i++) {
-			if (ascii85_encode(obj->pages[page][i], out))
-				err_puts(m, out);
-			else
-				err_puts(m, "z");
-		}
+		for (i = 0; i < len; i++)
+			err_puts(m, ascii85_encode(obj->pages[page][i], out));
 	}
 	err_puts(m, "\n");
 }
diff --git a/include/linux/ascii85.h b/include/linux/ascii85.h
new file mode 100644
index 000000000000..4cc40201273e
--- /dev/null
+++ b/include/linux/ascii85.h
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (c) 2008 Intel Corporation
+ * Copyright (c) 2018 The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _ASCII85_H_
+#define _ASCII85_H_
+
+#include <linux/kernel.h>
+
+#define ASCII85_BUFSZ 6
+
+static inline long
+ascii85_encode_len(long len)
+{
+	return DIV_ROUND_UP(len, 4);
+}
+
+static inline const char *
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return "z";
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return out;
+}
+
+#endif
-- 
cgit v1.2.3


From cfc57a18a3c5dc95d06db80bddd30015162c57d2 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:20 -0600
Subject: drm: drm_printer: Add printer for devcoredump

Add a drm printer suitable for use with the read callback for
devcoredump or other suitable buffer based output format that
isn't otherwise covered by seq_file.

v2: Add improved documentation per Daniel Vetter

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_print.c | 74 +++++++++++++++++++++++++++++++++++++++++++++
 include/drm/drm_print.h     | 65 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index b25f98f33f6c..03d1f98e5ac7 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -30,6 +30,80 @@
 #include <drm/drmP.h>
 #include <drm/drm_print.h>
 
+void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
+{
+	struct drm_print_iterator *iterator = p->arg;
+	ssize_t len;
+
+	if (!iterator->remain)
+		return;
+
+	/* Figure out how big the string will be */
+	len = snprintf(NULL, 0, "%pV", vaf);
+
+	if (iterator->offset < iterator->start) {
+		char *buf;
+		ssize_t copy;
+
+		if (iterator->offset + len <= iterator->start) {
+			iterator->offset += len;
+			return;
+		}
+
+		/* Print the string into a temporary buffer */
+		buf = kmalloc(len + 1,
+			GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (!buf)
+			return;
+
+		snprintf(buf, len + 1, "%pV", vaf);
+
+		copy = len - (iterator->start - iterator->offset);
+
+		if (copy > iterator->remain)
+			copy = iterator->remain;
+
+		/* Copy out the bit of the string that we need */
+		memcpy(iterator->data,
+			buf + (iterator->start - iterator->offset), copy);
+
+		iterator->offset = iterator->start + copy;
+		iterator->remain -= copy;
+
+		kfree(buf);
+	} else {
+		char *buf;
+		ssize_t pos = iterator->offset - iterator->start;
+
+		if (len < iterator->remain) {
+			snprintf(((char *) iterator->data) + pos,
+				iterator->remain, "%pV", vaf);
+
+			iterator->offset += len;
+			iterator->remain -= len;
+
+			return;
+		}
+
+		/* Print the string into a temporary buffer */
+		buf = kmalloc(len + 1,
+			GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (!buf)
+			return;
+
+		snprintf(buf, len + 1, "%pV", vaf);
+
+		/* Copy out the remaining bits */
+		memcpy(iterator->data + pos, buf, iterator->remain);
+
+		iterator->offset += iterator->remain;
+		iterator->remain = 0;
+
+		kfree(buf);
+	}
+}
+EXPORT_SYMBOL(__drm_printfn_coredump);
+
 void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf)
 {
 	seq_printf(p->arg, "%pV", vaf);
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 767c90b654c5..e7570f93a21f 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -73,6 +73,7 @@ struct drm_printer {
 	const char *prefix;
 };
 
+void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
@@ -104,6 +105,70 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va)
 #define drm_printf_indent(printer, indent, fmt, ...) \
 	drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__)
 
+/**
+ * struct drm_print_iterator - local struct used with drm_printer_coredump
+ * @data: Pointer to the devcoredump output buffer
+ * @start: The offset within the buffer to start writing
+ * @remain: The number of bytes to write for this iteration
+ */
+struct drm_print_iterator {
+	void *data;
+	ssize_t start;
+	ssize_t remain;
+	/* private: */
+	ssize_t offset;
+};
+
+/**
+ * drm_coredump_printer - construct a &drm_printer that can output to a buffer
+ * from the read function for devcoredump
+ * @iter: A pointer to a struct drm_print_iterator for the read instance
+ *
+ * This wrapper extends drm_printf() to work with a dev_coredumpm() callback
+ * function. The passed in drm_print_iterator struct contains the buffer
+ * pointer, size and offset as passed in from devcoredump.
+ *
+ * For example::
+ *
+ *	void coredump_read(char *buffer, loff_t offset, size_t count,
+ *		void *data, size_t datalen)
+ *	{
+ *		struct drm_print_iterator iter;
+ *		struct drm_printer p;
+ *
+ *		iter.data = buffer;
+ *		iter.start = offset;
+ *		iter.remain = count;
+ *
+ *		p = drm_coredump_printer(&iter);
+ *
+ *		drm_printf(p, "foo=%d\n", foo);
+ *	}
+ *
+ *	void makecoredump(...)
+ *	{
+ *		...
+ *		dev_coredumpm(dev, THIS_MODULE, data, 0, GFP_KERNEL,
+ *			coredump_read, ...)
+ *	}
+ *
+ * RETURNS:
+ * The &drm_printer object
+ */
+static inline struct drm_printer
+drm_coredump_printer(struct drm_print_iterator *iter)
+{
+	struct drm_printer p = {
+		.printfn = __drm_printfn_coredump,
+		.arg = iter,
+	};
+
+	/* Set the internal offset of the iterator to zero */
+	iter->offset = 0;
+
+	return p;
+}
+
 /**
  * drm_seq_file_printer - construct a &drm_printer that outputs to &seq_file
  * @f:  the &struct seq_file to output to
-- 
cgit v1.2.3


From 63f4cc015b66dd265c2fd6e7c94be1b9a3b72267 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:21 -0600
Subject: drm: Add drm_puts() to complement drm_printf()

Add drm_puts() for a much faster path to print constant strings
into a drm_printer object with memcpy and friends. This can
have seconds off of really large outputs such as GPU dumps.

If the drm_printer object supports a custom puts function then
use that otherwise fall back to the slower legacy printf call.

v2: Add documentation for drm_puts() per Daniel Vetter

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
[robclark fix minor htmldocs warning]
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_print.c | 17 +++++++++++++++++
 include/drm/drm_print.h     |  2 ++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 03d1f98e5ac7..01d4e5583b5d 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -122,6 +122,23 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf)
 }
 EXPORT_SYMBOL(__drm_printfn_debug);
 
+/**
+ * drm_puts - print a const string to a &drm_printer stream
+ * @p: the &drm printer
+ * @str: const string
+ *
+ * Allow &drm_printer types that have a constant string
+ * option to use it.
+ */
+void drm_puts(struct drm_printer *p, const char *str)
+{
+	if (p->puts)
+		p->puts(p, str);
+	else
+		drm_printf(p, "%s", str);
+}
+EXPORT_SYMBOL(drm_puts);
+
 /**
  * drm_printf - print to a &drm_printer stream
  * @p: the &drm_printer
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index e7570f93a21f..f2f42bb87ef2 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -69,6 +69,7 @@
 struct drm_printer {
 	/* private: */
 	void (*printfn)(struct drm_printer *p, struct va_format *vaf);
+	void (*puts)(struct drm_printer *p, const char *str);
 	void *arg;
 	const char *prefix;
 };
@@ -80,6 +81,7 @@ void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
 
 __printf(2, 3)
 void drm_printf(struct drm_printer *p, const char *f, ...);
+void drm_puts(struct drm_printer *p, const char *str);
 
 __printf(2, 0)
 /**
-- 
cgit v1.2.3


From 4538d7324507fed892a18b79ad72c8501b6e7027 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:22 -0600
Subject: drm: Add a -puts() function for the seq_file printer

Add a puts() function to use seq_puts() to help speed up
up print time for constant strings.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_print.c | 6 ++++++
 include/drm/drm_print.h     | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 01d4e5583b5d..45d787611723 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -104,6 +104,12 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
 }
 EXPORT_SYMBOL(__drm_printfn_coredump);
 
+void __drm_puts_seq_file(struct drm_printer *p, const char *str)
+{
+	seq_puts(p->arg, str);
+}
+EXPORT_SYMBOL(__drm_puts_seq_file);
+
 void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf)
 {
 	seq_printf(p->arg, "%pV", vaf);
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index f2f42bb87ef2..b1432969b627 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -76,6 +76,7 @@ struct drm_printer {
 
 void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf);
+void __drm_puts_seq_file(struct drm_printer *p, const char *str);
 void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf);
 void __drm_printfn_debug(struct drm_printer *p, struct va_format *vaf);
 
@@ -182,6 +183,7 @@ static inline struct drm_printer drm_seq_file_printer(struct seq_file *f)
 {
 	struct drm_printer p = {
 		.printfn = __drm_printfn_seq_file,
+		.puts = __drm_puts_seq_file,
 		.arg = f,
 	};
 	return p;
-- 
cgit v1.2.3


From 5dc634bdbfd6dd9bdf98bce0d6f64878e1d47b1f Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 24 Jul 2018 10:33:23 -0600
Subject: drm: Add puts callback for the coredump printer

Add a puts function for the coredump printer to bypass printf()
for constant strings for a speed boost. Reorganize the
coredump printf callback to share as much code as possible.

v2: Try to reuse code between print and puts as suggested by
    Chris Wilson

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/drm_print.c | 84 ++++++++++++++++++++++++++-------------------
 include/drm/drm_print.h     |  2 ++
 2 files changed, 51 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index 45d787611723..0e7fc3e7dfb4 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -30,7 +30,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_print.h>
 
-void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
+void __drm_puts_coredump(struct drm_printer *p, const char *str)
 {
 	struct drm_print_iterator *iterator = p->arg;
 	ssize_t len;
@@ -38,26 +38,16 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
 	if (!iterator->remain)
 		return;
 
-	/* Figure out how big the string will be */
-	len = snprintf(NULL, 0, "%pV", vaf);
-
 	if (iterator->offset < iterator->start) {
-		char *buf;
 		ssize_t copy;
 
+		len = strlen(str);
+
 		if (iterator->offset + len <= iterator->start) {
 			iterator->offset += len;
 			return;
 		}
 
-		/* Print the string into a temporary buffer */
-		buf = kmalloc(len + 1,
-			GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-		if (!buf)
-			return;
-
-		snprintf(buf, len + 1, "%pV", vaf);
-
 		copy = len - (iterator->start - iterator->offset);
 
 		if (copy > iterator->remain)
@@ -65,42 +55,66 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
 
 		/* Copy out the bit of the string that we need */
 		memcpy(iterator->data,
-			buf + (iterator->start - iterator->offset), copy);
+			str + (iterator->start - iterator->offset), copy);
 
 		iterator->offset = iterator->start + copy;
 		iterator->remain -= copy;
-
-		kfree(buf);
 	} else {
-		char *buf;
 		ssize_t pos = iterator->offset - iterator->start;
 
-		if (len < iterator->remain) {
-			snprintf(((char *) iterator->data) + pos,
-				iterator->remain, "%pV", vaf);
+		len = min_t(ssize_t, strlen(str), iterator->remain);
 
-			iterator->offset += len;
-			iterator->remain -= len;
+		memcpy(iterator->data + pos, str, len);
 
-			return;
-		}
+		iterator->offset += len;
+		iterator->remain -= len;
+	}
+}
+EXPORT_SYMBOL(__drm_puts_coredump);
 
-		/* Print the string into a temporary buffer */
-		buf = kmalloc(len + 1,
-			GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
-		if (!buf)
-			return;
+void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf)
+{
+	struct drm_print_iterator *iterator = p->arg;
+	size_t len;
+	char *buf;
+
+	if (!iterator->remain)
+		return;
+
+	/* Figure out how big the string will be */
+	len = snprintf(NULL, 0, "%pV", vaf);
+
+	/* This is the easiest path, we've already advanced beyond the offset */
+	if (iterator->offset + len <= iterator->start) {
+		iterator->offset += len;
+		return;
+	}
 
-		snprintf(buf, len + 1, "%pV", vaf);
+	/* Then check if we can directly copy into the target buffer */
+	if ((iterator->offset >= iterator->start) && (len < iterator->remain)) {
+		ssize_t pos = iterator->offset - iterator->start;
 
-		/* Copy out the remaining bits */
-		memcpy(iterator->data + pos, buf, iterator->remain);
+		snprintf(((char *) iterator->data) + pos,
+			iterator->remain, "%pV", vaf);
 
-		iterator->offset += iterator->remain;
-		iterator->remain = 0;
+		iterator->offset += len;
+		iterator->remain -= len;
 
-		kfree(buf);
+		return;
 	}
+
+	/*
+	 * Finally, hit the slow path and make a temporary string to copy over
+	 * using _drm_puts_coredump
+	 */
+	buf = kmalloc(len + 1, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (!buf)
+		return;
+
+	snprintf(buf, len + 1, "%pV", vaf);
+	__drm_puts_coredump(p, (const char *) buf);
+
+	kfree(buf);
 }
 EXPORT_SYMBOL(__drm_printfn_coredump);
 
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index b1432969b627..f3e6eed3e79c 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -75,6 +75,7 @@ struct drm_printer {
 };
 
 void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf);
+void __drm_puts_coredump(struct drm_printer *p, const char *str);
 void __drm_printfn_seq_file(struct drm_printer *p, struct va_format *vaf);
 void __drm_puts_seq_file(struct drm_printer *p, const char *str);
 void __drm_printfn_info(struct drm_printer *p, struct va_format *vaf);
@@ -163,6 +164,7 @@ drm_coredump_printer(struct drm_print_iterator *iter)
 {
 	struct drm_printer p = {
 		.printfn = __drm_printfn_coredump,
+		.puts = __drm_puts_coredump,
 		.arg = iter,
 	};
 
-- 
cgit v1.2.3


From f7debfe54090d1a1c38e1f070be20d83bb70a8e0 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 29 Jul 2018 23:03:05 +0200
Subject: ALSA: seq: virmidi: Offload the output event processing

The virmidi sequencer stuff tries to translate the rawmidi bytes to
sequencer events and deliver the packets at trigger callback.  The
amount of the whole process of these translations and deliveries
depends on the incoming rawmidi bytes, and we have no limit for that;
this was the cause of a CPU soft lockup that had been reported and
fixed recently.

Although we've fixed the soft lockup by putting the temporary unlock
and cond_resched(), it's rather a quick band aid.  In this patch,
meanwhile, the event parsing and delivery process is offloaded to a
dedicated work, and the trigger callback just kicks it off.  It has
three merits, at least:

- The processing is always done in a sleepable context, which can
  assure the event delivery with non-atomic flag without hackish
  is_atomic() usage.

- Other relevant codes can be simplified, reducing the lines

- It makes me happier

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/seq_virmidi.h  |   1 +
 sound/core/seq/seq_virmidi.c | 100 ++++++++++++++++++++-----------------------
 2 files changed, 47 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h
index 695257ae64ac..d488dcfa3a4e 100644
--- a/include/sound/seq_virmidi.h
+++ b/include/sound/seq_virmidi.h
@@ -41,6 +41,7 @@ struct snd_virmidi {
 	struct snd_seq_event event;
 	struct snd_virmidi_dev *rdev;
 	struct snd_rawmidi_substream *substream;
+	struct work_struct output_work;
 };
 
 #define SNDRV_VIRMIDI_SUBSCRIBE		(1<<0)
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 8ebbca554e99..67ea5d62cebc 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -154,70 +154,58 @@ static void snd_virmidi_input_trigger(struct snd_rawmidi_substream *substream, i
 	}
 }
 
-/*
- * trigger rawmidi stream for output
+/* process rawmidi bytes and send events;
+ * we need no lock here for vmidi->event since it's handled only in this work
  */
-static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, int up)
+static void snd_vmidi_output_work(struct work_struct *work)
 {
-	struct snd_virmidi *vmidi = substream->runtime->private_data;
-	int count, res;
-	unsigned char buf[32], *pbuf;
-	unsigned long flags;
-	bool check_resched = !in_atomic();
+	struct snd_virmidi *vmidi;
+	struct snd_rawmidi_substream *substream;
+	unsigned char input;
+	int ret;
+
+	vmidi = container_of(work, struct snd_virmidi, output_work);
+	substream = vmidi->substream;
+
+	/* discard the outputs in dispatch mode unless subscribed */
+	if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
+	    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
+		while (!snd_rawmidi_transmit_empty(substream))
+			snd_rawmidi_transmit_ack(substream, 1);
+		return;
+	}
 
-	if (up) {
-		vmidi->trigger = 1;
-		if (vmidi->seq_mode == SNDRV_VIRMIDI_SEQ_DISPATCH &&
-		    !(vmidi->rdev->flags & SNDRV_VIRMIDI_SUBSCRIBE)) {
-			while (snd_rawmidi_transmit(substream, buf,
-						    sizeof(buf)) > 0) {
-				/* ignored */
-			}
-			return;
-		}
-		spin_lock_irqsave(&substream->runtime->lock, flags);
+	while (vmidi->trigger) {
+		if (snd_rawmidi_transmit(substream, &input, 1) != 1)
+			break;
+		if (snd_midi_event_encode_byte(vmidi->parser, input,
+					       &vmidi->event) <= 0)
+			continue;
 		if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
-			if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-				goto out;
+			ret = snd_seq_kernel_client_dispatch(vmidi->client,
+							     &vmidi->event,
+							     false, 0);
 			vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
-		}
-		while (1) {
-			count = __snd_rawmidi_transmit_peek(substream, buf, sizeof(buf));
-			if (count <= 0)
+			if (ret < 0)
 				break;
-			pbuf = buf;
-			while (count > 0) {
-				res = snd_midi_event_encode(vmidi->parser, pbuf, count, &vmidi->event);
-				if (res < 0) {
-					snd_midi_event_reset_encode(vmidi->parser);
-					continue;
-				}
-				__snd_rawmidi_transmit_ack(substream, res);
-				pbuf += res;
-				count -= res;
-				if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
-					if (snd_seq_kernel_client_dispatch(vmidi->client, &vmidi->event, in_atomic(), 0) < 0)
-						goto out;
-					vmidi->event.type = SNDRV_SEQ_EVENT_NONE;
-				}
-			}
-			if (!check_resched)
-				continue;
-			/* do temporary unlock & cond_resched() for avoiding
-			 * CPU soft lockup, which may happen via a write from
-			 * a huge rawmidi buffer
-			 */
-			spin_unlock_irqrestore(&substream->runtime->lock, flags);
-			cond_resched();
-			spin_lock_irqsave(&substream->runtime->lock, flags);
 		}
-	out:
-		spin_unlock_irqrestore(&substream->runtime->lock, flags);
-	} else {
-		vmidi->trigger = 0;
+		/* rawmidi input might be huge, allow to have a break */
+		cond_resched();
 	}
 }
 
+/*
+ * trigger rawmidi stream for output
+ */
+static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream, int up)
+{
+	struct snd_virmidi *vmidi = substream->runtime->private_data;
+
+	vmidi->trigger = !!up;
+	if (up)
+		queue_work(system_highpri_wq, &vmidi->output_work);
+}
+
 /*
  * open rawmidi handle for input
  */
@@ -270,6 +258,7 @@ static int snd_virmidi_output_open(struct snd_rawmidi_substream *substream)
 	vmidi->port = rdev->port;
 	snd_virmidi_init_event(vmidi, &vmidi->event);
 	vmidi->rdev = rdev;
+	INIT_WORK(&vmidi->output_work, snd_vmidi_output_work);
 	runtime->private_data = vmidi;
 	return 0;
 }
@@ -299,6 +288,9 @@ static int snd_virmidi_input_close(struct snd_rawmidi_substream *substream)
 static int snd_virmidi_output_close(struct snd_rawmidi_substream *substream)
 {
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
+
+	vmidi->trigger = 0; /* to be sure */
+	cancel_work_sync(&vmidi->output_work);
 	snd_midi_event_free(vmidi->parser);
 	substream->runtime->private_data = NULL;
 	kfree(vmidi);
-- 
cgit v1.2.3


From 89b4ab213feb11a5bece544cfa12374f66c2c47c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 30 Jul 2018 14:48:29 +0200
Subject: ALSA: seq: virmidi: Use READ_ONCE/WRITE_ONCE() macros

The trigger flag in vmidi object can be referred in different contexts
concurrently, hence it's better to be put with READ_ONCE() and
WRITE_ONCE() macros to assure the accesses.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/seq_virmidi.h  |  2 +-
 sound/core/seq/seq_virmidi.c | 14 +++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h
index d488dcfa3a4e..796ce7772213 100644
--- a/include/sound/seq_virmidi.h
+++ b/include/sound/seq_virmidi.h
@@ -36,7 +36,7 @@ struct snd_virmidi {
 	int seq_mode;
 	int client;
 	int port;
-	unsigned int trigger: 1;
+	bool trigger;
 	struct snd_midi_event *parser;
 	struct snd_seq_event event;
 	struct snd_virmidi_dev *rdev;
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 67ea5d62cebc..03ac5e72dbe6 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -89,7 +89,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev,
 	else
 		down_read(&rdev->filelist_sem);
 	list_for_each_entry(vmidi, &rdev->filelist, list) {
-		if (!vmidi->trigger)
+		if (!READ_ONCE(vmidi->trigger))
 			continue;
 		if (ev->type == SNDRV_SEQ_EVENT_SYSEX) {
 			if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE)
@@ -147,11 +147,7 @@ static void snd_virmidi_input_trigger(struct snd_rawmidi_substream *substream, i
 {
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
 
-	if (up) {
-		vmidi->trigger = 1;
-	} else {
-		vmidi->trigger = 0;
-	}
+	WRITE_ONCE(vmidi->trigger, !!up);
 }
 
 /* process rawmidi bytes and send events;
@@ -175,7 +171,7 @@ static void snd_vmidi_output_work(struct work_struct *work)
 		return;
 	}
 
-	while (vmidi->trigger) {
+	while (READ_ONCE(vmidi->trigger)) {
 		if (snd_rawmidi_transmit(substream, &input, 1) != 1)
 			break;
 		if (snd_midi_event_encode_byte(vmidi->parser, input,
@@ -201,7 +197,7 @@ static void snd_virmidi_output_trigger(struct snd_rawmidi_substream *substream,
 {
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
 
-	vmidi->trigger = !!up;
+	WRITE_ONCE(vmidi->trigger, !!up);
 	if (up)
 		queue_work(system_highpri_wq, &vmidi->output_work);
 }
@@ -289,7 +285,7 @@ static int snd_virmidi_output_close(struct snd_rawmidi_substream *substream)
 {
 	struct snd_virmidi *vmidi = substream->runtime->private_data;
 
-	vmidi->trigger = 0; /* to be sure */
+	WRITE_ONCE(vmidi->trigger, false); /* to be sure */
 	cancel_work_sync(&vmidi->output_work);
 	snd_midi_event_free(vmidi->parser);
 	substream->runtime->private_data = NULL;
-- 
cgit v1.2.3


From a5ec5a7bfd1f28d1905499641c9f589be36808c1 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 27 Jul 2018 13:47:02 -0700
Subject: ata: ahci: Support state with min power but Partial low power state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently when min_power policy is selected, the partial low power state
is not entered and link will try aggressively enter to only slumber state.
Add a new policy which still enable DEVSLP but also try to enter partial
low power state. This policy is presented as "min_power_with_partial".

For information the difference between partial and slumber
Partial – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ns.
Slumber – PHY logic is powered up, and in a reduced power state. The link
PM exit latency to active state maximum is 10 ms.
Devslp – PHY logic is powered down. The link PM exit latency from this
state to active state maximum is 20 ms, unless otherwise specified by
DETO.

Suggested-and-reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libahci.c     | 5 ++++-
 drivers/ata/libata-core.c | 1 +
 drivers/ata/libata-scsi.c | 1 +
 include/linux/libata.h    | 3 ++-
 4 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 5038cee7d982..f79dce613e54 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -800,6 +800,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 			cmd |= PORT_CMD_ALPE;
 			if (policy == ATA_LPM_MIN_POWER)
 				cmd |= PORT_CMD_ASP;
+			else if (policy == ATA_LPM_MIN_POWER_WITH_PARTIAL)
+				cmd &= ~PORT_CMD_ASP;
 
 			/* write out new cmd value */
 			writel(cmd, port_mmio + PORT_CMD);
@@ -810,7 +812,8 @@ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 	if ((hpriv->cap2 & HOST_CAP2_SDS) &&
 	    (hpriv->cap2 & HOST_CAP2_SADM) &&
 	    (link->device->flags & ATA_DFLAG_DEVSLP)) {
-		if (policy == ATA_LPM_MIN_POWER)
+		if (policy == ATA_LPM_MIN_POWER ||
+		    policy == ATA_LPM_MIN_POWER_WITH_PARTIAL)
 			ahci_set_aggressive_devslp(ap, true);
 		else
 			ahci_set_aggressive_devslp(ap, false);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 1e28d054851f..f1b1be17383b 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3967,6 +3967,7 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 		scontrol |= (0x6 << 8);
 		break;
 	case ATA_LPM_MED_POWER_WITH_DIPM:
+	case ATA_LPM_MIN_POWER_WITH_PARTIAL:
 	case ATA_LPM_MIN_POWER:
 		if (ata_link_nr_enabled(link) > 0)
 			/* no restrictions on LPM transitions */
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 980190c9eb48..63c25f1499c3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -110,6 +110,7 @@ static const char *ata_lpm_policy_names[] = {
 	[ATA_LPM_MAX_POWER]		= "max_performance",
 	[ATA_LPM_MED_POWER]		= "medium_power",
 	[ATA_LPM_MED_POWER_WITH_DIPM]	= "med_power_with_dipm",
+	[ATA_LPM_MIN_POWER_WITH_PARTIAL] = "min_power_with_partial",
 	[ATA_LPM_MIN_POWER]		= "min_power",
 };
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index aa8583655a18..80c2bd202367 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -522,7 +522,8 @@ enum ata_lpm_policy {
 	ATA_LPM_MAX_POWER,
 	ATA_LPM_MED_POWER,
 	ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */
-	ATA_LPM_MIN_POWER,
+	ATA_LPM_MIN_POWER_WITH_PARTIAL, /* Min Power + partial and slumber */
+	ATA_LPM_MIN_POWER, /* Min power + no partial (slumber only) */
 };
 
 enum ata_lpm_hints {
-- 
cgit v1.2.3


From ea8532daee31bc72abfbc9ca7a43cbec0f6c05af Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 30 May 2018 11:07:05 -0400
Subject: media: videodev2: get rid of VIDIOC_RESERVED

While this ioctl is there at least since Kernel 2.6.12-rc2, it
was never used by any upstream driver.

Get rid of it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/videodev2.h.rst.exceptions | 1 -
 include/uapi/linux/videodev2.h                 | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions
index a5cb0a8686ac..ca9f0edc579e 100644
--- a/Documentation/media/videodev2.h.rst.exceptions
+++ b/Documentation/media/videodev2.h.rst.exceptions
@@ -517,7 +517,6 @@ ignore define V4L2_CTRL_WHICH_DEF_VAL
 ignore define V4L2_OUT_CAP_CUSTOM_TIMINGS
 ignore define V4L2_CID_MAX_CTRLS
 
-ignore ioctl VIDIOC_RESERVED
 ignore define BASE_VIDIOC_PRIVATE
 
 # Associate ioctls with their counterparts
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 3ea8097c2470..d8b33095abe0 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -2311,7 +2311,6 @@ struct v4l2_create_buffers {
  *
  */
 #define VIDIOC_QUERYCAP		 _IOR('V',  0, struct v4l2_capability)
-#define VIDIOC_RESERVED		  _IO('V',  1)
 #define VIDIOC_ENUM_FMT         _IOWR('V',  2, struct v4l2_fmtdesc)
 #define VIDIOC_G_FMT		_IOWR('V',  4, struct v4l2_format)
 #define VIDIOC_S_FMT		_IOWR('V',  5, struct v4l2_format)
-- 
cgit v1.2.3


From c454edc21b12dd7d416de6c81555e87aaec9685c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 30 Jul 2018 10:10:01 -0400
Subject: block: don't account for split bio's size in cgroup stats

We need to check in blkcg_bio_issue_check if the bio is flagged as
QUEUE_ENTERED, because if it is then we've already accounted for the
size of the IO in the cgroup stats.  We can still however account for
the extra IO since it'll be another request.

Reported-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 3bed5e02a873..f7b910768306 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -769,8 +769,14 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 
 	if (!throtl) {
 		blkg = blkg ?: q->root_blkg;
-		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
-				bio->bi_iter.bi_size);
+		/*
+		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
+		 * is a split bio and we would have already accounted for the
+		 * size of the bio.
+		 */
+		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
+			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
+					bio->bi_iter.bi_size);
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
-- 
cgit v1.2.3


From ddd0bc756983dc4d19000a4fe021b4c7f9d59aab Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 30 Jul 2018 00:15:31 +0300
Subject: block: move ref_tag calculation func to the block layer

Currently this function is implemented in the scsi layer, but it's
actual place should be the block layer since T10-PI is a general
data integrity feature that is used in the nvme protocol as well.

Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/infiniband/ulp/iser/iser_memory.c |  2 +-
 drivers/nvme/host/core.c                  |  3 +--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c      |  2 +-
 drivers/scsi/sd_dif.c                     |  4 ++--
 include/linux/t10-pi.h                    | 10 ++++++++++
 include/scsi/scsi_cmnd.h                  |  7 +------
 6 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ca844a926e6a..130bf163f066 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -311,7 +311,7 @@ iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
 	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
-	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
+	domain->sig.dif.ref_tag = t10_pi_ref_tag(sc->request);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e77e6418a21c..16c8b86fe95d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -611,8 +611,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		case NVME_NS_DPS_PI_TYPE2:
 			control |= NVME_RW_PRINFO_PRCHK_GUARD |
 					NVME_RW_PRINFO_PRCHK_REF;
-			cmnd->rw.reftag = cpu_to_le32(
-					nvme_block_nr(ns, blk_rq_pos(req)));
+			cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
 			break;
 		}
 	}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index b8d131a455d0..dd738ae5c75b 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -4568,7 +4568,7 @@ _scsih_setup_eedp(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd,
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
 		    MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
 		mpi_request->CDB.EEDP32.PrimaryReferenceTag =
-		    cpu_to_be32(scsi_prot_ref_tag(scmd));
+		    cpu_to_be32(t10_pi_ref_tag(scmd->request));
 		break;
 
 	case SCSI_PROT_DIF_TYPE3:
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 9035380c0dda..d8de43d359ac 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -124,7 +124,7 @@ void sd_dif_prepare(struct scsi_cmnd *scmd)
 	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
 		return;
 
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -176,7 +176,7 @@ void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
 		return;
 
 	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = scsi_prot_ref_tag(scmd);
+	phys = t10_pi_ref_tag(scmd->request);
 
 	__rq_for_each_bio(bio, scmd->request) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c6aa8a3c42ed..c40511f4e63d 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -37,6 +37,16 @@ struct t10_pi_tuple {
 #define T10_PI_APP_ESCAPE cpu_to_be16(0xffff)
 #define T10_PI_REF_ESCAPE cpu_to_be32(0xffffffff)
 
+static inline u32 t10_pi_ref_tag(struct request *rq)
+{
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	return blk_rq_pos(rq) >>
+		(rq->q->integrity.interval_exp - 9) & 0xffffffff;
+#else
+	return -1U;
+#endif
+}
+
 extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index aaf1e971c6a3..cae229b5395c 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -4,6 +4,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/t10-pi.h>
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
@@ -313,12 +314,6 @@ static inline unsigned int scsi_prot_interval(struct scsi_cmnd *scmd)
 	return scmd->device->sector_size;
 }
 
-static inline u32 scsi_prot_ref_tag(struct scsi_cmnd *scmd)
-{
-	return blk_rq_pos(scmd->request) >>
-		(ilog2(scsi_prot_interval(scmd)) - 9) & 0xffffffff;
-}
-
 static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd)
 {
 	return cmd->prot_sdb ? cmd->prot_sdb->table.nents : 0;
-- 
cgit v1.2.3


From 10c41ddd61323b27b447bc8e18296ac6c06107ad Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 30 Jul 2018 00:15:32 +0300
Subject: block: move dif_prepare/dif_complete functions to block layer

Currently these functions are implemented in the scsi layer, but their
actual place should be the block layer since T10-PI is a general data
integrity feature that is used in the nvme protocol as well. Also, use
the tuple size from the integrity profile since it may vary between
integrity types.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/t10-pi.c         | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      |   8 ++--
 drivers/scsi/sd.h      |   9 ----
 drivers/scsi/sd_dif.c  | 113 -------------------------------------------------
 include/linux/t10-pi.h |   3 ++
 5 files changed, 118 insertions(+), 125 deletions(-)

(limited to 'include')

diff --git a/block/t10-pi.c b/block/t10-pi.c
index a98db384048f..62aed77d0bb9 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -184,3 +184,113 @@ const struct blk_integrity_profile t10_pi_type3_ip = {
 	.verify_fn		= t10_pi_type3_verify_ip,
 };
 EXPORT_SYMBOL(t10_pi_type3_ip);
+
+/**
+ * t10_pi_prepare - prepare PI prior submitting request to device
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Remap protection information to match the
+ * physical LBA.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		/* Already remapped? */
+		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
+			break;
+
+		bip_for_each_vec(iv, bip, iter) {
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page);
+			p = pmap + iv.bv_offset;
+			for (j = 0; j < iv.bv_len; j += tuple_sz) {
+				struct t10_pi_tuple *pi = p;
+
+				if (be32_to_cpu(pi->ref_tag) == virt)
+					pi->ref_tag = cpu_to_be32(ref_tag);
+				virt++;
+				ref_tag++;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+
+		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
+	}
+}
+EXPORT_SYMBOL(t10_pi_prepare);
+
+/**
+ * t10_pi_complete - prepare PI prior returning request to the block layer
+ * @rq:              request with PI that should be prepared
+ * @protection_type: PI type (Type 1/Type 2/Type 3)
+ * @intervals:       total elements to prepare
+ *
+ * For Type 1/Type 2, the virtual start sector is the one that was
+ * originally submitted by the block layer for the ref_tag usage. Due to
+ * partitioning, MD/DM cloning, etc. the actual physical start sector is
+ * likely to be different. Since the physical start sector was submitted
+ * to the device, we should remap it back to virtual values expected by the
+ * block layer.
+ *
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+void t10_pi_complete(struct request *rq, u8 protection_type,
+		     unsigned int intervals)
+{
+	const int tuple_sz = rq->q->integrity.tuple_size;
+	u32 ref_tag = t10_pi_ref_tag(rq);
+	struct bio *bio;
+
+	if (protection_type == T10_PI_TYPE3_PROTECTION)
+		return;
+
+	__rq_for_each_bio(bio, rq) {
+		struct bio_integrity_payload *bip = bio_integrity(bio);
+		u32 virt = bip_get_seed(bip) & 0xffffffff;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+
+		bip_for_each_vec(iv, bip, iter) {
+			void *p, *pmap;
+			unsigned int j;
+
+			pmap = kmap_atomic(iv.bv_page);
+			p = pmap + iv.bv_offset;
+			for (j = 0; j < iv.bv_len && intervals; j += tuple_sz) {
+				struct t10_pi_tuple *pi = p;
+
+				if (be32_to_cpu(pi->ref_tag) == ref_tag)
+					pi->ref_tag = cpu_to_be32(virt);
+				virt++;
+				ref_tag++;
+				intervals--;
+				p += tuple_sz;
+			}
+
+			kunmap_atomic(pmap);
+		}
+	}
+}
+EXPORT_SYMBOL(t10_pi_complete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9421d9877730..bbebdc3769b0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1119,7 +1119,7 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *SCpnt)
 		SCpnt->cmnd[0] = WRITE_6;
 
 		if (blk_integrity_rq(rq))
-			sd_dif_prepare(SCpnt);
+			t10_pi_prepare(SCpnt->request, sdkp->protection_type);
 
 	} else if (rq_data_dir(rq) == READ) {
 		SCpnt->cmnd[0] = READ_6;
@@ -2047,8 +2047,10 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
-		sd_dif_complete(SCpnt, good_bytes);
+	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
+	    good_bytes)
+		t10_pi_complete(SCpnt->request, sdkp->protection_type,
+				good_bytes / scsi_prot_interval(SCpnt));
 
 	return good_bytes;
 }
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 392c7d078ae3..a7d4f50b67d4 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -254,21 +254,12 @@ static inline unsigned int sd_prot_flag_mask(unsigned int prot_op)
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 
 extern void sd_dif_config_host(struct scsi_disk *);
-extern void sd_dif_prepare(struct scsi_cmnd *scmd);
-extern void sd_dif_complete(struct scsi_cmnd *, unsigned int);
 
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 
 static inline void sd_dif_config_host(struct scsi_disk *disk)
 {
 }
-static inline int sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	return 0;
-}
-static inline void sd_dif_complete(struct scsi_cmnd *cmd, unsigned int a)
-{
-}
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index d8de43d359ac..db72c82486e3 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -95,116 +95,3 @@ out:
 	blk_integrity_register(disk, &bi);
 }
 
-/*
- * The virtual start sector is the one that was originally submitted
- * by the block layer.	Due to partitioning, MD/DM cloning, etc. the
- * actual physical start sector is likely to be different.  Remap
- * protection information to match the physical LBA.
- *
- * From a protocol perspective there's a slight difference between
- * Type 1 and 2.  The latter uses 32-byte CDBs exclusively, and the
- * reference tag is seeded in the CDB.  This gives us the potential to
- * avoid virt->phys remapping during write.  However, at read time we
- * don't know whether the virt sector is the same as when we wrote it
- * (we could be reading from real disk as opposed to MD/DM device.  So
- * we always remap Type 2 making it identical to Type 1.
- *
- * Type 3 does not have a reference tag so no remapping is required.
- */
-void sd_dif_prepare(struct scsi_cmnd *scmd)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct bio *bio;
-	struct scsi_disk *sdkp;
-	struct t10_pi_tuple *pi;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-		unsigned int j;
-
-		/* Already remapped? */
-		if (bip->bip_flags & BIP_MAPPED_INTEGRITY)
-			break;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (be32_to_cpu(pi->ref_tag) == virt)
-					pi->ref_tag = cpu_to_be32(phys);
-
-				virt++;
-				phys++;
-			}
-
-			kunmap_atomic(pi);
-		}
-
-		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
-	}
-}
-
-/*
- * Remap physical sector values in the reference tag to the virtual
- * values expected by the block layer.
- */
-void sd_dif_complete(struct scsi_cmnd *scmd, unsigned int good_bytes)
-{
-	const int tuple_sz = sizeof(struct t10_pi_tuple);
-	struct scsi_disk *sdkp;
-	struct bio *bio;
-	struct t10_pi_tuple *pi;
-	unsigned int j, intervals;
-	u32 phys, virt;
-
-	sdkp = scsi_disk(scmd->request->rq_disk);
-
-	if (sdkp->protection_type == T10_PI_TYPE3_PROTECTION || good_bytes == 0)
-		return;
-
-	intervals = good_bytes / scsi_prot_interval(scmd);
-	phys = t10_pi_ref_tag(scmd->request);
-
-	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_integrity_payload *bip = bio_integrity(bio);
-		struct bio_vec iv;
-		struct bvec_iter iter;
-
-		virt = bip_get_seed(bip) & 0xffffffff;
-
-		bip_for_each_vec(iv, bip, iter) {
-			pi = kmap_atomic(iv.bv_page) + iv.bv_offset;
-
-			for (j = 0; j < iv.bv_len; j += tuple_sz, pi++) {
-
-				if (intervals == 0) {
-					kunmap_atomic(pi);
-					return;
-				}
-
-				if (be32_to_cpu(pi->ref_tag) == phys)
-					pi->ref_tag = cpu_to_be32(virt);
-
-				virt++;
-				phys++;
-				intervals--;
-			}
-
-			kunmap_atomic(pi);
-		}
-	}
-}
-
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index c40511f4e63d..5a427c289f58 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -51,5 +51,8 @@ extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+extern void t10_pi_prepare(struct request *rq, u8 protection_type);
+extern void t10_pi_complete(struct request *rq, u8 protection_type,
+			    unsigned int intervals);
 
 #endif
-- 
cgit v1.2.3


From dd979b4df817e9976f18fb6f9d134d6bc4a3c317 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:10 +0200
Subject: net: simplify sock_poll_wait

The wait_address argument is always directly derived from the filp
argument, so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/af_alg.c        |  2 +-
 include/net/sock.h     | 11 ++++++-----
 net/atm/common.c       |  2 +-
 net/caif/caif_socket.c |  2 +-
 net/core/datagram.c    |  2 +-
 net/dccp/proto.c       |  2 +-
 net/ipv4/tcp.c         |  2 +-
 net/iucv/af_iucv.c     |  2 +-
 net/nfc/llcp_sock.c    |  2 +-
 net/rxrpc/af_rxrpc.c   |  2 +-
 net/smc/af_smc.c       |  2 +-
 net/tipc/socket.c      |  2 +-
 net/unix/af_unix.c     |  4 ++--
 13 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index c166f424871c..b053179e0bc5 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1071,7 +1071,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
 	struct af_alg_ctx *ctx = ask->private;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	if (!ctx->more || ctx->used)
diff --git a/include/net/sock.h b/include/net/sock.h
index 83b747538bd0..0518f61926ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2057,16 +2057,17 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 /**
  * sock_poll_wait - place memory barrier behind the poll_wait call.
  * @filp:           file
- * @wait_address:   socket wait queue
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
  */
-static inline void sock_poll_wait(struct file *filp,
-		wait_queue_head_t *wait_address, poll_table *p)
+static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
-	if (!poll_does_not_wait(p) && wait_address) {
-		poll_wait(filp, wait_address, p);
+	struct socket *sock = filp->private_data;
+	wait_queue_head_t *wq = sk_sleep(sock->sk);
+
+	if (!poll_does_not_wait(p) && wq) {
+		poll_wait(filp, wq, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
diff --git a/net/atm/common.c b/net/atm/common.c
index a7a68e509628..9f8cb0d2e71e 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -653,7 +653,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 	struct atm_vcc *vcc;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	vcc = ATM_SD(sock);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index a6fb1b3bcad9..d18965f3291f 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -941,7 +941,7 @@ static __poll_t caif_poll(struct file *file,
 	__poll_t mask;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* exceptional events? */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9938952c5c78..9aac0d63d53e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -837,7 +837,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* exceptional events? */
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0d56e36a6db7..875858c8b059 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -325,7 +325,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
 	__poll_t mask;
 	struct sock *sk = sock->sk;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	if (sk->sk_state == DCCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 514aaac1626f..f3bfb9f29520 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -507,7 +507,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int state;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 
 	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 8d1c43f8fed4..92ee91e34395 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1494,7 +1494,7 @@ __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 
 	if (sk->sk_state == IUCV_LISTEN)
 		return iucv_accept_poll(sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ea0c0c6f1874..dd4adf8b1167 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -556,7 +556,7 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
 
 	pr_debug("%p\n", sk);
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 
 	if (sk->sk_state == LLCP_LISTEN)
 		return llcp_accept_poll(sk);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2b463047dd7b..ac44d8afffb1 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -741,7 +741,7 @@ static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* the socket is readable if there are any messages waiting on the Rx
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fce7e4751151..0fc94f296e54 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1535,7 +1535,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 			mask |= EPOLLERR;
 	} else {
 		if (sk->sk_state != SMC_CLOSED)
-			sock_poll_wait(file, sk_sleep(sk), wait);
+			sock_poll_wait(file, wait);
 		if (sk->sk_err)
 			mask |= EPOLLERR;
 		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3d21414ba357..3763bedecf5f 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -716,7 +716,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
 	struct tipc_sock *tsk = tipc_sk(sk);
 	__poll_t revents = 0;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e5473c03d667..1772a0e32665 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2635,7 +2635,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 	struct sock *sk = sock->sk;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* exceptional events? */
@@ -2672,7 +2672,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 	unsigned int writable;
 	__poll_t mask;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
+	sock_poll_wait(file, wait);
 	mask = 0;
 
 	/* exceptional events? */
-- 
cgit v1.2.3


From d8bbd13beeaacd6494954bf5b945b54ccb2af309 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:11 +0200
Subject: net: don not detour through struct sock to find the poll waitqueue

For any open socket file descriptor sock->sk->sk_wq->wait will always
point to sock->wq->wait.  That means we can do the shorter dereference
and removal a NULL check and don't have to not worry about any RCU
protection.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 0518f61926ec..2afea5d1bdfe 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2064,10 +2064,9 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 static inline void sock_poll_wait(struct file *filp, poll_table *p)
 {
 	struct socket *sock = filp->private_data;
-	wait_queue_head_t *wq = sk_sleep(sock->sk);
 
-	if (!poll_does_not_wait(p) && wq) {
-		poll_wait(filp, wq, p);
+	if (!poll_does_not_wait(p)) {
+		poll_wait(filp, &sock->wq->wait, p);
 		/* We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-- 
cgit v1.2.3


From f641f13b992979b97e595b761a9ba1a64fed7c4e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:12 +0200
Subject: net: remove sock_poll_busy_loop

There is no point in hiding this logic in a helper.  Also remove the
useless events != 0 check and only busy loop once we know we actually
have a poll method.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h | 9 ---------
 net/socket.c            | 5 ++++-
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 9e36fda652b7..85777e68f738 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,15 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
-{
-	if (sk_can_busy_loop(sock->sk) &&
-	    events && (events & POLL_BUSY_LOOP)) {
-		/* once, only if requested by syscall */
-		sk_busy_loop(sock->sk, 1);
-	}
-}
-
 /* if this socket can poll_ll, tell the system call */
 static inline __poll_t sock_poll_busy_flag(struct socket *sock)
 {
diff --git a/net/socket.c b/net/socket.c
index 85633622c94d..674434127b3a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1132,9 +1132,12 @@ static __poll_t sock_poll(struct file *file, poll_table *wait)
 	struct socket *sock = file->private_data;
 	__poll_t events = poll_requested_events(wait);
 
-	sock_poll_busy_loop(sock, events);
 	if (!sock->ops->poll)
 		return 0;
+
+	/* poll once if requested by the syscall */
+	if (sk_can_busy_loop(sock->sk) && (events & POLL_BUSY_LOOP))
+		sk_busy_loop(sock->sk, 1);
 	return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
 }
 
-- 
cgit v1.2.3


From a331de3bf0e66ab2437fc8c5b99bd3c0d9da3088 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:42:13 +0200
Subject: net: remove sock_poll_busy_flag

Fold it into the only caller to make the code simpler and easier to read.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/busy_poll.h |  6 ------
 net/socket.c            | 16 +++++++++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 85777e68f738..ba61cdd09eaa 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,12 +121,6 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
 #endif
 }
 
-/* if this socket can poll_ll, tell the system call */
-static inline __poll_t sock_poll_busy_flag(struct socket *sock)
-{
-	return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
-}
-
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
diff --git a/net/socket.c b/net/socket.c
index 674434127b3a..5b7df6695f4f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1130,15 +1130,21 @@ EXPORT_SYMBOL(sock_create_lite);
 static __poll_t sock_poll(struct file *file, poll_table *wait)
 {
 	struct socket *sock = file->private_data;
-	__poll_t events = poll_requested_events(wait);
+	__poll_t events = poll_requested_events(wait), flag = 0;
 
 	if (!sock->ops->poll)
 		return 0;
 
-	/* poll once if requested by the syscall */
-	if (sk_can_busy_loop(sock->sk) && (events & POLL_BUSY_LOOP))
-		sk_busy_loop(sock->sk, 1);
-	return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock);
+	if (sk_can_busy_loop(sock->sk)) {
+		/* poll once if requested by the syscall */
+		if (events & POLL_BUSY_LOOP)
+			sk_busy_loop(sock->sk, 1);
+
+		/* if this socket can poll_ll, tell the system call */
+		flag = POLL_BUSY_LOOP;
+	}
+
+	return sock->ops->poll(file, sock, wait) | flag;
 }
 
 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
-- 
cgit v1.2.3


From 802bfb19152c0fb4137c6ba72bcf042ee023e743 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:42 +0200
Subject: net/sched: user-space can't set unknown tcfa_action values

Currently, when initializing an action, the user-space can specify
and use arbitrary values for the tcfa_action field. If the value
is unknown by the kernel, is implicitly threaded as TC_ACT_UNSPEC.

This change explicitly checks for unknown values at action creation
time, and explicitly convert them to TC_ACT_UNSPEC. No functional
changes are introduced, but this will allow introducing tcfa_action
values not exposed to user-space in a later patch.

Note: we can't use the above to hide TC_ACT_REDIRECT from user-space,
as the latter is already part of uAPI.

v3 -> v4:
 - use an helper to check for action validity (JiriP)
 - emit an extack for invalid actions (JiriP)
v4 -> v5:
 - keep messages on a single line, drop net_warn (Marcelo)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  6 ++++--
 net/sched/act_api.c          | 14 ++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index b4512254036b..48e5b5d49a34 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -45,6 +45,7 @@ enum {
 				   * the skb and act like everything
 				   * is alright.
 				   */
+#define TC_ACT_VALUE_MAX	TC_ACT_TRAP
 
 /* There is a special kind of actions called "extended actions",
  * which need a value parameter. These have a local opcode located in
@@ -55,11 +56,12 @@ enum {
 #define __TC_ACT_EXT_SHIFT 28
 #define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
 #define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
-#define TC_ACT_EXT_CMP(combined, opcode) \
-	(((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
 
 #define TC_ACT_JUMP __TC_ACT_EXT(1)
 #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX	TC_ACT_GOTO_CHAIN
 
 /* Action type identifiers*/
 enum {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index b43df1e25c6d..229d63c99be2 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 	return c;
 }
 
+static bool tcf_action_valid(int action)
+{
+	int opcode = TC_ACT_EXT_OPCODE(action);
+
+	if (!opcode)
+		return action <= TC_ACT_VALUE_MAX;
+	return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
+}
+
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
@@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		}
 	}
 
+	if (!tcf_action_valid(a->tcfa_action)) {
+		NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
+		a->tcfa_action = TC_ACT_UNSPEC;
+	}
+
 	return a;
 
 err_mod:
-- 
cgit v1.2.3


From 7fd4b288ea6a3e45ad8afbcd5ec39554d57f1ae0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:43 +0200
Subject: tc/act: remove unneeded RCU lock in action callback

Each lockless action currently does its own RCU locking in ->act().
This allows using plain RCU accessor, even if the context
is really RCU BH.

This change drops the per action RCU lock, replace the accessors
with the _bh variant, cleans up a bit the surrounding code and
documents the RCU status in the relevant header.
No functional nor performance change is intended.

The goal of this patch is clarifying that the RCU critical section
used by the tc actions extends up to the classifier's caller.

v1 -> v2:
 - preserve rcu lock in act_bpf: it's needed by eBPF helpers,
   as pointed out by Daniel

v3 -> v4:
 - fixed some typos in the commit message (JiriP)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  2 +-
 include/net/sch_generic.h  |  2 ++
 net/sched/act_csum.c       | 12 +++---------
 net/sched/act_ife.c        |  5 +----
 net/sched/act_mirred.c     |  4 +---
 net/sched/act_sample.c     |  4 +---
 net/sched/act_skbedit.c    | 10 +++-------
 net/sched/act_skbmod.c     | 21 +++++++++------------
 net/sched/act_tunnel_key.c |  6 +-----
 net/sched/act_vlan.c       | 19 +++++++------------
 10 files changed, 29 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 683ce41053d9..8c9bc02d05e1 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -85,7 +85,7 @@ struct tc_action_ops {
 	size_t	size;
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, const struct tc_action *,
-		       struct tcf_result *);
+		       struct tcf_result *); /* called under RCU BH lock*/
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
 	int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c5432362dc26..bcae181c1857 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -285,6 +285,8 @@ struct tcf_proto {
 	/* Fast access part */
 	struct tcf_proto __rcu	*next;
 	void __rcu		*root;
+
+	/* called under RCU BH lock*/
 	int			(*classify)(struct sk_buff *,
 					    const struct tcf_proto *,
 					    struct tcf_result *);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4e8c383f379e..648a3a35b720 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
 	u32 update_flags;
 	int action;
 
-	rcu_read_lock();
-	params = rcu_dereference(p->params);
+	params = rcu_dereference_bh(p->params);
 
 	tcf_lastuse_update(&p->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
 
 	action = READ_ONCE(p->tcf_action);
 	if (unlikely(action == TC_ACT_SHOT))
-		goto drop_stats;
+		goto drop;
 
 	update_flags = params->update_flags;
 	switch (tc_skb_protocol(skb)) {
@@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	}
 
-unlock:
-	rcu_read_unlock();
 	return action;
 
 drop:
-	action = TC_ACT_SHOT;
-
-drop_stats:
 	qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
-	goto unlock;
+	return TC_ACT_SHOT;
 }
 
 static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3d6e265758c0..df4060e32d43 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_ife_params *p;
 	int ret;
 
-	rcu_read_lock();
-	p = rcu_dereference(ife->params);
+	p = rcu_dereference_bh(ife->params);
 	if (p->flags & IFE_ENCODE) {
 		ret = tcf_ife_encode(skb, a, res, p);
-		rcu_read_unlock();
 		return ret;
 	}
-	rcu_read_unlock();
 
 	return tcf_ife_decode(skb, a, res);
 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6afd89a36c69..eeb335f03102 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -181,11 +181,10 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 	tcf_lastuse_update(&m->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 
-	rcu_read_lock();
 	m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
 	m_eaction = READ_ONCE(m->tcfm_eaction);
 	retval = READ_ONCE(m->tcf_action);
-	dev = rcu_dereference(m->tcfm_dev);
+	dev = rcu_dereference_bh(m->tcfm_dev);
 	if (unlikely(!dev)) {
 		pr_notice_once("tc mirred: target device is gone\n");
 		goto out;
@@ -236,7 +235,6 @@ out:
 		if (tcf_mirred_is_act_redirect(m_eaction))
 			retval = TC_ACT_SHOT;
 	}
-	rcu_read_unlock();
 
 	return retval;
 }
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 3079e7be5bde..2608ccc83e5e 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 	bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
 	retval = READ_ONCE(s->tcf_action);
 
-	rcu_read_lock();
-	psample_group = rcu_dereference(s->psample_group);
+	psample_group = rcu_dereference_bh(s->psample_group);
 
 	/* randomly sample packets according to rate */
 	if (psample_group && (prandom_u32() % s->rate == 0)) {
@@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
 			skb_pull(skb, skb->mac_len);
 	}
 
-	rcu_read_unlock();
 	return retval;
 }
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index da56e6938c9e..a6db47ebec11 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 	tcf_lastuse_update(&d->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
 
-	rcu_read_lock();
-	params = rcu_dereference(d->params);
+	params = rcu_dereference_bh(d->params);
 	action = READ_ONCE(d->tcf_action);
 
 	if (params->flags & SKBEDIT_F_PRIORITY)
@@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 	}
 	if (params->flags & SKBEDIT_F_PTYPE)
 		skb->pkt_type = params->ptype;
-
-unlock:
-	rcu_read_unlock();
 	return action;
+
 err:
 	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
-	action = TC_ACT_SHOT;
-	goto unlock;
+	return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index cdc6bacfb190..c437c6d51a71 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -41,20 +41,14 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
 	 * then MAX_EDIT_LEN needs to change appropriately
 	*/
 	err = skb_ensure_writable(skb, MAX_EDIT_LEN);
-	if (unlikely(err)) { /* best policy is to drop on the floor */
-		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
-		return TC_ACT_SHOT;
-	}
+	if (unlikely(err)) /* best policy is to drop on the floor */
+		goto drop;
 
-	rcu_read_lock();
 	action = READ_ONCE(d->tcf_action);
-	if (unlikely(action == TC_ACT_SHOT)) {
-		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
-		rcu_read_unlock();
-		return action;
-	}
+	if (unlikely(action == TC_ACT_SHOT))
+		goto drop;
 
-	p = rcu_dereference(d->skbmod_p);
+	p = rcu_dereference_bh(d->skbmod_p);
 	flags = p->flags;
 	if (flags & SKBMOD_F_DMAC)
 		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
@@ -62,7 +56,6 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
 		ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
 	if (flags & SKBMOD_F_ETYPE)
 		eth_hdr(skb)->h_proto = p->eth_type;
-	rcu_read_unlock();
 
 	if (flags & SKBMOD_F_SWAPMAC) {
 		u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
@@ -73,6 +66,10 @@ static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	return action;
+
+drop:
+	qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+	return TC_ACT_SHOT;
 }
 
 static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index f811850fd1d0..d42d9e112789 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -31,9 +31,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
 	struct tcf_tunnel_key_params *params;
 	int action;
 
-	rcu_read_lock();
-
-	params = rcu_dereference(t->params);
+	params = rcu_dereference_bh(t->params);
 
 	tcf_lastuse_update(&t->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
@@ -53,8 +51,6 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	}
 
-	rcu_read_unlock();
-
 	return action;
 }
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ad37f308175a..15a0ee214c9c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -40,11 +40,9 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 	if (skb_at_tc_ingress(skb))
 		skb_push_rcsum(skb, skb->mac_len);
 
-	rcu_read_lock();
-
 	action = READ_ONCE(v->tcf_action);
 
-	p = rcu_dereference(v->vlan_p);
+	p = rcu_dereference_bh(v->vlan_p);
 
 	switch (p->tcfv_action) {
 	case TCA_VLAN_ACT_POP:
@@ -61,7 +59,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 	case TCA_VLAN_ACT_MODIFY:
 		/* No-op if no vlan tag (either hw-accel or in-payload) */
 		if (!skb_vlan_tagged(skb))
-			goto unlock;
+			goto out;
 		/* extract existing tag (and guarantee no hw-accel tag) */
 		if (skb_vlan_tag_present(skb)) {
 			tci = skb_vlan_tag_get(skb);
@@ -86,18 +84,15 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
 		BUG();
 	}
 
-	goto unlock;
-
-drop:
-	action = TC_ACT_SHOT;
-	qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
-
-unlock:
-	rcu_read_unlock();
+out:
 	if (skb_at_tc_ingress(skb))
 		skb_pull_rcsum(skb, skb->mac_len);
 
 	return action;
+
+drop:
+	qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats));
+	return TC_ACT_SHOT;
 }
 
 static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
-- 
cgit v1.2.3


From cd11b164073b719203318227918f9510809d5e10 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 30 Jul 2018 14:30:44 +0200
Subject: net/tc: introduce TC_ACT_REINSERT.

This is similar TC_ACT_REDIRECT, but with a slightly different
semantic:
- on ingress the mirred skbs are passed to the target device
network stack without any additional check not scrubbing.
- the rcu-protected stats provided via the tcf_result struct
  are updated on error conditions.

This new tcfa_action value is not exposed to the user-space
and can be used only internally by clsact.

v1 -> v2: do not touch TC_ACT_REDIRECT code path, introduce
 a new action type instead
v2 -> v3:
 - rename the new action value TC_ACT_REINJECT, update the
   helper accordingly
 - take care of uncloned reinjected packets in XDP generic
   hook
v3 -> v4:
 - renamed again the new action value (JiriP)
v4 -> v5:
 - fix build error with !NET_CLS_ACT (kbuild bot)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  3 +++
 include/net/sch_generic.h | 28 ++++++++++++++++++++++++++++
 net/core/dev.c            |  6 +++++-
 3 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6d02f31abba8..22bfc3a13c25 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -7,6 +7,9 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
+/* TC action not accessible from user space */
+#define TC_ACT_REINSERT		(TC_ACT_VALUE_MAX + 1)
+
 /* Basic packet classifier frontend definitions. */
 
 struct tcf_walker {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bcae181c1857..a6d00093f35e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -235,6 +235,12 @@ struct tcf_result {
 			u32		classid;
 		};
 		const struct tcf_proto *goto_tp;
+
+		/* used by the TC_ACT_REINSERT action */
+		struct {
+			bool		ingress;
+			struct gnet_stats_queue *qstats;
+		};
 	};
 };
 
@@ -569,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
 #endif
 }
 
+static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return skb->tc_redirected;
+#else
+	return false;
+#endif
+}
+
 static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1108,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
 			  struct mini_Qdisc __rcu **p_miniq);
 
+static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
+{
+	struct gnet_stats_queue *stats = res->qstats;
+	int ret;
+
+	if (res->ingress)
+		ret = netif_receive_skb(skb);
+	else
+		ret = dev_queue_xmit(skb);
+	if (ret && stats)
+		qstats_overlimit_inc(res->qstats);
+}
+
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 89031b5fef9f..38b0c414d780 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4252,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	/* Reinjected packets coming from act_mirred or similar should
 	 * not get XDP generic processing.
 	 */
-	if (skb_cloned(skb))
+	if (skb_cloned(skb) || skb_is_tc_redirected(skb))
 		return XDP_PASS;
 
 	/* XDP packets must be linear and must have sufficient headroom
@@ -4602,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
 		__skb_push(skb, skb->mac_len);
 		skb_do_redirect(skb);
 		return NULL;
+	case TC_ACT_REINSERT:
+		/* this does not scrub the packet, and updates stats on error */
+		skb_tc_reinsert(skb, &cl_res);
+		return NULL;
 	default:
 		break;
 	}
-- 
cgit v1.2.3


From 82b98ca566ca2af170eb0ab50cef09dd7335fa55 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Thu, 5 Jul 2018 16:48:50 +0000
Subject: net/sunrpc: Make rpc_auth_create_args a const

This turns rpc_auth_create_args into a const as it gets passed through the
auth stack.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    | 5 +++--
 net/sunrpc/auth.c              | 2 +-
 net/sunrpc/auth_gss/auth_gss.c | 9 +++++----
 net/sunrpc/auth_null.c         | 2 +-
 net/sunrpc/auth_unix.c         | 2 +-
 5 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index d9af474a857d..58a6765c1c5e 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -125,7 +125,8 @@ struct rpc_authops {
 	struct module		*owner;
 	rpc_authflavor_t	au_flavor;	/* flavor (RPC_AUTH_*) */
 	char *			au_name;
-	struct rpc_auth *	(*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
+	struct rpc_auth *	(*create)(const struct rpc_auth_create_args *,
+					  struct rpc_clnt *);
 	void			(*destroy)(struct rpc_auth *);
 
 	int			(*hash_cred)(struct auth_cred *, unsigned int);
@@ -174,7 +175,7 @@ struct rpc_cred *	rpc_lookup_generic_cred(struct auth_cred *, int, gfp_t);
 struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
-struct rpc_auth *	rpcauth_create(struct rpc_auth_create_args *,
+struct rpc_auth *	rpcauth_create(const struct rpc_auth_create_args *,
 				struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 rpc_authflavor_t	rpcauth_get_pseudoflavor(rpc_authflavor_t,
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index d2623b9f23d6..661e2277f468 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -253,7 +253,7 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
 EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
 
 struct rpc_auth *
-rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct rpc_auth		*auth;
 	const struct rpc_authops *ops;
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index be8f103d22fd..21a19a9f0e33 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -985,7 +985,7 @@ static void gss_pipe_free(struct gss_pipe *p)
  * parameters based on the input flavor (which must be a pseudoflavor)
  */
 static struct gss_auth *
-gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	rpc_authflavor_t flavor = args->pseudoflavor;
 	struct gss_auth *gss_auth;
@@ -1132,7 +1132,7 @@ gss_destroy(struct rpc_auth *auth)
  * (which is guaranteed to last as long as any of its descendants).
  */
 static struct gss_auth *
-gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
+gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
 		struct rpc_clnt *clnt,
 		struct gss_auth *new)
 {
@@ -1169,7 +1169,8 @@ out:
 }
 
 static struct gss_auth *
-gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create_hashed(const struct rpc_auth_create_args *args,
+		  struct rpc_clnt *clnt)
 {
 	struct gss_auth *gss_auth;
 	struct gss_auth *new;
@@ -1188,7 +1189,7 @@ out:
 }
 
 static struct rpc_auth *
-gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct gss_auth *gss_auth;
 	struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 75d72e109a04..4b48228ee8c7 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -19,7 +19,7 @@ static struct rpc_auth null_auth;
 static struct rpc_cred null_cred;
 
 static struct rpc_auth *
-nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	atomic_inc(&null_auth.au_count);
 	return &null_auth;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dafd6b870ba3..185e56d4f9ae 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -30,7 +30,7 @@ static struct rpc_auth		unix_auth;
 static const struct rpc_credops	unix_credops;
 
 static struct rpc_auth *
-unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	dprintk("RPC:       creating UNIX authenticator for client %p\n",
 			clnt);
-- 
cgit v1.2.3


From b41e44b4cb230747d7ad56e38c9dc65369b1d381 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 30 May 2018 11:07:03 -0400
Subject: media: dvb/video.h: get rid of unused APIs

There are a number of other ioctls that aren't used anywhere
inside the Kernel tree.

Get rid of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../media/uapi/dvb/video-get-frame-rate.rst        |  61 -----------
 Documentation/media/uapi/dvb/video-get-navi.rst    |  84 ---------------
 .../media/uapi/dvb/video-set-attributes.rst        |  93 -----------------
 .../media/uapi/dvb/video-set-highlight.rst         |  86 ----------------
 Documentation/media/uapi/dvb/video-set-id.rst      |  75 --------------
 Documentation/media/uapi/dvb/video-set-spu.rst     |  85 ----------------
 Documentation/media/uapi/dvb/video-set-system.rst  |  77 --------------
 .../media/uapi/dvb/video_function_calls.rst        |   6 --
 Documentation/media/uapi/dvb/video_types.rst       | 113 ---------------------
 Documentation/media/video.h.rst.exceptions         |   2 -
 fs/compat_ioctl.c                                  |   7 --
 include/uapi/linux/dvb/video.h                     |  51 ----------
 12 files changed, 740 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/video-get-frame-rate.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-get-navi.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-attributes.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-highlight.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-id.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-spu.rst
 delete mode 100644 Documentation/media/uapi/dvb/video-set-system.rst

(limited to 'include')

diff --git a/Documentation/media/uapi/dvb/video-get-frame-rate.rst b/Documentation/media/uapi/dvb/video-get-frame-rate.rst
deleted file mode 100644
index 400042a854cf..000000000000
--- a/Documentation/media/uapi/dvb/video-get-frame-rate.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_GET_FRAME_RATE:
-
-====================
-VIDEO_GET_FRAME_RATE
-====================
-
-Name
-----
-
-VIDEO_GET_FRAME_RATE
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, VIDEO_GET_FRAME_RATE, unsigned int *rate)
-    :name: VIDEO_GET_FRAME_RATE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_GET_FRAME_RATE for this command.
-
-    -  .. row 3
-
-       -  unsigned int \*rate
-
-       -  Returns the framerate in number of frames per 1000 seconds.
-
-
-Description
------------
-
-This ioctl call asks the Video Device to return the current framerate.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/video-get-navi.rst b/Documentation/media/uapi/dvb/video-get-navi.rst
deleted file mode 100644
index 114a9ac48b9e..000000000000
--- a/Documentation/media/uapi/dvb/video-get-navi.rst
+++ /dev/null
@@ -1,84 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_GET_NAVI:
-
-==============
-VIDEO_GET_NAVI
-==============
-
-Name
-----
-
-VIDEO_GET_NAVI
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_GET_NAVI , struct video_navi_pack *navipack)
-    :name: VIDEO_GET_NAVI
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_GET_NAVI for this command.
-
-    -  .. row 3
-
-       -  video_navi_pack_t \*navipack
-
-       -  PCI or DSI pack (private stream 2) according to section ??.
-
-
-Description
------------
-
-This ioctl returns navigational information from the DVD stream. This is
-especially needed if an encoded stream has to be decoded by the
-hardware.
-
-.. c:type:: video_navi_pack
-
-.. code-block::c
-
-	typedef struct video_navi_pack {
-		int length;          /* 0 ... 1024 */
-		__u8 data[1024];
-	} video_navi_pack_t;
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EFAULT``
-
-       -  driver is not able to return navigational information
diff --git a/Documentation/media/uapi/dvb/video-set-attributes.rst b/Documentation/media/uapi/dvb/video-set-attributes.rst
deleted file mode 100644
index b2f11a6746e9..000000000000
--- a/Documentation/media/uapi/dvb/video-set-attributes.rst
+++ /dev/null
@@ -1,93 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_ATTRIBUTES:
-
-====================
-VIDEO_SET_ATTRIBUTES
-====================
-
-Name
-----
-
-VIDEO_SET_ATTRIBUTES
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_ATTRIBUTE ,video_attributes_t vattr)
-    :name: VIDEO_SET_ATTRIBUTE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_ATTRIBUTE for this command.
-
-    -  .. row 3
-
-       -  video_attributes_t vattr
-
-       -  video attributes according to section ??.
-
-
-Description
------------
-
-This ioctl is intended for DVD playback and allows you to set certain
-information about the stream. Some hardware may not need this
-information, but the call also tells the hardware to prepare for DVD
-playback.
-
-.. c:type:: video_attributes_t
-
-.. code-block::c
-
-	typedef __u16 video_attributes_t;
-	/*   bits: descr. */
-	/*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
-	/*   13-12 TV system (0=525/60, 1=625/50) */
-	/*   11-10 Aspect ratio (0=4:3, 3=16:9) */
-	/*    9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */
-	/*    7    line 21-1 data present in GOP (1=yes, 0=no) */
-	/*    6    line 21-2 data present in GOP (1=yes, 0=no) */
-	/*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
-	/*    2    source letterboxed (1=yes, 0=no) */
-	/*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid attribute setting.
diff --git a/Documentation/media/uapi/dvb/video-set-highlight.rst b/Documentation/media/uapi/dvb/video-set-highlight.rst
deleted file mode 100644
index 90aeafd923b7..000000000000
--- a/Documentation/media/uapi/dvb/video-set-highlight.rst
+++ /dev/null
@@ -1,86 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_HIGHLIGHT:
-
-===================
-VIDEO_SET_HIGHLIGHT
-===================
-
-Name
-----
-
-VIDEO_SET_HIGHLIGHT
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_HIGHLIGHT, struct video_highlight *vhilite)
-    :name: VIDEO_SET_HIGHLIGHT
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_HIGHLIGHT for this command.
-
-    -  .. row 3
-
-       -  video_highlight_t \*vhilite
-
-       -  SPU Highlight information according to section ??.
-
-
-Description
------------
-
-This ioctl sets the SPU highlight information for the menu access of a
-DVD.
-
-.. c:type:: video_highlight
-
-.. code-block:: c
-
-	typedef
-	struct video_highlight {
-		int     active;      /*    1=show highlight, 0=hide highlight */
-		__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
-				/*    3- 0  Background pixel contrast */
-		__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-				/*    3- 0  Emphasis pixel-1 contrast */
-		__u8    color1;      /*    7- 4  Pattern pixel color */
-				/*    3- 0  Background pixel color */
-		__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
-				/*    3- 0  Emphasis pixel-1 color */
-		__u32    ypos;       /*   23-22  auto action mode */
-				/*   21-12  start y */
-				/*    9- 0  end y */
-		__u32    xpos;       /*   23-22  button color number */
-				/*   21-12  start x */
-				/*    9- 0  end x */
-	} video_highlight_t;
-
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/video-set-id.rst b/Documentation/media/uapi/dvb/video-set-id.rst
deleted file mode 100644
index 18f66875ae3f..000000000000
--- a/Documentation/media/uapi/dvb/video-set-id.rst
+++ /dev/null
@@ -1,75 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_ID:
-
-============
-VIDEO_SET_ID
-============
-
-Name
-----
-
-VIDEO_SET_ID
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, VIDEO_SET_ID, int id)
-    :name: VIDEO_SET_ID
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_ID for this command.
-
-    -  .. row 3
-
-       -  int id
-
-       -  video sub-stream id
-
-
-Description
------------
-
-This ioctl selects which sub-stream is to be decoded if a program or
-system stream is sent to the video device.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  Invalid sub-stream id.
diff --git a/Documentation/media/uapi/dvb/video-set-spu.rst b/Documentation/media/uapi/dvb/video-set-spu.rst
deleted file mode 100644
index 739e5e7bd133..000000000000
--- a/Documentation/media/uapi/dvb/video-set-spu.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SPU:
-
-=============
-VIDEO_SET_SPU
-=============
-
-Name
-----
-
-VIDEO_SET_SPU
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SPU , struct video_spu *spu)
-    :name: VIDEO_SET_SPU
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_SPU for this command.
-
-    -  .. row 3
-
-       -  video_spu_t \*spu
-
-       -  SPU decoding (de)activation and subid setting according to section
-	  ??.
-
-
-Description
------------
-
-This ioctl activates or deactivates SPU decoding in a DVD input stream.
-It can only be used, if the driver is able to handle a DVD stream.
-
-.. c:type:: struct video_spu
-
-.. code-block:: c
-
-	typedef struct video_spu {
-		int active;
-		int stream_id;
-	} video_spu_t;
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  input is not a valid spu setting or driver cannot handle SPU.
diff --git a/Documentation/media/uapi/dvb/video-set-system.rst b/Documentation/media/uapi/dvb/video-set-system.rst
deleted file mode 100644
index e39cbe080ef7..000000000000
--- a/Documentation/media/uapi/dvb/video-set-system.rst
+++ /dev/null
@@ -1,77 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _VIDEO_SET_SYSTEM:
-
-================
-VIDEO_SET_SYSTEM
-================
-
-Name
-----
-
-VIDEO_SET_SYSTEM
-
-.. attention:: This ioctl is deprecated.
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, VIDEO_SET_SYSTEM , video_system_t system)
-    :name: VIDEO_SET_SYSTEM
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -  .. row 2
-
-       -  int request
-
-       -  Equals VIDEO_SET_FORMAT for this command.
-
-    -  .. row 3
-
-       -  video_system_t system
-
-       -  video system of TV output.
-
-
-Description
------------
-
-This ioctl sets the television output format. The format (see section
-??) may vary from the color format of the displayed MPEG stream. If the
-hardware is not able to display the requested format the call will
-return an error.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  system is not a valid or supported video system.
diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst b/Documentation/media/uapi/dvb/video_function_calls.rst
index 8d8383ffaeba..3f4f6c9ffad7 100644
--- a/Documentation/media/uapi/dvb/video_function_calls.rst
+++ b/Documentation/media/uapi/dvb/video_function_calls.rst
@@ -21,7 +21,6 @@ Video Function Calls
     video-get-status
     video-get-frame-count
     video-get-pts
-    video-get-frame-rate
     video-get-event
     video-command
     video-try-command
@@ -31,12 +30,7 @@ Video Function Calls
     video-fast-forward
     video-slowmotion
     video-get-capabilities
-    video-set-id
     video-clear-buffer
     video-set-streamtype
     video-set-format
-    video-set-system
-    video-set-highlight
-    video-set-spu
-    video-get-navi
     video-set-attributes
diff --git a/Documentation/media/uapi/dvb/video_types.rst b/Documentation/media/uapi/dvb/video_types.rst
index 4cfa00e5c934..a0942171596c 100644
--- a/Documentation/media/uapi/dvb/video_types.rst
+++ b/Documentation/media/uapi/dvb/video_types.rst
@@ -246,116 +246,3 @@ following bits set according to the hardwares capabilities.
      #define VIDEO_CAP_SPU    16
      #define VIDEO_CAP_NAVI   32
      #define VIDEO_CAP_CSS    64
-
-
-.. _video-system:
-
-video_system_t
-==============
-
-A call to VIDEO_SET_SYSTEM sets the desired video system for TV
-output. The following system types can be set:
-
-
-.. code-block:: c
-
-    typedef enum {
-	 VIDEO_SYSTEM_PAL,
-	 VIDEO_SYSTEM_NTSC,
-	 VIDEO_SYSTEM_PALN,
-	 VIDEO_SYSTEM_PALNc,
-	 VIDEO_SYSTEM_PALM,
-	 VIDEO_SYSTEM_NTSC60,
-	 VIDEO_SYSTEM_PAL60,
-	 VIDEO_SYSTEM_PALM60
-    } video_system_t;
-
-
-.. c:type:: video_highlight
-
-struct video_highlight
-======================
-
-Calling the ioctl VIDEO_SET_HIGHLIGHTS posts the SPU highlight
-information. The call expects the following format for that information:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_highlight {
-	 boolean active;      /*    1=show highlight, 0=hide highlight */
-	 uint8_t contrast1;   /*    7- 4  Pattern pixel contrast */
-		      /*    3- 0  Background pixel contrast */
-	 uint8_t contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-		      /*    3- 0  Emphasis pixel-1 contrast */
-	 uint8_t color1;      /*    7- 4  Pattern pixel color */
-		      /*    3- 0  Background pixel color */
-	 uint8_t color2;      /*    7- 4  Emphasis pixel-2 color */
-		      /*    3- 0  Emphasis pixel-1 color */
-	 uint32_t ypos;       /*   23-22  auto action mode */
-		      /*   21-12  start y */
-		      /*    9- 0  end y */
-	 uint32_t xpos;       /*   23-22  button color number */
-		      /*   21-12  start x */
-		      /*    9- 0  end x */
-     } video_highlight_t;
-
-
-.. c:type:: video_spu
-
-struct video_spu
-================
-
-Calling VIDEO_SET_SPU deactivates or activates SPU decoding, according
-to the following format:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_spu {
-	 boolean active;
-	 int stream_id;
-     } video_spu_t;
-
-
-.. c:type:: video_navi_pack
-
-struct video_navi_pack
-======================
-
-In order to get the navigational data the following structure has to be
-passed to the ioctl VIDEO_GET_NAVI:
-
-
-.. code-block:: c
-
-     typedef
-     struct video_navi_pack {
-	 int length;         /* 0 ... 1024 */
-	 uint8_t data[1024];
-     } video_navi_pack_t;
-
-
-.. _video-attributes-t:
-
-video_attributes_t
-==================
-
-The following attributes can be set by a call to VIDEO_SET_ATTRIBUTES:
-
-
-.. code-block:: c
-
-     typedef uint16_t video_attributes_t;
-     /*   bits: descr. */
-     /*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
-     /*   13-12 TV system (0=525/60, 1=625/50) */
-     /*   11-10 Aspect ratio (0=4:3, 3=16:9) */
-     /*    9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca */
-     /*    7    line 21-1 data present in GOP (1=yes, 0=no) */
-     /*    6    line 21-2 data present in GOP (1=yes, 0=no) */
-     /*    5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 */
-     /*    2    source letterboxed (1=yes, 0=no) */
-     /*    0    film/camera mode (0=camera, 1=film (625/50 only)) */
diff --git a/Documentation/media/video.h.rst.exceptions b/Documentation/media/video.h.rst.exceptions
index 89d7c3ef2da7..371cdbd7d062 100644
--- a/Documentation/media/video.h.rst.exceptions
+++ b/Documentation/media/video.h.rst.exceptions
@@ -34,6 +34,4 @@ replace typedef video_displayformat_t :c:type:`video_displayformat`
 replace typedef video_size_t :c:type:`video_size`
 replace typedef video_stream_source_t :c:type:`video_stream_source`
 replace typedef video_play_state_t :c:type:`video_play_state`
-replace typedef video_highlight_t :c:type:`video_highlight`
-replace typedef video_spu_t :c:type:`video_spu`
 replace typedef video_navi_pack_t :c:type:`video_navi_pack`
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index fdb5ef9b5d06..59216b172003 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1205,16 +1205,9 @@ COMPATIBLE_IOCTL(VIDEO_FAST_FORWARD)
 COMPATIBLE_IOCTL(VIDEO_SLOWMOTION)
 COMPATIBLE_IOCTL(VIDEO_GET_CAPABILITIES)
 COMPATIBLE_IOCTL(VIDEO_CLEAR_BUFFER)
-COMPATIBLE_IOCTL(VIDEO_SET_ID)
 COMPATIBLE_IOCTL(VIDEO_SET_STREAMTYPE)
 COMPATIBLE_IOCTL(VIDEO_SET_FORMAT)
-COMPATIBLE_IOCTL(VIDEO_SET_SYSTEM)
-COMPATIBLE_IOCTL(VIDEO_SET_HIGHLIGHT)
-COMPATIBLE_IOCTL(VIDEO_SET_SPU)
-COMPATIBLE_IOCTL(VIDEO_GET_NAVI)
-COMPATIBLE_IOCTL(VIDEO_SET_ATTRIBUTES)
 COMPATIBLE_IOCTL(VIDEO_GET_SIZE)
-COMPATIBLE_IOCTL(VIDEO_GET_FRAME_RATE)
 /* cec */
 COMPATIBLE_IOCTL(CEC_ADAP_G_CAPS)
 COMPATIBLE_IOCTL(CEC_ADAP_G_LOG_ADDRS)
diff --git a/include/uapi/linux/dvb/video.h b/include/uapi/linux/dvb/video.h
index 6a0c9757b7ba..43ba8b0a3d14 100644
--- a/include/uapi/linux/dvb/video.h
+++ b/include/uapi/linux/dvb/video.h
@@ -37,18 +37,6 @@ typedef enum {
 } video_format_t;
 
 
-typedef enum {
-	 VIDEO_SYSTEM_PAL,
-	 VIDEO_SYSTEM_NTSC,
-	 VIDEO_SYSTEM_PALN,
-	 VIDEO_SYSTEM_PALNc,
-	 VIDEO_SYSTEM_PALM,
-	 VIDEO_SYSTEM_NTSC60,
-	 VIDEO_SYSTEM_PAL60,
-	 VIDEO_SYSTEM_PALM60
-} video_system_t;
-
-
 typedef enum {
 	VIDEO_PAN_SCAN,       /* use pan and scan format */
 	VIDEO_LETTER_BOX,     /* use letterbox format */
@@ -160,38 +148,6 @@ struct video_still_picture {
 };
 
 
-typedef
-struct video_highlight {
-	int     active;      /*    1=show highlight, 0=hide highlight */
-	__u8    contrast1;   /*    7- 4  Pattern pixel contrast */
-			     /*    3- 0  Background pixel contrast */
-	__u8    contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-			     /*    3- 0  Emphasis pixel-1 contrast */
-	__u8    color1;      /*    7- 4  Pattern pixel color */
-			     /*    3- 0  Background pixel color */
-	__u8    color2;      /*    7- 4  Emphasis pixel-2 color */
-			     /*    3- 0  Emphasis pixel-1 color */
-	__u32    ypos;       /*   23-22  auto action mode */
-			     /*   21-12  start y */
-			     /*    9- 0  end y */
-	__u32    xpos;       /*   23-22  button color number */
-			     /*   21-12  start x */
-			     /*    9- 0  end x */
-} video_highlight_t;
-
-
-typedef struct video_spu {
-	int active;
-	int stream_id;
-} video_spu_t;
-
-
-typedef struct video_navi_pack {
-	int length;          /* 0 ... 1024 */
-	__u8 data[1024];
-} video_navi_pack_t;
-
-
 typedef __u16 video_attributes_t;
 /*   bits: descr. */
 /*   15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */
@@ -236,16 +192,9 @@ typedef __u16 video_attributes_t;
 #define VIDEO_SLOWMOTION           _IO('o', 32)
 #define VIDEO_GET_CAPABILITIES     _IOR('o', 33, unsigned int)
 #define VIDEO_CLEAR_BUFFER         _IO('o',  34)
-#define VIDEO_SET_ID               _IO('o', 35)
 #define VIDEO_SET_STREAMTYPE       _IO('o', 36)
 #define VIDEO_SET_FORMAT           _IO('o', 37)
-#define VIDEO_SET_SYSTEM           _IO('o', 38)
-#define VIDEO_SET_HIGHLIGHT        _IOW('o', 39, video_highlight_t)
-#define VIDEO_SET_SPU              _IOW('o', 50, video_spu_t)
-#define VIDEO_GET_NAVI             _IOR('o', 52, video_navi_pack_t)
-#define VIDEO_SET_ATTRIBUTES       _IO('o', 53)
 #define VIDEO_GET_SIZE             _IOR('o', 55, video_size_t)
-#define VIDEO_GET_FRAME_RATE       _IOR('o', 56, unsigned int)
 
 /**
  * VIDEO_GET_PTS
-- 
cgit v1.2.3


From d21c249b26311dd193b100e65fc9e7ae96233d40 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 30 May 2018 11:07:04 -0400
Subject: media: dvb/audio.h: get rid of unused APIs

There are a number of other ioctls that aren't used anywhere
inside the Kernel tree.

Get rid of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/audio.h.rst.exceptions         |  3 -
 Documentation/media/uapi/dvb/audio-get-pts.rst     | 65 ---------------------
 .../media/uapi/dvb/audio-set-attributes.rst        | 67 ----------------------
 Documentation/media/uapi/dvb/audio-set-ext-id.rst  | 66 ---------------------
 Documentation/media/uapi/dvb/audio-set-karaoke.rst | 66 ---------------------
 Documentation/media/uapi/dvb/audio_data_types.rst  | 37 ------------
 .../media/uapi/dvb/audio_function_calls.rst        |  4 --
 fs/compat_ioctl.c                                  |  3 -
 include/uapi/linux/dvb/audio.h                     | 37 ------------
 9 files changed, 348 deletions(-)
 delete mode 100644 Documentation/media/uapi/dvb/audio-get-pts.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-attributes.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-ext-id.rst
 delete mode 100644 Documentation/media/uapi/dvb/audio-set-karaoke.rst

(limited to 'include')

diff --git a/Documentation/media/audio.h.rst.exceptions b/Documentation/media/audio.h.rst.exceptions
index f40f3cbfe4c9..940458774cf6 100644
--- a/Documentation/media/audio.h.rst.exceptions
+++ b/Documentation/media/audio.h.rst.exceptions
@@ -1,9 +1,6 @@
 # Ignore header name
 ignore define _DVBAUDIO_H_
 
-# Typedef pointing to structs
-replace typedef audio_karaoke_t :c:type:`audio_karaoke`
-
 # Undocumented audio caps, as this is a deprecated API anyway
 ignore define AUDIO_CAP_DTS
 ignore define AUDIO_CAP_LPCM
diff --git a/Documentation/media/uapi/dvb/audio-get-pts.rst b/Documentation/media/uapi/dvb/audio-get-pts.rst
deleted file mode 100644
index 2d1396b003de..000000000000
--- a/Documentation/media/uapi/dvb/audio-get-pts.rst
+++ /dev/null
@@ -1,65 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_GET_PTS:
-
-=============
-AUDIO_GET_PTS
-=============
-
-Name
-----
-
-AUDIO_GET_PTS
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int ioctl(int fd, AUDIO_GET_PTS, __u64 *pts)
-    :name: AUDIO_GET_PTS
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  __u64 \*pts
-
-       -  Returns the 33-bit timestamp as defined in ITU T-REC-H.222.0 /
-	  ISO/IEC 13818-1.
-
-	  The PTS should belong to the currently played frame if possible,
-	  but may also be a value close to it like the PTS of the last
-	  decoded frame or the last PTS extracted by the PES parser.
-
-
-Description
------------
-
-This ioctl is obsolete. Do not use in new drivers. If you need this
-functionality, then please contact the linux-media mailing list
-(`https://linuxtv.org/lists.php <https://linuxtv.org/lists.php>`__).
-
-This ioctl call asks the Audio Device to return the current PTS
-timestamp.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/media/uapi/dvb/audio-set-attributes.rst b/Documentation/media/uapi/dvb/audio-set-attributes.rst
deleted file mode 100644
index f0c6153ca80f..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-attributes.rst
+++ /dev/null
@@ -1,67 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_ATTRIBUTES:
-
-====================
-AUDIO_SET_ATTRIBUTES
-====================
-
-Name
-----
-
-AUDIO_SET_ATTRIBUTES
-
-.. attention:: This ioctl is deprecated
-
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, AUDIO_SET_ATTRIBUTES, struct audio_attributes *attr )
-    :name: AUDIO_SET_ATTRIBUTES
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  audio_attributes_t attr
-
-       -  audio attributes according to section ??
-
-
-Description
------------
-
-This ioctl is intended for DVD playback and allows you to set certain
-information about the audio stream.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  attr is not a valid or supported attribute setting.
diff --git a/Documentation/media/uapi/dvb/audio-set-ext-id.rst b/Documentation/media/uapi/dvb/audio-set-ext-id.rst
deleted file mode 100644
index 8503c47f26bd..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-ext-id.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_EXT_ID:
-
-================
-AUDIO_SET_EXT_ID
-================
-
-Name
-----
-
-AUDIO_SET_EXT_ID
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int  ioctl(fd, AUDIO_SET_EXT_ID, int id)
-    :name: AUDIO_SET_EXT_ID
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  int id
-
-       -  audio sub_stream_id
-
-
-Description
------------
-
-This ioctl can be used to set the extension id for MPEG streams in DVD
-playback. Only the first 3 bits are recognized.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  id is not a valid id.
diff --git a/Documentation/media/uapi/dvb/audio-set-karaoke.rst b/Documentation/media/uapi/dvb/audio-set-karaoke.rst
deleted file mode 100644
index c759952d88aa..000000000000
--- a/Documentation/media/uapi/dvb/audio-set-karaoke.rst
+++ /dev/null
@@ -1,66 +0,0 @@
-.. -*- coding: utf-8; mode: rst -*-
-
-.. _AUDIO_SET_KARAOKE:
-
-=================
-AUDIO_SET_KARAOKE
-=================
-
-Name
-----
-
-AUDIO_SET_KARAOKE
-
-.. attention:: This ioctl is deprecated
-
-Synopsis
---------
-
-.. c:function:: int ioctl(fd, AUDIO_SET_KARAOKE, struct audio_karaoke *karaoke)
-    :name: AUDIO_SET_KARAOKE
-
-
-Arguments
----------
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -
-
-       -  int fd
-
-       -  File descriptor returned by a previous call to open().
-
-    -
-
-       -  audio_karaoke_t \*karaoke
-
-       -  karaoke settings according to section ??.
-
-
-Description
------------
-
-This ioctl allows one to set the mixer settings for a karaoke DVD.
-
-
-Return Value
-------------
-
-On success 0 is returned, on error -1 and the ``errno`` variable is set
-appropriately. The generic error codes are described at the
-:ref:`Generic Error Codes <gen-errors>` chapter.
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-
-
-    -  .. row 1
-
-       -  ``EINVAL``
-
-       -  karaoke is not a valid or supported karaoke setting.
diff --git a/Documentation/media/uapi/dvb/audio_data_types.rst b/Documentation/media/uapi/dvb/audio_data_types.rst
index 6b93359d64f7..5bffa2c98a24 100644
--- a/Documentation/media/uapi/dvb/audio_data_types.rst
+++ b/Documentation/media/uapi/dvb/audio_data_types.rst
@@ -114,40 +114,3 @@ following bits set according to the hardwares capabilities.
      #define AUDIO_CAP_OGG   64
      #define AUDIO_CAP_SDDS 128
      #define AUDIO_CAP_AC3  256
-
-.. c:type:: audio_karaoke
-
-The ioctl AUDIO_SET_KARAOKE uses the following format:
-
-
-.. code-block:: c
-
-    typedef
-    struct audio_karaoke {
-	int vocal1;
-	int vocal2;
-	int melody;
-    } audio_karaoke_t;
-
-If Vocal1 or Vocal2 are non-zero, they get mixed into left and right t
-at 70% each. If both, Vocal1 and Vocal2 are non-zero, Vocal1 gets mixed
-into the left channel and Vocal2 into the right channel at 100% each. Ff
-Melody is non-zero, the melody channel gets mixed into left and right.
-
-
-.. c:type:: audio_attributes
-
-The following attributes can be set by a call to AUDIO_SET_ATTRIBUTES:
-
-
-.. code-block:: c
-
-     typedef uint16_t audio_attributes_t;
-     /*   bits: descr. */
-     /*   15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */
-     /*   12    multichannel extension */
-     /*   11-10 audio type (0=not spec, 1=language included) */
-     /*    9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) */
-     /*    7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit,  */
-     /*    5- 4 Sample frequency fs (0=48kHz, 1=96kHz) */
-     /*    2- 0 number of audio channels (n+1 channels) */
diff --git a/Documentation/media/uapi/dvb/audio_function_calls.rst b/Documentation/media/uapi/dvb/audio_function_calls.rst
index 0bb56f0cfed4..7dba16285dab 100644
--- a/Documentation/media/uapi/dvb/audio_function_calls.rst
+++ b/Documentation/media/uapi/dvb/audio_function_calls.rst
@@ -22,13 +22,9 @@ Audio Function Calls
     audio-set-bypass-mode
     audio-channel-select
     audio-bilingual-channel-select
-    audio-get-pts
     audio-get-status
     audio-get-capabilities
     audio-clear-buffer
     audio-set-id
     audio-set-mixer
     audio-set-streamtype
-    audio-set-ext-id
-    audio-set-attributes
-    audio-set-karaoke
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 59216b172003..a9b00942e87d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1178,9 +1178,6 @@ COMPATIBLE_IOCTL(AUDIO_CLEAR_BUFFER)
 COMPATIBLE_IOCTL(AUDIO_SET_ID)
 COMPATIBLE_IOCTL(AUDIO_SET_MIXER)
 COMPATIBLE_IOCTL(AUDIO_SET_STREAMTYPE)
-COMPATIBLE_IOCTL(AUDIO_SET_EXT_ID)
-COMPATIBLE_IOCTL(AUDIO_SET_ATTRIBUTES)
-COMPATIBLE_IOCTL(AUDIO_SET_KARAOKE)
 COMPATIBLE_IOCTL(DMX_START)
 COMPATIBLE_IOCTL(DMX_STOP)
 COMPATIBLE_IOCTL(DMX_SET_FILTER)
diff --git a/include/uapi/linux/dvb/audio.h b/include/uapi/linux/dvb/audio.h
index 69f7a85d81b1..afeae063e640 100644
--- a/include/uapi/linux/dvb/audio.h
+++ b/include/uapi/linux/dvb/audio.h
@@ -67,27 +67,6 @@ typedef struct audio_status {
 } audio_status_t;                              /* separate decoder hardware */
 
 
-typedef
-struct audio_karaoke {  /* if Vocal1 or Vocal2 are non-zero, they get mixed  */
-	int vocal1;    /* into left and right t at 70% each */
-	int vocal2;    /* if both, Vocal1 and Vocal2 are non-zero, Vocal1 gets*/
-	int melody;    /* mixed into the left channel and */
-		       /* Vocal2 into the right channel at 100% each. */
-		       /* if Melody is non-zero, the melody channel gets mixed*/
-} audio_karaoke_t;     /* into left and right  */
-
-
-typedef __u16 audio_attributes_t;
-/*   bits: descr. */
-/*   15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */
-/*   12    multichannel extension */
-/*   11-10 audio type (0=not spec, 1=language included) */
-/*    9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) */
-/*    7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit,  */
-/*    5- 4 Sample frequency fs (0=48kHz, 1=96kHz) */
-/*    2- 0 number of audio channels (n+1 channels) */
-
-
 /* for GET_CAPABILITIES and SET_FORMAT, the latter should only set one bit */
 #define AUDIO_CAP_DTS    1
 #define AUDIO_CAP_LPCM   2
@@ -115,22 +94,6 @@ typedef __u16 audio_attributes_t;
 #define AUDIO_SET_ID               _IO('o', 13)
 #define AUDIO_SET_MIXER            _IOW('o', 14, audio_mixer_t)
 #define AUDIO_SET_STREAMTYPE       _IO('o', 15)
-#define AUDIO_SET_EXT_ID           _IO('o', 16)
-#define AUDIO_SET_ATTRIBUTES       _IOW('o', 17, audio_attributes_t)
-#define AUDIO_SET_KARAOKE          _IOW('o', 18, audio_karaoke_t)
-
-/**
- * AUDIO_GET_PTS
- *
- * Read the 33 bit presentation time stamp as defined
- * in ITU T-REC-H.222.0 / ISO/IEC 13818-1.
- *
- * The PTS should belong to the currently played
- * frame if possible, but may also be a value close to it
- * like the PTS of the last decoded frame or the last PTS
- * extracted by the PES parser.
- */
-#define AUDIO_GET_PTS              _IOR('o', 19, __u64)
 #define AUDIO_BILINGUAL_CHANNEL_SELECT _IO('o', 20)
 
 #endif /* _DVBAUDIO_H_ */
-- 
cgit v1.2.3


From a449938297e55e7e8958f8b48583f7d342da1930 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Fri, 13 Jul 2018 11:28:31 +0100
Subject: KVM: s390: Add huge page enablement control

General KVM huge page support on s390 has to be enabled via the
kvm.hpage module parameter. Either nested or hpage can be enabled, as
we currently do not support vSIE for huge backed guests. Once the vSIE
support is added we will either drop the parameter or enable it as
default.

For a guest the feature has to be enabled through the new
KVM_CAP_S390_HPAGE_1M capability and the hpage module
parameter. Enabling it means that cmm can't be enabled for the vm and
disables pfmf and storage key interpretation.

This is due to the fact that in some cases, in upcoming patches, we
have to split huge pages in the guest mapping to be able to set more
granular memory protection on 4k pages. These split pages have fake
page tables that are not visible to the Linux memory management which
subsequently will not manage its PGSTEs, while the SIE will. Disabling
these features lets us manage PGSTE data in a consistent matter and
solve that problem.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 16 +++++++++++++++
 arch/s390/kvm/kvm-s390.c          | 42 +++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/kvm.h          |  1 +
 3 files changed, 57 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index d10944e619d3..cb8db4f9d097 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4391,6 +4391,22 @@ all such vmexits.
 
 Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
 
+7.14 KVM_CAP_S390_HPAGE_1M
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, -EINVAL if hpage module parameter was not set
+	 or cmma is enabled
+
+With this capability the KVM support for memory backing with 1m pages
+through hugetlbfs can be enabled for a VM. After the capability is
+enabled, cmma can't be enabled anymore and pfmfi and the storage key
+interpretation are disabled. If cmma has already been enabled or the
+hpage module parameter is not set to 1, -EINVAL is returned.
+
+While it is generally possible to create a huge page backed VM without
+this capability, the VM will not be able to run.
+
 8. Other capabilities.
 ----------------------
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 662f4d8046db..f9d90337e64a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -172,6 +172,10 @@ static int nested;
 module_param(nested, int, S_IRUGO);
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
+/* allow 1m huge page guest backing, if !nested */
+static int hpage;
+module_param(hpage, int, 0444);
+MODULE_PARM_DESC(hpage, "1m huge page backing support");
 
 /*
  * For now we handle at most 16 double words as this is what the s390 base
@@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_AIS_MIGRATION:
 		r = 1;
 		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		r = 0;
+		if (hpage)
+			r = 1;
+		break;
 	case KVM_CAP_S390_MEM_OP:
 		r = MEM_OP_MAX_SIZE;
 		break;
@@ -678,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 			 r ? "(not available)" : "(success)");
 		break;
+	case KVM_CAP_S390_HPAGE_1M:
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			r = -EBUSY;
+		else if (!hpage || kvm->arch.use_cmma)
+			r = -EINVAL;
+		else {
+			r = 0;
+			kvm->mm->context.allow_gmap_hpage_1m = 1;
+			/*
+			 * We might have to create fake 4k page
+			 * tables. To avoid that the hardware works on
+			 * stale PGSTEs, we emulate these instructions.
+			 */
+			kvm->arch.use_skf = 0;
+			kvm->arch.use_pfmfi = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
+			 r ? "(not available)" : "(success)");
+		break;
 	case KVM_CAP_S390_USER_STSI:
 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 		kvm->arch.user_stsi = 1;
@@ -725,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		if (!sclp.has_cmma)
 			break;
 
-		ret = -EBUSY;
 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 		mutex_lock(&kvm->lock);
-		if (!kvm->created_vcpus) {
+		if (kvm->created_vcpus)
+			ret = -EBUSY;
+		else if (kvm->mm->context.allow_gmap_hpage_1m)
+			ret = -EINVAL;
+		else {
 			kvm->arch.use_cmma = 1;
 			/* Not compatible with cmma. */
 			kvm->arch.use_pfmfi = 0;
@@ -4102,6 +4135,11 @@ static int __init kvm_s390_init(void)
 		return -ENODEV;
 	}
 
+	if (nested && hpage) {
+		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
+		return -EINVAL;
+	}
+
 	for (i = 0; i < 16; i++)
 		kvm_s390_fac_base[i] |=
 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b6270a3b38e9..b955b986b341 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_GET_MSR_FEATURES 153
 #define KVM_CAP_HYPERV_EVENTFD 154
 #define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_S390_HPAGE_1M 156
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From e6753f23d961d601dbae50a2fc2a3975c9715b14 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:22 -0700
Subject: tracepoint: Make rcuidle tracepoint callers use SRCU

In recent tests with IRQ on/off tracepoints, a large performance
overhead ~10% is noticed when running hackbench. This is root caused to
calls to rcu_irq_enter_irqson and rcu_irq_exit_irqson from the
tracepoint code. Following a long discussion on the list [1] about this,
we concluded that srcu is a better alternative for use during rcu idle.
Although it does involve extra barriers, its lighter than the sched-rcu
version which has to do additional RCU calls to notify RCU idle about
entry into RCU sections.

In this patch, we change the underlying implementation of the
trace_*_rcuidle API to use SRCU. This has shown to improve performance
alot for the high frequency irq enable/disable tracepoints.

Test: Tested idle and preempt/irq tracepoints.

Here are some performance numbers:

With a run of the following 30 times on a single core x86 Qemu instance
with 1GB memory:
hackbench -g 4 -f 2 -l 3000

Completion times in seconds. CONFIG_PROVE_LOCKING=y.

No patches (without this series)
Mean: 3.048
Median: 3.025
Std Dev: 0.064

With Lockdep using irq tracepoints with RCU implementation:
Mean: 3.451   (-11.66 %)
Median: 3.447 (-12.22%)
Std Dev: 0.049

With Lockdep using irq tracepoints with SRCU implementation (this series):
Mean: 3.020   (I would consider the improvement against the "without
	       this series" case as just noise).
Median: 3.013
Std Dev: 0.033

[1] https://patchwork.kernel.org/patch/10344297/

[remove rcu_read_lock_sched_notrace as its the equivalent of
preempt_disable_notrace and is unnecessary to call in tracepoint code]
Link: http://lkml.kernel.org/r/20180730222423.196630-3-joel@joelfernandes.org

Cleaned-up-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ Simplified WARN_ON_ONCE() ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 40 ++++++++++++++++++++++++++++++++--------
 kernel/tracepoint.c        | 16 +++++++++++++++-
 2 files changed, 47 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 19a690b559ca..d9a084c72541 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -15,6 +15,7 @@
  */
 
 #include <linux/smp.h>
+#include <linux/srcu.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/cpumask.h>
@@ -33,6 +34,8 @@ struct trace_eval_map {
 
 #define TRACEPOINT_DEFAULT_PRIO	10
 
+extern struct srcu_struct tracepoint_srcu;
+
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -75,10 +78,16 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
  * probe unregistration and the end of module exit to make sure there is no
  * caller executing a probe when it is freed.
  */
+#ifdef CONFIG_TRACEPOINTS
 static inline void tracepoint_synchronize_unregister(void)
 {
+	synchronize_srcu(&tracepoint_srcu);
 	synchronize_sched();
 }
+#else
+static inline void tracepoint_synchronize_unregister(void)
+{ }
+#endif
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 extern int syscall_regfunc(void);
@@ -129,18 +138,31 @@ extern void syscall_unregfunc(void);
  * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just
  * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
  */
-#define __DO_TRACE(tp, proto, args, cond, rcucheck)			\
+#define __DO_TRACE(tp, proto, args, cond, rcuidle)			\
 	do {								\
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
+		int __maybe_unused idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
-		if (rcucheck)						\
-			rcu_irq_enter_irqson();				\
-		rcu_read_lock_sched_notrace();				\
-		it_func_ptr = rcu_dereference_sched((tp)->funcs);	\
+									\
+		/* srcu can't be used from NMI */			\
+		WARN_ON_ONCE(rcuidle && in_nmi());			\
+									\
+		/* keep srcu and sched-rcu usage consistent */		\
+		preempt_disable_notrace();				\
+									\
+		/*							\
+		 * For rcuidle callers, use srcu since sched-rcu	\
+		 * doesn't work from the idle path.			\
+		 */							\
+		if (rcuidle)						\
+			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+									\
+		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
+									\
 		if (it_func_ptr) {					\
 			do {						\
 				it_func = (it_func_ptr)->func;		\
@@ -148,9 +170,11 @@ extern void syscall_unregfunc(void);
 				((void(*)(proto))(it_func))(args);	\
 			} while ((++it_func_ptr)->func);		\
 		}							\
-		rcu_read_unlock_sched_notrace();			\
-		if (rcucheck)						\
-			rcu_irq_exit_irqson();				\
+									\
+		if (rcuidle)						\
+			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+									\
+		preempt_enable_notrace();				\
 	} while (0)
 
 #ifndef MODULE
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 6dc6356c3327..955148d91b74 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -31,6 +31,9 @@
 extern struct tracepoint * const __start___tracepoints_ptrs[];
 extern struct tracepoint * const __stop___tracepoints_ptrs[];
 
+DEFINE_SRCU(tracepoint_srcu);
+EXPORT_SYMBOL_GPL(tracepoint_srcu);
+
 /* Set to 1 to enable tracepoint debug output */
 static const int tracepoint_debug;
 
@@ -67,16 +70,27 @@ static inline void *allocate_probes(int count)
 	return p == NULL ? NULL : p->probes;
 }
 
-static void rcu_free_old_probes(struct rcu_head *head)
+static void srcu_free_old_probes(struct rcu_head *head)
 {
 	kfree(container_of(head, struct tp_probes, rcu));
 }
 
+static void rcu_free_old_probes(struct rcu_head *head)
+{
+	call_srcu(&tracepoint_srcu, head, srcu_free_old_probes);
+}
+
 static inline void release_probes(struct tracepoint_func *old)
 {
 	if (old) {
 		struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
+		/*
+		 * Tracepoint probes are protected by both sched RCU and SRCU,
+		 * by calling the SRCU callback in the sched RCU callback we
+		 * cover both cases. So let us chain the SRCU and sched RCU
+		 * callbacks to wait for both grace periods.
+		 */
 		call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
 	}
 }
-- 
cgit v1.2.3


From f696bf6d64b195b83ca1bdb7cd33c999c9dcf514 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 18 Jul 2018 09:25:14 -0700
Subject: RDMA: Constify the argument of the work request conversion functions

When posting a send work request, the work request that is posted is not
modified by any of the RDMA drivers. Make this explicit by constifying
most ib_send_wr pointers in RDMA transport drivers.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c     | 18 ++++++++--------
 drivers/infiniband/hw/cxgb3/iwch_qp.c        | 20 +++++++++---------
 drivers/infiniband/hw/cxgb4/qp.c             | 21 +++++++++++--------
 drivers/infiniband/hw/hns/hns_roce_device.h  |  2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c   |  4 ++--
 drivers/infiniband/hw/mlx4/qp.c              | 25 +++++++++++-----------
 drivers/infiniband/hw/mlx5/mlx5_ib.h         |  2 +-
 drivers/infiniband/hw/mlx5/qp.c              | 31 ++++++++++++++--------------
 drivers/infiniband/hw/mthca/mthca_qp.c       |  8 +++----
 drivers/infiniband/hw/nes/nes_verbs.c        |  3 ++-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  | 12 +++++------
 drivers/infiniband/hw/qedr/qedr_roce_cm.c    |  4 ++--
 drivers/infiniband/hw/qedr/verbs.c           |  7 ++++---
 drivers/infiniband/hw/qib/qib_verbs.h        |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c |  3 ++-
 drivers/infiniband/sw/rdmavt/qp.c            |  4 ++--
 drivers/infiniband/sw/rxe/rxe_verbs.c        |  8 +++----
 include/rdma/ib_verbs.h                      | 11 +++++-----
 18 files changed, 97 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index dd800d153aa2..a0082e0bb8e2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1876,7 +1876,7 @@ out:
 /* Routine for sending QP1 packets for RoCE V1 an V2
  */
 static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
-				     struct ib_send_wr *wr,
+				     const struct ib_send_wr *wr,
 				     struct bnxt_qplib_swqe *wqe,
 				     int payload_size)
 {
@@ -2093,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp)
 }
 
 static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
-				  struct ib_send_wr *wr,
+				  const struct ib_send_wr *wr,
 				  struct bnxt_qplib_swqe *wqe)
 {
 	struct bnxt_re_ah *ah = NULL;
@@ -2131,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
 	return 0;
 }
 
-static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
 				  struct bnxt_qplib_swqe *wqe)
 {
 	switch (wr->opcode) {
@@ -2163,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr,
 				    struct bnxt_qplib_swqe *wqe)
 {
 	switch (wr->opcode) {
@@ -2190,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
 				 struct bnxt_qplib_swqe *wqe)
 {
 	wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
@@ -2209,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
+static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
 				 struct bnxt_qplib_swqe *wqe)
 {
 	struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
@@ -2251,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
 }
 
 static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
-				    struct ib_send_wr *wr,
+				    const struct ib_send_wr *wr,
 				    struct bnxt_qplib_swqe *wqe)
 {
 	/*  Copy the inline data to the data  field */
@@ -2281,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
 }
 
 static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
-				   struct ib_send_wr *wr,
+				   const struct ib_send_wr *wr,
 				   struct bnxt_qplib_swqe *wqe)
 {
 	int payload_sz = 0;
@@ -2313,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
 
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
-				struct ib_send_wr *wr)
+				       const struct ib_send_wr *wr)
 {
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0, payload_sz = 0;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3871e1fd8395..29ab6910a004 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -39,8 +39,8 @@
 
 #define NO_SUPPORT -1
 
-static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 * flit_cnt)
+static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
-				 u8 *flit_cnt)
+static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
+			    u8 *flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 *flit_cnt)
+static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *flit_cnt)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
@@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
-			  u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
+			u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
 {
 	struct iwch_mr *mhp = to_iwch_mr(wr->mr);
 	int i;
@@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
 	return 0;
 }
 
-static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 *flit_cnt)
+static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
+			  u8 *flit_cnt)
 {
 	wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->local_inv.reserved = 0;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 08dc555942af..dbd697b113ec 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -410,7 +410,7 @@ free_sq_qid:
 }
 
 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
-		      struct ib_send_wr *wr, int max, u32 *plenp)
+		      const struct ib_send_wr *wr, int max, u32 *plenp)
 {
 	u8 *dstp, *srcp;
 	u32 plen = 0;
@@ -480,7 +480,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
 }
 
 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
-			   struct ib_send_wr *wr, u8 *len16)
+			   const struct ib_send_wr *wr, u8 *len16)
 {
 	u32 plen;
 	int size;
@@ -547,7 +547,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
 }
 
 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
-			    struct ib_send_wr *wr, u8 *len16)
+			    const struct ib_send_wr *wr, u8 *len16)
 {
 	u32 plen;
 	int size;
@@ -589,7 +589,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *len16)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
@@ -648,7 +649,7 @@ static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr,
 }
 
 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
-			      struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+			      const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
 			      u8 *len16)
 {
 	__be64 *p = (__be64 *)fr->pbl;
@@ -680,8 +681,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
 }
 
 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
-			struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16,
-			bool dsgl_supported)
+			const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+			u8 *len16, bool dsgl_supported)
 {
 	struct fw_ri_immd *imdp;
 	__be64 *p;
@@ -743,7 +744,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
+			  u8 *len16)
 {
 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->inv.r2 = 0;
@@ -862,7 +864,8 @@ static int ib_to_fw_opcode(int ib_opcode)
 	return opcode;
 }
 
-static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int complete_sq_drain_wr(struct c4iw_qp *qhp,
+				const struct ib_send_wr *wr)
 {
 	struct t4_cqe cqe = {};
 	struct c4iw_cq *schp;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index a595e72f243e..ceb490c732c0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -985,7 +985,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 			       int cnt);
-__be32 send_ieth(struct ib_send_wr *wr);
+__be32 send_ieth(const struct ib_send_wr *wr);
 int to_hr_qp_type(int qp_type);
 
 struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6fa12f2262f4..a9bc6b279175 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -53,7 +53,7 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 	dseg->len  = cpu_to_le32(sg->length);
 }
 
-static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
 	struct hns_roce_v2_wqe_data_seg *dseg;
@@ -100,7 +100,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
 	}
 }
 
-static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
+static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
 			     struct ib_send_wr **bad_wr)
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 408e720fd923..44fc684b5e3a 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2925,7 +2925,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 }
 
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
-				  struct ib_ud_wr *wr,
+				  const struct ib_ud_wr *wr,
 				  void *wqe, unsigned *mlx_seg_len)
 {
 	struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
@@ -3073,7 +3073,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
 }
 
 #define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
 			    void *wqe, unsigned *mlx_seg_len)
 {
 	struct ib_device *ib_dev = sqp->qp.ibqp.device;
@@ -3355,7 +3355,7 @@ static __be32 convert_access(int acc)
 }
 
 static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
-			struct ib_reg_wr *wr)
+			const struct ib_reg_wr *wr)
 {
 	struct mlx4_ib_mr *mr = to_mmr(wr->mr);
 
@@ -3385,7 +3385,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
 }
 
 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
-		struct ib_atomic_wr *wr)
+			   const struct ib_atomic_wr *wr)
 {
 	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->swap);
@@ -3401,7 +3401,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
 }
 
 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
-				  struct ib_atomic_wr *wr)
+				  const struct ib_atomic_wr *wr)
 {
 	aseg->swap_add		= cpu_to_be64(wr->swap);
 	aseg->swap_add_mask	= cpu_to_be64(wr->swap_mask);
@@ -3410,7 +3410,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
 	dseg->dqpn = cpu_to_be32(wr->remote_qpn);
@@ -3421,7 +3421,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 				    struct mlx4_wqe_datagram_seg *dseg,
-				    struct ib_ud_wr *wr,
+				    const struct ib_ud_wr *wr,
 				    enum mlx4_ib_qp_type qpt)
 {
 	union mlx4_ext_av *av = &to_mah(wr->ah)->av;
@@ -3443,7 +3443,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
 
-static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe,
+				unsigned *mlx_seg_len)
 {
 	struct mlx4_wqe_inline_seg *inl = wqe;
 	struct mlx4_ib_tunnel_header hdr;
@@ -3526,9 +3527,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
-			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-			 __be32 *lso_hdr_sz, __be32 *blh)
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe,
+			 const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp,
+			 unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
 
@@ -3546,7 +3547,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
 	return 0;
 }
 
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
 {
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 462505c8fa25..8d9eaa31fab2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -469,7 +469,7 @@ struct mlx5_umr_wr {
 	u32				mkey;
 };
 
-static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
+static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct mlx5_umr_wr, wr);
 }
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d4414015b64f..3cbd00015182 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3508,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
 }
 
 static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
-			 struct ib_send_wr *wr, void *qend,
+			 const struct ib_send_wr *wr, void *qend,
 			 struct mlx5_ib_qp *qp, int *size)
 {
 	void *seg = eseg;
@@ -3561,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
 }
 
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
-			     struct ib_send_wr *wr)
+			     const struct ib_send_wr *wr)
 {
 	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
 	dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
@@ -3709,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
 
 static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
 			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       struct ib_send_wr *wr, int atomic)
+			       const struct ib_send_wr *wr, int atomic)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(umr, 0, sizeof(*umr));
 
@@ -3782,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 	seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+				 const struct ib_send_wr *wr)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(seg, 0, sizeof(*seg));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
@@ -3833,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
 	seg += mr_list_size;
 }
 
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
 {
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
@@ -3865,7 +3866,7 @@ static u8 wq_sig(void *wqe)
 	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
 }
 
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 			    void *wqe, int *sz)
 {
 	struct mlx5_wqe_inline_seg *seg;
@@ -4011,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 	return 0;
 }
 
-static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 				struct mlx5_ib_qp *qp, void **seg, int *size)
 {
 	struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
@@ -4113,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
 }
 
 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
-				 struct ib_sig_handover_wr *wr, u32 size,
+				 const struct ib_sig_handover_wr *wr, u32 size,
 				 u32 length, u32 pdn)
 {
 	struct ib_mr *sig_mr = wr->sig_mr;
@@ -4144,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 }
 
 
-static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
-			  void **seg, int *size)
+static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
+			  struct mlx5_ib_qp *qp, void **seg, int *size)
 {
-	struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+	const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
 	struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
 	u32 pdn = get_pd(qp)->pdn;
 	u32 xlt_size;
@@ -4222,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 }
 
 static int set_reg_wr(struct mlx5_ib_qp *qp,
-		      struct ib_reg_wr *wr,
+		      const struct ib_reg_wr *wr,
 		      void **seg, int *size)
 {
 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
@@ -4295,7 +4296,7 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     struct mlx5_wqe_ctrl_seg **ctrl,
-		     struct ib_send_wr *wr, unsigned *idx,
+		     const struct ib_send_wr *wr, unsigned *idx,
 		     int *size, int nreq)
 {
 	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index af1c49d70b89..0e390f410b39 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 
 /* Create UD header for an MLX send and build a data segment for it */
 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
-			    int ind, struct ib_ud_wr *wr,
+			    int ind, const struct ib_ud_wr *wr,
 			    struct mthca_mlx_seg *mlx,
 			    struct mthca_data_seg *data)
 {
@@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
 }
 
 static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
-					   struct ib_atomic_wr *wr)
+					   const struct ib_atomic_wr *wr)
 {
 	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->swap);
@@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
 }
 
 static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	useg->lkey    = cpu_to_be32(to_mah(wr->ah)->key);
 	useg->av_addr =	cpu_to_be64(to_mah(wr->ah)->avdma);
@@ -1604,7 +1604,7 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
 }
 
 static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
 	useg->dqpn = cpu_to_be32(wr->remote_qpn);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 82b8f9630ee8..8b1f114062e5 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3040,7 +3040,8 @@ static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
 }
 
 static inline void
-fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey)
+fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr,
+		 u32 uselkey)
 {
 	int sge_index;
 	int total_payload_length = 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 86b22f6b7271..5da1d0b88d1a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
 /* unprivileged verbs and their support functions. */
 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
 				struct ocrdma_hdr_wqe *hdr,
-				struct ib_send_wr *wr)
+				const struct ib_send_wr *wr)
 {
 	struct ocrdma_ewqe_ud_hdr *ud_hdr =
 		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
@@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
 				    struct ocrdma_hdr_wqe *hdr,
 				    struct ocrdma_sge *sge,
-				    struct ib_send_wr *wr, u32 wqe_size)
+				    const struct ib_send_wr *wr, u32 wqe_size)
 {
 	int i;
 	char *dpp_addr;
@@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
 }
 
 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			     struct ib_send_wr *wr)
+			     const struct ib_send_wr *wr)
 {
 	int status;
 	struct ocrdma_sge *sge;
@@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
 }
 
 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			      struct ib_send_wr *wr)
+			      const struct ib_send_wr *wr)
 {
 	int status;
 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
@@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
 }
 
 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			      struct ib_send_wr *wr)
+			      const struct ib_send_wr *wr)
 {
 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
 	struct ocrdma_sge *sge = ext_rw + 1;
@@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz)
 
 static int ocrdma_build_reg(struct ocrdma_qp *qp,
 			    struct ocrdma_hdr_wqe *hdr,
-			    struct ib_reg_wr *wr)
+			    const struct ib_reg_wr *wr)
 {
 	u64 fbo;
 	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 2e1f352c037d..b5d49740bf8a 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -380,7 +380,7 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev)
 #define QEDR_GSI_QPN		(1)
 static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 					struct qedr_qp *qp,
-					struct ib_send_wr *swr,
+					const struct ib_send_wr *swr,
 					struct ib_ud_header *udh,
 					int *roce_mode)
 {
@@ -488,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 
 static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 					struct qedr_qp *qp,
-					struct ib_send_wr *swr,
+					const struct ib_send_wr *swr,
 					struct qed_roce_ll2_packet **p_packet)
 {
 	u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE];
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b82c5d5fb0e3..34d8b5580138 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2781,7 +2781,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
 	} while (0)
 
 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
-				struct ib_send_wr *wr)
+				const struct ib_send_wr *wr)
 {
 	u32 data_size = 0;
 	int i;
@@ -2845,7 +2845,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
 
 static int qedr_prepare_reg(struct qedr_qp *qp,
 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
-			    struct ib_reg_wr *wr)
+			    const struct ib_reg_wr *wr)
 {
 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
@@ -2907,7 +2907,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
 	}
 }
 
-static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
+static inline bool qedr_can_post_send(struct qedr_qp *qp,
+				      const struct ib_send_wr *wr)
 {
 	int wq_is_full, err_wr, pbl_is_full;
 	struct qedr_dev *dev = qp->dev;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index e72562a8959a..666613eef88f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -311,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg);
 
 void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
 
-int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr);
 
 void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index eb5b1065ec08..1864621ef942 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
 				       qp->rq.offset + n * qp->rq.wqe_size);
 }
 
-static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
+static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
+		       const struct ib_reg_wr *wr)
 {
 	struct pvrdma_user_mr *mr = to_vmr(wr->mr);
 
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index d29e3c943399..858c992906c1 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1620,7 +1620,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 static inline int rvt_qp_valid_operation(
 	struct rvt_qp *qp,
 	const struct rvt_operation_params *post_parms,
-	struct ib_send_wr *wr)
+	const struct ib_send_wr *wr)
 {
 	int len;
 
@@ -1717,7 +1717,7 @@ static inline int rvt_qp_is_avail(
  * @wr: the work request to send
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
-			   struct ib_send_wr *wr,
+			   const struct ib_send_wr *wr,
 			   int *call_send)
 {
 	struct rvt_swqe *wqe;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 1188e163204d..0ea394554c8e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -554,7 +554,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp)
 	return 0;
 }
 
-static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			    unsigned int mask, unsigned int length)
 {
 	int num_sge = ibwr->num_sge;
@@ -582,7 +582,7 @@ err1:
 }
 
 static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
-			 struct ib_send_wr *ibwr)
+			 const struct ib_send_wr *ibwr)
 {
 	wr->wr_id = ibwr->wr_id;
 	wr->num_sge = ibwr->num_sge;
@@ -637,7 +637,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
 	}
 }
 
-static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			 unsigned int mask, unsigned int length,
 			 struct rxe_send_wqe *wqe)
 {
@@ -685,7 +685,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 	return 0;
 }
 
-static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			 unsigned int mask, u32 length)
 {
 	int err;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 42cbf8eabe9d..cf38d47fa8f8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1352,7 +1352,7 @@ struct ib_rdma_wr {
 	u32			rkey;
 };
 
-static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_rdma_wr, wr);
 }
@@ -1367,7 +1367,7 @@ struct ib_atomic_wr {
 	u32			rkey;
 };
 
-static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_atomic_wr, wr);
 }
@@ -1384,7 +1384,7 @@ struct ib_ud_wr {
 	u8			port_num;   /* valid for DR SMPs on switch only */
 };
 
-static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_ud_wr, wr);
 }
@@ -1396,7 +1396,7 @@ struct ib_reg_wr {
 	int			access;
 };
 
-static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_reg_wr, wr);
 }
@@ -1409,7 +1409,8 @@ struct ib_sig_handover_wr {
 	struct ib_sge	       *prot;
 };
 
-static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+static inline const struct ib_sig_handover_wr *
+sig_handover_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_sig_handover_wr, wr);
 }
-- 
cgit v1.2.3


From d34ac5cd3a73aacd11009c4fc3ba15d7ea62c411 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Wed, 18 Jul 2018 09:25:32 -0700
Subject: RDMA, core and ULPs: Declare ib_post_send() and ib_post_recv()
 arguments const

Since neither ib_post_send() nor ib_post_recv() modify the data structure
their second argument points at, declare that argument const. This change
makes it necessary to declare the 'bad_wr' argument const too and also to
modify all ULPs that call ib_post_send(), ib_post_recv() or
ib_post_srq_recv(). This patch does not change any functionality but makes
it possible for the compiler to verify whether the
ib_post_(send|recv|srq_recv) really do not modify the posted work request.

To make this possible, only one cast had to be introduce that casts away
constness, namely in rpcrdma_post_recvs(). The only way I can think of to
avoid that cast is to introduce an additional loop in that function or to
change the data type of bad_wr from struct ib_recv_wr ** into int
(an index that refers to an element in the work request list). However,
both approaches would require even more extensive changes than this
patch.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c            |  9 +++++---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c        | 16 ++++++-------
 drivers/infiniband/hw/bnxt_re/ib_verbs.h        | 12 +++++-----
 drivers/infiniband/hw/cxgb3/iwch_provider.h     |  8 +++----
 drivers/infiniband/hw/cxgb3/iwch_qp.c           | 12 +++++-----
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h          | 12 +++++-----
 drivers/infiniband/hw/cxgb4/qp.c                | 27 ++++++++++++----------
 drivers/infiniband/hw/hns/hns_roce_device.h     |  8 +++----
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c      | 13 ++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c      | 12 +++++-----
 drivers/infiniband/hw/i40iw/i40iw_verbs.c       |  9 ++++----
 drivers/infiniband/hw/mlx4/mad.c                |  7 +++---
 drivers/infiniband/hw/mlx4/mlx4_ib.h            | 12 +++++-----
 drivers/infiniband/hw/mlx4/qp.c                 | 21 ++++++++---------
 drivers/infiniband/hw/mlx4/srq.c                |  4 ++--
 drivers/infiniband/hw/mlx5/gsi.c                |  8 +++----
 drivers/infiniband/hw/mlx5/mlx5_ib.h            | 20 ++++++++---------
 drivers/infiniband/hw/mlx5/mr.c                 |  2 +-
 drivers/infiniband/hw/mlx5/qp.c                 | 21 ++++++++---------
 drivers/infiniband/hw/mlx5/srq.c                |  4 ++--
 drivers/infiniband/hw/mthca/mthca_dev.h         | 24 ++++++++++----------
 drivers/infiniband/hw/mthca/mthca_qp.c          | 16 ++++++-------
 drivers/infiniband/hw/mthca/mthca_srq.c         |  8 +++----
 drivers/infiniband/hw/nes/nes_verbs.c           |  8 +++----
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     | 16 ++++++-------
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h     | 12 +++++-----
 drivers/infiniband/hw/qedr/qedr_roce_cm.c       |  8 +++----
 drivers/infiniband/hw/qedr/qedr_roce_cm.h       |  8 +++----
 drivers/infiniband/hw/qedr/verbs.c              | 26 ++++++++++-----------
 drivers/infiniband/hw/qedr/verbs.h              |  8 +++----
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c    |  8 +++----
 drivers/infiniband/hw/usnic/usnic_ib_verbs.h    |  8 +++----
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c    |  8 +++----
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c   |  4 ++--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 12 +++++-----
 drivers/infiniband/sw/rdmavt/qp.c               | 12 +++++-----
 drivers/infiniband/sw/rdmavt/qp.h               | 12 +++++-----
 drivers/infiniband/sw/rxe/rxe_verbs.c           | 18 +++++++--------
 include/rdma/ib_verbs.h                         | 30 ++++++++++++-------------
 net/rds/ib_send.c                               |  6 ++---
 net/sunrpc/xprtrdma/frwr_ops.c                  |  3 ++-
 net/sunrpc/xprtrdma/svc_rdma_rw.c               |  3 ++-
 net/sunrpc/xprtrdma/verbs.c                     |  3 ++-
 43 files changed, 257 insertions(+), 241 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 38d7de3f9b2f..d4c3bc042343 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2155,7 +2155,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	struct ib_uverbs_post_send      cmd;
 	struct ib_uverbs_post_send_resp resp;
 	struct ib_uverbs_send_wr       *user_wr;
-	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
+	struct ib_send_wr              *wr = NULL, *last, *next;
+	const struct ib_send_wr	       *bad_wr;
 	struct ib_qp                   *qp;
 	int                             i, sg_ind;
 	int				is_ud;
@@ -2434,7 +2435,8 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_post_recv      cmd;
 	struct ib_uverbs_post_recv_resp resp;
-	struct ib_recv_wr              *wr, *next, *bad_wr;
+	struct ib_recv_wr              *wr, *next;
+	const struct ib_recv_wr	       *bad_wr;
 	struct ib_qp                   *qp;
 	ssize_t                         ret = -EINVAL;
 
@@ -2483,7 +2485,8 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 {
 	struct ib_uverbs_post_srq_recv      cmd;
 	struct ib_uverbs_post_srq_recv_resp resp;
-	struct ib_recv_wr                  *wr, *next, *bad_wr;
+	struct ib_recv_wr                  *wr, *next;
+	const struct ib_recv_wr		   *bad_wr;
 	struct ib_srq                      *srq;
 	ssize_t                             ret = -EINVAL;
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index a0082e0bb8e2..5d955b293c6d 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1519,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
 	return 0;
 }
 
-int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
 					       ib_srq);
@@ -2048,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
  * and the MAD datagram out to the provided SGE.
  */
 static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
-					    struct ib_recv_wr *wr,
+					    const struct ib_recv_wr *wr,
 					    struct bnxt_qplib_swqe *wqe,
 					    int payload_size)
 {
@@ -2361,8 +2361,8 @@ bad:
 	return rc;
 }
 
-int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
@@ -2461,7 +2461,7 @@ bad:
 
 static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
-				       struct ib_recv_wr *wr)
+				       const struct ib_recv_wr *wr)
 {
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0;
@@ -2494,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
 	return rc;
 }
 
-int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index bd04d40d897a..aa33e7b82c84 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -181,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
 		       struct ib_udata *udata);
 int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int bnxt_re_destroy_srq(struct ib_srq *srq);
-int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr,
-			  struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
+			  const struct ib_recv_wr **bad_recv_wr);
 struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *qp_init_attr,
 				struct ib_udata *udata);
@@ -191,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int bnxt_re_destroy_qp(struct ib_qp *qp);
-int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-		      struct ib_send_wr **bad_send_wr);
-int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-		      struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+		      const struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+		      const struct ib_recv_wr **bad_recv_wr);
 struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2e38ddefea8a..8adbe9658935 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -326,10 +326,10 @@ enum iwch_qp_query_flags {
 };
 
 u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_ep *ep);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 29ab6910a004..c649faad63f9 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
 }
 
 static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-				struct ib_recv_wr *wr)
+			   const struct ib_recv_wr *wr)
 {
 	int i, err = 0;
 	u32 pbl_addr[T3_MAX_SGE];
@@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
 }
 
 static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-				struct ib_recv_wr *wr)
+				const struct ib_recv_wr *wr)
 {
 	int i;
 	u32 pbl_addr;
@@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
 	return 0;
 }
 
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	u8 uninitialized_var(t3_wr_flit_cnt);
@@ -463,8 +463,8 @@ out:
 	return err;
 }
 
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct iwch_qp *qhp;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 047106cb0393..3cf93463021a 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -1033,10 +1033,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 			    struct c4iw_dev_ucontext *uctx);
 int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr);
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
 int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -1119,8 +1119,8 @@ void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
 void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq);
 void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16);
 void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx);
-int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr);
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
 struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
 
 typedef int c4iw_restrack_func(struct sk_buff *msg,
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index dbd697b113ec..62e2c0d899f5 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -620,7 +620,7 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
 }
 
 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
-			   struct ib_recv_wr *wr, u8 *len16)
+			   const struct ib_recv_wr *wr, u8 *len16)
 {
 	int ret;
 
@@ -634,7 +634,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 	return 0;
 }
 
-static int build_srq_recv(union t4_recv_wr *wqe, struct ib_recv_wr *wr,
+static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
 			  u8 *len16)
 {
 	int ret;
@@ -903,8 +903,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp,
 	return 0;
 }
 
-static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	int ret = 0;
 
@@ -919,7 +920,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
 	return ret;
 }
 
-static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+static void complete_rq_drain_wr(struct c4iw_qp *qhp,
+				 const struct ib_recv_wr *wr)
 {
 	struct t4_cqe cqe = {};
 	struct c4iw_cq *rchp;
@@ -951,7 +953,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 	}
 }
 
-static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
+				  const struct ib_recv_wr *wr)
 {
 	while (wr) {
 		complete_rq_drain_wr(qhp, wr);
@@ -959,8 +962,8 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 	}
 }
 
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		   struct ib_send_wr **bad_wr)
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	u8 len16 = 0;
@@ -1110,8 +1113,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct c4iw_qp *qhp;
@@ -1206,8 +1209,8 @@ static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
 	t4_srq_produce_pending_wr(srq);
 }
 
-int c4iw_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr)
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
 {
 	union t4_recv_wr *wqe, lwqe;
 	struct c4iw_srq *srq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index ceb490c732c0..1c252753fb12 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -763,10 +763,10 @@ struct hns_roce_hw {
 			 int attr_mask, enum ib_qp_state cur_state,
 			 enum ib_qp_state new_state);
 	int (*destroy_qp)(struct ib_qp *ibqp);
-	int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr);
-	int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-			 struct ib_recv_wr **bad_recv_wr);
+	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			 const struct ib_send_wr **bad_wr);
+	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+			 const struct ib_recv_wr **bad_recv_wr);
 	int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 	int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 	int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index ae6b642ec073..8e11c6b62009 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
 	rseg->len   = 0;
 }
 
-static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				 struct ib_send_wr **bad_wr)
+static int hns_roce_v1_post_send(struct ib_qp *ibqp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
@@ -342,8 +343,9 @@ out:
 	return ret;
 }
 
-static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				 struct ib_recv_wr **bad_wr)
+static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
+				 const struct ib_recv_wr *wr,
+				 const struct ib_recv_wr **bad_wr)
 {
 	int ret = 0;
 	int nreq = 0;
@@ -993,7 +995,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
 	struct device *dev = &hr_dev->pdev->dev;
-	struct ib_send_wr send_wr, *bad_wr;
+	struct ib_send_wr send_wr;
+	const struct ib_send_wr *bad_wr;
 	int ret;
 
 	memset(&send_wr, 0, sizeof(send_wr));
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a9bc6b279175..25e0407d3f31 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -103,7 +103,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
-			     struct ib_send_wr **bad_wr)
+			     const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_v2_wqe_data_seg *dseg = wqe;
@@ -164,8 +164,9 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	return 0;
 }
 
-static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				 struct ib_send_wr **bad_wr)
+static int hns_roce_v2_post_send(struct ib_qp *ibqp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
@@ -530,8 +531,9 @@ out:
 	return ret;
 }
 
-static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				 struct ib_recv_wr **bad_wr)
+static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
+				 const struct ib_recv_wr *wr,
+				 const struct ib_recv_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 7d85414742ff..e780454256df 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2201,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in
  * @bad_wr: return of bad wr if err
  */
 static int i40iw_post_send(struct ib_qp *ibqp,
-			   struct ib_send_wr *ib_wr,
-			   struct ib_send_wr **bad_wr)
+			   const struct ib_send_wr *ib_wr,
+			   const struct ib_send_wr **bad_wr)
 {
 	struct i40iw_qp *iwqp;
 	struct i40iw_qp_uk *ukqp;
@@ -2377,9 +2377,8 @@ out:
  * @ib_wr: work request for receive
  * @bad_wr: bad wr caused an error
  */
-static int i40iw_post_recv(struct ib_qp *ibqp,
-			   struct ib_recv_wr *ib_wr,
-			   struct ib_recv_wr **bad_wr)
+static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
+			   const struct ib_recv_wr **bad_wr)
 {
 	struct i40iw_qp *iwqp;
 	struct i40iw_qp_uk *ukqp;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 8d730a69793d..e5466d786bb1 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 {
 	struct ib_sge list;
 	struct ib_ud_wr wr;
-	struct ib_send_wr *bad_wr;
+	const struct ib_send_wr *bad_wr;
 	struct mlx4_ib_demux_pv_ctx *tun_ctx;
 	struct mlx4_ib_demux_pv_qp *tun_qp;
 	struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
 				  int index)
 {
 	struct ib_sge sg_list;
-	struct ib_recv_wr recv_wr, *bad_recv_wr;
+	struct ib_recv_wr recv_wr;
+	const struct ib_recv_wr *bad_recv_wr;
 	int size;
 
 	size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
@@ -1361,7 +1362,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 {
 	struct ib_sge list;
 	struct ib_ud_wr wr;
-	struct ib_send_wr *bad_wr;
+	const struct ib_send_wr *bad_wr;
 	struct mlx4_ib_demux_pv_ctx *sqp_ctx;
 	struct mlx4_ib_demux_pv_qp *sqp;
 	struct mlx4_mad_snd_buf *sqp_mad;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1a0fad30633b..e817a2f55546 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -775,8 +775,8 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mlx4_ib_destroy_srq(struct ib_srq *srq);
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 
 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
@@ -788,10 +788,10 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		      int attr_mask, struct ib_udata *udata);
 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		     struct ib_qp_init_attr *qp_init_attr);
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 
 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
 		 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 44fc684b5e3a..b431757d4668 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -3569,8 +3569,8 @@ static void add_zero_len_inline(void *wqe)
 	inl->byte_count = cpu_to_be32(1 << 31);
 }
 
-static int _mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			      struct ib_send_wr **bad_wr, bool drain)
+static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			      const struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	void *wqe;
@@ -3901,14 +3901,14 @@ out:
 	return err;
 }
 
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
 {
 	return _mlx4_ib_post_send(ibqp, wr, bad_wr, false);
 }
 
-static int _mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr, bool drain)
+static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	struct mlx4_wqe_data_seg *scat;
@@ -3995,8 +3995,8 @@ out:
 	return err;
 }
 
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false);
 }
@@ -4536,7 +4536,7 @@ void mlx4_ib_drain_sq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->send_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct mlx4_ib_drain_cqe sdrain;
-	struct ib_send_wr *bad_swr;
+	const struct ib_send_wr *bad_swr;
 	struct ib_rdma_wr swr = {
 		.wr = {
 			.next = NULL,
@@ -4571,7 +4571,8 @@ void mlx4_ib_drain_rq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->recv_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct mlx4_ib_drain_cqe rdrain;
-	struct ib_recv_wr rwr = {}, *bad_rwr;
+	struct ib_recv_wr rwr = {};
+	const struct ib_recv_wr *bad_rwr;
 	int ret;
 	struct mlx4_ib_dev *dev = to_mdev(qp->device);
 	struct mlx4_dev *mdev = dev->dev;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index ebee56cbc0e2..3731b31c3653 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
 	spin_unlock(&srq->lock);
 }
 
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx4_ib_srq *srq = to_msrq(ibsrq);
 	struct mlx4_wqe_srq_next_seg *next;
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 79e6309460dc..4950df3f71b6 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
 	return gsi->tx_qps[qp_index];
 }
 
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
 	struct ib_qp *tx_qp;
@@ -522,8 +522,8 @@ err:
 	return ret;
 }
 
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 8d9eaa31fab2..b75754efc663 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1013,8 +1013,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
 int mlx5_ib_destroy_srq(struct ib_srq *srq);
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata);
@@ -1025,10 +1025,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
 int mlx5_ib_destroy_qp(struct ib_qp *qp);
 void mlx5_ib_drain_sq(struct ib_qp *qp);
 void mlx5_ib_drain_rq(struct ib_qp *qp);
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
 			  void *buffer, u32 length,
@@ -1209,10 +1209,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 			 int qp_attr_mask,
 			 struct ib_qp_init_attr *qp_init_attr);
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
 
 int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 308456d28afb..9fb1d9cb9401 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -898,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 				  struct mlx5_umr_wr *umrwr)
 {
 	struct umr_common *umrc = &dev->umrc;
-	struct ib_send_wr *bad;
+	const struct ib_send_wr *bad;
 	int err;
 	struct mlx5_ib_umr_context umr_context;
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 535f6ad038b2..6efd770797d1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4348,8 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 	qp->sq.w_list[idx].next = qp->sq.cur_post;
 }
 
-static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr, bool drain)
+static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			      const struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4675,8 +4675,8 @@ out:
 	return err;
 }
 
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
 {
 	return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
 }
@@ -4686,8 +4686,8 @@ static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
 	sig->signature = calc_sig(sig, size);
 }
 
-static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr, bool drain)
+static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_wqe_data_seg *scat;
@@ -4767,8 +4767,8 @@ out:
 	return err;
 }
 
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
 }
@@ -5764,7 +5764,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->send_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct mlx5_ib_drain_cqe sdrain;
-	struct ib_send_wr *bad_swr;
+	const struct ib_send_wr *bad_swr;
 	struct ib_rdma_wr swr = {
 		.wr = {
 			.next = NULL,
@@ -5799,7 +5799,8 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->recv_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct mlx5_ib_drain_cqe rdrain;
-	struct ib_recv_wr rwr = {}, *bad_rwr;
+	struct ib_recv_wr rwr = {};
+	const struct ib_recv_wr *bad_rwr;
 	int ret;
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_core_dev *mdev = dev->mdev;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 0af7b7905550..f9dc2b79a51f 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -440,8 +440,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
 	spin_unlock(&srq->lock);
 }
 
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
 	struct mlx5_wqe_srq_next_seg *next;
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 5508afbf1c67..220a3e4717a3 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
-int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr);
 
 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
@@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 		   struct ib_qp_init_attr *qp_init_attr);
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		    struct ib_udata *udata);
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr);
 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
 			int index, int *dbd, __be32 *new_wqe);
 int mthca_alloc_qp(struct mthca_dev *dev,
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 0e390f410b39..3d37f2373d63 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1611,8 +1611,8 @@ static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
 	useg->qkey = cpu_to_be32(wr->remote_qkey);
 }
 
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1814,8 +1814,8 @@ out:
 	return err;
 }
 
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1925,8 +1925,8 @@ out:
 	return err;
 }
 
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -2165,8 +2165,8 @@ out:
 	return err;
 }
 
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index f79732bc73b4..9a3fc6fb0d7e 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
 	spin_unlock(&srq->lock);
 }
 
-int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
@@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 	return err;
 }
 
-int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 8b1f114062e5..3bd3c61af55b 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3067,8 +3067,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr,
 /**
  * nes_post_send
  */
-static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-		struct ib_send_wr **bad_wr)
+static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr,
+			 const struct ib_send_wr **bad_wr)
 {
 	u64 u64temp;
 	unsigned long flags = 0;
@@ -3329,8 +3329,8 @@ out:
 /**
  * nes_post_recv
  */
-static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-		struct ib_recv_wr **bad_wr)
+static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	u64 u64temp;
 	unsigned long flags = 0;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 5da1d0b88d1a..c158ca9fde6d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
 	iowrite32(val, qp->sq_db);
 }
 
-int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr)
 {
 	int status = 0;
 	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
@@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 	iowrite32(val, qp->rq_db);
 }
 
-static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
-			     u16 tag)
+static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
+			     const struct ib_recv_wr *wr, u16 tag)
 {
 	u32 wqe_size = 0;
 	struct ocrdma_sge *sge;
@@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
 	ocrdma_cpu_to_le32(rqe, wqe_size);
 }
 
-int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr)
+int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr)
 {
 	int status = 0;
 	unsigned long flags;
@@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
 	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
 }
 
-int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
+int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	int status = 0;
 	unsigned long flags;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 9a9971708646..b69cfdce7970 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -43,10 +43,10 @@
 #ifndef __OCRDMA_VERBS_H__
 #define __OCRDMA_VERBS_H__
 
-int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
-		     struct ib_send_wr **bad_wr);
-int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
-		     struct ib_recv_wr **bad_wr);
+int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *,
+		     const struct ib_send_wr **bad_wr);
+int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+		     const struct ib_recv_wr **bad_wr);
 
 int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
 int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
@@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
 		      enum ib_srq_attr_mask, struct ib_udata *);
 int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
 int ocrdma_destroy_srq(struct ib_srq *);
-int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
-			 struct ib_recv_wr **bad_recv_wr);
+int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
+			 const struct ib_recv_wr **bad_recv_wr);
 
 int ocrdma_dereg_mr(struct ib_mr *);
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index b5d49740bf8a..85578887421b 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -537,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 	return 0;
 }
 
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		       struct ib_send_wr **bad_wr)
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr)
 {
 	struct qed_roce_ll2_packet *pkt = NULL;
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -607,8 +607,8 @@ err:
 	return rc;
 }
 
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr)
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
index a55916323ea9..d46dcd3f6424 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
@@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
 
 /* RDMA CM */
 int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr);
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		       struct ib_send_wr **bad_wr);
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr);
 struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
 				 struct ib_qp_init_attr *attrs,
 				 struct qedr_qp *qp);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 34d8b5580138..4aaeb24cebfc 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2696,9 +2696,9 @@ static void swap_wqe_data64(u64 *p)
 
 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
 				       struct qedr_qp *qp, u8 *wqe_size,
-				       struct ib_send_wr *wr,
-				       struct ib_send_wr **bad_wr, u8 *bits,
-				       u8 bit)
+				       const struct ib_send_wr *wr,
+				       const struct ib_send_wr **bad_wr,
+				       u8 *bits, u8 bit)
 {
 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
 	char *seg_prt, *wqe;
@@ -2805,8 +2805,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
 				     struct qedr_qp *qp,
 				     struct rdma_sq_rdma_wqe_1st *rwqe,
 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
-				     struct ib_send_wr *wr,
-				     struct ib_send_wr **bad_wr)
+				     const struct ib_send_wr *wr,
+				     const struct ib_send_wr **bad_wr)
 {
 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
@@ -2828,8 +2828,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
 				     struct qedr_qp *qp,
 				     struct rdma_sq_send_wqe_1st *swqe,
 				     struct rdma_sq_send_wqe_2st *swqe2,
-				     struct ib_send_wr *wr,
-				     struct ib_send_wr **bad_wr)
+				     const struct ib_send_wr *wr,
+				     const struct ib_send_wr **bad_wr)
 {
 	memset(swqe2, 0, sizeof(*swqe2));
 	if (wr->send_flags & IB_SEND_INLINE) {
@@ -2945,8 +2945,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp,
 	return true;
 }
 
-static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			    const struct ib_send_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3160,8 +3160,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return rc;
 }
 
-int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		   struct ib_send_wr **bad_wr)
+int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3226,8 +3226,8 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return rc;
 }
 
-int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		   struct ib_recv_wr **bad_wr)
+int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		   const struct ib_recv_wr **bad_wr)
 {
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
 	struct qedr_dev *dev = qp->dev;
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 2c57e4c592a6..087baf009864 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -82,10 +82,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 			    u32 max_num_sg);
 int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
-int qedr_post_send(struct ib_qp *, struct ib_send_wr *,
-		   struct ib_send_wr **bad_wr);
-int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *,
-		   struct ib_recv_wr **bad_wr);
+int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
+		   const struct ib_send_wr **bad_wr);
+int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+		   const struct ib_recv_wr **bad_wr);
 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
 		     u8 port_num, const struct ib_wc *in_wc,
 		     const struct ib_grh *in_grh,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9524524fade4..9973ac893635 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah)
 	return -EINVAL;
 }
 
-int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr)
 {
 	usnic_dbg("\n");
 	return -EINVAL;
 }
 
-int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				struct ib_recv_wr **bad_wr)
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
 {
 	usnic_dbg("\n");
 	return -EINVAL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 1fda94425116..2a2c9beb715f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
 				 struct ib_udata *udata);
 
 int usnic_ib_destroy_ah(struct ib_ah *ah);
-int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			struct ib_send_wr **bad_wr);
-int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			struct ib_recv_wr **bad_wr);
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			const struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
 int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
 			struct ib_wc *wc);
 int usnic_ib_req_notify_cq(struct ib_cq *cq,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 1864621ef942..60083c0363a5 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -624,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
  *
  * @return: 0 on success, otherwise errno returned.
  */
-int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr)
 {
 	struct pvrdma_qp *qp = to_vqp(ibqp);
 	struct pvrdma_dev *dev = to_vdev(ibqp->device);
@@ -828,8 +828,8 @@ out:
  *
  * @return: 0 on success, otherwise errno returned.
  */
-int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr)
+int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr)
 {
 	struct pvrdma_dev *dev = to_vdev(ibqp->device);
 	unsigned long flags;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index af235967a9c2..a0a82731ea24 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -52,8 +52,8 @@
 
 #include "pvrdma.h"
 
-int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
+int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	/* No support for kernel clients. */
 	return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index b7b25728a7e5..6ebf3360ea12 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -435,8 +435,8 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int pvrdma_destroy_srq(struct ib_srq *srq);
-int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr);
+int pvrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr);
 
 struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 			       struct ib_qp_init_attr *init_attr,
@@ -446,9 +446,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int pvrdma_destroy_qp(struct ib_qp *qp);
-int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr);
-int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr);
+int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr);
+int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr);
 
 #endif /* __PVRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 858c992906c1..5ce403c6cddb 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1538,8 +1538,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  * Return: 0 on success otherwise errno
  */
-int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		  struct ib_recv_wr **bad_wr)
+int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr)
 {
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_rwq *wq = qp->r_rq.wq;
@@ -1891,8 +1891,8 @@ bail_inval_free:
  *
  * Return: 0 on success else errno
  */
-int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		  struct ib_send_wr **bad_wr)
+int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr)
 {
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -1948,8 +1948,8 @@ bail:
  *
  * Return: 0 on success else errno
  */
-int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
 	struct rvt_rwq *wq;
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 8409f80d5f25..264811fdc530 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -60,10 +60,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int rvt_destroy_qp(struct ib_qp *ibqp);
 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		 int attr_mask, struct ib_qp_init_attr *init_attr);
-int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		  struct ib_recv_wr **bad_wr);
-int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		  struct ib_send_wr **bad_wr);
-int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr);
+int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr);
+int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 #endif          /* DEF_RVTQP_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 0ea394554c8e..f5b1e0ad6142 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -287,7 +287,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah)
 	return 0;
 }
 
-static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
+static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 {
 	int err;
 	int i;
@@ -438,8 +438,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq)
 	return 0;
 }
 
-static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	unsigned long flags;
@@ -726,8 +726,8 @@ err1:
 	return err;
 }
 
-static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
+				const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	unsigned int mask;
@@ -769,8 +769,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 	return err;
 }
 
-static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr)
+static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			 const struct ib_send_wr **bad_wr)
 {
 	struct rxe_qp *qp = to_rqp(ibqp);
 
@@ -792,8 +792,8 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		return rxe_post_send_kernel(qp, wr, bad_wr);
 }
 
-static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
+static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct rxe_qp *qp = to_rqp(ibqp);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index cf38d47fa8f8..1de8f0d2797c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2373,8 +2373,8 @@ struct ib_device {
 						struct ib_srq_attr *srq_attr);
 	int                        (*destroy_srq)(struct ib_srq *srq);
 	int                        (*post_srq_recv)(struct ib_srq *srq,
-						    struct ib_recv_wr *recv_wr,
-						    struct ib_recv_wr **bad_recv_wr);
+						    const struct ib_recv_wr *recv_wr,
+						    const struct ib_recv_wr **bad_recv_wr);
 	struct ib_qp *             (*create_qp)(struct ib_pd *pd,
 						struct ib_qp_init_attr *qp_init_attr,
 						struct ib_udata *udata);
@@ -2388,11 +2388,11 @@ struct ib_device {
 					       struct ib_qp_init_attr *qp_init_attr);
 	int                        (*destroy_qp)(struct ib_qp *qp);
 	int                        (*post_send)(struct ib_qp *qp,
-						struct ib_send_wr *send_wr,
-						struct ib_send_wr **bad_send_wr);
+						const struct ib_send_wr *send_wr,
+						const struct ib_send_wr **bad_send_wr);
 	int                        (*post_recv)(struct ib_qp *qp,
-						struct ib_recv_wr *recv_wr,
-						struct ib_recv_wr **bad_recv_wr);
+						const struct ib_recv_wr *recv_wr,
+						const struct ib_recv_wr **bad_recv_wr);
 	struct ib_cq *             (*create_cq)(struct ib_device *device,
 						const struct ib_cq_init_attr *attr,
 						struct ib_ucontext *context,
@@ -3281,10 +3281,10 @@ int ib_destroy_srq(struct ib_srq *srq);
  *   the work request that failed to be posted on the QP.
  */
 static inline int ib_post_srq_recv(struct ib_srq *srq,
-				   struct ib_recv_wr *recv_wr,
-				   struct ib_recv_wr **bad_recv_wr)
+				   const struct ib_recv_wr *recv_wr,
+				   const struct ib_recv_wr **bad_recv_wr)
 {
-	struct ib_recv_wr *dummy;
+	const struct ib_recv_wr *dummy;
 
 	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
 }
@@ -3384,10 +3384,10 @@ int ib_close_qp(struct ib_qp *qp);
  * earlier work requests in the list.
  */
 static inline int ib_post_send(struct ib_qp *qp,
-			       struct ib_send_wr *send_wr,
-			       struct ib_send_wr **bad_send_wr)
+			       const struct ib_send_wr *send_wr,
+			       const struct ib_send_wr **bad_send_wr)
 {
-	struct ib_send_wr *dummy;
+	const struct ib_send_wr *dummy;
 
 	return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
 }
@@ -3401,10 +3401,10 @@ static inline int ib_post_send(struct ib_qp *qp,
  *   the work request that failed to be posted on the QP.
  */
 static inline int ib_post_recv(struct ib_qp *qp,
-			       struct ib_recv_wr *recv_wr,
-			       struct ib_recv_wr **bad_recv_wr)
+			       const struct ib_recv_wr *recv_wr,
+			       const struct ib_recv_wr **bad_recv_wr)
 {
-	struct ib_recv_wr *dummy;
+	const struct ib_recv_wr *dummy;
 
 	return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 8557a1cae041..8ac80c1b051e 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -492,7 +492,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	struct rds_ib_send_work *send = NULL;
 	struct rds_ib_send_work *first;
 	struct rds_ib_send_work *prev;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	struct scatterlist *scat;
 	u32 pos;
 	u32 i;
@@ -758,7 +758,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct rds_ib_send_work *send = NULL;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	struct rds_ib_device *rds_ibdev;
 	u32 pos;
 	u32 work_alloc;
@@ -849,7 +849,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
 	struct rds_ib_send_work *send = NULL;
 	struct rds_ib_send_work *first;
 	struct rds_ib_send_work *prev;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	struct scatterlist *scat;
 	unsigned long len;
 	u64 remote_addr = op->op_remote_addr;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index a167eebf63d5..1bb00dd6ccdb 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -517,7 +517,8 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 static void
 frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
 {
-	struct ib_send_wr *first, **prev, *last, *bad_wr;
+	struct ib_send_wr *first, **prev, *last;
+	const struct ib_send_wr *bad_wr;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index ce3ea8419704..04cb3363172a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -307,7 +307,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 {
 	struct svcxprt_rdma *rdma = cc->cc_rdma;
 	struct svc_xprt *xprt = &rdma->sc_xprt;
-	struct ib_send_wr *first_wr, *bad_wr;
+	struct ib_send_wr *first_wr;
+	const struct ib_send_wr *bad_wr;
 	struct list_head *tmp;
 	struct ib_cqe *cqe;
 	int ret;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 112a15abc4a4..5efeba08918b 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1559,7 +1559,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 	if (!count)
 		return;
 
-	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
+	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
+			  (const struct ib_recv_wr **)&bad_wr);
 	if (rc) {
 		for (wr = bad_wr; wr; wr = wr->next) {
 			struct rpcrdma_rep *rep;
-- 
cgit v1.2.3


From bccd06223f21654eb268e153426a77deb117c1e8 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 26 Jul 2018 16:37:14 -0600
Subject: IB/uverbs: Add UVERBS_ATTR_FLAGS_IN to the specs language

This clearly indicates that the input is a bitwise combination of values
in an enum, and identifies which enum contains the definition of the bits.

Special accessors are provided that handle the mandatory validation of the
allowed bits and enforce the correct type for bitwise flags.

If we had introduced this at the start then the kabi would have uniformly
used u64 data to pass flags, however today there is a mixture of u64 and
u32 flags. All places are converted to accept both sizes and the accessor
fixes it. This allows all existing flags to grow to u64 in future without
any hassle.

Finally all flags are, by definition, optional. If flags are not passed
the accessor does not fail, but provides a value of zero.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c             | 51 ++++++++++++++++++++++
 .../infiniband/core/uverbs_std_types_counters.c    | 10 ++---
 drivers/infiniband/core/uverbs_std_types_cq.c      | 13 +++---
 drivers/infiniband/core/uverbs_std_types_mr.c      | 10 ++---
 drivers/infiniband/hw/mlx5/devx.c                  | 16 ++++---
 drivers/infiniband/hw/mlx5/main.c                  | 16 +++----
 include/rdma/uverbs_ioctl.h                        | 33 ++++++++++++++
 7 files changed, 119 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index db7a92ea5dbe..23a1777f26e2 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -486,3 +486,54 @@ out:
 
 	return err;
 }
+
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits)
+{
+	const struct uverbs_attr *attr;
+	u64 flags;
+
+	attr = uverbs_attr_get(attrs_bundle, idx);
+	/* Missing attribute means 0 flags */
+	if (IS_ERR(attr)) {
+		*to = 0;
+		return 0;
+	}
+
+	/*
+	 * New userspace code should use 8 bytes to pass flags, but we
+	 * transparently support old userspaces that were using 4 bytes as
+	 * well.
+	 */
+	if (attr->ptr_attr.len == 8)
+		flags = attr->ptr_attr.data;
+	else if (attr->ptr_attr.len == 4)
+		memcpy(&flags, &attr->ptr_attr.data, 4);
+	else
+		return -EINVAL;
+
+	if (flags & ~allowed_bits)
+		return -EINVAL;
+
+	*to = flags;
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags64);
+
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits)
+{
+	u64 flags;
+	int ret;
+
+	ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
+	if (ret)
+		return ret;
+
+	if (flags > U32_MAX)
+		return -EINVAL;
+	*to = flags;
+
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags32);
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index dfe59ad721f6..34589799f446 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -97,8 +97,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
 	if (!atomic_read(&counters->usecnt))
 		return -EINVAL;
 
-	ret = uverbs_copy_from(&read_attr.flags, attrs,
-			       UVERBS_ATTR_READ_COUNTERS_FLAGS);
+	ret = uverbs_get_flags32(&read_attr.flags, attrs,
+				 UVERBS_ATTR_READ_COUNTERS_FLAGS,
+				 IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
 	if (ret)
 		return ret;
 
@@ -147,9 +148,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
 			    UVERBS_ATTR_MIN_SIZE(0),
 			    UA_MANDATORY),
-	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
-			   UVERBS_ATTR_TYPE(__u32),
-			   UA_MANDATORY));
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+			     enum ib_uverbs_read_counters_flags));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
 			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index c71305fc0433..3179203a2dd7 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -84,10 +84,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	/* Optional param, if it doesn't exist, we get -ENOENT and skip it */
-	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
-						UVERBS_ATTR_CREATE_CQ_FLAGS)))
-		return -EFAULT;
+	ret = uverbs_get_flags32(&attr.flags, attrs,
+				 UVERBS_ATTR_CREATE_CQ_FLAGS,
+				 IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
+					 IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
+	if (ret)
+		return ret;
 
 	ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
 	if (!IS_ERR(ev_file_uobj)) {
@@ -164,7 +166,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
 			   UVERBS_ATTR_TYPE(u32),
 			   UA_MANDATORY),
-	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
+			     enum ib_uverbs_ex_create_cq_flags),
 	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_MANDATORY),
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index c1b9124d611e..d63da0c2a8c1 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -62,8 +62,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	ret = uverbs_copy_from(&attr.access_flags, attrs,
-			       UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+	ret = uverbs_get_flags32(&attr.access_flags, attrs,
+				 UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+				 IB_ACCESS_SUPPORTED);
 	if (ret)
 		return ret;
 
@@ -131,9 +132,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 			UVERBS_OBJECT_PD,
 			UVERBS_ACCESS_READ,
 			UA_MANDATORY),
-	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
-			   UVERBS_ATTR_TYPE(u32),
-			   UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+			     enum ib_access_flags),
 	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
 			UVERBS_OBJECT_DM,
 			UVERBS_ACCESS_READ,
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index fee800f2fdec..c9a7a12a8c13 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -858,16 +858,21 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 {
 	u64 addr;
 	size_t size;
-	int access;
+	u32 access;
 	int npages;
 	int err;
 	u32 page_mask;
 
 	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
-	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) ||
-	    uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS))
+	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
 		return -EFAULT;
 
+	err = uverbs_get_flags32(&access, attrs,
+				 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+				 IB_ACCESS_SUPPORTED);
+	if (err)
+		return err;
+
 	err = ib_check_mr_access(access);
 	if (err)
 		return err;
@@ -1012,9 +1017,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
 			   UVERBS_ATTR_TYPE(u64),
 			   UA_MANDATORY),
-	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
-			   UVERBS_ATTR_TYPE(u32),
-			   UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+			     enum ib_access_flags),
 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
 			    UVERBS_ATTR_TYPE(u32),
 			    UA_MANDATORY));
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 61c78f4e4ebc..06d6309b719a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3859,12 +3859,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device,
 	u64 flags;
 	int err = 0;
 
-	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
-						MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
-		return ERR_PTR(-EFAULT);
-
-	if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
-		return ERR_PTR(-EOPNOTSUPP);
+	err = uverbs_get_flags64(
+		&action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+		((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
+	if (err)
+		return ERR_PTR(err);
 
 	flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
 
@@ -5531,9 +5530,8 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	mlx5_ib_flow_action,
 	UVERBS_OBJECT_FLOW_ACTION,
 	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
-			   UVERBS_ATTR_TYPE(u64),
-			   UA_MANDATORY));
+	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+			     enum mlx5_ib_uapi_flow_action_flags));
 
 #define NUM_TREES	5
 static int populate_specs_root(struct mlx5_ib_dev *dev)
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index d16d31d4322d..5e6d0569d97c 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -268,6 +268,19 @@ struct uverbs_object_tree_def {
 			  __VA_ARGS__ },                                       \
 	})
 
+/*
+ * An input value that is a bitwise combination of values of _enum_type.
+ * This permits the flag value to be passed as either a u32 or u64, it must
+ * be retrieved via uverbs_get_flag().
+ */
+#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO(              \
+						       !sizeof(_enum_type *)), \
+				 sizeof(u64)),                                 \
+		__VA_ARGS__)
+
 /*
  * This spec is used in order to pass information to the hardware driver in a
  * legacy way. Every verb that could get driver specific data should get this
@@ -520,6 +533,26 @@ static inline int _uverbs_copy_from_or_zero(void *to,
 #define uverbs_copy_from_or_zero(to, attrs_bundle, idx)			      \
 	_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+#else
+static inline int
+uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+static inline int
+uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+#endif
+
 /* =================================================
  *	 Definitions -> Specs infrastructure
  * =================================================
-- 
cgit v1.2.3


From 2df7dba855e10cca8eddcd38bca825446ea10e1d Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 29 Jul 2018 11:53:10 +0300
Subject: RDMA/core: Constify dst_addr argument

Following APIs are not supposed to modify addr or dest_addr contents.
Therefore make those function argument const for better code
readability.

1. rdma_resolve_ip()
2. rdma_addr_size()
3. rdma_resolve_addr()

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/addr.c |  4 ++--
 drivers/infiniband/core/cma.c  | 14 +++++++-------
 include/rdma/ib_addr.h         |  4 ++--
 include/rdma/rdma_cm.h         |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 1b817fdb97a4..46b855a42884 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 	return -ENODATA;
 }
 
-int rdma_addr_size(struct sockaddr *addr)
+int rdma_addr_size(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -585,7 +585,7 @@ static void process_one_req(struct work_struct *_work)
 	spin_unlock_bh(&lock);
 }
 
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0794b99d2507..f326965a0616 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1046,7 +1046,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
 
-static inline int cma_zero_addr(struct sockaddr *addr)
+static inline int cma_zero_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -1060,7 +1060,7 @@ static inline int cma_zero_addr(struct sockaddr *addr)
 	}
 }
 
-static inline int cma_loopback_addr(struct sockaddr *addr)
+static inline int cma_loopback_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -1074,7 +1074,7 @@ static inline int cma_loopback_addr(struct sockaddr *addr)
 	}
 }
 
-static inline int cma_any_addr(struct sockaddr *addr)
+static inline int cma_any_addr(const struct sockaddr *addr)
 {
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
@@ -1097,7 +1097,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 	}
 }
 
-static __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(const struct sockaddr *addr)
 {
 	struct sockaddr_ib *sib;
 
@@ -1115,7 +1115,7 @@ static __be16 cma_port(struct sockaddr *addr)
 	}
 }
 
-static inline int cma_any_port(struct sockaddr *addr)
+static inline int cma_any_port(const struct sockaddr *addr)
 {
 	return !cma_port(addr);
 }
@@ -2944,7 +2944,7 @@ err:
 }
 
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-			 struct sockaddr *dst_addr)
+			 const struct sockaddr *dst_addr)
 {
 	if (!src_addr || !src_addr->sa_family) {
 		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
@@ -2965,7 +2965,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      struct sockaddr *dst_addr, int timeout_ms)
+		      const struct sockaddr *dst_addr, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 715394f6d18a..77c7908b7d73 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -97,7 +97,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
  *   or been canceled.  A status of 0 indicates success.
  * @context: User-specified context associated with the call.
  */
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
@@ -109,7 +109,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
 		    const struct net_device *dev,
 		    const unsigned char *dst_dev_addr);
 
-int rdma_addr_size(struct sockaddr *addr);
+int rdma_addr_size(const struct sockaddr *addr);
 int rdma_addr_size_in6(struct sockaddr_in6 *addr);
 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
 
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c5c1435c129a..5d71a7f51a9f 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -192,7 +192,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
  * @timeout_ms: Time to wait for resolution to complete.
  */
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      struct sockaddr *dst_addr, int timeout_ms);
+		      const struct sockaddr *dst_addr, int timeout_ms);
 
 /**
  * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
-- 
cgit v1.2.3


From e7ff98aefc9e532a2067d5a2112a23902726e9a3 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 29 Jul 2018 11:53:11 +0300
Subject: RDMA/cma: Constify path record, ib_cm_event, listen_id pointers

Constify several pointers such as path_rec, ib_cm_event and listen_id
pointers in several functions.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cma.c           | 55 +++++++++++++++++++--------------
 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 15 +++++----
 drivers/infiniband/ulp/srp/ib_srp.c     |  8 +++--
 drivers/infiniband/ulp/srpt/ib_srpt.c   |  5 +--
 include/rdma/ib_cm.h                    |  2 +-
 5 files changed, 49 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f326965a0616..5571f8d52302 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -641,7 +641,7 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
 }
 
 static int cma_acquire_dev(struct rdma_id_private *id_priv,
-			   struct rdma_id_private *listen_id_priv)
+			   const struct rdma_id_private *listen_id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	const struct ib_gid_attr *sgid_attr;
@@ -1122,8 +1122,8 @@ static inline int cma_any_port(const struct sockaddr *addr)
 
 static void cma_save_ib_info(struct sockaddr *src_addr,
 			     struct sockaddr *dst_addr,
-			     struct rdma_cm_id *listen_id,
-			     struct sa_path_rec *path)
+			     const struct rdma_cm_id *listen_id,
+			     const struct sa_path_rec *path)
 {
 	struct sockaddr_ib *listen_ib, *ib;
 
@@ -1208,7 +1208,7 @@ static u16 cma_port_from_service_id(__be64 service_id)
 
 static int cma_save_ip_info(struct sockaddr *src_addr,
 			    struct sockaddr *dst_addr,
-			    struct ib_cm_event *ib_event,
+			    const struct ib_cm_event *ib_event,
 			    __be64 service_id)
 {
 	struct cma_hdr *hdr;
@@ -1238,8 +1238,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
 
 static int cma_save_net_info(struct sockaddr *src_addr,
 			     struct sockaddr *dst_addr,
-			     struct rdma_cm_id *listen_id,
-			     struct ib_cm_event *ib_event,
+			     const struct rdma_cm_id *listen_id,
+			     const struct ib_cm_event *ib_event,
 			     sa_family_t sa_family, __be64 service_id)
 {
 	if (sa_family == AF_IB) {
@@ -1387,7 +1387,7 @@ roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
 	return sgid_attr->ndev;
 }
 
-static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
+static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
 					  struct cma_req_info *req)
 {
 	struct sockaddr *listen_addr =
@@ -1516,9 +1516,10 @@ static struct rdma_id_private *cma_find_listener(
 	return ERR_PTR(-EINVAL);
 }
 
-static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
-						 struct ib_cm_event *ib_event,
-						 struct net_device **net_dev)
+static struct rdma_id_private *
+cma_id_from_event(struct ib_cm_id *cm_id,
+		  const struct ib_cm_event *ib_event,
+		  struct net_device **net_dev)
 {
 	struct cma_req_info req;
 	struct rdma_bind_list *bind_list;
@@ -1766,7 +1767,7 @@ reject:
 }
 
 static void cma_set_rep_event_data(struct rdma_cm_event *event,
-				   struct ib_cm_rep_event_param *rep_data,
+				   const struct ib_cm_rep_event_param *rep_data,
 				   void *private_data)
 {
 	event->param.conn.private_data = private_data;
@@ -1779,7 +1780,8 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
 	event->param.conn.qp_num = rep_data->remote_qpn;
 }
 
-static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_handler(struct ib_cm_id *cm_id,
+			  const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv = cm_id->context;
 	struct rdma_cm_event event;
@@ -1861,9 +1863,10 @@ out:
 	return ret;
 }
 
-static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
-					       struct ib_cm_event *ib_event,
-					       struct net_device *net_dev)
+static struct rdma_id_private *
+cma_new_conn_id(const struct rdma_cm_id *listen_id,
+		const struct ib_cm_event *ib_event,
+		struct net_device *net_dev)
 {
 	struct rdma_id_private *listen_id_priv;
 	struct rdma_id_private *id_priv;
@@ -1924,11 +1927,12 @@ err:
 	return NULL;
 }
 
-static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
-					      struct ib_cm_event *ib_event,
-					      struct net_device *net_dev)
+static struct rdma_id_private *
+cma_new_udp_id(const struct rdma_cm_id *listen_id,
+	       const struct ib_cm_event *ib_event,
+	       struct net_device *net_dev)
 {
-	struct rdma_id_private *listen_id_priv;
+	const struct rdma_id_private *listen_id_priv;
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
 	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
@@ -1968,7 +1972,7 @@ err:
 }
 
 static void cma_set_req_event_data(struct rdma_cm_event *event,
-				   struct ib_cm_req_event_param *req_data,
+				   const struct ib_cm_req_event_param *req_data,
 				   void *private_data, int offset)
 {
 	event->param.conn.private_data = private_data + offset;
@@ -1982,7 +1986,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
 	event->param.conn.qp_num = req_data->remote_qpn;
 }
 
-static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+static int cma_check_req_qp_type(const struct rdma_cm_id *id,
+				 const struct ib_cm_event *ib_event)
 {
 	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
 		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
@@ -1991,7 +1996,8 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 		(!id->qp_type));
 }
 
-static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_req_handler(struct ib_cm_id *cm_id,
+			   const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event;
@@ -3479,11 +3485,12 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
 }
 
 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
-				struct ib_cm_event *ib_event)
+				const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv = cm_id->context;
 	struct rdma_cm_event event;
-	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
+	const struct ib_cm_sidr_rep_event_param *rep =
+				&ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
 	mutex_lock(&id_priv->handler_mutex);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 8ef50e46157c..518313a1b0c9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = {
 };
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event);
+			       const struct ib_cm_event *event);
 
 static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 				  u64 mapping[IPOIB_CM_RX_SG])
@@ -418,7 +418,8 @@ err_free_1:
 }
 
 static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
-			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
+			     struct ib_qp *qp,
+			     const struct ib_cm_req_event_param *req,
 			     unsigned int psn)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -438,7 +439,8 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 	return ib_send_cm_rep(cm_id, &rep);
 }
 
-static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
+				const struct ib_cm_event *event)
 {
 	struct net_device *dev = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -500,7 +502,7 @@ err_qp:
 }
 
 static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event)
+			       const struct ib_cm_event *event)
 {
 	struct ipoib_cm_rx *p;
 	struct ipoib_dev_priv *priv;
@@ -978,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 	cancel_delayed_work(&priv->cm.stale_task);
 }
 
-static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
+				const struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *p = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
@@ -1244,7 +1247,7 @@ timeout:
 }
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event)
+			       const struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *tx = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f5ec21d1f4dc..444d16520506 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -142,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data);
 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
 		const char *opname);
-static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
+			     const struct ib_cm_event *event);
 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
 			       struct rdma_cm_event *event);
 
@@ -2553,7 +2554,7 @@ error:
 }
 
 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
-				  struct ib_cm_event *event,
+				  const struct ib_cm_event *event,
 				  struct srp_rdma_ch *ch)
 {
 	struct srp_target_port *target = ch->target;
@@ -2638,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
 	}
 }
 
-static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
+			     const struct ib_cm_event *event)
 {
 	struct srp_rdma_ch *ch = cm_id->context;
 	struct srp_target_port *target = ch->target;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 7d9972add65f..d73e14699aa9 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2401,7 +2401,7 @@ out:
 }
 
 static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
-			       struct ib_cm_req_event_param *param,
+			       const struct ib_cm_req_event_param *param,
 			       void *private_data)
 {
 	char sguid[40];
@@ -2513,7 +2513,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
  * a non-zero value in any other case will trigger a race with the
  * ib_destroy_cm_id() call in srpt_release_channel().
  */
-static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int srpt_cm_handler(struct ib_cm_id *cm_id,
+			   const struct ib_cm_event *event)
 {
 	struct srpt_rdma_ch *ch = cm_id->context;
 	int ret;
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 568708a87239..c10f4b5ea8ab 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -311,7 +311,7 @@ struct ib_cm_event {
  * destroy the @cm_id after the callback completes.
  */
 typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
-			     struct ib_cm_event *event);
+			     const struct ib_cm_event *event);
 
 struct ib_cm_id {
 	ib_cm_handler		cm_handler;
-- 
cgit v1.2.3


From ca3a8ace2b128abb7f164de9e10b8431eaddc2ac Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 29 Jul 2018 11:53:13 +0300
Subject: RDMA/core: Return bool instead of int

Return bool for following internal and inline functions as their
underlying APIs return bool too.

1. cma_zero_addr()
2. cma_loopback_addr()
3. cma_any_addr()
4. ib_addr_any()
5. ib_addr_loopback()

While we are touching cma_loopback_addr(), remove extra white spaces
in it.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cma.c | 23 +++++++++++++----------
 include/rdma/ib.h             |  4 ++--
 2 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5ffd3899dac2..853f73a0499f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1046,35 +1046,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
 
-static inline int cma_zero_addr(const struct sockaddr *addr)
+static inline bool cma_zero_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
 		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
 	case AF_INET6:
-		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+		return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
 	case AF_IB:
-		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+		return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
 	default:
-		return 0;
+		return false;
 	}
 }
 
-static inline int cma_loopback_addr(const struct sockaddr *addr)
+static inline bool cma_loopback_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
-		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+		return ipv4_is_loopback(
+			((struct sockaddr_in *)addr)->sin_addr.s_addr);
 	case AF_INET6:
-		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+		return ipv6_addr_loopback(
+			&((struct sockaddr_in6 *)addr)->sin6_addr);
 	case AF_IB:
-		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+		return ib_addr_loopback(
+			&((struct sockaddr_ib *)addr)->sib_addr);
 	default:
-		return 0;
+		return false;
 	}
 }
 
-static inline int cma_any_addr(const struct sockaddr *addr)
+static inline bool cma_any_addr(const struct sockaddr *addr)
 {
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 66dbed0c146d..4f385ec54f80 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -53,12 +53,12 @@ struct ib_addr {
 #define sib_interface_id	ib_u.uib_addr64[1]
 };
 
-static inline int ib_addr_any(const struct ib_addr *a)
+static inline bool ib_addr_any(const struct ib_addr *a)
 {
 	return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
 }
 
-static inline int ib_addr_loopback(const struct ib_addr *a)
+static inline bool ib_addr_loopback(const struct ib_addr *a)
 {
 	return ((a->sib_addr32[0] | a->sib_addr32[1] |
 		 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
-- 
cgit v1.2.3


From cb0f32e12cded06e12ff19104eaf90c6f8310558 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Mon, 23 Jul 2018 14:07:45 -0500
Subject: scsi: target: add helper to check if dev is configured

This just adds a helper function to check if a device is configured and it
converts the target users to use it. The next patch will add a backend
module user so those types of modules do not have to know the lio core
details.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_configfs.c        | 8 ++++----
 drivers/target/target_core_device.c          | 6 +++---
 drivers/target/target_core_fabric_configfs.c | 3 ++-
 include/target/target_core_backend.h         | 4 ++++
 4 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 93d3ff34b614..f6b1549f4142 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -810,7 +810,7 @@ static ssize_t pi_prot_type_store(struct config_item *item,
 		       dev->transport->name);
 		return -ENOSYS;
 	}
-	if (!(dev->dev_flags & DF_CONFIGURED)) {
+	if (!target_dev_configured(dev)) {
 		pr_err("DIF protection requires device to be configured\n");
 		return -ENODEV;
 	}
@@ -859,7 +859,7 @@ static ssize_t pi_prot_format_store(struct config_item *item,
 		       dev->transport->name);
 		return -ENOSYS;
 	}
-	if (!(dev->dev_flags & DF_CONFIGURED)) {
+	if (!target_dev_configured(dev)) {
 		pr_err("DIF protection format requires device to be configured\n");
 		return -ENODEV;
 	}
@@ -1948,7 +1948,7 @@ static ssize_t target_dev_enable_show(struct config_item *item, char *page)
 {
 	struct se_device *dev = to_device(item);
 
-	return snprintf(page, PAGE_SIZE, "%d\n", !!(dev->dev_flags & DF_CONFIGURED));
+	return snprintf(page, PAGE_SIZE, "%d\n", target_dev_configured(dev));
 }
 
 static ssize_t target_dev_enable_store(struct config_item *item,
@@ -2473,7 +2473,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
 			" tg_pt_gp ID: %hu\n", tg_pt_gp->tg_pt_gp_valid_id);
 		return -EINVAL;
 	}
-	if (!(dev->dev_flags & DF_CONFIGURED)) {
+	if (!target_dev_configured(dev)) {
 		pr_err("Unable to set alua_access_state while device is"
 		       " not configured\n");
 		return -ENODEV;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 73675eec740d..47b5ef153135 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -900,7 +900,7 @@ static int target_devices_idr_iter(int id, void *p, void *data)
 	 * to allow other callers to access partially setup devices,
 	 * so we skip them here.
 	 */
-	if (!(dev->dev_flags & DF_CONFIGURED))
+	if (!target_dev_configured(dev))
 		return 0;
 
 	iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item);
@@ -940,7 +940,7 @@ int target_configure_device(struct se_device *dev)
 	struct se_hba *hba = dev->se_hba;
 	int ret, id;
 
-	if (dev->dev_flags & DF_CONFIGURED) {
+	if (target_dev_configured(dev)) {
 		pr_err("se_dev->se_dev_ptr already set for storage"
 				" object\n");
 		return -EEXIST;
@@ -1045,7 +1045,7 @@ void target_free_device(struct se_device *dev)
 
 	WARN_ON(!list_empty(&dev->dev_sep_list));
 
-	if (dev->dev_flags & DF_CONFIGURED) {
+	if (target_dev_configured(dev)) {
 		destroy_workqueue(dev->tmr_wq);
 
 		dev->transport->destroy_device(dev);
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 1fa436e865f9..aa2f4f632ebe 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -34,6 +34,7 @@
 #include <linux/configfs.h>
 
 #include <target/target_core_base.h>
+#include <target/target_core_backend.h>
 #include <target/target_core_fabric.h>
 
 #include "target_core_internal.h"
@@ -642,7 +643,7 @@ static int target_fabric_port_link(
 	}
 	dev = container_of(to_config_group(se_dev_ci), struct se_device, dev_group);
 
-	if (!(dev->dev_flags & DF_CONFIGURED)) {
+	if (!target_dev_configured(dev)) {
 		pr_err("se_device not configured yet, cannot port link\n");
 		return -ENODEV;
 	}
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index c3ac47255218..51b6f50eabee 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -111,6 +111,10 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb);
 bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib,
 				       struct request_queue *q);
 
+static inline bool target_dev_configured(struct se_device *se_dev)
+{
+	return !!(se_dev->dev_flags & DF_CONFIGURED);
+}
 
 /* Only use get_unaligned_be24() if reading p - 1 is allowed. */
 static inline uint32_t get_unaligned_be24(const uint8_t *const p)
-- 
cgit v1.2.3


From 486cdf21583e5b1fad488a3e4f0a5242a31c0ffa Mon Sep 17 00:00:00 2001
From: Mathieu Xhonneux <m.xhonneux@gmail.com>
Date: Thu, 26 Jul 2018 02:10:40 +0000
Subject: bpf: add End.DT6 action to bpf_lwt_seg6_action helper

The seg6local LWT provides the End.DT6 action, which allows to
decapsulate an outer IPv6 header containing a Segment Routing Header
(SRH), full specification is available here:

https://tools.ietf.org/html/draft-filsfils-spring-srv6-network-programming-05

This patch adds this action now to the seg6local BPF
interface. Since it is not mandatory that the inner IPv6 header also
contains a SRH, seg6_bpf_srh_state has been extended with a pointer to
a possible SRH of the outermost IPv6 header. This helps assessing if the
validation must be triggered or not, and avoids some calls to
ipv6_find_hdr.

v3: s/1/true, s/0/false for boolean values
v2: - changed true/false -> 1/0
    - preempt_enable no longer called in first conditional block

Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/seg6_local.h |  4 ++-
 net/core/filter.c        | 88 ++++++++++++++++++++++++++++++++----------------
 net/ipv6/seg6_local.c    | 50 +++++++++++++++++----------
 3 files changed, 94 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h
index 661fd5b4d3e0..08359e2d8b35 100644
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -21,10 +21,12 @@
 
 extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
 			       u32 tbl_id);
+extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb);
 
 struct seg6_bpf_srh_state {
-	bool valid;
+	struct ipv6_sr_hdr *srh;
 	u16 hdrlen;
+	bool valid;
 };
 
 DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
diff --git a/net/core/filter.c b/net/core/filter.c
index 104d560946da..7df1a0f1d1e1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4542,26 +4542,28 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
 {
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
+	struct ipv6_sr_hdr *srh = srh_state->srh;
 	void *srh_tlvs, *srh_end, *ptr;
-	struct ipv6_sr_hdr *srh;
 	int srhoff = 0;
 
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+	if (srh == NULL)
 		return -EINVAL;
 
-	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
 	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
 	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
 
 	ptr = skb->data + offset;
 	if (ptr >= srh_tlvs && ptr + len <= srh_end)
-		srh_state->valid = 0;
+		srh_state->valid = false;
 	else if (ptr < (void *)&srh->flags ||
 		 ptr + len > (void *)&srh->segments)
 		return -EFAULT;
 
 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
 		return -EFAULT;
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return -EINVAL;
+	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
 
 	memcpy(skb->data + offset, from, len);
 	return 0;
@@ -4577,52 +4579,78 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
 	.arg4_type	= ARG_CONST_SIZE
 };
 
-BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
-	   u32, action, void *, param, u32, param_len)
+static void bpf_update_srh_state(struct sk_buff *skb)
 {
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
-	struct ipv6_sr_hdr *srh;
 	int srhoff = 0;
-	int err;
-
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
-		return -EINVAL;
-	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
-
-	if (!srh_state->valid) {
-		if (unlikely((srh_state->hdrlen & 7) != 0))
-			return -EBADMSG;
-
-		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
-		if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
-			return -EBADMSG;
 
-		srh_state->valid = 1;
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
+		srh_state->srh = NULL;
+	} else {
+		srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+		srh_state->hdrlen = srh_state->srh->hdrlen << 3;
+		srh_state->valid = true;
 	}
+}
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+	   u32, action, void *, param, u32, param_len)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	int hdroff = 0;
+	int err;
 
 	switch (action) {
 	case SEG6_LOCAL_ACTION_END_X:
+		if (!seg6_bpf_has_valid_srh(skb))
+			return -EBADMSG;
 		if (param_len != sizeof(struct in6_addr))
 			return -EINVAL;
 		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
 	case SEG6_LOCAL_ACTION_END_T:
+		if (!seg6_bpf_has_valid_srh(skb))
+			return -EBADMSG;
+		if (param_len != sizeof(int))
+			return -EINVAL;
+		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+	case SEG6_LOCAL_ACTION_END_DT6:
+		if (!seg6_bpf_has_valid_srh(skb))
+			return -EBADMSG;
 		if (param_len != sizeof(int))
 			return -EINVAL;
+
+		if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
+			return -EBADMSG;
+		if (!pskb_pull(skb, hdroff))
+			return -EBADMSG;
+
+		skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
+		skb_reset_network_header(skb);
+		skb_reset_transport_header(skb);
+		skb->encapsulation = 0;
+
+		bpf_compute_data_pointers(skb);
+		bpf_update_srh_state(skb);
 		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
 	case SEG6_LOCAL_ACTION_END_B6:
+		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+			return -EBADMSG;
 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
 					  param, param_len);
 		if (!err)
-			srh_state->hdrlen =
-				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+			bpf_update_srh_state(skb);
+
 		return err;
 	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+		if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
+			return -EBADMSG;
 		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
 					  param, param_len);
 		if (!err)
-			srh_state->hdrlen =
-				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+			bpf_update_srh_state(skb);
+
 		return err;
 	default:
 		return -EINVAL;
@@ -4644,15 +4672,14 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
 {
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
+	struct ipv6_sr_hdr *srh = srh_state->srh;
 	void *srh_end, *srh_tlvs, *ptr;
-	struct ipv6_sr_hdr *srh;
 	struct ipv6hdr *hdr;
 	int srhoff = 0;
 	int ret;
 
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+	if (unlikely(srh == NULL))
 		return -EINVAL;
-	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
 
 	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
 			((srh->first_segment + 1) << 4));
@@ -4682,8 +4709,11 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
 	hdr = (struct ipv6hdr *)skb->data;
 	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
+	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+		return -EINVAL;
+	srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
 	srh_state->hdrlen += len;
-	srh_state->valid = 0;
+	srh_state->valid = false;
 	return 0;
 }
 
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index e1025b493a18..60325dbfe88b 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -459,36 +459,57 @@ drop:
 
 DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
 
+bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
+{
+	struct seg6_bpf_srh_state *srh_state =
+		this_cpu_ptr(&seg6_bpf_srh_states);
+	struct ipv6_sr_hdr *srh = srh_state->srh;
+
+	if (unlikely(srh == NULL))
+		return false;
+
+	if (unlikely(!srh_state->valid)) {
+		if ((srh_state->hdrlen & 7) != 0)
+			return false;
+
+		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
+		if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))
+			return false;
+
+		srh_state->valid = true;
+	}
+
+	return true;
+}
+
 static int input_action_end_bpf(struct sk_buff *skb,
 				struct seg6_local_lwt *slwt)
 {
 	struct seg6_bpf_srh_state *srh_state =
 		this_cpu_ptr(&seg6_bpf_srh_states);
-	struct seg6_bpf_srh_state local_srh_state;
 	struct ipv6_sr_hdr *srh;
-	int srhoff = 0;
 	int ret;
 
 	srh = get_and_validate_srh(skb);
-	if (!srh)
-		goto drop;
+	if (!srh) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
 
 	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
 	 * which is also accessed by the bpf_lwt_seg6_* helpers
 	 */
 	preempt_disable();
+	srh_state->srh = srh;
 	srh_state->hdrlen = srh->hdrlen << 3;
-	srh_state->valid = 1;
+	srh_state->valid = true;
 
 	rcu_read_lock();
 	bpf_compute_data_pointers(skb);
 	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
 	rcu_read_unlock();
 
-	local_srh_state = *srh_state;
-	preempt_enable();
-
 	switch (ret) {
 	case BPF_OK:
 	case BPF_REDIRECT:
@@ -500,24 +521,17 @@ static int input_action_end_bpf(struct sk_buff *skb,
 		goto drop;
 	}
 
-	if (unlikely((local_srh_state.hdrlen & 7) != 0))
-		goto drop;
-
-	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
-		goto drop;
-	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
-	srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
-
-	if (!local_srh_state.valid &&
-	    unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+	if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
 		goto drop;
 
+	preempt_enable();
 	if (ret != BPF_REDIRECT)
 		seg6_lookup_nexthop(skb, NULL, 0);
 
 	return dst_input(skb);
 
 drop:
+	preempt_enable();
 	kfree_skb(skb);
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From d692f1138a4bac2efd2c8656ca15556b63479e82 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Mon, 30 Jul 2018 17:42:28 -0700
Subject: bpf: Support bpf_get_socket_cookie in more prog types

bpf_get_socket_cookie() helper can be used to identify skb that
correspond to the same socket.

Though socket cookie can be useful in many other use-cases where socket is
available in program context. Specifically BPF_PROG_TYPE_CGROUP_SOCK_ADDR
and BPF_PROG_TYPE_SOCK_OPS programs can benefit from it so that one of
them can augment a value in a map prepared earlier by other program for
the same socket.

The patch adds support to call bpf_get_socket_cookie() from
BPF_PROG_TYPE_CGROUP_SOCK_ADDR and BPF_PROG_TYPE_SOCK_OPS.

It doesn't introduce new helpers. Instead it reuses same helper name
bpf_get_socket_cookie() but adds support to this helper to accept
`struct bpf_sock_addr` and `struct bpf_sock_ops`.

Documentation in bpf.h is changed in a way that should not break
automatic generation of markdown.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 14 ++++++++++++++
 net/core/filter.c        | 28 ++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 870113916cac..0ebaaf7f3568 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1371,6 +1371,20 @@ union bpf_attr {
  * 		A 8-byte long non-decreasing number on success, or 0 if the
  * 		socket field is missing inside *skb*.
  *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * 	Description
+ * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		*skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * 	Return
+ * 		A 8-byte long non-decreasing number.
+ *
  * u32 bpf_get_socket_uid(struct sk_buff *skb)
  * 	Return
  * 		The owner UID of the socket associated to *skb*. If the socket
diff --git a/net/core/filter.c b/net/core/filter.c
index 7df1a0f1d1e1..9bb9a4488e25 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3812,6 +3812,30 @@ static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
+{
+	return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
+	.func		= bpf_get_socket_cookie_sock_addr,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+	return sock_gen_cookie(ctx->sk);
+}
+
+static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
+	.func		= bpf_get_socket_cookie_sock_ops,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
 {
 	struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4818,6 +4842,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		default:
 			return NULL;
 		}
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_sock_addr_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4960,6 +4986,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sock_map_update_proto;
 	case BPF_FUNC_sock_hash_update:
 		return &bpf_sock_hash_update_proto;
+	case BPF_FUNC_get_socket_cookie:
+		return &bpf_get_socket_cookie_sock_ops_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
cgit v1.2.3


From 760c435e0f85ed19e48a90d746ce1de2cd02def7 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 19 Jul 2018 00:09:12 +0200
Subject: mtd: rawnand: make subop helpers return unsigned values

A report from Colin Ian King pointed a CoverityScan issue where error
values on these helpers where not checked in the drivers. These
helpers can error out only in case of a software bug in driver code,
not because of a runtime/hardware error. Hence, let's WARN_ON() in this
case and return 0 which is harmless anyway.

Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation")
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 44 ++++++++++++++++++++--------------------
 include/linux/mtd/rawnand.h      | 16 +++++++--------
 2 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index c4b74630f4c5..ef10beab99f5 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -2668,8 +2668,8 @@ static bool nand_subop_instr_is_valid(const struct nand_subop *subop,
 	return subop && instr_idx < subop->ninstrs;
 }
 
-static int nand_subop_get_start_off(const struct nand_subop *subop,
-				    unsigned int instr_idx)
+static unsigned int nand_subop_get_start_off(const struct nand_subop *subop,
+					     unsigned int instr_idx)
 {
 	if (instr_idx)
 		return 0;
@@ -2688,12 +2688,12 @@ static int nand_subop_get_start_off(const struct nand_subop *subop,
  *
  * Given an address instruction, returns the offset of the first cycle to issue.
  */
-int nand_subop_get_addr_start_off(const struct nand_subop *subop,
-				  unsigned int instr_idx)
+unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop,
+					   unsigned int instr_idx)
 {
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR))
+		return 0;
 
 	return nand_subop_get_start_off(subop, instr_idx);
 }
@@ -2710,14 +2710,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_addr_start_off);
  *
  * Given an address instruction, returns the number of address cycle to issue.
  */
-int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
-				unsigned int instr_idx)
+unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
+					 unsigned int instr_idx)
 {
 	int start_off, end_off;
 
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR)
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    subop->instrs[instr_idx].type != NAND_OP_ADDR_INSTR))
+		return 0;
 
 	start_off = nand_subop_get_addr_start_off(subop, instr_idx);
 
@@ -2742,12 +2742,12 @@ EXPORT_SYMBOL_GPL(nand_subop_get_num_addr_cyc);
  *
  * Given a data instruction, returns the offset to start from.
  */
-int nand_subop_get_data_start_off(const struct nand_subop *subop,
-				  unsigned int instr_idx)
+unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop,
+					   unsigned int instr_idx)
 {
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    !nand_instr_is_data(&subop->instrs[instr_idx]))
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    !nand_instr_is_data(&subop->instrs[instr_idx])))
+		return 0;
 
 	return nand_subop_get_start_off(subop, instr_idx);
 }
@@ -2764,14 +2764,14 @@ EXPORT_SYMBOL_GPL(nand_subop_get_data_start_off);
  *
  * Returns the length of the chunk of data to send/receive.
  */
-int nand_subop_get_data_len(const struct nand_subop *subop,
-			    unsigned int instr_idx)
+unsigned int nand_subop_get_data_len(const struct nand_subop *subop,
+				     unsigned int instr_idx)
 {
 	int start_off = 0, end_off;
 
-	if (!nand_subop_instr_is_valid(subop, instr_idx) ||
-	    !nand_instr_is_data(&subop->instrs[instr_idx]))
-		return -EINVAL;
+	if (WARN_ON(!nand_subop_instr_is_valid(subop, instr_idx) ||
+		    !nand_instr_is_data(&subop->instrs[instr_idx])))
+		return 0;
 
 	start_off = nand_subop_get_data_start_off(subop, instr_idx);
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index f60fad29eae6..598d356de83f 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1007,14 +1007,14 @@ struct nand_subop {
 	unsigned int last_instr_end_off;
 };
 
-int nand_subop_get_addr_start_off(const struct nand_subop *subop,
-				  unsigned int op_id);
-int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
-				unsigned int op_id);
-int nand_subop_get_data_start_off(const struct nand_subop *subop,
-				  unsigned int op_id);
-int nand_subop_get_data_len(const struct nand_subop *subop,
-			    unsigned int op_id);
+unsigned int nand_subop_get_addr_start_off(const struct nand_subop *subop,
+					   unsigned int op_id);
+unsigned int nand_subop_get_num_addr_cyc(const struct nand_subop *subop,
+					 unsigned int op_id);
+unsigned int nand_subop_get_data_start_off(const struct nand_subop *subop,
+					   unsigned int op_id);
+unsigned int nand_subop_get_data_len(const struct nand_subop *subop,
+				     unsigned int op_id);
 
 /**
  * struct nand_op_parser_addr_constraints - Constraints for address instructions
-- 
cgit v1.2.3


From 7da45139d264f3b7ead04e00ebb29b189cf9826e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 17 Jul 2018 09:08:02 +0200
Subject: mtd: rawnand: better name for the controller structure

In the raw NAND core, a NAND chip is described by a nand_chip structure,
while a NAND controller is described with a nand_hw_control structure
which is not very meaningful.

Rename this structure nand_controller.

As the structure gets renamed, it is logical to also rename the core
function initializing it from nand_hw_control_init() to
nand_controller_init().

Lastly, the 'hwcontrol' entry of the nand_chip structure is not
meaningful neither while it has the role of fallback when no controller
structure is provided by the driver (the controller driver is dumb and
can only control a single chip). Thus, it is renamed dummy_controller.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c | 10 +++++-----
 drivers/mtd/nand/raw/brcmnand/brcmnand.c     |  4 ++--
 drivers/mtd/nand/raw/docg4.c                 |  4 ++--
 drivers/mtd/nand/raw/fsl_elbc_nand.c         |  4 ++--
 drivers/mtd/nand/raw/fsl_ifc_nand.c          |  4 ++--
 drivers/mtd/nand/raw/jz4780_nand.c           |  7 ++++---
 drivers/mtd/nand/raw/marvell_nand.c          |  6 +++---
 drivers/mtd/nand/raw/mtk_nand.c              |  2 +-
 drivers/mtd/nand/raw/nand_base.c             |  4 ++--
 drivers/mtd/nand/raw/ndfc.c                  |  4 ++--
 drivers/mtd/nand/raw/omap2.c                 |  2 +-
 drivers/mtd/nand/raw/oxnas_nand.c            |  4 ++--
 drivers/mtd/nand/raw/qcom_nandc.c            |  4 ++--
 drivers/mtd/nand/raw/s3c2410.c               |  4 ++--
 drivers/mtd/nand/raw/sunxi_nand.c            |  6 +++---
 drivers/mtd/nand/raw/tango_nand.c            |  4 ++--
 drivers/mtd/nand/raw/tegra_nand.c            |  6 +++---
 drivers/mtd/nand/raw/txx9ndfmc.c             |  4 ++--
 include/linux/mtd/rawnand.h                  | 14 ++++++++------
 19 files changed, 50 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 30dae4c9d439..855cc7729c43 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -216,7 +216,7 @@ struct atmel_nand_controller_caps {
 };
 
 struct atmel_nand_controller {
-	struct nand_hw_control base;
+	struct nand_controller base;
 	const struct atmel_nand_controller_caps *caps;
 	struct device *dev;
 	struct regmap *smc;
@@ -227,7 +227,7 @@ struct atmel_nand_controller {
 };
 
 static inline struct atmel_nand_controller *
-to_nand_controller(struct nand_hw_control *ctl)
+to_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(ctl, struct atmel_nand_controller, base);
 }
@@ -239,7 +239,7 @@ struct atmel_smc_nand_controller {
 };
 
 static inline struct atmel_smc_nand_controller *
-to_smc_nand_controller(struct nand_hw_control *ctl)
+to_smc_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(to_nand_controller(ctl),
 			    struct atmel_smc_nand_controller, base);
@@ -263,7 +263,7 @@ struct atmel_hsmc_nand_controller {
 };
 
 static inline struct atmel_hsmc_nand_controller *
-to_hsmc_nand_controller(struct nand_hw_control *ctl)
+to_hsmc_nand_controller(struct nand_controller *ctl)
 {
 	return container_of(to_nand_controller(ctl),
 			    struct atmel_hsmc_nand_controller, base);
@@ -1966,7 +1966,7 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc,
 	struct device_node *np = dev->of_node;
 	int ret;
 
-	nand_hw_control_init(&nc->base);
+	nand_controller_init(&nc->base);
 	INIT_LIST_HEAD(&nc->chips);
 	nc->dev = dev;
 	nc->caps = caps;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 1306aaa7a8bf..2e5efa0f9ea2 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -114,7 +114,7 @@ enum {
 
 struct brcmnand_controller {
 	struct device		*dev;
-	struct nand_hw_control	controller;
+	struct nand_controller	controller;
 	void __iomem		*nand_base;
 	void __iomem		*nand_fc; /* flash cache */
 	void __iomem		*flash_dma_base;
@@ -2433,7 +2433,7 @@ int brcmnand_probe(struct platform_device *pdev, struct brcmnand_soc *soc)
 
 	init_completion(&ctrl->done);
 	init_completion(&ctrl->dma_done);
-	nand_hw_control_init(&ctrl->controller);
+	nand_controller_init(&ctrl->controller);
 	INIT_LIST_HEAD(&ctrl->host_list);
 
 	/* NAND register range */
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index bb96cb33cd6b..4dccdfba6140 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1257,8 +1257,8 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->ecc.strength = DOCG4_T;
 	nand->options = NAND_BUSWIDTH_16 | NAND_NO_SUBPAGE_WRITE;
 	nand->IO_ADDR_R = nand->IO_ADDR_W = doc->virtadr + DOC_IOSPACE_DATA;
-	nand->controller = &nand->hwcontrol;
-	nand_hw_control_init(nand->controller);
+	nand->controller = &nand->dummy_controller;
+	nand_controller_init(nand->controller);
 
 	/* methods */
 	nand->cmdfunc = docg4_command;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 51f0b340bc0d..0aa54a949653 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -61,7 +61,7 @@ struct fsl_elbc_mtd {
 /* Freescale eLBC FCM controller information */
 
 struct fsl_elbc_fcm_ctrl {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct fsl_elbc_mtd *chips[MAX_BANKS];
 
 	u8 __iomem *addr;        /* Address of assigned FCM buffer        */
@@ -879,7 +879,7 @@ static int fsl_elbc_nand_probe(struct platform_device *pdev)
 		}
 		elbc_fcm_ctrl->counter++;
 
-		nand_hw_control_init(&elbc_fcm_ctrl->controller);
+		nand_controller_init(&elbc_fcm_ctrl->controller);
 		fsl_lbc_ctrl_dev->nand = elbc_fcm_ctrl;
 	} else {
 		elbc_fcm_ctrl = fsl_lbc_ctrl_dev->nand;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 75d3c951f61a..96130d91e32c 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -51,7 +51,7 @@ struct fsl_ifc_mtd {
 
 /* overview of the fsl ifc controller */
 struct fsl_ifc_nand_ctrl {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct fsl_ifc_mtd *chips[FSL_IFC_BANK_COUNT];
 
 	void __iomem *addr;	/* Address of assigned IFC buffer	*/
@@ -1004,7 +1004,7 @@ static int fsl_ifc_nand_probe(struct platform_device *dev)
 		ifc_nand_ctrl->addr = NULL;
 		fsl_ifc_ctrl_dev->nand = ifc_nand_ctrl;
 
-		nand_hw_control_init(&ifc_nand_ctrl->controller);
+		nand_controller_init(&ifc_nand_ctrl->controller);
 	} else {
 		ifc_nand_ctrl = fsl_ifc_ctrl_dev->nand;
 	}
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index e69f6ae4c539..49841dad347c 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -44,7 +44,7 @@ struct jz4780_nand_cs {
 struct jz4780_nand_controller {
 	struct device *dev;
 	struct jz4780_bch *bch;
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	unsigned int num_banks;
 	struct list_head chips;
 	int selected;
@@ -65,7 +65,8 @@ static inline struct jz4780_nand_chip *to_jz4780_nand_chip(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct jz4780_nand_chip, chip);
 }
 
-static inline struct jz4780_nand_controller *to_jz4780_nand_controller(struct nand_hw_control *ctrl)
+static inline struct jz4780_nand_controller
+*to_jz4780_nand_controller(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct jz4780_nand_controller, controller);
 }
@@ -368,7 +369,7 @@ static int jz4780_nand_probe(struct platform_device *pdev)
 	nfc->dev = dev;
 	nfc->num_banks = num_banks;
 
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	ret = jz4780_nand_init_chips(nfc, pdev);
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 80a074cccb82..bd5f9a4b7b16 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -318,7 +318,7 @@ struct marvell_nfc_caps {
  * @dma_buf:		32-bit aligned buffer for DMA transfers (NFCv1 only)
  */
 struct marvell_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	struct clk *core_clk;
@@ -335,7 +335,7 @@ struct marvell_nfc {
 	u8 *dma_buf;
 };
 
-static inline struct marvell_nfc *to_marvell_nfc(struct nand_hw_control *ctrl)
+static inline struct marvell_nfc *to_marvell_nfc(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct marvell_nfc, controller);
 }
@@ -2745,7 +2745,7 @@ static int marvell_nfc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	nfc->dev = dev;
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index e6b14b79c8a8..7bc6be3f6ec0 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -145,7 +145,7 @@ struct mtk_nfc_clk {
 };
 
 struct mtk_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct mtk_ecc_config ecc_cfg;
 	struct mtk_nfc_clk clk;
 	struct mtk_ecc *ecc;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e545e03a214e..dcdf0f373100 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5000,8 +5000,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
 
 	if (!chip->controller) {
-		chip->controller = &chip->hwcontrol;
-		nand_hw_control_init(chip->controller);
+		chip->controller = &chip->dummy_controller;
+		nand_controller_init(chip->controller);
 	}
 
 	if (!chip->buf_align)
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index d8a806894937..540fa1a0ea24 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -39,7 +39,7 @@ struct ndfc_controller {
 	void __iomem *ndfcbase;
 	struct nand_chip chip;
 	int chip_select;
-	struct nand_hw_control ndfc_control;
+	struct nand_controller ndfc_control;
 };
 
 static struct ndfc_controller ndfc_ctrl[NDFC_MAX_CS];
@@ -218,7 +218,7 @@ static int ndfc_probe(struct platform_device *ofdev)
 	ndfc = &ndfc_ctrl[cs];
 	ndfc->chip_select = cs;
 
-	nand_hw_control_init(&ndfc->ndfc_control);
+	nand_controller_init(&ndfc->ndfc_control);
 	ndfc->ofdev = ofdev;
 	dev_set_drvdata(&ofdev->dev, ndfc);
 
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index e50c64adc3c8..e943b2e5a5e2 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -145,7 +145,7 @@ static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc,
 static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10};
 
 /* Shared among all NAND instances to synchronize access to the ECC Engine */
-static struct nand_hw_control omap_gpmc_controller = {
+static struct nand_controller omap_gpmc_controller = {
 	.lock = __SPIN_LOCK_UNLOCKED(omap_gpmc_controller.lock),
 	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(omap_gpmc_controller.wq),
 };
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index d649d5944826..01b00bb69c1e 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -32,7 +32,7 @@
 #define OXNAS_NAND_MAX_CHIPS	1
 
 struct oxnas_nand_ctrl {
-	struct nand_hw_control base;
+	struct nand_controller base;
 	void __iomem *io_base;
 	struct clk *clk;
 	struct nand_chip *chips[OXNAS_NAND_MAX_CHIPS];
@@ -96,7 +96,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 	if (!oxnas)
 		return -ENOMEM;
 
-	nand_hw_control_init(&oxnas->base);
+	nand_controller_init(&oxnas->base);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	oxnas->io_base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 645630953f38..aa6c3e026ef1 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -365,7 +365,7 @@ struct nandc_regs {
  *				from all connected NAND devices pagesize
  */
 struct qcom_nand_controller {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct list_head host_list;
 
 	struct device *dev;
@@ -2728,7 +2728,7 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc)
 	INIT_LIST_HEAD(&nandc->desc_list);
 	INIT_LIST_HEAD(&nandc->host_list);
 
-	nand_hw_control_init(&nandc->controller);
+	nand_controller_init(&nandc->controller);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 5a4a68790653..e8bf64832213 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -162,7 +162,7 @@ enum s3c_nand_clk_state {
  */
 struct s3c2410_nand_info {
 	/* mtd info */
-	struct nand_hw_control		controller;
+	struct nand_controller		controller;
 	struct s3c2410_nand_mtd		*mtds;
 	struct s3c2410_platform_nand	*platform;
 
@@ -1094,7 +1094,7 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, info);
 
-	nand_hw_control_init(&info->controller);
+	nand_controller_init(&info->controller);
 
 	/* get the clock source and enable it */
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 4b11cd4a79be..07f3ff9a28f2 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -234,7 +234,7 @@ static inline struct sunxi_nand_chip *to_sunxi_nand(struct nand_chip *nand)
  *			controller events
  */
 struct sunxi_nfc {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	struct clk *ahb_clk;
@@ -247,7 +247,7 @@ struct sunxi_nfc {
 	struct dma_chan *dmac;
 };
 
-static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_hw_control *ctrl)
+static inline struct sunxi_nfc *to_sunxi_nfc(struct nand_controller *ctrl)
 {
 	return container_of(ctrl, struct sunxi_nfc, controller);
 }
@@ -2012,7 +2012,7 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	nfc->dev = dev;
-	nand_hw_control_init(&nfc->controller);
+	nand_controller_init(&nfc->controller);
 	INIT_LIST_HEAD(&nfc->chips);
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index f2052fae21c7..dd7a26efdf4f 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -83,7 +83,7 @@
 #define MAX_CS		4
 
 struct tango_nfc {
-	struct nand_hw_control hw;
+	struct nand_controller hw;
 	void __iomem *reg_base;
 	void __iomem *mem_base;
 	void __iomem *pbus_base;
@@ -654,7 +654,7 @@ static int tango_nand_probe(struct platform_device *pdev)
 		return PTR_ERR(nfc->chan);
 
 	platform_set_drvdata(pdev, nfc);
-	nand_hw_control_init(&nfc->hw);
+	nand_controller_init(&nfc->hw);
 	nfc->freq_kHz = clk_get_rate(clk) / 1000;
 
 	for_each_child_of_node(pdev->dev.of_node, np) {
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 56c0aa1bc81f..31c0d9ca9d23 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -164,7 +164,7 @@
 				HWSTATUS_RBSY_VALUE(NAND_STATUS_READY))
 
 struct tegra_nand_controller {
-	struct nand_hw_control controller;
+	struct nand_controller controller;
 	struct device *dev;
 	void __iomem *regs;
 	int irq;
@@ -187,7 +187,7 @@ struct tegra_nand_chip {
 };
 
 static inline struct tegra_nand_controller *
-			to_tegra_ctrl(struct nand_hw_control *hw_ctrl)
+			to_tegra_ctrl(struct nand_controller *hw_ctrl)
 {
 	return container_of(hw_ctrl, struct tegra_nand_controller, controller);
 }
@@ -1136,7 +1136,7 @@ static int tegra_nand_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	ctrl->dev = &pdev->dev;
-	nand_hw_control_init(&ctrl->controller);
+	nand_controller_init(&ctrl->controller);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ctrl->regs = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 5fe9da8b4a0a..8f5bbbac4612 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -73,7 +73,7 @@ struct txx9ndfmc_drvdata {
 	void __iomem *base;
 	unsigned char hold;	/* in gbusclock */
 	unsigned char spw;	/* in gbusclock */
-	struct nand_hw_control hw_control;
+	struct nand_controller hw_control;
 };
 
 static struct platform_device *mtd_to_platdev(struct mtd_info *mtd)
@@ -303,7 +303,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 	dev_info(&dev->dev, "CLK:%ldMHz HOLD:%d SPW:%d\n",
 		 (gbusclk + 500000) / 1000000, hold, spw);
 
-	nand_hw_control_init(&drvdata->hw_control);
+	nand_controller_init(&drvdata->hw_control);
 
 	platform_set_drvdata(dev, drvdata);
 	txx9ndfmc_initialize(dev);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 598d356de83f..93a2678e0f0d 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -510,20 +510,21 @@ struct nand_id {
 };
 
 /**
- * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
+ * struct nand_controller - Structure used to describe a NAND controller
+ *
  * @lock:               protection lock
  * @active:		the mtd device which holds the controller currently
  * @wq:			wait queue to sleep on if a NAND operation is in
  *			progress used instead of the per chip wait queue
  *			when a hw controller is available.
  */
-struct nand_hw_control {
+struct nand_controller {
 	spinlock_t lock;
 	struct nand_chip *active;
 	wait_queue_head_t wq;
 };
 
-static inline void nand_hw_control_init(struct nand_hw_control *nfc)
+static inline void nand_controller_init(struct nand_controller *nfc)
 {
 	nfc->active = NULL;
 	spin_lock_init(&nfc->lock);
@@ -1197,7 +1198,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buf_align:		minimum buffer alignment required by a platform
- * @hwcontrol:		platform-specific hardware control structure
+ * @dummy_controller:	dummy controller implementation for drivers that can
+ *			only control a single chip
  * @erase:		[REPLACEABLE] erase function
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
@@ -1333,11 +1335,11 @@ struct nand_chip {
 	flstate_t state;
 
 	uint8_t *oob_poi;
-	struct nand_hw_control *controller;
+	struct nand_controller *controller;
 
 	struct nand_ecc_ctrl ecc;
 	unsigned long buf_align;
-	struct nand_hw_control hwcontrol;
+	struct nand_controller dummy_controller;
 
 	uint8_t *bbt;
 	struct nand_bbt_descr *bbt_td;
-- 
cgit v1.2.3


From 05b54c7bac906c443fd0b23ab8954e0560b33e5c Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Thu, 19 Jul 2018 01:05:46 +0200
Subject: mtd: rawnand: add hooks that may be called during nand_scan()

In order to remove the limitation that forbids dynamic allocation in
nand_scan_ident(), we must create a path that will be the same for all
controller drivers. The idea is to use nand_scan() instead of the widely
used nand_scan_ident()/nand_scan_tail() couple. In order to achieve
this, controller drivers will need to adjust some parameters between
these two functions depending on the NAND chip wired on them.

This takes the form of two new hooks (->{attach,detach}_chip()) that are
placed in a new nand_controller_ops structure, which is then attached
to the nand_controller object at driver initialization time.
->attach_chip() is called between nand_scan_ident() and
nand_scan_tail(), and ->detach_chip() is called in the error path of
nand_scan() and in nand_cleanup().

Note that some NAND controller drivers don't have a dedicated
nand_controller object and instead rely on the default/dummy one
embedded in nand_chip. If you're in this case and still want to
initialize the controller ops, you'll have to manipulate
chip->dummy_controller directly.

Last but not least, it's worth mentioning that we plan to move some of
the controller related hooks placed in nand_chip into
nand_controller_ops to make the separation between NAND chip and NAND
controller methods clearer.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 32 ++++++++++++++++++++++++++++++--
 include/linux/mtd/rawnand.h      | 21 +++++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index dcdf0f373100..dea41fa25be1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6718,6 +6718,20 @@ err_free_buf:
 }
 EXPORT_SYMBOL(nand_scan_tail);
 
+static int nand_attach(struct nand_chip *chip)
+{
+	if (chip->controller->ops && chip->controller->ops->attach_chip)
+		return chip->controller->ops->attach_chip(chip);
+
+	return 0;
+}
+
+static void nand_detach(struct nand_chip *chip)
+{
+	if (chip->controller->ops && chip->controller->ops->detach_chip)
+		chip->controller->ops->detach_chip(chip);
+}
+
 /**
  * nand_scan_with_ids - [NAND Interface] Scan for the NAND device
  * @mtd: MTD device structure
@@ -6731,11 +6745,21 @@ EXPORT_SYMBOL(nand_scan_tail);
 int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
 		       struct nand_flash_dev *ids)
 {
+	struct nand_chip *chip = mtd_to_nand(mtd);
 	int ret;
 
 	ret = nand_scan_ident(mtd, maxchips, ids);
-	if (!ret)
-		ret = nand_scan_tail(mtd);
+	if (ret)
+		return ret;
+
+	ret = nand_attach(chip);
+	if (ret)
+		return ret;
+
+	ret = nand_scan_tail(mtd);
+	if (ret)
+		nand_detach(chip);
+
 	return ret;
 }
 EXPORT_SYMBOL(nand_scan_with_ids);
@@ -6763,7 +6787,11 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free manufacturer priv data. */
 	nand_manufacturer_cleanup(chip);
+
+	/* Free controller specific allocations after chip identification */
+	nand_detach(chip);
 }
+
 EXPORT_SYMBOL_GPL(nand_cleanup);
 
 /**
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 93a2678e0f0d..fbd6b29cf22c 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -509,6 +509,25 @@ struct nand_id {
 	int len;
 };
 
+/**
+ * struct nand_controller_ops - Controller operations
+ *
+ * @attach_chip: this method is called after the NAND detection phase after
+ *		 flash ID and MTD fields such as erase size, page size and OOB
+ *		 size have been set up. ECC requirements are available if
+ *		 provided by the NAND chip or device tree. Typically used to
+ *		 choose the appropriate ECC configuration and allocate
+ *		 associated resources.
+ *		 This hook is optional.
+ * @detach_chip: free all resources allocated/claimed in
+ *		 nand_controller_ops->attach_chip().
+ *		 This hook is optional.
+ */
+struct nand_controller_ops {
+	int (*attach_chip)(struct nand_chip *chip);
+	void (*detach_chip)(struct nand_chip *chip);
+};
+
 /**
  * struct nand_controller - Structure used to describe a NAND controller
  *
@@ -517,11 +536,13 @@ struct nand_id {
  * @wq:			wait queue to sleep on if a NAND operation is in
  *			progress used instead of the per chip wait queue
  *			when a hw controller is available.
+ * @ops:		NAND controller operations.
  */
 struct nand_controller {
 	spinlock_t lock;
 	struct nand_chip *active;
 	wait_queue_head_t wq;
+	const struct nand_controller_ops *ops;
 };
 
 static inline void nand_controller_init(struct nand_controller *nfc)
-- 
cgit v1.2.3


From 98732da1a08ebb666983d469981a8b994e77d556 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:50 +0200
Subject: mtd: rawnand: do not export nand_scan_[ident|tail]() anymore

Both nand_scan_ident() and nand_scan_tail() helpers used to be called
directly from controller drivers that needed to tweak some ECC-related
parameters before nand_scan_tail(). This separation prevented dynamic
allocations during the phase of NAND identification, which was
inconvenient.

All controller drivers have been moved to use nand_scan(), in
conjunction with the chip->ecc.[attach|detach]_chip() hooks that
actually do the required tweaking sequence between both ident/tail
calls, allowing programmers to use dynamic allocation as they need all
across the scanning sequence.

Declare nand_scan_[ident|tail]() statically now.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 16 +++++++++-------
 include/linux/mtd/rawnand.h      | 18 ++++++------------
 2 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 42a7a934a17b..34ea44f90fd8 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5924,7 +5924,7 @@ static int nand_dt_init(struct nand_chip *chip)
 }
 
 /**
- * nand_scan_ident - [NAND Interface] Scan for the NAND device
+ * nand_scan_ident - Scan for the NAND device
  * @mtd: MTD device structure
  * @maxchips: number of chips to scan for
  * @table: alternative NAND ID table
@@ -5932,9 +5932,13 @@ static int nand_dt_init(struct nand_chip *chip)
  * This is the first phase of the normal nand_scan() function. It reads the
  * flash ID and sets up MTD fields accordingly.
  *
+ * This helper used to be called directly from controller drivers that needed
+ * to tweak some ECC-related parameters before nand_scan_tail(). This separation
+ * prevented dynamic allocations during this phase which was unconvenient and
+ * as been banned for the benefit of the ->init_ecc()/cleanup_ecc() hooks.
  */
-int nand_scan_ident(struct mtd_info *mtd, int maxchips,
-		    struct nand_flash_dev *table)
+static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
+			   struct nand_flash_dev *table)
 {
 	int i, nand_maf_id, nand_dev_id;
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -6008,7 +6012,6 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 
 	return 0;
 }
-EXPORT_SYMBOL(nand_scan_ident);
 
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 {
@@ -6385,14 +6388,14 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd)
 }
 
 /**
- * nand_scan_tail - [NAND Interface] Scan for the NAND device
+ * nand_scan_tail - Scan for the NAND device
  * @mtd: MTD device structure
  *
  * This is the second phase of the normal nand_scan() function. It fills out
  * all the uninitialized function pointers with the defaults and scans for a
  * bad block table if appropriate.
  */
-int nand_scan_tail(struct mtd_info *mtd)
+static int nand_scan_tail(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
@@ -6716,7 +6719,6 @@ err_free_buf:
 
 	return ret;
 }
-EXPORT_SYMBOL(nand_scan_tail);
 
 static int nand_attach(struct nand_chip *chip)
 {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index fbd6b29cf22c..71571ed23a20 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -35,17 +35,6 @@ static inline int nand_scan(struct mtd_info *mtd, int max_chips)
 	return nand_scan_with_ids(mtd, max_chips, NULL);
 }
 
-/*
- * Separate phases of nand_scan(), allowing board driver to intervene
- * and override command or ECC setup according to flash type.
- */
-int nand_scan_ident(struct mtd_info *mtd, int max_chips,
-			   struct nand_flash_dev *table);
-int nand_scan_tail(struct mtd_info *mtd);
-
-/* Unregister the MTD device and free resources held by the NAND device */
-void nand_release(struct mtd_info *mtd);
-
 /* Internal helper for board drivers which need to override command function */
 void nand_wait_ready(struct mtd_info *mtd);
 
@@ -1745,8 +1734,13 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		       unsigned int len, bool force_8bit);
 
-/* Free resources held by the NAND device */
+/*
+ * Free resources held by the NAND device, must be called on error after a
+ * sucessful nand_scan().
+ */
 void nand_cleanup(struct nand_chip *chip);
+/* Unregister the MTD device and calls nand_cleanup() */
+void nand_release(struct mtd_info *mtd);
 
 /* Default extended ID decoding function */
 void nand_decode_ext_id(struct nand_chip *chip);
-- 
cgit v1.2.3


From 2023f1fa216f30b1877d65be2057fbaf0bbd49b3 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:51 +0200
Subject: mtd: rawnand: allocate model parameter dynamically

Thanks to the migration of all drivers to use nand_scan() and the
related nand_controller_ops, we can now allocate data during the
detection phase. Let's do it first for the NAND model parameter which
is allocated in nand_detect().

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 52 +++++++++++++++++++++++++++++++---------
 include/linux/mtd/rawnand.h      |  2 +-
 2 files changed, 42 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 34ea44f90fd8..00e80781124a 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5225,8 +5225,11 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
 	sanitize_string(p->model, sizeof(p->model));
-	strncpy(chip->parameters.model, p->model,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_onfi_param_page;
+	}
 
 	mtd->writesize = le32_to_cpu(p->byte_per_page);
 
@@ -5356,8 +5359,11 @@ static int nand_flash_detect_jedec(struct nand_chip *chip)
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
 	sanitize_string(p->model, sizeof(p->model));
-	strncpy(chip->parameters.model, p->model,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_jedec_param_page;
+	}
 
 	mtd->writesize = le32_to_cpu(p->byte_per_page);
 
@@ -5546,8 +5552,9 @@ static bool find_full_id_nand(struct nand_chip *chip,
 		chip->onfi_timing_mode_default =
 					type->onfi_timing_mode_default;
 
-		strncpy(chip->parameters.model, type->name,
-			sizeof(chip->parameters.model) - 1);
+		chip->parameters.model = kstrdup(type->name, GFP_KERNEL);
+		if (!chip->parameters.model)
+			return false;
 
 		return true;
 	}
@@ -5706,8 +5713,9 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 	if (!type->name)
 		return -ENODEV;
 
-	strncpy(chip->parameters.model, type->name,
-		sizeof(chip->parameters.model) - 1);
+	chip->parameters.model = kstrdup(type->name, GFP_KERNEL);
+	if (!chip->parameters.model)
+		return -ENOMEM;
 
 	chip->chipsize = (uint64_t)type->chipsize << 20;
 
@@ -5737,7 +5745,9 @@ ident_done:
 			mtd->name);
 		pr_warn("bus width %d instead of %d bits\n", busw ? 16 : 8,
 			(chip->options & NAND_BUSWIDTH_16) ? 16 : 8);
-		return -EINVAL;
+		ret = -EINVAL;
+
+		goto free_detect_allocation;
 	}
 
 	nand_decode_bbm_options(chip);
@@ -5774,6 +5784,11 @@ ident_done:
 		(int)(chip->chipsize >> 20), nand_is_slc(chip) ? "SLC" : "MLC",
 		mtd->erasesize >> 10, mtd->writesize, mtd->oobsize);
 	return 0;
+
+free_detect_allocation:
+	kfree(chip->parameters.model);
+
+	return ret;
 }
 
 static const char * const nand_ecc_modes[] = {
@@ -6013,6 +6028,11 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 	return 0;
 }
 
+static void nand_scan_ident_cleanup(struct nand_chip *chip)
+{
+	kfree(chip->parameters.model);
+}
+
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -6760,11 +6780,18 @@ int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
 
 	ret = nand_attach(chip);
 	if (ret)
-		return ret;
+		goto cleanup_ident;
 
 	ret = nand_scan_tail(mtd);
 	if (ret)
-		nand_detach(chip);
+		goto detach_chip;
+
+	return 0;
+
+detach_chip:
+	nand_detach(chip);
+cleanup_ident:
+	nand_scan_ident_cleanup(chip);
 
 	return ret;
 }
@@ -6796,6 +6823,9 @@ void nand_cleanup(struct nand_chip *chip)
 
 	/* Free controller specific allocations after chip identification */
 	nand_detach(chip);
+
+	/* Free identification phase allocations */
+	nand_scan_ident_cleanup(chip);
 }
 
 EXPORT_SYMBOL_GPL(nand_cleanup);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 71571ed23a20..099fa166569a 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -476,7 +476,7 @@ struct onfi_params {
  */
 struct nand_parameters {
 	/* Generic parameters */
-	char model[100];
+	const char *model;
 	bool supports_set_get_features;
 	DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER);
 	DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER);
-- 
cgit v1.2.3


From 54c990775f78113a708f24e15877f6b7bd9a1277 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Mon, 2 Jul 2018 04:21:19 -0400
Subject: macintosh/via-pmu68k: Don't load driver on unsupported hardware

Don't load the via-pmu68k driver on early PowerBooks. The M50753 PMU
device found in those models was never supported by this driver.
Attempting to load the driver usually causes a boot hang.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Michael Schmitz <schmitzmic@gmail.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/m68k/mac/misc.c           | 6 ++----
 drivers/macintosh/via-pmu68k.c | 4 ----
 include/uapi/linux/pmu.h       | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index c68054361615..7ccb799eeb57 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -478,8 +478,7 @@ void mac_poweroff(void)
 		cuda_shutdown();
 #endif
 #ifdef CONFIG_ADB_PMU68K
-	} else if (macintosh_config->adb_type == MAC_ADB_PB1
-		|| macintosh_config->adb_type == MAC_ADB_PB2) {
+	} else if (macintosh_config->adb_type == MAC_ADB_PB2) {
 		pmu_shutdown();
 #endif
 	}
@@ -520,8 +519,7 @@ void mac_reset(void)
 		cuda_restart();
 #endif
 #ifdef CONFIG_ADB_PMU68K
-	} else if (macintosh_config->adb_type == MAC_ADB_PB1
-		|| macintosh_config->adb_type == MAC_ADB_PB2) {
+	} else if (macintosh_config->adb_type == MAC_ADB_PB2) {
 		pmu_restart();
 #endif
 	} else if (CPU_IS_030) {
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index d545ed45e482..bec8e1837d7d 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -175,9 +175,6 @@ static s8 pmu_data_len[256][2] = {
 int __init find_via_pmu(void)
 {
 	switch (macintosh_config->adb_type) {
-	case MAC_ADB_PB1:
-		pmu_kind = PMU_68K_V1;
-		break;
 	case MAC_ADB_PB2:
 		pmu_kind = PMU_68K_V2;
 		break;
@@ -785,7 +782,6 @@ pmu_enable_backlight(int on)
 	    /* first call: get current backlight value */
 	    if (backlight_level < 0) {
 		switch(pmu_kind) {
-		    case PMU_68K_V1:
 		    case PMU_68K_V2:
 			pmu_request(&req, NULL, 3, PMU_READ_NVRAM, 0x14, 0xe);
 			while (!req.complete)
diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h
index 89cb1acea93a..e128f609281a 100644
--- a/include/uapi/linux/pmu.h
+++ b/include/uapi/linux/pmu.h
@@ -93,7 +93,7 @@ enum {
 	PMU_HEATHROW_BASED,	/* PowerBook G3 series */
 	PMU_PADDINGTON_BASED,	/* 1999 PowerBook G3 */
 	PMU_KEYLARGO_BASED,	/* Core99 motherboard (PMU99) */
-	PMU_68K_V1,		/* 68K PMU, version 1 */
+	PMU_68K_V1,		/* Unused/deprecated */
 	PMU_68K_V2, 		/* 68K PMU, version 2 */
 };
 
-- 
cgit v1.2.3


From ebd722275f9cfc6752e29d2412fa3816ca05764b Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Mon, 2 Jul 2018 04:21:19 -0400
Subject: macintosh/via-pmu: Replace via-pmu68k driver with via-pmu driver

Now that the PowerMac via-pmu driver supports m68k PowerBooks,
switch over to that driver and remove the via-pmu68k driver.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/m68k/configs/mac_defconfig   |   2 +-
 arch/m68k/configs/multi_defconfig |   2 +-
 arch/m68k/mac/config.c            |   2 +-
 arch/m68k/mac/misc.c              |  48 +--
 drivers/macintosh/Kconfig         |  13 +-
 drivers/macintosh/Makefile        |   1 -
 drivers/macintosh/adb.c           |   2 +-
 drivers/macintosh/via-pmu68k.c    | 846 --------------------------------------
 include/uapi/linux/pmu.h          |   2 +-
 9 files changed, 14 insertions(+), 904 deletions(-)
 delete mode 100644 drivers/macintosh/via-pmu68k.c

(limited to 'include')

diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b52e597899eb..087ca15e32f1 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -369,7 +369,7 @@ CONFIG_TCM_PSCSI=m
 CONFIG_ADB=y
 CONFIG_ADB_MACII=y
 CONFIG_ADB_IOP=y
-CONFIG_ADB_PMU68K=y
+CONFIG_ADB_PMU=y
 CONFIG_ADB_CUDA=y
 CONFIG_INPUT_ADBHID=y
 CONFIG_MAC_EMUMOUSEBTN=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 2a84eeec5b02..3f9334084d55 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -402,7 +402,7 @@ CONFIG_TCM_PSCSI=m
 CONFIG_ADB=y
 CONFIG_ADB_MACII=y
 CONFIG_ADB_IOP=y
-CONFIG_ADB_PMU68K=y
+CONFIG_ADB_PMU=y
 CONFIG_ADB_CUDA=y
 CONFIG_INPUT_ADBHID=y
 CONFIG_MAC_EMUMOUSEBTN=y
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index e522307db47c..92e80cf0d8aa 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -891,7 +891,7 @@ static void __init mac_identify(void)
 #ifdef CONFIG_ADB_CUDA
 	find_via_cuda();
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 	find_via_pmu();
 #endif
 }
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 7ccb799eeb57..28090a44fa09 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -85,7 +85,7 @@ static void cuda_write_pram(int offset, __u8 data)
 }
 #endif /* CONFIG_ADB_CUDA */
 
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 static long pmu_read_time(void)
 {
 	struct adb_request req;
@@ -136,7 +136,7 @@ static void pmu_write_pram(int offset, __u8 data)
 	while (!req.complete)
 		pmu_poll();
 }
-#endif /* CONFIG_ADB_PMU68K */
+#endif /* CONFIG_ADB_PMU */
 
 /*
  * VIA PRAM/RTC access routines
@@ -367,38 +367,6 @@ static void cuda_shutdown(void)
 }
 #endif /* CONFIG_ADB_CUDA */
 
-#ifdef CONFIG_ADB_PMU68K
-
-void pmu_restart(void)
-{
-	struct adb_request req;
-	if (pmu_request(&req, NULL,
-			2, PMU_SET_INTR_MASK, PMU_INT_ADB|PMU_INT_TICK) < 0)
-		return;
-	while (!req.complete)
-		pmu_poll();
-	if (pmu_request(&req, NULL, 1, PMU_RESET) < 0)
-		return;
-	while (!req.complete)
-		pmu_poll();
-}
-
-void pmu_shutdown(void)
-{
-	struct adb_request req;
-	if (pmu_request(&req, NULL,
-			2, PMU_SET_INTR_MASK, PMU_INT_ADB|PMU_INT_TICK) < 0)
-		return;
-	while (!req.complete)
-		pmu_poll();
-	if (pmu_request(&req, NULL, 5, PMU_SHUTDOWN, 'M', 'A', 'T', 'T') < 0)
-		return;
-	while (!req.complete)
-		pmu_poll();
-}
-
-#endif
-
 /*
  *-------------------------------------------------------------------
  * Below this point are the generic routines; they'll dispatch to the
@@ -423,7 +391,7 @@ void mac_pram_read(int offset, __u8 *buffer, int len)
 		func = cuda_read_pram;
 		break;
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 	case MAC_ADB_PB2:
 		func = pmu_read_pram;
 		break;
@@ -453,7 +421,7 @@ void mac_pram_write(int offset, __u8 *buffer, int len)
 		func = cuda_write_pram;
 		break;
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 	case MAC_ADB_PB2:
 		func = pmu_write_pram;
 		break;
@@ -477,7 +445,7 @@ void mac_poweroff(void)
 	           macintosh_config->adb_type == MAC_ADB_CUDA) {
 		cuda_shutdown();
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 	} else if (macintosh_config->adb_type == MAC_ADB_PB2) {
 		pmu_shutdown();
 #endif
@@ -518,7 +486,7 @@ void mac_reset(void)
 	           macintosh_config->adb_type == MAC_ADB_CUDA) {
 		cuda_restart();
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 	} else if (macintosh_config->adb_type == MAC_ADB_PB2) {
 		pmu_restart();
 #endif
@@ -670,7 +638,7 @@ int mac_hwclk(int op, struct rtc_time *t)
 			now = cuda_read_time();
 			break;
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 		case MAC_ADB_PB2:
 			now = pmu_read_time();
 			break;
@@ -706,7 +674,7 @@ int mac_hwclk(int op, struct rtc_time *t)
 			cuda_write_time(now);
 			break;
 #endif
-#ifdef CONFIG_ADB_PMU68K
+#ifdef CONFIG_ADB_PMU
 		case MAC_ADB_PB2:
 			pmu_write_time(now);
 			break;
diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 26abae4c899d..47c350cdfb12 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig
@@ -39,17 +39,6 @@ config ADB_IOP
 	  <http://www.angelfire.com/ca2/dev68k/iopdesc.html> to enable direct
 	  support for it, say 'Y' here.
 
-config ADB_PMU68K
-	bool "Include PMU (Powerbook) ADB driver"
-	depends on ADB && MAC
-	help
-	  Say Y here if want your kernel to support the m68k based Powerbooks.
-	  This includes the PowerBook 140, PowerBook 145, PowerBook 150,
-	  PowerBook 160, PowerBook 165, PowerBook 165c, PowerBook 170,
-	  PowerBook 180, PowerBook, 180c, PowerBook 190cs, PowerBook 520,
-	  PowerBook Duo 210, PowerBook Duo 230, PowerBook Duo 250,
-	  PowerBook Duo 270c, PowerBook Duo 280 and PowerBook Duo 280c.
-
 # we want to change this to something like CONFIG_SYSCTRL_CUDA/PMU
 config ADB_CUDA
 	bool "Support for Cuda/Egret based Macs and PowerMacs"
@@ -66,7 +55,7 @@ config ADB_CUDA
 
 config ADB_PMU
 	bool "Support for PMU based PowerMacs and PowerBooks"
-	depends on PPC_PMAC
+	depends on PPC_PMAC || MAC
 	help
 	  On PowerBooks, iBooks, and recent iMacs and Power Macintoshes, the
 	  PMU is an embedded microprocessor whose primary function is to
diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
index ee803638e595..49819b1b6f20 100644
--- a/drivers/macintosh/Makefile
+++ b/drivers/macintosh/Makefile
@@ -22,7 +22,6 @@ obj-$(CONFIG_PMAC_SMU)		+= smu.o
 obj-$(CONFIG_ADB)		+= adb.o
 obj-$(CONFIG_ADB_MACII)		+= via-macii.o
 obj-$(CONFIG_ADB_IOP)		+= adb-iop.o
-obj-$(CONFIG_ADB_PMU68K)	+= via-pmu68k.o
 obj-$(CONFIG_ADB_MACIO)		+= macio-adb.o
 
 obj-$(CONFIG_THERM_WINDTUNNEL)	+= therm_windtunnel.o
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 4c8097e0e6fe..76e98f0f7a3e 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -65,7 +65,7 @@ static struct adb_driver *adb_driver_list[] = {
 #ifdef CONFIG_ADB_IOP
 	&adb_iop_driver,
 #endif
-#if defined(CONFIG_ADB_PMU) || defined(CONFIG_ADB_PMU68K)
+#ifdef CONFIG_ADB_PMU
 	&via_pmu_driver,
 #endif
 #ifdef CONFIG_ADB_MACIO
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
deleted file mode 100644
index bec8e1837d7d..000000000000
--- a/drivers/macintosh/via-pmu68k.c
+++ /dev/null
@@ -1,846 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Device driver for the PMU on 68K-based Apple PowerBooks
- *
- * The VIA (versatile interface adapter) interfaces to the PMU,
- * a 6805 microprocessor core whose primary function is to control
- * battery charging and system power on the PowerBooks.
- * The PMU also controls the ADB (Apple Desktop Bus) which connects
- * to the keyboard and mouse, as well as the non-volatile RAM
- * and the RTC (real time clock) chip.
- *
- * Adapted for 68K PMU by Joshua M. Thompson
- *
- * Based largely on the PowerMac PMU code by Paul Mackerras and
- * Fabio Riccardi.
- *
- * Also based on the PMU driver from MkLinux by Apple Computer, Inc.
- * and the Open Software Foundation, Inc.
- */
-
-#include <stdarg.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/miscdevice.h>
-#include <linux/blkdev.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-
-#include <linux/adb.h>
-#include <linux/pmu.h>
-#include <linux/cuda.h>
-
-#include <asm/macintosh.h>
-#include <asm/macints.h>
-#include <asm/mac_via.h>
-
-#include <asm/pgtable.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-/* Misc minor number allocated for /dev/pmu */
-#define PMU_MINOR	154
-
-/* VIA registers - spaced 0x200 bytes apart */
-#define RS		0x200		/* skip between registers */
-#define B		0		/* B-side data */
-#define A		RS		/* A-side data */
-#define DIRB		(2*RS)		/* B-side direction (1=output) */
-#define DIRA		(3*RS)		/* A-side direction (1=output) */
-#define T1CL		(4*RS)		/* Timer 1 ctr/latch (low 8 bits) */
-#define T1CH		(5*RS)		/* Timer 1 counter (high 8 bits) */
-#define T1LL		(6*RS)		/* Timer 1 latch (low 8 bits) */
-#define T1LH		(7*RS)		/* Timer 1 latch (high 8 bits) */
-#define T2CL		(8*RS)		/* Timer 2 ctr/latch (low 8 bits) */
-#define T2CH		(9*RS)		/* Timer 2 counter (high 8 bits) */
-#define SR		(10*RS)		/* Shift register */
-#define ACR		(11*RS)		/* Auxiliary control register */
-#define PCR		(12*RS)		/* Peripheral control register */
-#define IFR		(13*RS)		/* Interrupt flag register */
-#define IER		(14*RS)		/* Interrupt enable register */
-#define ANH		(15*RS)		/* A-side data, no handshake */
-
-/* Bits in B data register: both active low */
-#define TACK		0x02		/* Transfer acknowledge (input) */
-#define TREQ		0x04		/* Transfer request (output) */
-
-/* Bits in ACR */
-#define SR_CTRL		0x1c		/* Shift register control bits */
-#define SR_EXT		0x0c		/* Shift on external clock */
-#define SR_OUT		0x10		/* Shift out if 1 */
-
-/* Bits in IFR and IER */
-#define SR_INT		0x04		/* Shift register full/empty */
-#define CB1_INT		0x10		/* transition on CB1 input */
-
-static enum pmu_state {
-	idle,
-	sending,
-	intack,
-	reading,
-	reading_intr,
-} pmu_state;
-
-static struct adb_request *current_req;
-static struct adb_request *last_req;
-static struct adb_request *req_awaiting_reply;
-static unsigned char interrupt_data[32];
-static unsigned char *reply_ptr;
-static int data_index;
-static int data_len;
-static int adb_int_pending;
-static int pmu_adb_flags;
-static int adb_dev_map;
-static struct adb_request bright_req_1, bright_req_2, bright_req_3;
-static int pmu_kind = PMU_UNKNOWN;
-static int pmu_fully_inited;
-
-int asleep;
-
-static int pmu_probe(void);
-static int pmu_init(void);
-static void pmu_start(void);
-static irqreturn_t pmu_interrupt(int irq, void *arg);
-static int pmu_send_request(struct adb_request *req, int sync);
-static int pmu_autopoll(int devs);
-void pmu_poll(void);
-static int pmu_reset_bus(void);
-
-static int init_pmu(void);
-static void pmu_start(void);
-static void send_byte(int x);
-static void recv_byte(void);
-static void pmu_done(struct adb_request *req);
-static void pmu_handle_data(unsigned char *data, int len);
-static void set_volume(int level);
-static void pmu_enable_backlight(int on);
-static void pmu_set_brightness(int level);
-
-struct adb_driver via_pmu_driver = {
-	.name         = "68K PMU",
-	.probe        = pmu_probe,
-	.init         = pmu_init,
-	.send_request = pmu_send_request,
-	.autopoll     = pmu_autopoll,
-	.poll         = pmu_poll,
-	.reset_bus    = pmu_reset_bus,
-};
-
-/*
- * This table indicates for each PMU opcode:
- * - the number of data bytes to be sent with the command, or -1
- *   if a length byte should be sent,
- * - the number of response bytes which the PMU will return, or
- *   -1 if it will send a length byte.
- */
-static s8 pmu_data_len[256][2] = {
-/*	   0	   1	   2	   3	   4	   5	   6	   7  */
-/*00*/	{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*08*/	{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*10*/	{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*18*/	{ 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0, 0},
-/*20*/	{-1, 0},{ 0, 0},{ 2, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*28*/	{ 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{ 0,-1},
-/*30*/	{ 4, 0},{20, 0},{-1, 0},{ 3, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*38*/	{ 0, 4},{ 0,20},{ 2,-1},{ 2, 1},{ 3,-1},{-1,-1},{-1,-1},{ 4, 0},
-/*40*/	{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*48*/	{ 0, 1},{ 0, 1},{-1,-1},{ 1, 0},{ 1, 0},{-1,-1},{-1,-1},{-1,-1},
-/*50*/	{ 1, 0},{ 0, 0},{ 2, 0},{ 2, 0},{-1, 0},{ 1, 0},{ 3, 0},{ 1, 0},
-/*58*/	{ 0, 1},{ 1, 0},{ 0, 2},{ 0, 2},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},
-/*60*/	{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*68*/	{ 0, 3},{ 0, 3},{ 0, 2},{ 0, 8},{ 0,-1},{ 0,-1},{-1,-1},{-1,-1},
-/*70*/	{ 1, 0},{ 1, 0},{ 1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*78*/	{ 0,-1},{ 0,-1},{-1,-1},{-1,-1},{-1,-1},{ 5, 1},{ 4, 1},{ 4, 1},
-/*80*/	{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*88*/	{ 0, 5},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*90*/	{ 1, 0},{ 2, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*98*/	{ 0, 1},{ 0, 1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*a0*/	{ 2, 0},{ 2, 0},{ 2, 0},{ 4, 0},{-1, 0},{ 0, 0},{-1, 0},{-1, 0},
-/*a8*/	{ 1, 1},{ 1, 0},{ 3, 0},{ 2, 0},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*b0*/	{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*b8*/	{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*c0*/	{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*c8*/	{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-/*d0*/	{ 0, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*d8*/	{ 1, 1},{ 1, 1},{-1,-1},{-1,-1},{ 0, 1},{ 0,-1},{-1,-1},{-1,-1},
-/*e0*/	{-1, 0},{ 4, 0},{ 0, 1},{-1, 0},{-1, 0},{ 4, 0},{-1, 0},{-1, 0},
-/*e8*/	{ 3,-1},{-1,-1},{ 0, 1},{-1,-1},{ 0,-1},{-1,-1},{-1,-1},{ 0, 0},
-/*f0*/	{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},{-1, 0},
-/*f8*/	{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},{-1,-1},
-};
-
-int __init find_via_pmu(void)
-{
-	switch (macintosh_config->adb_type) {
-	case MAC_ADB_PB2:
-		pmu_kind = PMU_68K_V2;
-		break;
-	default:
-		pmu_kind = PMU_UNKNOWN;
-		return -ENODEV;
-	}
-
-	pmu_state = idle;
-
-	if (!init_pmu())
-		goto fail_init;
-
-	pr_info("adb: PMU 68K driver v0.5 for Unified ADB\n");
-
-	return 1;
-
-fail_init:
-	pmu_kind = PMU_UNKNOWN;
-	return 0;
-}
-
-static int pmu_probe(void)
-{
-	if (pmu_kind == PMU_UNKNOWN)
-		return -ENODEV;
-	return 0;
-}
-
-static int pmu_init(void)
-{
-	if (pmu_kind == PMU_UNKNOWN)
-		return -ENODEV;
-	return 0;
-}
-
-static int __init via_pmu_start(void)
-{
-	if (pmu_kind == PMU_UNKNOWN)
-		return -ENODEV;
-
-	if (request_irq(IRQ_MAC_ADB_SR, pmu_interrupt, 0, "PMU_SR",
-			pmu_interrupt)) {
-		pr_err("%s: can't get SR irq\n", __func__);
-		return -ENODEV;
-	}
-	if (request_irq(IRQ_MAC_ADB_CL, pmu_interrupt, 0, "PMU_CL",
-			pmu_interrupt)) {
-		pr_err("%s: can't get CL irq\n", __func__);
-		free_irq(IRQ_MAC_ADB_SR, pmu_interrupt);
-		return -ENODEV;
-	}
-
-	pmu_fully_inited = 1;
-
-	/* Enable backlight */
-	pmu_enable_backlight(1);
-
-	return 0;
-}
-
-arch_initcall(via_pmu_start);
-
-static int __init init_pmu(void)
-{
-	int timeout;
-	volatile struct adb_request req;
-
-	via2[B] |= TREQ;				/* negate TREQ */
-	via2[DIRB] = (via2[DIRB] | TREQ) & ~TACK;	/* TACK in, TREQ out */
-
-	pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK, PMU_INT_ADB);
-	timeout =  100000;
-	while (!req.complete) {
-		if (--timeout < 0) {
-			printk(KERN_ERR "pmu_init: no response from PMU\n");
-			return -EAGAIN;
-		}
-		udelay(10);
-		pmu_poll();
-	}
-
-	/* ack all pending interrupts */
-	timeout = 100000;
-	interrupt_data[0] = 1;
-	while (interrupt_data[0] || pmu_state != idle) {
-		if (--timeout < 0) {
-			printk(KERN_ERR "pmu_init: timed out acking intrs\n");
-			return -EAGAIN;
-		}
-		if (pmu_state == idle) {
-			adb_int_pending = 1;
-			pmu_interrupt(0, NULL);
-		}
-		pmu_poll();
-		udelay(10);
-	}
-
-	pmu_request((struct adb_request *) &req, NULL, 2, PMU_SET_INTR_MASK,
-			PMU_INT_ADB_AUTO|PMU_INT_SNDBRT|PMU_INT_ADB);
-	timeout =  100000;
-	while (!req.complete) {
-		if (--timeout < 0) {
-			printk(KERN_ERR "pmu_init: no response from PMU\n");
-			return -EAGAIN;
-		}
-		udelay(10);
-		pmu_poll();
-	}
-
-	bright_req_1.complete = 1;
-	bright_req_2.complete = 1;
-	bright_req_3.complete = 1;
-
-	return 1;
-}
-
-int
-pmu_get_model(void)
-{
-	return pmu_kind;
-}
-
-/* Send an ADB command */
-static int 
-pmu_send_request(struct adb_request *req, int sync)
-{
-    int i, ret;
-
-    if (!pmu_fully_inited)
-    {
- 	req->complete = 1;
-   	return -ENXIO;
-   }
-
-    ret = -EINVAL;
-	
-    switch (req->data[0]) {
-    case PMU_PACKET:
-		for (i = 0; i < req->nbytes - 1; ++i)
-			req->data[i] = req->data[i+1];
-		--req->nbytes;
-		if (pmu_data_len[req->data[0]][1] != 0) {
-			req->reply[0] = ADB_RET_OK;
-			req->reply_len = 1;
-		} else
-			req->reply_len = 0;
-		ret = pmu_queue_request(req);
-		break;
-    case CUDA_PACKET:
-		switch (req->data[1]) {
-		case CUDA_GET_TIME:
-			if (req->nbytes != 2)
-				break;
-			req->data[0] = PMU_READ_RTC;
-			req->nbytes = 1;
-			req->reply_len = 3;
-			req->reply[0] = CUDA_PACKET;
-			req->reply[1] = 0;
-			req->reply[2] = CUDA_GET_TIME;
-			ret = pmu_queue_request(req);
-			break;
-		case CUDA_SET_TIME:
-			if (req->nbytes != 6)
-				break;
-			req->data[0] = PMU_SET_RTC;
-			req->nbytes = 5;
-			for (i = 1; i <= 4; ++i)
-				req->data[i] = req->data[i+1];
-			req->reply_len = 3;
-			req->reply[0] = CUDA_PACKET;
-			req->reply[1] = 0;
-			req->reply[2] = CUDA_SET_TIME;
-			ret = pmu_queue_request(req);
-			break;
-		case CUDA_GET_PRAM:
-			if (req->nbytes != 4)
-				break;
-			req->data[0] = PMU_READ_NVRAM;
-			req->data[1] = req->data[2];
-			req->data[2] = req->data[3];
-			req->nbytes = 3;
-			req->reply_len = 3;
-			req->reply[0] = CUDA_PACKET;
-			req->reply[1] = 0;
-			req->reply[2] = CUDA_GET_PRAM;
-			ret = pmu_queue_request(req);
-			break;
-		case CUDA_SET_PRAM:
-			if (req->nbytes != 5)
-				break;
-			req->data[0] = PMU_WRITE_NVRAM;
-			req->data[1] = req->data[2];
-			req->data[2] = req->data[3];
-			req->data[3] = req->data[4];
-			req->nbytes = 4;
-			req->reply_len = 3;
-			req->reply[0] = CUDA_PACKET;
-			req->reply[1] = 0;
-			req->reply[2] = CUDA_SET_PRAM;
-			ret = pmu_queue_request(req);
-			break;
-		}
-		break;
-    case ADB_PACKET:
-		for (i = req->nbytes - 1; i > 1; --i)
-			req->data[i+2] = req->data[i];
-		req->data[3] = req->nbytes - 2;
-		req->data[2] = pmu_adb_flags;
-		/*req->data[1] = req->data[1];*/
-		req->data[0] = PMU_ADB_CMD;
-		req->nbytes += 2;
-		req->reply_expected = 1;
-		req->reply_len = 0;
-		ret = pmu_queue_request(req);
-		break;
-    }
-    if (ret)
-    {
-    	req->complete = 1;
-    	return ret;
-    }
-    	
-    if (sync) {
-	while (!req->complete)
-		pmu_poll();
-    }
-
-    return 0;
-}
-
-/* Enable/disable autopolling */
-static int 
-pmu_autopoll(int devs)
-{
-	struct adb_request req;
-
-	if (!pmu_fully_inited) return -ENXIO;
-
-	if (devs) {
-		adb_dev_map = devs;
-		pmu_request(&req, NULL, 5, PMU_ADB_CMD, 0, 0x86,
-			    adb_dev_map >> 8, adb_dev_map);
-		pmu_adb_flags = 2;
-	} else {
-		pmu_request(&req, NULL, 1, PMU_ADB_POLL_OFF);
-		pmu_adb_flags = 0;
-	}
-	while (!req.complete)
-		pmu_poll();
-	return 0;
-}
-
-/* Reset the ADB bus */
-static int 
-pmu_reset_bus(void)
-{
-	struct adb_request req;
-	long timeout;
-	int save_autopoll = adb_dev_map;
-
-	if (!pmu_fully_inited) return -ENXIO;
-
-	/* anyone got a better idea?? */
-	pmu_autopoll(0);
-
-	req.nbytes = 5;
-	req.done = NULL;
-	req.data[0] = PMU_ADB_CMD;
-	req.data[1] = 0;
-	req.data[2] = 3; /* ADB_BUSRESET ??? */
-	req.data[3] = 0;
-	req.data[4] = 0;
-	req.reply_len = 0;
-	req.reply_expected = 1;
-	if (pmu_queue_request(&req) != 0)
-	{
-		printk(KERN_ERR "pmu_adb_reset_bus: pmu_queue_request failed\n");
-		return -EIO;
-	}
-	while (!req.complete)
-		pmu_poll();
-	timeout = 100000;
-	while (!req.complete) {
-		if (--timeout < 0) {
-			printk(KERN_ERR "pmu_adb_reset_bus (reset): no response from PMU\n");
-			return -EIO;
-		}
-		udelay(10);
-		pmu_poll();
-	}
-
-	if (save_autopoll != 0)
-		pmu_autopoll(save_autopoll);
-		
-	return 0;
-}
-
-/* Construct and send a pmu request */
-int 
-pmu_request(struct adb_request *req, void (*done)(struct adb_request *),
-	    int nbytes, ...)
-{
-	va_list list;
-	int i;
-
-	if (nbytes < 0 || nbytes > 32) {
-		printk(KERN_ERR "pmu_request: bad nbytes (%d)\n", nbytes);
-		req->complete = 1;
-		return -EINVAL;
-	}
-	req->nbytes = nbytes;
-	req->done = done;
-	va_start(list, nbytes);
-	for (i = 0; i < nbytes; ++i)
-		req->data[i] = va_arg(list, int);
-	va_end(list);
-	if (pmu_data_len[req->data[0]][1] != 0) {
-		req->reply[0] = ADB_RET_OK;
-		req->reply_len = 1;
-	} else
-		req->reply_len = 0;
-	req->reply_expected = 0;
-	return pmu_queue_request(req);
-}
-
-int
-pmu_queue_request(struct adb_request *req)
-{
-	unsigned long flags;
-	int nsend;
-
-	if (req->nbytes <= 0) {
-		req->complete = 1;
-		return 0;
-	}
-	nsend = pmu_data_len[req->data[0]][0];
-	if (nsend >= 0 && req->nbytes != nsend + 1) {
-		req->complete = 1;
-		return -EINVAL;
-	}
-
-	req->next = NULL;
-	req->sent = 0;
-	req->complete = 0;
-	local_irq_save(flags);
-
-	if (current_req != 0) {
-		last_req->next = req;
-		last_req = req;
-	} else {
-		current_req = req;
-		last_req = req;
-		if (pmu_state == idle)
-			pmu_start();
-	}
-
-	local_irq_restore(flags);
-	return 0;
-}
-
-static void 
-send_byte(int x)
-{
-	via1[ACR] |= SR_CTRL;
-	via1[SR] = x;
-	via2[B] &= ~TREQ;		/* assert TREQ */
-}
-
-static void 
-recv_byte(void)
-{
-	char c;
-
-	via1[ACR] = (via1[ACR] | SR_EXT) & ~SR_OUT;
-	c = via1[SR];		/* resets SR */
-	via2[B] &= ~TREQ;
-}
-
-static void 
-pmu_start(void)
-{
-	unsigned long flags;
-	struct adb_request *req;
-
-	/* assert pmu_state == idle */
-	/* get the packet to send */
-	local_irq_save(flags);
-	req = current_req;
-	if (req == 0 || pmu_state != idle
-	    || (req->reply_expected && req_awaiting_reply))
-		goto out;
-
-	pmu_state = sending;
-	data_index = 1;
-	data_len = pmu_data_len[req->data[0]][0];
-
-	/* set the shift register to shift out and send a byte */
-	send_byte(req->data[0]);
-
-out:
-	local_irq_restore(flags);
-}
-
-void 
-pmu_poll(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (via1[IFR] & SR_INT) {
-		via1[IFR] = SR_INT;
-		pmu_interrupt(IRQ_MAC_ADB_SR, NULL);
-	}
-	if (via1[IFR] & CB1_INT) {
-		via1[IFR] = CB1_INT;
-		pmu_interrupt(IRQ_MAC_ADB_CL, NULL);
-	}
-	local_irq_restore(flags);
-}
-
-static irqreturn_t
-pmu_interrupt(int irq, void *dev_id)
-{
-	struct adb_request *req;
-	int timeout, bite = 0;	/* to prevent compiler warning */
-
-#if 0
-	printk("pmu_interrupt: irq %d state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n",
-		irq, pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending);
-#endif
-
-	if (irq == IRQ_MAC_ADB_CL) {		/* CB1 interrupt */
-		adb_int_pending = 1;
-	} else if (irq == IRQ_MAC_ADB_SR) {	/* SR interrupt  */
-		if (via2[B] & TACK) {
-			printk(KERN_DEBUG "PMU: SR_INT but ack still high! (%x)\n", via2[B]);
-		}
-
-		/* if reading grab the byte */
-		if ((via1[ACR] & SR_OUT) == 0) bite = via1[SR];
-
-		/* reset TREQ and wait for TACK to go high */
-		via2[B] |= TREQ;
-		timeout = 3200;
-		while (!(via2[B] & TACK)) {
-			if (--timeout < 0) {
-				printk(KERN_ERR "PMU not responding (!ack)\n");
-				goto finish;
-			}
-			udelay(10);
-		}
-
-		switch (pmu_state) {
-		case sending:
-			req = current_req;
-			if (data_len < 0) {
-				data_len = req->nbytes - 1;
-				send_byte(data_len);
-				break;
-			}
-			if (data_index <= data_len) {
-				send_byte(req->data[data_index++]);
-				break;
-			}
-			req->sent = 1;
-			data_len = pmu_data_len[req->data[0]][1];
-			if (data_len == 0) {
-				pmu_state = idle;
-				current_req = req->next;
-				if (req->reply_expected)
-					req_awaiting_reply = req;
-				else
-					pmu_done(req);
-			} else {
-				pmu_state = reading;
-				data_index = 0;
-				reply_ptr = req->reply + req->reply_len;
-				recv_byte();
-			}
-			break;
-
-		case intack:
-			data_index = 0;
-			data_len = -1;
-			pmu_state = reading_intr;
-			reply_ptr = interrupt_data;
-			recv_byte();
-			break;
-
-		case reading:
-		case reading_intr:
-			if (data_len == -1) {
-				data_len = bite;
-				if (bite > 32)
-					printk(KERN_ERR "PMU: bad reply len %d\n",
-					       bite);
-			} else {
-				reply_ptr[data_index++] = bite;
-			}
-			if (data_index < data_len) {
-				recv_byte();
-				break;
-			}
-
-			if (pmu_state == reading_intr) {
-				pmu_handle_data(interrupt_data, data_index);
-			} else {
-				req = current_req;
-				current_req = req->next;
-				req->reply_len += data_index;
-				pmu_done(req);
-			}
-			pmu_state = idle;
-
-			break;
-
-		default:
-			printk(KERN_ERR "pmu_interrupt: unknown state %d?\n",
-			       pmu_state);
-		}
-	}
-finish:
-	if (pmu_state == idle) {
-		if (adb_int_pending) {
-			pmu_state = intack;
-			send_byte(PMU_INT_ACK);
-			adb_int_pending = 0;
-		} else if (current_req) {
-			pmu_start();
-		}
-	}
-
-#if 0
-	printk("pmu_interrupt: exit state %d acr %02X, b %02X data_index %d/%d adb_int_pending %d\n",
-		pmu_state, (uint) via1[ACR], (uint) via2[B], data_index, data_len, adb_int_pending);
-#endif
-	return IRQ_HANDLED;
-}
-
-static void 
-pmu_done(struct adb_request *req)
-{
-	req->complete = 1;
-	if (req->done)
-		(*req->done)(req);
-}
-
-/* Interrupt data could be the result data from an ADB cmd */
-static void 
-pmu_handle_data(unsigned char *data, int len)
-{
-	static int show_pmu_ints = 1;
-
-	asleep = 0;
-	if (len < 1) {
-		adb_int_pending = 0;
-		return;
-	}
-	if (data[0] & PMU_INT_ADB) {
-		if ((data[0] & PMU_INT_ADB_AUTO) == 0) {
-			struct adb_request *req = req_awaiting_reply;
-			if (req == 0) {
-				printk(KERN_ERR "PMU: extra ADB reply\n");
-				return;
-			}
-			req_awaiting_reply = NULL;
-			if (len <= 2)
-				req->reply_len = 0;
-			else {
-				memcpy(req->reply, data + 1, len - 1);
-				req->reply_len = len - 1;
-			}
-			pmu_done(req);
-		} else {
-			adb_input(data+1, len-1, 1);
-		}
-	} else {
-		if (data[0] == 0x08 && len == 3) {
-			/* sound/brightness buttons pressed */
-			pmu_set_brightness(data[1] >> 3);
-			set_volume(data[2]);
-		} else if (show_pmu_ints
-			   && !(data[0] == PMU_INT_TICK && len == 1)) {
-			int i;
-			printk(KERN_DEBUG "pmu intr");
-			for (i = 0; i < len; ++i)
-				printk(" %.2x", data[i]);
-			printk("\n");
-		}
-	}
-}
-
-static int backlight_level = -1;
-static int backlight_enabled = 0;
-
-#define LEVEL_TO_BRIGHT(lev)	((lev) < 1? 0x7f: 0x4a - ((lev) << 1))
-
-static void 
-pmu_enable_backlight(int on)
-{
-	struct adb_request req;
-
-	if (on) {
-	    /* first call: get current backlight value */
-	    if (backlight_level < 0) {
-		switch(pmu_kind) {
-		    case PMU_68K_V2:
-			pmu_request(&req, NULL, 3, PMU_READ_NVRAM, 0x14, 0xe);
-			while (!req.complete)
-				pmu_poll();
-			printk(KERN_DEBUG "pmu: nvram returned bright: %d\n", (int)req.reply[1]);
-			backlight_level = req.reply[1];
-			break;
-		    default:
-		        backlight_enabled = 0;
-		        return;
-		}
-	    }
-	    pmu_request(&req, NULL, 2, PMU_BACKLIGHT_BRIGHT,
-	    	LEVEL_TO_BRIGHT(backlight_level));
-	    while (!req.complete)
-		pmu_poll();
-	}
-	pmu_request(&req, NULL, 2, PMU_POWER_CTRL,
-	    PMU_POW_BACKLIGHT | (on ? PMU_POW_ON : PMU_POW_OFF));
-	while (!req.complete)
-		pmu_poll();
-	backlight_enabled = on;
-}
-
-static void 
-pmu_set_brightness(int level)
-{
-	int bright;
-
-	backlight_level = level;
-	bright = LEVEL_TO_BRIGHT(level);
-	if (!backlight_enabled)
-		return;
-	if (bright_req_1.complete)
-		pmu_request(&bright_req_1, NULL, 2, PMU_BACKLIGHT_BRIGHT,
-		    bright);
-	if (bright_req_2.complete)
-		pmu_request(&bright_req_2, NULL, 2, PMU_POWER_CTRL,
-		    PMU_POW_BACKLIGHT | (bright < 0x7f ? PMU_POW_ON : PMU_POW_OFF));
-}
-
-void 
-pmu_enable_irled(int on)
-{
-	struct adb_request req;
-
-	pmu_request(&req, NULL, 2, PMU_POWER_CTRL, PMU_POW_IRLED |
-	    (on ? PMU_POW_ON : PMU_POW_OFF));
-	while (!req.complete)
-		pmu_poll();
-}
-
-static void 
-set_volume(int level)
-{
-}
-
-int
-pmu_present(void)
-{
-	return (pmu_kind != PMU_UNKNOWN);
-}
diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h
index e128f609281a..97256f90e6df 100644
--- a/include/uapi/linux/pmu.h
+++ b/include/uapi/linux/pmu.h
@@ -94,7 +94,7 @@ enum {
 	PMU_PADDINGTON_BASED,	/* 1999 PowerBook G3 */
 	PMU_KEYLARGO_BASED,	/* Core99 motherboard (PMU99) */
 	PMU_68K_V1,		/* Unused/deprecated */
-	PMU_68K_V2, 		/* 68K PMU, version 2 */
+	PMU_68K_V2,		/* Unused/deprecated */
 };
 
 /* PMU PMU_POWER_EVENTS commands */
-- 
cgit v1.2.3


From 56e6c104e4f151e19eb410004405ec52b4f8605a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Tue, 31 Jul 2018 13:06:57 +0200
Subject: console: Replace #if 0 with atomic var 'ignore_console_lock_warning'

The macro WARN_CONSOLE_UNLOCKED prints a warning when a thread enters
the console's critical section without having acquired the console
lock. The console lock can be ignored when debugging the console using
printk, but this makes WARN_CONSOLE_UNLOCKED generate unnecessary
warnings.

The variable ignore_console_lock_warning temporarily disables
WARN_CONSOLE_UNLOCKED. Developers interested in debugging the console's
critical sections should increment it before entering the CS and
decrement it after leaving the CS. Setting ignore_console_lock_warning
is only for debugging. Regular operation should not manipulate it.

Acknoledgements: This patch is based on an earlier version by Steven
Rostedt. The use of atomic increment/decrement was suggested by Petr
Mladek.

Link: http://lkml.kernel.org/r/717e6337-e7a6-7a92-1c1b-8929a25696b5@suse.de
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
[b.zolnierkie: use <linux/atomic.h>]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 include/linux/console.h | 14 +++++++++-----
 kernel/printk/printk.c  |  3 +++
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/console.h b/include/linux/console.h
index f59f3dbca65c..ec9bdb3d7bab 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -14,6 +14,7 @@
 #ifndef _LINUX_CONSOLE_H_
 #define _LINUX_CONSOLE_H_ 1
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 
 struct vc_data;
@@ -201,11 +202,14 @@ void vcs_make_sysfs(int index);
 void vcs_remove_sysfs(int index);
 
 /* Some debug stub to catch some of the obvious races in the VT code */
-#if 1
-#define WARN_CONSOLE_UNLOCKED()	WARN_ON(!is_console_locked() && !oops_in_progress)
-#else
-#define WARN_CONSOLE_UNLOCKED()
-#endif
+#define WARN_CONSOLE_UNLOCKED()						\
+	WARN_ON(!atomic_read(&ignore_console_lock_warning) &&		\
+		!is_console_locked() && !oops_in_progress)
+/*
+ * Increment ignore_console_lock_warning if you need to quiet
+ * WARN_CONSOLE_UNLOCKED() for debugging purposes.
+ */
+extern atomic_t ignore_console_lock_warning;
 
 /* VESA Blanking Levels */
 #define VESA_NO_BLANKING        0
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 3f041e7cbfc9..7d32a86758cd 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -66,6 +66,9 @@ int console_printk[4] = {
 	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
 };
 
+atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
+EXPORT_SYMBOL(ignore_console_lock_warning);
+
 /*
  * Low level drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
-- 
cgit v1.2.3


From 11785ef53228d23ec386f5fe4a34601536f0c891 Mon Sep 17 00:00:00 2001
From: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Date: Tue, 31 Jul 2018 13:28:42 +0100
Subject: ALSA: usb-audio: Initial Power Domain support

Thee USB Audio Class 3 (UAC3) introduces Power Domains as a new
feature to let a host turn individual parts of an audio function
to different power states via USB requests. This lets the device
get to know a bit amore about what the host is up to in order to
optimize power consumption efficiently.

The Power Domains are optional for UAC3 configuration but all
UAC3 devices shall include at least one BADD configuration where
the support for Power Domains is compulsory.

This patch adds a set of features/helpers to parse these power
domains and change their status.

Signed-off-by: Jorge Sanjuan <jorge.sanjuan@codethink.co.uk>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v3.h |   4 ++
 sound/usb/Makefile           |   1 +
 sound/usb/power.c            | 104 +++++++++++++++++++++++++++++++++++++++++++
 sound/usb/power.h            |  19 ++++++++
 4 files changed, 128 insertions(+)
 create mode 100644 sound/usb/power.c

(limited to 'include')

diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h
index 334bfa6dfb47..6b708434b7f9 100644
--- a/include/linux/usb/audio-v3.h
+++ b/include/linux/usb/audio-v3.h
@@ -447,4 +447,8 @@ struct uac3_interrupt_data_msg {
 /* BADD sample rate is always fixed to 48kHz */
 #define UAC3_BADD_SAMPLING_RATE				48000
 
+/* BADD power domains recovery times in 50us increments */
+#define UAC3_BADD_PD_RECOVER_D1D0			0x0258	/* 30ms */
+#define UAC3_BADD_PD_RECOVER_D2D0			0x1770	/* 300ms */
+
 #endif /* __LINUX_USB_AUDIO_V3_H */
diff --git a/sound/usb/Makefile b/sound/usb/Makefile
index 05440e2df8d9..d330f74c90e6 100644
--- a/sound/usb/Makefile
+++ b/sound/usb/Makefile
@@ -13,6 +13,7 @@ snd-usb-audio-objs := 	card.o \
 			mixer_scarlett.o \
 			mixer_us16x08.o \
 			pcm.o \
+			power.o \
 			proc.o \
 			quirks.o \
 			stream.o
diff --git a/sound/usb/power.c b/sound/usb/power.c
new file mode 100644
index 000000000000..bd303a1ba1b7
--- /dev/null
+++ b/sound/usb/power.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   UAC3 Power Domain state management functions
+ */
+
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
+#include <linux/usb/audio-v3.h>
+
+#include "usbaudio.h"
+#include "helper.h"
+#include "power.h"
+
+struct snd_usb_power_domain *
+snd_usb_find_power_domain(struct usb_host_interface *ctrl_iface,
+			  unsigned char id)
+{
+	struct snd_usb_power_domain *pd;
+	void *p;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return NULL;
+
+	p = NULL;
+	while ((p = snd_usb_find_csint_desc(ctrl_iface->extra,
+					    ctrl_iface->extralen,
+					    p, UAC3_POWER_DOMAIN)) != NULL) {
+		struct uac3_power_domain_descriptor *pd_desc = p;
+		int i;
+
+		for (i = 0; i < pd_desc->bNrEntities; i++) {
+			if (pd_desc->baEntityID[i] == id) {
+				pd->pd_id = pd_desc->bPowerDomainID;
+				pd->pd_d1d0_rec =
+					le16_to_cpu(pd_desc->waRecoveryTime1);
+				pd->pd_d2d0_rec =
+					le16_to_cpu(pd_desc->waRecoveryTime2);
+				return pd;
+			}
+		}
+	}
+
+	kfree(pd);
+	return NULL;
+}
+
+int snd_usb_power_domain_set(struct snd_usb_audio *chip,
+			     struct snd_usb_power_domain *pd,
+			     unsigned char state)
+{
+	struct usb_device *dev = chip->dev;
+	unsigned char current_state;
+	int err, idx;
+
+	idx = snd_usb_ctrl_intf(chip) | (pd->pd_id << 8);
+
+	err = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0),
+			      UAC2_CS_CUR,
+			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			      UAC3_AC_POWER_DOMAIN_CONTROL << 8, idx,
+			      &current_state, sizeof(current_state));
+	if (err < 0) {
+		dev_err(&dev->dev, "Can't get UAC3 power state for id %d\n",
+			pd->pd_id);
+		return err;
+	}
+
+	if (current_state == state) {
+		dev_dbg(&dev->dev, "UAC3 power domain id %d already in state %d\n",
+			pd->pd_id, state);
+		return 0;
+	}
+
+	err = snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), UAC2_CS_CUR,
+			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT,
+			      UAC3_AC_POWER_DOMAIN_CONTROL << 8, idx,
+			      &state, sizeof(state));
+	if (err < 0) {
+		dev_err(&dev->dev, "Can't set UAC3 power state to %d for id %d\n",
+			state, pd->pd_id);
+		return err;
+	}
+
+	if (state == UAC3_PD_STATE_D0) {
+		switch (current_state) {
+		case UAC3_PD_STATE_D2:
+			udelay(pd->pd_d2d0_rec * 50);
+			break;
+		case UAC3_PD_STATE_D1:
+			udelay(pd->pd_d1d0_rec * 50);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	dev_dbg(&dev->dev, "UAC3 power domain id %d change to state %d\n",
+		pd->pd_id, state);
+
+	return 0;
+}
diff --git a/sound/usb/power.h b/sound/usb/power.h
index b2e25f60c5a2..6004231a7c75 100644
--- a/sound/usb/power.h
+++ b/sound/usb/power.h
@@ -2,6 +2,25 @@
 #ifndef __USBAUDIO_POWER_H
 #define __USBAUDIO_POWER_H
 
+struct snd_usb_power_domain {
+	int pd_id;              /* UAC3 Power Domain ID */
+	int pd_d1d0_rec;        /* D1 to D0 recovery time */
+	int pd_d2d0_rec;        /* D2 to D0 recovery time */
+};
+
+enum {
+	UAC3_PD_STATE_D0,
+	UAC3_PD_STATE_D1,
+	UAC3_PD_STATE_D2,
+};
+
+int snd_usb_power_domain_set(struct snd_usb_audio *chip,
+			     struct snd_usb_power_domain *pd,
+			     unsigned char state);
+struct snd_usb_power_domain *
+snd_usb_find_power_domain(struct usb_host_interface *ctrl_iface,
+			  unsigned char id);
+
 #ifdef CONFIG_PM
 int snd_usb_autoresume(struct snd_usb_audio *chip);
 void snd_usb_autosuspend(struct snd_usb_audio *chip);
-- 
cgit v1.2.3


From 08fcf813281ebcf72c69487c1501ad91b7121cdb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 31 Jul 2018 09:10:26 -0600
Subject: t10-pi: provide empty t10_pi_complete() for !CONFIG_BLK_DEV_INTEGRITY

Fixes a link failure whtn BLK_DEV_INTEGRITY isn't defined.

Fixes: 10c41ddd6132 ("block: move dif_prepare/dif_complete functions to block layer")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/t10-pi.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 5a427c289f58..b9626aa7e90c 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -51,8 +51,19 @@ extern const struct blk_integrity_profile t10_pi_type1_crc;
 extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
 extern void t10_pi_prepare(struct request *rq, u8 protection_type);
 extern void t10_pi_complete(struct request *rq, u8 protection_type,
 			    unsigned int intervals);
+#else
+static inline void t10_pi_complete(struct request *rq, u8 protection_type,
+				   unsigned int intervals)
+{
+}
+static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
+{
+}
+#endif
 
 #endif
-- 
cgit v1.2.3


From c3bc8fd637a9623f5c507bd18f9677effbddf584 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 30 Jul 2018 15:24:23 -0700
Subject: tracing: Centralize preemptirq tracepoints and unify their usage

This patch detaches the preemptirq tracepoints from the tracers and
keeps it separate.

Advantages:
* Lockdep and irqsoff event can now run in parallel since they no longer
have their own calls.

* This unifies the usecase of adding hooks to an irqsoff and irqson
event, and a preemptoff and preempton event.
  3 users of the events exist:
  - Lockdep
  - irqsoff and preemptoff tracers
  - irqs and preempt trace events

The unification cleans up several ifdefs and makes the code in preempt
tracer and irqsoff tracers simpler. It gets rid of all the horrific
ifdeferry around PROVE_LOCKING and makes configuration of the different
users of the tracepoints more easy and understandable. It also gets rid
of the time_* function calls from the lockdep hooks used to call into
the preemptirq tracer which is not needed anymore. The negative delta in
lines of code in this patch is quite large too.

In the patch we introduce a new CONFIG option PREEMPTIRQ_TRACEPOINTS
as a single point for registering probes onto the tracepoints. With
this,
the web of config options for preempt/irq toggle tracepoints and its
users becomes:

 PREEMPT_TRACER   PREEMPTIRQ_EVENTS  IRQSOFF_TRACER PROVE_LOCKING
       |                 |     \         |           |
       \    (selects)    /      \        \ (selects) /
      TRACE_PREEMPT_TOGGLE       ----> TRACE_IRQFLAGS
                      \                  /
                       \ (depends on)   /
                     PREEMPTIRQ_TRACEPOINTS

Other than the performance tests mentioned in the previous patch, I also
ran the locking API test suite. I verified that all tests cases are
passing.

I also injected issues by not registering lockdep probes onto the
tracepoints and I see failures to confirm that the probes are indeed
working.

This series + lockdep probes not registered (just to inject errors):
[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:FAILED|FAILED|  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:FAILED|FAILED|  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

With this series + lockdep probes registered, all locking tests pass:

[    0.000000]      hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]      soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]        sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
[    0.000000]          hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]          soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
[    0.000000]     hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
[    0.000000]     soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |

Link: http://lkml.kernel.org/r/20180730222423.196630-4-joel@joelfernandes.org

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h            |  11 +-
 include/linux/irqflags.h          |  11 +-
 include/linux/lockdep.h           |   8 +-
 include/linux/preempt.h           |   2 +-
 include/trace/events/preemptirq.h |  23 ++--
 init/main.c                       |   5 +-
 kernel/locking/lockdep.c          |  35 +++---
 kernel/sched/core.c               |   2 +-
 kernel/trace/Kconfig              |  22 +++-
 kernel/trace/Makefile             |   2 +-
 kernel/trace/trace_irqsoff.c      | 231 ++++++++++----------------------------
 kernel/trace/trace_preemptirq.c   |  72 ++++++++++++
 12 files changed, 195 insertions(+), 229 deletions(-)
 create mode 100644 kernel/trace/trace_preemptirq.c

(limited to 'include')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 63af5eb0ff46..a397907e8d72 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -701,16 +701,7 @@ static inline unsigned long get_lock_parent_ip(void)
 	return CALLER_ADDR2;
 }
 
-#ifdef CONFIG_IRQSOFF_TRACER
-  extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
-  extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
-#else
-  static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
-  static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
-#endif
-
-#if defined(CONFIG_PREEMPT_TRACER) || \
-	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 9700f00bbc04..50edb9cbbd26 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -15,9 +15,16 @@
 #include <linux/typecheck.h>
 #include <asm/irqflags.h>
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+/* Currently trace_softirqs_on/off is used only by lockdep */
+#ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+#else
+# define trace_softirqs_on(ip)	do { } while (0)
+# define trace_softirqs_off(ip)	do { } while (0)
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
   extern void trace_hardirqs_on(void);
   extern void trace_hardirqs_off(void);
 # define trace_hardirq_context(p)	((p)->hardirq_context)
@@ -43,8 +50,6 @@ do {						\
 #else
 # define trace_hardirqs_on()		do { } while (0)
 # define trace_hardirqs_off()		do { } while (0)
-# define trace_softirqs_on(ip)		do { } while (0)
-# define trace_softirqs_off(ip)		do { } while (0)
 # define trace_hardirq_context(p)	0
 # define trace_softirq_context(p)	0
 # define trace_hardirqs_enabled(p)	0
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6fc77d4dbdcd..a8113357ceeb 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -266,7 +266,8 @@ struct held_lock {
 /*
  * Initialization, self-test and debugging-output methods:
  */
-extern void lockdep_info(void);
+extern void lockdep_init(void);
+extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -406,7 +407,8 @@ static inline void lockdep_on(void)
 # define lock_downgrade(l, i)			do { } while (0)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
-# define lockdep_info()				do { } while (0)
+# define lockdep_init()				do { } while (0)
+# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
@@ -532,7 +534,7 @@ do {								\
 
 #endif /* CONFIG_LOCKDEP */
 
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PROVE_LOCKING
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
 static inline void print_irqtrace_events(struct task_struct *curr)
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 5bd3f151da78..c01813c3fbe9 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -150,7 +150,7 @@
  */
 #define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET)
 
-#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
+#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
 extern void preempt_count_add(int val);
 extern void preempt_count_sub(int val);
 #define preempt_count_dec_and_test() \
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index 9c4eb33c5a1d..9a0d4ceeb166 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -1,4 +1,4 @@
-#ifdef CONFIG_PREEMPTIRQ_EVENTS
+#ifdef CONFIG_PREEMPTIRQ_TRACEPOINTS
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM preemptirq
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(preemptirq_template,
 		  (void *)((unsigned long)(_stext) + __entry->parent_offs))
 );
 
-#ifndef CONFIG_PROVE_LOCKING
+#ifdef CONFIG_TRACE_IRQFLAGS
 DEFINE_EVENT(preemptirq_template, irq_disable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
@@ -40,9 +40,14 @@ DEFINE_EVENT(preemptirq_template, irq_disable,
 DEFINE_EVENT(preemptirq_template, irq_enable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
+#else
+#define trace_irq_enable(...)
+#define trace_irq_disable(...)
+#define trace_irq_enable_rcuidle(...)
+#define trace_irq_disable_rcuidle(...)
 #endif
 
-#ifdef CONFIG_DEBUG_PREEMPT
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
 DEFINE_EVENT(preemptirq_template, preempt_disable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
@@ -50,22 +55,22 @@ DEFINE_EVENT(preemptirq_template, preempt_disable,
 DEFINE_EVENT(preemptirq_template, preempt_enable,
 	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
 	     TP_ARGS(ip, parent_ip));
+#else
+#define trace_preempt_enable(...)
+#define trace_preempt_disable(...)
+#define trace_preempt_enable_rcuidle(...)
+#define trace_preempt_disable_rcuidle(...)
 #endif
 
 #endif /* _TRACE_PREEMPTIRQ_H */
 
 #include <trace/define_trace.h>
 
-#endif /* !CONFIG_PREEMPTIRQ_EVENTS */
-
-#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PROVE_LOCKING)
+#else /* !CONFIG_PREEMPTIRQ_TRACEPOINTS */
 #define trace_irq_enable(...)
 #define trace_irq_disable(...)
 #define trace_irq_enable_rcuidle(...)
 #define trace_irq_disable_rcuidle(...)
-#endif
-
-#if !defined(CONFIG_PREEMPTIRQ_EVENTS) || !defined(CONFIG_DEBUG_PREEMPT)
 #define trace_preempt_enable(...)
 #define trace_preempt_disable(...)
 #define trace_preempt_enable_rcuidle(...)
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..44fe43be84c1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -648,6 +648,9 @@ asmlinkage __visible void __init start_kernel(void)
 	profile_init();
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
+
+	lockdep_init_early();
+
 	early_boot_irqs_disabled = false;
 	local_irq_enable();
 
@@ -663,7 +666,7 @@ asmlinkage __visible void __init start_kernel(void)
 		panic("Too many boot %s vars at `%s'", panic_later,
 		      panic_param);
 
-	lockdep_info();
+	lockdep_init();
 
 	/*
 	 * Need to run this when irqs are enabled, because it wants
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index fbbb79d5cfa0..03bfaeb9f4e6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -55,6 +55,7 @@
 
 #include "lockdep_internals.h"
 
+#include <trace/events/preemptirq.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/lock.h>
 
@@ -2839,10 +2840,9 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 	debug_atomic_inc(hardirqs_on_events);
 }
 
-__visible void trace_hardirqs_on_caller(unsigned long ip)
+static void lockdep_hardirqs_on(void *none, unsigned long ignore,
+				unsigned long ip)
 {
-	time_hardirqs_on(CALLER_ADDR0, ip);
-
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2881,23 +2881,15 @@ __visible void trace_hardirqs_on_caller(unsigned long ip)
 	__trace_hardirqs_on_caller(ip);
 	current->lockdep_recursion = 0;
 }
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-
-void trace_hardirqs_on(void)
-{
-	trace_hardirqs_on_caller(CALLER_ADDR0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on);
 
 /*
  * Hardirqs were disabled:
  */
-__visible void trace_hardirqs_off_caller(unsigned long ip)
+static void lockdep_hardirqs_off(void *none, unsigned long ignore,
+				 unsigned long ip)
 {
 	struct task_struct *curr = current;
 
-	time_hardirqs_off(CALLER_ADDR0, ip);
-
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
@@ -2919,13 +2911,6 @@ __visible void trace_hardirqs_off_caller(unsigned long ip)
 	} else
 		debug_atomic_inc(redundant_hardirqs_off);
 }
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
-
-void trace_hardirqs_off(void)
-{
-	trace_hardirqs_off_caller(CALLER_ADDR0);
-}
-EXPORT_SYMBOL(trace_hardirqs_off);
 
 /*
  * Softirqs will be enabled:
@@ -4330,7 +4315,15 @@ out_restore:
 	raw_local_irq_restore(flags);
 }
 
-void __init lockdep_info(void)
+void __init lockdep_init_early(void)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	register_trace_prio_irq_disable(lockdep_hardirqs_off, NULL, INT_MAX);
+	register_trace_prio_irq_enable(lockdep_hardirqs_on, NULL, INT_MIN);
+#endif
+}
+
+void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe365c9a08e9..5de1a4343424 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3189,7 +3189,7 @@ static inline void sched_tick_stop(int cpu) { }
 #endif
 
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
-				defined(CONFIG_PREEMPT_TRACER))
+				defined(CONFIG_TRACE_PREEMPT_TOGGLE))
 /*
  * If the value passed in is equal to the current preempt count
  * then we just disabled preemption. Start timing the latency.
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4d4eb15cc7fd..036cec1fcd24 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,15 @@ config RING_BUFFER_ALLOW_SWAP
 	 Allow the use of ring_buffer_swap_cpu.
 	 Adds a very slight overhead to tracing when enabled.
 
+config PREEMPTIRQ_TRACEPOINTS
+	bool
+	depends on TRACE_PREEMPT_TOGGLE || TRACE_IRQFLAGS
+	select TRACING
+	default y
+	help
+	  Create preempt/irq toggle tracepoints if needed, so that other parts
+	  of the kernel can use them to generate or add hooks to them.
+
 # All tracer options should select GENERIC_TRACER. For those options that are
 # enabled by all tracers (context switch and event tracer) they select TRACING.
 # This allows those options to appear when no other tracer is selected. But the
@@ -155,18 +164,20 @@ config FUNCTION_GRAPH_TRACER
 	  the return value. This is done by setting the current return
 	  address on the current task structure into a stack of calls.
 
+config TRACE_PREEMPT_TOGGLE
+	bool
+	help
+	  Enables hooks which will be called when preemption is first disabled,
+	  and last enabled.
 
 config PREEMPTIRQ_EVENTS
 	bool "Enable trace events for preempt and irq disable/enable"
 	select TRACE_IRQFLAGS
-	depends on DEBUG_PREEMPT || !PROVE_LOCKING
-	depends on TRACING
+	select TRACE_PREEMPT_TOGGLE if PREEMPT
+	select GENERIC_TRACER
 	default n
 	help
 	  Enable tracing of disable and enable events for preemption and irqs.
-	  For tracing preempt disable/enable events, DEBUG_PREEMPT must be
-	  enabled. For tracing irq disable/enable events, PROVE_LOCKING must
-	  be disabled.
 
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
@@ -203,6 +214,7 @@ config PREEMPT_TRACER
 	select RING_BUFFER_ALLOW_SWAP
 	select TRACER_SNAPSHOT
 	select TRACER_SNAPSHOT_PER_CPU_SWAP
+	select TRACE_PREEMPT_TOGGLE
 	help
 	  This option measures the time spent in preemption-off critical
 	  sections, with microsecond accuracy.
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 81902a79e049..98d53b39a8ee 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_TRACING_MAP) += tracing_map.o
 obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
-obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o
+obj-$(CONFIG_PREEMPTIRQ_TRACEPOINTS) += trace_preemptirq.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
 obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index f8daa754cce2..770cd30cda40 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -16,7 +16,6 @@
 
 #include "trace.h"
 
-#define CREATE_TRACE_POINTS
 #include <trace/events/preemptirq.h>
 
 #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
@@ -450,66 +449,6 @@ void stop_critical_timings(void)
 }
 EXPORT_SYMBOL_GPL(stop_critical_timings);
 
-#ifdef CONFIG_IRQSOFF_TRACER
-#ifdef CONFIG_PROVE_LOCKING
-void time_hardirqs_on(unsigned long a0, unsigned long a1)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(a0, a1);
-}
-
-void time_hardirqs_off(unsigned long a0, unsigned long a1)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(a0, a1);
-}
-
-#else /* !CONFIG_PROVE_LOCKING */
-
-/*
- * We are only interested in hardirq on/off events:
- */
-static inline void tracer_hardirqs_on(void)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-
-static inline void tracer_hardirqs_off(void)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
-}
-
-static inline void tracer_hardirqs_on_caller(unsigned long caller_addr)
-{
-	if (!preempt_trace() && irq_trace())
-		stop_critical_timing(CALLER_ADDR0, caller_addr);
-}
-
-static inline void tracer_hardirqs_off_caller(unsigned long caller_addr)
-{
-	if (!preempt_trace() && irq_trace())
-		start_critical_timing(CALLER_ADDR0, caller_addr);
-}
-
-#endif /* CONFIG_PROVE_LOCKING */
-#endif /*  CONFIG_IRQSOFF_TRACER */
-
-#ifdef CONFIG_PREEMPT_TRACER
-static inline void tracer_preempt_on(unsigned long a0, unsigned long a1)
-{
-	if (preempt_trace() && !irq_trace())
-		stop_critical_timing(a0, a1);
-}
-
-static inline void tracer_preempt_off(unsigned long a0, unsigned long a1)
-{
-	if (preempt_trace() && !irq_trace())
-		start_critical_timing(a0, a1);
-}
-#endif /* CONFIG_PREEMPT_TRACER */
-
 #ifdef CONFIG_FUNCTION_TRACER
 static bool function_enabled;
 
@@ -659,15 +598,34 @@ static void irqsoff_tracer_stop(struct trace_array *tr)
 }
 
 #ifdef CONFIG_IRQSOFF_TRACER
+/*
+ * We are only interested in hardirq on/off events:
+ */
+static void tracer_hardirqs_on(void *none, unsigned long a0, unsigned long a1)
+{
+	if (!preempt_trace() && irq_trace())
+		stop_critical_timing(a0, a1);
+}
+
+static void tracer_hardirqs_off(void *none, unsigned long a0, unsigned long a1)
+{
+	if (!preempt_trace() && irq_trace())
+		start_critical_timing(a0, a1);
+}
+
 static int irqsoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_IRQS_OFF;
 
+	register_trace_irq_disable(tracer_hardirqs_off, NULL);
+	register_trace_irq_enable(tracer_hardirqs_on, NULL);
 	return __irqsoff_tracer_init(tr);
 }
 
 static void irqsoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_irq_disable(tracer_hardirqs_off, NULL);
+	unregister_trace_irq_enable(tracer_hardirqs_on, NULL);
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -690,21 +648,34 @@ static struct tracer irqsoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-# define register_irqsoff(trace) register_tracer(&trace)
-#else
-# define register_irqsoff(trace) do { } while (0)
-#endif
+#endif /*  CONFIG_IRQSOFF_TRACER */
 
 #ifdef CONFIG_PREEMPT_TRACER
+static void tracer_preempt_on(void *none, unsigned long a0, unsigned long a1)
+{
+	if (preempt_trace() && !irq_trace())
+		stop_critical_timing(a0, a1);
+}
+
+static void tracer_preempt_off(void *none, unsigned long a0, unsigned long a1)
+{
+	if (preempt_trace() && !irq_trace())
+		start_critical_timing(a0, a1);
+}
+
 static int preemptoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_PREEMPT_OFF;
 
+	register_trace_preempt_disable(tracer_preempt_off, NULL);
+	register_trace_preempt_enable(tracer_preempt_on, NULL);
 	return __irqsoff_tracer_init(tr);
 }
 
 static void preemptoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_preempt_disable(tracer_preempt_off, NULL);
+	unregister_trace_preempt_enable(tracer_preempt_on, NULL);
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -727,23 +698,29 @@ static struct tracer preemptoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-# define register_preemptoff(trace) register_tracer(&trace)
-#else
-# define register_preemptoff(trace) do { } while (0)
-#endif
+#endif /* CONFIG_PREEMPT_TRACER */
 
-#if defined(CONFIG_IRQSOFF_TRACER) && \
-	defined(CONFIG_PREEMPT_TRACER)
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
 
 static int preemptirqsoff_tracer_init(struct trace_array *tr)
 {
 	trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
 
+	register_trace_irq_disable(tracer_hardirqs_off, NULL);
+	register_trace_irq_enable(tracer_hardirqs_on, NULL);
+	register_trace_preempt_disable(tracer_preempt_off, NULL);
+	register_trace_preempt_enable(tracer_preempt_on, NULL);
+
 	return __irqsoff_tracer_init(tr);
 }
 
 static void preemptirqsoff_tracer_reset(struct trace_array *tr)
 {
+	unregister_trace_irq_disable(tracer_hardirqs_off, NULL);
+	unregister_trace_irq_enable(tracer_hardirqs_on, NULL);
+	unregister_trace_preempt_disable(tracer_preempt_off, NULL);
+	unregister_trace_preempt_enable(tracer_preempt_on, NULL);
+
 	__irqsoff_tracer_reset(tr);
 }
 
@@ -766,115 +743,21 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
 	.allow_instances = true,
 	.use_max_tr	= true,
 };
-
-# define register_preemptirqsoff(trace) register_tracer(&trace)
-#else
-# define register_preemptirqsoff(trace) do { } while (0)
 #endif
 
 __init static int init_irqsoff_tracer(void)
 {
-	register_irqsoff(irqsoff_tracer);
-	register_preemptoff(preemptoff_tracer);
-	register_preemptirqsoff(preemptirqsoff_tracer);
-
-	return 0;
-}
-core_initcall(init_irqsoff_tracer);
-#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */
-
-#ifndef CONFIG_IRQSOFF_TRACER
-static inline void tracer_hardirqs_on(void) { }
-static inline void tracer_hardirqs_off(void) { }
-static inline void tracer_hardirqs_on_caller(unsigned long caller_addr) { }
-static inline void tracer_hardirqs_off_caller(unsigned long caller_addr) { }
+#ifdef CONFIG_IRQSOFF_TRACER
+	register_tracer(&irqsoff_tracer);
 #endif
-
-#ifndef CONFIG_PREEMPT_TRACER
-static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
-static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
+#ifdef CONFIG_PREEMPT_TRACER
+	register_tracer(&preemptoff_tracer);
 #endif
-
-#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
-/* Per-cpu variable to prevent redundant calls when IRQs already off */
-static DEFINE_PER_CPU(int, tracing_irq_cpu);
-
-void trace_hardirqs_on(void)
-{
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
-
-	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	tracer_hardirqs_on();
-
-	this_cpu_write(tracing_irq_cpu, 0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on);
-
-void trace_hardirqs_off(void)
-{
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
-
-	this_cpu_write(tracing_irq_cpu, 1);
-
-	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	tracer_hardirqs_off();
-}
-EXPORT_SYMBOL(trace_hardirqs_off);
-
-__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
-{
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
-
-	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
-	tracer_hardirqs_on_caller(caller_addr);
-
-	this_cpu_write(tracing_irq_cpu, 0);
-}
-EXPORT_SYMBOL(trace_hardirqs_on_caller);
-
-__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
-{
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
-
-	this_cpu_write(tracing_irq_cpu, 1);
-
-	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
-	tracer_hardirqs_off_caller(caller_addr);
-}
-EXPORT_SYMBOL(trace_hardirqs_off_caller);
-
-/*
- * Stubs:
- */
-
-void trace_softirqs_on(unsigned long ip)
-{
-}
-
-void trace_softirqs_off(unsigned long ip)
-{
-}
-
-inline void print_irqtrace_events(struct task_struct *curr)
-{
-}
+#if defined(CONFIG_IRQSOFF_TRACER) && defined(CONFIG_PREEMPT_TRACER)
+	register_tracer(&preemptirqsoff_tracer);
 #endif
 
-#if defined(CONFIG_PREEMPT_TRACER) || \
-	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
-void trace_preempt_on(unsigned long a0, unsigned long a1)
-{
-	trace_preempt_enable_rcuidle(a0, a1);
-	tracer_preempt_on(a0, a1);
-}
-
-void trace_preempt_off(unsigned long a0, unsigned long a1)
-{
-	trace_preempt_disable_rcuidle(a0, a1);
-	tracer_preempt_off(a0, a1);
+	return 0;
 }
-#endif
+core_initcall(init_irqsoff_tracer);
+#endif /* IRQSOFF_TRACER || PREEMPTOFF_TRACER */
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
new file mode 100644
index 000000000000..e76b78bf258e
--- /dev/null
+++ b/kernel/trace/trace_preemptirq.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * preemptoff and irqoff tracepoints
+ *
+ * Copyright (C) Joel Fernandes (Google) <joel@joelfernandes.org>
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/preemptirq.h>
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+/* Per-cpu variable to prevent redundant calls when IRQs already off */
+static DEFINE_PER_CPU(int, tracing_irq_cpu);
+
+void trace_hardirqs_on(void)
+{
+	if (!this_cpu_read(tracing_irq_cpu))
+		return;
+
+	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+void trace_hardirqs_off(void)
+{
+	if (this_cpu_read(tracing_irq_cpu))
+		return;
+
+	this_cpu_write(tracing_irq_cpu, 1);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+}
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+__visible void trace_hardirqs_on_caller(unsigned long caller_addr)
+{
+	if (!this_cpu_read(tracing_irq_cpu))
+		return;
+
+	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
+	this_cpu_write(tracing_irq_cpu, 0);
+}
+EXPORT_SYMBOL(trace_hardirqs_on_caller);
+
+__visible void trace_hardirqs_off_caller(unsigned long caller_addr)
+{
+	if (this_cpu_read(tracing_irq_cpu))
+		return;
+
+	this_cpu_write(tracing_irq_cpu, 1);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+}
+EXPORT_SYMBOL(trace_hardirqs_off_caller);
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#ifdef CONFIG_TRACE_PREEMPT_TOGGLE
+
+void trace_preempt_on(unsigned long a0, unsigned long a1)
+{
+	trace_preempt_enable_rcuidle(a0, a1);
+}
+
+void trace_preempt_off(unsigned long a0, unsigned long a1)
+{
+	trace_preempt_disable_rcuidle(a0, a1);
+}
+#endif
-- 
cgit v1.2.3


From 44bda4b7d26e9fffed6d7152d98a2e9edaeb2a76 Mon Sep 17 00:00:00 2001
From: Hari Vyas <hari.vyas@broadcom.com>
Date: Tue, 3 Jul 2018 14:35:41 +0530
Subject: PCI: Fix is_added/is_busmaster race condition

When a PCI device is detected, pdev->is_added is set to 1 and proc and
sysfs entries are created.

When the device is removed, pdev->is_added is checked for one and then
device is detached with clearing of proc and sys entries and at end,
pdev->is_added is set to 0.

is_added and is_busmaster are bit fields in pci_dev structure sharing same
memory location.

A strange issue was observed with multiple removal and rescan of a PCIe
NVMe device using sysfs commands where is_added flag was observed as zero
instead of one while removing device and proc,sys entries are not cleared.
This causes issue in later device addition with warning message
"proc_dir_entry" already registered.

Debugging revealed a race condition between the PCI core setting the
is_added bit in pci_bus_add_device() and the NVMe driver reset work-queue
setting the is_busmaster bit in pci_set_master().  As these fields are not
handled atomically, that clears the is_added bit.

Move the is_added bit to a separate private flag variable and use atomic
functions to set and retrieve the device addition state.  This avoids the
race because is_added no longer shares a memory location with is_busmaster.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200283
Signed-off-by: Hari Vyas <hari.vyas@broadcom.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/pci-common.c          |  4 +++-
 arch/powerpc/platforms/powernv/pci-ioda.c |  3 ++-
 arch/powerpc/platforms/pseries/setup.c    |  3 ++-
 drivers/pci/bus.c                         |  6 +++---
 drivers/pci/hotplug/acpiphp_glue.c        |  2 +-
 drivers/pci/pci.h                         | 11 +++++++++++
 drivers/pci/probe.c                       |  4 ++--
 drivers/pci/remove.c                      |  5 +++--
 include/linux/pci.h                       |  1 -
 9 files changed, 27 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index fe9733ffffaa..471aac313b89 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -42,6 +42,8 @@
 #include <asm/ppc-pci.h>
 #include <asm/eeh.h>
 
+#include "../../../drivers/pci/pci.h"
+
 /* hose_spinlock protects accesses to the the phb_bitmap. */
 static DEFINE_SPINLOCK(hose_spinlock);
 LIST_HEAD(hose_list);
@@ -1014,7 +1016,7 @@ void pcibios_setup_bus_devices(struct pci_bus *bus)
 		/* Cardbus can call us to add new devices to a bus, so ignore
 		 * those who are already fully discovered
 		 */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 
 		pcibios_setup_device(dev);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5bd0eb6681bc..70b2e1e0f23c 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -46,6 +46,7 @@
 
 #include "powernv.h"
 #include "pci.h"
+#include "../../../../drivers/pci/pci.h"
 
 #define PNV_IODA1_M64_NUM	16	/* Number of M64 BARs	*/
 #define PNV_IODA1_M64_SEGS	8	/* Segments per M64 BAR	*/
@@ -3138,7 +3139,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
 	struct pci_dn *pdn;
 	int mul, total_vfs;
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 
 	pdn = pci_get_pdn(pdev);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 139f0af6c3d9..8a4868a3964b 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -71,6 +71,7 @@
 #include <asm/security_features.h>
 
 #include "pseries.h"
+#include "../../../../drivers/pci/pci.h"
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
@@ -664,7 +665,7 @@ static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
 	const int *indexes;
 	struct device_node *dn = pci_device_to_OF_node(pdev);
 
-	if (!pdev->is_physfn || pdev->is_added)
+	if (!pdev->is_physfn || pci_dev_is_added(pdev))
 		return;
 	/*Firmware must support open sriov otherwise dont configure*/
 	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 35b7fc87eac5..5cb40b2518f9 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -330,7 +330,7 @@ void pci_bus_add_device(struct pci_dev *dev)
 		return;
 	}
 
-	dev->is_added = 1;
+	pci_dev_assign_added(dev, true);
 }
 EXPORT_SYMBOL_GPL(pci_bus_add_device);
 
@@ -347,14 +347,14 @@ void pci_bus_add_devices(const struct pci_bus *bus)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip already-added devices */
-		if (dev->is_added)
+		if (pci_dev_is_added(dev))
 			continue;
 		pci_bus_add_device(dev);
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip if device attach failed */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			continue;
 		child = dev->subordinate;
 		if (child)
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 3a17b290df5d..ef0b1b6ba86f 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -509,7 +509,7 @@ static void enable_slot(struct acpiphp_slot *slot)
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Assume that newly added devices are powered on already. */
-		if (!dev->is_added)
+		if (!pci_dev_is_added(dev))
 			dev->current_state = PCI_D0;
 	}
 
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 882f1f9596df..08817253c8a2 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -288,6 +288,7 @@ struct pci_sriov {
 
 /* pci_dev priv_flags */
 #define PCI_DEV_DISCONNECTED 0
+#define PCI_DEV_ADDED 1
 
 static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
 {
@@ -300,6 +301,16 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev)
 	return test_bit(PCI_DEV_DISCONNECTED, &dev->priv_flags);
 }
 
+static inline void pci_dev_assign_added(struct pci_dev *dev, bool added)
+{
+	assign_bit(PCI_DEV_ADDED, &dev->priv_flags, added);
+}
+
+static inline bool pci_dev_is_added(const struct pci_dev *dev)
+{
+	return test_bit(PCI_DEV_ADDED, &dev->priv_flags);
+}
+
 #ifdef CONFIG_PCI_ATS
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ac876e32de4b..611adcd9c169 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2433,13 +2433,13 @@ int pci_scan_slot(struct pci_bus *bus, int devfn)
 	dev = pci_scan_single_device(bus, devfn);
 	if (!dev)
 		return 0;
-	if (!dev->is_added)
+	if (!pci_dev_is_added(dev))
 		nr++;
 
 	for (fn = next_fn(bus, dev, 0); fn > 0; fn = next_fn(bus, dev, fn)) {
 		dev = pci_scan_single_device(bus, devfn + fn);
 		if (dev) {
-			if (!dev->is_added)
+			if (!pci_dev_is_added(dev))
 				nr++;
 			dev->multifunction = 1;
 		}
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 6f072eae4f7a..5e3d0dced2b8 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,11 +19,12 @@ static void pci_stop_dev(struct pci_dev *dev)
 {
 	pci_pme_active(dev, false);
 
-	if (dev->is_added) {
+	if (pci_dev_is_added(dev)) {
 		device_release_driver(&dev->dev);
 		pci_proc_detach_device(dev);
 		pci_remove_sysfs_dev_files(dev);
-		dev->is_added = 0;
+
+		pci_dev_assign_added(dev, false);
 	}
 
 	if (dev->bus->self)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index abd5d5e17aee..c133ccfa002e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -368,7 +368,6 @@ struct pci_dev {
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	multifunction:1;	/* Multi-function device */
 
-	unsigned int	is_added:1;
 	unsigned int	is_busmaster:1;		/* Is busmaster */
 	unsigned int	no_msi:1;		/* May not use MSI */
 	unsigned int	no_64bit_msi:1; 	/* May only use 32-bit MSIs */
-- 
cgit v1.2.3


From c29c2ebd2ae0066c026045a21aa33ccbfcd8bb3c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 30 Jul 2018 20:43:51 -0700
Subject: net: update real_num_rx_queues even when !CONFIG_SYSFS

We used to depend on real_num_rx_queues as a upper bound for sanity
checks.  For AF_XDP socket validation it's useful if the check behaves
the same regardless of CONFIG_SYSFS setting.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9c917467a2c7..3bf7e93c9e96 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3431,8 +3431,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
 #else
 static inline int netif_set_real_num_rx_queues(struct net_device *dev,
-						unsigned int rxq)
+						unsigned int rxqs)
 {
+	dev->real_num_rx_queues = rxqs;
 	return 0;
 }
 #endif
-- 
cgit v1.2.3


From 84c6b86875e01a08a0daa6fdd4a01b36bf0bf0b2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 30 Jul 2018 20:43:53 -0700
Subject: xsk: don't allow umem replace at stack level
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently drivers have to check if they already have a umem
installed for a given queue and return an error if so.  Make
better use of XDP_QUERY_XSK_UMEM and move this functionality
to the core.

We need to keep rtnl across the calls now.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 ++++---
 net/xdp/xdp_umem.c        | 37 ++++++++++++++++++++++++++++---------
 2 files changed, 32 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3bf7e93c9e96..282e2e95ad5b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -872,10 +872,10 @@ struct netdev_bpf {
 		struct {
 			struct bpf_offloaded_map *offmap;
 		};
-		/* XDP_SETUP_XSK_UMEM */
+		/* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
 		struct {
-			struct xdp_umem *umem;
-			u16 queue_id;
+			struct xdp_umem *umem; /* out for query*/
+			u16 queue_id; /* in for query */
 		} xsk;
 	};
 };
@@ -3568,6 +3568,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags);
 u32 __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op,
 		    enum bpf_netdev_command cmd);
+int xdp_umem_query(struct net_device *dev, u16 queue_id);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index c199d66b5f3f..911ca6d3cb5a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/bpf.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
@@ -40,6 +42,21 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	}
 }
 
+int xdp_umem_query(struct net_device *dev, u16 queue_id)
+{
+	struct netdev_bpf bpf;
+
+	ASSERT_RTNL();
+
+	memset(&bpf, 0, sizeof(bpf));
+	bpf.command = XDP_QUERY_XSK_UMEM;
+	bpf.xsk.queue_id = queue_id;
+
+	if (!dev->netdev_ops->ndo_bpf)
+		return 0;
+	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
+}
+
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u32 queue_id, u16 flags)
 {
@@ -62,28 +79,30 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	bpf.command = XDP_QUERY_XSK_UMEM;
 
 	rtnl_lock();
-	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	rtnl_unlock();
-
-	if (err)
-		return force_zc ? -ENOTSUPP : 0;
+	err = xdp_umem_query(dev, queue_id);
+	if (err) {
+		err = err < 0 ? -ENOTSUPP : -EBUSY;
+		goto err_rtnl_unlock;
+	}
 
 	bpf.command = XDP_SETUP_XSK_UMEM;
 	bpf.xsk.umem = umem;
 	bpf.xsk.queue_id = queue_id;
 
-	rtnl_lock();
 	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
-	rtnl_unlock();
-
 	if (err)
-		return force_zc ? err : 0; /* fail or fallback */
+		goto err_rtnl_unlock;
+	rtnl_unlock();
 
 	dev_hold(dev);
 	umem->dev = dev;
 	umem->queue_id = queue_id;
 	umem->zc = true;
 	return 0;
+
+err_rtnl_unlock:
+	rtnl_unlock();
+	return force_zc ? err : 0; /* fail or fallback */
 }
 
 static void xdp_umem_clear_dev(struct xdp_umem *umem)
-- 
cgit v1.2.3


From 016583d7030cec9b69e0d55269a5967f4ee871d2 Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Tue, 31 Jul 2018 10:10:51 -0400
Subject: sunrpc: Change rpc_print_iostats to rpc_clnt_show_stats and handle
 rpc_clnt clones

The existing rpc_print_iostats has a few shortcomings.  First, the naming
is not consistent with other functions in the kernel that display stats.
Second, it is really displaying stats for an rpc_clnt structure as it
displays both xprt stats and per-op stats.  Third, it does not handle
rpc_clnt clones, which is important for the one in-kernel tree caller
of this function, the NFS client's nfs_show_stats function.

Fix all of the above by renaming the rpc_print_iostats to
rpc_clnt_show_stats and looping through any rpc_clnt clones via
cl_parent.

Once this interface is fixed, this addresses a problem with NFSv4.
Before this patch, the /proc/self/mountstats always showed incorrect
counts for NFSv4 lease and session related opcodes such as SEQUENCE,
RENEW, SETCLIENTID, CREATE_SESSION, etc.  These counts were always 0
even though many ops would go over the wire.  The reason for this is
there are multiple rpc_clnt structures allocated for any given NFSv4
mount, and inside nfs_show_stats() we callled into rpc_print_iostats()
which only handled one of them, nfs_server->client.  Fix these counts
by calling sunrpc's new rpc_clnt_show_stats() function, which handles
cloned rpc_clnt structs and prints the stats together.

Note that one side-effect of the above is that multiple mounts from
the same NFS server will show identical counts in the above ops due
to the fact the one rpc_clnt (representing the NFSv4 client state)
is shared across mounts.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/super.c                 |  2 +-
 include/linux/sunrpc/metrics.h |  4 ++--
 net/sunrpc/stats.c             | 17 ++++++++++++-----
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5e470e233c83..bdf39fa1bfbc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -884,7 +884,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root)
 #endif
 	seq_printf(m, "\n");
 
-	rpc_print_iostats(m, nfss->client);
+	rpc_clnt_show_stats(m, nfss->client);
 
 	return 0;
 }
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 9baed7b355b2..1b3751327575 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -82,7 +82,7 @@ void			rpc_count_iostats(const struct rpc_task *,
 					  struct rpc_iostats *);
 void			rpc_count_iostats_metrics(const struct rpc_task *,
 					  struct rpc_iostats *);
-void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void			rpc_clnt_show_stats(struct seq_file *, struct rpc_clnt *);
 void			rpc_free_iostats(struct rpc_iostats *);
 
 #else  /*  CONFIG_PROC_FS  */
@@ -95,7 +95,7 @@ static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
 {
 }
 
-static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {}
+static inline void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) {}
 static inline void rpc_free_iostats(struct rpc_iostats *stats) {}
 
 #endif  /*  CONFIG_PROC_FS  */
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 32adddd7fb78..ffae7c2245b1 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -235,13 +235,12 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
 		   ktime_to_ms(stats->om_execute));
 }
 
-void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
-	struct rpc_iostats *stats = clnt->cl_metrics;
 	struct rpc_xprt *xprt;
 	unsigned int op, maxproc = clnt->cl_maxproc;
 
-	if (!stats)
+	if (!clnt->cl_metrics)
 		return;
 
 	seq_printf(seq, "\tRPC iostats version: %s  ", RPC_IOSTATS_VERS);
@@ -256,10 +255,18 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 
 	seq_printf(seq, "\tper-op statistics\n");
 	for (op = 0; op < maxproc; op++) {
-		_print_rpc_iostats(seq, &stats[op], op, clnt->cl_procinfo);
+		struct rpc_iostats stats = {};
+		struct rpc_clnt *next = clnt;
+		do {
+			_add_rpc_iostats(&stats, &next->cl_metrics[op]);
+			if (next == next->cl_parent)
+				break;
+			next = next->cl_parent;
+		} while (next);
+		_print_rpc_iostats(seq, &stats, op, clnt->cl_procinfo);
 	}
 }
-EXPORT_SYMBOL_GPL(rpc_print_iostats);
+EXPORT_SYMBOL_GPL(rpc_clnt_show_stats);
 
 /*
  * Register/unregister RPC proc files
-- 
cgit v1.2.3


From 0f90be132cbf1537d87a6a8b9e80867adac892f6 Mon Sep 17 00:00:00 2001
From: Bill Baker <Bill.Baker@Oracle.com>
Date: Tue, 19 Jun 2018 16:24:58 -0500
Subject: NFSv4 client live hangs after live data migration recovery

After a live data migration event at the NFS server, the client may send
I/O requests to the wrong server, causing a live hang due to repeated
recovery events.  On the wire, this will appear as an I/O request failing
with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly.
NFS4ERR_BADSSESSION is returned because the session ID being used was
issued by the other server and is not valid at the old server.

The failure is caused by async worker threads having cached the transport
(xprt) in the rpc_task structure.  After the migration recovery completes,
the task is redispatched and the task resends the request to the wrong
server based on the old value still present in tk_xprt.

The solution is to recompute the tk_xprt field of the rpc_task structure
so that the request goes to the correct server.

Signed-off-by: Bill Baker <bill.baker@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Helen Chao <helen.chao@oracle.com>
Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c           |  9 ++++++++-
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 28 ++++++++++++++++++++--------
 3 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3b28c0ac10bc..bddba460643a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -581,8 +581,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
 		ret = -EIO;
 	return ret;
 out_retry:
-	if (ret == 0)
+	if (ret == 0) {
 		exception->retry = 1;
+		/*
+		 * For NFS4ERR_MOVED, the client transport will need to
+		 * be recomputed after migration recovery has completed.
+		 */
+		if (errorcode == -NFS4ERR_MOVED)
+			rpc_task_release_transport(task);
+	}
 	return ret;
 }
 
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 9b11b6a0978c..73d5c4a870fa 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -156,6 +156,7 @@ int		rpc_switch_client_transport(struct rpc_clnt *,
 
 void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
+void		rpc_task_release_transport(struct rpc_task *);
 void		rpc_task_release_client(struct rpc_task *);
 
 int		rpcb_create_local(struct net *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d839c33ae7d9..0d85425b1e07 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -965,10 +965,20 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+void rpc_task_release_transport(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+
+	if (xprt) {
+		task->tk_xprt = NULL;
+		xprt_put(xprt);
+	}
+}
+EXPORT_SYMBOL_GPL(rpc_task_release_transport);
+
 void rpc_task_release_client(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	struct rpc_xprt *xprt = task->tk_xprt;
 
 	if (clnt != NULL) {
 		/* Remove from client task list */
@@ -979,12 +989,14 @@ void rpc_task_release_client(struct rpc_task *task)
 
 		rpc_release_client(clnt);
 	}
+	rpc_task_release_transport(task);
+}
 
-	if (xprt != NULL) {
-		task->tk_xprt = NULL;
-
-		xprt_put(xprt);
-	}
+static
+void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+	if (!task->tk_xprt)
+		task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
 }
 
 static
@@ -992,8 +1004,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
 {
 
 	if (clnt != NULL) {
-		if (task->tk_xprt == NULL)
-			task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+		rpc_task_set_transport(task, clnt);
 		task->tk_client = clnt;
 		atomic_inc(&clnt->cl_count);
 		if (clnt->cl_softrtry)
@@ -1512,6 +1523,7 @@ call_start(struct rpc_task *task)
 		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
+	rpc_task_set_transport(task, clnt);
 }
 
 /*
-- 
cgit v1.2.3


From e6476c21447c4b17c47e476aade6facf050f31e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:45:07 +0200
Subject: net: remove bogus RCU annotations on socket.wq

We never use RCU protection for it, just a lot of cargo-cult
rcu_deference_protects calls.

Note that we do keep the kfree_rcu call for it, as the references through
struct sock are RCU protected and thus might require a grace period before
freeing.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  2 +-
 include/net/sock.h  |  2 +-
 net/socket.c        | 10 ++++------
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index 6554d3ba4396..e0930678c8bf 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -114,7 +114,7 @@ struct socket {
 
 	unsigned long		flags;
 
-	struct socket_wq __rcu	*wq;
+	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/sock.h b/include/net/sock.h
index 2afea5d1bdfe..433f45fc2d68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1788,7 +1788,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	WARN_ON(parent->sk);
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_wq = parent->wq;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
diff --git a/net/socket.c b/net/socket.c
index 5b7df6695f4f..475247e347ae 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -251,7 +251,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	init_waitqueue_head(&wq->wait);
 	wq->fasync_list = NULL;
 	wq->flags = 0;
-	RCU_INIT_POINTER(ei->socket.wq, wq);
+	ei->socket.wq = wq;
 
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
@@ -265,11 +265,9 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 static void sock_destroy_inode(struct inode *inode)
 {
 	struct socket_alloc *ei;
-	struct socket_wq *wq;
 
 	ei = container_of(inode, struct socket_alloc, vfs_inode);
-	wq = rcu_dereference_protected(ei->socket.wq, 1);
-	kfree_rcu(wq, rcu);
+	kfree_rcu(ei->socket.wq, rcu);
 	kmem_cache_free(sock_inode_cachep, ei);
 }
 
@@ -603,7 +601,7 @@ static void __sock_release(struct socket *sock, struct inode *inode)
 		module_put(owner);
 	}
 
-	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
+	if (sock->wq->fasync_list)
 		pr_err("%s: fasync list not empty!\n", __func__);
 
 	if (!sock->file) {
@@ -1181,7 +1179,7 @@ static int sock_fasync(int fd, struct file *filp, int on)
 		return -EINVAL;
 
 	lock_sock(sk);
-	wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
+	wq = sock->wq;
 	fasync_helper(fd, filp, on, &wq->fasync_list);
 
 	if (!wq->fasync_list)
-- 
cgit v1.2.3


From 7209049d40dc37791ce0f3738965296f30e26044 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 31 Jul 2018 17:27:02 -0400
Subject: dm kcopyd: return void from dm_kcopyd_copy()

dm_kcopyd_copy() only ever returns 0 so there is no need for callers to
account for possible failure.  Same goes for dm_kcopyd_zero().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c  | 16 ++++------------
 drivers/md/dm-kcopyd.c        | 16 +++++++---------
 drivers/md/dm-raid1.c         | 17 +++++------------
 drivers/md/dm-thin.c          | 23 +++--------------------
 drivers/md/dm-zoned-reclaim.c |  6 ++----
 include/linux/dm-kcopyd.h     | 12 ++++++------
 6 files changed, 27 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 44df244807e5..a53413371725 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -1188,9 +1188,8 @@ static void copy_complete(int read_err, unsigned long write_err, void *context)
 	queue_continuation(mg->cache->wq, &mg->k);
 }
 
-static int copy(struct dm_cache_migration *mg, bool promote)
+static void copy(struct dm_cache_migration *mg, bool promote)
 {
-	int r;
 	struct dm_io_region o_region, c_region;
 	struct cache *cache = mg->cache;
 
@@ -1203,11 +1202,9 @@ static int copy(struct dm_cache_migration *mg, bool promote)
 	c_region.count = cache->sectors_per_block;
 
 	if (promote)
-		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
+		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
 	else
-		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
-
-	return r;
+		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
 }
 
 static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
@@ -1449,12 +1446,7 @@ static void mg_full_copy(struct work_struct *ws)
 	}
 
 	init_continuation(&mg->k, mg_upgrade_lock);
-
-	if (copy(mg, is_policy_promote)) {
-		DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache));
-		mg->k.input = BLK_STS_IOERR;
-		mg_complete(mg, false);
-	}
+	copy(mg, is_policy_promote);
 }
 
 static void mg_copy(struct work_struct *ws)
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 3c7547a3c371..cc101f3ec42c 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -741,9 +741,9 @@ static void split_job(struct kcopyd_job *master_job)
 	}
 }
 
-int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
-		   unsigned int num_dests, struct dm_io_region *dests,
-		   unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
+void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
+		    unsigned int num_dests, struct dm_io_region *dests,
+		    unsigned int flags, dm_kcopyd_notify_fn fn, void *context)
 {
 	struct kcopyd_job *job;
 	int i;
@@ -818,16 +818,14 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
 		job->progress = 0;
 		split_job(job);
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL(dm_kcopyd_copy);
 
-int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context)
+void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context)
 {
-	return dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
+	dm_kcopyd_copy(kc, NULL, num_dests, dests, flags, fn, context);
 }
 EXPORT_SYMBOL(dm_kcopyd_zero);
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 5903e492bb34..79eab1071ec2 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -326,9 +326,8 @@ static void recovery_complete(int read_err, unsigned long write_err,
 	dm_rh_recovery_end(reg, !(read_err || write_err));
 }
 
-static int recover(struct mirror_set *ms, struct dm_region *reg)
+static void recover(struct mirror_set *ms, struct dm_region *reg)
 {
-	int r;
 	unsigned i;
 	struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest;
 	struct mirror *m;
@@ -367,10 +366,8 @@ static int recover(struct mirror_set *ms, struct dm_region *reg)
 	if (!errors_handled(ms))
 		set_bit(DM_KCOPYD_IGNORE_ERROR, &flags);
 
-	r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
-			   flags, recovery_complete, reg);
-
-	return r;
+	dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to,
+		       flags, recovery_complete, reg);
 }
 
 static void reset_ms_flags(struct mirror_set *ms)
@@ -388,7 +385,6 @@ static void do_recovery(struct mirror_set *ms)
 {
 	struct dm_region *reg;
 	struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
-	int r;
 
 	/*
 	 * Start quiescing some regions.
@@ -398,11 +394,8 @@ static void do_recovery(struct mirror_set *ms)
 	/*
 	 * Copy any already quiesced regions.
 	 */
-	while ((reg = dm_rh_recovery_start(ms->rh))) {
-		r = recover(ms, reg);
-		if (r)
-			dm_rh_recovery_end(reg, 0);
-	}
+	while ((reg = dm_rh_recovery_start(ms->rh)))
+		recover(ms, reg);
 
 	/*
 	 * Update the in sync flag.
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index c44477d7a9ea..5997d6808b57 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1220,18 +1220,13 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
 static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
 		    sector_t begin, sector_t end)
 {
-	int r;
 	struct dm_io_region to;
 
 	to.bdev = tc->pool_dev->bdev;
 	to.sector = begin;
 	to.count = end - begin;
 
-	r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
-	if (r < 0) {
-		DMERR_LIMIT("dm_kcopyd_zero() failed");
-		copy_complete(1, 1, m);
-	}
+	dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
 }
 
 static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
@@ -1257,7 +1252,6 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 			  struct dm_bio_prison_cell *cell, struct bio *bio,
 			  sector_t len)
 {
-	int r;
 	struct pool *pool = tc->pool;
 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
@@ -1296,19 +1290,8 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 		to.sector = data_dest * pool->sectors_per_block;
 		to.count = len;
 
-		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
-				   0, copy_complete, m);
-		if (r < 0) {
-			DMERR_LIMIT("dm_kcopyd_copy() failed");
-			copy_complete(1, 1, m);
-
-			/*
-			 * We allow the zero to be issued, to simplify the
-			 * error path.  Otherwise we'd need to start
-			 * worrying about decrementing the prepare_actions
-			 * counter.
-			 */
-		}
+		dm_kcopyd_copy(pool->copier, &from, 1, &to,
+			       0, copy_complete, m);
 
 		/*
 		 * Do we need to zero a tail region?
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index 44a119e12f1a..edf4b95eb075 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -161,10 +161,8 @@ static int dmz_reclaim_copy(struct dmz_reclaim *zrc,
 
 		/* Copy the valid region */
 		set_bit(DMZ_RECLAIM_KCOPY, &zrc->flags);
-		ret = dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
-				     dmz_reclaim_kcopy_end, zrc);
-		if (ret)
-			return ret;
+		dm_kcopyd_copy(zrc->kc, &src, 1, &dst, flags,
+			       dmz_reclaim_kcopy_end, zrc);
 
 		/* Wait for copy to complete */
 		wait_on_bit_io(&zrc->flags, DMZ_RECLAIM_KCOPY,
diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h
index cfac8588ed56..e42de7750c88 100644
--- a/include/linux/dm-kcopyd.h
+++ b/include/linux/dm-kcopyd.h
@@ -62,9 +62,9 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc);
 typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err,
 				    void *context);
 
-int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context);
 
 /*
  * Prepare a callback and submit it via the kcopyd thread.
@@ -81,9 +81,9 @@ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc,
 				 dm_kcopyd_notify_fn fn, void *context);
 void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err);
 
-int dm_kcopyd_zero(struct dm_kcopyd_client *kc,
-		   unsigned num_dests, struct dm_io_region *dests,
-		   unsigned flags, dm_kcopyd_notify_fn fn, void *context);
+void dm_kcopyd_zero(struct dm_kcopyd_client *kc,
+		    unsigned num_dests, struct dm_io_region *dests,
+		    unsigned flags, dm_kcopyd_notify_fn fn, void *context);
 
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_DM_KCOPYD_H */
-- 
cgit v1.2.3


From 43bce41cf48eb51eab5ad9e0d40ed382a7bb61d7 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 26 Jul 2018 13:43:49 +0200
Subject: drm/scheduler: only kill entity if last user is killed v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note which task is using the entity and only kill it if the last user of
the entity is killed. This should prevent problems when entities are leaked to
child processes.

v2: add missing kernel doc

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c | 6 +++++-
 include/drm/gpu_scheduler.h               | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 3f2fc5e8242a..f563e4fbb4b6 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -275,6 +275,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 {
 	struct drm_gpu_scheduler *sched;
+	struct task_struct *last_user;
 	long ret = timeout;
 
 	sched = entity->rq->sched;
@@ -295,7 +296,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 
 
 	/* For killed process disable any more IBs enqueue right now */
-	if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
+	if ((!last_user || last_user == current->group_leader) &&
+	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
 		drm_sched_entity_set_rq(entity, NULL);
 
 	return ret;
@@ -541,6 +544,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 
 	trace_drm_sched_job(sched_job, entity);
 
+	WRITE_ONCE(entity->last_user, current->group_leader);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
 	/* first job wakes up scheduler */
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 091b9afcd184..21c648b0b2a1 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -66,6 +66,7 @@ enum drm_sched_priority {
  * @guilty: points to ctx's guilty.
  * @fini_status: contains the exit status in case the process was signalled.
  * @last_scheduled: points to the finished fence of the last scheduled job.
+ * @last_user: last group leader pushing a job into the entity.
  *
  * Entities will emit jobs in order to their corresponding hardware
  * ring, and the scheduler will alternate between entities based on
@@ -85,6 +86,7 @@ struct drm_sched_entity {
 	struct dma_fence_cb		cb;
 	atomic_t			*guilty;
 	struct dma_fence                *last_scheduled;
+	struct task_struct		*last_user;
 };
 
 /**
-- 
cgit v1.2.3


From 546c596cf5491fda1516536e049c6a36836eb884 Mon Sep 17 00:00:00 2001
From: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Date: Wed, 18 Jul 2018 09:40:35 +0800
Subject: PCI: Unify PCI and normal DMA direction definitions

Current DMA direction definitions in pci-dma-compat.h and dma-direction.h
are mirrored in value.  Unify them to enhance readability and avoid
possible inconsistency.

Signed-off-by: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Joey Zheng <yu.zheng@hxt-semitech.com>
---
 include/linux/dma-direction.h  | 6 ++----
 include/linux/pci-dma-compat.h | 8 ++++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h
index 3649a031893a..9c96e30e6a0b 100644
--- a/include/linux/dma-direction.h
+++ b/include/linux/dma-direction.h
@@ -1,14 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_DMA_DIRECTION_H
 #define _LINUX_DMA_DIRECTION_H
-/*
- * These definitions mirror those in pci.h, so they can be used
- * interchangeably with their PCI_ counterparts.
- */
+
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
 	DMA_TO_DEVICE = 1,
 	DMA_FROM_DEVICE = 2,
 	DMA_NONE = 3,
 };
+
 #endif
diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h
index 0dd1a3f7b309..c3f1b44ade29 100644
--- a/include/linux/pci-dma-compat.h
+++ b/include/linux/pci-dma-compat.h
@@ -8,10 +8,10 @@
 #include <linux/dma-mapping.h>
 
 /* This defines the direction arg to the DMA mapping routines. */
-#define PCI_DMA_BIDIRECTIONAL	0
-#define PCI_DMA_TODEVICE	1
-#define PCI_DMA_FROMDEVICE	2
-#define PCI_DMA_NONE		3
+#define PCI_DMA_BIDIRECTIONAL	DMA_BIDIRECTIONAL
+#define PCI_DMA_TODEVICE	DMA_TO_DEVICE
+#define PCI_DMA_FROMDEVICE	DMA_FROM_DEVICE
+#define PCI_DMA_NONE		DMA_NONE
 
 static inline void *
 pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
-- 
cgit v1.2.3


From bb276262e88dae52cc717bb636b7468f66bb234e Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Fri, 27 Jul 2018 11:33:13 -0700
Subject: mtd: spi-nor: only apply reset hacks to broken hardware

Commit 59b356ffd0b0 ("mtd: m25p80: restore the status of SPI flash when
exiting") is the latest from a long history of attempts to add reboot
handling to handle stateful addressing modes on SPI flash. Some prior
mostly-related discussions:

http://lists.infradead.org/pipermail/linux-mtd/2013-March/046343.html
[PATCH 1/3] mtd: m25p80: utilize dedicated 4-byte addressing commands

http://lists.infradead.org/pipermail/barebox/2014-September/020682.html
[RFC] MTD m25p80 3-byte addressing and boot problem

http://lists.infradead.org/pipermail/linux-mtd/2015-February/057683.html
[PATCH 2/2] m25p80: if supported put chip to deep power down if not used

Previously, attempts to add reboot-time software reset handling were
rejected, but the latest attempt was not.

Quick summary of the problem:
Some systems (e.g., boot ROM or bootloader) assume that they can read
initial boot code from their SPI flash using 3-byte addressing. If the
flash is left in 4-byte mode after reset, these systems won't boot. The
above patch provided a shutdown/remove hook to attempt to reset the
addressing mode before we reboot. Notably, this patch misses out on
huge classes of unexpected reboots (e.g., crashes, watchdog resets).

Unfortunately, it is essentially impossible to solve this problem 100%:
if your system doesn't know how to reset the SPI flash to power-on
defaults at initialization time, no amount of software can really rescue
you -- there will always be a chance of some unexpected reset that
leaves your flash in an addressing mode that your boot sequence didn't
expect.

While it is not directly harmful to perform hacks like the
aforementioned commit on all 4-byte addressing flash, a
properly-designed system should not need the hack -- and in fact,
providing this hack may mask the fact that a given system is indeed
broken. So this patch attempts to apply this unsound hack more narrowly,
providing a strong suggestion to developers and system designers that
this is truly a hack. With luck, system designers can catch their errors
early on in their development cycle, rather than applying this hack long
term. But apparently enough systems are out in the wild that we still
have to provide this hack.

Document a new device tree property to denote systems that do not have a
proper hardware (or software) reset mechanism, and apply the hack (with
a loud warning) only in this case.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 .../devicetree/bindings/mtd/jedec,spi-nor.txt          |  9 +++++++++
 drivers/mtd/spi-nor/spi-nor.c                          | 18 ++++++++++++++++--
 include/linux/mtd/spi-nor.h                            |  1 +
 3 files changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
index 956bb046e599..f03be904d3c2 100644
--- a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -69,6 +69,15 @@ Optional properties:
                    all chips and support for it can not be detected at runtime.
                    Refer to your chips' datasheet to check if this is supported
                    by your chip.
+- broken-flash-reset : Some flash devices utilize stateful addressing modes
+		   (e.g., for 32-bit addressing) which need to be managed
+		   carefully by a system. Because these sorts of flash don't
+		   have a standardized software reset command, and because some
+		   systems don't toggle the flash RESET# pin upon system reset
+		   (if the pin even exists at all), there are systems which
+		   cannot reboot properly if the flash is left in the "wrong"
+		   state. This boolean flag can be used on such systems, to
+		   denote the absence of a reliable reset mechanism.
 
 Example:
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index d9c368c44194..f028277fb1ce 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2757,8 +2757,18 @@ static int spi_nor_init(struct spi_nor *nor)
 
 	if ((nor->addr_width == 4) &&
 	    (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) &&
-	    !(nor->info->flags & SPI_NOR_4B_OPCODES))
+	    !(nor->info->flags & SPI_NOR_4B_OPCODES)) {
+		/*
+		 * If the RESET# pin isn't hooked up properly, or the system
+		 * otherwise doesn't perform a reset command in the boot
+		 * sequence, it's impossible to 100% protect against unexpected
+		 * reboots (e.g., crashes). Warn the user (or hopefully, system
+		 * designer) that this is bad.
+		 */
+		WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET,
+			  "enabling reset hack; may not recover from unexpected reboots\n");
 		set_4byte(nor, nor->info, 1);
+	}
 
 	return 0;
 }
@@ -2781,7 +2791,8 @@ void spi_nor_restore(struct spi_nor *nor)
 	/* restore the addressing mode */
 	if ((nor->addr_width == 4) &&
 	    (JEDEC_MFR(nor->info) != SNOR_MFR_SPANSION) &&
-	    !(nor->info->flags & SPI_NOR_4B_OPCODES))
+	    !(nor->info->flags & SPI_NOR_4B_OPCODES) &&
+	    (nor->flags & SNOR_F_BROKEN_RESET))
 		set_4byte(nor, nor->info, 0);
 }
 EXPORT_SYMBOL_GPL(spi_nor_restore);
@@ -2911,6 +2922,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 		params.hwcaps.mask |= SNOR_HWCAPS_READ_FAST;
 	}
 
+	if (of_property_read_bool(np, "broken-flash-reset"))
+		nor->flags |= SNOR_F_BROKEN_RESET;
+
 	/* Some devices cannot do fast-read, no matter what DT tells us */
 	if (info->flags & SPI_NOR_NO_FR)
 		params.hwcaps.mask &= ~SNOR_HWCAPS_READ_FAST;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index e60da0d34cc1..c922e97f205a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -235,6 +235,7 @@ enum spi_nor_option_flags {
 	SNOR_F_S3AN_ADDR_DEFAULT = BIT(3),
 	SNOR_F_READY_XSR_RDY	= BIT(4),
 	SNOR_F_USE_CLSR		= BIT(5),
+	SNOR_F_BROKEN_RESET	= BIT(6),
 };
 
 /**
-- 
cgit v1.2.3


From 3d3fe3c05d5a17ebdf55b936c51017c127c0ed44 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Wed, 25 Jul 2018 15:31:52 +0200
Subject: mtd: rawnand: allocate dynamically ONFI parameters during detection

Now that it is possible to do dynamic allocations during the
identification phase, convert the onfi_params structure (which is only
needed with ONFI compliant chips) into a pointer that will be allocated
only if needed.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c    | 54 +++++++++++++++++++++++--------------
 drivers/mtd/nand/raw/nand_micron.c  |  7 ++---
 drivers/mtd/nand/raw/nand_timings.c | 12 ++++-----
 include/linux/mtd/rawnand.h         |  6 ++---
 4 files changed, 47 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 00e80781124a..d527e448ce19 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5151,6 +5151,8 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_onfi_params *p;
+	struct onfi_params *onfi;
+	int onfi_version = 0;
 	char id[4];
 	int i, ret, val;
 
@@ -5206,21 +5208,19 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 	/* Check version */
 	val = le16_to_cpu(p->revision);
 	if (val & ONFI_VERSION_2_3)
-		chip->parameters.onfi.version = 23;
+		onfi_version = 23;
 	else if (val & ONFI_VERSION_2_2)
-		chip->parameters.onfi.version = 22;
+		onfi_version = 22;
 	else if (val & ONFI_VERSION_2_1)
-		chip->parameters.onfi.version = 21;
+		onfi_version = 21;
 	else if (val & ONFI_VERSION_2_0)
-		chip->parameters.onfi.version = 20;
+		onfi_version = 20;
 	else if (val & ONFI_VERSION_1_0)
-		chip->parameters.onfi.version = 10;
+		onfi_version = 10;
 
-	if (!chip->parameters.onfi.version) {
+	if (!onfi_version) {
 		pr_info("unsupported ONFI version: %d\n", val);
 		goto free_onfi_param_page;
-	} else {
-		ret = 1;
 	}
 
 	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
@@ -5257,7 +5257,7 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 	if (p->ecc_bits != 0xff) {
 		chip->ecc_strength_ds = p->ecc_bits;
 		chip->ecc_step_ds = 512;
-	} else if (chip->parameters.onfi.version >= 21 &&
+	} else if (onfi_version >= 21 &&
 		(le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
 
 		/*
@@ -5284,19 +5284,33 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		bitmap_set(chip->parameters.set_feature_list,
 			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
 	}
-	chip->parameters.onfi.tPROG = le16_to_cpu(p->t_prog);
-	chip->parameters.onfi.tBERS = le16_to_cpu(p->t_bers);
-	chip->parameters.onfi.tR = le16_to_cpu(p->t_r);
-	chip->parameters.onfi.tCCS = le16_to_cpu(p->t_ccs);
-	chip->parameters.onfi.async_timing_mode =
-		le16_to_cpu(p->async_timing_mode);
-	chip->parameters.onfi.vendor_revision =
-		le16_to_cpu(p->vendor_revision);
-	memcpy(chip->parameters.onfi.vendor, p->vendor,
-	       sizeof(p->vendor));
 
+	onfi = kzalloc(sizeof(*onfi), GFP_KERNEL);
+	if (!onfi) {
+		ret = -ENOMEM;
+		goto free_model;
+	}
+
+	onfi->version = onfi_version;
+	onfi->tPROG = le16_to_cpu(p->t_prog);
+	onfi->tBERS = le16_to_cpu(p->t_bers);
+	onfi->tR = le16_to_cpu(p->t_r);
+	onfi->tCCS = le16_to_cpu(p->t_ccs);
+	onfi->async_timing_mode = le16_to_cpu(p->async_timing_mode);
+	onfi->vendor_revision = le16_to_cpu(p->vendor_revision);
+	memcpy(onfi->vendor, p->vendor, sizeof(p->vendor));
+	chip->parameters.onfi = onfi;
+
+	/* Identification done, free the full ONFI parameter page and exit */
+	kfree(p);
+
+	return 1;
+
+free_model:
+	kfree(chip->parameters.model);
 free_onfi_param_page:
 	kfree(p);
+
 	return ret;
 }
 
@@ -5693,7 +5707,6 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 		}
 	}
 
-	chip->parameters.onfi.version = 0;
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
 		ret = nand_flash_detect_onfi(chip);
@@ -6031,6 +6044,7 @@ static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 static void nand_scan_ident_cleanup(struct nand_chip *chip)
 {
 	kfree(chip->parameters.model);
+	kfree(chip->parameters.onfi);
 }
 
 static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 656947d91841..f5dc0a7a2456 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -88,9 +88,10 @@ static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
 static int micron_nand_onfi_init(struct nand_chip *chip)
 {
 	struct nand_parameters *p = &chip->parameters;
-	struct nand_onfi_vendor_micron *micron = (void *)p->onfi.vendor;
 
-	if (chip->parameters.onfi.version && p->onfi.vendor_revision) {
+	if (p->onfi) {
+		struct nand_onfi_vendor_micron *micron = (void *)p->onfi->vendor;
+
 		chip->read_retries = micron->read_retry_options;
 		chip->setup_read_retry = micron_nand_setup_read_retry;
 	}
@@ -382,7 +383,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip)
 	u8 id[5];
 	int ret;
 
-	if (!chip->parameters.onfi.version)
+	if (!chip->parameters.onfi)
 		return MICRON_ON_DIE_UNSUPPORTED;
 
 	if (chip->bits_per_cell != 1)
diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c
index 9bb599106a31..ebc7b5f76f77 100644
--- a/drivers/mtd/nand/raw/nand_timings.c
+++ b/drivers/mtd/nand/raw/nand_timings.c
@@ -294,6 +294,7 @@ int onfi_fill_data_interface(struct nand_chip *chip,
 			     int timing_mode)
 {
 	struct nand_data_interface *iface = &chip->data_interface;
+	struct onfi_params *onfi = chip->parameters.onfi;
 
 	if (type != NAND_SDR_IFACE)
 		return -EINVAL;
@@ -308,17 +309,16 @@ int onfi_fill_data_interface(struct nand_chip *chip,
 	 * tPROG, tBERS, tR and tCCS.
 	 * These information are part of the ONFI parameter page.
 	 */
-	if (chip->parameters.onfi.version) {
-		struct nand_parameters *params = &chip->parameters;
+	if (onfi) {
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 
 		/* microseconds -> picoseconds */
-		timings->tPROG_max = 1000000ULL * params->onfi.tPROG;
-		timings->tBERS_max = 1000000ULL * params->onfi.tBERS;
-		timings->tR_max = 1000000ULL * params->onfi.tR;
+		timings->tPROG_max = 1000000ULL * onfi->tPROG;
+		timings->tBERS_max = 1000000ULL * onfi->tBERS;
+		timings->tR_max = 1000000ULL * onfi->tR;
 
 		/* nanoseconds -> picoseconds */
-		timings->tCCS_min = 1000UL * params->onfi.tCCS;
+		timings->tCCS_min = 1000UL * onfi->tCCS;
 	} else {
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 		/*
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 099fa166569a..efb2345359bb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -482,7 +482,7 @@ struct nand_parameters {
 	DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER);
 
 	/* ONFI parameters */
-	struct onfi_params onfi;
+	struct onfi_params *onfi;
 };
 
 /* The maximum expected count of bytes in the NAND ID sequence */
@@ -1618,10 +1618,10 @@ struct platform_nand_data {
 /* return the supported asynchronous timing mode. */
 static inline int onfi_get_async_timing_mode(struct nand_chip *chip)
 {
-	if (!chip->parameters.onfi.version)
+	if (!chip->parameters.onfi)
 		return ONFI_TIMING_MODE_UNKNOWN;
 
-	return chip->parameters.onfi.async_timing_mode;
+	return chip->parameters.onfi->async_timing_mode;
 }
 
 int onfi_fill_data_interface(struct nand_chip *chip,
-- 
cgit v1.2.3


From 4764c0da69dc500791c840c88dfd940d13b452e7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 23 Jul 2018 17:18:37 +0100
Subject: rxrpc: Trace packet transmission

Trace successful packet transmission (kernel_sendmsg() succeeded, that is)
in AF_RXRPC.  We can share the enum that defines the transmission points
with the trace_rxrpc_tx_fail() tracepoint, so rename its constants to be
applicable to both.

Also, save the internal call->debug_id in the rxrpc_channel struct so that
it can be used in retransmission trace lines.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 107 +++++++++++++++++++++++++++++--------------
 net/rxrpc/ar-internal.h      |   1 +
 net/rxrpc/conn_client.c      |   1 +
 net/rxrpc/conn_event.c       |  13 ++++--
 net/rxrpc/input.c            |  11 ++++-
 net/rxrpc/local_event.c      |   5 +-
 net/rxrpc/output.c           |  32 ++++++++++---
 net/rxrpc/rxkad.c            |   7 ++-
 8 files changed, 127 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 4fff00e9da8a..2aa6f615b60d 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -211,18 +211,18 @@ enum rxrpc_congest_change {
 	rxrpc_cong_saw_nack,
 };
 
-enum rxrpc_tx_fail_trace {
-	rxrpc_tx_fail_call_abort,
-	rxrpc_tx_fail_call_ack,
-	rxrpc_tx_fail_call_data_frag,
-	rxrpc_tx_fail_call_data_nofrag,
-	rxrpc_tx_fail_call_final_resend,
-	rxrpc_tx_fail_conn_abort,
-	rxrpc_tx_fail_conn_challenge,
-	rxrpc_tx_fail_conn_response,
-	rxrpc_tx_fail_reject,
-	rxrpc_tx_fail_version_keepalive,
-	rxrpc_tx_fail_version_reply,
+enum rxrpc_tx_point {
+	rxrpc_tx_point_call_abort,
+	rxrpc_tx_point_call_ack,
+	rxrpc_tx_point_call_data_frag,
+	rxrpc_tx_point_call_data_nofrag,
+	rxrpc_tx_point_call_final_resend,
+	rxrpc_tx_point_conn_abort,
+	rxrpc_tx_point_rxkad_challenge,
+	rxrpc_tx_point_rxkad_response,
+	rxrpc_tx_point_reject,
+	rxrpc_tx_point_version_keepalive,
+	rxrpc_tx_point_version_reply,
 };
 
 #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */
@@ -452,18 +452,18 @@ enum rxrpc_tx_fail_trace {
 	EM(RXRPC_CALL_LOCAL_ERROR,		"LocalError") \
 	E_(RXRPC_CALL_NETWORK_ERROR,		"NetError")
 
-#define rxrpc_tx_fail_traces \
-	EM(rxrpc_tx_fail_call_abort,		"CallAbort") \
-	EM(rxrpc_tx_fail_call_ack,		"CallAck") \
-	EM(rxrpc_tx_fail_call_data_frag,	"CallDataFrag") \
-	EM(rxrpc_tx_fail_call_data_nofrag,	"CallDataNofrag") \
-	EM(rxrpc_tx_fail_call_final_resend,	"CallFinalResend") \
-	EM(rxrpc_tx_fail_conn_abort,		"ConnAbort") \
-	EM(rxrpc_tx_fail_conn_challenge,	"ConnChall") \
-	EM(rxrpc_tx_fail_conn_response,		"ConnResp") \
-	EM(rxrpc_tx_fail_reject,		"Reject") \
-	EM(rxrpc_tx_fail_version_keepalive,	"VerKeepalive") \
-	E_(rxrpc_tx_fail_version_reply,		"VerReply")
+#define rxrpc_tx_points \
+	EM(rxrpc_tx_point_call_abort,		"CallAbort") \
+	EM(rxrpc_tx_point_call_ack,		"CallAck") \
+	EM(rxrpc_tx_point_call_data_frag,	"CallDataFrag") \
+	EM(rxrpc_tx_point_call_data_nofrag,	"CallDataNofrag") \
+	EM(rxrpc_tx_point_call_final_resend,	"CallFinalResend") \
+	EM(rxrpc_tx_point_conn_abort,		"ConnAbort") \
+	EM(rxrpc_tx_point_reject,		"Reject") \
+	EM(rxrpc_tx_point_rxkad_challenge,	"RxkadChall") \
+	EM(rxrpc_tx_point_rxkad_response,	"RxkadResp") \
+	EM(rxrpc_tx_point_version_keepalive,	"VerKeepalive") \
+	E_(rxrpc_tx_point_version_reply,	"VerReply")
 
 /*
  * Export enum symbols via userspace.
@@ -488,7 +488,7 @@ rxrpc_propose_ack_traces;
 rxrpc_propose_ack_outcomes;
 rxrpc_congest_modes;
 rxrpc_congest_changes;
-rxrpc_tx_fail_traces;
+rxrpc_tx_points;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -801,7 +801,7 @@ TRACE_EVENT(rxrpc_transmit,
 	    );
 
 TRACE_EVENT(rxrpc_rx_data,
-	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+	    TP_PROTO(unsigned int call, rxrpc_seq_t seq,
 		     rxrpc_serial_t serial, u8 flags, u8 anno),
 
 	    TP_ARGS(call, seq, serial, flags, anno),
@@ -815,7 +815,7 @@ TRACE_EVENT(rxrpc_rx_data,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call->debug_id;
+		    __entry->call = call;
 		    __entry->seq = seq;
 		    __entry->serial = serial;
 		    __entry->flags = flags;
@@ -918,6 +918,37 @@ TRACE_EVENT(rxrpc_rx_rwind_change,
 		      __entry->wake ? " wake" : "")
 	    );
 
+TRACE_EVENT(rxrpc_tx_packet,
+	    TP_PROTO(unsigned int call_id, struct rxrpc_wire_header *whdr,
+		     enum rxrpc_tx_point where),
+
+	    TP_ARGS(call_id, whdr, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,			call	)
+		    __field(enum rxrpc_tx_point,		where	)
+		    __field_struct(struct rxrpc_wire_header,	whdr	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call_id;
+		    memcpy(&__entry->whdr, whdr, sizeof(__entry->whdr));
+			   ),
+
+	    TP_printk("c=%08x %08x:%08x:%08x:%04x %08x %08x %02x %02x %s %s",
+		      __entry->call,
+		      ntohl(__entry->whdr.epoch),
+		      ntohl(__entry->whdr.cid),
+		      ntohl(__entry->whdr.callNumber),
+		      ntohs(__entry->whdr.serviceId),
+		      ntohl(__entry->whdr.serial),
+		      ntohl(__entry->whdr.seq),
+		      __entry->whdr.type, __entry->whdr.flags,
+		      __entry->whdr.type <= 15 ?
+		      __print_symbolic(__entry->whdr.type, rxrpc_pkts) : "?UNK",
+		      __print_symbolic(__entry->where, rxrpc_tx_points))
+	    );
+
 TRACE_EVENT(rxrpc_tx_data,
 	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
 		     rxrpc_serial_t serial, u8 flags, bool retrans, bool lose),
@@ -928,6 +959,8 @@ TRACE_EVENT(rxrpc_tx_data,
 		    __field(unsigned int,		call		)
 		    __field(rxrpc_seq_t,		seq		)
 		    __field(rxrpc_serial_t,		serial		)
+		    __field(u32,			cid		)
+		    __field(u32,			call_id		)
 		    __field(u8,				flags		)
 		    __field(bool,			retrans		)
 		    __field(bool,			lose		)
@@ -935,6 +968,8 @@ TRACE_EVENT(rxrpc_tx_data,
 
 	    TP_fast_assign(
 		    __entry->call = call->debug_id;
+		    __entry->cid = call->cid;
+		    __entry->call_id = call->call_id;
 		    __entry->seq = seq;
 		    __entry->serial = serial;
 		    __entry->flags = flags;
@@ -942,8 +977,10 @@ TRACE_EVENT(rxrpc_tx_data,
 		    __entry->lose = lose;
 			   ),
 
-	    TP_printk("c=%08x DATA %08x q=%08x fl=%02x%s%s",
+	    TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s%s",
 		      __entry->call,
+		      __entry->cid,
+		      __entry->call_id,
 		      __entry->serial,
 		      __entry->seq,
 		      __entry->flags,
@@ -952,7 +989,7 @@ TRACE_EVENT(rxrpc_tx_data,
 	    );
 
 TRACE_EVENT(rxrpc_tx_ack,
-	    TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
+	    TP_PROTO(unsigned int call, rxrpc_serial_t serial,
 		     rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial,
 		     u8 reason, u8 n_acks),
 
@@ -968,7 +1005,7 @@ TRACE_EVENT(rxrpc_tx_ack,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call ? call->debug_id : 0;
+		    __entry->call = call;
 		    __entry->serial = serial;
 		    __entry->ack_first = ack_first;
 		    __entry->ack_serial = ack_serial;
@@ -1434,29 +1471,29 @@ TRACE_EVENT(rxrpc_rx_icmp,
 
 TRACE_EVENT(rxrpc_tx_fail,
 	    TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, int ret,
-		     enum rxrpc_tx_fail_trace what),
+		     enum rxrpc_tx_point where),
 
-	    TP_ARGS(debug_id, serial, ret, what),
+	    TP_ARGS(debug_id, serial, ret, where),
 
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		debug_id	)
 		    __field(rxrpc_serial_t,		serial		)
 		    __field(int,			ret		)
-		    __field(enum rxrpc_tx_fail_trace,   what		)
+		    __field(enum rxrpc_tx_point,	where		)
 			     ),
 
 	    TP_fast_assign(
 		    __entry->debug_id = debug_id;
 		    __entry->serial = serial;
 		    __entry->ret = ret;
-		    __entry->what = what;
+		    __entry->where = where;
 			   ),
 
 	    TP_printk("c=%08x r=%x ret=%d %s",
 		      __entry->debug_id,
 		      __entry->serial,
 		      __entry->ret,
-		      __print_symbolic(__entry->what, rxrpc_tx_fail_traces))
+		      __print_symbolic(__entry->where, rxrpc_tx_points))
 	    );
 
 TRACE_EVENT(rxrpc_call_reset,
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5fb7d3254d9e..7eee955a768a 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -420,6 +420,7 @@ struct rxrpc_connection {
 	struct rxrpc_channel {
 		unsigned long		final_ack_at;	/* Time at which to issue final ACK */
 		struct rxrpc_call __rcu	*call;		/* Active call */
+		unsigned int		call_debug_id;	/* call->debug_id */
 		u32			call_id;	/* ID of current call */
 		u32			call_counter;	/* Call ID counter */
 		u32			last_call;	/* ID of last call */
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index e4bfbd7e48a8..f8f37188a932 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -590,6 +590,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	 */
 	smp_wmb();
 	chan->call_id	= call_id;
+	chan->call_debug_id = call->debug_id;
 	rcu_assign_pointer(chan->call, call);
 	wake_up(&call->waitq);
 }
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index d46a68807f08..84d40ba9856f 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -129,7 +129,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		_proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
 		break;
 	case RXRPC_PACKET_TYPE_ACK:
-		trace_rxrpc_tx_ack(NULL, serial,
+		trace_rxrpc_tx_ack(chan->call_debug_id, serial,
 				   ntohl(pkt.ack.firstPacket),
 				   ntohl(pkt.ack.serial),
 				   pkt.ack.reason, 0);
@@ -140,8 +140,11 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len);
 	conn->params.peer->last_tx_at = ktime_get_real();
 	if (ret < 0)
-		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-				    rxrpc_tx_fail_call_final_resend);
+		trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret,
+				    rxrpc_tx_point_call_final_resend);
+	else
+		trace_rxrpc_tx_packet(chan->call_debug_id, &pkt.whdr,
+				      rxrpc_tx_point_call_final_resend);
 
 	_leave("");
 }
@@ -242,11 +245,13 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
 	if (ret < 0) {
 		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-				    rxrpc_tx_fail_conn_abort);
+				    rxrpc_tx_point_conn_abort);
 		_debug("sendmsg failed: %d", ret);
 		return -EAGAIN;
 	}
 
+	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
 	conn->params.peer->last_tx_at = ktime_get_real();
 
 	_leave(" = 0");
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 608d078a4981..8989d760b6b2 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -496,7 +496,7 @@ next_subpacket:
 			return rxrpc_proto_abort("LSA", call, seq);
 	}
 
-	trace_rxrpc_rx_data(call, seq, serial, flags, annotation);
+	trace_rxrpc_rx_data(call->debug_id, seq, serial, flags, annotation);
 	if (before_eq(seq, hard_ack)) {
 		ack = RXRPC_ACK_DUPLICATE;
 		ack_serial = serial;
@@ -592,6 +592,10 @@ ack:
 		rxrpc_propose_ACK(call, ack, skew, ack_serial,
 				  immediate_ack, true,
 				  rxrpc_propose_ack_input_data);
+	else
+		rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, skew, serial,
+				  false, true,
+				  rxrpc_propose_ack_input_data);
 
 	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
 		rxrpc_notify_socket(call);
@@ -1262,6 +1266,11 @@ void rxrpc_data_ready(struct sock *udp_sk)
 			/* But otherwise we need to retransmit the final packet
 			 * from data cached in the connection record.
 			 */
+			if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+				trace_rxrpc_rx_data(chan->call_debug_id,
+						    sp->hdr.seq,
+						    sp->hdr.serial,
+						    sp->hdr.flags, 0);
 			rxrpc_post_packet_to_conn(conn, skb);
 			goto out_unlock;
 		}
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 8325f1b86840..13bd8a4dfac7 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -72,7 +72,10 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
 	ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
 	if (ret < 0)
 		trace_rxrpc_tx_fail(local->debug_id, 0, ret,
-				    rxrpc_tx_fail_version_reply);
+				    rxrpc_tx_point_version_reply);
+	else
+		trace_rxrpc_tx_packet(local->debug_id, &whdr,
+				      rxrpc_tx_point_version_reply);
 
 	_leave("");
 }
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f03de1c59ba3..801dbf3d3478 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -183,7 +183,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 
 	serial = atomic_inc_return(&conn->serial);
 	pkt->whdr.serial = htonl(serial);
-	trace_rxrpc_tx_ack(call, serial,
+	trace_rxrpc_tx_ack(call->debug_id, serial,
 			   ntohl(pkt->ack.firstPacket),
 			   ntohl(pkt->ack.serial),
 			   pkt->ack.reason, pkt->ack.nAcks);
@@ -212,7 +212,10 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	conn->params.peer->last_tx_at = ktime_get_real();
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-				    rxrpc_tx_fail_call_ack);
+				    rxrpc_tx_point_call_ack);
+	else
+		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
+				      rxrpc_tx_point_call_ack);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -299,7 +302,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	conn->params.peer->last_tx_at = ktime_get_real();
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-				    rxrpc_tx_fail_call_abort);
+				    rxrpc_tx_point_call_abort);
+	else
+		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
+				      rxrpc_tx_point_call_abort);
 
 
 	rxrpc_put_connection(conn);
@@ -396,7 +402,10 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	up_read(&conn->params.local->defrag_sem);
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-				    rxrpc_tx_fail_call_data_nofrag);
+				    rxrpc_tx_point_call_data_nofrag);
+	else
+		trace_rxrpc_tx_packet(call->debug_id, &whdr,
+				      rxrpc_tx_point_call_data_nofrag);
 	if (ret == -EMSGSIZE)
 		goto send_fragmentable;
 
@@ -488,7 +497,10 @@ send_fragmentable:
 
 	if (ret < 0)
 		trace_rxrpc_tx_fail(call->debug_id, serial, ret,
-				    rxrpc_tx_fail_call_data_frag);
+				    rxrpc_tx_point_call_data_frag);
+	else
+		trace_rxrpc_tx_packet(call->debug_id, &whdr,
+				      rxrpc_tx_point_call_data_frag);
 
 	up_write(&conn->params.local->defrag_sem);
 	goto done;
@@ -545,7 +557,10 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 			ret = kernel_sendmsg(local->socket, &msg, iov, 2, size);
 			if (ret < 0)
 				trace_rxrpc_tx_fail(local->debug_id, 0, ret,
-						    rxrpc_tx_fail_reject);
+						    rxrpc_tx_point_reject);
+			else
+				trace_rxrpc_tx_packet(local->debug_id, &whdr,
+						      rxrpc_tx_point_reject);
 		}
 
 		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
@@ -597,7 +612,10 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
 	ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len);
 	if (ret < 0)
 		trace_rxrpc_tx_fail(peer->debug_id, 0, ret,
-				    rxrpc_tx_fail_version_keepalive);
+				    rxrpc_tx_point_version_keepalive);
+	else
+		trace_rxrpc_tx_packet(peer->debug_id, &whdr,
+				      rxrpc_tx_point_version_keepalive);
 
 	peer->last_tx_at = ktime_get_real();
 	_leave("");
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 278ac0807a60..6988073ae842 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -665,11 +665,13 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
 	if (ret < 0) {
 		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-				    rxrpc_tx_fail_conn_challenge);
+				    rxrpc_tx_point_rxkad_challenge);
 		return -EAGAIN;
 	}
 
 	conn->params.peer->last_tx_at = ktime_get_real();
+	trace_rxrpc_tx_packet(conn->debug_id, &whdr,
+			      rxrpc_tx_point_rxkad_challenge);
 	_leave(" = 0");
 	return 0;
 }
@@ -721,11 +723,12 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
 	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len);
 	if (ret < 0) {
 		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-				    rxrpc_tx_fail_conn_response);
+				    rxrpc_tx_point_rxkad_response);
 		return -EAGAIN;
 	}
 
 	conn->params.peer->last_tx_at = ktime_get_real();
+	trace_rxrpc_tx_packet(0, &whdr, rxrpc_tx_point_rxkad_response);
 	_leave(" = 0");
 	return 0;
 }
-- 
cgit v1.2.3


From 197445aff13c164794efb6d87a28762e843622d8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 23 Jul 2018 17:18:37 +0100
Subject: rxrpc: Fix ACK proposal tracepoint

Fix the ACK proposal tracepoint outcomes list by making the one that's an
empty string not an empty string - which gets rendered as a hex number
string instead.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 2aa6f615b60d..c1a800a6dee3 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -396,7 +396,7 @@ enum rxrpc_tx_point {
 #define rxrpc_propose_ack_outcomes \
 	EM(rxrpc_propose_ack_subsume,		" Subsume") \
 	EM(rxrpc_propose_ack_update,		" Update") \
-	E_(rxrpc_propose_ack_use,		"")
+	E_(rxrpc_propose_ack_use,		" New")
 
 #define rxrpc_congest_modes \
 	EM(RXRPC_CALL_CONGEST_AVOIDANCE,	"CongAvoid") \
-- 
cgit v1.2.3


From 4272d3034e69aea6e17085ba285d14f5824b430d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 23 Jul 2018 17:18:37 +0100
Subject: rxrpc: Trace socket notification

Trace notifications from the softirq side of the socket to the
process-context side.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 20 ++++++++++++++++++++
 net/rxrpc/input.c            |  4 +++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index c1a800a6dee3..196587b8f204 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -1528,6 +1528,26 @@ TRACE_EVENT(rxrpc_call_reset,
 		      __entry->tx_seq, __entry->rx_seq)
 	    );
 
+TRACE_EVENT(rxrpc_notify_socket,
+	    TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial),
+
+	    TP_ARGS(debug_id, serial),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		debug_id	)
+		    __field(rxrpc_serial_t,		serial		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->debug_id = debug_id;
+		    __entry->serial = serial;
+			   ),
+
+	    TP_printk("c=%08x r=%08x",
+		      __entry->debug_id,
+		      __entry->serial)
+	    );
+
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 8989d760b6b2..cfdc199c6351 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -597,8 +597,10 @@ ack:
 				  false, true,
 				  rxrpc_propose_ack_input_data);
 
-	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
+	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) {
+		trace_rxrpc_notify_socket(call->debug_id, serial);
 		rxrpc_notify_socket(call);
+	}
 	_leave(" [queued]");
 }
 
-- 
cgit v1.2.3


From 304d34360b099020a12af2abb7e1ac506f4ba16d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 28 Jul 2018 10:19:13 +0200
Subject: spi: add flags parameter to txrx_word function pointers

Add the capability to specify the flag parameter used in
bitbang_txrx_be_cpha{0,1} through the txrx_word function pointers of
spi_bitbang data structure. That feature will be used to add spi-3wire
support to the spi-gpio controller

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c         |  2 +-
 drivers/spi/spi-bitbang.c       | 34 +++++++++++++++++++++-------------
 drivers/spi/spi-butterfly.c     |  4 ++--
 drivers/spi/spi-gpio.c          | 32 ++++++++++++++++----------------
 drivers/spi/spi-lm70llp.c       |  5 +++--
 drivers/spi/spi-sh-sci.c        | 20 ++++++++++++--------
 drivers/spi/spi-xtensa-xtfpga.c |  2 +-
 include/linux/spi/spi_bitbang.h |  2 +-
 8 files changed, 57 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 0719bd484891..3f6b657394de 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -176,7 +176,7 @@ static void ath79_spi_cleanup(struct spi_device *spi)
 }
 
 static u32 ath79_spi_txrx_mode0(struct spi_device *spi, unsigned int nsecs,
-			       u32 word, u8 bits)
+			       u32 word, u8 bits, unsigned flags)
 {
 	struct ath79_spi *sp = ath79_spidev_to_sp(spi);
 	u32 ioc = sp->ioc_base;
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 3aa9e6e3dac8..76f1b534bdd7 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -49,22 +49,26 @@
 struct spi_bitbang_cs {
 	unsigned	nsecs;	/* (clock cycle time)/2 */
 	u32		(*txrx_word)(struct spi_device *spi, unsigned nsecs,
-					u32 word, u8 bits);
+					u32 word, u8 bits, unsigned flags);
 	unsigned	(*txrx_bufs)(struct spi_device *,
 					u32 (*txrx_word)(
 						struct spi_device *spi,
 						unsigned nsecs,
-						u32 word, u8 bits),
-					unsigned, struct spi_transfer *);
+						u32 word, u8 bits,
+						unsigned flags),
+					unsigned, struct spi_transfer *,
+					unsigned);
 };
 
 static unsigned bitbang_txrx_8(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -76,7 +80,7 @@ static unsigned bitbang_txrx_8(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 1;
@@ -88,9 +92,11 @@ static unsigned bitbang_txrx_16(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -102,7 +108,7 @@ static unsigned bitbang_txrx_16(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 2;
@@ -114,9 +120,11 @@ static unsigned bitbang_txrx_32(
 	struct spi_device	*spi,
 	u32			(*txrx_word)(struct spi_device *spi,
 					unsigned nsecs,
-					u32 word, u8 bits),
+					u32 word, u8 bits,
+					unsigned flags),
 	unsigned		ns,
-	struct spi_transfer	*t
+	struct spi_transfer	*t,
+	unsigned flags
 ) {
 	unsigned		bits = t->bits_per_word;
 	unsigned		count = t->len;
@@ -128,7 +136,7 @@ static unsigned bitbang_txrx_32(
 
 		if (tx)
 			word = *tx++;
-		word = txrx_word(spi, ns, word, bits);
+		word = txrx_word(spi, ns, word, bits, flags);
 		if (rx)
 			*rx++ = word;
 		count -= 4;
@@ -236,7 +244,7 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
 	struct spi_bitbang_cs	*cs = spi->controller_state;
 	unsigned		nsecs = cs->nsecs;
 
-	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t);
+	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, 0);
 }
 
 /*----------------------------------------------------------------------*/
diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c
index 22a31e4a1a11..1a3510215841 100644
--- a/drivers/spi/spi-butterfly.c
+++ b/drivers/spi/spi-butterfly.c
@@ -144,9 +144,9 @@ static void butterfly_chipselect(struct spi_device *spi, int value)
 
 static u32
 butterfly_txrx_word_mode0(struct spi_device *spi, unsigned nsecs, u32 word,
-			  u8 bits)
+			  u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 /*----------------------------------------------------------------------*/
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index 6ae92d4dca19..be68298cbd9c 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -149,27 +149,27 @@ static inline int getmiso(const struct spi_device *spi)
  */
 
 static u32 spi_gpio_txrx_word_mode0(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode1(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode2(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 /*
@@ -183,30 +183,30 @@ static u32 spi_gpio_txrx_word_mode3(struct spi_device *spi,
  */
 
 static u32 spi_gpio_spec_txrx_word_mode0(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode1(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode2(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 spi_gpio_spec_txrx_word_mode3(struct spi_device *spi,
-		unsigned nsecs, u32 word, u8 bits)
+		unsigned nsecs, u32 word, u8 bits, unsigned flags)
 {
-	unsigned flags = spi->master->flags;
+	flags = spi->master->flags;
 	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c
index 61ee0f4269ae..4549efd792da 100644
--- a/drivers/spi/spi-lm70llp.c
+++ b/drivers/spi/spi-lm70llp.c
@@ -188,9 +188,10 @@ static void lm70_chipselect(struct spi_device *spi, int value)
 /*
  * Our actual bitbanger routine.
  */
-static u32 lm70_txrx(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits)
+static u32 lm70_txrx(struct spi_device *spi, unsigned nsecs, u32 word, u8 bits,
+		     unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static void spi_lm70llp_attach(struct parport *p)
diff --git a/drivers/spi/spi-sh-sci.c b/drivers/spi/spi-sh-sci.c
index a9beeeed812c..393701cfca3c 100644
--- a/drivers/spi/spi-sh-sci.c
+++ b/drivers/spi/spi-sh-sci.c
@@ -80,27 +80,31 @@ static inline u32 getmiso(struct spi_device *dev)
 #include "spi-bitbang-txrx.h"
 
 static u32 sh_sci_spi_txrx_mode0(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode1(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 0, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 0, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode2(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha0(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha0(spi, nsecs, 1, flags, word, bits);
 }
 
 static u32 sh_sci_spi_txrx_mode3(struct spi_device *spi,
-				      unsigned nsecs, u32 word, u8 bits)
+				 unsigned nsecs, u32 word, u8 bits,
+				 unsigned flags)
 {
-	return bitbang_txrx_be_cpha1(spi, nsecs, 1, 0, word, bits);
+	return bitbang_txrx_be_cpha1(spi, nsecs, 1, flags, word, bits);
 }
 
 static void sh_sci_spi_chipselect(struct spi_device *dev, int value)
diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c
index be6155cba9de..8ce04f829a80 100644
--- a/drivers/spi/spi-xtensa-xtfpga.c
+++ b/drivers/spi/spi-xtensa-xtfpga.c
@@ -54,7 +54,7 @@ static inline void xtfpga_spi_wait_busy(struct xtfpga_spi *xspi)
 }
 
 static u32 xtfpga_spi_txrx_word(struct spi_device *spi, unsigned nsecs,
-				u32 v, u8 bits)
+				u32 v, u8 bits, unsigned flags)
 {
 	struct xtfpga_spi *xspi = spi_master_get_devdata(spi->master);
 
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index c1e61641a7f1..5847f00ef9cf 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -30,7 +30,7 @@ struct spi_bitbang {
 	/* txrx_word[SPI_MODE_*]() just looks like a shift register */
 	u32	(*txrx_word[4])(struct spi_device *spi,
 			unsigned nsecs,
-			u32 word, u8 bits);
+			u32 word, u8 bits, unsigned flags);
 };
 
 /* you can call these default bitbang->master methods from your custom
-- 
cgit v1.2.3


From 4b859db2c60692560afbfef1b030d0ddef57b7ee Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 28 Jul 2018 10:19:14 +0200
Subject: spi: spi-gpio: add SPI_3WIRE support

Add SPI_3WIRE support to spi-gpio controller introducing
set_line_direction function pointer in spi_bitbang data structure.
Spi-gpio controller has been tested using hts221 temp/rh iio sensor
running in 3wire mode and lsm6dsm running in 4wire mode

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang.c       | 16 ++++++++++++++++
 drivers/spi/spi-gpio.c          | 17 ++++++++++++++++-
 include/linux/spi/spi_bitbang.h |  1 +
 3 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 76f1b534bdd7..f29176000b8d 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -243,7 +243,23 @@ static int spi_bitbang_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct spi_bitbang_cs	*cs = spi->controller_state;
 	unsigned		nsecs = cs->nsecs;
+	struct spi_bitbang	*bitbang;
+
+	bitbang = spi_master_get_devdata(spi->master);
+	if (bitbang->set_line_direction) {
+		int err;
 
+		err = bitbang->set_line_direction(spi, !!(t->tx_buf));
+		if (err < 0)
+			return err;
+	}
+
+	if (spi->mode & SPI_3WIRE) {
+		unsigned flags;
+
+		flags = t->tx_buf ? SPI_MASTER_NO_RX : SPI_MASTER_NO_TX;
+		return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, flags);
+	}
 	return cs->txrx_bufs(spi, cs->txrx_word, nsecs, t, 0);
 }
 
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index be68298cbd9c..0626e6e3ea0c 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -121,7 +121,10 @@ static inline int getmiso(const struct spi_device *spi)
 {
 	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
 
-	return !!gpiod_get_value_cansleep(spi_gpio->miso);
+	if (spi->mode & SPI_3WIRE)
+		return !!gpiod_get_value_cansleep(spi_gpio->mosi);
+	else
+		return !!gpiod_get_value_cansleep(spi_gpio->miso);
 }
 
 /*
@@ -250,6 +253,16 @@ static int spi_gpio_setup(struct spi_device *spi)
 	return status;
 }
 
+static int spi_gpio_set_direction(struct spi_device *spi, bool output)
+{
+	struct spi_gpio *spi_gpio = spi_to_spi_gpio(spi);
+
+	if (output)
+		return gpiod_direction_output(spi_gpio->mosi, 1);
+	else
+		return gpiod_direction_input(spi_gpio->mosi);
+}
+
 static void spi_gpio_cleanup(struct spi_device *spi)
 {
 	spi_bitbang_cleanup(spi);
@@ -395,6 +408,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 		return status;
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
+	master->mode_bits = SPI_3WIRE | SPI_CPHA | SPI_CPOL;
 	master->flags = master_flags;
 	master->bus_num = pdev->id;
 	/* The master needs to think there is a chipselect even if not connected */
@@ -407,6 +421,7 @@ static int spi_gpio_probe(struct platform_device *pdev)
 
 	spi_gpio->bitbang.master = master;
 	spi_gpio->bitbang.chipselect = spi_gpio_chipselect;
+	spi_gpio->bitbang.set_line_direction = spi_gpio_set_direction;
 
 	if ((master_flags & (SPI_MASTER_NO_TX | SPI_MASTER_NO_RX)) == 0) {
 		spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0;
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 5847f00ef9cf..b7e021b274dc 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -31,6 +31,7 @@ struct spi_bitbang {
 	u32	(*txrx_word[4])(struct spi_device *spi,
 			unsigned nsecs,
 			u32 word, u8 bits, unsigned flags);
+	int	(*set_line_direction)(struct spi_device *spi, bool output);
 };
 
 /* you can call these default bitbang->master methods from your custom
-- 
cgit v1.2.3


From 83ba4645152d1177c161750e1064e3a8e7cee19b Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Tue, 31 Jul 2018 21:18:11 +0200
Subject: net: add helpers checking if socket can be bound to nonlocal address

The construction "net->ipv4.sysctl_ip_nonlocal_bind || inet->freebind
|| inet->transparent" is present three times and its IPv6 counterpart
is also present three times. We introduce two small helpers to
characterize these tests uniformly.

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 8 ++++++++
 include/net/ipv6.h      | 7 +++++++
 net/ipv4/af_inet.c      | 3 +--
 net/ipv4/ping.c         | 6 ++----
 net/ipv6/af_inet6.c     | 6 ++----
 net/ipv6/datagram.c     | 3 +--
 6 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 314be484c696..e03b93360f33 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -359,4 +359,12 @@ static inline bool inet_get_convert_csum(struct sock *sk)
 	return !!inet_sk(sk)->convert_csum;
 }
 
+
+static inline bool inet_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv4.sysctl_ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index a44509f4e985..82deb684ba73 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,6 +766,13 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f2a0a3bab6b5..ee707b91d1a7 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -486,8 +486,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	 *  is temporarily down)
 	 */
 	err = -EADDRNOTAVAIL;
-	if (!net->ipv4.sysctl_ip_nonlocal_bind &&
-	    !(inet->freebind || inet->transparent) &&
+	if (!inet_can_nonlocal_bind(net, inet) &&
 	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
 	    chk_addr_ret != RTN_LOCAL &&
 	    chk_addr_ret != RTN_MULTICAST &&
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index b54c964ad925..8d7aaf118a30 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -320,8 +320,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 		if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
 			chk_addr_ret = RTN_LOCAL;
 
-		if ((net->ipv4.sysctl_ip_nonlocal_bind == 0 &&
-		    isk->freebind == 0 && isk->transparent == 0 &&
+		if ((!inet_can_nonlocal_bind(net, isk) &&
 		     chk_addr_ret != RTN_LOCAL) ||
 		    chk_addr_ret == RTN_MULTICAST ||
 		    chk_addr_ret == RTN_BROADCAST)
@@ -361,8 +360,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
 						    scoped);
 		rcu_read_unlock();
 
-		if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
-		      isk->freebind || isk->transparent || has_addr ||
+		if (!(ipv6_can_nonlocal_bind(net, isk) || has_addr ||
 		      addr_type == IPV6_ADDR_ANY))
 			return -EADDRNOTAVAIL;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c9535354149f..020f6e14a7af 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -322,8 +322,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		/* Reproduce AF_INET checks to make the bindings consistent */
 		v4addr = addr->sin6_addr.s6_addr32[3];
 		chk_addr_ret = inet_addr_type(net, v4addr);
-		if (!net->ipv4.sysctl_ip_nonlocal_bind &&
-		    !(inet->freebind || inet->transparent) &&
+		if (!inet_can_nonlocal_bind(net, inet) &&
 		    v4addr != htonl(INADDR_ANY) &&
 		    chk_addr_ret != RTN_LOCAL &&
 		    chk_addr_ret != RTN_MULTICAST &&
@@ -362,8 +361,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			 */
 			v4addr = LOOPBACK4_IPV6;
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
-				if (!net->ipv6.sysctl.ip_nonlocal_bind &&
-				    !(inet->freebind || inet->transparent) &&
+				if (!ipv6_can_nonlocal_bind(net, inet) &&
 				    !ipv6_chk_addr(net, &addr->sin6_addr,
 						   dev, 0)) {
 					err = -EADDRNOTAVAIL;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f0264dfd38de..1ede7a16a0be 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -803,8 +803,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-				if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
-				      inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
+				if (!ipv6_can_nonlocal_bind(net, inet_sk(sk)) &&
 				    !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
 							     dev, !strict, 0,
 							     IFA_F_TENTATIVE) &&
-- 
cgit v1.2.3


From 432e05d328921c68c35bfdeff7d7b7400b8e3d1a Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:03 +0200
Subject: net: ipv4: Control SKB reprioritization after forwarding

After IPv4 packets are forwarded, the priority of the corresponding SKB
is updated according to the TOS field of IPv4 header. This overrides any
prioritization done earlier by e.g. an skbedit action or ingress-qos-map
defined at a vlan device.

Such overriding may not always be desirable. Even if the packet ends up
being routed, which implies this is an L3 network node, an administrator
may wish to preserve whatever prioritization was done earlier on in the
pipeline.

Therefore introduce a sysctl that controls this behavior. Keep the
default value at 1 to maintain backward-compatible behavior.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 9 +++++++++
 include/net/netns/ipv4.h               | 1 +
 net/ipv4/af_inet.c                     | 1 +
 net/ipv4/ip_forward.c                  | 3 ++-
 net/ipv4/sysctl_net_ipv4.c             | 9 +++++++++
 5 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 77c37fb0b6a6..e74515ecaa9c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -81,6 +81,15 @@ fib_multipath_hash_policy - INTEGER
 	0 - Layer 3
 	1 - Layer 4
 
+ip_forward_update_priority - INTEGER
+	Whether to update SKB priority from "TOS" field in IPv4 header after it
+	is forwarded. The new SKB priority is mapped from TOS field value
+	according to an rt_tos2priority table (see e.g. man tc-prio).
+	Default: 1 (Update priority.)
+	Possible values:
+	0 - Do not update priority.
+	1 - Update priority.
+
 route/max_size - INTEGER
 	Maximum number of routes allowed in the kernel.  Increase
 	this when using large numbers of interfaces and/or routes.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 661348f23ea5..e47503b4e4d1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -98,6 +98,7 @@ struct netns_ipv4 {
 	int sysctl_ip_default_ttl;
 	int sysctl_ip_no_pmtu_disc;
 	int sysctl_ip_fwd_use_pmtu;
+	int sysctl_ip_fwd_update_priority;
 	int sysctl_ip_nonlocal_bind;
 	/* Shall we try to damage output packets if routing dev changes? */
 	int sysctl_ip_dynaddr;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ee707b91d1a7..20fda8fb8ffd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1801,6 +1801,7 @@ static __net_init int inet_init_net(struct net *net)
 	 * We set them here, in case sysctl is not compiled.
 	 */
 	net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
+	net->ipv4.sysctl_ip_fwd_update_priority = 1;
 	net->ipv4.sysctl_ip_dynaddr = 0;
 	net->ipv4.sysctl_ip_early_demux = 1;
 	net->ipv4.sysctl_udp_early_demux = 1;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index b54b948b0596..32662e9e5d21 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -143,7 +143,8 @@ int ip_forward(struct sk_buff *skb)
 	    !skb_sec_path(skb))
 		ip_rt_send_redirect(skb);
 
-	skb->priority = rt_tos2priority(iph->tos);
+	if (net->ipv4.sysctl_ip_fwd_update_priority)
+		skb->priority = rt_tos2priority(iph->tos);
 
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
 		       net, NULL, skb, skb->dev, rt->dst.dev,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5fa335fd3852..e21dda015513 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -663,6 +663,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "ip_forward_update_priority",
+		.data		= &init_net.ipv4.sysctl_ip_fwd_update_priority,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{
 		.procname	= "ip_nonlocal_bind",
 		.data		= &init_net.ipv4.sysctl_ip_nonlocal_bind,
-- 
cgit v1.2.3


From d18c5d1995aa322b722fa731397e28ebcd00b3c6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 1 Aug 2018 00:36:42 +0200
Subject: net: ipv4: Notify about changes to ip_forward_update_priority

Drivers may make offloading decision based on whether
ip_forward_update_priority is enabled or not. Therefore distribute
netevent notifications to give them a chance to react to a change.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netevent.h     |  1 +
 net/ipv4/sysctl_net_ipv4.c | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netevent.h b/include/net/netevent.h
index d9918261701c..4107016c3bb4 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -28,6 +28,7 @@ enum netevent_notif_type {
 	NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */
 	NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */
 	NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */
+	NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE, /* arg is struct net ptr */
 };
 
 int register_netevent_notifier(struct notifier_block *nb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e21dda015513..b92f422f2fa8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -201,6 +201,23 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 	return ret;
 }
 
+static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
+				    void __user *buffer,
+				    size_t *lenp, loff_t *ppos)
+{
+	struct net *net;
+	int ret;
+
+	net = container_of(table->data, struct net,
+			   ipv4.sysctl_ip_fwd_update_priority);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && ret == 0)
+		call_netevent_notifiers(NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE,
+					net);
+
+	return ret;
+}
+
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 				       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -668,7 +685,7 @@ static struct ctl_table ipv4_net_table[] = {
 		.data		= &init_net.ipv4.sysctl_ip_fwd_update_priority,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler   = proc_dointvec_minmax,
+		.proc_handler   = ipv4_fwd_update_priority,
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-- 
cgit v1.2.3


From ba113c3aa79a7f941ac162d05a3620bdc985c58d Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:21 -0700
Subject: tcp: add data bytes sent stats

Introduce a new TCP stat to record the number of bytes sent
(RFC4898 tcpEStatsPerfHCDataOctetsOut) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 4 +++-
 net/ipv4/tcp.c           | 6 ++++++
 net/ipv4/tcp_output.c    | 1 +
 4 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 58a8d7d71354..d0798dcd2cab 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -181,6 +181,9 @@ struct tcp_sock {
 	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
 				 * total number of data segments sent.
 				 */
+	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
+				 * total number of data bytes sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index e3f6ed8a7064..1c70ed287c3b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -235,6 +235,8 @@ struct tcp_info {
 
 	__u32	tcpi_delivered;
 	__u32	tcpi_delivered_ce;
+
+	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -257,7 +259,7 @@ enum {
 	TCP_NLA_SND_SSTHRESH,	/* Slow start size threshold */
 	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
-
+	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27bbe6a792b7..873cb9968ff5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2594,6 +2594,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_rx_dst = NULL;
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
+	tp->bytes_sent = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3201,6 +3202,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 		info->tcpi_delivery_rate = rate64;
 	info->tcpi_delivered = tp->delivered;
 	info->tcpi_delivered_ce = tp->delivered_ce;
+	info->tcpi_bytes_sent = tp->bytes_sent;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3225,6 +3227,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_SND_SSTHRESH */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		0;
 }
 
@@ -3272,6 +3275,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una);
 	nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state);
 
+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
+			  TCP_NLA_PAD);
+
 	return stats;
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 490df62f26d4..861531fe0e97 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1136,6 +1136,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	if (skb->len != tcp_header_size) {
 		tcp_event_data_sent(tp, sk);
 		tp->data_segs_out += tcp_skb_pcount(skb);
+		tp->bytes_sent += skb->len - tcp_header_size;
 		tcp_internal_pacing(sk, skb);
 	}
 
-- 
cgit v1.2.3


From fb31c9b9f6c85b1bad569ecedbde78d9e37cd87b Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:22 -0700
Subject: tcp: add data bytes retransmitted stats

Introduce a new TCP stat to record the number of bytes retransmitted
(RFC4898 tcpEStatsPerfOctetsRetrans) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 5 +++++
 net/ipv4/tcp_output.c    | 1 +
 4 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d0798dcd2cab..fb67f9a51b95 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -333,6 +333,9 @@ struct tcp_sock {
 				 * the first SYN. */
 	u32	undo_marker;	/* snd_una upon a new recovery episode. */
 	int	undo_retrans;	/* number of undoable retransmissions. */
+	u64	bytes_retrans;	/* RFC4898 tcpEStatsPerfOctetsRetrans
+				 * Total data bytes retransmitted
+				 */
 	u32	total_retrans;	/* Total retransmits for entire connection */
 
 	u32	urg_seq;	/* Seq of received urgent pointer */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 1c70ed287c3b..c31f5100b744 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -237,6 +237,7 @@ struct tcp_info {
 	__u32	tcpi_delivered_ce;
 
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -260,6 +261,7 @@ enum {
 	TCP_NLA_DELIVERED,	/* Data pkts delivered incl. out-of-order */
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
+	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 873cb9968ff5..5ed1be88e922 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2595,6 +2595,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_saved_syn_free(tp);
 	tp->compressed_ack = 0;
 	tp->bytes_sent = 0;
+	tp->bytes_retrans = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3203,6 +3204,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_delivered = tp->delivered;
 	info->tcpi_delivered_ce = tp->delivered_ce;
 	info->tcpi_bytes_sent = tp->bytes_sent;
+	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3228,6 +3230,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
+		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
 		0;
 }
 
@@ -3277,6 +3280,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent,
 			  TCP_NLA_PAD);
+	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
+			  TCP_NLA_PAD);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 861531fe0e97..50cabf7656f3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2871,6 +2871,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 	tp->total_retrans += segs;
+	tp->bytes_retrans += skb->len;
 
 	/* make sure skb->data is aligned on arches that require it
 	 * and check if ack-trimming & collapsing extended the headroom
-- 
cgit v1.2.3


From 7e10b6554ff2ce7f86d5d3eec3af5db8db482caa Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:23 -0700
Subject: tcp: add dsack blocks received stats

Introduce a new TCP stat to record the number of DSACK blocks received
(RFC4989 tcpEStatsStackDSACKDups) and expose it in both tcp_info
(TCP_INFO) and opt_stats (SOF_TIMESTAMPING_OPT_STATS).

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 3 +++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 4 ++++
 net/ipv4/tcp_input.c     | 1 +
 4 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index fb67f9a51b95..da6281c549a5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -188,6 +188,9 @@ struct tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
  	u32	snd_una;	/* First byte we want an ack for	*/
  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index c31f5100b744..0e1c0aec0153 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -238,6 +238,7 @@ struct tcp_info {
 
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
+	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -262,6 +263,7 @@ enum {
 	TCP_NLA_DELIVERED_CE,	/* Like above but only ones w/ CE marks */
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
+	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5ed1be88e922..d6232b598cae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2596,6 +2596,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->compressed_ack = 0;
 	tp->bytes_sent = 0;
 	tp->bytes_retrans = 0;
+	tp->dsack_dups = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3205,6 +3206,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_delivered_ce = tp->delivered_ce;
 	info->tcpi_bytes_sent = tp->bytes_sent;
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
+	info->tcpi_dsack_dups = tp->dsack_dups;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3231,6 +3233,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DELIVERED_CE */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
 		0;
 }
 
@@ -3282,6 +3285,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 			  TCP_NLA_PAD);
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
 			  TCP_NLA_PAD);
+	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d51fa358b2b1..fbc85ff7d71d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -874,6 +874,7 @@ static void tcp_dsack_seen(struct tcp_sock *tp)
 {
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 	tp->rack.dsack_seen = 1;
+	tp->dsack_dups++;
 }
 
 /* It's reordering when higher sequence was delivered (i.e. sacked) before
-- 
cgit v1.2.3


From 7ec65372ca534217b53fd208500cf7aac223a383 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Tue, 31 Jul 2018 17:46:24 -0700
Subject: tcp: add stat of data packet reordering events

Introduce a new TCP stats to record the number of reordering events seen
and expose it in both tcp_info (TCP_INFO) and opt_stats
(SOF_TIMESTAMPING_OPT_STATS).
Application can use this stats to track the frequency of the reordering
events in addition to the existing reordering stats which tracks the
magnitude of the latest reordering event.

Note: this new stats tracks reordering events triggered by ACKs, which
could often be fewer than the actual number of packets being delivered
out-of-order.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 4 ++--
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 4 ++++
 net/ipv4/tcp_input.c     | 3 ++-
 net/ipv4/tcp_recovery.c  | 2 +-
 5 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index da6281c549a5..263e37271afd 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -220,8 +220,7 @@ struct tcp_sock {
 #define TCP_RACK_RECOVERY_THRESH 16
 		u8 reo_wnd_persist:5, /* No. of recovery since last adj */
 		   dsack_seen:1, /* Whether DSACK seen after last adj */
-		   advanced:1,	 /* mstamp advanced since last lost marking */
-		   reord:1;	 /* reordering detected */
+		   advanced:1;	 /* mstamp advanced since last lost marking */
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
 	u8	compressed_ack;
@@ -267,6 +266,7 @@ struct tcp_sock {
 	u8	ecn_flags;	/* ECN status bits.			*/
 	u8	keepalive_probes; /* num of allowed keep alive probes	*/
 	u32	reordering;	/* Packet reordering metric.		*/
+	u32	reord_seen;	/* number of data packet reordering events */
 	u32	snd_up;		/* Urgent pointer		*/
 
 /*
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 0e1c0aec0153..e02d31986ff9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -239,6 +239,7 @@ struct tcp_info {
 	__u64	tcpi_bytes_sent;     /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
+	__u32	tcpi_reord_seen;     /* reordering events seen */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
@@ -264,6 +265,7 @@ enum {
 	TCP_NLA_BYTES_SENT,	/* Data bytes sent including retransmission */
 	TCP_NLA_BYTES_RETRANS,	/* Data bytes retransmitted */
 	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
+	TCP_NLA_REORD_SEEN,	/* reordering events seen */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d6232b598cae..31fa1c080f28 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2597,6 +2597,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->bytes_sent = 0;
 	tp->bytes_retrans = 0;
 	tp->dsack_dups = 0;
+	tp->reord_seen = 0;
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);
@@ -3207,6 +3208,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_bytes_sent = tp->bytes_sent;
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	info->tcpi_dsack_dups = tp->dsack_dups;
+	info->tcpi_reord_seen = tp->reord_seen;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -3234,6 +3236,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_SENT */
 		nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
+		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
 		0;
 }
 
@@ -3286,6 +3289,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans,
 			  TCP_NLA_PAD);
 	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
+	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fbc85ff7d71d..3d6156f07a8d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -906,8 +906,8 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
 				       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
 	}
 
-	tp->rack.reord = 1;
 	/* This exciting event is worth to be remembered. 8) */
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk),
 		      ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER);
 }
@@ -1871,6 +1871,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
 
 	tp->reordering = min_t(u32, tp->packets_out + addend,
 			       sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+	tp->reord_seen++;
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
 }
 
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 71593e4400ab..c81aadff769b 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -25,7 +25,7 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!tp->rack.reord) {
+	if (!tp->reord_seen) {
 		/* If reordering has not been observed, be aggressive during
 		 * the recovery or starting the recovery by DUPACK threshold.
 		 */
-- 
cgit v1.2.3


From 290b1c8b1a902c0902df9ec05577ab209296f345 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 1 Aug 2018 12:36:57 +0200
Subject: net: sched: make tcf_chain_{get,put}() static

These are no longer used outside of cls_api.c so make them static.
Move tcf_chain_flush() to avoid fwd declaration of tcf_chain_put().

Signed-off-by: Jiri Pirko <jiri@mellanox.com>

v1->v2:
- new patch

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  3 ---
 net/sched/cls_api.c   | 34 ++++++++++++++++------------------
 2 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 22bfc3a13c25..ef727f71336e 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -40,11 +40,8 @@ struct tcf_block_cb;
 bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 
 #ifdef CONFIG_NET_CLS
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-				bool create);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 				       u32 chain_index);
-void tcf_chain_put(struct tcf_chain *chain);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index b194a5abfc6a..e8b0bbd0883f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -232,19 +232,6 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
 		tcf_chain_head_change_item(item, tp_head);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
-{
-	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
-
-	tcf_chain0_head_change(chain, NULL);
-	while (tp) {
-		RCU_INIT_POINTER(chain->filter_chain, tp->next);
-		tcf_proto_destroy(tp, NULL);
-		tp = rtnl_dereference(chain->filter_chain);
-		tcf_chain_put(chain);
-	}
-}
-
 static void tcf_chain_destroy(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;
@@ -316,12 +303,11 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
 	return chain;
 }
 
-struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
-				bool create)
+static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
+				       bool create)
 {
 	return __tcf_chain_get(block, chain_index, create, false);
 }
-EXPORT_SYMBOL(tcf_chain_get);
 
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
 {
@@ -347,11 +333,10 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
 	}
 }
 
-void tcf_chain_put(struct tcf_chain *chain)
+static void tcf_chain_put(struct tcf_chain *chain)
 {
 	__tcf_chain_put(chain, false);
 }
-EXPORT_SYMBOL(tcf_chain_put);
 
 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
@@ -365,6 +350,19 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 		tcf_chain_put(chain);
 }
 
+static void tcf_chain_flush(struct tcf_chain *chain)
+{
+	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+
+	tcf_chain0_head_change(chain, NULL);
+	while (tp) {
+		RCU_INIT_POINTER(chain->filter_chain, tp->next);
+		tcf_proto_destroy(tp, NULL);
+		tp = rtnl_dereference(chain->filter_chain);
+		tcf_chain_put(chain);
+	}
+}
+
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
 	return block->offloadcnt;
-- 
cgit v1.2.3


From 8b11ec1b5ffb54f71cb5a5e5c8c4d36e5d113085 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 1 Aug 2018 13:43:38 -0700
Subject: mm: do not initialize TLB stack vma's with vma_init()

Commit 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and
data segments") tried to initialize various left-over ad-hoc vma's
"properly", but actually made things worse for the temporary vma's used
for TLB flushing.

vma_init() doesn't actually initialize all of the vma, just a few
fields, so doing something like

   -       struct vm_area_struct vma = { .vm_mm = tlb->mm, };
   +       struct vm_area_struct vma;
   +
   +       vma_init(&vma, tlb->mm);

was actually very bad: instead of having a nicely initialized vma with
every field but "vm_mm" zeroed, you'd have an entirely uninitialized vma
with only a couple of fields initialized.  And they weren't even fields
that the code in question mostly cared about.

The flush_tlb_range() function takes a "struct vma" rather than a
"struct mm_struct", because a few architectures actually care about what
kind of range it is - being able to only do an ITLB flush if it's a
range that doesn't have data accesses enabled, for example.  And all the
normal users already have the vma for doing the range invalidation.

But a few people want to call flush_tlb_range() with a range they just
made up, so they also end up using a made-up vma.  x86 just has a
special "flush_tlb_mm_range()" function for this, but other
architectures (arm and ia64) do the "use fake vma" thing instead, and
thus got caught up in the vma_init() changes.

At the same time, the TLB flushing code really doesn't care about most
other fields in the vma, so vma_init() is just unnecessary and
pointless.

This fixes things by having an explicit "this is just an initializer for
the TLB flush" initializer macro, which is used by the arm/arm64/ia64
people who mis-use this interface with just a dummy vma.

Fixes: 2c4541e24c55 ("mm: use vma_init() to initialize VMAs on stack and data segments")
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mach-rpc/ecard.c    |  5 +----
 arch/arm64/include/asm/tlb.h |  4 +---
 arch/arm64/mm/hugetlbpage.c  | 10 ++++------
 arch/ia64/include/asm/tlb.h  |  7 +++----
 include/linux/mm.h           |  3 +++
 5 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index 8db62cc54a6a..04b2f22c2739 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -212,7 +212,7 @@ static DEFINE_MUTEX(ecard_mutex);
  */
 static void ecard_init_pgtables(struct mm_struct *mm)
 {
-	struct vm_area_struct vma;
+	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, VM_EXEC);
 
 	/* We want to set up the page tables for the following mapping:
 	 *  Virtual	Physical
@@ -237,9 +237,6 @@ static void ecard_init_pgtables(struct mm_struct *mm)
 
 	memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE));
 
-	vma_init(&vma, mm);
-	vma.vm_flags = VM_EXEC;
-
 	flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE);
 	flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE);
 }
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d87f2d646caa..0ad1cf233470 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,9 +37,7 @@ static inline void __tlb_remove_table(void *_table)
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	struct vm_area_struct vma;
-
-	vma_init(&vma, tlb->mm);
+	struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
 
 	/*
 	 * The ASID allocator will either invalidate the ASID or mark
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 1854e49aa18a..192b3ba07075 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -108,13 +108,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
 			     unsigned long pgsize,
 			     unsigned long ncontig)
 {
-	struct vm_area_struct vma;
 	pte_t orig_pte = huge_ptep_get(ptep);
 	bool valid = pte_valid(orig_pte);
 	unsigned long i, saddr = addr;
 
-	vma_init(&vma, mm);
-
 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
 		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
 
@@ -127,8 +124,10 @@ static pte_t get_clear_flush(struct mm_struct *mm,
 			orig_pte = pte_mkdirty(orig_pte);
 	}
 
-	if (valid)
+	if (valid) {
+		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 		flush_tlb_range(&vma, saddr, addr);
+	}
 	return orig_pte;
 }
 
@@ -147,10 +146,9 @@ static void clear_flush(struct mm_struct *mm,
 			     unsigned long pgsize,
 			     unsigned long ncontig)
 {
-	struct vm_area_struct vma;
+	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
 	unsigned long i, saddr = addr;
 
-	vma_init(&vma, mm);
 	for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
 		pte_clear(mm, addr, ptep);
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index db89e7306081..516355a774bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -115,12 +115,11 @@ ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned
 		flush_tlb_all();
 	} else {
 		/*
-		 * XXX fix me: flush_tlb_range() should take an mm pointer instead of a
-		 * vma pointer.
+		 * flush_tlb_range() takes a vma instead of a mm pointer because
+		 * some architectures want the vm_flags for ITLB/DTLB flush.
 		 */
-		struct vm_area_struct vma;
+		struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
 
-		vma_init(&vma, tlb->mm);
 		/* flush the address range from the tlb: */
 		flush_tlb_range(&vma, start, end);
 		/* now flush the virt. page-table area mapping the address range: */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7ba6d356d18f..68a5121694ef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -466,6 +466,9 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
 	vma->vm_ops = NULL;
 }
 
+/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
+#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
+
 struct mmu_gather;
 struct inode;
 
-- 
cgit v1.2.3


From db57dc7c7a5c42bb653425a01b6d73c49514b5db Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Wed, 1 Aug 2018 22:05:10 +0200
Subject: net: don't declare IPv6 non-local bind helper if CONFIG_IPV6
 undefined

Fixes: 83ba4645152d ("net: add helpers checking if socket can be bound to nonlocal address")
Signed-off-by: Vincent Bernat <vincent@bernat.im>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82deb684ba73..ff33f498c137 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -766,13 +766,6 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
 	return hlimit;
 }
 
-static inline bool ipv6_can_nonlocal_bind(struct net *net,
-					  struct inet_sock *inet)
-{
-	return net->ipv6.sysctl.ip_nonlocal_bind ||
-		inet->freebind || inet->transparent;
-}
-
 /* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
  * Equivalent to :	flow->v6addrs.src = iph->saddr;
  *			flow->v6addrs.dst = iph->daddr;
@@ -789,6 +782,13 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+static inline bool ipv6_can_nonlocal_bind(struct net *net,
+					  struct inet_sock *inet)
+{
+	return net->ipv6.sysctl.ip_nonlocal_bind ||
+		inet->freebind || inet->transparent;
+}
+
 /* Sysctl settings for net ipv6.auto_flowlabels */
 #define IP6_AUTO_FLOW_LABEL_OFF		0
 #define IP6_AUTO_FLOW_LABEL_OPTOUT	1
-- 
cgit v1.2.3


From ef965ad5a7697ff16e3be01954f5c57208e36c22 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 1 Aug 2018 14:38:18 +0200
Subject: ALSA: seq: Minor cleanup of MIDI event parser helpers

snd_midi_event_encode_byte() can never fail, and it can return rather
true/false.  Change the return type to bool, adjust the argument to
receive a MIDI byte as unsigned char, and adjust the comment
accordingly.  This allows callers to drop error checks, which
simplifies the code.

Meanwhile, snd_midi_event_encode() helper is used only in seq_midi.c,
and it can be better folded into it.  This will reduce the total
amount of lines in the end.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/seq_midi_event.h    |  6 ++----
 sound/core/seq/oss/seq_oss_midi.c |  2 +-
 sound/core/seq/seq_midi.c         | 24 ++++++++++------------
 sound/core/seq/seq_midi_event.c   | 42 +++++++--------------------------------
 sound/core/seq/seq_virmidi.c      |  4 ++--
 5 files changed, 22 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/sound/seq_midi_event.h b/include/sound/seq_midi_event.h
index e40f43e6fc7b..2f135bccf457 100644
--- a/include/sound/seq_midi_event.h
+++ b/include/sound/seq_midi_event.h
@@ -43,10 +43,8 @@ void snd_midi_event_free(struct snd_midi_event *dev);
 void snd_midi_event_reset_encode(struct snd_midi_event *dev);
 void snd_midi_event_reset_decode(struct snd_midi_event *dev);
 void snd_midi_event_no_status(struct snd_midi_event *dev, int on);
-/* encode from byte stream - return number of written bytes if success */
-long snd_midi_event_encode(struct snd_midi_event *dev, unsigned char *buf, long count,
-			   struct snd_seq_event *ev);
-int snd_midi_event_encode_byte(struct snd_midi_event *dev, int c, struct snd_seq_event *ev);
+bool snd_midi_event_encode_byte(struct snd_midi_event *dev, unsigned char c,
+				struct snd_seq_event *ev);
 /* decode from event to bytes - return number of written bytes if success */
 long snd_midi_event_decode(struct snd_midi_event *dev, unsigned char *buf, long count,
 			   struct snd_seq_event *ev);
diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c
index 9debd1b8fd28..0d5f8b16d057 100644
--- a/sound/core/seq/oss/seq_oss_midi.c
+++ b/sound/core/seq/oss/seq_oss_midi.c
@@ -637,7 +637,7 @@ snd_seq_oss_midi_putc(struct seq_oss_devinfo *dp, int dev, unsigned char c, stru
 
 	if ((mdev = get_mididev(dp, dev)) == NULL)
 		return -ENODEV;
-	if (snd_midi_event_encode_byte(mdev->coder, c, ev) > 0) {
+	if (snd_midi_event_encode_byte(mdev->coder, c, ev)) {
 		snd_seq_oss_fill_addr(dp, ev, mdev->client, mdev->port);
 		snd_use_lock_free(&mdev->use_lock);
 		return 0;
diff --git a/sound/core/seq/seq_midi.c b/sound/core/seq/seq_midi.c
index 5dd0ee258359..9e0dabd3ce5f 100644
--- a/sound/core/seq/seq_midi.c
+++ b/sound/core/seq/seq_midi.c
@@ -78,7 +78,7 @@ static void snd_midi_input_event(struct snd_rawmidi_substream *substream)
 	struct seq_midisynth *msynth;
 	struct snd_seq_event ev;
 	char buf[16], *pbuf;
-	long res, count;
+	long res;
 
 	if (substream == NULL)
 		return;
@@ -94,19 +94,15 @@ static void snd_midi_input_event(struct snd_rawmidi_substream *substream)
 		if (msynth->parser == NULL)
 			continue;
 		pbuf = buf;
-		while (res > 0) {
-			count = snd_midi_event_encode(msynth->parser, pbuf, res, &ev);
-			if (count < 0)
-				break;
-			pbuf += count;
-			res -= count;
-			if (ev.type != SNDRV_SEQ_EVENT_NONE) {
-				ev.source.port = msynth->seq_port;
-				ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS;
-				snd_seq_kernel_client_dispatch(msynth->seq_client, &ev, 1, 0);
-				/* clear event and reset header */
-				memset(&ev, 0, sizeof(ev));
-			}
+		while (res-- > 0) {
+			if (!snd_midi_event_encode_byte(msynth->parser,
+							*pbuf++, &ev))
+				continue;
+			ev.source.port = msynth->seq_port;
+			ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS;
+			snd_seq_kernel_client_dispatch(msynth->seq_client, &ev, 1, 0);
+			/* clear event and reset header */
+			memset(&ev, 0, sizeof(ev));
 		}
 	}
 }
diff --git a/sound/core/seq/seq_midi_event.c b/sound/core/seq/seq_midi_event.c
index 90bbbdbeba03..53c0dfab90d6 100644
--- a/sound/core/seq/seq_midi_event.c
+++ b/sound/core/seq/seq_midi_event.c
@@ -214,45 +214,17 @@ int snd_midi_event_resize_buffer(struct snd_midi_event *dev, int bufsize)
 }
 #endif  /*  0  */
 
-/*
- *  read bytes and encode to sequencer event if finished
- *  return the size of encoded bytes
- */
-long snd_midi_event_encode(struct snd_midi_event *dev, unsigned char *buf, long count,
-			   struct snd_seq_event *ev)
-{
-	long result = 0;
-	int rc;
-
-	ev->type = SNDRV_SEQ_EVENT_NONE;
-
-	while (count-- > 0) {
-		rc = snd_midi_event_encode_byte(dev, *buf++, ev);
-		result++;
-		if (rc < 0)
-			return rc;
-		else if (rc > 0)
-			return result;
-	}
-
-	return result;
-}
-EXPORT_SYMBOL(snd_midi_event_encode);
-
 /*
  *  read one byte and encode to sequencer event:
- *  return 1 if MIDI bytes are encoded to an event
- *         0 data is not finished
- *         negative for error
+ *  return true if MIDI bytes are encoded to an event
+ *         false data is not finished
  */
-int snd_midi_event_encode_byte(struct snd_midi_event *dev, int c,
-			       struct snd_seq_event *ev)
+bool snd_midi_event_encode_byte(struct snd_midi_event *dev, unsigned char c,
+				struct snd_seq_event *ev)
 {
-	int rc = 0;
+	bool rc = false;
 	unsigned long flags;
 
-	c &= 0xff;
-
 	if (c >= MIDI_CMD_COMMON_CLOCK) {
 		/* real-time event */
 		ev->type = status_event[ST_SPECIAL + c - 0xf0].event;
@@ -293,7 +265,7 @@ int snd_midi_event_encode_byte(struct snd_midi_event *dev, int c,
 			status_event[dev->type].encode(dev, ev);
 		if (dev->type >= ST_SPECIAL)
 			dev->type = ST_INVALID;
-		rc = 1;
+		rc = true;
 	} else 	if (dev->type == ST_SYSEX) {
 		if (c == MIDI_CMD_COMMON_SYSEX_END ||
 		    dev->read >= dev->bufsize) {
@@ -306,7 +278,7 @@ int snd_midi_event_encode_byte(struct snd_midi_event *dev, int c,
 				dev->read = 0; /* continue to parse */
 			else
 				reset_encode(dev); /* all parsed */
-			rc = 1;
+			rc = true;
 		}
 	}
 
diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c
index 03ac5e72dbe6..0c84926eb726 100644
--- a/sound/core/seq/seq_virmidi.c
+++ b/sound/core/seq/seq_virmidi.c
@@ -174,8 +174,8 @@ static void snd_vmidi_output_work(struct work_struct *work)
 	while (READ_ONCE(vmidi->trigger)) {
 		if (snd_rawmidi_transmit(substream, &input, 1) != 1)
 			break;
-		if (snd_midi_event_encode_byte(vmidi->parser, input,
-					       &vmidi->event) <= 0)
+		if (!snd_midi_event_encode_byte(vmidi->parser, input,
+						&vmidi->event))
 			continue;
 		if (vmidi->event.type != SNDRV_SEQ_EVENT_NONE) {
 			ret = snd_seq_kernel_client_dispatch(vmidi->client,
-- 
cgit v1.2.3


From 32ed5c00ac5fdea49058fd49bf8707e101dc3dfe Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:11 -0600
Subject: IB/uverbs: Make the write path destroy methods use the same flow as
 ioctl

The ridiculous dance with uobj_remove_commit() is not needed, the write
path can follow the same flow as ioctl - lock and destroy the HW object
then use the data left over in the uobject to form the response to
userspace.

Two helpers are introduced to make this flow straightforward for the
caller.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c  | 53 ++++++++++++++-----------
 drivers/infiniband/core/uverbs_cmd.c | 77 +++++++-----------------------------
 include/rdma/uverbs_std_types.h      | 16 +++++---
 include/rdma/uverbs_types.h          |  1 -
 4 files changed, 55 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 9e84ded6d3be..7db75d784070 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -130,24 +130,44 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 }
 
 /*
- * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then
- * returns success_res on success (negative errno on failure). For use by
- * callers that do not need the uobj.
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uverbs_put_destroy.
  */
-int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
-			   struct ib_uverbs_file *ufile, int success_res)
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
+				      u32 id, struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
 	uobj = rdma_lookup_get_uobject(type, ufile, id, true);
 	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
+		return uobj;
 
-	ret = rdma_remove_commit_uobject(uobj);
-	if (ret)
-		return ret;
+	ret = rdma_explicit_destroy(uobj);
+	if (ret) {
+		rdma_lookup_put_uobject(uobj, true);
+		return ERR_PTR(ret);
+	}
+
+	return uobj;
+}
 
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy().  Returns success_res
+ * on success (negative errno on failure). For use by callers that do not need
+ * the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
+			   struct ib_uverbs_file *ufile, int success_res)
+{
+	struct ib_uobject *uobj;
+
+	uobj = __uobj_get_destroy(type, id, ufile);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	rdma_lookup_put_uobject(uobj, true);
 	return success_res;
 }
 
@@ -449,21 +469,6 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
 	return ret;
 }
 
-/* This is called only for user requested DESTROY reasons
- * rdma_lookup_get_uobject(exclusive=true) must have been called to get uobj,
- * and after this returns the corresponding put has been done, and the kref
- * for uobj has been consumed.
- */
-int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
-{
-	int ret;
-
-	ret = rdma_explicit_destroy(uobj);
-	/* Pairs with the lookup_get done by the caller */
-	rdma_lookup_put_uobject(uobj, true);
-	return ret;
-}
-
 int rdma_explicit_destroy(struct ib_uobject *uobject)
 {
 	int ret;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index b2af4eeb7669..fe96ceda6cd2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1304,37 +1304,22 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_cq      cmd;
 	struct ib_uverbs_destroy_cq_resp resp;
 	struct ib_uobject		*uobj;
-	struct ib_cq               	*cq;
 	struct ib_ucq_object        	*obj;
-	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-	cq      = uobj->object;
-	obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
-
+	obj = container_of(uobj, struct ib_ucq_object, uobject);
 	memset(&resp, 0, sizeof(resp));
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
-
 	resp.comp_events_reported  = obj->comp_events_reported;
 	resp.async_events_reported = obj->async_events_reported;
 
-	uverbs_uobject_put(uobj);
+	uobj_put_destroy(uobj);
+
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
 
@@ -2104,32 +2089,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_uobject		*uobj;
 	struct ib_uqp_object        	*obj;
-	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	memset(&resp, 0, sizeof resp);
-
-	uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
-
+	memset(&resp, 0, sizeof(resp));
 	resp.events_reported = obj->uevent.events_reported;
-	uverbs_uobject_put(uobj);
+
+	uobj_put_destroy(uobj);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
@@ -3198,22 +3170,14 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 		return -EOPNOTSUPP;
 
 	resp.response_length = required_resp_len;
-	uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
-	ret = uobj_remove_commit(uobj);
 	resp.events_reported = obj->uevent.events_reported;
-	uverbs_uobject_put(uobj);
-	if (ret)
-		return ret;
+
+	uobj_put_destroy(uobj);
 
 	return ib_copy_to_udata(ucore, &resp, resp.response_length);
 }
@@ -3920,31 +3884,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_uobject		 *uobj;
 	struct ib_uevent_object        	 *obj;
-	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uevent_object, uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
 	memset(&resp, 0, sizeof(resp));
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
 	resp.events_reported = obj->events_reported;
-	uverbs_uobject_put(uobj);
+
+	uobj_put_destroy(uobj);
+
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
 		return -EFAULT;
 
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 076f085d2dcf..c2f89e41cbd2 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -84,6 +84,17 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 	__uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id),      \
 			       _ufile, _success_res)
 
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
+				      u32 id, struct ib_uverbs_file *ufile);
+
+#define uobj_get_destroy(_type, _id, _ufile)                                   \
+	__uobj_get_destroy(uobj_get_type(_type), _uobj_check_id(_id), _ufile)
+
+static inline void uobj_put_destroy(struct ib_uobject *uobj)
+{
+	rdma_lookup_put_uobject(uobj, true);
+}
+
 static inline void uobj_put_read(struct ib_uobject *uobj)
 {
 	rdma_lookup_put_uobject(uobj, false);
@@ -97,11 +108,6 @@ static inline void uobj_put_write(struct ib_uobject *uobj)
 	rdma_lookup_put_uobject(uobj, true);
 }
 
-static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
-{
-	return rdma_remove_commit_uobject(uobj);
-}
-
 static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
 						 int success_res)
 {
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index cfc50fcdbff6..8bae28dd2e4f 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -126,7 +126,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
 int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
 int rdma_explicit_destroy(struct ib_uobject *uobject);
 
-- 
cgit v1.2.3


From 87ad80abc70d2d5a4e383bc7e63867c9bc660838 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:12 -0600
Subject: IB/uverbs: Consolidate uobject destruction

There are several flows that can destroy a uobject and each one is
minimized and sprinkled throughout the code base, making it difficult to
understand and very hard to modify the destroy path.

Consolidate all of these into uverbs_destroy_uobject() and call it in all
cases where a uobject has to be destroyed.

This makes one change to the lifecycle, during any abort (eg when
alloc_commit is not called) we always call out to alloc_abort, even if
remove_commit needs to be called to delete a HW object.

This also renames RDMA_REMOVE_DURING_CLEANUP to RDMA_REMOVE_ABORT to
clarify its actual usage and revises some of the comments to reflect what
the life cycle is for the type implementation.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c | 251 ++++++++++++++++++------------------
 include/rdma/ib_verbs.h             |   4 +-
 include/rdma/uverbs_types.h         |  70 +++++-----
 3 files changed, 157 insertions(+), 168 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 7db75d784070..aa1d16d87746 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -129,6 +129,95 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
 }
 
+static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
+{
+#ifdef CONFIG_LOCKDEP
+	if (exclusive)
+		WARN_ON(atomic_read(&uobj->usecnt) != -1);
+	else
+		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+#endif
+}
+
+/*
+ * This must be called with the hw_destroy_rwsem locked (except for
+ * RDMA_REMOVE_ABORT) for read or write, also The uobject itself must be
+ * locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
+ * however the type's allocat_commit function cannot have been called and the
+ * uobject cannot be on the uobjects_lists
+ *
+ * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
+ * rdma_lookup_get_uobject) and the object is left in a state where the caller
+ * needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+				  enum rdma_remove_reason reason)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	unsigned long flags;
+	int ret;
+
+	assert_uverbs_usecnt(uobj, true);
+
+	if (uobj->object) {
+		ret = uobj->type->type_class->remove_commit(uobj, reason);
+		if (ret) {
+			if (ib_is_destroy_retryable(ret, reason, uobj))
+				return ret;
+
+			/* Nothing to be done, dangle the memory and move on */
+			WARN(true,
+			     "ib_uverbs: failed to remove uobject id %d, driver err=%d",
+			     uobj->id, ret);
+		}
+
+		uobj->object = NULL;
+	}
+
+	if (reason == RDMA_REMOVE_ABORT) {
+		WARN_ON(!list_empty(&uobj->list));
+		WARN_ON(!uobj->context);
+		uobj->type->type_class->alloc_abort(uobj);
+	}
+
+	uobj->context = NULL;
+
+	/*
+	 * For DESTROY the usecnt is held write locked, the caller is expected
+	 * to put it unlock and put the object when done with it.
+	 */
+	if (reason != RDMA_REMOVE_DESTROY)
+		atomic_set(&uobj->usecnt, 0);
+
+	if (!list_empty(&uobj->list)) {
+		spin_lock_irqsave(&ufile->uobjects_lock, flags);
+		list_del_init(&uobj->list);
+		spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+		/*
+		 * Pairs with the get in rdma_alloc_commit_uobject(), could
+		 * destroy uobj.
+		 */
+		uverbs_uobject_put(uobj);
+	}
+
+	/*
+	 * When aborting the stack kref remains owned by the core code, and is
+	 * not transferred into the type. Pairs with the get in alloc_uobj
+	 */
+	if (reason == RDMA_REMOVE_ABORT)
+		uverbs_uobject_put(uobj);
+
+	return 0;
+}
+
 /*
  * uobj_get_destroy destroys the HW object and returns a handle to the uobj
  * with a NULL object pointer. The caller must pair this with
@@ -171,6 +260,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 	return success_res;
 }
 
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
 static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 				     const struct uverbs_obj_type *type)
 {
@@ -379,6 +469,16 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 	return type->type_class->alloc_begin(type, ufile);
 }
 
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+			   RDMACG_RESOURCE_HCA_OBJECT);
+
+	spin_lock(&uobj->ufile->idr_lock);
+	idr_remove(&uobj->ufile->idr, uobj->id);
+	spin_unlock(&uobj->ufile->idr_lock);
+}
+
 static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 						  enum rdma_remove_reason why)
 {
@@ -395,25 +495,19 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
 
-	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
-			   RDMACG_RESOURCE_HCA_OBJECT);
-
-	spin_lock(&uobj->ufile->idr_lock);
-	idr_remove(&uobj->ufile->idr, uobj->id);
-	spin_unlock(&uobj->ufile->idr_lock);
+	if (why == RDMA_REMOVE_ABORT)
+		return 0;
 
+	alloc_abort_idr_uobject(uobj);
 	/* Matches the kref in alloc_commit_idr_uobject */
 	uverbs_uobject_put(uobj);
 
-	return ret;
+	return 0;
 }
 
 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 {
 	put_unused_fd(uobj->id);
-
-	/* Pairs with the kref from alloc_begin_idr_uobject */
-	uverbs_uobject_put(uobj);
 }
 
 static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
@@ -426,47 +520,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 	if (ib_is_destroy_retryable(ret, why, uobj))
 		return ret;
 
-	if (why == RDMA_REMOVE_DURING_CLEANUP) {
-		alloc_abort_fd_uobject(uobj);
-		return ret;
-	}
-
-	uobj->context = NULL;
-	return ret;
-}
-
-static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
-{
-#ifdef CONFIG_LOCKDEP
-	if (exclusive)
-		WARN_ON(atomic_read(&uobj->usecnt) != -1);
-	else
-		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
-#endif
-}
-
-static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
-						    enum rdma_remove_reason why)
-{
-	struct ib_uverbs_file *ufile = uobj->ufile;
-	int ret;
-
-	if (!uobj->object)
-		return 0;
-
-	ret = uobj->type->type_class->remove_commit(uobj, why);
-	if (ib_is_destroy_retryable(ret, why, uobj))
-		return ret;
-
-	uobj->object = NULL;
-
-	spin_lock_irq(&ufile->uobjects_lock);
-	list_del(&uobj->list);
-	spin_unlock_irq(&ufile->uobjects_lock);
-	/* Pairs with the get in rdma_alloc_commit_uobject() */
-	uverbs_uobject_put(uobj);
-
-	return ret;
+	return 0;
 }
 
 int rdma_explicit_destroy(struct ib_uobject *uobject)
@@ -479,8 +533,8 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
 		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
 		return 0;
 	}
-	assert_uverbs_usecnt(uobject, true);
-	ret = _rdma_remove_commit_uobject(uobject, RDMA_REMOVE_DESTROY);
+
+	ret = uverbs_destroy_uobject(uobject, RDMA_REMOVE_DESTROY);
 
 	up_read(&ufile->hw_destroy_rwsem);
 	return ret;
@@ -554,24 +608,14 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	/* Cleanup is running. Calling this should have been impossible */
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem)) {
 		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
-		ret = uobj->type->type_class->remove_commit(uobj,
-							    RDMA_REMOVE_DURING_CLEANUP);
-		if (ret)
-			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
-				uobj->id);
-		return ret;
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
+		return -EINVAL;
 	}
 
-	assert_uverbs_usecnt(uobj, true);
-
 	/* alloc_commit consumes the uobj kref */
 	ret = uobj->type->type_class->alloc_commit(uobj);
 	if (ret) {
-		if (uobj->type->type_class->remove_commit(
-			    uobj, RDMA_REMOVE_DURING_CLEANUP))
-			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
-				uobj->id);
-		up_read(&ufile->hw_destroy_rwsem);
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
 		return ret;
 	}
 
@@ -589,27 +633,14 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	return 0;
 }
 
-static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
-{
-	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
-			   RDMACG_RESOURCE_HCA_OBJECT);
-
-	spin_lock(&uobj->ufile->idr_lock);
-	/* The value of the handle in the IDR is NULL at this point. */
-	idr_remove(&uobj->ufile->idr, uobj->id);
-	spin_unlock(&uobj->ufile->idr_lock);
-
-	/* Pairs with the kref from alloc_begin_idr_uobject */
-	uverbs_uobject_put(uobj);
-}
-
 /*
  * This consumes the kref for uobj. It is up to the caller to unwind the HW
  * object and anything else connected to uobj before calling this.
  */
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
 {
-	uobj->type->type_class->alloc_abort(uobj);
+	uobj->object = NULL;
+	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
 }
 
 static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
@@ -667,45 +698,23 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
 };
 EXPORT_SYMBOL(uverbs_idr_class);
 
-static void _uverbs_close_fd(struct ib_uobject *uobj)
-{
-	int ret;
-
-	/*
-	 * uobject was already cleaned up, remove_commit_fd_uobject
-	 * sets this
-	 */
-	if (!uobj->context)
-		return;
-
-	/*
-	 * lookup_get_fd_uobject holds the kref on the struct file any time a
-	 * FD uobj is locked, which prevents this release method from being
-	 * invoked. Meaning we can always get the write lock here, or we have
-	 * a kernel bug. If so dangle the pointers and bail.
-	 */
-	ret = uverbs_try_lock_object(uobj, true);
-	if (WARN(ret, "uverbs_close_fd() racing with lookup_get_fd_uobject()"))
-		return;
-
-	ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE);
-	if (ret)
-		pr_warn("Unable to clean up uobject file in %s\n", __func__);
-
-	atomic_set(&uobj->usecnt, 0);
-}
-
 void uverbs_close_fd(struct file *f)
 {
 	struct ib_uobject *uobj = f->private_data;
 	struct ib_uverbs_file *ufile = uobj->ufile;
 
 	if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
-		_uverbs_close_fd(uobj);
+		/*
+		 * lookup_get_fd_uobject holds the kref on the struct file any
+		 * time a FD uobj is locked, which prevents this release
+		 * method from being invoked. Meaning we can always get the
+		 * write lock here, or we have a kernel bug.
+		 */
+		WARN_ON(uverbs_try_lock_object(uobj, true));
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
 		up_read(&ufile->hw_destroy_rwsem);
 	}
 
-	uobj->object = NULL;
 	/* Matches the get in alloc_begin_fd_uobject */
 	kref_put(&ufile->ref, ib_uverbs_release_file);
 
@@ -783,7 +792,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 {
 	struct ib_uobject *obj, *next_obj;
 	int ret = -EINVAL;
-	int err = 0;
 
 	/*
 	 * This shouldn't run while executing other commands on this
@@ -800,23 +808,8 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 		 * racing with a lookup_get.
 		 */
 		WARN_ON(uverbs_try_lock_object(obj, true));
-		err = obj->type->type_class->remove_commit(obj, reason);
-
-		if (ib_is_destroy_retryable(err, reason, obj)) {
-			pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n",
-				 obj->id, err);
-			atomic_set(&obj->usecnt, 0);
-			continue;
-		}
-
-		if (err)
-			pr_err("ib_uverbs: unable to remove uobject id %d err %d\n",
-				obj->id, err);
-
-		list_del(&obj->list);
-		/* Pairs with the get in rdma_alloc_commit_uobject() */
-		uverbs_uobject_put(obj);
-		ret = 0;
+		if (!uverbs_destroy_uobject(obj, reason))
+			ret = 0;
 	}
 	return ret;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1de8f0d2797c..be208421f7d3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1467,8 +1467,8 @@ enum rdma_remove_reason {
 	RDMA_REMOVE_CLOSE,
 	/* Driver is being hot-unplugged. This call should delete the actual object itself */
 	RDMA_REMOVE_DRIVER_REMOVE,
-	/* Context is being cleaned-up, but commit was just completed */
-	RDMA_REMOVE_DURING_CLEANUP,
+	/* uobj is being cleaned-up before being committed */
+	RDMA_REMOVE_ABORT,
 };
 
 struct ib_rdmacg_object {
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 8bae28dd2e4f..875dd8c16ba3 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -38,53 +38,49 @@
 
 struct uverbs_obj_type;
 
+/*
+ * The following sequences are valid:
+ * Success flow:
+ *   alloc_begin
+ *   alloc_commit
+ *    [..]
+ * Access flow:
+ *   lookup_get(exclusive=false) & uverbs_try_lock_object
+ *   lookup_put(exclusive=false) via rdma_lookup_put_uobject
+ * Destruction flow:
+ *   lookup_get(exclusive=true) & uverbs_try_lock_object
+ *   remove_commit
+ *   lookup_put(exclusive=true) via rdma_lookup_put_uobject
+ *
+ * Allocate Error flow #1
+ *   alloc_begin
+ *   alloc_abort
+ * Allocate Error flow #2
+ *   alloc_begin
+ *   remove_commit
+ *   alloc_abort
+ * Allocate Error flow #3
+ *   alloc_begin
+ *   alloc_commit (fails)
+ *   remove_commit
+ *   alloc_abort
+ *
+ * In all cases the caller must hold the ufile kref until alloc_commit or
+ * alloc_abort returns.
+ */
 struct uverbs_obj_type_class {
-	/*
-	 * Get an ib_uobject that corresponds to the given id from ucontext,
-	 * These functions could create or destroy objects if required.
-	 * The action will be finalized only when commit, abort or put fops are
-	 * called.
-	 * The flow of the different actions is:
-	 * [alloc]:	 Starts with alloc_begin. The handlers logic is than
-	 *		 executed. If the handler is successful, alloc_commit
-	 *		 is called and the object is inserted to the repository.
-	 *		 Once alloc_commit completes the object is visible to
-	 *		 other threads and userspace.
-	 e		 Otherwise, alloc_abort is called and the object is
-	 *		 destroyed.
-	 * [lookup]:	 Starts with lookup_get which fetches and locks the
-	 *		 object. After the handler finished using the object, it
-	 *		 needs to call lookup_put to unlock it. The exclusive
-	 *		 flag indicates if the object is locked for exclusive
-	 *		 access.
-	 * [remove]:	 Starts with lookup_get with exclusive flag set. This
-	 *		 locks the object for exclusive access. If the handler
-	 *		 code completed successfully, remove_commit is called
-	 *		 and the ib_uobject is removed from the context's
-	 *		 uobjects repository and put. The object itself is
-	 *		 destroyed as well. Once remove succeeds new krefs to
-	 *		 the object cannot be acquired by other threads or
-	 *		 userspace and the hardware driver is removed from the
-	 *		 object. Other krefs on the object may still exist.
-	 *		 If the handler code failed, lookup_put should be
-	 *		 called. This callback is used when the context
-	 *		 is destroyed as well (process termination,
-	 *		 reset flow).
-	 */
 	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
 					  struct ib_uverbs_file *ufile);
+	/* This consumes the kref on uobj */
 	int (*alloc_commit)(struct ib_uobject *uobj);
+	/* This does not consume the kref on uobj */
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
 					 struct ib_uverbs_file *ufile, s64 id,
 					 bool exclusive);
 	void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
-	/*
-	 * Must be called with the exclusive lock held. If successful uobj is
-	 * invalid on return. On failure uobject is left completely
-	 * unchanged
-	 */
+	/* This does not consume the kref on uobj */
 	int __must_check (*remove_commit)(struct ib_uobject *uobj,
 					  enum rdma_remove_reason why);
 	u8    needs_kfree_rcu;
-- 
cgit v1.2.3


From 9867f5c6695f0a17cde9a4dc140fe026b4e40d4a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:13 -0600
Subject: IB/uverbs: Convert 'bool exclusive' into an enum

This is more readable, and future patches will need a 3rd lookup type.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c | 94 ++++++++++++++++++++++---------------
 include/rdma/uverbs_std_types.h     | 13 ++---
 include/rdma/uverbs_types.h         | 16 +++++--
 3 files changed, 75 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index aa1d16d87746..435dbe8ef2a2 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -108,7 +108,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject)
 	kref_put(&uobject->ref, uverbs_uobject_free);
 }
 
-static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+static int uverbs_try_lock_object(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
 {
 	/*
 	 * When a shared access is required, we use a positive counter. Each
@@ -121,21 +122,29 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	 * concurrently, setting the counter to zero is enough for releasing
 	 * this lock.
 	 */
-	if (!exclusive)
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
 		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
-
-	/* lock is either WRITE or DESTROY - should be exclusive */
-	return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	case UVERBS_LOOKUP_WRITE:
+		/* lock is either WRITE or DESTROY - should be exclusive */
+		return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	}
+	return 0;
 }
 
-static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+				 enum rdma_lookup_mode mode)
 {
 #ifdef CONFIG_LOCKDEP
-	if (exclusive)
-		WARN_ON(atomic_read(&uobj->usecnt) != -1);
-	else
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
 		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+		break;
+	case UVERBS_LOOKUP_WRITE:
+		WARN_ON(atomic_read(&uobj->usecnt) != -1);
+		break;
+	}
 #endif
 }
 
@@ -164,7 +173,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	unsigned long flags;
 	int ret;
 
-	assert_uverbs_usecnt(uobj, true);
+	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
 	if (uobj->object) {
 		ret = uobj->type->type_class->remove_commit(uobj, reason);
@@ -229,13 +238,13 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = rdma_lookup_get_uobject(type, ufile, id, true);
+	uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_WRITE);
 	if (IS_ERR(uobj))
 		return uobj;
 
 	ret = rdma_explicit_destroy(uobj);
 	if (ret) {
-		rdma_lookup_put_uobject(uobj, true);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 		return ERR_PTR(ret);
 	}
 
@@ -256,7 +265,7 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	rdma_lookup_put_uobject(uobj, true);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 	return success_res;
 }
 
@@ -319,7 +328,8 @@ static int idr_add_uobj(struct ib_uobject *uobj)
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
 static struct ib_uobject *
 lookup_get_idr_uobject(const struct uverbs_obj_type *type,
-		       struct ib_uverbs_file *ufile, s64 id, bool exclusive)
+		       struct ib_uverbs_file *ufile, s64 id,
+		       enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
 	unsigned long idrno = id;
@@ -349,9 +359,10 @@ free:
 	return uobj;
 }
 
-static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
-						struct ib_uverbs_file *ufile,
-						s64 id, bool exclusive)
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+		      struct ib_uverbs_file *ufile, s64 id,
+		      enum rdma_lookup_mode mode)
 {
 	struct file *f;
 	struct ib_uobject *uobject;
@@ -362,7 +373,7 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
 	if (fdno != id)
 		return ERR_PTR(-EINVAL);
 
-	if (exclusive)
+	if (mode != UVERBS_LOOKUP_READ)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	f = fget(fdno);
@@ -386,12 +397,12 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 					   struct ib_uverbs_file *ufile, s64 id,
-					   bool exclusive)
+					   enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = type->type_class->lookup_get(type, ufile, id, exclusive);
+	uobj = type->type_class->lookup_get(type, ufile, id, mode);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -400,13 +411,13 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 		goto free;
 	}
 
-	ret = uverbs_try_lock_object(uobj, exclusive);
+	ret = uverbs_try_lock_object(uobj, mode);
 	if (ret)
 		goto free;
 
 	return uobj;
 free:
-	uobj->type->type_class->lookup_put(uobj, exclusive);
+	uobj->type->type_class->lookup_put(uobj, mode);
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
 }
@@ -643,32 +654,39 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
 	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
 }
 
-static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+				   enum rdma_lookup_mode mode)
 {
 }
 
-static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
 {
 	struct file *filp = uobj->object;
 
-	WARN_ON(exclusive);
+	WARN_ON(mode != UVERBS_LOOKUP_READ);
 	/* This indirectly calls uverbs_close_fd and free the object */
 	fput(filp);
 }
 
-void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode)
 {
-	assert_uverbs_usecnt(uobj, exclusive);
-	uobj->type->type_class->lookup_put(uobj, exclusive);
+	assert_uverbs_usecnt(uobj, mode);
+	uobj->type->type_class->lookup_put(uobj, mode);
 	/*
 	 * In order to unlock an object, either decrease its usecnt for
 	 * read access or zero it in case of exclusive access. See
 	 * uverbs_try_lock_object for locking schema information.
 	 */
-	if (!exclusive)
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
 		atomic_dec(&uobj->usecnt);
-	else
+		break;
+	case UVERBS_LOOKUP_WRITE:
 		atomic_set(&uobj->usecnt, 0);
+		break;
+	}
 
 	/* Pairs with the kref obtained by type->lookup_get */
 	uverbs_uobject_put(uobj);
@@ -710,7 +728,7 @@ void uverbs_close_fd(struct file *f)
 		 * method from being invoked. Meaning we can always get the
 		 * write lock here, or we have a kernel bug.
 		 */
-		WARN_ON(uverbs_try_lock_object(uobj, true));
+		WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
 		uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
 		up_read(&ufile->hw_destroy_rwsem);
 	}
@@ -807,7 +825,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
 		 * if we hit this WARN_ON, that means we are
 		 * racing with a lookup_get.
 		 */
-		WARN_ON(uverbs_try_lock_object(obj, true));
+		WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
 		if (!uverbs_destroy_uobject(obj, reason))
 			ret = 0;
 	}
@@ -890,10 +908,12 @@ uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
 {
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		return rdma_lookup_get_uobject(type_attrs, ufile, id, false);
+		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+					       UVERBS_LOOKUP_READ);
 	case UVERBS_ACCESS_DESTROY:
 	case UVERBS_ACCESS_WRITE:
-		return rdma_lookup_get_uobject(type_attrs, ufile, id, true);
+		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+					       UVERBS_LOOKUP_WRITE);
 	case UVERBS_ACCESS_NEW:
 		return rdma_alloc_begin_uobject(type_attrs, ufile);
 	default:
@@ -916,13 +936,13 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		rdma_lookup_put_uobject(uobj, false);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 		break;
 	case UVERBS_ACCESS_WRITE:
-		rdma_lookup_put_uobject(uobj, true);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 		break;
 	case UVERBS_ACCESS_DESTROY:
-		rdma_lookup_put_uobject(uobj, true);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 		break;
 	case UVERBS_ACCESS_NEW:
 		if (commit)
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index c2f89e41cbd2..8c54e1439ba1 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -58,11 +58,12 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
 
 #define uobj_get_read(_type, _id, _ufile)                                      \
 	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
-				_uobj_check_id(_id), false)
+				_uobj_check_id(_id), UVERBS_LOOKUP_READ)
 
 #define ufd_get_read(_type, _fdnum, _ufile)                                    \
 	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
-				(_fdnum)*typecheck(s32, _fdnum), false)
+				(_fdnum)*typecheck(s32, _fdnum),               \
+				UVERBS_LOOKUP_READ)
 
 static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 {
@@ -76,7 +77,7 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 
 #define uobj_get_write(_type, _id, _ufile)                                     \
 	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
-				_uobj_check_id(_id), true)
+				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
 
 int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 			   struct ib_uverbs_file *ufile, int success_res);
@@ -92,12 +93,12 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
 
 static inline void uobj_put_destroy(struct ib_uobject *uobj)
 {
-	rdma_lookup_put_uobject(uobj, true);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
 static inline void uobj_put_read(struct ib_uobject *uobj)
 {
-	rdma_lookup_put_uobject(uobj, false);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 }
 
 #define uobj_put_obj_read(_obj)					\
@@ -105,7 +106,7 @@ static inline void uobj_put_read(struct ib_uobject *uobj)
 
 static inline void uobj_put_write(struct ib_uobject *uobj)
 {
-	rdma_lookup_put_uobject(uobj, true);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
 static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 875dd8c16ba3..0676672dbbb9 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -38,6 +38,11 @@
 
 struct uverbs_obj_type;
 
+enum rdma_lookup_mode {
+	UVERBS_LOOKUP_READ,
+	UVERBS_LOOKUP_WRITE,
+};
+
 /*
  * The following sequences are valid:
  * Success flow:
@@ -78,8 +83,8 @@ struct uverbs_obj_type_class {
 
 	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
 					 struct ib_uverbs_file *ufile, s64 id,
-					 bool exclusive);
-	void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
+					 enum rdma_lookup_mode mode);
+	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
 	/* This does not consume the kref on uobj */
 	int __must_check (*remove_commit)(struct ib_uobject *uobj,
 					  enum rdma_remove_reason why);
@@ -116,9 +121,10 @@ struct uverbs_obj_idr_type {
 };
 
 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_uverbs_file *ufile,
-					   s64 id, bool exclusive);
-void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
+					   struct ib_uverbs_file *ufile, s64 id,
+					   enum rdma_lookup_mode mode);
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode);
 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-- 
cgit v1.2.3


From 7452a3c745a2e7eb70d09dc5bb870759b1f26c91 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:14 -0600
Subject: IB/uverbs: Allow RDMA_REMOVE_DESTROY to work concurrently with
 disassociate

After all the recent structural changes this is now straightfoward, hoist
the hw_destroy_rwsem up out of rdma_destroy_explicit and wrap it around
the uobject write lock as well as the destroy.

This is necessary as obtaining a write lock concurrently with
uverbs_destroy_ufile_hw() will cause malfunction.

After this change none of the destroy callbacks require the
disassociate_srcu lock to be correct.

This requires introducing a new lookup mode, UVERBS_LOOKUP_DESTROY as the
IOCTL interface needs to hold an unlocked kref until all command
verification is completed.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c    | 71 +++++++++++++++++++++++-----------
 drivers/infiniband/core/rdma_core.h    |  2 +
 drivers/infiniband/core/uverbs_ioctl.c |  7 +++-
 include/rdma/uverbs_types.h            |  7 +++-
 4 files changed, 63 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 435dbe8ef2a2..81d668abe18e 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -127,8 +127,10 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
 		return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
 	case UVERBS_LOOKUP_WRITE:
-		/* lock is either WRITE or DESTROY - should be exclusive */
+		/* lock is exclusive */
 		return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	case UVERBS_LOOKUP_DESTROY:
+		return 0;
 	}
 	return 0;
 }
@@ -144,6 +146,8 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj,
 	case UVERBS_LOOKUP_WRITE:
 		WARN_ON(atomic_read(&uobj->usecnt) != -1);
 		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
 	}
 #endif
 }
@@ -227,6 +231,35 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	return 0;
 }
 
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	int ret;
+
+	down_read(&ufile->hw_destroy_rwsem);
+
+	ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+	if (ret)
+		goto out_unlock;
+
+	ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
+	if (ret) {
+		atomic_set(&uobj->usecnt, 0);
+		goto out_unlock;
+	}
+
+out_unlock:
+	up_read(&ufile->hw_destroy_rwsem);
+	return ret;
+}
+
 /*
  * uobj_get_destroy destroys the HW object and returns a handle to the uobj
  * with a NULL object pointer. The caller must pair this with
@@ -238,13 +271,13 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_WRITE);
+	uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_DESTROY);
 	if (IS_ERR(uobj))
 		return uobj;
 
-	ret = rdma_explicit_destroy(uobj);
+	ret = uobj_destroy(uobj);
 	if (ret) {
-		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
 		return ERR_PTR(ret);
 	}
 
@@ -265,6 +298,11 @@ int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
+	/*
+	 * FIXME: After destroy this is not safe. We no longer hold the rwsem
+	 * so disassociation could have completed and unloaded the module that
+	 * backs the uobj->type pointer.
+	 */
 	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 	return success_res;
 }
@@ -534,23 +572,6 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 	return 0;
 }
 
-int rdma_explicit_destroy(struct ib_uobject *uobject)
-{
-	int ret;
-	struct ib_uverbs_file *ufile = uobject->ufile;
-
-	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&ufile->hw_destroy_rwsem)) {
-		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
-		return 0;
-	}
-
-	ret = uverbs_destroy_uobject(uobject, RDMA_REMOVE_DESTROY);
-
-	up_read(&ufile->hw_destroy_rwsem);
-	return ret;
-}
-
 static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
@@ -686,6 +707,8 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 	case UVERBS_LOOKUP_WRITE:
 		atomic_set(&uobj->usecnt, 0);
 		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
 	}
 
 	/* Pairs with the kref obtained by type->lookup_get */
@@ -911,6 +934,9 @@ uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
 		return rdma_lookup_get_uobject(type_attrs, ufile, id,
 					       UVERBS_LOOKUP_READ);
 	case UVERBS_ACCESS_DESTROY:
+		/* Actual destruction is done inside uverbs_handle_method */
+		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+					       UVERBS_LOOKUP_DESTROY);
 	case UVERBS_ACCESS_WRITE:
 		return rdma_lookup_get_uobject(type_attrs, ufile, id,
 					       UVERBS_LOOKUP_WRITE);
@@ -942,7 +968,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 		break;
 	case UVERBS_ACCESS_DESTROY:
-		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+		if (uobj)
+			rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
 		break;
 	case UVERBS_ACCESS_NEW:
 		if (commit)
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index a736b46d18e3..e4d8b985c311 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -52,6 +52,8 @@ const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_sp
 void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 			     enum rdma_remove_reason reason);
 
+int uobj_destroy(struct ib_uobject *uobj);
+
 /*
  * uverbs_uobject_get is called in order to increase the reference count on
  * an uobject. This is useful when a handler wants to keep the uobject's memory
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 404acfcdbeb2..f3776f909ca5 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -349,13 +349,18 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
 	 * not get to manipulate the HW objects.
 	 */
 	if (destroy_attr) {
-		ret = rdma_explicit_destroy(destroy_attr->uobject);
+		ret = uobj_destroy(destroy_attr->uobject);
 		if (ret)
 			goto cleanup;
 	}
 
 	ret = method_spec->handler(ibdev, ufile, attr_bundle);
 
+	if (destroy_attr) {
+		uobj_put_destroy(destroy_attr->uobject);
+		destroy_attr->uobject = NULL;
+	}
+
 cleanup:
 	finalize_ret = uverbs_finalize_attrs(attr_bundle,
 					     method_spec->attr_buckets,
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 0676672dbbb9..f64f413cecac 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -41,6 +41,12 @@ struct uverbs_obj_type;
 enum rdma_lookup_mode {
 	UVERBS_LOOKUP_READ,
 	UVERBS_LOOKUP_WRITE,
+	/*
+	 * Destroy is like LOOKUP_WRITE, except that the uobject is not
+	 * locked.  uobj_destroy is used to convert a LOOKUP_DESTROY lock into
+	 * a LOOKUP_WRITE lock.
+	 */
+	UVERBS_LOOKUP_DESTROY,
 };
 
 /*
@@ -129,7 +135,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
 int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
-int rdma_explicit_destroy(struct ib_uobject *uobject);
 
 struct uverbs_obj_fd_type {
 	/*
-- 
cgit v1.2.3


From bbd51e881ff05aa6dccda025e335438f3b3a1dba Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:17 -0600
Subject: IB/uverbs: Do not pass struct ib_device to the write based methods

This is a step to get rid of the global check for disassociation. In this
model, the ib_dev is not proven to be valid by the core code and cannot be
provided to the method. Instead, every method decides if it is able to
run after disassociation and obtains the ib_dev using one of three
different approaches:

- Call srcu_dereference on the udevice's ib_dev. As before, this means
  the method cannot be called after disassociation begins.
  (eg alloc ucontext)
- Retrieve the ib_dev from the ucontext, via ib_uverbs_get_ucontext()
- Retrieve the ib_dev from the uobject->object after checking
  under SRCU if disassociation has started (eg uobj_get)

Largely, the code is all ready for this, the main work is to provide a
ib_dev after calling uobj_alloc(). The few other places simply use
ib_uverbs_get_ucontext() to get the ib_dev.

This flexibility will let the next patches allow destroy to operate
after disassociation.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs.h      |   2 -
 drivers/infiniband/core/uverbs_cmd.c  | 155 +++++++++++++++++-----------------
 drivers/infiniband/core/uverbs_main.c |   6 +-
 include/rdma/uverbs_std_types.h       |  12 ++-
 4 files changed, 89 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cf02b433000c..5e21cc1f900b 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -299,7 +299,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
-				 struct ib_device *ib_dev,              \
 				 const char __user *buf, int in_len,	\
 				 int out_len)
 
@@ -341,7 +340,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 #define IB_UVERBS_DECLARE_EX_CMD(name)				\
 	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\
-				struct ib_device *ib_dev,		\
 				struct ib_udata *ucore,		\
 				struct ib_udata *uhw)
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index fe96ceda6cd2..465b4d921024 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -66,7 +66,6 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
 	_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf,
 			      int in_len, int out_len)
 {
@@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	struct ib_ucontext		 *ucontext;
 	struct file			 *filp;
 	struct ib_rdmacg_object		 cg_obj;
+	struct ib_device *ib_dev;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -85,6 +85,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		return -EFAULT;
 
 	mutex_lock(&file->ucontext_lock);
+	ib_dev = srcu_dereference(file->device->ib_dev,
+				  &file->device->disassociate_srcu);
+	if (!ib_dev) {
+		ret = -EIO;
+		goto err;
+	}
 
 	if (file->ucontext) {
 		ret = -EINVAL;
@@ -177,11 +183,12 @@ err:
 	return ret;
 }
 
-static void copy_query_dev_fields(struct ib_uverbs_file *file,
-				  struct ib_device *ib_dev,
+static void copy_query_dev_fields(struct ib_ucontext *ucontext,
 				  struct ib_uverbs_query_device_resp *resp,
 				  struct ib_device_attr *attr)
 {
+	struct ib_device *ib_dev = ucontext->device;
+
 	resp->fw_ver		= attr->fw_ver;
 	resp->node_guid		= ib_dev->node_guid;
 	resp->sys_image_guid	= attr->sys_image_guid;
@@ -225,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf,
 			       int in_len, int out_len)
 {
 	struct ib_uverbs_query_device      cmd;
 	struct ib_uverbs_query_device_resp resp;
+	struct ib_ucontext *ucontext;
+
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -239,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 		return -EFAULT;
 
 	memset(&resp, 0, sizeof resp);
-	copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
+	copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
@@ -269,7 +280,6 @@ static u32 make_port_cap_flags(const struct ib_port_attr *attr)
 }
 
 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf,
 			     int in_len, int out_len)
 {
@@ -277,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	struct ib_uverbs_query_port_resp resp;
 	struct ib_port_attr              attr;
 	int                              ret;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -328,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf,
 			   int in_len, int out_len)
 {
@@ -338,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	struct ib_uobject             *uobj;
 	struct ib_pd                  *pd;
 	int                            ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -350,7 +367,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                    out_len - sizeof(resp));
 
-	uobj = uobj_alloc(UVERBS_OBJECT_PD, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -387,7 +404,6 @@ err:
 }
 
 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf,
 			     int in_len, int out_len)
 {
@@ -486,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
 }
 
 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -499,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	struct inode                   *inode = NULL;
 	int				ret = 0;
 	int				new_xrcd = 0;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -535,7 +551,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 		}
 	}
 
-	obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file);
+	obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+						   &ib_dev);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
 		goto err_tree_mutex_unlock;
@@ -606,7 +623,6 @@ err_tree_mutex_unlock:
 }
 
 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -645,7 +661,6 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
 }
 
 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
-			 struct ib_device *ib_dev,
 			 const char __user *buf, int in_len,
 			 int out_len)
 {
@@ -656,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	struct ib_pd                *pd;
 	struct ib_mr                *mr;
 	int                          ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -675,7 +691,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
-	uobj = uobj_alloc(UVERBS_OBJECT_MR, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -737,7 +753,6 @@ err_free:
 }
 
 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -829,7 +844,6 @@ put_uobjs:
 }
 
 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -843,7 +857,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -854,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	struct ib_mw                  *mw;
 	struct ib_udata		       udata;
 	int                            ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof(resp))
 		return -ENOSPC;
@@ -861,7 +875,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof(cmd)))
 		return -EFAULT;
 
-	uobj = uobj_alloc(UVERBS_OBJECT_MW, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -911,7 +925,6 @@ err_free:
 }
 
 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -925,7 +938,6 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
-				      struct ib_device *ib_dev,
 				      const char __user *buf, int in_len,
 				      int out_len)
 {
@@ -933,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_comp_channel_resp  resp;
 	struct ib_uobject			  *uobj;
 	struct ib_uverbs_completion_event_file	  *ev_file;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -940,7 +953,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -959,7 +972,6 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 }
 
 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
-					struct ib_device *ib_dev,
 				       struct ib_udata *ucore,
 				       struct ib_udata *uhw,
 				       struct ib_uverbs_ex_create_cq *cmd,
@@ -977,17 +989,21 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	int                             ret;
 	struct ib_uverbs_ex_create_cq_resp resp;
 	struct ib_cq_init_attr attr = {};
-
-	if (!ib_dev->create_cq)
-		return ERR_PTR(-EOPNOTSUPP);
+	struct ib_device *ib_dev;
 
 	if (cmd->comp_vector >= file->device->num_comp_vectors)
 		return ERR_PTR(-EINVAL);
 
-	obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file);
+	obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return obj;
 
+	if (!ib_dev->create_cq) {
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
 	if (cmd->comp_channel >= 0) {
 		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
 		if (IS_ERR(ev_file)) {
@@ -1066,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1097,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	cmd_ex.comp_vector = cmd.comp_vector;
 	cmd_ex.comp_channel = cmd.comp_channel;
 
-	obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
+	obj = create_cq(file, &ucore, &uhw, &cmd_ex,
 			offsetof(typeof(cmd_ex), comp_channel) +
 			sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
 			NULL);
@@ -1120,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
-			 struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -1146,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
 			     sizeof(resp.response_length)))
 		return -ENOSPC;
 
-	obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
+	obj = create_cq(file, ucore, uhw, &cmd,
 			min(ucore->inlen, sizeof(cmd)),
 			ib_uverbs_ex_create_cq_cb, NULL);
 
@@ -1154,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1222,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
 }
 
 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
-			  struct ib_device *ib_dev,
 			  const char __user *buf, int in_len,
 			  int out_len)
 {
@@ -1253,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		if (!ret)
 			break;
 
-		ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
+		ret = copy_wc_to_user(cq->device, data_ptr, &wc);
 		if (ret)
 			goto out_put;
 
@@ -1274,7 +1286,6 @@ out_put:
 }
 
 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				const char __user *buf, int in_len,
 				int out_len)
 {
@@ -1297,7 +1308,6 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -1350,11 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file,
 	int				ret;
 	struct ib_rwq_ind_table *ind_tbl = NULL;
 	bool has_sq = true;
+	struct ib_device *ib_dev;
 
 	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 	obj->uxrcd = NULL;
@@ -1611,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1672,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -1709,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
-			  struct ib_device *ib_dev,
 			  const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_open_qp        cmd;
@@ -1721,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 	struct ib_qp                   *qp;
 	struct ib_qp_open_attr          attr;
 	int ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -1733,7 +1743,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1815,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
 }
 
 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -2018,7 +2028,6 @@ out:
 }
 
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2045,7 +2054,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -2081,7 +2089,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -2120,7 +2127,6 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
 }
 
 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2401,7 +2407,6 @@ err:
 }
 
 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2451,7 +2456,6 @@ out:
 }
 
 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				const char __user *buf, int in_len,
 				int out_len)
 {
@@ -2502,7 +2506,6 @@ out:
 }
 
 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2514,6 +2517,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	struct rdma_ah_attr		attr = {};
 	int ret;
 	struct ib_udata                   udata;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2521,18 +2525,20 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
-		return -EINVAL;
-
 	ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
 		   u64_to_user_ptr(cmd.response) + sizeof(resp),
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	uobj = uobj_alloc(UVERBS_OBJECT_AH, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
+	if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
 	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
@@ -2589,7 +2595,6 @@ err:
 }
 
 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_destroy_ah cmd;
@@ -2602,7 +2607,6 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf, int in_len,
 			       int out_len)
 {
@@ -2652,7 +2656,6 @@ out_put:
 }
 
 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf, int in_len,
 			       int out_len)
 {
@@ -3021,7 +3024,6 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
 }
 
 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -3035,6 +3037,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	struct ib_wq_init_attr wq_init_attr = {};
 	size_t required_cmd_sz;
 	size_t required_resp_len;
+	struct ib_device *ib_dev;
 
 	required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
 	required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
@@ -3057,7 +3060,8 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EOPNOTSUPP;
 
-	obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file);
+	obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3136,7 +3140,6 @@ err_uobj:
 }
 
 int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    struct ib_udata *ucore,
 			    struct ib_udata *uhw)
 {
@@ -3183,7 +3186,6 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -3233,7 +3235,6 @@ out:
 }
 
 int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
-				      struct ib_device *ib_dev,
 				      struct ib_udata *ucore,
 				      struct ib_udata *uhw)
 {
@@ -3251,6 +3252,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 	u32 expected_in_size;
 	size_t required_cmd_sz_header;
 	size_t required_resp_len;
+	struct ib_device *ib_dev;
 
 	required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
 	required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
@@ -3316,7 +3318,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 		wqs[num_read_wqs] = wq;
 	}
 
-	uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto put_wqs;
@@ -3376,7 +3378,6 @@ err_free:
 }
 
 int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
-				       struct ib_device *ib_dev,
 				       struct ib_udata *ucore,
 				       struct ib_udata *uhw)
 {
@@ -3406,7 +3407,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     struct ib_udata *ucore,
 			     struct ib_udata *uhw)
 {
@@ -3423,6 +3423,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	void *kern_spec;
 	void *ib_spec;
 	int i;
+	struct ib_device *ib_dev;
 
 	if (ucore->inlen < sizeof(cmd))
 		return -EINVAL;
@@ -3478,7 +3479,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		kern_flow_attr = &cmd.flow_attr;
 	}
 
-	uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file);
+	uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto err_free_attr;
@@ -3583,7 +3584,6 @@ err_free_attr:
 }
 
 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
@@ -3605,7 +3605,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 }
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				struct ib_uverbs_create_xsrq *cmd,
 				struct ib_udata *udata)
 {
@@ -3616,8 +3615,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	struct ib_uobject               *uninitialized_var(xrcd_uobj);
 	struct ib_srq_init_attr          attr;
 	int ret;
+	struct ib_device *ib_dev;
 
-	obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file);
+	obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+						  &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3740,7 +3741,6 @@ err:
 }
 
 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -3770,7 +3770,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
+	ret = __uverbs_create_xsrq(file, &xcmd, &udata);
 	if (ret)
 		return ret;
 
@@ -3778,7 +3778,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_create_xsrq     cmd;
@@ -3797,7 +3796,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
+	ret = __uverbs_create_xsrq(file, &cmd, &udata);
 	if (ret)
 		return ret;
 
@@ -3805,7 +3804,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -3836,7 +3834,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf,
 			    int in_len, int out_len)
 {
@@ -3876,7 +3873,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf, int in_len,
 			      int out_len)
 {
@@ -3905,15 +3901,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
 	struct ib_uverbs_ex_query_device_resp resp = { {0} };
 	struct ib_uverbs_ex_query_device  cmd;
 	struct ib_device_attr attr = {0};
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
 	int err;
 
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
 	if (!ib_dev->query_device)
 		return -EOPNOTSUPP;
 
@@ -3939,7 +3941,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	if (err)
 		return err;
 
-	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
+	copy_query_dev_fields(ucontext, &resp.base, &attr);
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
 		goto end;
@@ -4026,7 +4028,6 @@ end:
 }
 
 int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 34df04ed142b..a1e427b2c2a1 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -75,7 +75,6 @@ static struct class *uverbs_class;
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
-				     struct ib_device *ib_dev,
 				     const char __user *buf, int in_len,
 				     int out_len) = {
 	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
@@ -116,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 };
 
 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
-				    struct ib_device *ib_dev,
 				    struct ib_udata *ucore,
 				    struct ib_udata *uhw) = {
 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
@@ -774,7 +772,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	buf += sizeof(hdr);
 
 	if (!extended) {
-		ret = uverbs_cmd_table[command](file, ib_dev, buf,
+		ret = uverbs_cmd_table[command](file, buf,
 						hdr.in_words * 4,
 						hdr.out_words * 4);
 	} else {
@@ -793,7 +791,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 					ex_hdr.provider_in_words * 8,
 					ex_hdr.provider_out_words * 8);
 
-		ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
+		ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
 		ret = (ret) ? : count;
 	}
 
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 8c54e1439ba1..64ee2545dd3d 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -125,12 +125,18 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 }
 
 static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
-					      struct ib_uverbs_file *ufile)
+					      struct ib_uverbs_file *ufile,
+					      struct ib_device **ib_dev)
 {
-	return rdma_alloc_begin_uobject(type, ufile);
+	struct ib_uobject *uobj = rdma_alloc_begin_uobject(type, ufile);
+
+	if (!IS_ERR(uobj))
+		*ib_dev = uobj->context->device;
+	return uobj;
 }
 
-#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile)
+#define uobj_alloc(_type, _ufile, _ib_dev)                                     \
+	__uobj_alloc(uobj_get_type(_type), _ufile, _ib_dev)
 
 #endif
 
-- 
cgit v1.2.3


From e83f0ecdc40f2c3d63ff0e7f17462a29d12684a2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:18 -0600
Subject: IB/uverbs: Do not pass struct ib_device to the ioctl methods

This does the same as the patch before, except for ioctl. The rules are
the same, but for the ioctl methods the core code handles setting up the
uobject.

- Retrieve the ib_dev from the uobject->context->device. This is
  safe under ioctl as the core has already done rdma_alloc_begin_uobject
  and so CREATE calls are entirely protected by the rwsem.
- Retrieve the ib_dev from uobject->object
- Call ib_uverbs_get_ucontext()

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c             |  2 +-
 drivers/infiniband/core/uverbs_std_types.c         |  3 +-
 .../infiniband/core/uverbs_std_types_counters.c    | 21 +++++------
 drivers/infiniband/core/uverbs_std_types_cq.c      | 18 ++++-----
 drivers/infiniband/core/uverbs_std_types_dm.c      | 13 ++++---
 .../infiniband/core/uverbs_std_types_flow_action.c | 35 ++++++++----------
 drivers/infiniband/core/uverbs_std_types_mr.c      | 22 +++++------
 drivers/infiniband/hw/mlx5/devx.c                  | 43 +++++++++++-----------
 drivers/infiniband/hw/mlx5/flow.c                  |  8 ++--
 include/rdma/ib_verbs.h                            |  3 +-
 include/rdma/uverbs_ioctl.h                        |  4 +-
 11 files changed, 78 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index f3776f909ca5..f0655a84f9d9 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -354,7 +354,7 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
 			goto cleanup;
 	}
 
-	ret = method_spec->handler(ibdev, ufile, attr_bundle);
+	ret = method_spec->handler(ufile, attr_bundle);
 
 	if (destroy_attr) {
 		uobj_put_destroy(destroy_attr->uobject);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index c1e0492cc78a..3aa7c7deac74 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -210,8 +210,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
 	return 0;
 };
 
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
-			       struct ib_uverbs_file *file,
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 			       struct uverbs_attr_bundle *attrs)
 {
 	return 0;
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 34589799f446..dfacc9e83399 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -47,12 +47,13 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
 	return counters->device->destroy_counters(counters);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
-							 struct ib_uverbs_file *file,
-							 struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+	struct ib_device *ib_dev = uobj->context->device;
 	struct ib_counters *counters;
-	struct ib_uobject *uobj;
 	int ret;
 
 	/*
@@ -63,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
 	if (!ib_dev->create_counters)
 		return -EOPNOTSUPP;
 
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
 	counters = ib_dev->create_counters(ib_dev, attrs);
 	if (IS_ERR(counters)) {
 		ret = PTR_ERR(counters);
@@ -81,9 +81,8 @@ err_create_counters:
 	return ret;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
-						       struct ib_uverbs_file *file,
-						       struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_counters_read_attr read_attr = {};
 	const struct uverbs_attr *uattr;
@@ -91,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
 		uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
 	int ret;
 
-	if (!ib_dev->read_counters)
+	if (!counters->device->read_counters)
 		return -EOPNOTSUPP;
 
 	if (!atomic_read(&counters->usecnt))
@@ -110,9 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
 	if (!read_attr.counters_buff)
 		return -ENOMEM;
 
-	ret = ib_dev->read_counters(counters,
-				    &read_attr,
-				    attrs);
+	ret = counters->device->read_counters(counters, &read_attr, attrs);
 	if (ret)
 		goto err_read;
 
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 68c86e6e932e..5b5f2052cd52 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -57,11 +57,13 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 	return ret;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
-						   struct ib_uverbs_file *file,
-						   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
-	struct ib_ucq_object           *obj;
+	struct ib_ucq_object *obj = container_of(
+		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
+		typeof(*obj), uobject);
+	struct ib_device *ib_dev = obj->uobject.context->device;
 	struct ib_udata uhw;
 	int ret;
 	u64 user_handle;
@@ -104,9 +106,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 		goto err_event_file;
 	}
 
-	obj = container_of(uverbs_attr_get_uobject(attrs,
-						   UVERBS_ATTR_CREATE_CQ_HANDLE),
-			   typeof(*obj), uobject);
 	obj->comp_events_reported  = 0;
 	obj->async_events_reported = 0;
 	INIT_LIST_HEAD(&obj->comp_list);
@@ -173,9 +172,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 			    UA_MANDATORY),
 	UVERBS_ATTR_UHW());
 
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
-						    struct ib_uverbs_file *file,
-						    struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uobject *uobj =
 		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index c90efa4b99f4..edc3ff7733d4 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -46,12 +46,15 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
 	return dm->device->dealloc_dm(dm);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
-						  struct ib_uverbs_file *file,
-						  struct uverbs_attr_bundle *attrs)
+static int
+UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
+				       struct uverbs_attr_bundle *attrs)
 {
 	struct ib_dm_alloc_attr attr = {};
-	struct ib_uobject *uobj;
+	struct ib_uobject *uobj =
+		uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
+			->obj_attr.uobject;
+	struct ib_device *ib_dev = uobj->context->device;
 	struct ib_dm *dm;
 	int ret;
 
@@ -68,8 +71,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
-
 	dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
 	if (IS_ERR(dm))
 		return PTR_ERR(dm);
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index adb9209c4710..d8cfafe23bd9 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -304,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
 	return 0;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
-								struct ib_uverbs_file *file,
-								struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
+	struct ib_device *ib_dev = uobj->context->device;
 	int				  ret;
-	struct ib_uobject		  *uobj;
 	struct ib_flow_action		  *action;
 	struct ib_flow_action_esp_attr	  esp_attr = {};
 
@@ -321,8 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 		return ret;
 
 	/* No need to check as this attribute is marked as MANDATORY */
-	uobj = uverbs_attr_get_uobject(
-		attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
 	action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
 	if (IS_ERR(action))
 		return PTR_ERR(action);
@@ -336,32 +335,28 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 	return 0;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
-								struct ib_uverbs_file *file,
-								struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
+	struct ib_flow_action *action = uobj->object;
 	int				  ret;
-	struct ib_uobject		  *uobj;
-	struct ib_flow_action		  *action;
 	struct ib_flow_action_esp_attr	  esp_attr = {};
 
-	if (!ib_dev->modify_flow_action_esp)
+	if (!action->device->modify_flow_action_esp)
 		return -EOPNOTSUPP;
 
-	ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+	ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
+				    true);
 	if (ret)
 		return ret;
 
-	uobj = uverbs_attr_get_uobject(
-		attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
-	action = uobj->object;
-
 	if (action->type != IB_FLOW_ACTION_ESP)
 		return -EINVAL;
 
-	return ib_dev->modify_flow_action_esp(action,
-					      &esp_attr.hdr,
-					      attrs);
+	return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
+						      attrs);
 }
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index d63da0c2a8c1..cf02e774303e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
 	return ib_dereg_mr((struct ib_mr *)uobject->object);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
-						   struct ib_uverbs_file *file,
-						   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_dm_mr_attr attr = {};
-	struct ib_uobject *uobj;
-	struct ib_dm *dm;
-	struct ib_pd *pd;
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+	struct ib_dm *dm =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+	struct ib_pd *pd =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+	struct ib_device *ib_dev = pd->device;
+
 	struct ib_mr *mr;
 	int ret;
 
@@ -75,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
-
-	dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
-
-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
-
 	if (attr.offset > dm->length || attr.length > dm->length ||
 	    attr.length > dm->length - attr.offset)
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index c9a7a12a8c13..29c688372390 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -409,11 +409,11 @@ static bool devx_is_general_cmd(void *in)
 	}
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev,
-				  struct ib_uverbs_file *file,
-				  struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
 	int user_vector;
 	int dev_eqn;
 	unsigned int irqn;
@@ -423,6 +423,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_de
 			     MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
 		return -EFAULT;
 
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
 	err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
 	if (err < 0)
 		return err;
@@ -454,9 +459,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_de
  * of the buggy user for execution (just insert it to the hardware schedule
  * queue or arm its CQ for event generation), no further harm is expected.
  */
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev,
-				  struct ib_uverbs_file *file,
-				  struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct mlx5_ib_ucontext *c;
 	struct mlx5_ib_dev *dev;
@@ -483,9 +487,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_de
 	return 0;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
-				  struct ib_uverbs_file *file,
-				  struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct mlx5_ib_ucontext *c;
 	struct mlx5_ib_dev *dev;
@@ -712,9 +715,8 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	return ret;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev,
-				   struct ib_uverbs_file *file,
-				   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
 	int cmd_out_len =  uverbs_attr_get_len(attrs,
@@ -769,9 +771,8 @@ obj_free:
 	return err;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev,
-				   struct ib_uverbs_file *file,
-				   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
 	int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -811,9 +812,8 @@ other_cmd_free:
 	return err;
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev,
-				   struct ib_uverbs_file *file,
-				   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
 	int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -931,9 +931,8 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
 			     MLX5_IB_MTT_READ);
 }
 
-static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev,
-				 struct ib_uverbs_file *file,
-				 struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct devx_umem_reg_cmd cmd;
 	struct devx_umem *obj;
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index ee398a9b5f26..1a29f47f836e 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -39,8 +39,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
 };
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
-	struct ib_device *ib_dev, struct ib_uverbs_file *file,
-	struct uverbs_attr_bundle *attrs)
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct mlx5_ib_flow_handler *flow_handler;
 	struct mlx5_ib_flow_matcher *fs_matcher;
@@ -109,7 +108,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	if (IS_ERR(flow_handler))
 		return PTR_ERR(flow_handler);
 
-	ib_set_flow(uobj, &flow_handler->ibflow, qp, ib_dev);
+	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
 
 	return 0;
 }
@@ -129,8 +128,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
-	struct ib_device *ib_dev, struct ib_uverbs_file *file,
-	struct uverbs_attr_bundle *attrs)
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
 		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index be208421f7d3..dea770e5b9ae 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4170,7 +4170,6 @@ void rdma_roce_rescan_device(struct ib_device *ibdev);
 
 struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
 
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
-			       struct ib_uverbs_file *file,
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 			       struct uverbs_attr_bundle *attrs);
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 5e6d0569d97c..8d71b7a7f147 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -128,7 +128,7 @@ struct uverbs_method_spec {
 	u32						flags;
 	size_t						num_buckets;
 	size_t						num_child_attrs;
-	int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
+	int (*handler)(struct ib_uverbs_file *ufile,
 		       struct uverbs_attr_bundle *ctx);
 	struct uverbs_attr_spec_hash		*attr_buckets[0];
 };
@@ -171,7 +171,7 @@ struct uverbs_method_def {
 	u32				     flags;
 	size_t				     num_attrs;
 	const struct uverbs_attr_def * const (*attrs)[];
-	int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
+	int (*handler)(struct ib_uverbs_file *ufile,
 		       struct uverbs_attr_bundle *ctx);
 };
 
-- 
cgit v1.2.3


From 0f50d88a6e9ae6d9dd14ed1a7d6b309280a9c23b Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 25 Jul 2018 21:40:20 -0600
Subject: IB/uverbs: Allow all DESTROY commands to succeed after disassociate

The disassociate function was broken by design because it failed all
commands. This prevents userspace from calling destroy on a uobject after
it has detected a device fatal error and thus reclaiming the resources in
userspace is prevented.

This fix is now straightforward, when anything destroys a uobject that is
not the user the object remains on the IDR with a NULL context and object
pointer. All lookup locking modes other than DESTROY will fail. When the
user ultimately calls the destroy function it is simply dropped from the
IDR while any related information is returned.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c   | 66 +++++++++++++++++++++++++++++------
 drivers/infiniband/core/rdma_core.h   |  3 ++
 drivers/infiniband/core/uverbs_main.c |  7 ++--
 include/rdma/uverbs_types.h           |  6 ++--
 4 files changed, 66 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index d4de1fed98f2..4235b9ddc2ad 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -180,7 +180,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
 	if (uobj->object) {
-		ret = uobj->type->type_class->remove_commit(uobj, reason);
+		ret = uobj->type->type_class->destroy_hw(uobj, reason);
 		if (ret) {
 			if (ib_is_destroy_retryable(ret, reason, uobj))
 				return ret;
@@ -204,10 +204,13 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 
 	/*
 	 * For DESTROY the usecnt is held write locked, the caller is expected
-	 * to put it unlock and put the object when done with it.
+	 * to put it unlock and put the object when done with it. Only DESTROY
+	 * can remove the IDR handle.
 	 */
 	if (reason != RDMA_REMOVE_DESTROY)
 		atomic_set(&uobj->usecnt, 0);
+	else
+		uobj->type->type_class->remove_handle(uobj);
 
 	if (!list_empty(&uobj->list)) {
 		spin_lock_irqsave(&ufile->uobjects_lock, flags);
@@ -554,8 +557,8 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
 	spin_unlock(&uobj->ufile->idr_lock);
 }
 
-static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
-						  enum rdma_remove_reason why)
+static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
+					       enum rdma_remove_reason why)
 {
 	const struct uverbs_obj_idr_type *idr_type =
 		container_of(uobj->type, struct uverbs_obj_idr_type,
@@ -573,20 +576,28 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
 	if (why == RDMA_REMOVE_ABORT)
 		return 0;
 
-	alloc_abort_idr_uobject(uobj);
-	/* Matches the kref in alloc_commit_idr_uobject */
-	uverbs_uobject_put(uobj);
+	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+			   RDMACG_RESOURCE_HCA_OBJECT);
 
 	return 0;
 }
 
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
+{
+	spin_lock(&uobj->ufile->idr_lock);
+	idr_remove(&uobj->ufile->idr, uobj->id);
+	spin_unlock(&uobj->ufile->idr_lock);
+	/* Matches the kref in alloc_commit_idr_uobject */
+	uverbs_uobject_put(uobj);
+}
+
 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 {
 	put_unused_fd(uobj->id);
 }
 
-static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
-						 enum rdma_remove_reason why)
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+					      enum rdma_remove_reason why)
 {
 	const struct uverbs_obj_fd_type *fd_type =
 		container_of(uobj->type, struct uverbs_obj_fd_type, type);
@@ -598,6 +609,10 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
 	return 0;
 }
 
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
+{
+}
+
 static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
 	struct ib_uverbs_file *ufile = uobj->ufile;
@@ -741,13 +756,41 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 	uverbs_uobject_put(uobj);
 }
 
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	spin_lock_init(&ufile->idr_lock);
+	idr_init(&ufile->idr);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	struct ib_uobject *entry;
+	int id;
+
+	/*
+	 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+	 * there are no HW objects left, however the IDR is still populated
+	 * with anything that has not been cleaned up by userspace. Since the
+	 * kref on ufile is 0, nothing is allowed to call lookup_get.
+	 *
+	 * This is an optimized equivalent to remove_handle_idr_uobject
+	 */
+	idr_for_each_entry(&ufile->idr, entry, id) {
+		WARN_ON(entry->object);
+		uverbs_uobject_put(entry);
+	}
+
+	idr_destroy(&ufile->idr);
+}
+
 const struct uverbs_obj_type_class uverbs_idr_class = {
 	.alloc_begin = alloc_begin_idr_uobject,
 	.lookup_get = lookup_get_idr_uobject,
 	.alloc_commit = alloc_commit_idr_uobject,
 	.alloc_abort = alloc_abort_idr_uobject,
 	.lookup_put = lookup_put_idr_uobject,
-	.remove_commit = remove_commit_idr_uobject,
+	.destroy_hw = destroy_hw_idr_uobject,
+	.remove_handle = remove_handle_idr_uobject,
 	/*
 	 * When we destroy an object, we first just lock it for WRITE and
 	 * actually DESTROY it in the finalize stage. So, the problematic
@@ -945,7 +988,8 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
 	.alloc_commit = alloc_commit_fd_uobject,
 	.alloc_abort = alloc_abort_fd_uobject,
 	.lookup_put = lookup_put_fd_uobject,
-	.remove_commit = remove_commit_fd_uobject,
+	.destroy_hw = destroy_hw_fd_uobject,
+	.remove_handle = remove_handle_fd_uobject,
 	.needs_kfree_rcu = false,
 };
 EXPORT_SYMBOL(uverbs_fd_class);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index e4d8b985c311..b2e85ce65b78 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -110,4 +110,7 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 			   enum uverbs_obj_access access,
 			   bool commit);
 
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index a3213245aab2..6f62146e9738 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -253,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref)
 	struct ib_device *ib_dev;
 	int srcu_key;
 
+	release_ufile_idr_uobject(file);
+
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 	ib_dev = srcu_dereference(file->device->ib_dev,
 				  &file->device->disassociate_srcu);
@@ -867,8 +869,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	}
 
 	file->device	 = dev;
-	spin_lock_init(&file->idr_lock);
-	idr_init(&file->idr);
 	kref_init(&file->ref);
 	mutex_init(&file->ucontext_lock);
 
@@ -885,6 +885,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
 	file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
 
+	setup_ufile_idr_uobject(file);
+
 	return nonseekable_open(inode, filp);
 
 err_module:
@@ -904,7 +906,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 	struct ib_uverbs_file *file = filp->private_data;
 
 	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
-	idr_destroy(&file->idr);
 
 	mutex_lock(&file->device->lists_mutex);
 	if (!file->is_closed) {
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index f64f413cecac..1ab9a85eebd9 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -61,6 +61,7 @@ enum rdma_lookup_mode {
  * Destruction flow:
  *   lookup_get(exclusive=true) & uverbs_try_lock_object
  *   remove_commit
+ *   remove_handle (optional)
  *   lookup_put(exclusive=true) via rdma_lookup_put_uobject
  *
  * Allocate Error flow #1
@@ -92,8 +93,9 @@ struct uverbs_obj_type_class {
 					 enum rdma_lookup_mode mode);
 	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
 	/* This does not consume the kref on uobj */
-	int __must_check (*remove_commit)(struct ib_uobject *uobj,
-					  enum rdma_remove_reason why);
+	int __must_check (*destroy_hw)(struct ib_uobject *uobj,
+				       enum rdma_remove_reason why);
+	void (*remove_handle)(struct ib_uobject *uobj);
 	u8    needs_kfree_rcu;
 };
 
-- 
cgit v1.2.3


From dceb219fc60766006bfe8e53afa62816b94fcb11 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Wed, 1 Aug 2018 13:49:32 +0800
Subject: drm/ttm: Add ttm_set_pages_wc and ttm_set_pages_uc helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These two helpers will be used on set page caching.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/drm/ttm/ttm_set_memory.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/drm/ttm/ttm_set_memory.h b/include/drm/ttm/ttm_set_memory.h
index a70723cf208b..7c492b49e38c 100644
--- a/include/drm/ttm/ttm_set_memory.h
+++ b/include/drm/ttm/ttm_set_memory.h
@@ -57,6 +57,18 @@ static inline int ttm_set_pages_wb(struct page *page, int numpages)
 	return set_pages_wb(page, numpages);
 }
 
+static inline int ttm_set_pages_wc(struct page *page, int numpages)
+{
+	unsigned long addr = (unsigned long)page_address(page);
+
+	return set_memory_wc(addr, numpages);
+}
+
+static inline int ttm_set_pages_uc(struct page *page, int numpages)
+{
+	return set_pages_uc(page, numpages);
+}
+
 #else /* for CONFIG_X86 */
 
 #if IS_ENABLED(CONFIG_AGP)
@@ -123,6 +135,16 @@ static inline int ttm_set_pages_wb(struct page *page, int numpages)
 
 #endif /* for CONFIG_AGP */
 
+static inline int ttm_set_pages_wc(struct page *page, int numpages)
+{
+	return 0;
+}
+
+static inline int ttm_set_pages_uc(struct page *page, int numpages)
+{
+	return 0;
+}
+
 #endif /* for CONFIG_X86 */
 
 #endif
-- 
cgit v1.2.3


From c42b65e363ce97a828f81b59033c3558f8fa7f70 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 1 Aug 2018 15:42:56 -0700
Subject: bitmap: Add bitmap_alloc(), bitmap_zalloc() and bitmap_free()

A lot of code become ugly because of open coding allocations for bitmaps.

Introduce three helpers to allow users be more clear of intention
and keep their code neat.

Note, due to multiple circular dependencies we may not provide
the helpers as inliners. For now we keep them exported and, perhaps,
at some point in the future we will sort out header inclusion and
inheritance.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/bitmap.h |  8 ++++++++
 lib/bitmap.c           | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 1ee46f492267..acf5e8df3504 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -104,6 +104,14 @@
  * contain all bit positions from 0 to 'bits' - 1.
  */
 
+/*
+ * Allocation and deallocation of bitmap.
+ * Provided in lib/bitmap.c to avoid circular dependency.
+ */
+extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
+extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+extern void bitmap_free(const unsigned long *bitmap);
+
 /*
  * lib/bitmap.c provides these functions:
  */
diff --git a/lib/bitmap.c b/lib/bitmap.c
index a42eff7e8c48..5b476d8414be 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -13,6 +13,7 @@
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 
@@ -1128,6 +1129,25 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
 EXPORT_SYMBOL(bitmap_copy_le);
 #endif
 
+unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags)
+{
+	return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long),
+			     flags);
+}
+EXPORT_SYMBOL(bitmap_alloc);
+
+unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags)
+{
+	return bitmap_alloc(nbits, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(bitmap_zalloc);
+
+void bitmap_free(const unsigned long *bitmap)
+{
+	kfree(bitmap);
+}
+EXPORT_SYMBOL(bitmap_free);
+
 #if BITS_PER_LONG == 64
 /**
  * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap
-- 
cgit v1.2.3


From 3ebea280d7e9b610fa3d31c9cfd556b1705eeedf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:08:30 -0400
Subject: ring-buffer: Make ring_buffer_record_is_on() return bool

The value of ring_buffer_record_is_on() is either true or false, so have its
return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 003d09ab308d..5176124fc4ba 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -164,7 +164,7 @@ void ring_buffer_record_disable(struct ring_buffer *buffer);
 void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_on(struct ring_buffer *buffer);
 int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0b0b688ea166..ffb43cbc5c13 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3221,7 +3221,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_on);
  *
  * Returns true if the ring buffer is in a state that it accepts writes.
  */
-int ring_buffer_record_is_on(struct ring_buffer *buffer)
+bool ring_buffer_record_is_on(struct ring_buffer *buffer)
 {
 	return !atomic_read(&buffer->record_disabled);
 }
-- 
cgit v1.2.3


From d7224c0e128c7337c0b0f66ac20921fbbf4efc14 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 1 Aug 2018 21:09:50 -0400
Subject: ring-buffer: Make ring_buffer_record_is_set_on() return bool

The value of ring_buffer_record_is_set_on() is either true or false, so have
its return value be bool.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5176124fc4ba..0940fda59872 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -165,7 +165,7 @@ void ring_buffer_record_enable(struct ring_buffer *buffer);
 void ring_buffer_record_off(struct ring_buffer *buffer);
 void ring_buffer_record_on(struct ring_buffer *buffer);
 bool ring_buffer_record_is_on(struct ring_buffer *buffer);
-int ring_buffer_record_is_set_on(struct ring_buffer *buffer);
+bool ring_buffer_record_is_set_on(struct ring_buffer *buffer);
 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu);
 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index ffb43cbc5c13..b386830b4d51 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3237,7 +3237,7 @@ bool ring_buffer_record_is_on(struct ring_buffer *buffer)
  * ring_buffer_record_disable(), as that is a temporary disabling of
  * the ring buffer.
  */
-int ring_buffer_record_is_set_on(struct ring_buffer *buffer)
+bool ring_buffer_record_is_set_on(struct ring_buffer *buffer)
 {
 	return !(atomic_read(&buffer->record_disabled) & RB_BUFFER_OFF);
 }
-- 
cgit v1.2.3


From c971e6a006175bd0f195c6346c4e8bc4089bec00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 28 May 2018 18:27:19 -0400
Subject: kill d_instantiate_no_diralias()

The only user is fuse_create_new_entry(), and there it's used to
mitigate the same mkdir/open-by-handle race as in nfs_mkdir().
The same solution applies - unhash the mkdir argument, then
call d_splice_alias() and if that returns a reference to preexisting
alias, dput() and report success.  ->mkdir() argument left unhashed
negative with the preexisting alias moved in the right place is just
fine from the ->mkdir() callers point of view.

Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c            | 27 ---------------------------
 fs/fuse/dir.c          | 15 +++++++++++----
 include/linux/dcache.h |  1 -
 3 files changed, 11 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 0e8e5de3c48a..a7d9e7a4c283 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1899,33 +1899,6 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 }
 EXPORT_SYMBOL(d_instantiate_new);
 
-/**
- * d_instantiate_no_diralias - instantiate a non-aliased dentry
- * @entry: dentry to complete
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry.  If a directory alias is found, then
- * return an error (and drop inode).  Together with d_materialise_unique() this
- * guarantees that a directory inode may never have more than one alias.
- */
-int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
-{
-	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
-	security_d_instantiate(entry, inode);
-	spin_lock(&inode->i_lock);
-	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
-		spin_unlock(&inode->i_lock);
-		iput(inode);
-		return -EBUSY;
-	}
-	__d_instantiate(entry, inode);
-	spin_unlock(&inode->i_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(d_instantiate_no_diralias);
-
 struct dentry *d_make_root(struct inode *root_inode)
 {
 	struct dentry *res = NULL;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 56231b31f806..4bbae6ac75c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -539,6 +539,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 {
 	struct fuse_entry_out outarg;
 	struct inode *inode;
+	struct dentry *d;
 	int err;
 	struct fuse_forget_link *forget;
 
@@ -570,11 +571,17 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
 	}
 	kfree(forget);
 
-	err = d_instantiate_no_diralias(entry, inode);
-	if (err)
-		return err;
+	d_drop(entry);
+	d = d_splice_alias(inode, entry);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
 
-	fuse_change_entry_timeout(entry, &outarg);
+	if (d) {
+		fuse_change_entry_timeout(d, &outarg);
+		dput(d);
+	} else {
+		fuse_change_entry_timeout(entry, &outarg);
+	}
 	fuse_invalidate_attr(dir);
 	return 0;
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 66c6e17e61e5..0b83629a3d8f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -227,7 +227,6 @@ extern void d_instantiate(struct dentry *, struct inode *);
 extern void d_instantiate_new(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
 extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *);
-extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
 extern void __d_drop(struct dentry *dentry);
 extern void d_drop(struct dentry *dentry);
 extern void d_delete(struct dentry *);
-- 
cgit v1.2.3


From 7acece71a517cad83a0842a94d94c13f271b680c Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Tue, 31 Jul 2018 05:41:38 -0500
Subject: serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE

There is no more need for SCIx_RZ_SCIFA_REGTYPE now that
SCIx_SH4_SCIF_REGTYPE can provide the same register/address definitions.

Also, R7S9210 no longer needs a special compatible since the standard
"renesas,scif" will work just fine.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 31 -------------------------------
 include/linux/serial_sci.h  |  1 -
 2 files changed, 32 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index c29244f76057..54ea58bbe3c9 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -293,33 +293,6 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
-	/*
-	 * The "SCIFA" that is in RZ/T and RZ/A2.
-	 * It looks like a normal SCIF with FIFO data, but with a
-	 * compressed address space. Also, the break out of interrupts
-	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
-	 */
-	[SCIx_RZ_SCIFA_REGTYPE] = {
-		.regs = {
-			[SCSMR]		= { 0x00, 16 },
-			[SCBRR]		= { 0x02,  8 },
-			[SCSCR]		= { 0x04, 16 },
-			[SCxTDR]	= { 0x06,  8 },
-			[SCxSR]		= { 0x08, 16 },
-			[SCxRDR]	= { 0x0A,  8 },
-			[SCFCR]		= { 0x0C, 16 },
-			[SCFDR]		= { 0x0E, 16 },
-			[SCSPTR]	= { 0x10, 16 },
-			[SCLSR]		= { 0x12, 16 },
-		},
-		.fifosize = 16,
-		.overrun_reg = SCLSR,
-		.overrun_mask = SCLSR_ORER,
-		.sampling_rate_mask = SCI_SR(32),
-		.error_mask = SCIF_DEFAULT_ERROR_MASK,
-		.error_clear = SCIF_ERROR_CLEAR,
-	},
-
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -3148,10 +3121,6 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
-	{
-		.compatible = "renesas,scif-r7s9210",
-		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
-	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 1c89611e0e06..c0e795d95477 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,7 +36,6 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
-	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From d3b26dd7cb0e3433bfd3c1d4dcf74c6039bb49fb Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 2 Aug 2018 03:08:23 +0000
Subject: Drivers: hv: vmbus: Reset the channel callback in
 vmbus_onoffer_rescind()

Before setting channel->rescind in vmbus_rescind_cleanup(), we should make
sure the channel callback won't run any more, otherwise a high-level
driver like pci_hyperv, which may be infinitely waiting for the host VSP's
response and notices the channel has been rescinded, can't safely give
up: e.g., in hv_pci_protocol_negotiation() -> wait_for_response(), it's
unsafe to exit from wait_for_response() and proceed with the on-stack
variable "comp_pkt" popped. The issue was originally spotted by
Michael Kelley <mikelley@microsoft.com>.

In vmbus_close_internal(), the patch also minimizes the range protected by
disabling/enabling channel->callback_event: we don't really need that for
the whole function.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Cc: stable@vger.kernel.org
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 40 ++++++++++++++++++++++++----------------
 drivers/hv/channel_mgmt.c |  6 ++++++
 include/linux/hyperv.h    |  2 ++
 3 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index ba0a092ae085..c3949220b770 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -558,11 +558,8 @@ static void reset_channel_cb(void *arg)
 	channel->onchannel_callback = NULL;
 }
 
-static int vmbus_close_internal(struct vmbus_channel *channel)
+void vmbus_reset_channel_cb(struct vmbus_channel *channel)
 {
-	struct vmbus_channel_close_channel *msg;
-	int ret;
-
 	/*
 	 * vmbus_on_event(), running in the per-channel tasklet, can race
 	 * with vmbus_close_internal() in the case of SMP guest, e.g., when
@@ -572,6 +569,29 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	 */
 	tasklet_disable(&channel->callback_event);
 
+	channel->sc_creation_callback = NULL;
+
+	/* Stop the callback asap */
+	if (channel->target_cpu != get_cpu()) {
+		put_cpu();
+		smp_call_function_single(channel->target_cpu, reset_channel_cb,
+					 channel, true);
+	} else {
+		reset_channel_cb(channel);
+		put_cpu();
+	}
+
+	/* Re-enable tasklet for use on re-open */
+	tasklet_enable(&channel->callback_event);
+}
+
+static int vmbus_close_internal(struct vmbus_channel *channel)
+{
+	struct vmbus_channel_close_channel *msg;
+	int ret;
+
+	vmbus_reset_channel_cb(channel);
+
 	/*
 	 * In case a device driver's probe() fails (e.g.,
 	 * util_probe() -> vmbus_open() returns -ENOMEM) and the device is
@@ -585,16 +605,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	}
 
 	channel->state = CHANNEL_OPEN_STATE;
-	channel->sc_creation_callback = NULL;
-	/* Stop callback and cancel the timer asap */
-	if (channel->target_cpu != get_cpu()) {
-		put_cpu();
-		smp_call_function_single(channel->target_cpu, reset_channel_cb,
-					 channel, true);
-	} else {
-		reset_channel_cb(channel);
-		put_cpu();
-	}
 
 	/* Send a closing message */
 
@@ -639,8 +649,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
 out:
-	/* re-enable tasklet for use on re-open */
-	tasklet_enable(&channel->callback_event);
 	return ret;
 }
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f3b551a50653..0f0e091c117c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -892,6 +892,12 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
+	/*
+	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
+	 * should make sure the channel callback is not running any more.
+	 */
+	vmbus_reset_channel_cb(channel);
+
 	/*
 	 * Now wait for offer handling to complete.
 	 */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2330f08062c7..efda23cf32c7 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1061,6 +1061,8 @@ extern int vmbus_establish_gpadl(struct vmbus_channel *channel,
 extern int vmbus_teardown_gpadl(struct vmbus_channel *channel,
 				     u32 gpadl_handle);
 
+void vmbus_reset_channel_cb(struct vmbus_channel *channel);
+
 extern int vmbus_recvpacket(struct vmbus_channel *channel,
 				  void *buffer,
 				  u32 bufferlen,
-- 
cgit v1.2.3


From 72755eed6c1c5312425ad3d78bf0b916769e6125 Mon Sep 17 00:00:00 2001
From: Roman Kiryanov <rkir@google.com>
Date: Mon, 23 Jul 2018 15:47:26 -0700
Subject: goldfish: Add missing includes to goldfish.h

goldfish.h refers to external symbols such as
dma_addr_t and writel. This causes compilation errors
if this file is included before other header files.

The mentioned symbols are defined in types.h (dma_addr_t)
and io.h (writel).

Signed-off-by: Roman Kiryanov <rkir@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 2835c150c3ff..159b4191f15d 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -2,6 +2,9 @@
 #ifndef __LINUX_GOLDFISH_H
 #define __LINUX_GOLDFISH_H
 
+#include <linux/types.h>
+#include <linux/io.h>
+
 /* Helpers for Goldfish virtual platform */
 
 static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
-- 
cgit v1.2.3


From 68275680ea12e9f378e51ff525dacbb76f006764 Mon Sep 17 00:00:00 2001
From: Roman Kiryanov <rkir@google.com>
Date: Mon, 23 Jul 2018 15:47:27 -0700
Subject: goldfish: Use dedicated macros instead of manual bit shifting

There are dedicated macros (lower_32_bits and upper_32_bits)
available to extract the lower and upper 32 bits. They provide
better readability and could prevent some compilation warnings.

Signed-off-by: Roman Kiryanov <rkir@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 159b4191f15d..265a099cd3b8 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_GOLDFISH_H
 #define __LINUX_GOLDFISH_H
 
+#include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/io.h>
 
@@ -10,9 +11,11 @@
 static inline void gf_write_ptr(const void *ptr, void __iomem *portl,
 				void __iomem *porth)
 {
-	writel((u32)(unsigned long)ptr, portl);
+	const unsigned long addr = (unsigned long)ptr;
+
+	writel(lower_32_bits(addr), portl);
 #ifdef CONFIG_64BIT
-	writel((unsigned long)ptr >> 32, porth);
+	writel(upper_32_bits(addr), porth);
 #endif
 }
 
@@ -20,9 +23,9 @@ static inline void gf_write_dma_addr(const dma_addr_t addr,
 				     void __iomem *portl,
 				     void __iomem *porth)
 {
-	writel((u32)addr, portl);
+	writel(lower_32_bits(addr), portl);
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	writel(addr >> 32, porth);
+	writel(upper_32_bits(addr), porth);
 #endif
 }
 
-- 
cgit v1.2.3


From fdec79c18b08c68cfa079f2d3ee23c5a120a2eda Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 2 Aug 2018 01:47:30 +0000
Subject: ASoC: fsi: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sh_fsi.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/sh_fsi.h b/include/sound/sh_fsi.h
index 7a9710b4b799..89eafe23ef88 100644
--- a/include/sound/sh_fsi.h
+++ b/include/sound/sh_fsi.h
@@ -1,16 +1,13 @@
-#ifndef __SOUND_FSI_H
-#define __SOUND_FSI_H
-
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * Fifo-attached Serial Interface (FSI) support for SH7724
  *
  * Copyright (C) 2009 Renesas Solutions Corp.
  * Kuninori Morimoto <morimoto.kuninori@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
+#ifndef __SOUND_FSI_H
+#define __SOUND_FSI_H
+
 #include <linux/clk.h>
 #include <sound/soc.h>
 
-- 
cgit v1.2.3


From 83b15832ab91c9a4651decb6dc40075dd979d443 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 25 Jul 2018 12:38:10 -0400
Subject: media: doc-rst: Add packed Bayer raw14 pixel formats

These formats are compressed 14-bit raw bayer formats with four different
pixel orders. They are similar to 10-bit variants. The formats added by
this patch are

	V4L2_PIX_FMT_SBGGR14P
	V4L2_PIX_FMT_SGBRG14P
	V4L2_PIX_FMT_SGRBG14P
	V4L2_PIX_FMT_SRGGB14P

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Todor Tomov <todor.tomov@linaro.org>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-rgb.rst      |   1 +
 Documentation/media/uapi/v4l/pixfmt-srggb14p.rst | 127 +++++++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ioctl.c             |   4 +
 include/uapi/linux/videodev2.h                   |   5 +
 4 files changed, 137 insertions(+)
 create mode 100644 Documentation/media/uapi/v4l/pixfmt-srggb14p.rst

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/pixfmt-rgb.rst b/Documentation/media/uapi/v4l/pixfmt-rgb.rst
index cf2ef7df9616..1f9a7e3a07c9 100644
--- a/Documentation/media/uapi/v4l/pixfmt-rgb.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-rgb.rst
@@ -19,4 +19,5 @@ RGB Formats
     pixfmt-srggb10-ipu3
     pixfmt-srggb12
     pixfmt-srggb12p
+    pixfmt-srggb14p
     pixfmt-srggb16
diff --git a/Documentation/media/uapi/v4l/pixfmt-srggb14p.rst b/Documentation/media/uapi/v4l/pixfmt-srggb14p.rst
new file mode 100644
index 000000000000..88d20c0e4282
--- /dev/null
+++ b/Documentation/media/uapi/v4l/pixfmt-srggb14p.rst
@@ -0,0 +1,127 @@
+.. -*- coding: utf-8; mode: rst -*-
+
+.. _V4L2-PIX-FMT-SRGGB14P:
+.. _v4l2-pix-fmt-sbggr14p:
+.. _v4l2-pix-fmt-sgbrg14p:
+.. _v4l2-pix-fmt-sgrbg14p:
+
+*******************************************************************************************************************************
+V4L2_PIX_FMT_SRGGB14P ('pRCC'), V4L2_PIX_FMT_SGRBG14P ('pgCC'), V4L2_PIX_FMT_SGBRG14P ('pGCC'), V4L2_PIX_FMT_SBGGR14P ('pBCC'),
+*******************************************************************************************************************************
+
+*man V4L2_PIX_FMT_SRGGB14P(2)*
+
+V4L2_PIX_FMT_SGRBG14P
+V4L2_PIX_FMT_SGBRG14P
+V4L2_PIX_FMT_SBGGR14P
+14-bit packed Bayer formats
+
+
+Description
+===========
+
+These four pixel formats are packed raw sRGB / Bayer formats with 14
+bits per colour. Every four consecutive samples are packed into seven
+bytes. Each of the first four bytes contain the eight high order bits
+of the pixels, and the three following bytes contains the six least
+significants bits of each pixel, in the same order.
+
+Each n-pixel row contains n/2 green samples and n/2 blue or red samples,
+with alternating green-red and green-blue rows. They are conventionally
+described as GRGR... BGBG..., RGRG... GBGB..., etc. Below is an example
+of one of these formats:
+
+**Byte Order.**
+Each cell is one byte.
+
+
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       2 1 1 1 1 1 1 1
+
+
+    -  .. row 1
+
+       -  start + 0:
+
+       -  B\ :sub:`00high`
+
+       -  G\ :sub:`01high`
+
+       -  B\ :sub:`02high`
+
+       -  G\ :sub:`03high`
+
+       -  G\ :sub:`01low bits 1--0`\ (bits 7--6)
+	  B\ :sub:`00low bits 5--0`\ (bits 5--0)
+
+       -  R\ :sub:`02low bits 3--0`\ (bits 7--4)
+	  G\ :sub:`01low bits 5--2`\ (bits 3--0)
+
+       -  G\ :sub:`03low bits 5--0`\ (bits 7--2)
+	  R\ :sub:`02low bits 5--4`\ (bits 1--0)
+
+    -  .. row 2
+
+       -  start + 7:
+
+       -  G\ :sub:`00high`
+
+       -  R\ :sub:`01high`
+
+       -  G\ :sub:`02high`
+
+       -  R\ :sub:`03high`
+
+       -  R\ :sub:`01low bits 1--0`\ (bits 7--6)
+	  G\ :sub:`00low bits 5--0`\ (bits 5--0)
+
+       -  G\ :sub:`02low bits 3--0`\ (bits 7--4)
+	  R\ :sub:`01low bits 5--2`\ (bits 3--0)
+
+       -  R\ :sub:`03low bits 5--0`\ (bits 7--2)
+	  G\ :sub:`02low bits 5--4`\ (bits 1--0)
+
+    -  .. row 3
+
+       -  start + 14
+
+       -  B\ :sub:`20high`
+
+       -  G\ :sub:`21high`
+
+       -  B\ :sub:`22high`
+
+       -  G\ :sub:`23high`
+
+       -  G\ :sub:`21low bits 1--0`\ (bits 7--6)
+	  B\ :sub:`20low bits 5--0`\ (bits 5--0)
+
+       -  R\ :sub:`22low bits 3--0`\ (bits 7--4)
+	  G\ :sub:`21low bits 5--2`\ (bits 3--0)
+
+       -  G\ :sub:`23low bits 5--0`\ (bits 7--2)
+	  R\ :sub:`22low bits 5--4`\ (bits 1--0)
+
+    -  .. row 4
+
+       -  start + 21
+
+       -  G\ :sub:`30high`
+
+       -  R\ :sub:`31high`
+
+       -  G\ :sub:`32high`
+
+       -  R\ :sub:`33high`
+
+       -  R\ :sub:`31low bits 1--0`\ (bits 7--6)
+	  G\ :sub:`30low bits 5--0`\ (bits 5--0)
+
+       -  G\ :sub:`32low bits 3--0`\ (bits 7--4)
+	  R\ :sub:`31low bits 5--2`\ (bits 3--0)
+
+       -  R\ :sub:`33low bits 5--0`\ (bits 7--2)
+	  G\ :sub:`32low bits 5--4`\ (bits 1--0)
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 26d9702069fd..fd8d15f12307 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1259,6 +1259,10 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_PIX_FMT_SGBRG12P:	descr = "12-bit Bayer GBGB/RGRG Packed"; break;
 	case V4L2_PIX_FMT_SGRBG12P:	descr = "12-bit Bayer GRGR/BGBG Packed"; break;
 	case V4L2_PIX_FMT_SRGGB12P:	descr = "12-bit Bayer RGRG/GBGB Packed"; break;
+	case V4L2_PIX_FMT_SBGGR14P:	descr = "14-bit Bayer BGBG/GRGR Packed"; break;
+	case V4L2_PIX_FMT_SGBRG14P:	descr = "14-bit Bayer GBGB/RGRG Packed"; break;
+	case V4L2_PIX_FMT_SGRBG14P:	descr = "14-bit Bayer GRGR/BGBG Packed"; break;
+	case V4L2_PIX_FMT_SRGGB14P:	descr = "14-bit Bayer RGRG/GBGB Packed"; break;
 	case V4L2_PIX_FMT_SBGGR16:	descr = "16-bit Bayer BGBG/GRGR"; break;
 	case V4L2_PIX_FMT_SGBRG16:	descr = "16-bit Bayer GBGB/RGRG"; break;
 	case V4L2_PIX_FMT_SGRBG16:	descr = "16-bit Bayer GRGR/BGBG"; break;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index d8b33095abe0..2c20b6aa5335 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -609,6 +609,11 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_SGBRG12P v4l2_fourcc('p', 'G', 'C', 'C')
 #define V4L2_PIX_FMT_SGRBG12P v4l2_fourcc('p', 'g', 'C', 'C')
 #define V4L2_PIX_FMT_SRGGB12P v4l2_fourcc('p', 'R', 'C', 'C')
+	/* 14bit raw bayer packed, 7 bytes for every 4 pixels */
+#define V4L2_PIX_FMT_SBGGR14P v4l2_fourcc('p', 'B', 'E', 'E')
+#define V4L2_PIX_FMT_SGBRG14P v4l2_fourcc('p', 'G', 'E', 'E')
+#define V4L2_PIX_FMT_SGRBG14P v4l2_fourcc('p', 'g', 'E', 'E')
+#define V4L2_PIX_FMT_SRGGB14P v4l2_fourcc('p', 'R', 'E', 'E')
 #define V4L2_PIX_FMT_SBGGR16 v4l2_fourcc('B', 'Y', 'R', '2') /* 16  BGBG.. GRGR.. */
 #define V4L2_PIX_FMT_SGBRG16 v4l2_fourcc('G', 'B', '1', '6') /* 16  GBGB.. RGRG.. */
 #define V4L2_PIX_FMT_SGRBG16 v4l2_fourcc('G', 'R', '1', '6') /* 16  GRGR.. BGBG.. */
-- 
cgit v1.2.3


From 451af0bf04bd313bbaddd67a8be09d10210780bd Mon Sep 17 00:00:00 2001
From: Todor Tomov <todor.tomov@linaro.org>
Date: Wed, 25 Jul 2018 12:38:11 -0400
Subject: media: v4l: Add new 2X8 10-bit grayscale media bus code

The code will be called MEDIA_BUS_FMT_Y10_2X8_PADHI_LE.
It is similar to MEDIA_BUS_FMT_SBGGR10_2X8_PADHI_LE
but MEDIA_BUS_FMT_Y10_2X8_PADHI_LE describes grayscale
data.

Signed-off-by: Todor Tomov <todor.tomov@linaro.org>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/subdev-formats.rst | 72 +++++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h           |  3 +-
 2 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/subdev-formats.rst b/Documentation/media/uapi/v4l/subdev-formats.rst
index a4739f79d9c3..8e73fcfc6900 100644
--- a/Documentation/media/uapi/v4l/subdev-formats.rst
+++ b/Documentation/media/uapi/v4l/subdev-formats.rst
@@ -4318,6 +4318,78 @@ the following codes.
       - y\ :sub:`2`
       - y\ :sub:`1`
       - y\ :sub:`0`
+    * .. _MEDIA-BUS-FMT-Y10-2X8-PADHI_LE:
+
+      - MEDIA_BUS_FMT_Y10_2X8_PADHI_LE
+      - 0x202c
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      - y\ :sub:`7`
+      - y\ :sub:`6`
+      - y\ :sub:`5`
+      - y\ :sub:`4`
+      - y\ :sub:`3`
+      - y\ :sub:`2`
+      - y\ :sub:`1`
+      - y\ :sub:`0`
+    * -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      - 0
+      - 0
+      - 0
+      - 0
+      - 0
+      - 0
+      - y\ :sub:`9`
+      - y\ :sub:`8`
     * .. _MEDIA-BUS-FMT-UYVY10-2X10:
 
       - MEDIA_BUS_FMT_UYVY10_2X10
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 9e3511742fdc..d6a5a3bfe6c4 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -62,7 +62,7 @@
 #define MEDIA_BUS_FMT_RGB121212_1X36		0x1019
 #define MEDIA_BUS_FMT_RGB161616_1X48		0x101a
 
-/* YUV (including grey) - next is	0x202c */
+/* YUV (including grey) - next is	0x202d */
 #define MEDIA_BUS_FMT_Y8_1X8			0x2001
 #define MEDIA_BUS_FMT_UV8_1X8			0x2015
 #define MEDIA_BUS_FMT_UYVY8_1_5X8		0x2002
@@ -74,6 +74,7 @@
 #define MEDIA_BUS_FMT_YUYV8_2X8			0x2008
 #define MEDIA_BUS_FMT_YVYU8_2X8			0x2009
 #define MEDIA_BUS_FMT_Y10_1X10			0x200a
+#define MEDIA_BUS_FMT_Y10_2X8_PADHI_LE		0x202c
 #define MEDIA_BUS_FMT_UYVY10_2X10		0x2018
 #define MEDIA_BUS_FMT_VYUY10_2X10		0x2019
 #define MEDIA_BUS_FMT_YUYV10_2X10		0x200b
-- 
cgit v1.2.3


From 6e15bec49f366511ec024a556505316222ef4ade Mon Sep 17 00:00:00 2001
From: Todor Tomov <todor.tomov@linaro.org>
Date: Wed, 25 Jul 2018 12:38:12 -0400
Subject: media: v4l: Add new 10-bit packed grayscale format

The new format will be called V4L2_PIX_FMT_Y10P.
It is similar to the V4L2_PIX_FMT_SBGGR10P family formats
but V4L2_PIX_FMT_Y10P is a grayscale format.

Signed-off-by: Todor Tomov <todor.tomov@linaro.org>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-y10p.rst | 33 ++++++++++++++++++++++++++++
 Documentation/media/uapi/v4l/yuv-formats.rst |  1 +
 drivers/media/v4l2-core/v4l2-ioctl.c         |  1 +
 include/uapi/linux/videodev2.h               |  1 +
 4 files changed, 36 insertions(+)
 create mode 100644 Documentation/media/uapi/v4l/pixfmt-y10p.rst

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/pixfmt-y10p.rst b/Documentation/media/uapi/v4l/pixfmt-y10p.rst
new file mode 100644
index 000000000000..13b571306915
--- /dev/null
+++ b/Documentation/media/uapi/v4l/pixfmt-y10p.rst
@@ -0,0 +1,33 @@
+.. -*- coding: utf-8; mode: rst -*-
+
+.. _V4L2-PIX-FMT-Y10P:
+
+******************************
+V4L2_PIX_FMT_Y10P ('Y10P')
+******************************
+
+Grey-scale image as a MIPI RAW10 packed array
+
+
+Description
+===========
+
+This is a packed grey-scale image format with a depth of 10 bits per
+pixel. Every four consecutive pixels are packed into 5 bytes. Each of
+the first 4 bytes contain the 8 high order bits of the pixels, and
+the 5th byte contains the 2 least significants bits of each pixel,
+in the same order.
+
+**Bit-packed representation.**
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths: 8 8 8 8 64
+
+    * - Y'\ :sub:`00[9:2]`
+      - Y'\ :sub:`01[9:2]`
+      - Y'\ :sub:`02[9:2]`
+      - Y'\ :sub:`03[9:2]`
+      - Y'\ :sub:`03[1:0]`\ (bits 7--6) Y'\ :sub:`02[1:0]`\ (bits 5--4)
+	Y'\ :sub:`01[1:0]`\ (bits 3--2) Y'\ :sub:`00[1:0]`\ (bits 1--0)
diff --git a/Documentation/media/uapi/v4l/yuv-formats.rst b/Documentation/media/uapi/v4l/yuv-formats.rst
index 3334ea445657..9ab0592d08da 100644
--- a/Documentation/media/uapi/v4l/yuv-formats.rst
+++ b/Documentation/media/uapi/v4l/yuv-formats.rst
@@ -29,6 +29,7 @@ to brightness information.
     pixfmt-y10
     pixfmt-y12
     pixfmt-y10b
+    pixfmt-y10p
     pixfmt-y16
     pixfmt-y16-be
     pixfmt-y8i
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index fd8d15f12307..54afc9c7ee6e 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1184,6 +1184,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_PIX_FMT_Y16:		descr = "16-bit Greyscale"; break;
 	case V4L2_PIX_FMT_Y16_BE:	descr = "16-bit Greyscale BE"; break;
 	case V4L2_PIX_FMT_Y10BPACK:	descr = "10-bit Greyscale (Packed)"; break;
+	case V4L2_PIX_FMT_Y10P:		descr = "10-bit Greyscale (MIPI Packed)"; break;
 	case V4L2_PIX_FMT_Y8I:		descr = "Interleaved 8-bit Greyscale"; break;
 	case V4L2_PIX_FMT_Y12I:		descr = "Interleaved 12-bit Greyscale"; break;
 	case V4L2_PIX_FMT_Z16:		descr = "16-bit Depth"; break;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 2c20b6aa5335..5d1a3685bea9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -522,6 +522,7 @@ struct v4l2_pix_format {
 
 /* Grey bit-packed formats */
 #define V4L2_PIX_FMT_Y10BPACK    v4l2_fourcc('Y', '1', '0', 'B') /* 10  Greyscale bit-packed */
+#define V4L2_PIX_FMT_Y10P    v4l2_fourcc('Y', '1', '0', 'P') /* 10  Greyscale, MIPI RAW10 packed */
 
 /* Palette formats */
 #define V4L2_PIX_FMT_PAL8    v4l2_fourcc('P', 'A', 'L', '8') /*  8  8-bit palette */
-- 
cgit v1.2.3


From 06bcb5168d7d49aa3ed449ff13a6d13c30afc3f0 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Thu, 2 Aug 2018 14:53:52 +0200
Subject: spi: spi-mem: Fix a typo in the documentation of struct spi_mem

Fix a typo in the @drvpriv description.

Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Acked-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index bb4bd15ae1f6..951a2e949d5f 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -122,7 +122,7 @@ struct spi_mem_op {
 /**
  * struct spi_mem - describes a SPI memory device
  * @spi: the underlying SPI device
- * @drvpriv: spi_mem_drviver private data
+ * @drvpriv: spi_mem_driver private data
  *
  * Extra information that describe the SPI memory device and may be needed by
  * the controller to properly handle this device should be placed here.
-- 
cgit v1.2.3


From 5d27a9c8ea9e967d00b92a76d4bb242bf7692f2b Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@exceet.de>
Date: Thu, 2 Aug 2018 14:53:53 +0200
Subject: spi: spi-mem: Extend the SPI mem interface to set a custom memory
 name

When porting (Q)SPI controller drivers from the MTD layer to the SPI
layer, the naming scheme for the memory devices changes. To be able
to keep compatibility with the old drivers naming scheme, a name
field is added to struct spi_mem and a hook is added to let controller
drivers set a custom name for the memory device.

Example for the FSL QSPI driver:

Name with the old driver: 21e0000.qspi,
or with multiple devices: 21e0000.qspi-0, 21e0000.qspi-1, ...

Name with the new driver without spi_mem_get_name: spi4.0

Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Frieder Schrempf <frieder.schrempf@exceet.de>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mem.c       | 28 ++++++++++++++++++++++++++++
 include/linux/spi/spi-mem.h | 12 ++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 990770dfa5cf..e43842c7a31a 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -310,6 +310,24 @@ int spi_mem_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 }
 EXPORT_SYMBOL_GPL(spi_mem_exec_op);
 
+/**
+ * spi_mem_get_name() - Return the SPI mem device name to be used by the
+ *			upper layer if necessary
+ * @mem: the SPI memory
+ *
+ * This function allows SPI mem users to retrieve the SPI mem device name.
+ * It is useful if the upper layer needs to expose a custom name for
+ * compatibility reasons.
+ *
+ * Return: a string containing the name of the memory device to be used
+ *	   by the SPI mem user
+ */
+const char *spi_mem_get_name(struct spi_mem *mem)
+{
+	return mem->name;
+}
+EXPORT_SYMBOL_GPL(spi_mem_get_name);
+
 /**
  * spi_mem_adjust_op_size() - Adjust the data size of a SPI mem operation to
  *			      match controller limitations
@@ -344,6 +362,7 @@ static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
 static int spi_mem_probe(struct spi_device *spi)
 {
 	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+	struct spi_controller *ctlr = spi->controller;
 	struct spi_mem *mem;
 
 	mem = devm_kzalloc(&spi->dev, sizeof(*mem), GFP_KERNEL);
@@ -351,6 +370,15 @@ static int spi_mem_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	mem->spi = spi;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->get_name)
+		mem->name = ctlr->mem_ops->get_name(mem);
+	else
+		mem->name = dev_name(&spi->dev);
+
+	if (IS_ERR_OR_NULL(mem->name))
+		return PTR_ERR(mem->name);
+
 	spi_set_drvdata(spi, mem);
 
 	return memdrv->probe(mem);
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 951a2e949d5f..0d64ccc4e584 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -123,6 +123,7 @@ struct spi_mem_op {
  * struct spi_mem - describes a SPI memory device
  * @spi: the underlying SPI device
  * @drvpriv: spi_mem_driver private data
+ * @name: name of the SPI memory device
  *
  * Extra information that describe the SPI memory device and may be needed by
  * the controller to properly handle this device should be placed here.
@@ -133,6 +134,7 @@ struct spi_mem_op {
 struct spi_mem {
 	struct spi_device *spi;
 	void *drvpriv;
+	char *name;
 };
 
 /**
@@ -165,6 +167,13 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
  *		    limitations)
  * @supports_op: check if an operation is supported by the controller
  * @exec_op: execute a SPI memory operation
+ * @get_name: get a custom name for the SPI mem device from the controller.
+ *	      This might be needed if the controller driver has been ported
+ *	      to use the SPI mem layer and a custom name is used to keep
+ *	      mtdparts compatible.
+ *	      Note that if the implementation of this function allocates memory
+ *	      dynamically, then it should do so with devm_xxx(), as we don't
+ *	      have a ->free_name() function.
  *
  * This interface should be implemented by SPI controllers providing an
  * high-level interface to execute SPI memory operation, which is usually the
@@ -176,6 +185,7 @@ struct spi_controller_mem_ops {
 			    const struct spi_mem_op *op);
 	int (*exec_op)(struct spi_mem *mem,
 		       const struct spi_mem_op *op);
+	const char *(*get_name)(struct spi_mem *mem);
 };
 
 /**
@@ -234,6 +244,8 @@ bool spi_mem_supports_op(struct spi_mem *mem,
 int spi_mem_exec_op(struct spi_mem *mem,
 		    const struct spi_mem_op *op);
 
+const char *spi_mem_get_name(struct spi_mem *mem);
+
 int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
 				       struct module *owner);
 
-- 
cgit v1.2.3


From 5a5ba10f44fa1cd081cec38389e1b47f438fe25b Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 26 Jul 2018 15:40:56 +0200
Subject: rtc: remove struct rtc_task

Include rtc_task members directly in rtc_timer member.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 8 ++++----
 include/linux/rtc.h     | 9 ++-------
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index d0983ed6c842..a962540faf2e 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -912,8 +912,8 @@ again:
 		timerqueue_del(&rtc->timerqueue, &timer->node);
 		trace_rtc_timer_dequeue(timer);
 		timer->enabled = 0;
-		if (timer->task.func)
-			timer->task.func(timer->task.private_data);
+		if (timer->func)
+			timer->func(timer->private_data);
 
 		trace_rtc_timer_fired(timer);
 		/* Re-add/fwd periodic timers */
@@ -968,8 +968,8 @@ void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data)
 {
 	timerqueue_init(&timer->node);
 	timer->enabled = 0;
-	timer->task.func = f;
-	timer->task.private_data = data;
+	timer->func = f;
+	timer->private_data = data;
 }
 
 /* rtc_timer_start - Sets an rtc_timer to fire in the future
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index bf4d375025d1..6aedc30003e7 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -87,16 +87,11 @@ struct rtc_class_ops {
 	int (*set_offset)(struct device *, long offset);
 };
 
-typedef struct rtc_task {
-	void (*func)(void *private_data);
-	void *private_data;
-} rtc_task_t;
-
-
 struct rtc_timer {
-	struct rtc_task	task;
 	struct timerqueue_node node;
 	ktime_t period;
+	void (*func)(void *private_data);
+	void *private_data;
 	int enabled;
 };
 
-- 
cgit v1.2.3


From 611cbc8799b672f41b6ba7afed758ad9efb959a7 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Thu, 2 Aug 2018 16:03:38 +0100
Subject: ASoC: core: remove support for card rebind using component framework

DRM based audio components get registered inside the component framework
bind callback. However component framework has a big mutex lock taken for
every call to component_add, component_del and bind, unbind callbacks.

This can lead to deadlock situation if we are trying to add new/remove
component within a bind/unbind callbacks. Which is what was happening
with bcm2837 rpi 3.

Revert this change till we sort out the mutex issue.

Reported-by: Guillaume Tucker <guillaume.tucker@collabora.com>
Reported-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  7 ------
 sound/soc/soc-core.c | 62 ----------------------------------------------------
 2 files changed, 69 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index ace474e8649e..41cec42fb456 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -17,7 +17,6 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
-#include <linux/component.h>
 #include <linux/regmap.h>
 #include <linux/log2.h>
 #include <sound/core.h>
@@ -1091,12 +1090,6 @@ struct snd_soc_card {
 
 	struct work_struct deferred_resume_work;
 
-	/* component framework related */
-	bool components_added;
-	/* set in machine driver to enable/disable auto re-binding */
-	bool auto_bind;
-	struct component_match *match;
-
 	/* lists of probed devices belonging to this card */
 	struct list_head component_dev_list;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 81b27923303d..82eb3868be67 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -279,28 +279,11 @@ static inline void snd_soc_debugfs_exit(void)
 
 #endif
 
-static int snd_soc_card_comp_compare(struct device *dev, void *data)
-{
-	struct snd_soc_component *component;
-
-	lockdep_assert_held(&client_mutex);
-	list_for_each_entry(component, &component_list, list) {
-		if (dev == component->dev) {
-			if (!strcmp(component->name, data))
-				return 1;
-			break;
-		}
-	}
-
-	return 0;
-}
-
 static int snd_soc_rtdcom_add(struct snd_soc_pcm_runtime *rtd,
 			      struct snd_soc_component *component)
 {
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_rtdcom_list *new_rtdcom;
-	char *cname;
 
 	for_each_rtdcom(rtd, rtdcom) {
 		/* already connected */
@@ -317,13 +300,6 @@ static int snd_soc_rtdcom_add(struct snd_soc_pcm_runtime *rtd,
 
 	list_add_tail(&new_rtdcom->list, &rtd->component_list);
 
-	if (rtd->card->auto_bind && !rtd->card->components_added) {
-		cname = devm_kasprintf(rtd->card->dev, GFP_KERNEL,
-				       "%s", component->name);
-		component_match_add(rtd->card->dev, &rtd->card->match,
-				    snd_soc_card_comp_compare, cname);
-	}
-
 	return 0;
 }
 
@@ -859,25 +835,6 @@ static bool soc_is_dai_link_bound(struct snd_soc_card *card,
 	return false;
 }
 
-static int snd_soc_card_comp_bind(struct device *dev)
-{
-	struct snd_soc_card *card = dev_get_drvdata(dev);
-
-	if (card->instantiated)
-		return 0;
-
-	return snd_soc_register_card(card);
-}
-
-static void snd_soc_card_comp_unbind(struct device *dev)
-{
-}
-
-static const struct component_master_ops snd_soc_card_comp_ops = {
-	.bind = snd_soc_card_comp_bind,
-	.unbind = snd_soc_card_comp_unbind,
-};
-
 static int soc_bind_dai_link(struct snd_soc_card *card,
 	struct snd_soc_dai_link *dai_link)
 {
@@ -2169,12 +2126,6 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 
 	card->instantiated = 1;
 	snd_soc_dapm_sync(&card->dapm);
-	if (card->auto_bind && !card->components_added) {
-		component_master_add_with_match(card->dev,
-						&snd_soc_card_comp_ops,
-						card->match);
-		card->components_added = true;
-	}
 	mutex_unlock(&card->mutex);
 	mutex_unlock(&client_mutex);
 
@@ -2820,9 +2771,6 @@ int snd_soc_unregister_card(struct snd_soc_card *card)
 		dev_dbg(card->dev, "ASoC: Unregistered card '%s'\n", card->name);
 	}
 
-	if (!card->auto_bind && card->components_added)
-		component_master_del(card->dev, &snd_soc_card_comp_ops);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_soc_unregister_card);
@@ -3235,17 +3183,8 @@ int snd_soc_add_component(struct device *dev,
 
 	snd_soc_component_add(component);
 
-	ret = component_add(dev, NULL);
-	if (ret < 0) {
-		dev_err(dev, "ASoC: Failed to add Component: %d\n", ret);
-		goto err_comp;
-	}
-
 	return 0;
 
-err_comp:
-	soc_remove_component(component);
-	snd_soc_unregister_dais(component);
 err_cleanup:
 	snd_soc_component_cleanup(component);
 err_free:
@@ -3293,7 +3232,6 @@ static int __snd_soc_unregister_component(struct device *dev)
 	mutex_unlock(&client_mutex);
 
 	if (found) {
-		component_del(dev, NULL);
 		snd_soc_component_cleanup(component);
 	}
 
-- 
cgit v1.2.3


From a83c0ea79d8b5705d09a39c73224332f9170a903 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:54:59 +0300
Subject: docs/mm: bootmem: add kernel-doc description of 'struct bootmem_data'

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/bootmem.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7942a96b1a9d..42515195d7d8 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -27,9 +27,20 @@ extern unsigned long max_pfn;
 extern unsigned long long max_possible_pfn;
 
 #ifndef CONFIG_NO_BOOTMEM
-/*
- * node_bootmem_map is a map pointer - the bits represent all physical 
- * memory pages (including holes) on the node.
+/**
+ * struct bootmem_data - per-node information used by the bootmem allocator
+ * @node_min_pfn: the starting physical address of the node's memory
+ * @node_low_pfn: the end physical address of the directly addressable memory
+ * @node_bootmem_map: is a bitmap pointer - the bits represent all physical
+ *		      memory pages (including holes) on the node.
+ * @last_end_off: the offset within the page of the end of the last allocation;
+ *                if 0, the page used is full
+ * @hint_idx: the PFN of the page used with the last allocation;
+ *            together with using this with the @last_end_offset field,
+ *            a test can be made to see if allocations can be merged
+ *            with the page used for the last allocation rather than
+ *            using up a full new page.
+ * @list: list entry in the linked list ordered by the memory addresses
  */
 typedef struct bootmem_data {
 	unsigned long node_min_pfn;
-- 
cgit v1.2.3


From e1720fee27246dfb84f7c433e35367170a2d4436 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:01 +0300
Subject: mm/memblock: add a name for memblock flags enumeration

Since kernel-doc does not like anonymous enums the name is required for
adding documentation. While on it, I've also updated all the function
declarations to use 'enum memblock_flags' instead of unsigned long.

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 22 +++++++++++-----------
 mm/memblock.c            | 37 +++++++++++++++++++++----------------
 mm/nobootmem.c           |  2 +-
 3 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ca59883c8364..8b8fbceffd4d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -21,7 +21,7 @@
 #define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
-enum {
+enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
 	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
@@ -31,7 +31,7 @@ enum {
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
-	unsigned long flags;
+	enum memblock_flags flags;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	int nid;
 #endif
@@ -72,7 +72,7 @@ void memblock_discard(void);
 
 phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					int nid, ulong flags);
+					int nid, enum memblock_flags flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -89,19 +89,19 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
-ulong choose_memblock_flags(void);
+enum memblock_flags choose_memblock_flags(void);
 
 /* Low level functions */
 int memblock_add_range(struct memblock_type *type,
 		       phys_addr_t base, phys_addr_t size,
-		       int nid, unsigned long flags);
+		       int nid, enum memblock_flags flags);
 
-void __next_mem_range(u64 *idx, int nid, ulong flags,
+void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
 		      struct memblock_type *type_a,
 		      struct memblock_type *type_b, phys_addr_t *out_start,
 		      phys_addr_t *out_end, int *out_nid);
 
-void __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags,
 			  struct memblock_type *type_a,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
@@ -253,13 +253,13 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 			   NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
-					     unsigned long flags)
+					     enum memblock_flags flags)
 {
 	r->flags |= flags;
 }
 
 static inline void memblock_clear_region_flags(struct memblock_region *r,
-					       unsigned long flags)
+					       enum memblock_flags flags)
 {
 	r->flags &= ~flags;
 }
@@ -317,10 +317,10 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					ulong flags);
+					enum memblock_flags flags);
 phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t max_addr,
-					int nid, ulong flags);
+					int nid, enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index 03d48d8835ba..20f48b49821a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -60,7 +60,7 @@ static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
-ulong __init_memblock choose_memblock_flags(void)
+enum memblock_flags __init_memblock choose_memblock_flags(void)
 {
 	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
@@ -109,7 +109,7 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 static phys_addr_t __init_memblock
 __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 				phys_addr_t size, phys_addr_t align, int nid,
-				ulong flags)
+				enum memblock_flags flags)
 {
 	phys_addr_t this_start, this_end, cand;
 	u64 i;
@@ -143,7 +143,7 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 static phys_addr_t __init_memblock
 __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
 			       phys_addr_t size, phys_addr_t align, int nid,
-			       ulong flags)
+			       enum memblock_flags flags)
 {
 	phys_addr_t this_start, this_end, cand;
 	u64 i;
@@ -188,7 +188,8 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
-					phys_addr_t end, int nid, ulong flags)
+					phys_addr_t end, int nid,
+					enum memblock_flags flags)
 {
 	phys_addr_t kernel_end, ret;
 
@@ -251,7 +252,7 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					phys_addr_t align)
 {
 	phys_addr_t ret;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 again:
 	ret = memblock_find_in_range_node(size, align, start, end,
@@ -472,7 +473,8 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
 						   int idx, phys_addr_t base,
 						   phys_addr_t size,
-						   int nid, unsigned long flags)
+						   int nid,
+						   enum memblock_flags flags)
 {
 	struct memblock_region *rgn = &type->regions[idx];
 
@@ -504,7 +506,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  */
 int __init_memblock memblock_add_range(struct memblock_type *type,
 				phys_addr_t base, phys_addr_t size,
-				int nid, unsigned long flags)
+				int nid, enum memblock_flags flags)
 {
 	bool insert = false;
 	phys_addr_t obase = base;
@@ -873,7 +875,8 @@ void __init_memblock __next_reserved_mem_region(u64 *idx,
  * As both region arrays are sorted, the function advances the two indices
  * in lockstep and returns each intersection.
  */
-void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
+void __init_memblock __next_mem_range(u64 *idx, int nid,
+				      enum memblock_flags flags,
 				      struct memblock_type *type_a,
 				      struct memblock_type *type_b,
 				      phys_addr_t *out_start,
@@ -982,7 +985,8 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
  *
  * Reverse of __next_mem_range().
  */
-void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
+void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
+					  enum memblock_flags flags,
 					  struct memblock_type *type_a,
 					  struct memblock_type *type_b,
 					  phys_addr_t *out_start,
@@ -1140,7 +1144,8 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
-					phys_addr_t end, int nid, ulong flags)
+					phys_addr_t end, int nid,
+					enum memblock_flags flags)
 {
 	phys_addr_t found;
 
@@ -1162,7 +1167,7 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
-					ulong flags)
+					enum memblock_flags flags)
 {
 	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
 					flags);
@@ -1170,14 +1175,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 
 phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t max_addr,
-					int nid, ulong flags)
+					int nid, enum memblock_flags flags)
 {
 	return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
 }
 
 phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t ret;
 
 again:
@@ -1258,7 +1263,7 @@ static void * __init memblock_virt_alloc_internal(
 {
 	phys_addr_t alloc;
 	void *ptr;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
@@ -1733,7 +1738,7 @@ phys_addr_t __init_memblock memblock_get_current_limit(void)
 static void __init_memblock memblock_dump(struct memblock_type *type)
 {
 	phys_addr_t base, end, size;
-	unsigned long flags;
+	enum memblock_flags flags;
 	int idx;
 	struct memblock_region *rgn;
 
@@ -1751,7 +1756,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
 			snprintf(nid_buf, sizeof(nid_buf), " on node %d",
 				 memblock_get_region_node(rgn));
 #endif
-		pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#lx\n",
+		pr_info(" %s[%#x]\t[%pa-%pa], %pa bytes%s flags: %#x\n",
 			type->name, idx, &base, &end, &size, nid_buf, flags);
 	}
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index c2cfa04f0231..439af3b765a7 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -42,7 +42,7 @@ static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
 {
 	void *ptr;
 	u64 addr;
-	ulong flags = choose_memblock_flags();
+	enum memblock_flags flags = choose_memblock_flags();
 
 	if (limit > memblock.current_limit)
 		limit = memblock.current_limit;
-- 
cgit v1.2.3


From 47cec4432ab06c4ba90c241b142a016f878ac6da Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:02 +0300
Subject: docs/mm: memblock: update kernel-doc comments

* make memblock_discard description kernel-doc compatible
* add brief description for memblock_setclr_flag and describe its
  parameters
* fixup return value descriptions

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 17 +++++++---
 mm/memblock.c            | 84 +++++++++++++++++++++++++++---------------------
 2 files changed, 59 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8b8fbceffd4d..63704c64583a 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -239,7 +239,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 /**
  * for_each_resv_unavail_range - iterate through reserved and unavailable memory
  * @i: u64 used as loop variable
- * @flags: pick from blocks based on memory attributes
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  *
@@ -367,8 +366,10 @@ phys_addr_t memblock_get_current_limit(void);
  */
 
 /**
- * memblock_region_memory_base_pfn - Return the lowest pfn intersecting with the memory region
+ * memblock_region_memory_base_pfn - get the lowest pfn of the memory region
  * @reg: memblock_region structure
+ *
+ * Return: the lowest pfn intersecting with the memory region
  */
 static inline unsigned long memblock_region_memory_base_pfn(const struct memblock_region *reg)
 {
@@ -376,8 +377,10 @@ static inline unsigned long memblock_region_memory_base_pfn(const struct membloc
 }
 
 /**
- * memblock_region_memory_end_pfn - Return the end_pfn this region
+ * memblock_region_memory_end_pfn - get the end pfn of the memory region
  * @reg: memblock_region structure
+ *
+ * Return: the end_pfn of the reserved region
  */
 static inline unsigned long memblock_region_memory_end_pfn(const struct memblock_region *reg)
 {
@@ -385,8 +388,10 @@ static inline unsigned long memblock_region_memory_end_pfn(const struct memblock
 }
 
 /**
- * memblock_region_reserved_base_pfn - Return the lowest pfn intersecting with the reserved region
+ * memblock_region_reserved_base_pfn - get the lowest pfn of the reserved region
  * @reg: memblock_region structure
+ *
+ * Return: the lowest pfn intersecting with the reserved region
  */
 static inline unsigned long memblock_region_reserved_base_pfn(const struct memblock_region *reg)
 {
@@ -394,8 +399,10 @@ static inline unsigned long memblock_region_reserved_base_pfn(const struct membl
 }
 
 /**
- * memblock_region_reserved_end_pfn - Return the end_pfn this region
+ * memblock_region_reserved_end_pfn - get the end pfn of the reserved region
  * @reg: memblock_region structure
+ *
+ * Return: the end_pfn of the reserved region
  */
 static inline unsigned long memblock_region_reserved_end_pfn(const struct memblock_region *reg)
 {
diff --git a/mm/memblock.c b/mm/memblock.c
index 20f48b49821a..4b9dd76f6ae6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -92,10 +92,11 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
 	return i < type->cnt;
 }
 
-/*
+/**
  * __memblock_find_range_bottom_up - find free area utility in bottom-up
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -103,7 +104,7 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
  *
  * Utility called from memblock_find_in_range_node(), find free area bottom-up.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
@@ -129,7 +130,8 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
 /**
  * __memblock_find_range_top_down - find free area utility, in top-down
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
@@ -137,7 +139,7 @@ __memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
  *
  * Utility called from memblock_find_in_range_node(), find free area top-down.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 static phys_addr_t __init_memblock
@@ -169,7 +171,8 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  * @size: size of free area to find
  * @align: alignment of free area to find
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  * @flags: pick from blocks based on memory attributes
  *
@@ -183,7 +186,7 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  *
  * If bottom-up allocation failed, will try to allocate memory top-down.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
@@ -238,13 +241,14 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 /**
  * memblock_find_in_range - find free area in given range
  * @start: start of candidate range
- * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_ANYWHERE or
+ *       %MEMBLOCK_ALLOC_ACCESSIBLE
  * @size: size of free area to find
  * @align: alignment of free area to find
  *
  * Find @size free area aligned to @align in the specified range.
  *
- * RETURNS:
+ * Return:
  * Found address on success, 0 on failure.
  */
 phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
@@ -288,7 +292,7 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 
 #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
 /**
- * Discard memory and reserved arrays if they were allocated
+ * memblock_discard - discard memory and reserved arrays if they were allocated
  */
 void __init memblock_discard(void)
 {
@@ -318,11 +322,11 @@ void __init memblock_discard(void)
  *
  * Double the size of the @type regions array. If memblock is being used to
  * allocate memory for a new reserved regions array and there is a previously
- * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
+ * allocated memory range [@new_area_start, @new_area_start + @new_area_size]
  * waiting to be reserved, ensure the memory used by the new array does
  * not overlap.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -1 on failure.
  */
 static int __init_memblock memblock_double_array(struct memblock_type *type,
@@ -467,7 +471,7 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
  * @nid:	node id of the new region
  * @flags:	flags of the new region
  *
- * Insert new memblock region [@base,@base+@size) into @type at @idx.
+ * Insert new memblock region [@base, @base + @size) into @type at @idx.
  * @type must already have extra room to accommodate the new region.
  */
 static void __init_memblock memblock_insert_region(struct memblock_type *type,
@@ -496,12 +500,12 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * @nid: nid of the new region
  * @flags: flags of the new region
  *
- * Add new memblock region [@base,@base+@size) into @type.  The new region
+ * Add new memblock region [@base, @base + @size) into @type.  The new region
  * is allowed to overlap with existing ones - overlaps don't affect already
  * existing regions.  @type is guaranteed to be minimal (all neighbouring
  * compatible regions are merged) after the addition.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_add_range(struct memblock_type *type,
@@ -615,11 +619,11 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
  * @end_rgn: out parameter for the end of isolated region
  *
  * Walk @type and ensure that regions don't cross the boundaries defined by
- * [@base,@base+@size).  Crossing regions are split at the boundaries,
+ * [@base, @base + @size).  Crossing regions are split at the boundaries,
  * which may create at most two more regions.  The index of the first
  * region inside the range is returned in *@start_rgn and end in *@end_rgn.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_isolate_range(struct memblock_type *type,
@@ -730,10 +734,15 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
 }
 
 /**
+ * memblock_setclr_flag - set or clear flag for a memory region
+ * @base: base address of the region
+ * @size: size of the region
+ * @set: set or clear the flag
+ * @flag: the flag to udpate
  *
  * This function isolates region [@base, @base + @size), and sets/clears flag
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 static int __init_memblock memblock_setclr_flag(phys_addr_t base,
 				phys_addr_t size, int set, int flag)
@@ -760,7 +769,7 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base,
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
 {
@@ -772,7 +781,7 @@ int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
 {
@@ -784,7 +793,7 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
 {
@@ -798,7 +807,7 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
 {
@@ -810,7 +819,7 @@ int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
  * @base: the base phys addr of the region
  * @size: the size of the region
  *
- * Return 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
  */
 int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
 {
@@ -971,9 +980,6 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
 /**
  * __next_mem_range_rev - generic next function for for_each_*_range_rev()
  *
- * Finds the next range from type_a which is not marked as unsuitable
- * in type_b.
- *
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
  * @flags: pick from blocks based on memory attributes
@@ -983,6 +989,9 @@ void __init_memblock __next_mem_range(u64 *idx, int nid,
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
+ * Finds the next range from type_a which is not marked as unsuitable
+ * in type_b.
+ *
  * Reverse of __next_mem_range().
  */
 void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
@@ -1118,10 +1127,10 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
  * @type: memblock type to set node ID for
  * @nid: node ID to set
  *
- * Set the nid of memblock @type regions in [@base,@base+@size) to @nid.
+ * Set the nid of memblock @type regions in [@base, @base + @size) to @nid.
  * Regions which cross the area boundaries are split as necessary.
  *
- * RETURNS:
+ * Return:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
@@ -1245,7 +1254,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * The allocation is performed from memory region limited by
  * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
  *
- * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
+ * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0.
  *
  * The phys address of allocated boot memory block is converted to virtual and
  * allocated memory is reset to 0.
@@ -1253,7 +1262,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
  * In addition, function sets the min_count to 0 using kmemleak_alloc for
  * allocated boot memory block, so that it is never reported as leaks.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 static void * __init memblock_virt_alloc_internal(
@@ -1338,7 +1347,7 @@ done:
  * info), if enabled. Does not zero allocated memory, does not panic if request
  * cannot be satisfied.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid_raw(
@@ -1375,7 +1384,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
  * Public function, provides additional debug information (including caller
  * info), if enabled. This function zeroes the allocated memory.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid_nopanic(
@@ -1411,7 +1420,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic(
  * which provides debug information (including caller info), if enabled,
  * and panics if the request can not be satisfied.
  *
- * RETURNS:
+ * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
 void * __init memblock_virt_alloc_try_nid(
@@ -1668,9 +1677,9 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
  * @base: base of region to check
  * @size: size of region to check
  *
- * Check if the region [@base, @base+@size) is a subset of a memory block.
+ * Check if the region [@base, @base + @size) is a subset of a memory block.
  *
- * RETURNS:
+ * Return:
  * 0 if false, non-zero if true
  */
 bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
@@ -1689,9 +1698,10 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t siz
  * @base: base of region to check
  * @size: size of region to check
  *
- * Check if the region [@base, @base+@size) intersects a reserved memory block.
+ * Check if the region [@base, @base + @size) intersects a reserved
+ * memory block.
  *
- * RETURNS:
+ * Return:
  * True if they intersect, false if not.
  */
 bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
-- 
cgit v1.2.3


From 9a0de1bfe191220cb0437865261ab2f95cbb13ef Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Sat, 30 Jun 2018 17:55:04 +0300
Subject: docs/mm: memblock: add kernel-doc description for memblock types

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/memblock.h | 37 +++++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 63704c64583a..516920549378 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -20,7 +20,13 @@
 #define INIT_MEMBLOCK_REGIONS	128
 #define INIT_PHYSMEM_REGIONS	4
 
-/* Definition of memblock flags. */
+/**
+ * enum memblock_flags - definition of memory region attributes
+ * @MEMBLOCK_NONE: no special request
+ * @MEMBLOCK_HOTPLUG: hotpluggable region
+ * @MEMBLOCK_MIRROR: mirrored region
+ * @MEMBLOCK_NOMAP: don't add to kernel direct mapping
+ */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
@@ -28,6 +34,13 @@ enum memblock_flags {
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
 };
 
+/**
+ * struct memblock_region - represents a memory region
+ * @base: physical address of the region
+ * @size: size of the region
+ * @flags: memory region attributes
+ * @nid: NUMA node id
+ */
 struct memblock_region {
 	phys_addr_t base;
 	phys_addr_t size;
@@ -37,14 +50,30 @@ struct memblock_region {
 #endif
 };
 
+/**
+ * struct memblock_type - collection of memory regions of certain type
+ * @cnt: number of regions
+ * @max: size of the allocated array
+ * @total_size: size of all regions
+ * @regions: array of regions
+ * @name: the memory type symbolic name
+ */
 struct memblock_type {
-	unsigned long cnt;	/* number of regions */
-	unsigned long max;	/* size of the allocated array */
-	phys_addr_t total_size;	/* size of all regions */
+	unsigned long cnt;
+	unsigned long max;
+	phys_addr_t total_size;
 	struct memblock_region *regions;
 	char *name;
 };
 
+/**
+ * struct memblock - memblock allocator metadata
+ * @bottom_up: is bottom up direction?
+ * @current_limit: physical address of the current allocation limit
+ * @memory: usabe memory regions
+ * @reserved: reserved memory regions
+ * @physmem: all physical memory
+ */
 struct memblock {
 	bool bottom_up;  /* is bottom up direction? */
 	phys_addr_t current_limit;
-- 
cgit v1.2.3


From 6d54228fd1f293d00576ab2c3d2e4992c7cce12f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 25 Jun 2018 17:26:55 +0200
Subject: libceph: make ceph_osdc_notify{,_ack}() payload_len u32

The wire format dictates that payload_len fits into 4 bytes.  Take u32
instead of size_t to reflect that.

All callers pass a small integer, so no changes required.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 4 ++--
 net/ceph/osd_client.c           | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 0d6ee04b4c41..d6fe7e4df8cf 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -528,12 +528,12 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 			 u64 notify_id,
 			 u64 cookie,
 			 void *payload,
-			 size_t payload_len);
+			 u32 payload_len);
 int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     struct ceph_object_id *oid,
 		     struct ceph_object_locator *oloc,
 		     void *payload,
-		     size_t payload_len,
+		     u32 payload_len,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a00c74f1154e..93614cb5e03f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4591,7 +4591,7 @@ EXPORT_SYMBOL(ceph_osdc_unwatch);
 
 static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
 				      u64 notify_id, u64 cookie, void *payload,
-				      size_t payload_len)
+				      u32 payload_len)
 {
 	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pl;
@@ -4628,7 +4628,7 @@ int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 			 u64 notify_id,
 			 u64 cookie,
 			 void *payload,
-			 size_t payload_len)
+			 u32 payload_len)
 {
 	struct ceph_osd_request *req;
 	int ret;
@@ -4661,7 +4661,7 @@ EXPORT_SYMBOL(ceph_osdc_notify_ack);
 
 static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
 				  u64 cookie, u32 prot_ver, u32 timeout,
-				  void *payload, size_t payload_len)
+				  void *payload, u32 payload_len)
 {
 	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pl;
@@ -4701,7 +4701,7 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
 		     struct ceph_object_id *oid,
 		     struct ceph_object_locator *oloc,
 		     void *payload,
-		     size_t payload_len,
+		     u32 payload_len,
 		     u32 timeout,
 		     struct page ***preply_pages,
 		     size_t *preply_len)
-- 
cgit v1.2.3


From c9ed51c9123ab5e8f79b7d53a9afd786b43d4fe6 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 27 Jun 2018 16:42:51 +0200
Subject: libceph: change ceph_pagelist_encode_string() to take u32

The wire format dictates that the length of string fits into 4 bytes.
Take u32 instead of size_t to reflect that.

We were already truncating len in ceph_pagelist_encode_32() -- this
just pushes that truncation one level up.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/pagelist.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index 7edcded07641..d0223364349f 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -68,7 +68,7 @@ static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
 	return ceph_pagelist_append(pl, &v, 1);
 }
 static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
-					      char *s, size_t len)
+					      char *s, u32 len)
 {
 	int ret = ceph_pagelist_encode_32(pl, len);
 	if (ret)
-- 
cgit v1.2.3


From 473bd2d780d1699d81b25f57c0ec4de633a28eb8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:34 +0200
Subject: libceph: use timespec64 in for keepalive2 and ticket validity

ceph_con_keepalive_expired() is the last user of timespec_add() and some
of the last uses of ktime_get_real_ts().  Replacing this with timespec64
based interfaces  lets us remove that deprecated API.

I'm introducing new ceph_encode_timespec64()/ceph_decode_timespec64()
here that take timespec64 structures and convert to/from ceph_timespec,
which is defined to have an unsigned 32-bit tv_sec member. This extends
the range of valid times to year 2106, avoiding the year 2038 overflow.

The ceph file system portion still uses the old functions for inode
timestamps, this will be done separately after the VFS layer is converted.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h    | 20 +++++++++++++++++++-
 include/linux/ceph/messenger.h |  2 +-
 net/ceph/auth_x.c              | 14 +++++++-------
 net/ceph/auth_x.h              |  2 +-
 net/ceph/cls_lock_client.c     |  4 ++--
 net/ceph/messenger.c           | 20 ++++++++++----------
 6 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index d143ac8879c6..094b9b4a34f3 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -194,8 +194,26 @@ ceph_decode_skip_n(p, end, sizeof(u8), bad)
 	} while (0)
 
 /*
- * struct ceph_timespec <-> struct timespec
+ * struct ceph_timespec <-> struct timespec64
  */
+static inline void ceph_decode_timespec64(struct timespec64 *ts,
+					  const struct ceph_timespec *tv)
+{
+	/*
+	 * This will still overflow in year 2106.  We could extend
+	 * the protocol to steal two more bits from tv_nsec to
+	 * add three more 136 year epochs after that the way ext4
+	 * does if necessary.
+	 */
+	ts->tv_sec = (time64_t)le32_to_cpu(tv->tv_sec);
+	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
+}
+static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
+					  const struct timespec64 *ts)
+{
+	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
+	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
+}
 static inline void ceph_decode_timespec(struct timespec *ts,
 					const struct ceph_timespec *tv)
 {
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c7dfcb8a1fb2..a718b877c597 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -330,7 +330,7 @@ struct ceph_connection {
 	int in_base_pos;     /* bytes read */
 	__le64 in_temp_ack;  /* for reading an ack */
 
-	struct timespec last_keepalive_ack; /* keepalive2 ack stamp */
+	struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 
 	struct delayed_work work;	    /* send|recv work */
 	unsigned long       delay;          /* current delay interval */
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 2f4a1baf5f52..b05c3a540a5a 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -149,12 +149,12 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	void *dp, *dend;
 	int dlen;
 	char is_enc;
-	struct timespec validity;
+	struct timespec64 validity;
 	void *tp, *tpend;
 	void **ptp;
 	struct ceph_crypto_key new_session_key = { 0 };
 	struct ceph_buffer *new_ticket_blob;
-	unsigned long new_expires, new_renew_after;
+	time64_t new_expires, new_renew_after;
 	u64 new_secret_id;
 	int ret;
 
@@ -189,11 +189,11 @@ static int process_one_ticket(struct ceph_auth_client *ac,
 	if (ret)
 		goto out;
 
-	ceph_decode_timespec(&validity, dp);
+	ceph_decode_timespec64(&validity, dp);
 	dp += sizeof(struct ceph_timespec);
-	new_expires = get_seconds() + validity.tv_sec;
+	new_expires = ktime_get_real_seconds() + validity.tv_sec;
 	new_renew_after = new_expires - (validity.tv_sec / 4);
-	dout(" expires=%lu renew_after=%lu\n", new_expires,
+	dout(" expires=%llu renew_after=%llu\n", new_expires,
 	     new_renew_after);
 
 	/* ticket blob for service */
@@ -385,13 +385,13 @@ static bool need_key(struct ceph_x_ticket_handler *th)
 	if (!th->have_key)
 		return true;
 
-	return get_seconds() >= th->renew_after;
+	return ktime_get_real_seconds() >= th->renew_after;
 }
 
 static bool have_key(struct ceph_x_ticket_handler *th)
 {
 	if (th->have_key) {
-		if (get_seconds() >= th->expires)
+		if (ktime_get_real_seconds() >= th->expires)
 			th->have_key = false;
 	}
 
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 454cb54568af..57ba99f7736f 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -22,7 +22,7 @@ struct ceph_x_ticket_handler {
 	u64 secret_id;
 	struct ceph_buffer *ticket_blob;
 
-	unsigned long renew_after, expires;
+	time64_t renew_after, expires;
 };
 
 #define CEPHX_AU_ENC_BUF_LEN	128  /* big enough for encrypted blob */
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 8d2032b2f225..2105a6eaa66c 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -32,7 +32,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
 	int desc_len = strlen(desc);
 	void *p, *end;
 	struct page *lock_op_page;
-	struct timespec mtime;
+	struct timespec64 mtime;
 	int ret;
 
 	lock_op_buf_size = name_len + sizeof(__le32) +
@@ -63,7 +63,7 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
 	ceph_encode_string(&p, end, desc, desc_len);
 	/* only support infinite duration */
 	memset(&mtime, 0, sizeof(mtime));
-	ceph_encode_timespec(p, &mtime);
+	ceph_encode_timespec64(p, &mtime);
 	p += sizeof(struct ceph_timespec);
 	ceph_encode_8(&p, flags);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index c6413c360771..3f6336248509 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1417,11 +1417,11 @@ static void prepare_write_keepalive(struct ceph_connection *con)
 	dout("prepare_write_keepalive %p\n", con);
 	con_out_kvec_reset(con);
 	if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
-		struct timespec now;
+		struct timespec64 now;
 
-		ktime_get_real_ts(&now);
+		ktime_get_real_ts64(&now);
 		con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
-		ceph_encode_timespec(&con->out_temp_keepalive2, &now);
+		ceph_encode_timespec64(&con->out_temp_keepalive2, &now);
 		con_out_kvec_add(con, sizeof(con->out_temp_keepalive2),
 				 &con->out_temp_keepalive2);
 	} else {
@@ -2555,7 +2555,7 @@ static int read_keepalive_ack(struct ceph_connection *con)
 	int ret = read_partial(con, size, size, &ceph_ts);
 	if (ret <= 0)
 		return ret;
-	ceph_decode_timespec(&con->last_keepalive_ack, &ceph_ts);
+	ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
 	prepare_read_tag(con);
 	return 1;
 }
@@ -3223,12 +3223,12 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
 {
 	if (interval > 0 &&
 	    (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2)) {
-		struct timespec now;
-		struct timespec ts;
-		ktime_get_real_ts(&now);
-		jiffies_to_timespec(interval, &ts);
-		ts = timespec_add(con->last_keepalive_ack, ts);
-		return timespec_compare(&now, &ts) >= 0;
+		struct timespec64 now;
+		struct timespec64 ts;
+		ktime_get_real_ts64(&now);
+		jiffies_to_timespec64(interval, &ts);
+		ts = timespec64_add(con->last_keepalive_ack, ts);
+		return timespec64_compare(&now, &ts) >= 0;
 	}
 	return false;
 }
-- 
cgit v1.2.3


From 3cd14285a1bb58eaa98c44aec0db4d914fdc8c13 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Aug 2018 12:12:22 -0500
Subject: scsi: target: make transport_init_session_tags static

transport_init_session_tags is only called from target_core_transport.c so
make it static.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 7 +++----
 include/target/target_core_fabric.h    | 2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index b419d4f8cb96..8044b8115dbf 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -304,9 +304,9 @@ EXPORT_SYMBOL(transport_alloc_session_tags);
  *	      each command.
  * @sup_prot_ops: bitmask that defines which T10-PI modes are supported.
  */
-struct se_session *transport_init_session_tags(unsigned int tag_num,
-					       unsigned int tag_size,
-					       enum target_prot_op sup_prot_ops)
+static struct se_session *
+transport_init_session_tags(unsigned int tag_num, unsigned int tag_size,
+			    enum target_prot_op sup_prot_ops)
 {
 	struct se_session *se_sess;
 	int rc;
@@ -334,7 +334,6 @@ struct se_session *transport_init_session_tags(unsigned int tag_num,
 
 	return se_sess;
 }
-EXPORT_SYMBOL(transport_init_session_tags);
 
 /*
  * Called with spin_lock_irqsave(&struct se_portal_group->session_lock called.
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index f61aa716cfe1..d0d064212bfc 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -119,8 +119,6 @@ void transport_init_session(struct se_session *);
 struct se_session *transport_alloc_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
 		unsigned int);
-struct se_session *transport_init_session_tags(unsigned int, unsigned int,
-		enum target_prot_op);
 void	__transport_register_session(struct se_portal_group *,
 		struct se_node_acl *, struct se_session *, void *);
 void	transport_register_session(struct se_portal_group *,
-- 
cgit v1.2.3


From fa834287300ba6a89e0a590f520a3398527eb541 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Aug 2018 12:12:23 -0500
Subject: scsi: target: rename target_alloc_session

Rename target_alloc_session to target_setup_session to avoid confusion with
the other transport session allocation function that only allocates the
session and because the target_alloc_session does so much more. It
allocates the session, sets up the nacl and registers the session.

The next patch will then add a remove function to match the setup in this
one, so it should make sense for all drivers, except iscsi, to just call
those 2 functions to setup and remove a session.

iscsi will continue to be the odd driver.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Chris Boot <bootc@bootc.net>
Cc: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Cc: Michael Cyr <mikecyr@linux.vnet.ibm.com>
Cc: <qla2xxx-upstream@qlogic.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/infiniband/ulp/srpt/ib_srpt.c    | 6 +++---
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 2 +-
 drivers/scsi/qla2xxx/tcm_qla2xxx.c       | 2 +-
 drivers/target/loopback/tcm_loop.c       | 2 +-
 drivers/target/sbp/sbp_target.c          | 2 +-
 drivers/target/target_core_transport.c   | 4 ++--
 drivers/target/tcm_fc/tfc_sess.c         | 2 +-
 drivers/usb/gadget/function/f_tcm.c      | 2 +-
 drivers/vhost/scsi.c                     | 2 +-
 drivers/xen/xen-scsiback.c               | 2 +-
 include/target/target_core_fabric.h      | 2 +-
 11 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 07b3e1c583bd..21435a73771a 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2221,16 +2221,16 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 	pr_debug("registering session %s\n", ch->sess_name);
 
 	if (sport->port_guid_tpg.se_tpg_wwn)
-		ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
+		ch->sess = target_setup_session(&sport->port_guid_tpg, 0, 0,
 						TARGET_PROT_NORMAL,
 						ch->sess_name, ch, NULL);
 	if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
-		ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
+		ch->sess = target_setup_session(&sport->port_gid_tpg, 0, 0,
 					TARGET_PROT_NORMAL, i_port_id, ch,
 					NULL);
 	/* Retry without leading "0x" */
 	if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
-		ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
+		ch->sess = target_setup_session(&sport->port_gid_tpg, 0, 0,
 						TARGET_PROT_NORMAL,
 						i_port_id + 2, ch, NULL);
 	if (IS_ERR_OR_NULL(ch->sess)) {
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index fdda04e5cf94..1bdf9379c4ce 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -2233,7 +2233,7 @@ static int ibmvscsis_make_nexus(struct ibmvscsis_tport *tport)
 		return -ENOMEM;
 	}
 
-	nexus->se_sess = target_alloc_session(&tport->se_tpg, 0, 0,
+	nexus->se_sess = target_setup_session(&tport->se_tpg, 0, 0,
 					      TARGET_PROT_NORMAL, name, nexus,
 					      NULL);
 	if (IS_ERR(nexus->se_sess)) {
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index cfb5d6067f9f..b9ce4e7e1c3e 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -1539,7 +1539,7 @@ static int tcm_qla2xxx_check_initiator_node_acl(
 	 * Locate our struct se_node_acl either from an explict NodeACL created
 	 * via ConfigFS, or via running in TPG demo mode.
 	 */
-	se_sess = target_alloc_session(&tpg->se_tpg, num_tags,
+	se_sess = target_setup_session(&tpg->se_tpg, num_tags,
 				       sizeof(struct qla_tgt_cmd),
 				       TARGET_PROT_ALL, port_name,
 				       qlat_sess, tcm_qla2xxx_session_cb);
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index b2e7ff50ff61..54b5b94dad37 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -765,7 +765,7 @@ static int tcm_loop_make_nexus(
 	if (!tl_nexus)
 		return -ENOMEM;
 
-	tl_nexus->se_sess = target_alloc_session(&tl_tpg->tl_se_tpg, 0, 0,
+	tl_nexus->se_sess = target_setup_session(&tl_tpg->tl_se_tpg, 0, 0,
 					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
 					name, tl_nexus, tcm_loop_alloc_sess_cb);
 	if (IS_ERR(tl_nexus->se_sess)) {
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index b61b79ac98ff..c4369b52a3fb 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -209,7 +209,7 @@ static struct sbp_session *sbp_session_create(
 	INIT_DELAYED_WORK(&sess->maint_work, session_maintenance_work);
 	sess->guid = guid;
 
-	sess->se_sess = target_alloc_session(&tpg->se_tpg, 128,
+	sess->se_sess = target_setup_session(&tpg->se_tpg, 128,
 					     sizeof(struct sbp_target_request),
 					     TARGET_PROT_NORMAL, guid_str,
 					     sess, NULL);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 8044b8115dbf..57fadec2d1ae 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -416,7 +416,7 @@ void transport_register_session(
 EXPORT_SYMBOL(transport_register_session);
 
 struct se_session *
-target_alloc_session(struct se_portal_group *tpg,
+target_setup_session(struct se_portal_group *tpg,
 		     unsigned int tag_num, unsigned int tag_size,
 		     enum target_prot_op prot_op,
 		     const char *initiatorname, void *private,
@@ -458,7 +458,7 @@ target_alloc_session(struct se_portal_group *tpg,
 	transport_register_session(tpg, sess->se_node_acl, sess, private);
 	return sess;
 }
-EXPORT_SYMBOL(target_alloc_session);
+EXPORT_SYMBOL(target_setup_session);
 
 ssize_t target_show_dynamic_sessions(struct se_portal_group *se_tpg, char *page)
 {
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index c91979c1463d..efd7d5e5bfc4 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -239,7 +239,7 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 	sess->tport = tport;
 	sess->port_id = port_id;
 
-	sess->se_sess = target_alloc_session(se_tpg, TCM_FC_DEFAULT_TAGS,
+	sess->se_sess = target_setup_session(se_tpg, TCM_FC_DEFAULT_TAGS,
 					     sizeof(struct ft_cmd),
 					     TARGET_PROT_NORMAL, &initiatorname[0],
 					     sess, ft_sess_alloc_cb);
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 4f183176b0b4..ba7c5f68d38a 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1592,7 +1592,7 @@ static int tcm_usbg_make_nexus(struct usbg_tpg *tpg, char *name)
 		goto out_unlock;
 	}
 
-	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+	tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg,
 						     USB_G_DEFAULT_SESSION_TAGS,
 						     sizeof(struct usbg_cmd),
 						     TARGET_PROT_NORMAL, name,
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c84a6edd4c25..e9368842c63f 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1738,7 +1738,7 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
 	 * struct se_node_acl for the vhost_scsi struct se_portal_group with
 	 * the SCSI Initiator port name of the passed configfs group 'name'.
 	 */
-	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+	tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg,
 					VHOST_SCSI_DEFAULT_TAGS,
 					sizeof(struct vhost_scsi_cmd),
 					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS,
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index fd77ccfc7d6e..2ffc7ed944d8 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1521,7 +1521,7 @@ static int scsiback_make_nexus(struct scsiback_tpg *tpg,
 		goto out_unlock;
 	}
 
-	tv_nexus->tvn_se_sess = target_alloc_session(&tpg->se_tpg,
+	tv_nexus->tvn_se_sess = target_setup_session(&tpg->se_tpg,
 						     VSCSI_DEFAULT_SESSION_TAGS,
 						     sizeof(struct vscsibk_pend),
 						     TARGET_PROT_NORMAL, name,
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index d0d064212bfc..fbcc9a4025b9 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -109,7 +109,7 @@ void target_unregister_template(const struct target_core_fabric_ops *fo);
 int target_depend_item(struct config_item *item);
 void target_undepend_item(struct config_item *item);
 
-struct se_session *target_alloc_session(struct se_portal_group *,
+struct se_session *target_setup_session(struct se_portal_group *,
 		unsigned int, unsigned int, enum target_prot_op prot_op,
 		const char *, void *,
 		int (*callback)(struct se_portal_group *,
-- 
cgit v1.2.3


From fb7c70f2d750a462c5a9757555bc23bf35360cec Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Thu, 2 Aug 2018 12:12:24 -0500
Subject: scsi: target: add session removal function

This adds a function to remove a session which should be used by drivers
that use target_setup_session. The next patches will convert the target
drivers to use this new function.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Chris Boot <bootc@bootc.net>
Cc: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Cc: Michael Cyr <mikecyr@linux.vnet.ibm.com>
Cc: <qla2xxx-upstream@qlogic.com>
Cc: Johannes Thumshirn <jth@kernel.org>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 7 +++++++
 include/target/target_core_fabric.h    | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 57fadec2d1ae..86c0156e6c88 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -616,6 +616,13 @@ void transport_deregister_session(struct se_session *se_sess)
 }
 EXPORT_SYMBOL(transport_deregister_session);
 
+void target_remove_session(struct se_session *se_sess)
+{
+	transport_deregister_session_configfs(se_sess);
+	transport_deregister_session(se_sess);
+}
+EXPORT_SYMBOL(target_remove_session);
+
 static void target_remove_from_state_list(struct se_cmd *cmd)
 {
 	struct se_device *dev = cmd->se_dev;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index fbcc9a4025b9..f4147b398431 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -114,6 +114,7 @@ struct se_session *target_setup_session(struct se_portal_group *,
 		const char *, void *,
 		int (*callback)(struct se_portal_group *,
 				struct se_session *, void *));
+void target_remove_session(struct se_session *);
 
 void transport_init_session(struct se_session *);
 struct se_session *transport_alloc_session(enum target_prot_op);
-- 
cgit v1.2.3


From fac02ddf910814c24f5d9d969dfdab5227f6f3eb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 22:18:37 +0200
Subject: libceph: use timespec64 for r_mtime

The request mtime field is used all over ceph, and is currently
represented as a 'timespec' structure in Linux. This changes it to
timespec64 to allow times beyond 2038, modifying all users at the
same time.

[ Remove now redundant ts variable in writepage_nounlock(). ]

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c             |  2 +-
 fs/ceph/addr.c                  | 12 +++++-------
 fs/ceph/file.c                  |  8 ++++----
 include/linux/ceph/osd_client.h |  6 +++---
 net/ceph/osd_client.c           |  8 ++++----
 5 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c680de15fae0..11f9be10e3fa 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1466,7 +1466,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
 	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&osd_req->r_mtime);
+	ktime_get_real_ts64(&osd_req->r_mtime);
 	osd_req->r_data_offset = obj_request->ex.oe_off;
 }
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 292b3d72d725..afcc59ed7090 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -574,7 +574,6 @@ static u64 get_writepages_data_length(struct inode *inode,
  */
 static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 {
-	struct timespec ts;
 	struct inode *inode;
 	struct ceph_inode_info *ci;
 	struct ceph_fs_client *fsc;
@@ -625,12 +624,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
 		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
 
 	set_page_writeback(page);
-	ts = timespec64_to_timespec(inode->i_mtime);
 	err = ceph_osdc_writepages(&fsc->client->osdc, ceph_vino(inode),
 				   &ci->i_layout, snapc, page_off, len,
 				   ceph_wbc.truncate_seq,
 				   ceph_wbc.truncate_size,
-				   &ts, &page, 1);
+				   &inode->i_mtime, &page, 1);
 	if (err < 0) {
 		struct writeback_control tmp_wbc;
 		if (!wbc)
@@ -1134,7 +1132,7 @@ new_request:
 			pages = NULL;
 		}
 
-		req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+		req->r_mtime = inode->i_mtime;
 		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
 		BUG_ON(rc);
 		req = NULL;
@@ -1734,7 +1732,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 		goto out;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!err)
 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1776,7 +1774,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 			goto out_put;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!err)
 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
@@ -1937,7 +1935,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
 				     0, false, true);
 	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
 
-	wr_req->r_mtime = timespec64_to_timespec(ci->vfs_inode.i_mtime);
+	wr_req->r_mtime = ci->vfs_inode.i_mtime;
 	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
 
 	if (!err)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ad0bed99b1d5..2f3a30ca94bf 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -721,7 +721,7 @@ struct ceph_aio_request {
 	struct list_head osd_reqs;
 	unsigned num_reqs;
 	atomic_t pending_reqs;
-	struct timespec mtime;
+	struct timespec64 mtime;
 	struct ceph_cap_flush *prealloc_cf;
 };
 
@@ -923,7 +923,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	int num_pages = 0;
 	int flags;
 	int ret;
-	struct timespec mtime = timespec64_to_timespec(current_time(inode));
+	struct timespec64 mtime = current_time(inode);
 	size_t count = iov_iter_count(iter);
 	loff_t pos = iocb->ki_pos;
 	bool write = iov_iter_rw(iter) == WRITE;
@@ -1131,7 +1131,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
 	int flags;
 	int ret;
 	bool check_caps = false;
-	struct timespec mtime = timespec64_to_timespec(current_time(inode));
+	struct timespec64 mtime = current_time(inode);
 	size_t count = iov_iter_count(from);
 
 	if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1663,7 +1663,7 @@ static int ceph_zero_partial_object(struct inode *inode,
 		goto out;
 	}
 
-	req->r_mtime = timespec64_to_timespec(inode->i_mtime);
+	req->r_mtime = inode->i_mtime;
 	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
 	if (!ret) {
 		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d6fe7e4df8cf..02096da01845 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -199,7 +199,7 @@ struct ceph_osd_request {
 	/* set by submitter */
 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
 	struct ceph_snap_context *r_snapc;    /* for writes */
-	struct timespec r_mtime;              /* ditto */
+	struct timespec64 r_mtime;            /* ditto */
 	u64 r_data_offset;                    /* ditto */
 	bool r_linger;                        /* don't resend on failure */
 
@@ -253,7 +253,7 @@ struct ceph_osd_linger_request {
 	struct ceph_osd_request_target t;
 	u32 map_dne_bound;
 
-	struct timespec mtime;
+	struct timespec64 mtime;
 
 	struct kref kref;
 	struct mutex lock;
@@ -508,7 +508,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				struct ceph_snap_context *sc,
 				u64 off, u64 len,
 				u32 truncate_seq, u64 truncate_size,
-				struct timespec *mtime,
+				struct timespec64 *mtime,
 				struct page **pages, int nr_pages);
 
 /* watch/notify */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 93614cb5e03f..8002b8e9ce24 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1978,7 +1978,7 @@ static void encode_request_partial(struct ceph_osd_request *req,
 	p += sizeof(struct ceph_blkin_trace_info);
 
 	ceph_encode_32(&p, 0); /* client_inc, always 0 */
-	ceph_encode_timespec(p, &req->r_mtime);
+	ceph_encode_timespec64(p, &req->r_mtime);
 	p += sizeof(struct ceph_timespec);
 
 	encode_oloc(&p, end, &req->r_t.target_oloc);
@@ -4512,7 +4512,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
 	ceph_oid_copy(&lreq->t.base_oid, oid);
 	ceph_oloc_copy(&lreq->t.base_oloc, oloc);
 	lreq->t.flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&lreq->mtime);
+	ktime_get_real_ts64(&lreq->mtime);
 
 	lreq->reg_req = alloc_linger_request(lreq);
 	if (!lreq->reg_req) {
@@ -4570,7 +4570,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
 	ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
 	ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
 	req->r_flags = CEPH_OSD_FLAG_WRITE;
-	ktime_get_real_ts(&req->r_mtime);
+	ktime_get_real_ts64(&req->r_mtime);
 	osd_req_op_watch_init(req, 0, lreq->linger_id,
 			      CEPH_OSD_WATCH_OP_UNWATCH);
 
@@ -5136,7 +5136,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 			 struct ceph_snap_context *snapc,
 			 u64 off, u64 len,
 			 u32 truncate_seq, u64 truncate_size,
-			 struct timespec *mtime,
+			 struct timespec64 *mtime,
 			 struct page **pages, int num_pages)
 {
 	struct ceph_osd_request *req;
-- 
cgit v1.2.3


From f7e52d8efe8588c5d4b4c78eb33da81d89486c1a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 23 Jul 2018 14:11:40 +0200
Subject: libceph: remove now unused ceph_{en,de}code_timespec()

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/decode.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 094b9b4a34f3..a6c2a48d42e0 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -214,18 +214,6 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
 	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
 	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
 }
-static inline void ceph_decode_timespec(struct timespec *ts,
-					const struct ceph_timespec *tv)
-{
-	ts->tv_sec = (__kernel_time_t)le32_to_cpu(tv->tv_sec);
-	ts->tv_nsec = (long)le32_to_cpu(tv->tv_nsec);
-}
-static inline void ceph_encode_timespec(struct ceph_timespec *tv,
-					const struct timespec *ts)
-{
-	tv->tv_sec = cpu_to_le32((u32)ts->tv_sec);
-	tv->tv_nsec = cpu_to_le32((u32)ts->tv_nsec);
-}
 
 /*
  * sockaddr_storage <-> ceph_sockaddr
-- 
cgit v1.2.3


From 262614c4294d33b1f19e0d18c0091d9c329b544a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 26 Jul 2018 15:17:46 +0200
Subject: libceph: store ceph_auth_handshake pointer in ceph_connection

We already copy authorizer_reply_buf and authorizer_reply_buf_len into
ceph_connection.  Factoring out __prepare_write_connect() requires two
more: authorizer_buf and authorizer_buf_len.  Store the pointer to the
handshake in con->auth rather than piling on.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/messenger.h |  3 +--
 net/ceph/messenger.c           | 54 ++++++++++++++++++++----------------------
 2 files changed, 27 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index a718b877c597..021718570b50 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -286,9 +286,8 @@ struct ceph_connection {
 				 attempt for this connection, client */
 	u32 peer_global_seq;  /* peer's global seq for this connection */
 
+	struct ceph_auth_handshake *auth;
 	int auth_retry;       /* true if we need a newer authorizer */
-	void *auth_reply_buf;   /* where to put the authorizer reply */
-	int auth_reply_buf_len;
 
 	struct mutex mutex;
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3f6336248509..b6ebd2cc16a1 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1434,24 +1434,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
  * Connection negotiation.
  */
 
-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
-						int *auth_proto)
+static int get_connect_authorizer(struct ceph_connection *con)
 {
 	struct ceph_auth_handshake *auth;
+	int auth_proto;
 
 	if (!con->ops->get_authorizer) {
+		con->auth = NULL;
 		con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
 		con->out_connect.authorizer_len = 0;
-		return NULL;
+		return 0;
 	}
 
-	auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
+	auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
 	if (IS_ERR(auth))
-		return auth;
+		return PTR_ERR(auth);
 
-	con->auth_reply_buf = auth->authorizer_reply_buf;
-	con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
-	return auth;
+	con->auth = auth;
+	con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+	con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
+	return 0;
 }
 
 /*
@@ -1471,8 +1473,7 @@ static int prepare_write_connect(struct ceph_connection *con)
 {
 	unsigned int global_seq = get_global_seq(con->msgr, 0);
 	int proto;
-	int auth_proto;
-	struct ceph_auth_handshake *auth;
+	int ret;
 
 	switch (con->peer_name.type) {
 	case CEPH_ENTITY_TYPE_MON:
@@ -1499,20 +1500,15 @@ static int prepare_write_connect(struct ceph_connection *con)
 	con->out_connect.protocol_version = cpu_to_le32(proto);
 	con->out_connect.flags = 0;
 
-	auth_proto = CEPH_AUTH_UNKNOWN;
-	auth = get_connect_authorizer(con, &auth_proto);
-	if (IS_ERR(auth))
-		return PTR_ERR(auth);
-
-	con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
-	con->out_connect.authorizer_len = auth ?
-		cpu_to_le32(auth->authorizer_buf_len) : 0;
+	ret = get_connect_authorizer(con);
+	if (ret)
+		return ret;
 
 	con_out_kvec_add(con, sizeof (con->out_connect),
 					&con->out_connect);
-	if (auth && auth->authorizer_buf_len)
-		con_out_kvec_add(con, auth->authorizer_buf_len,
-					auth->authorizer_buf);
+	if (con->auth)
+		con_out_kvec_add(con, con->auth->authorizer_buf_len,
+				 con->auth->authorizer_buf);
 
 	con->out_more = 0;
 	con_flag_set(con, CON_FLAG_WRITE_PENDING);
@@ -1781,11 +1777,14 @@ static int read_partial_connect(struct ceph_connection *con)
 	if (ret <= 0)
 		goto out;
 
-	size = le32_to_cpu(con->in_reply.authorizer_len);
-	end += size;
-	ret = read_partial(con, end, size, con->auth_reply_buf);
-	if (ret <= 0)
-		goto out;
+	if (con->auth) {
+		size = le32_to_cpu(con->in_reply.authorizer_len);
+		end += size;
+		ret = read_partial(con, end, size,
+				   con->auth->authorizer_reply_buf);
+		if (ret <= 0)
+			goto out;
+	}
 
 	dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
 	     con, (int)con->in_reply.tag,
@@ -1793,7 +1792,6 @@ static int read_partial_connect(struct ceph_connection *con)
 	     le32_to_cpu(con->in_reply.global_seq));
 out:
 	return ret;
-
 }
 
 /*
@@ -2076,7 +2074,7 @@ static int process_connect(struct ceph_connection *con)
 
 	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
 
-	if (con->auth_reply_buf) {
+	if (con->auth) {
 		/*
 		 * Any connection that defines ->get_authorizer()
 		 * should also define ->verify_authorizer_reply().
-- 
cgit v1.2.3


From 6daca13d2e72bedaaacfc08f873114c9307d5aea Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:18:34 +0200
Subject: libceph: add authorizer challenge

When a client authenticates with a service, an authorizer is sent with
a nonce to the service (ceph_x_authorize_[ab]) and the service responds
with a mutation of that nonce (ceph_x_authorize_reply).  This lets the
client verify the service is who it says it is but it doesn't protect
against a replay: someone can trivially capture the exchange and reuse
the same authorizer to authenticate themselves.

Allow the service to reject an initial authorizer with a random
challenge (ceph_x_authorize_challenge).  The client then has to respond
with an updated authorizer proving they are able to decrypt the
service's challenge and that the new authorizer was produced for this
specific connection instance.

The accepting side requires this challenge and response unconditionally
if the client side advertises they have CEPHX_V2 feature bit.

This addresses CVE-2018-1128.

Link: http://tracker.ceph.com/issues/24836
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c           | 11 +++++++
 include/linux/ceph/auth.h      |  8 +++++
 include/linux/ceph/messenger.h |  3 ++
 include/linux/ceph/msgr.h      |  2 +-
 net/ceph/auth.c                | 16 ++++++++++
 net/ceph/auth_x.c              | 72 +++++++++++++++++++++++++++++++++++++++---
 net/ceph/auth_x_protocol.h     |  7 ++++
 net/ceph/messenger.c           | 17 +++++++++-
 net/ceph/osd_client.c          | 11 +++++++
 9 files changed, 140 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f5a299daae95..4fc7582233db 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4154,6 +4154,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 	return auth;
 }
 
+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mds_client *mdsc = s->s_mdsc;
+	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
+					    challenge_buf, challenge_buf_len);
+}
 
 static int verify_authorizer_reply(struct ceph_connection *con)
 {
@@ -4217,6 +4227,7 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.put = con_put,
 	.dispatch = dispatch,
 	.get_authorizer = get_authorizer,
+	.add_authorizer_challenge = add_authorizer_challenge,
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.peer_reset = peer_reset,
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index e931da8424a4..6728c2ee0205 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -64,6 +64,10 @@ struct ceph_auth_client_ops {
 	/* ensure that an existing authorizer is up to date */
 	int (*update_authorizer)(struct ceph_auth_client *ac, int peer_type,
 				 struct ceph_auth_handshake *auth);
+	int (*add_authorizer_challenge)(struct ceph_auth_client *ac,
+					struct ceph_authorizer *a,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
 				       struct ceph_authorizer *a);
 	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
@@ -118,6 +122,10 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
 extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 				       int peer_type,
 				       struct ceph_auth_handshake *a);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len);
 extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 					     struct ceph_authorizer *a);
 extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 021718570b50..fc2b4491ee0a 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -31,6 +31,9 @@ struct ceph_connection_operations {
 	struct ceph_auth_handshake *(*get_authorizer) (
 				struct ceph_connection *con,
 			       int *proto, int force_new);
+	int (*add_authorizer_challenge)(struct ceph_connection *con,
+					void *challenge_buf,
+					int challenge_buf_len);
 	int (*verify_authorizer_reply) (struct ceph_connection *con);
 	int (*invalidate_authorizer)(struct ceph_connection *con);
 
diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h
index 73ae2a926548..9e50aede46c8 100644
--- a/include/linux/ceph/msgr.h
+++ b/include/linux/ceph/msgr.h
@@ -91,7 +91,7 @@ struct ceph_entity_inst {
 #define CEPH_MSGR_TAG_SEQ           13 /* 64-bit int follows with seen seq number */
 #define CEPH_MSGR_TAG_KEEPALIVE2    14 /* keepalive2 byte + ceph_timespec */
 #define CEPH_MSGR_TAG_KEEPALIVE2_ACK 15 /* keepalive2 reply */
-
+#define CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER 16  /* cephx v2 doing server challenge */
 
 /*
  * connection negotiation
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index dbde2b3c3c15..fbeee068ea14 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -315,6 +315,22 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
 }
 EXPORT_SYMBOL(ceph_auth_update_authorizer);
 
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+				       struct ceph_authorizer *a,
+				       void *challenge_buf,
+				       int challenge_buf_len)
+{
+	int ret = 0;
+
+	mutex_lock(&ac->mutex);
+	if (ac->ops && ac->ops->add_authorizer_challenge)
+		ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
+							challenge_buf_len);
+	mutex_unlock(&ac->mutex);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
+
 int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
 				      struct ceph_authorizer *a)
 {
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 61cccb93f653..512eed4291fe 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -295,7 +295,8 @@ bad:
  * authorizer.  The first part (ceph_x_authorize_a) should already be
  * encoded.
  */
-static int encrypt_authorizer(struct ceph_x_authorizer *au)
+static int encrypt_authorizer(struct ceph_x_authorizer *au,
+			      u64 *server_challenge)
 {
 	struct ceph_x_authorize_a *msg_a;
 	struct ceph_x_authorize_b *msg_b;
@@ -308,16 +309,28 @@ static int encrypt_authorizer(struct ceph_x_authorizer *au)
 	end = au->buf->vec.iov_base + au->buf->vec.iov_len;
 
 	msg_b = p + ceph_x_encrypt_offset();
-	msg_b->struct_v = 1;
+	msg_b->struct_v = 2;
 	msg_b->nonce = cpu_to_le64(au->nonce);
+	if (server_challenge) {
+		msg_b->have_challenge = 1;
+		msg_b->server_challenge_plus_one =
+		    cpu_to_le64(*server_challenge + 1);
+	} else {
+		msg_b->have_challenge = 0;
+		msg_b->server_challenge_plus_one = 0;
+	}
 
 	ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
 	if (ret < 0)
 		return ret;
 
 	p += ret;
-	WARN_ON(p > end);
-	au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+	if (server_challenge) {
+		WARN_ON(p != end);
+	} else {
+		WARN_ON(p > end);
+		au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+	}
 
 	return 0;
 }
@@ -382,7 +395,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
 	     le64_to_cpu(msg_a->ticket_blob.secret_id));
 
 	get_random_bytes(&au->nonce, sizeof(au->nonce));
-	ret = encrypt_authorizer(au);
+	ret = encrypt_authorizer(au, NULL);
 	if (ret) {
 		pr_err("failed to encrypt authorizer: %d", ret);
 		goto out_au;
@@ -664,6 +677,54 @@ static int ceph_x_update_authorizer(
 	return 0;
 }
 
+static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
+				       void *challenge_buf,
+				       int challenge_buf_len,
+				       u64 *server_challenge)
+{
+	struct ceph_x_authorize_challenge *ch =
+	    challenge_buf + sizeof(struct ceph_x_encrypt_header);
+	int ret;
+
+	/* no leading len */
+	ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
+			       challenge_buf_len);
+	if (ret < 0)
+		return ret;
+	if (ret < sizeof(*ch)) {
+		pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
+		return -EINVAL;
+	}
+
+	*server_challenge = le64_to_cpu(ch->server_challenge);
+	return 0;
+}
+
+static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
+					   struct ceph_authorizer *a,
+					   void *challenge_buf,
+					   int challenge_buf_len)
+{
+	struct ceph_x_authorizer *au = (void *)a;
+	u64 server_challenge;
+	int ret;
+
+	ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
+					  &server_challenge);
+	if (ret) {
+		pr_err("failed to decrypt authorize challenge: %d", ret);
+		return ret;
+	}
+
+	ret = encrypt_authorizer(au, &server_challenge);
+	if (ret) {
+		pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
 					  struct ceph_authorizer *a)
 {
@@ -816,6 +877,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
 	.handle_reply = ceph_x_handle_reply,
 	.create_authorizer = ceph_x_create_authorizer,
 	.update_authorizer = ceph_x_update_authorizer,
+	.add_authorizer_challenge = ceph_x_add_authorizer_challenge,
 	.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
 	.invalidate_authorizer = ceph_x_invalidate_authorizer,
 	.reset =  ceph_x_reset,
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 32c13d763b9a..24b0b74564d0 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -70,6 +70,13 @@ struct ceph_x_authorize_a {
 struct ceph_x_authorize_b {
 	__u8 struct_v;
 	__le64 nonce;
+	__u8 have_challenge;
+	__le64 server_challenge_plus_one;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_challenge {
+	__u8 struct_v;
+	__le64 server_challenge;
 } __attribute__ ((packed));
 
 struct ceph_x_authorize_reply {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 500cc3da586f..e915c8bce117 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2080,9 +2080,24 @@ static int process_connect(struct ceph_connection *con)
 	if (con->auth) {
 		/*
 		 * Any connection that defines ->get_authorizer()
-		 * should also define ->verify_authorizer_reply().
+		 * should also define ->add_authorizer_challenge() and
+		 * ->verify_authorizer_reply().
+		 *
 		 * See get_connect_authorizer().
 		 */
+		if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
+			ret = con->ops->add_authorizer_challenge(
+				    con, con->auth->authorizer_reply_buf,
+				    le32_to_cpu(con->in_reply.authorizer_len));
+			if (ret < 0)
+				return ret;
+
+			con_out_kvec_reset(con);
+			__prepare_write_connect(con);
+			prepare_read_connect(con);
+			return 0;
+		}
+
 		ret = con->ops->verify_authorizer_reply(con);
 		if (ret < 0) {
 			con->error_msg = "bad authorize reply";
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 8002b8e9ce24..60934bd8796c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5393,6 +5393,16 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 	return auth;
 }
 
+static int add_authorizer_challenge(struct ceph_connection *con,
+				    void *challenge_buf, int challenge_buf_len)
+{
+	struct ceph_osd *o = con->private;
+	struct ceph_osd_client *osdc = o->o_osdc;
+	struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+	return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
+					    challenge_buf, challenge_buf_len);
+}
 
 static int verify_authorizer_reply(struct ceph_connection *con)
 {
@@ -5442,6 +5452,7 @@ static const struct ceph_connection_operations osd_con_ops = {
 	.put = put_osd_con,
 	.dispatch = dispatch,
 	.get_authorizer = get_authorizer,
+	.add_authorizer_challenge = add_authorizer_challenge,
 	.verify_authorizer_reply = verify_authorizer_reply,
 	.invalidate_authorizer = invalidate_authorizer,
 	.alloc_msg = alloc_msg,
-- 
cgit v1.2.3


From cc255c76c70f7a87d97939621eae04b600d9f4a1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Jul 2018 19:25:32 +0200
Subject: libceph: implement CEPHX_V2 calculation mode

Derive the signature from the entire buffer (both AES cipher blocks)
instead of using just the first half of the first block, leaving out
data_crc entirely.

This addresses CVE-2018-1129.

Link: http://tracker.ceph.com/issues/24837
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/ceph_features.h |  7 ++--
 net/ceph/auth_x.c                  | 73 +++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 3901927cf6a0..6b92b3395fa9 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -165,9 +165,9 @@ DEFINE_CEPH_FEATURE(58, 1, FS_FILE_LAYOUT_V2) // overlap
 DEFINE_CEPH_FEATURE(59, 1, FS_BTIME)
 DEFINE_CEPH_FEATURE(59, 1, FS_CHANGE_ATTR) // overlap
 DEFINE_CEPH_FEATURE(59, 1, MSG_ADDR2) // overlap
-DEFINE_CEPH_FEATURE(60, 1, BLKIN_TRACING)  // *do not share this bit*
+DEFINE_CEPH_FEATURE(60, 1, OSD_RECOVERY_DELETES) // *do not share this bit*
+DEFINE_CEPH_FEATURE(61, 1, CEPHX_V2)             // *do not share this bit*
 
-DEFINE_CEPH_FEATURE(61, 1, RESERVED2)          // unused, but slow down!
 DEFINE_CEPH_FEATURE(62, 1, RESERVED)           // do not use; used as a sentinal
 DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facing
 
@@ -210,7 +210,8 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_SERVER_JEWEL |		\
 	 CEPH_FEATURE_MON_STATEFUL_SUB |	\
 	 CEPH_FEATURE_CRUSH_TUNABLES5 |		\
-	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING)
+	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
+	 CEPH_FEATURE_CEPHX_V2)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
 	(CEPH_FEATURE_NOSRCADDR |	 \
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 512eed4291fe..462786f571e7 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -9,6 +9,7 @@
 
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/ceph_features.h>
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/messenger.h>
 
@@ -803,26 +804,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
 			  __le64 *psig)
 {
 	void *enc_buf = au->enc_buf;
-	struct {
-		__le32 len;
-		__le32 header_crc;
-		__le32 front_crc;
-		__le32 middle_crc;
-		__le32 data_crc;
-	} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
 	int ret;
 
-	sigblock->len = cpu_to_le32(4*sizeof(u32));
-	sigblock->header_crc = msg->hdr.crc;
-	sigblock->front_crc = msg->footer.front_crc;
-	sigblock->middle_crc = msg->footer.middle_crc;
-	sigblock->data_crc =  msg->footer.data_crc;
-	ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
-			     sizeof(*sigblock));
-	if (ret < 0)
-		return ret;
+	if (!CEPH_HAVE_FEATURE(msg->con->peer_features, CEPHX_V2)) {
+		struct {
+			__le32 len;
+			__le32 header_crc;
+			__le32 front_crc;
+			__le32 middle_crc;
+			__le32 data_crc;
+		} __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+
+		sigblock->len = cpu_to_le32(4*sizeof(u32));
+		sigblock->header_crc = msg->hdr.crc;
+		sigblock->front_crc = msg->footer.front_crc;
+		sigblock->middle_crc = msg->footer.middle_crc;
+		sigblock->data_crc =  msg->footer.data_crc;
+
+		ret = ceph_x_encrypt(&au->session_key, enc_buf,
+				     CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
+		if (ret < 0)
+			return ret;
+
+		*psig = *(__le64 *)(enc_buf + sizeof(u32));
+	} else {
+		struct {
+			__le32 header_crc;
+			__le32 front_crc;
+			__le32 front_len;
+			__le32 middle_crc;
+			__le32 middle_len;
+			__le32 data_crc;
+			__le32 data_len;
+			__le32 seq_lower_word;
+		} __packed *sigblock = enc_buf;
+		struct {
+			__le64 a, b, c, d;
+		} __packed *penc = enc_buf;
+		int ciphertext_len;
+
+		sigblock->header_crc = msg->hdr.crc;
+		sigblock->front_crc = msg->footer.front_crc;
+		sigblock->front_len = msg->hdr.front_len;
+		sigblock->middle_crc = msg->footer.middle_crc;
+		sigblock->middle_len = msg->hdr.middle_len;
+		sigblock->data_crc =  msg->footer.data_crc;
+		sigblock->data_len = msg->hdr.data_len;
+		sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
+
+		/* no leading len, no ceph_x_encrypt_header */
+		ret = ceph_crypt(&au->session_key, true, enc_buf,
+				 CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
+				 &ciphertext_len);
+		if (ret)
+			return ret;
+
+		*psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
+	}
 
-	*psig = *(__le64 *)(enc_buf + sizeof(u32));
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2afc9166f79b8f6da5f347f48515215ceee4ae37 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 2 Aug 2018 10:51:40 -0700
Subject: scsi: sysfs: Introduce sysfs_{un,}break_active_protection()

Introduce these two functions and export them such that the next patch
can add calls to these functions from the SCSI core.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 fs/sysfs/file.c       | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sysfs.h | 14 ++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5c13f29bfcdb..118fa197a35f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -405,6 +405,50 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
+/**
+ * sysfs_break_active_protection - break "active" protection
+ * @kobj: The kernel object @attr is associated with.
+ * @attr: The attribute to break the "active" protection for.
+ *
+ * With sysfs, just like kernfs, deletion of an attribute is postponed until
+ * all active .show() and .store() callbacks have finished unless this function
+ * is called. Hence this function is useful in methods that implement self
+ * deletion.
+ */
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr)
+{
+	struct kernfs_node *kn;
+
+	kobject_get(kobj);
+	kn = kernfs_find_and_get(kobj->sd, attr->name);
+	if (kn)
+		kernfs_break_active_protection(kn);
+	return kn;
+}
+EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
+
+/**
+ * sysfs_unbreak_active_protection - restore "active" protection
+ * @kn: Pointer returned by sysfs_break_active_protection().
+ *
+ * Undo the effects of sysfs_break_active_protection(). Since this function
+ * calls kernfs_put() on the kernfs node that corresponds to the 'attr'
+ * argument passed to sysfs_break_active_protection() that attribute may have
+ * been removed between the sysfs_break_active_protection() and
+ * sysfs_unbreak_active_protection() calls, it is not safe to access @kn after
+ * this function has returned.
+ */
+void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+	struct kobject *kobj = kn->parent->priv;
+
+	kernfs_unbreak_active_protection(kn);
+	kernfs_put(kn);
+	kobject_put(kobj);
+}
+EXPORT_SYMBOL_GPL(sysfs_unbreak_active_protection);
+
 /**
  * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
  * @kobj: object we're acting for
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b8bfdc173ec0..3c12198c0103 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -237,6 +237,9 @@ int __must_check sysfs_create_files(struct kobject *kobj,
 				   const struct attribute **attr);
 int __must_check sysfs_chmod_file(struct kobject *kobj,
 				  const struct attribute *attr, umode_t mode);
+struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
+						  const struct attribute *attr);
+void sysfs_unbreak_active_protection(struct kernfs_node *kn);
 void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
 			  const void *ns);
 bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr);
@@ -350,6 +353,17 @@ static inline int sysfs_chmod_file(struct kobject *kobj,
 	return 0;
 }
 
+static inline struct kernfs_node *
+sysfs_break_active_protection(struct kobject *kobj,
+			      const struct attribute *attr)
+{
+	return NULL;
+}
+
+static inline void sysfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+}
+
 static inline void sysfs_remove_file_ns(struct kobject *kobj,
 					const struct attribute *attr,
 					const void *ns)
-- 
cgit v1.2.3


From a3f90c75b833caeff123499e13e0e31cbecf7d5b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 5 Jul 2018 18:59:35 -0400
Subject: media: dvb: convert tuner_info frequencies to Hz
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, satellite tuner drivers specify frequencies in kHz,
while terrestrial/cable ones specify in Hz. That's confusing
for developers.

However, the main problem is that universal tuners capable
of handling both satellite and non-satelite delivery systems
are appearing. We end by needing to hack the drivers in
order to support such hybrid tuners.

So, convert everything to specify tuner frequencies in Hz.

Plese notice that a similar patch is also needed for frontends.

Tested-by: Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
Acked-by: Michael Büsch <m@bues.ch>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/dvb-core/dvb_frontend.c         | 25 +++++++++++++++++++++----
 drivers/media/dvb-frontends/ascot2e.c         |  6 +++---
 drivers/media/dvb-frontends/cx24113.c         |  8 ++++----
 drivers/media/dvb-frontends/dib0070.c         |  8 ++++----
 drivers/media/dvb-frontends/dib0090.c         | 12 ++++++------
 drivers/media/dvb-frontends/dvb-pll.c         | 16 ++++++++++++++--
 drivers/media/dvb-frontends/helene.c          | 12 ++++++------
 drivers/media/dvb-frontends/horus3a.c         |  6 +++---
 drivers/media/dvb-frontends/itd1000.c         |  8 ++++----
 drivers/media/dvb-frontends/ix2505v.c         |  4 ++--
 drivers/media/dvb-frontends/stb6000.c         |  4 ++--
 drivers/media/dvb-frontends/stb6100.c         |  5 ++---
 drivers/media/dvb-frontends/stv6110.c         |  6 +++---
 drivers/media/dvb-frontends/stv6110x.c        |  7 +++----
 drivers/media/dvb-frontends/stv6111.c         |  5 ++---
 drivers/media/dvb-frontends/tda18271c2dd.c    |  6 +++---
 drivers/media/dvb-frontends/tda665x.c         |  6 +++---
 drivers/media/dvb-frontends/tda8261.c         |  9 ++++-----
 drivers/media/dvb-frontends/tda826x.c         |  4 ++--
 drivers/media/dvb-frontends/ts2020.c          |  4 ++--
 drivers/media/dvb-frontends/tua6100.c         |  6 +++---
 drivers/media/dvb-frontends/zl10036.c         |  4 ++--
 drivers/media/tuners/e4000.c                  |  6 +++---
 drivers/media/tuners/fc0011.c                 |  6 +++---
 drivers/media/tuners/fc0012.c                 |  7 +++----
 drivers/media/tuners/fc0013.c                 |  7 +++----
 drivers/media/tuners/fc2580.c                 |  6 +++---
 drivers/media/tuners/it913x.c                 |  6 +++---
 drivers/media/tuners/m88rs6000t.c             |  6 +++---
 drivers/media/tuners/max2165.c                |  8 ++++----
 drivers/media/tuners/mc44s803.c               |  8 ++++----
 drivers/media/tuners/mt2060.c                 |  8 ++++----
 drivers/media/tuners/mt2063.c                 |  7 +++----
 drivers/media/tuners/mt2131.c                 |  8 ++++----
 drivers/media/tuners/mt2266.c                 |  8 ++++----
 drivers/media/tuners/mxl301rf.c               |  4 ++--
 drivers/media/tuners/mxl5005s.c               |  8 ++++----
 drivers/media/tuners/qm1d1b0004.c             |  4 ++--
 drivers/media/tuners/qm1d1c0042.c             |  4 ++--
 drivers/media/tuners/qt1010.c                 |  8 ++++----
 drivers/media/tuners/qt1010_priv.h            | 14 ++++++++------
 drivers/media/tuners/r820t.c                  |  6 +++---
 drivers/media/tuners/si2157.c                 |  6 +++---
 drivers/media/tuners/tda18212.c               |  8 ++++----
 drivers/media/tuners/tda18218.c               |  8 ++++----
 drivers/media/tuners/tda18250.c               |  6 +++---
 drivers/media/tuners/tda18271-fe.c            |  6 +++---
 drivers/media/tuners/tda827x.c                | 12 ++++++------
 drivers/media/tuners/tua9001.c                |  6 +++---
 drivers/media/tuners/tuner-xc2028.c           |  6 +++---
 drivers/media/tuners/xc4000.c                 | 12 ++++++------
 drivers/media/tuners/xc5000.c                 | 12 ++++++------
 drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c |  6 +++---
 include/media/dvb_frontend.h                  | 19 ++++++++++---------
 54 files changed, 221 insertions(+), 196 deletions(-)

(limited to 'include')

diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index ce25aef39008..75e95b56f8b3 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -896,14 +896,31 @@ static int dvb_frontend_start(struct dvb_frontend *fe)
 static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe,
 					      u32 *freq_min, u32 *freq_max)
 {
-	*freq_min = max(fe->ops.info.frequency_min, fe->ops.tuner_ops.info.frequency_min);
+	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+	__u32 tuner_min = fe->ops.tuner_ops.info.frequency_min_hz;
+	__u32 tuner_max = fe->ops.tuner_ops.info.frequency_max_hz;
+
+	/* If the standard is for satellite, convert frequencies to kHz */
+	switch (c->delivery_system) {
+	case SYS_DVBS:
+	case SYS_DVBS2:
+	case SYS_TURBO:
+	case SYS_ISDBS:
+		tuner_max /= kHz;
+		tuner_min /= kHz;
+		break;
+	default:
+		break;
+	}
+
+	*freq_min = max(fe->ops.info.frequency_min, tuner_min);
 
 	if (fe->ops.info.frequency_max == 0)
-		*freq_max = fe->ops.tuner_ops.info.frequency_max;
-	else if (fe->ops.tuner_ops.info.frequency_max == 0)
+		*freq_max = tuner_max;
+	else if (tuner_max == 0)
 		*freq_max = fe->ops.info.frequency_max;
 	else
-		*freq_max = min(fe->ops.info.frequency_max, fe->ops.tuner_ops.info.frequency_max);
+		*freq_max = min(fe->ops.info.frequency_max, tuner_max);
 
 	if (*freq_min == 0 || *freq_max == 0)
 		dev_warn(fe->dvb->device,
diff --git a/drivers/media/dvb-frontends/ascot2e.c b/drivers/media/dvb-frontends/ascot2e.c
index 9746c6dd7fb8..52ce0e6e2a15 100644
--- a/drivers/media/dvb-frontends/ascot2e.c
+++ b/drivers/media/dvb-frontends/ascot2e.c
@@ -468,9 +468,9 @@ static int ascot2e_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops ascot2e_tuner_ops = {
 	.info = {
 		.name = "Sony ASCOT2E",
-		.frequency_min = 1000000,
-		.frequency_max = 1200000000,
-		.frequency_step = 25000,
+		.frequency_min_hz  =    1 * MHz,
+		.frequency_max_hz  = 1200 * MHz,
+		.frequency_step_hz =   25 * kHz,
 	},
 	.init = ascot2e_init,
 	.release = ascot2e_release,
diff --git a/drivers/media/dvb-frontends/cx24113.c b/drivers/media/dvb-frontends/cx24113.c
index 037db3e9d2dd..91a5033b6bd7 100644
--- a/drivers/media/dvb-frontends/cx24113.c
+++ b/drivers/media/dvb-frontends/cx24113.c
@@ -533,10 +533,10 @@ static void cx24113_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops cx24113_tuner_ops = {
 	.info = {
-		.name           = "Conexant CX24113",
-		.frequency_min  = 950000,
-		.frequency_max  = 2150000,
-		.frequency_step = 125,
+		.name              = "Conexant CX24113",
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
+		.frequency_step_hz =  125 * kHz,
 	},
 
 	.release       = cx24113_release,
diff --git a/drivers/media/dvb-frontends/dib0070.c b/drivers/media/dvb-frontends/dib0070.c
index 932d235118e2..37ebd5af8fd4 100644
--- a/drivers/media/dvb-frontends/dib0070.c
+++ b/drivers/media/dvb-frontends/dib0070.c
@@ -726,10 +726,10 @@ static void dib0070_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops dib0070_ops = {
 	.info = {
-		.name           = "DiBcom DiB0070",
-		.frequency_min  =  45000000,
-		.frequency_max  = 860000000,
-		.frequency_step =      1000,
+		.name              = "DiBcom DiB0070",
+		.frequency_min_hz  =  45 * MHz,
+		.frequency_max_hz  = 860 * MHz,
+		.frequency_step_hz =   1 * kHz,
 	},
 	.release       = dib0070_release,
 
diff --git a/drivers/media/dvb-frontends/dib0090.c b/drivers/media/dvb-frontends/dib0090.c
index ee7af34979ed..44a074261e69 100644
--- a/drivers/media/dvb-frontends/dib0090.c
+++ b/drivers/media/dvb-frontends/dib0090.c
@@ -2578,9 +2578,9 @@ static int dib0090_set_params(struct dvb_frontend *fe)
 static const struct dvb_tuner_ops dib0090_ops = {
 	.info = {
 		 .name = "DiBcom DiB0090",
-		 .frequency_min = 45000000,
-		 .frequency_max = 860000000,
-		 .frequency_step = 1000,
+		 .frequency_min_hz  =  45 * MHz,
+		 .frequency_max_hz  = 860 * MHz,
+		 .frequency_step_hz =   1 * kHz,
 		 },
 	.release = dib0090_release,
 
@@ -2593,9 +2593,9 @@ static const struct dvb_tuner_ops dib0090_ops = {
 static const struct dvb_tuner_ops dib0090_fw_ops = {
 	.info = {
 		 .name = "DiBcom DiB0090",
-		 .frequency_min = 45000000,
-		 .frequency_max = 860000000,
-		 .frequency_step = 1000,
+		 .frequency_min_hz  =  45 * MHz,
+		 .frequency_max_hz  = 860 * MHz,
+		 .frequency_step_hz =   1 * kHz,
 		 },
 	.release = dib0090_release,
 
diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c
index 4a663420190e..6d4b2eec67b4 100644
--- a/drivers/media/dvb-frontends/dvb-pll.c
+++ b/drivers/media/dvb-frontends/dvb-pll.c
@@ -799,6 +799,7 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
 	struct dvb_pll_priv *priv = NULL;
 	int ret;
 	const struct dvb_pll_desc *desc;
+	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 
 	b1 = kmalloc(1, GFP_KERNEL);
 	if (!b1)
@@ -844,8 +845,19 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
 
 	strncpy(fe->ops.tuner_ops.info.name, desc->name,
 		sizeof(fe->ops.tuner_ops.info.name));
-	fe->ops.tuner_ops.info.frequency_min = desc->min;
-	fe->ops.tuner_ops.info.frequency_max = desc->max;
+	switch (c->delivery_system) {
+	case SYS_DVBS:
+	case SYS_DVBS2:
+	case SYS_TURBO:
+	case SYS_ISDBS:
+		fe->ops.tuner_ops.info.frequency_min_hz = desc->min * kHz;
+		fe->ops.tuner_ops.info.frequency_max_hz = desc->max * kHz;
+		break;
+	default:
+		fe->ops.tuner_ops.info.frequency_min_hz = desc->min;
+		fe->ops.tuner_ops.info.frequency_max_hz = desc->max;
+	}
+
 	if (!desc->initdata)
 		fe->ops.tuner_ops.init = NULL;
 	if (!desc->sleepdata)
diff --git a/drivers/media/dvb-frontends/helene.c b/drivers/media/dvb-frontends/helene.c
index a5de65dcf784..dc70fb684681 100644
--- a/drivers/media/dvb-frontends/helene.c
+++ b/drivers/media/dvb-frontends/helene.c
@@ -846,9 +846,9 @@ static int helene_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops helene_tuner_ops = {
 	.info = {
 		.name = "Sony HELENE Ter tuner",
-		.frequency_min = 1000000,
-		.frequency_max = 1200000000,
-		.frequency_step = 25000,
+		.frequency_min_hz  =    1 * MHz,
+		.frequency_max_hz  = 1200 * MHz,
+		.frequency_step_hz =   25 * kHz,
 	},
 	.init = helene_init,
 	.release = helene_release,
@@ -860,9 +860,9 @@ static const struct dvb_tuner_ops helene_tuner_ops = {
 static const struct dvb_tuner_ops helene_tuner_ops_s = {
 	.info = {
 		.name = "Sony HELENE Sat tuner",
-		.frequency_min = 500000,
-		.frequency_max = 2500000,
-		.frequency_step = 1000,
+		.frequency_min_hz  =  500 * MHz,
+		.frequency_max_hz  = 2500 * MHz,
+		.frequency_step_hz =    1 * MHz,
 	},
 	.init = helene_init,
 	.release = helene_release,
diff --git a/drivers/media/dvb-frontends/horus3a.c b/drivers/media/dvb-frontends/horus3a.c
index 5e7e265a52e6..02bc08081971 100644
--- a/drivers/media/dvb-frontends/horus3a.c
+++ b/drivers/media/dvb-frontends/horus3a.c
@@ -330,9 +330,9 @@ static int horus3a_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops horus3a_tuner_ops = {
 	.info = {
 		.name = "Sony Horus3a",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_step = 1000,
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
+		.frequency_step_hz =    1 * MHz,
 	},
 	.init = horus3a_init,
 	.release = horus3a_release,
diff --git a/drivers/media/dvb-frontends/itd1000.c b/drivers/media/dvb-frontends/itd1000.c
index 04f7f6854f73..c3a6e81ae87f 100644
--- a/drivers/media/dvb-frontends/itd1000.c
+++ b/drivers/media/dvb-frontends/itd1000.c
@@ -353,10 +353,10 @@ static void itd1000_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops itd1000_tuner_ops = {
 	.info = {
-		.name           = "Integrant ITD1000",
-		.frequency_min  = 950000,
-		.frequency_max  = 2150000,
-		.frequency_step = 125,     /* kHz for QPSK frontends */
+		.name              = "Integrant ITD1000",
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
+		.frequency_step_hz =  125 * kHz,
 	},
 
 	.release       = itd1000_release,
diff --git a/drivers/media/dvb-frontends/ix2505v.c b/drivers/media/dvb-frontends/ix2505v.c
index 965012ad5c59..9c055f72c416 100644
--- a/drivers/media/dvb-frontends/ix2505v.c
+++ b/drivers/media/dvb-frontends/ix2505v.c
@@ -256,8 +256,8 @@ static int ix2505v_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops ix2505v_tuner_ops = {
 	.info = {
 		.name = "Sharp IX2505V (B0017)",
-		.frequency_min = 950000,
-		.frequency_max = 2175000
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2175 * MHz
 	},
 	.release = ix2505v_release,
 	.set_params = ix2505v_set_params,
diff --git a/drivers/media/dvb-frontends/stb6000.c b/drivers/media/dvb-frontends/stb6000.c
index 69c03892f2da..786b9eccde00 100644
--- a/drivers/media/dvb-frontends/stb6000.c
+++ b/drivers/media/dvb-frontends/stb6000.c
@@ -188,8 +188,8 @@ static int stb6000_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops stb6000_tuner_ops = {
 	.info = {
 		.name = "ST STB6000",
-		.frequency_min = 950000,
-		.frequency_max = 2150000
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz
 	},
 	.release = stb6000_release,
 	.sleep = stb6000_sleep,
diff --git a/drivers/media/dvb-frontends/stb6100.c b/drivers/media/dvb-frontends/stb6100.c
index 3a851f524b16..30ac584dfab3 100644
--- a/drivers/media/dvb-frontends/stb6100.c
+++ b/drivers/media/dvb-frontends/stb6100.c
@@ -527,9 +527,8 @@ static int stb6100_set_params(struct dvb_frontend *fe)
 static const struct dvb_tuner_ops stb6100_ops = {
 	.info = {
 		.name			= "STB6100 Silicon Tuner",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_step		= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
 	},
 
 	.init		= stb6100_init,
diff --git a/drivers/media/dvb-frontends/stv6110.c b/drivers/media/dvb-frontends/stv6110.c
index 6aad0efa3174..7db9a5bceccc 100644
--- a/drivers/media/dvb-frontends/stv6110.c
+++ b/drivers/media/dvb-frontends/stv6110.c
@@ -371,9 +371,9 @@ static int stv6110_get_bandwidth(struct dvb_frontend *fe, u32 *bandwidth)
 static const struct dvb_tuner_ops stv6110_tuner_ops = {
 	.info = {
 		.name = "ST STV6110",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_step = 1000,
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
+		.frequency_step_hz =    1 * MHz,
 	},
 	.init = stv6110_init,
 	.release = stv6110_release,
diff --git a/drivers/media/dvb-frontends/stv6110x.c b/drivers/media/dvb-frontends/stv6110x.c
index d8950028d021..82c002d3833a 100644
--- a/drivers/media/dvb-frontends/stv6110x.c
+++ b/drivers/media/dvb-frontends/stv6110x.c
@@ -347,10 +347,9 @@ static void stv6110x_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops stv6110x_ops = {
 	.info = {
-		.name		= "STV6110(A) Silicon Tuner",
-		.frequency_min	=  950000,
-		.frequency_max	= 2150000,
-		.frequency_step	= 0,
+		.name		  = "STV6110(A) Silicon Tuner",
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 	},
 	.release		= stv6110x_release
 };
diff --git a/drivers/media/dvb-frontends/stv6111.c b/drivers/media/dvb-frontends/stv6111.c
index 9b715b6fe152..0cf460111acb 100644
--- a/drivers/media/dvb-frontends/stv6111.c
+++ b/drivers/media/dvb-frontends/stv6111.c
@@ -646,9 +646,8 @@ static int get_rf_strength(struct dvb_frontend *fe, u16 *st)
 static const struct dvb_tuner_ops tuner_ops = {
 	.info = {
 		.name		= "ST STV6111",
-		.frequency_min	= 950000,
-		.frequency_max	= 2150000,
-		.frequency_step	= 0
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 	},
 	.set_params		= set_params,
 	.release		= release,
diff --git a/drivers/media/dvb-frontends/tda18271c2dd.c b/drivers/media/dvb-frontends/tda18271c2dd.c
index fcffc7b4acf7..5ce58612315d 100644
--- a/drivers/media/dvb-frontends/tda18271c2dd.c
+++ b/drivers/media/dvb-frontends/tda18271c2dd.c
@@ -1215,9 +1215,9 @@ static int get_bandwidth(struct dvb_frontend *fe, u32 *bandwidth)
 static const struct dvb_tuner_ops tuner_ops = {
 	.info = {
 		.name = "NXP TDA18271C2D",
-		.frequency_min  =  47125000,
-		.frequency_max  = 865000000,
-		.frequency_step =     62500
+		.frequency_min_hz  =  47125 * kHz,
+		.frequency_max_hz  =    865 * MHz,
+		.frequency_step_hz =  62500
 	},
 	.init              = init,
 	.sleep             = sleep,
diff --git a/drivers/media/dvb-frontends/tda665x.c b/drivers/media/dvb-frontends/tda665x.c
index 3ef7140ed7f3..8766c9ff6680 100644
--- a/drivers/media/dvb-frontends/tda665x.c
+++ b/drivers/media/dvb-frontends/tda665x.c
@@ -231,9 +231,9 @@ struct dvb_frontend *tda665x_attach(struct dvb_frontend *fe,
 	info			 = &fe->ops.tuner_ops.info;
 
 	memcpy(info->name, config->name, sizeof(config->name));
-	info->frequency_min	= config->frequency_min;
-	info->frequency_max	= config->frequency_max;
-	info->frequency_step	= config->frequency_offst;
+	info->frequency_min_hz	= config->frequency_min;
+	info->frequency_max_hz	= config->frequency_max;
+	info->frequency_step_hz	= config->frequency_offst;
 
 	printk(KERN_DEBUG "%s: Attaching TDA665x (%s) tuner\n", __func__, info->name);
 
diff --git a/drivers/media/dvb-frontends/tda8261.c b/drivers/media/dvb-frontends/tda8261.c
index f72a54e7eb23..500f50b81b66 100644
--- a/drivers/media/dvb-frontends/tda8261.c
+++ b/drivers/media/dvb-frontends/tda8261.c
@@ -163,10 +163,9 @@ static void tda8261_release(struct dvb_frontend *fe)
 static const struct dvb_tuner_ops tda8261_ops = {
 
 	.info = {
-		.name		= "TDA8261",
-		.frequency_min	=  950000,
-		.frequency_max	= 2150000,
-		.frequency_step = 0
+		.name		   = "TDA8261",
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
 	},
 
 	.set_params	= tda8261_set_params,
@@ -190,7 +189,7 @@ struct dvb_frontend *tda8261_attach(struct dvb_frontend *fe,
 	fe->tuner_priv		= state;
 	fe->ops.tuner_ops	= tda8261_ops;
 
-	fe->ops.tuner_ops.info.frequency_step = div_tab[config->step_size];
+	fe->ops.tuner_ops.info.frequency_step_hz = div_tab[config->step_size] * kHz;
 
 	pr_info("%s: Attaching TDA8261 8PSK/QPSK tuner\n", __func__);
 
diff --git a/drivers/media/dvb-frontends/tda826x.c b/drivers/media/dvb-frontends/tda826x.c
index da427b4c2aaa..100da5d5fdc5 100644
--- a/drivers/media/dvb-frontends/tda826x.c
+++ b/drivers/media/dvb-frontends/tda826x.c
@@ -131,8 +131,8 @@ static int tda826x_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops tda826x_tuner_ops = {
 	.info = {
 		.name = "Philips TDA826X",
-		.frequency_min = 950000,
-		.frequency_max = 2175000
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2175 * MHz
 	},
 	.release = tda826x_release,
 	.sleep = tda826x_sleep,
diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c
index c55882a8da06..3e3e40878633 100644
--- a/drivers/media/dvb-frontends/ts2020.c
+++ b/drivers/media/dvb-frontends/ts2020.c
@@ -498,8 +498,8 @@ static int ts2020_read_signal_strength(struct dvb_frontend *fe,
 static const struct dvb_tuner_ops ts2020_tuner_ops = {
 	.info = {
 		.name = "TS2020",
-		.frequency_min = 950000,
-		.frequency_max = 2150000
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz
 	},
 	.init = ts2020_init,
 	.release = ts2020_release,
diff --git a/drivers/media/dvb-frontends/tua6100.c b/drivers/media/dvb-frontends/tua6100.c
index 1d41abd47f04..b233b7be0b84 100644
--- a/drivers/media/dvb-frontends/tua6100.c
+++ b/drivers/media/dvb-frontends/tua6100.c
@@ -155,9 +155,9 @@ static int tua6100_get_frequency(struct dvb_frontend *fe, u32 *frequency)
 static const struct dvb_tuner_ops tua6100_tuner_ops = {
 	.info = {
 		.name = "Infineon TUA6100",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_step = 1000,
+		.frequency_min_hz  =  950 * MHz,
+		.frequency_max_hz  = 2150 * MHz,
+		.frequency_step_hz =    1 * MHz,
 	},
 	.release = tua6100_release,
 	.sleep = tua6100_sleep,
diff --git a/drivers/media/dvb-frontends/zl10036.c b/drivers/media/dvb-frontends/zl10036.c
index 89dd65ae88ad..e5a432fd84c3 100644
--- a/drivers/media/dvb-frontends/zl10036.c
+++ b/drivers/media/dvb-frontends/zl10036.c
@@ -443,8 +443,8 @@ static int zl10036_init(struct dvb_frontend *fe)
 static const struct dvb_tuner_ops zl10036_tuner_ops = {
 	.info = {
 		.name = "Zarlink ZL10036",
-		.frequency_min = 950000,
-		.frequency_max = 2175000
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2175 * MHz
 	},
 	.init = zl10036_init,
 	.release = zl10036_release,
diff --git a/drivers/media/tuners/e4000.c b/drivers/media/tuners/e4000.c
index b5b9d87ba75c..fbec1a13dc6a 100644
--- a/drivers/media/tuners/e4000.c
+++ b/drivers/media/tuners/e4000.c
@@ -610,9 +610,9 @@ static int e4000_dvb_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops e4000_dvb_tuner_ops = {
 	.info = {
-		.name           = "Elonics E4000",
-		.frequency_min  = 174000000,
-		.frequency_max  = 862000000,
+		.name              = "Elonics E4000",
+		.frequency_min_hz  = 174 * MHz,
+		.frequency_max_hz  = 862 * MHz,
 	},
 
 	.init = e4000_dvb_init,
diff --git a/drivers/media/tuners/fc0011.c b/drivers/media/tuners/fc0011.c
index 145407dee3db..a983899c6b0b 100644
--- a/drivers/media/tuners/fc0011.c
+++ b/drivers/media/tuners/fc0011.c
@@ -472,10 +472,10 @@ static int fc0011_get_bandwidth(struct dvb_frontend *fe, u32 *bandwidth)
 
 static const struct dvb_tuner_ops fc0011_tuner_ops = {
 	.info = {
-		.name		= "Fitipower FC0011",
+		.name		  = "Fitipower FC0011",
 
-		.frequency_min	= 45000000,
-		.frequency_max	= 1000000000,
+		.frequency_min_hz =   45 * MHz,
+		.frequency_max_hz = 1000 * MHz,
 	},
 
 	.release		= fc0011_release,
diff --git a/drivers/media/tuners/fc0012.c b/drivers/media/tuners/fc0012.c
index 625ac6f51c39..e992b98ae5bc 100644
--- a/drivers/media/tuners/fc0012.c
+++ b/drivers/media/tuners/fc0012.c
@@ -415,11 +415,10 @@ exit:
 
 static const struct dvb_tuner_ops fc0012_tuner_ops = {
 	.info = {
-		.name           = "Fitipower FC0012",
+		.name              = "Fitipower FC0012",
 
-		.frequency_min  = 37000000,	/* estimate */
-		.frequency_max  = 862000000,	/* estimate */
-		.frequency_step = 0,
+		.frequency_min_hz  =  37 * MHz,	/* estimate */
+		.frequency_max_hz  = 862 * MHz,	/* estimate */
 	},
 
 	.release	= fc0012_release,
diff --git a/drivers/media/tuners/fc0013.c b/drivers/media/tuners/fc0013.c
index e606118d1a9b..fc62afb1450d 100644
--- a/drivers/media/tuners/fc0013.c
+++ b/drivers/media/tuners/fc0013.c
@@ -574,11 +574,10 @@ exit:
 
 static const struct dvb_tuner_ops fc0013_tuner_ops = {
 	.info = {
-		.name		= "Fitipower FC0013",
+		.name		  = "Fitipower FC0013",
 
-		.frequency_min	= 37000000,	/* estimate */
-		.frequency_max	= 1680000000,	/* CHECK */
-		.frequency_step	= 0,
+		.frequency_min_hz =   37 * MHz,	/* estimate */
+		.frequency_max_hz = 1680 * MHz,	/* CHECK */
 	},
 
 	.release	= fc0013_release,
diff --git a/drivers/media/tuners/fc2580.c b/drivers/media/tuners/fc2580.c
index 743184ae0d26..db26892aac84 100644
--- a/drivers/media/tuners/fc2580.c
+++ b/drivers/media/tuners/fc2580.c
@@ -355,9 +355,9 @@ static int fc2580_dvb_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops fc2580_dvb_tuner_ops = {
 	.info = {
-		.name           = "FCI FC2580",
-		.frequency_min  = 174000000,
-		.frequency_max  = 862000000,
+		.name             = "FCI FC2580",
+		.frequency_min_hz = 174 * MHz,
+		.frequency_max_hz = 862 * MHz,
 	},
 
 	.init = fc2580_dvb_init,
diff --git a/drivers/media/tuners/it913x.c b/drivers/media/tuners/it913x.c
index 27e5bc1c3cb5..b5eb39921e95 100644
--- a/drivers/media/tuners/it913x.c
+++ b/drivers/media/tuners/it913x.c
@@ -375,9 +375,9 @@ err:
 
 static const struct dvb_tuner_ops it913x_tuner_ops = {
 	.info = {
-		.name           = "ITE IT913X",
-		.frequency_min  = 174000000,
-		.frequency_max  = 862000000,
+		.name             = "ITE IT913X",
+		.frequency_min_hz = 174 * MHz,
+		.frequency_max_hz = 862 * MHz,
 	},
 
 	.init = it913x_init,
diff --git a/drivers/media/tuners/m88rs6000t.c b/drivers/media/tuners/m88rs6000t.c
index 9f3e0fd4cad9..3df2f23a40be 100644
--- a/drivers/media/tuners/m88rs6000t.c
+++ b/drivers/media/tuners/m88rs6000t.c
@@ -569,9 +569,9 @@ err:
 
 static const struct dvb_tuner_ops m88rs6000t_tuner_ops = {
 	.info = {
-		.name          = "Montage M88RS6000 Internal Tuner",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
+		.name             = "Montage M88RS6000 Internal Tuner",
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 	},
 
 	.init = m88rs6000t_init,
diff --git a/drivers/media/tuners/max2165.c b/drivers/media/tuners/max2165.c
index 20ceb72e530b..721d8f722efb 100644
--- a/drivers/media/tuners/max2165.c
+++ b/drivers/media/tuners/max2165.c
@@ -377,10 +377,10 @@ static void max2165_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops max2165_tuner_ops = {
 	.info = {
-		.name           = "Maxim MAX2165",
-		.frequency_min  = 470000000,
-		.frequency_max  = 862000000,
-		.frequency_step =     50000,
+		.name              = "Maxim MAX2165",
+		.frequency_min_hz  = 470 * MHz,
+		.frequency_max_hz  = 862 * MHz,
+		.frequency_step_hz =  50 * kHz,
 	},
 
 	.release	   = max2165_release,
diff --git a/drivers/media/tuners/mc44s803.c b/drivers/media/tuners/mc44s803.c
index 403c6b2aa53b..2023e081d9ad 100644
--- a/drivers/media/tuners/mc44s803.c
+++ b/drivers/media/tuners/mc44s803.c
@@ -300,10 +300,10 @@ static int mc44s803_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops mc44s803_tuner_ops = {
 	.info = {
-		.name           = "Freescale MC44S803",
-		.frequency_min  =   48000000,
-		.frequency_max  = 1000000000,
-		.frequency_step =     100000,
+		.name              = "Freescale MC44S803",
+		.frequency_min_hz  =   48 * MHz,
+		.frequency_max_hz  = 1000 * MHz,
+		.frequency_step_hz =  100 * kHz,
 	},
 
 	.release       = mc44s803_release,
diff --git a/drivers/media/tuners/mt2060.c b/drivers/media/tuners/mt2060.c
index 3d3c6815b6a7..4ace77cfe285 100644
--- a/drivers/media/tuners/mt2060.c
+++ b/drivers/media/tuners/mt2060.c
@@ -395,10 +395,10 @@ static void mt2060_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops mt2060_tuner_ops = {
 	.info = {
-		.name           = "Microtune MT2060",
-		.frequency_min  =  48000000,
-		.frequency_max  = 860000000,
-		.frequency_step =     50000,
+		.name              = "Microtune MT2060",
+		.frequency_min_hz  =  48 * MHz,
+		.frequency_max_hz  = 860 * MHz,
+		.frequency_step_hz =  50 * kHz,
 	},
 
 	.release       = mt2060_release,
diff --git a/drivers/media/tuners/mt2063.c b/drivers/media/tuners/mt2063.c
index 80dc3e241b4a..f4c8a7293ebb 100644
--- a/drivers/media/tuners/mt2063.c
+++ b/drivers/media/tuners/mt2063.c
@@ -2200,10 +2200,9 @@ static int mt2063_get_bandwidth(struct dvb_frontend *fe, u32 *bw)
 static const struct dvb_tuner_ops mt2063_ops = {
 	.info = {
 		 .name = "MT2063 Silicon Tuner",
-		 .frequency_min = 45000000,
-		 .frequency_max = 865000000,
-		 .frequency_step = 0,
-		 },
+		 .frequency_min_hz  =  45 * MHz,
+		 .frequency_max_hz  = 865 * MHz,
+	 },
 
 	.init = mt2063_init,
 	.sleep = MT2063_Sleep,
diff --git a/drivers/media/tuners/mt2131.c b/drivers/media/tuners/mt2131.c
index 659bf19dc434..086a7b7cf634 100644
--- a/drivers/media/tuners/mt2131.c
+++ b/drivers/media/tuners/mt2131.c
@@ -235,10 +235,10 @@ static void mt2131_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops mt2131_tuner_ops = {
 	.info = {
-		.name           = "Microtune MT2131",
-		.frequency_min  =  48000000,
-		.frequency_max  = 860000000,
-		.frequency_step =     50000,
+		.name              = "Microtune MT2131",
+		.frequency_min_hz  =  48 * MHz,
+		.frequency_max_hz  = 860 * MHz,
+		.frequency_step_hz =  50 * kHz,
 	},
 
 	.release       = mt2131_release,
diff --git a/drivers/media/tuners/mt2266.c b/drivers/media/tuners/mt2266.c
index f4545b7f5da2..e6cc78720de4 100644
--- a/drivers/media/tuners/mt2266.c
+++ b/drivers/media/tuners/mt2266.c
@@ -304,10 +304,10 @@ static void mt2266_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops mt2266_tuner_ops = {
 	.info = {
-		.name           = "Microtune MT2266",
-		.frequency_min  = 174000000,
-		.frequency_max  = 862000000,
-		.frequency_step =     50000,
+		.name              = "Microtune MT2266",
+		.frequency_min_hz  = 174 * MHz,
+		.frequency_max_hz  = 862 * MHz,
+		.frequency_step_hz =  50 * kHz,
 	},
 	.release       = mt2266_release,
 	.init          = mt2266_init,
diff --git a/drivers/media/tuners/mxl301rf.c b/drivers/media/tuners/mxl301rf.c
index 57b0e4862aaf..c628435a1b06 100644
--- a/drivers/media/tuners/mxl301rf.c
+++ b/drivers/media/tuners/mxl301rf.c
@@ -271,8 +271,8 @@ static const struct dvb_tuner_ops mxl301rf_ops = {
 	.info = {
 		.name = "MaxLinear MxL301RF",
 
-		.frequency_min =  93000000,
-		.frequency_max = 803142857,
+		.frequency_min_hz =  93 * MHz,
+		.frequency_max_hz = 803 * MHz + 142857,
 	},
 
 	.init = mxl301rf_init,
diff --git a/drivers/media/tuners/mxl5005s.c b/drivers/media/tuners/mxl5005s.c
index 355ef2959b7d..ec584316c812 100644
--- a/drivers/media/tuners/mxl5005s.c
+++ b/drivers/media/tuners/mxl5005s.c
@@ -4075,10 +4075,10 @@ static void mxl5005s_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops mxl5005s_tuner_ops = {
 	.info = {
-		.name           = "MaxLinear MXL5005S",
-		.frequency_min  =  48000000,
-		.frequency_max  = 860000000,
-		.frequency_step =     50000,
+		.name              = "MaxLinear MXL5005S",
+		.frequency_min_hz  =  48 * MHz,
+		.frequency_max_hz  = 860 * MHz,
+		.frequency_step_hz =  50 * kHz,
 	},
 
 	.release       = mxl5005s_release,
diff --git a/drivers/media/tuners/qm1d1b0004.c b/drivers/media/tuners/qm1d1b0004.c
index b4495cc1626b..008ad870c00f 100644
--- a/drivers/media/tuners/qm1d1b0004.c
+++ b/drivers/media/tuners/qm1d1b0004.c
@@ -186,8 +186,8 @@ static const struct dvb_tuner_ops qm1d1b0004_ops = {
 	.info = {
 		.name = "Sharp qm1d1b0004",
 
-		.frequency_min =  950000,
-		.frequency_max = 2150000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 	},
 
 	.init = qm1d1b0004_init,
diff --git a/drivers/media/tuners/qm1d1c0042.c b/drivers/media/tuners/qm1d1c0042.c
index 642a065b9a07..83ca5dc047ea 100644
--- a/drivers/media/tuners/qm1d1c0042.c
+++ b/drivers/media/tuners/qm1d1c0042.c
@@ -388,8 +388,8 @@ static const struct dvb_tuner_ops qm1d1c0042_ops = {
 	.info = {
 		.name = "Sharp QM1D1C0042",
 
-		.frequency_min =  950000,
-		.frequency_max = 2150000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 	},
 
 	.init = qm1d1c0042_init,
diff --git a/drivers/media/tuners/qt1010.c b/drivers/media/tuners/qt1010.c
index b92be882ab3c..4565c06b1617 100644
--- a/drivers/media/tuners/qt1010.c
+++ b/drivers/media/tuners/qt1010.c
@@ -394,10 +394,10 @@ static int qt1010_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops qt1010_tuner_ops = {
 	.info = {
-		.name           = "Quantek QT1010",
-		.frequency_min  = QT1010_MIN_FREQ,
-		.frequency_max  = QT1010_MAX_FREQ,
-		.frequency_step = QT1010_STEP,
+		.name              = "Quantek QT1010",
+		.frequency_min_hz  = QT1010_MIN_FREQ,
+		.frequency_max_hz  = QT1010_MAX_FREQ,
+		.frequency_step_hz = QT1010_STEP,
 	},
 
 	.release       = qt1010_release,
diff --git a/drivers/media/tuners/qt1010_priv.h b/drivers/media/tuners/qt1010_priv.h
index 4cb78ecc8985..f25324c63067 100644
--- a/drivers/media/tuners/qt1010_priv.h
+++ b/drivers/media/tuners/qt1010_priv.h
@@ -71,12 +71,14 @@ reg def meaning
 2f  00  ? not used?
 */
 
-#define QT1010_STEP         125000 /*  125 kHz used by Windows drivers,
-				      hw could be more precise but we don't
-				      know how to use */
-#define QT1010_MIN_FREQ   48000000 /*   48 MHz */
-#define QT1010_MAX_FREQ  860000000 /*  860 MHz */
-#define QT1010_OFFSET   1246000000 /* 1246 MHz */
+#define QT1010_STEP         (125 * kHz) /*
+					 * used by Windows drivers,
+				         * hw could be more precise but we don't
+				         * know how to use
+					 */
+#define QT1010_MIN_FREQ   (48 * MHz)
+#define QT1010_MAX_FREQ  (860 * MHz)
+#define QT1010_OFFSET   (1246 * MHz)
 
 #define QT1010_WR 0
 #define QT1010_RD 1
diff --git a/drivers/media/tuners/r820t.c b/drivers/media/tuners/r820t.c
index 3e14b9e2e763..ba4be08a8551 100644
--- a/drivers/media/tuners/r820t.c
+++ b/drivers/media/tuners/r820t.c
@@ -2297,9 +2297,9 @@ static void r820t_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops r820t_tuner_ops = {
 	.info = {
-		.name           = "Rafael Micro R820T",
-		.frequency_min  =   42000000,
-		.frequency_max  = 1002000000,
+		.name             = "Rafael Micro R820T",
+		.frequency_min_hz =   42 * MHz,
+		.frequency_max_hz = 1002 * MHz,
 	},
 	.init = r820t_init,
 	.release = r820t_release,
diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c
index 9e34d31d724d..a08d8fe2bb1b 100644
--- a/drivers/media/tuners/si2157.c
+++ b/drivers/media/tuners/si2157.c
@@ -387,9 +387,9 @@ static int si2157_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops si2157_ops = {
 	.info = {
-		.name           = "Silicon Labs Si2141/Si2146/2147/2148/2157/2158",
-		.frequency_min  = 42000000,
-		.frequency_max  = 870000000,
+		.name             = "Silicon Labs Si2141/Si2146/2147/2148/2157/2158",
+		.frequency_min_hz =  42 * MHz,
+		.frequency_max_hz = 870 * MHz,
 	},
 
 	.init = si2157_init,
diff --git a/drivers/media/tuners/tda18212.c b/drivers/media/tuners/tda18212.c
index 7b8068354fea..8326106ec2e3 100644
--- a/drivers/media/tuners/tda18212.c
+++ b/drivers/media/tuners/tda18212.c
@@ -175,11 +175,11 @@ static int tda18212_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops tda18212_tuner_ops = {
 	.info = {
-		.name           = "NXP TDA18212",
+		.name              = "NXP TDA18212",
 
-		.frequency_min  =  48000000,
-		.frequency_max  = 864000000,
-		.frequency_step =      1000,
+		.frequency_min_hz  =  48 * MHz,
+		.frequency_max_hz  = 864 * MHz,
+		.frequency_step_hz =   1 * kHz,
 	},
 
 	.set_params    = tda18212_set_params,
diff --git a/drivers/media/tuners/tda18218.c b/drivers/media/tuners/tda18218.c
index c56fcf5d48e3..cbbd4d5e15da 100644
--- a/drivers/media/tuners/tda18218.c
+++ b/drivers/media/tuners/tda18218.c
@@ -269,11 +269,11 @@ static void tda18218_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops tda18218_tuner_ops = {
 	.info = {
-		.name           = "NXP TDA18218",
+		.name              = "NXP TDA18218",
 
-		.frequency_min  = 174000000,
-		.frequency_max  = 864000000,
-		.frequency_step =      1000,
+		.frequency_min_hz  = 174 * MHz,
+		.frequency_max_hz  = 864 * MHz,
+		.frequency_step_hz =   1 * kHz,
 	},
 
 	.release       = tda18218_release,
diff --git a/drivers/media/tuners/tda18250.c b/drivers/media/tuners/tda18250.c
index 20d12b063380..20d10ef45ab6 100644
--- a/drivers/media/tuners/tda18250.c
+++ b/drivers/media/tuners/tda18250.c
@@ -740,9 +740,9 @@ static int tda18250_sleep(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops tda18250_ops = {
 	.info = {
-		.name           = "NXP TDA18250",
-		.frequency_min  = 42000000,
-		.frequency_max  = 870000000,
+		.name              = "NXP TDA18250",
+		.frequency_min_hz  =  42 * MHz,
+		.frequency_max_hz  = 870 * MHz,
 	},
 
 	.init = tda18250_init,
diff --git a/drivers/media/tuners/tda18271-fe.c b/drivers/media/tuners/tda18271-fe.c
index 147155553648..4d69029229e4 100644
--- a/drivers/media/tuners/tda18271-fe.c
+++ b/drivers/media/tuners/tda18271-fe.c
@@ -1240,9 +1240,9 @@ static int tda18271_set_config(struct dvb_frontend *fe, void *priv_cfg)
 static const struct dvb_tuner_ops tda18271_tuner_ops = {
 	.info = {
 		.name = "NXP TDA18271HD",
-		.frequency_min  =  45000000,
-		.frequency_max  = 864000000,
-		.frequency_step =     62500
+		.frequency_min_hz  =  45 * MHz,
+		.frequency_max_hz  = 864 * MHz,
+		.frequency_step_hz = 62500
 	},
 	.init              = tda18271_init,
 	.sleep             = tda18271_sleep,
diff --git a/drivers/media/tuners/tda827x.c b/drivers/media/tuners/tda827x.c
index 8400808f8f7f..4391dabba510 100644
--- a/drivers/media/tuners/tda827x.c
+++ b/drivers/media/tuners/tda827x.c
@@ -816,9 +816,9 @@ static int tda827x_initial_sleep(struct dvb_frontend *fe)
 static const struct dvb_tuner_ops tda827xo_tuner_ops = {
 	.info = {
 		.name = "Philips TDA827X",
-		.frequency_min  =  55000000,
-		.frequency_max  = 860000000,
-		.frequency_step =    250000
+		.frequency_min_hz  =  55 * MHz,
+		.frequency_max_hz  = 860 * MHz,
+		.frequency_step_hz = 250 * kHz
 	},
 	.release = tda827x_release,
 	.init = tda827x_initial_init,
@@ -832,9 +832,9 @@ static const struct dvb_tuner_ops tda827xo_tuner_ops = {
 static const struct dvb_tuner_ops tda827xa_tuner_ops = {
 	.info = {
 		.name = "Philips TDA827XA",
-		.frequency_min  =  44000000,
-		.frequency_max  = 906000000,
-		.frequency_step =     62500
+		.frequency_min_hz  =  44 * MHz,
+		.frequency_max_hz  = 906 * MHz,
+		.frequency_step_hz = 62500
 	},
 	.release = tda827x_release,
 	.init = tda827x_init,
diff --git a/drivers/media/tuners/tua9001.c b/drivers/media/tuners/tua9001.c
index 9d70378fe2d3..5c89a130b47d 100644
--- a/drivers/media/tuners/tua9001.c
+++ b/drivers/media/tuners/tua9001.c
@@ -164,9 +164,9 @@ static int tua9001_get_if_frequency(struct dvb_frontend *fe, u32 *frequency)
 
 static const struct dvb_tuner_ops tua9001_tuner_ops = {
 	.info = {
-		.name           = "Infineon TUA9001",
-		.frequency_min  = 170000000,
-		.frequency_max  = 862000000,
+		.name             = "Infineon TUA9001",
+		.frequency_min_hz = 170 * MHz,
+		.frequency_max_hz = 862 * MHz,
 	},
 
 	.init = tua9001_init,
diff --git a/drivers/media/tuners/tuner-xc2028.c b/drivers/media/tuners/tuner-xc2028.c
index 84744e138982..222b93ef31c0 100644
--- a/drivers/media/tuners/tuner-xc2028.c
+++ b/drivers/media/tuners/tuner-xc2028.c
@@ -1440,9 +1440,9 @@ unlock:
 static const struct dvb_tuner_ops xc2028_dvb_tuner_ops = {
 	.info = {
 		 .name = "Xceive XC3028",
-		 .frequency_min = 42000000,
-		 .frequency_max = 864000000,
-		 .frequency_step = 50000,
+		 .frequency_min_hz  =  42 * MHz,
+		 .frequency_max_hz  = 864 * MHz,
+		 .frequency_step_hz =  50 * kHz,
 		 },
 
 	.set_config	   = xc2028_set_config,
diff --git a/drivers/media/tuners/xc4000.c b/drivers/media/tuners/xc4000.c
index f0fa8da08afa..76b3f37f24a8 100644
--- a/drivers/media/tuners/xc4000.c
+++ b/drivers/media/tuners/xc4000.c
@@ -398,8 +398,8 @@ static int xc_set_rf_frequency(struct xc4000_priv *priv, u32 freq_hz)
 
 	dprintk(1, "%s(%u)\n", __func__, freq_hz);
 
-	if ((freq_hz > xc4000_tuner_ops.info.frequency_max) ||
-	    (freq_hz < xc4000_tuner_ops.info.frequency_min))
+	if ((freq_hz > xc4000_tuner_ops.info.frequency_max_hz) ||
+	    (freq_hz < xc4000_tuner_ops.info.frequency_min_hz))
 		return -EINVAL;
 
 	freq_code = (u16)(freq_hz / 15625);
@@ -1635,10 +1635,10 @@ static void xc4000_release(struct dvb_frontend *fe)
 
 static const struct dvb_tuner_ops xc4000_tuner_ops = {
 	.info = {
-		.name           = "Xceive XC4000",
-		.frequency_min  =    1000000,
-		.frequency_max  = 1023000000,
-		.frequency_step =      50000,
+		.name              = "Xceive XC4000",
+		.frequency_min_hz  =    1 * MHz,
+		.frequency_max_hz  = 1023 * MHz,
+		.frequency_step_hz =   50 * kHz,
 	},
 
 	.release	   = xc4000_release,
diff --git a/drivers/media/tuners/xc5000.c b/drivers/media/tuners/xc5000.c
index f7a8d05d1758..f6b65278e502 100644
--- a/drivers/media/tuners/xc5000.c
+++ b/drivers/media/tuners/xc5000.c
@@ -460,8 +460,8 @@ static int xc_set_rf_frequency(struct xc5000_priv *priv, u32 freq_hz)
 
 	dprintk(1, "%s(%u)\n", __func__, freq_hz);
 
-	if ((freq_hz > xc5000_tuner_ops.info.frequency_max) ||
-		(freq_hz < xc5000_tuner_ops.info.frequency_min))
+	if ((freq_hz > xc5000_tuner_ops.info.frequency_max_hz) ||
+		(freq_hz < xc5000_tuner_ops.info.frequency_min_hz))
 		return -EINVAL;
 
 	freq_code = (u16)(freq_hz / 15625);
@@ -1350,10 +1350,10 @@ static int xc5000_set_config(struct dvb_frontend *fe, void *priv_cfg)
 
 static const struct dvb_tuner_ops xc5000_tuner_ops = {
 	.info = {
-		.name           = "Xceive XC5000",
-		.frequency_min  =    1000000,
-		.frequency_max  = 1023000000,
-		.frequency_step =      50000,
+		.name              = "Xceive XC5000",
+		.frequency_min_hz  =    1 * MHz,
+		.frequency_max_hz  = 1023 * MHz,
+		.frequency_step_hz =   50 * kHz,
 	},
 
 	.release	   = xc5000_release,
diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c
index 240d736bf1bb..92b3b9221a21 100644
--- a/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c
+++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-tuner.c
@@ -465,9 +465,9 @@ static const struct dvb_tuner_ops mxl111sf_tuner_tuner_ops = {
 	.info = {
 		.name = "MaxLinear MxL111SF",
 #if 0
-		.frequency_min  = ,
-		.frequency_max  = ,
-		.frequency_step = ,
+		.frequency_min_hz  = ,
+		.frequency_max_hz  = ,
+		.frequency_step_hz = ,
 #endif
 	},
 #if 0
diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h
index 331c8269c00e..aebaec2dc725 100644
--- a/include/media/dvb_frontend.h
+++ b/include/media/dvb_frontend.h
@@ -52,6 +52,10 @@
  */
 #define MAX_DELSYS	8
 
+/* Helper definitions to be used at frontend drivers */
+#define kHz 1000UL
+#define MHz 1000000UL
+
 /**
  * struct dvb_frontend_tune_settings - parameters to adjust frontend tuning
  *
@@ -73,22 +77,19 @@ struct dvb_frontend;
  * struct dvb_tuner_info - Frontend name and min/max ranges/bandwidths
  *
  * @name:		name of the Frontend
- * @frequency_min:	minimal frequency supported
- * @frequency_max:	maximum frequency supported
- * @frequency_step:	frequency step
+ * @frequency_min_hz:	minimal frequency supported in Hz
+ * @frequency_max_hz:	maximum frequency supported in Hz
+ * @frequency_step_hz:	frequency step in Hz
  * @bandwidth_min:	minimal frontend bandwidth supported
  * @bandwidth_max:	maximum frontend bandwidth supported
  * @bandwidth_step:	frontend bandwidth step
- *
- * NOTE: frequency parameters are in Hz, for terrestrial/cable or kHz for
- * satellite.
  */
 struct dvb_tuner_info {
 	char name[128];
 
-	u32 frequency_min;
-	u32 frequency_max;
-	u32 frequency_step;
+	u32 frequency_min_hz;
+	u32 frequency_max_hz;
+	u32 frequency_step_hz;
 
 	u32 bandwidth_min;
 	u32 bandwidth_max;
-- 
cgit v1.2.3


From e7d0748dd71695b94f3a35c8bdc05226a7f3d919 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 2 Aug 2018 15:22:13 -0600
Subject: block: Switch struct packet_command to use struct scsi_sense_hdr

There is a lot of needless struct request_sense usage in the CDROM
code. These can all be struct scsi_sense_hdr instead, to avoid any
confusion over their respective structure sizes. This patch is a lot
of noise changing "sense" to "sshdr", but the final code is more
readable to distinguish between "sense" meaning "struct request_sense"
and "sshdr" meaning "struct scsi_sense_hdr".

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/pktcdvd.c    | 36 ++++++++++++++++++------------------
 drivers/cdrom/cdrom.c      | 22 +++++++++++-----------
 drivers/ide/ide-cd.c       | 11 ++++++-----
 drivers/ide/ide-cd.h       |  4 ++--
 drivers/ide/ide-cd_ioctl.c | 30 +++++++++++++++---------------
 drivers/scsi/sr_ioctl.c    | 22 +++++++++-------------
 include/linux/cdrom.h      |  3 ++-
 7 files changed, 63 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 9bb7721c26fc..e285413d4a75 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -748,13 +748,13 @@ static const char *sense_key_string(__u8 index)
 static void pkt_dump_sense(struct pktcdvd_device *pd,
 			   struct packet_command *cgc)
 {
-	struct request_sense *sense = cgc->sense;
+	struct scsi_sense_hdr *sshdr = cgc->sshdr;
 
-	if (sense)
+	if (sshdr)
 		pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
 			CDROM_PACKET_SIZE, cgc->cmd,
-			sense->sense_key, sense->asc, sense->ascq,
-			sense_key_string(sense->sense_key));
+			sshdr->sense_key, sshdr->asc, sshdr->ascq,
+			sense_key_string(sshdr->sense_key));
 	else
 		pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
 }
@@ -787,11 +787,11 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
 				unsigned write_speed, unsigned read_speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int ret;
 
 	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_SET_SPEED;
 	cgc.cmd[2] = (read_speed >> 8) & 0xff;
 	cgc.cmd[3] = read_speed & 0xff;
@@ -1651,7 +1651,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
 static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	write_param_page *wp;
 	char buffer[128];
 	int ret, size;
@@ -1662,7 +1662,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 
 	memset(buffer, 0, sizeof(buffer));
 	init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
 	if (ret) {
 		pkt_dump_sense(pd, &cgc);
@@ -1678,7 +1678,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
 	 * now get it all
 	 */
 	init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
 	if (ret) {
 		pkt_dump_sense(pd, &cgc);
@@ -1916,12 +1916,12 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
 						int set)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[64];
 	int ret;
 
 	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.buflen = pd->mode_offset + 12;
 
 	/*
@@ -1962,14 +1962,14 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
 						unsigned *write_speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[256+18];
 	unsigned char *cap_buf;
 	int ret, offset;
 
 	cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
 	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 
 	ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
 	if (ret) {
@@ -2023,13 +2023,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 						unsigned *speed)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	unsigned char buf[64];
 	unsigned int size, st, sp;
 	int ret;
 
 	init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	cgc.cmd[1] = 2;
 	cgc.cmd[2] = 4; /* READ ATIP */
@@ -2044,7 +2044,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 		size = sizeof(buf);
 
 	init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	cgc.cmd[1] = 2;
 	cgc.cmd[2] = 4;
@@ -2095,13 +2095,13 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
 static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
 {
 	struct packet_command cgc;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int ret;
 
 	pkt_dbg(2, pd, "Performing OPC\n");
 
 	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
-	cgc.sense = &sense;
+	cgc.sshdr = &sshdr;
 	cgc.timeout = 60*HZ;
 	cgc.cmd[0] = GPCMD_SEND_OPC;
 	cgc.cmd[1] = 1;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index a78b8e7085e9..86619472d916 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -345,10 +345,10 @@ static LIST_HEAD(cdrom_list);
 int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
 			       struct packet_command *cgc)
 {
-	if (cgc->sense) {
-		cgc->sense->sense_key = 0x05;
-		cgc->sense->asc = 0x20;
-		cgc->sense->ascq = 0x00;
+	if (cgc->sshdr) {
+		cgc->sshdr->sense_key = 0x05;
+		cgc->sshdr->asc = 0x20;
+		cgc->sshdr->ascq = 0x00;
 	}
 
 	cgc->stat = -EIO;
@@ -2943,7 +2943,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 					      struct packet_command *cgc,
 					      int cmd)
 {
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	struct cdrom_msf msf;
 	int blocksize = 0, format = 0, lba;
 	int ret;
@@ -2971,13 +2971,13 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 	if (cgc->buffer == NULL)
 		return -ENOMEM;
 
-	memset(&sense, 0, sizeof(sense));
-	cgc->sense = &sense;
+	memset(&sshdr, 0, sizeof(sshdr));
+	cgc->sshdr = &sshdr;
 	cgc->data_direction = CGC_DATA_READ;
 	ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
-	if (ret && sense.sense_key == 0x05 &&
-	    sense.asc == 0x20 &&
-	    sense.ascq == 0x00) {
+	if (ret && sshdr.sense_key == 0x05 &&
+	    sshdr.asc == 0x20 &&
+	    sshdr.ascq == 0x00) {
 		/*
 		 * SCSI-II devices are not required to support
 		 * READ_CD, so let's try switching block size
@@ -2986,7 +2986,7 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
 		ret = cdrom_switch_blocksize(cdi, blocksize);
 		if (ret)
 			goto out;
-		cgc->sense = NULL;
+		cgc->sshdr = NULL;
 		ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
 		ret |= cdrom_switch_blocksize(cdi, blocksize);
 	}
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a37dd381d307..a24cdff01865 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -419,7 +419,7 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 		    int write, void *buffer, unsigned *bufflen,
-		    struct request_sense *sense, int timeout,
+		    struct scsi_sense_hdr *sshdr, int timeout,
 		    req_flags_t rq_flags)
 {
 	struct cdrom_info *info = drive->driver_data;
@@ -456,8 +456,9 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
 
 		if (buffer)
 			*bufflen = scsi_req(rq)->resid_len;
-		if (sense)
-			memcpy(sense, scsi_req(rq)->sense, sizeof(*sense));
+		if (sshdr)
+			scsi_normalize_sense(scsi_req(rq)->sense,
+					     scsi_req(rq)->sense_len, sshdr);
 
 		/*
 		 * FIXME: we should probably abort/retry or something in case of
@@ -864,7 +865,7 @@ static void msf_from_bcd(struct atapi_msf *msf)
 	msf->frame  = bcd2bin(msf->frame);
 }
 
-int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
+int cdrom_check_status(ide_drive_t *drive, struct scsi_sense_hdr *sshdr)
 {
 	struct cdrom_info *info = drive->driver_data;
 	struct cdrom_device_info *cdi;
@@ -886,7 +887,7 @@ int cdrom_check_status(ide_drive_t *drive, struct request_sense *sense)
 	 */
 	cmd[7] = cdi->sanyo_slot % 3;
 
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sense, 0, RQF_QUIET);
+	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sshdr, 0, RQF_QUIET);
 }
 
 static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
index fc162fbb6629..a69dc7f61c4d 100644
--- a/drivers/ide/ide-cd.h
+++ b/drivers/ide/ide-cd.h
@@ -98,11 +98,11 @@ void ide_cd_log_error(const char *, struct request *, struct request_sense *);
 
 /* ide-cd.c functions used by ide-cd_ioctl.c */
 int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
-		    unsigned *, struct request_sense *, int, req_flags_t);
+		    unsigned *, struct scsi_sense_hdr *, int, req_flags_t);
 int ide_cd_read_toc(ide_drive_t *);
 int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
 void ide_cdrom_update_speed(ide_drive_t *, u8 *);
-int cdrom_check_status(ide_drive_t *, struct request_sense *);
+int cdrom_check_status(ide_drive_t *, struct scsi_sense_hdr *);
 
 /* ide-cd_ioctl.c */
 int ide_cdrom_open_real(struct cdrom_device_info *, int);
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 14540544413c..4a6e1a413ead 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -43,14 +43,14 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 {
 	ide_drive_t *drive = cdi->handle;
 	struct media_event_desc med;
-	struct request_sense sense;
+	struct scsi_sense_hdr sshdr;
 	int stat;
 
 	if (slot_nr != CDSL_CURRENT)
 		return -EINVAL;
 
-	stat = cdrom_check_status(drive, &sense);
-	if (!stat || sense.sense_key == UNIT_ATTENTION)
+	stat = cdrom_check_status(drive, &sshdr);
+	if (!stat || sshdr.sense_key == UNIT_ATTENTION)
 		return CDS_DISC_OK;
 
 	if (!cdrom_get_media_event(cdi, &med)) {
@@ -62,8 +62,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 			return CDS_NO_DISC;
 	}
 
-	if (sense.sense_key == NOT_READY && sense.asc == 0x04
-			&& sense.ascq == 0x04)
+	if (sshdr.sense_key == NOT_READY && sshdr.asc == 0x04
+			&& sshdr.ascq == 0x04)
 		return CDS_DISC_OK;
 
 	/*
@@ -71,8 +71,8 @@ int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
 	 * just return TRAY_OPEN since ATAPI doesn't provide
 	 * any other way to detect this...
 	 */
-	if (sense.sense_key == NOT_READY) {
-		if (sense.asc == 0x3a && sense.ascq == 1)
+	if (sshdr.sense_key == NOT_READY) {
+		if (sshdr.asc == 0x3a && sshdr.ascq == 1)
 			return CDS_NO_DISC;
 		else
 			return CDS_TRAY_OPEN;
@@ -135,7 +135,7 @@ int cdrom_eject(ide_drive_t *drive, int ejectflag)
 static
 int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 {
-	struct request_sense my_sense, *sense = &my_sense;
+	struct scsi_sense_hdr sshdr;
 	int stat;
 
 	/* If the drive cannot lock the door, just pretend. */
@@ -150,14 +150,14 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 		cmd[4] = lockflag ? 1 : 0;
 
 		stat = ide_cd_queue_pc(drive, cmd, 0, NULL, NULL,
-				       sense, 0, 0);
+				       &sshdr, 0, 0);
 	}
 
 	/* If we got an illegal field error, the drive
 	   probably cannot lock the door. */
 	if (stat != 0 &&
-	    sense->sense_key == ILLEGAL_REQUEST &&
-	    (sense->asc == 0x24 || sense->asc == 0x20)) {
+	    sshdr.sense_key == ILLEGAL_REQUEST &&
+	    (sshdr.asc == 0x24 || sshdr.asc == 0x20)) {
 		printk(KERN_ERR "%s: door locking not supported\n",
 			drive->name);
 		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
@@ -165,7 +165,7 @@ int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
 	}
 
 	/* no medium, that's alright. */
-	if (stat != 0 && sense->sense_key == NOT_READY && sense->asc == 0x3a)
+	if (stat != 0 && sshdr.sense_key == NOT_READY && sshdr.asc == 0x3a)
 		stat = 0;
 
 	if (stat == 0) {
@@ -451,8 +451,8 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	   layer. the packet must be complete, as we do not
 	   touch it at all. */
 
-	if (cgc->sense)
-		memset(cgc->sense, 0, sizeof(struct request_sense));
+	if (cgc->sshdr)
+		memset(cgc->sshdr, 0, sizeof(*cgc->sshdr));
 
 	if (cgc->quiet)
 		flags |= RQF_QUIET;
@@ -460,7 +460,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
 	cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
 				    cgc->data_direction == CGC_DATA_WRITE,
 				    cgc->buffer, &len,
-				    cgc->sense, cgc->timeout, flags);
+				    cgc->sshdr, cgc->timeout, flags);
 	if (!cgc->stat)
 		cgc->buflen -= len;
 	return cgc->stat;
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 35fab1e18adc..ffcf902da390 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -186,14 +186,13 @@ static int sr_play_trkind(struct cdrom_device_info *cdi,
 int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 {
 	struct scsi_device *SDev;
-	struct scsi_sense_hdr sshdr;
+	struct scsi_sense_hdr local_sshdr, *sshdr = &local_sshdr;
 	int result, err = 0, retries = 0;
-	unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE], *senseptr = NULL;
 
 	SDev = cd->device;
 
-	if (cgc->sense)
-		senseptr = sense_buffer;
+	if (cgc->sshdr)
+		sshdr = cgc->sshdr;
 
       retry:
 	if (!scsi_block_when_processing_errors(SDev)) {
@@ -202,15 +201,12 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 	}
 
 	result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
-			      cgc->buffer, cgc->buflen, senseptr, &sshdr,
+			      cgc->buffer, cgc->buflen, NULL, sshdr,
 			      cgc->timeout, IOCTL_RETRIES, 0, 0, NULL);
 
-	if (cgc->sense)
-		memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense));
-
 	/* Minimal error checking.  Ignore cases we know about, and report the rest. */
 	if (driver_byte(result) != 0) {
-		switch (sshdr.sense_key) {
+		switch (sshdr->sense_key) {
 		case UNIT_ATTENTION:
 			SDev->changed = 1;
 			if (!cgc->quiet)
@@ -221,8 +217,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 			err = -ENOMEDIUM;
 			break;
 		case NOT_READY:	/* This happens if there is no disc in drive */
-			if (sshdr.asc == 0x04 &&
-			    sshdr.ascq == 0x01) {
+			if (sshdr->asc == 0x04 &&
+			    sshdr->ascq == 0x01) {
 				/* sense: Logical unit is in process of becoming ready */
 				if (!cgc->quiet)
 					sr_printk(KERN_INFO, cd,
@@ -245,8 +241,8 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
 			break;
 		case ILLEGAL_REQUEST:
 			err = -EIO;
-			if (sshdr.asc == 0x20 &&
-			    sshdr.ascq == 0x00)
+			if (sshdr->asc == 0x20 &&
+			    sshdr->ascq == 0x00)
 				/* sense: Invalid command operation code */
 				err = -EDRIVE_CANT_DO_THIS;
 			break;
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index e75dfd1f1dec..528271c60018 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -13,6 +13,7 @@
 
 #include <linux/fs.h>		/* not really needed, later.. */
 #include <linux/list.h>
+#include <scsi/scsi_common.h>
 #include <uapi/linux/cdrom.h>
 
 struct packet_command
@@ -21,7 +22,7 @@ struct packet_command
 	unsigned char 		*buffer;
 	unsigned int 		buflen;
 	int			stat;
-	struct request_sense	*sense;
+	struct scsi_sense_hdr	*sshdr;
 	unsigned char		data_direction;
 	int			quiet;
 	int			timeout;
-- 
cgit v1.2.3


From 704f83928c8e7da6e06144569efb15dec73278e8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 31 Jul 2018 12:51:54 -0700
Subject: scsi: Check sense buffer size at build time

To avoid introducing problems like those fixed in commit f7068114d45e
("sr: pass down correctly sized SCSI sense buffer"), this creates a macro
wrapper for scsi_execute() that verifies the size of the sense buffer
similar to what was done for command string sizes in commit 3756f6401c30
("exec: avoid gcc-8 warning for get_task_comm").

Another solution could be to add a length argument to scsi_execute(),
but this function already takes a lot of arguments and Jens was not fond
of that approach.

Additionally, this moves the SCSI_SENSE_BUFFERSIZE definition into
scsi_device.h, and removes a redundant include for scsi_device.h from
scsi_cmnd.h.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/scsi_lib.c    |  6 +++---
 include/scsi/scsi_cmnd.h   |  6 ++----
 include/scsi/scsi_device.h | 14 +++++++++++++-
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 41e9ac9fc138..9cb9a166fa0c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -238,7 +238,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 
 
 /**
- * scsi_execute - insert request and wait for the result
+ * __scsi_execute - insert request and wait for the result
  * @sdev:	scsi device
  * @cmd:	scsi command
  * @data_direction: data direction
@@ -255,7 +255,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  * Returns the scsi_cmnd result field if a command was executed, or a negative
  * Linux error code if we didn't get that far.
  */
-int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		 int data_direction, void *buffer, unsigned bufflen,
 		 unsigned char *sense, struct scsi_sense_hdr *sshdr,
 		 int timeout, int retries, u64 flags, req_flags_t rq_flags,
@@ -309,7 +309,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 
 	return ret;
 }
-EXPORT_SYMBOL(scsi_execute);
+EXPORT_SYMBOL(__scsi_execute);
 
 /*
  * Function:    scsi_init_cmd_errh()
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index cae229b5395c..c891ada3c5c2 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -15,8 +15,6 @@
 struct Scsi_Host;
 struct scsi_driver;
 
-#include <scsi/scsi_device.h>
-
 /*
  * MAX_COMMAND_SIZE is:
  * The longest fixed-length SCSI CDB as per the SCSI standard.
@@ -121,11 +119,11 @@ struct scsi_cmnd {
 	struct request *request;	/* The command we are
 				   	   working on */
 
-#define SCSI_SENSE_BUFFERSIZE 	96
 	unsigned char *sense_buffer;
 				/* obtained by REQUEST SENSE when
 				 * CHECK CONDITION is received on original
-				 * command (auto-sense) */
+				 * command (auto-sense). Length must be
+				 * SCSI_SENSE_BUFFERSIZE bytes. */
 
 	/* Low-level done function - can be used by low-level driver to point
 	 *        to completion function.  Not used by mid/upper level code. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 4c36af6edd79..202f4d6a4342 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -17,6 +17,8 @@ struct scsi_sense_hdr;
 
 typedef __u64 __bitwise blist_flags_t;
 
+#define SCSI_SENSE_BUFFERSIZE	96
+
 struct scsi_mode_data {
 	__u32	length;
 	__u16	block_descriptor_length;
@@ -426,11 +428,21 @@ extern const char *scsi_device_state_name(enum scsi_device_state);
 extern int scsi_is_sdev_device(const struct device *);
 extern int scsi_is_target_device(const struct device *);
 extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
-extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
+extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 			int data_direction, void *buffer, unsigned bufflen,
 			unsigned char *sense, struct scsi_sense_hdr *sshdr,
 			int timeout, int retries, u64 flags,
 			req_flags_t rq_flags, int *resid);
+/* Make sure any sense buffer is the correct size. */
+#define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,	\
+		     sshdr, timeout, retries, flags, rq_flags, resid)	\
+({									\
+	BUILD_BUG_ON((sense) != NULL &&					\
+		     sizeof(sense) != SCSI_SENSE_BUFFERSIZE);		\
+	__scsi_execute(sdev, cmd, data_direction, buffer, bufflen,	\
+		       sense, sshdr, timeout, retries, flags, rq_flags,	\
+		       resid);						\
+})
 static inline int scsi_execute_req(struct scsi_device *sdev,
 	const unsigned char *cmd, int data_direction, void *buffer,
 	unsigned bufflen, struct scsi_sense_hdr *sshdr, int timeout,
-- 
cgit v1.2.3


From 9a47249d444d344051c7c0e909fad0e88515a5c2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 31 Jul 2018 21:11:00 +0200
Subject: random: Make crng state queryable

It is very useful to be able to know whether or not get_random_bytes_wait
/ wait_for_random_bytes is going to block or not, or whether plain
get_random_bytes is going to return good randomness or bad randomness.

The particular use case is for mitigating certain attacks in WireGuard.
A handshake packet arrives and is queued up. Elsewhere a worker thread
takes items from the queue and processes them. In replying to these
items, it needs to use some random data, and it has to be good random
data. If we simply block until we can have good randomness, then it's
possible for an attacker to fill the queue up with packets waiting to be
processed. Upon realizing the queue is full, WireGuard will detect that
it's under a denial of service attack, and behave accordingly. A better
approach is just to drop incoming handshake packets if the crng is not
yet initialized.

This patch, therefore, makes that information directly accessible.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c  | 15 +++++++++++++++
 include/linux/random.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 561082d46a82..bf5f99fc36f1 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1663,6 +1663,21 @@ int wait_for_random_bytes(void)
 }
 EXPORT_SYMBOL(wait_for_random_bytes);
 
+/*
+ * Returns whether or not the urandom pool has been seeded and thus guaranteed
+ * to supply cryptographically secure random numbers. This applies to: the
+ * /dev/urandom device, the get_random_bytes function, and the get_random_{u32,
+ * ,u64,int,long} family of functions.
+ *
+ * Returns: true if the urandom pool has been seeded.
+ *          false if the urandom pool has not been seeded.
+ */
+bool rng_is_initialized(void)
+{
+	return crng_ready();
+}
+EXPORT_SYMBOL(rng_is_initialized);
+
 /*
  * Add a callback function that will be invoked when the nonblocking
  * pool is initialised.
diff --git a/include/linux/random.h b/include/linux/random.h
index f1c9bc5cd231..445a0ea4ff49 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -36,6 +36,7 @@ extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern int wait_for_random_bytes(void);
+extern bool rng_is_initialized(void);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
 extern void del_random_ready_callback(struct random_ready_callback *rdy);
 extern int __must_check get_random_bytes_arch(void *buf, int nbytes);
-- 
cgit v1.2.3


From f1b1eabff0eb3fc46b06668de8174c0f23b271fd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Thu, 5 Jul 2018 18:59:36 -0400
Subject: media: dvb: represent min/max/step/tolerance freqs in Hz

Right now, satellite frontend drivers specify frequencies in kHz,
while terrestrial/cable ones specify in Hz. That's confusing
for developers.

However, the main problem is that universal frontends capable
of handling both satellite and non-satelite delivery systems
are appearing. We end by needing to hack the drivers in
order to support such hybrid frontends.

So, convert everything to specify frontend frequencies in Hz.

Tested-by: Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/siano/smsdvb-main.c          |  6 +-
 drivers/media/dvb-core/dvb_frontend.c             | 84 ++++++++++++++++-------
 drivers/media/dvb-frontends/af9013.c              |  7 +-
 drivers/media/dvb-frontends/af9033.c              |  7 +-
 drivers/media/dvb-frontends/as102_fe.c            |  6 +-
 drivers/media/dvb-frontends/atbm8830.c            |  6 +-
 drivers/media/dvb-frontends/au8522_dig.c          |  6 +-
 drivers/media/dvb-frontends/bcm3510.c             |  6 +-
 drivers/media/dvb-frontends/cx22700.c             |  6 +-
 drivers/media/dvb-frontends/cx22702.c             |  6 +-
 drivers/media/dvb-frontends/cx24110.c             |  8 +--
 drivers/media/dvb-frontends/cx24116.c             |  8 +--
 drivers/media/dvb-frontends/cx24117.c             |  8 +--
 drivers/media/dvb-frontends/cx24120.c             |  8 +--
 drivers/media/dvb-frontends/cx24123.c             |  8 +--
 drivers/media/dvb-frontends/cxd2820r_t.c          |  4 +-
 drivers/media/dvb-frontends/cxd2820r_t2.c         |  4 +-
 drivers/media/dvb-frontends/cxd2841er.c           |  9 ++-
 drivers/media/dvb-frontends/cxd2880/cxd2880_top.c |  6 +-
 drivers/media/dvb-frontends/dib3000mb.c           |  6 +-
 drivers/media/dvb-frontends/dib3000mc.c           |  6 +-
 drivers/media/dvb-frontends/dib7000m.c            |  6 +-
 drivers/media/dvb-frontends/dib7000p.c            |  6 +-
 drivers/media/dvb-frontends/dib8000.c             |  6 +-
 drivers/media/dvb-frontends/dib9000.c             |  6 +-
 drivers/media/dvb-frontends/drx39xyj/drxj.c       |  6 +-
 drivers/media/dvb-frontends/drxd_hard.c           |  7 +-
 drivers/media/dvb-frontends/drxk_hard.c           |  8 +--
 drivers/media/dvb-frontends/ds3000.c              |  8 +--
 drivers/media/dvb-frontends/dvb_dummy_fe.c        | 24 +++----
 drivers/media/dvb-frontends/gp8psk-fe.c           |  6 +-
 drivers/media/dvb-frontends/ix2505v.c             |  4 +-
 drivers/media/dvb-frontends/l64781.c              |  7 +-
 drivers/media/dvb-frontends/lg2160.c              | 12 ++--
 drivers/media/dvb-frontends/lgdt3305.c            | 12 ++--
 drivers/media/dvb-frontends/lgdt3306a.c           |  6 +-
 drivers/media/dvb-frontends/lgdt330x.c            | 12 ++--
 drivers/media/dvb-frontends/lgs8gl5.c             |  7 +-
 drivers/media/dvb-frontends/lgs8gxx.c             |  6 +-
 drivers/media/dvb-frontends/m88ds3103.c           |  6 +-
 drivers/media/dvb-frontends/m88rs2000.c           |  8 +--
 drivers/media/dvb-frontends/mb86a16.c             |  7 +-
 drivers/media/dvb-frontends/mb86a20s.c            |  6 +-
 drivers/media/dvb-frontends/mt312.c               | 10 +--
 drivers/media/dvb-frontends/mt352.c               |  7 +-
 drivers/media/dvb-frontends/mxl5xx.c              |  6 +-
 drivers/media/dvb-frontends/nxt200x.c             |  6 +-
 drivers/media/dvb-frontends/nxt6000.c             |  6 +-
 drivers/media/dvb-frontends/or51132.c             |  6 +-
 drivers/media/dvb-frontends/or51211.c             |  8 +--
 drivers/media/dvb-frontends/rtl2830.c             |  4 +-
 drivers/media/dvb-frontends/rtl2832.c             | 10 +--
 drivers/media/dvb-frontends/s5h1409.c             |  6 +-
 drivers/media/dvb-frontends/s5h1411.c             |  6 +-
 drivers/media/dvb-frontends/s5h1420.c             |  8 +--
 drivers/media/dvb-frontends/s5h1432.c             |  6 +-
 drivers/media/dvb-frontends/s921.c                |  7 +-
 drivers/media/dvb-frontends/si2165.c              |  2 +-
 drivers/media/dvb-frontends/si21xx.c              |  7 +-
 drivers/media/dvb-frontends/sp8870.c              |  6 +-
 drivers/media/dvb-frontends/sp887x.c              |  6 +-
 drivers/media/dvb-frontends/stb0899_drv.c         |  6 +-
 drivers/media/dvb-frontends/stv0288.c             |  7 +-
 drivers/media/dvb-frontends/stv0297.c             |  6 +-
 drivers/media/dvb-frontends/stv0299.c             |  7 +-
 drivers/media/dvb-frontends/stv0367.c             | 20 +++---
 drivers/media/dvb-frontends/stv0900_core.c        |  7 +-
 drivers/media/dvb-frontends/stv090x.c             |  6 +-
 drivers/media/dvb-frontends/stv0910.c             |  6 +-
 drivers/media/dvb-frontends/tc90522.c             | 10 +--
 drivers/media/dvb-frontends/tda10021.c            | 10 +--
 drivers/media/dvb-frontends/tda10023.c            |  6 +-
 drivers/media/dvb-frontends/tda10048.c            |  6 +-
 drivers/media/dvb-frontends/tda1004x.c            | 12 ++--
 drivers/media/dvb-frontends/tda10071.c            | 10 +--
 drivers/media/dvb-frontends/tda10086.c            |  6 +-
 drivers/media/dvb-frontends/tda8083.c             |  7 +-
 drivers/media/dvb-frontends/ves1820.c             |  6 +-
 drivers/media/dvb-frontends/ves1x93.c             |  8 +--
 drivers/media/dvb-frontends/zl10036.c             |  4 +-
 drivers/media/dvb-frontends/zl10353.c             |  7 +-
 drivers/media/firewire/firedtv-fe.c               | 26 +++----
 drivers/media/pci/bt8xx/dst.c                     | 26 +++----
 drivers/media/pci/bt8xx/dvb-bt8xx.c               |  8 +--
 drivers/media/pci/ddbridge/ddbridge-sx8.c         |  6 +-
 drivers/media/pci/mantis/mantis_vp3030.c          |  4 +-
 drivers/media/tuners/mxl5007t.c                   |  2 -
 drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c     |  6 +-
 drivers/media/usb/dvb-usb/af9005-fe.c             |  6 +-
 drivers/media/usb/dvb-usb/cinergyT2-fe.c          |  6 +-
 drivers/media/usb/dvb-usb/dtt200u-fe.c            |  6 +-
 drivers/media/usb/dvb-usb/friio-fe.c              | 11 ++-
 drivers/media/usb/dvb-usb/vp702x-fe.c             |  7 +-
 drivers/media/usb/dvb-usb/vp7045-fe.c             |  6 +-
 drivers/media/usb/ttusb-dec/ttusbdecfe.c          | 12 ++--
 include/media/dvb_frontend.h                      | 30 +++++++-
 96 files changed, 428 insertions(+), 399 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c
index c0faad1ba428..43cfd1dbda01 100644
--- a/drivers/media/common/siano/smsdvb-main.c
+++ b/drivers/media/common/siano/smsdvb-main.c
@@ -1047,9 +1047,9 @@ static void smsdvb_release(struct dvb_frontend *fe)
 static const struct dvb_frontend_ops smsdvb_fe_ops = {
 	.info = {
 		.name			= "Siano Mobile Digital MDTV Receiver",
-		.frequency_min		= 44250000,
-		.frequency_max		= 867250000,
-		.frequency_stepsize	= 250000,
+		.frequency_min_hz	=  44250 * kHz,
+		.frequency_max_hz	= 867250 * kHz,
+		.frequency_stepsize_hz	=    250 * kHz,
 		.caps = FE_CAN_INVERSION_AUTO |
 			FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 75e95b56f8b3..fe0fae482ba4 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -894,11 +894,28 @@ static int dvb_frontend_start(struct dvb_frontend *fe)
 }
 
 static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe,
-					      u32 *freq_min, u32 *freq_max)
+					      u32 *freq_min, u32 *freq_max,
+					      u32 *tolerance)
 {
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
-	__u32 tuner_min = fe->ops.tuner_ops.info.frequency_min_hz;
-	__u32 tuner_max = fe->ops.tuner_ops.info.frequency_max_hz;
+	u32 tuner_min = fe->ops.tuner_ops.info.frequency_min_hz;
+	u32 tuner_max = fe->ops.tuner_ops.info.frequency_max_hz;
+	u32 frontend_min = fe->ops.info.frequency_min_hz;
+	u32 frontend_max = fe->ops.info.frequency_max_hz;
+
+	*freq_min = max(frontend_min, tuner_min);
+
+	if (frontend_max == 0)
+		*freq_max = tuner_max;
+	else if (tuner_max == 0)
+		*freq_max = frontend_max;
+	else
+		*freq_max = min(frontend_max, tuner_max);
+
+	if (*freq_min == 0 || *freq_max == 0)
+		dev_warn(fe->dvb->device,
+			 "DVB: adapter %i frontend %u frequency limits undefined - fix the driver\n",
+			 fe->dvb->num, fe->id);
 
 	/* If the standard is for satellite, convert frequencies to kHz */
 	switch (c->delivery_system) {
@@ -906,26 +923,35 @@ static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe,
 	case SYS_DVBS2:
 	case SYS_TURBO:
 	case SYS_ISDBS:
-		tuner_max /= kHz;
-		tuner_min /= kHz;
+		*freq_min /= kHz;
+		*freq_max /= kHz;
+		if (tolerance)
+			*tolerance = fe->ops.info.frequency_tolerance_hz / kHz;
+
 		break;
 	default:
+		if (tolerance)
+			*tolerance = fe->ops.info.frequency_tolerance_hz;
 		break;
 	}
+}
 
-	*freq_min = max(fe->ops.info.frequency_min, tuner_min);
-
-	if (fe->ops.info.frequency_max == 0)
-		*freq_max = tuner_max;
-	else if (tuner_max == 0)
-		*freq_max = fe->ops.info.frequency_max;
-	else
-		*freq_max = min(fe->ops.info.frequency_max, tuner_max);
+static u32 dvb_frontend_get_stepsize(struct dvb_frontend *fe)
+{
+	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+	u32 step = fe->ops.info.frequency_stepsize_hz;
+	switch (c->delivery_system) {
+	case SYS_DVBS:
+	case SYS_DVBS2:
+	case SYS_TURBO:
+	case SYS_ISDBS:
+		step /= kHz;
+		break;
+	default:
+		break;
+	}
 
-	if (*freq_min == 0 || *freq_max == 0)
-		dev_warn(fe->dvb->device,
-			 "DVB: adapter %i frontend %u frequency limits undefined - fix the driver\n",
-			 fe->dvb->num, fe->id);
+	return step;
 }
 
 static int dvb_frontend_check_parameters(struct dvb_frontend *fe)
@@ -935,7 +961,7 @@ static int dvb_frontend_check_parameters(struct dvb_frontend *fe)
 	u32 freq_max;
 
 	/* range check: frequency */
-	dvb_frontend_get_frequency_limits(fe, &freq_min, &freq_max);
+	dvb_frontend_get_frequency_limits(fe, &freq_min, &freq_max, NULL);
 	if ((freq_min && c->frequency < freq_min) ||
 	    (freq_max && c->frequency > freq_max)) {
 		dev_warn(fe->dvb->device, "DVB: adapter %i frontend %i frequency %u out of range (%u..%u)\n",
@@ -2261,8 +2287,8 @@ static int dtv_set_frontend(struct dvb_frontend *fe)
 		case SYS_ISDBT:
 		case SYS_DTMB:
 			fepriv->min_delay = HZ / 20;
-			fepriv->step_size = fe->ops.info.frequency_stepsize * 2;
-			fepriv->max_drift = (fe->ops.info.frequency_stepsize * 2) + 1;
+			fepriv->step_size = dvb_frontend_get_stepsize(fe) * 2;
+			fepriv->max_drift = (dvb_frontend_get_stepsize(fe) * 2) + 1;
 			break;
 		default:
 			/*
@@ -2391,9 +2417,17 @@ static int dvb_frontend_handle_ioctl(struct file *file,
 
 	case FE_GET_INFO: {
 		struct dvb_frontend_info *info = parg;
-
-		memcpy(info, &fe->ops.info, sizeof(struct dvb_frontend_info));
-		dvb_frontend_get_frequency_limits(fe, &info->frequency_min, &info->frequency_max);
+		memset(info, 0, sizeof(*info));
+
+		strcpy(info->name, fe->ops.info.name);
+		info->symbol_rate_min = fe->ops.info.symbol_rate_min;
+		info->symbol_rate_max = fe->ops.info.symbol_rate_max;
+		info->symbol_rate_tolerance = fe->ops.info.symbol_rate_tolerance;
+		info->caps = fe->ops.info.caps;
+		info->frequency_stepsize = dvb_frontend_get_stepsize(fe);
+		dvb_frontend_get_frequency_limits(fe, &info->frequency_min,
+						  &info->frequency_max,
+						  &info->frequency_tolerance);
 
 		/*
 		 * Associate the 4 delivery systems supported by DVBv3
@@ -2423,10 +2457,10 @@ static int dvb_frontend_handle_ioctl(struct file *file,
 			dev_err(fe->dvb->device,
 				"%s: doesn't know how to handle a DVBv3 call to delivery system %i\n",
 				__func__, c->delivery_system);
-			fe->ops.info.type = FE_OFDM;
+			info->type = FE_OFDM;
 		}
 		dev_dbg(fe->dvb->device, "%s: current delivery system on cache: %d, V3 type: %d\n",
-			__func__, c->delivery_system, fe->ops.info.type);
+			__func__, c->delivery_system, info->type);
 
 		/* Set CAN_INVERSION_AUTO bit on in other than oneshot mode */
 		if (!(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT))
diff --git a/drivers/media/dvb-frontends/af9013.c b/drivers/media/dvb-frontends/af9013.c
index 482bce49819a..f3acbb57d48c 100644
--- a/drivers/media/dvb-frontends/af9013.c
+++ b/drivers/media/dvb-frontends/af9013.c
@@ -1136,10 +1136,9 @@ static const struct dvb_frontend_ops af9013_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "Afatech AF9013",
-		.frequency_min = 174000000,
-		.frequency_max = 862000000,
-		.frequency_stepsize = 250000,
-		.frequency_tolerance = 0,
+		.frequency_min_hz = 174 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 250 * kHz,
 		.caps =	FE_CAN_FEC_1_2 |
 			FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 |
diff --git a/drivers/media/dvb-frontends/af9033.c b/drivers/media/dvb-frontends/af9033.c
index aaed7cfe5f66..0cd57013ea25 100644
--- a/drivers/media/dvb-frontends/af9033.c
+++ b/drivers/media/dvb-frontends/af9033.c
@@ -1020,10 +1020,9 @@ static const struct dvb_frontend_ops af9033_ops = {
 	.delsys = {SYS_DVBT},
 	.info = {
 		.name = "Afatech AF9033 (DVB-T)",
-		.frequency_min = 174000000,
-		.frequency_max = 862000000,
-		.frequency_stepsize = 250000,
-		.frequency_tolerance = 0,
+		.frequency_min_hz = 174 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 250 * kHz,
 		.caps =	FE_CAN_FEC_1_2 |
 			FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 |
diff --git a/drivers/media/dvb-frontends/as102_fe.c b/drivers/media/dvb-frontends/as102_fe.c
index 9b2f2da1d989..f59a102b0a64 100644
--- a/drivers/media/dvb-frontends/as102_fe.c
+++ b/drivers/media/dvb-frontends/as102_fe.c
@@ -419,9 +419,9 @@ static const struct dvb_frontend_ops as102_fe_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Abilis AS102 DVB-T",
-		.frequency_min		= 174000000,
-		.frequency_max		= 862000000,
-		.frequency_stepsize	= 166667,
+		.frequency_min_hz	= 174 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_INVERSION_AUTO
 			| FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4
 			| FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO
diff --git a/drivers/media/dvb-frontends/atbm8830.c b/drivers/media/dvb-frontends/atbm8830.c
index 7b0f3239dbba..cbcc65dc9d54 100644
--- a/drivers/media/dvb-frontends/atbm8830.c
+++ b/drivers/media/dvb-frontends/atbm8830.c
@@ -428,9 +428,9 @@ static const struct dvb_frontend_ops atbm8830_ops = {
 	.delsys = { SYS_DTMB },
 	.info = {
 		.name = "AltoBeam ATBM8830/8831 DMB-TH",
-		.frequency_min = 474000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 10000,
+		.frequency_min_hz = 474 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 10 * kHz,
 		.caps =
 			FE_CAN_FEC_AUTO |
 			FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/au8522_dig.c b/drivers/media/dvb-frontends/au8522_dig.c
index 8f659bd1c79e..076f737aa8c0 100644
--- a/drivers/media/dvb-frontends/au8522_dig.c
+++ b/drivers/media/dvb-frontends/au8522_dig.c
@@ -897,9 +897,9 @@ static const struct dvb_frontend_ops au8522_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name			= "Auvitek AU8522 QAM/8VSB Frontend",
-		.frequency_min		= 54000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	=  54 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 
diff --git a/drivers/media/dvb-frontends/bcm3510.c b/drivers/media/dvb-frontends/bcm3510.c
index 05df133dc5be..e92542b92d34 100644
--- a/drivers/media/dvb-frontends/bcm3510.c
+++ b/drivers/media/dvb-frontends/bcm3510.c
@@ -840,10 +840,8 @@ static const struct dvb_frontend_ops bcm3510_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "Broadcom BCM3510 VSB/QAM frontend",
-		.frequency_min =  54000000,
-		.frequency_max = 803000000,
-		/* stepsize is just a guess */
-		.frequency_stepsize = 0,
+		.frequency_min_hz =  54 * MHz,
+		.frequency_max_hz = 803 * MHz,
 		.caps =
 			FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/cx22700.c b/drivers/media/dvb-frontends/cx22700.c
index 9ffd2c7ac74a..961380162cdd 100644
--- a/drivers/media/dvb-frontends/cx22700.c
+++ b/drivers/media/dvb-frontends/cx22700.c
@@ -412,9 +412,9 @@ static const struct dvb_frontend_ops cx22700_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Conexant CX22700 DVB-T",
-		.frequency_min		= 470000000,
-		.frequency_max		= 860000000,
-		.frequency_stepsize	= 166667,
+		.frequency_min_hz	= 470 * MHz,
+		.frequency_max_hz	= 860 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		      FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 		      FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 |
diff --git a/drivers/media/dvb-frontends/cx22702.c b/drivers/media/dvb-frontends/cx22702.c
index e8b1e6b7e7e5..ab9b2924bcca 100644
--- a/drivers/media/dvb-frontends/cx22702.c
+++ b/drivers/media/dvb-frontends/cx22702.c
@@ -622,9 +622,9 @@ static const struct dvb_frontend_ops cx22702_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Conexant CX22702 DVB-T",
-		.frequency_min		= 177000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 166666,
+		.frequency_min_hz	= 177 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 		FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/cx24110.c b/drivers/media/dvb-frontends/cx24110.c
index 2f3a1c237489..9441bdc73097 100644
--- a/drivers/media/dvb-frontends/cx24110.c
+++ b/drivers/media/dvb-frontends/cx24110.c
@@ -629,10 +629,10 @@ static const struct dvb_frontend_ops cx24110_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name = "Conexant CX24110 DVB-S",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011,  /* kHz for QPSK frontends */
-		.frequency_tolerance = 29500,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 29500 * kHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c
index 2dbc7349d870..220f26663647 100644
--- a/drivers/media/dvb-frontends/cx24116.c
+++ b/drivers/media/dvb-frontends/cx24116.c
@@ -1465,10 +1465,10 @@ static const struct dvb_frontend_ops cx24116_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name = "Conexant CX24116/CX24118",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011, /* kHz for QPSK frontends */
-		.frequency_tolerance = 5000,
+		.frequency_min_hz = 950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c
index ba55d75d916c..667bc8be848d 100644
--- a/drivers/media/dvb-frontends/cx24117.c
+++ b/drivers/media/dvb-frontends/cx24117.c
@@ -1622,10 +1622,10 @@ static const struct dvb_frontend_ops cx24117_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name = "Conexant CX24117/CX24132",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011, /* kHz for QPSK frontends */
-		.frequency_tolerance = 5000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/cx24120.c b/drivers/media/dvb-frontends/cx24120.c
index ccbabdae6a69..dd3ec316e7c2 100644
--- a/drivers/media/dvb-frontends/cx24120.c
+++ b/drivers/media/dvb-frontends/cx24120.c
@@ -1555,10 +1555,10 @@ static const struct dvb_frontend_ops cx24120_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name = "Conexant CX24120/CX24118",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011, /* kHz for QPSK frontends */
-		.frequency_tolerance = 5000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps =	FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/cx24123.c b/drivers/media/dvb-frontends/cx24123.c
index bf33e7390aaf..e49215020a93 100644
--- a/drivers/media/dvb-frontends/cx24123.c
+++ b/drivers/media/dvb-frontends/cx24123.c
@@ -1111,10 +1111,10 @@ static const struct dvb_frontend_ops cx24123_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name = "Conexant CX24123/CX24109",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011, /* kHz for QPSK frontends */
-		.frequency_tolerance = 5000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/cxd2820r_t.c b/drivers/media/dvb-frontends/cxd2820r_t.c
index c2e7caf9b010..eb1d7478fa8d 100644
--- a/drivers/media/dvb-frontends/cxd2820r_t.c
+++ b/drivers/media/dvb-frontends/cxd2820r_t.c
@@ -431,8 +431,8 @@ int cxd2820r_get_tune_settings_t(struct dvb_frontend *fe,
 	struct dvb_frontend_tune_settings *s)
 {
 	s->min_delay_ms = 500;
-	s->step_size = fe->ops.info.frequency_stepsize * 2;
-	s->max_drift = (fe->ops.info.frequency_stepsize * 2) + 1;
+	s->step_size = fe->ops.info.frequency_stepsize_hz * 2;
+	s->max_drift = (fe->ops.info.frequency_stepsize_hz * 2) + 1;
 
 	return 0;
 }
diff --git a/drivers/media/dvb-frontends/cxd2820r_t2.c b/drivers/media/dvb-frontends/cxd2820r_t2.c
index e641fde75379..f330ec1710b4 100644
--- a/drivers/media/dvb-frontends/cxd2820r_t2.c
+++ b/drivers/media/dvb-frontends/cxd2820r_t2.c
@@ -426,8 +426,8 @@ int cxd2820r_get_tune_settings_t2(struct dvb_frontend *fe,
 	struct dvb_frontend_tune_settings *s)
 {
 	s->min_delay_ms = 1500;
-	s->step_size = fe->ops.info.frequency_stepsize * 2;
-	s->max_drift = (fe->ops.info.frequency_stepsize * 2) + 1;
+	s->step_size = fe->ops.info.frequency_stepsize_hz * 2;
+	s->max_drift = (fe->ops.info.frequency_stepsize_hz * 2) + 1;
 
 	return 0;
 }
diff --git a/drivers/media/dvb-frontends/cxd2841er.c b/drivers/media/dvb-frontends/cxd2841er.c
index 85905d3503ff..c98093ed3dd7 100644
--- a/drivers/media/dvb-frontends/cxd2841er.c
+++ b/drivers/media/dvb-frontends/cxd2841er.c
@@ -3942,9 +3942,8 @@ static const struct dvb_frontend_ops cxd2841er_dvbs_s2_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name		= "Sony CXD2841ER DVB-S/S2 demodulator",
-		.frequency_min	= 500000,
-		.frequency_max	= 2500000,
-		.frequency_stepsize	= 0,
+		.frequency_min_hz	=  500 * MHz,
+		.frequency_max_hz	= 2500 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.symbol_rate_tolerance = 500,
@@ -3988,8 +3987,8 @@ static struct dvb_frontend_ops cxd2841er_t_c_ops = {
 			FE_CAN_HIERARCHY_AUTO |
 			FE_CAN_MUTE_TS |
 			FE_CAN_2G_MODULATION,
-		.frequency_min = 42000000,
-		.frequency_max = 1002000000,
+		.frequency_min_hz =   42 * MHz,
+		.frequency_max_hz = 1002 * MHz,
 		.symbol_rate_min = 870000,
 		.symbol_rate_max = 11700000
 	},
diff --git a/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c b/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
index bd9101e246d5..f87e27481ea7 100644
--- a/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
+++ b/drivers/media/dvb-frontends/cxd2880/cxd2880_top.c
@@ -1833,9 +1833,9 @@ static enum dvbfe_algo cxd2880_get_frontend_algo(struct dvb_frontend *fe)
 static struct dvb_frontend_ops cxd2880_dvbt_t2_ops = {
 	.info = {
 		.name = "Sony CXD2880",
-		.frequency_min =  174000000,
-		.frequency_max = 862000000,
-		.frequency_stepsize = 1000,
+		.frequency_min_hz = 174 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 1 * kHz,
 		.caps = FE_CAN_INVERSION_AUTO |
 				FE_CAN_FEC_1_2 |
 				FE_CAN_FEC_2_3 |
diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
index 5861f346db49..bbbd53280477 100644
--- a/drivers/media/dvb-frontends/dib3000mb.c
+++ b/drivers/media/dvb-frontends/dib3000mb.c
@@ -786,9 +786,9 @@ static const struct dvb_frontend_ops dib3000mb_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "DiBcom 3000M-B DVB-T",
-		.frequency_min		= 44250000,
-		.frequency_max		= 867250000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	=  44250 * kHz,
+		.frequency_max_hz	= 867250 * kHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps = FE_CAN_INVERSION_AUTO |
 				FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 				FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c
index 7e5d474806a5..c9e1db251723 100644
--- a/drivers/media/dvb-frontends/dib3000mc.c
+++ b/drivers/media/dvb-frontends/dib3000mc.c
@@ -944,9 +944,9 @@ static const struct dvb_frontend_ops dib3000mc_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "DiBcom 3000MC/P",
-		.frequency_min      = 44250000,
-		.frequency_max      = 867250000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  44250 * kHz,
+		.frequency_max_hz      = 867250 * kHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_INVERSION_AUTO |
 			FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/dib7000m.c b/drivers/media/dvb-frontends/dib7000m.c
index 6a1d357d0c7c..b79358d09de6 100644
--- a/drivers/media/dvb-frontends/dib7000m.c
+++ b/drivers/media/dvb-frontends/dib7000m.c
@@ -1443,9 +1443,9 @@ static const struct dvb_frontend_ops dib7000m_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "DiBcom 7000MA/MB/PA/PB/MC",
-		.frequency_min      = 44250000,
-		.frequency_max      = 867250000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  44250 * kHz,
+		.frequency_max_hz      = 867250 * kHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_INVERSION_AUTO |
 			FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/dib7000p.c b/drivers/media/dvb-frontends/dib7000p.c
index 5a8dbc0b25fb..58387860b62d 100644
--- a/drivers/media/dvb-frontends/dib7000p.c
+++ b/drivers/media/dvb-frontends/dib7000p.c
@@ -2824,9 +2824,9 @@ static const struct dvb_frontend_ops dib7000p_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		 .name = "DiBcom 7000PC",
-		 .frequency_min = 44250000,
-		 .frequency_max = 867250000,
-		 .frequency_stepsize = 62500,
+		 .frequency_min_hz =  44250 * kHz,
+		 .frequency_max_hz = 867250 * kHz,
+		 .frequency_stepsize_hz = 62500,
 		 .caps = FE_CAN_INVERSION_AUTO |
 		 FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		 FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c
index 22eec8f65485..3c3f8cb14845 100644
--- a/drivers/media/dvb-frontends/dib8000.c
+++ b/drivers/media/dvb-frontends/dib8000.c
@@ -4390,9 +4390,9 @@ static const struct dvb_frontend_ops dib8000_ops = {
 	.delsys = { SYS_ISDBT },
 	.info = {
 		 .name = "DiBcom 8000 ISDB-T",
-		 .frequency_min = 44250000,
-		 .frequency_max = 867250000,
-		 .frequency_stepsize = 62500,
+		 .frequency_min_hz =  44250 * kHz,
+		 .frequency_max_hz = 867250 * kHz,
+		 .frequency_stepsize_hz = 62500,
 		 .caps = FE_CAN_INVERSION_AUTO |
 		 FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		 FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/dib9000.c b/drivers/media/dvb-frontends/dib9000.c
index b8edb55696bb..0183fb1346ef 100644
--- a/drivers/media/dvb-frontends/dib9000.c
+++ b/drivers/media/dvb-frontends/dib9000.c
@@ -2553,9 +2553,9 @@ static const struct dvb_frontend_ops dib9000_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		 .name = "DiBcom 9000",
-		 .frequency_min = 44250000,
-		 .frequency_max = 867250000,
-		 .frequency_stepsize = 62500,
+		 .frequency_min_hz =  44250 * kHz,
+		 .frequency_max_hz = 867250 * kHz,
+		 .frequency_stepsize_hz = 62500,
 		 .caps = FE_CAN_INVERSION_AUTO |
 		 FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		 FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c
index 5706898e84cc..2ddb7d218ace 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drxj.c
+++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c
@@ -12374,9 +12374,9 @@ static const struct dvb_frontend_ops drx39xxj_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		 .name = "Micronas DRX39xxj family Frontend",
-		 .frequency_stepsize = 62500,
-		 .frequency_min = 51000000,
-		 .frequency_max = 858000000,
+		 .frequency_min_hz =  51 * MHz,
+		 .frequency_max_hz = 858 * MHz,
+		 .frequency_stepsize_hz = 62500,
 		 .caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 
diff --git a/drivers/media/dvb-frontends/drxd_hard.c b/drivers/media/dvb-frontends/drxd_hard.c
index 3b7d31a22d82..11fc259e4383 100644
--- a/drivers/media/dvb-frontends/drxd_hard.c
+++ b/drivers/media/dvb-frontends/drxd_hard.c
@@ -2912,10 +2912,9 @@ static const struct dvb_frontend_ops drxd_ops = {
 	.delsys = { SYS_DVBT},
 	.info = {
 		 .name = "Micronas DRXD DVB-T",
-		 .frequency_min = 47125000,
-		 .frequency_max = 855250000,
-		 .frequency_stepsize = 166667,
-		 .frequency_tolerance = 0,
+		 .frequency_min_hz =  47125 * kHz,
+		 .frequency_max_hz = 855250 * kHz,
+		 .frequency_stepsize_hz = 166667,
 		 .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 |
 		 FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 |
 		 FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c
index 5a26ad93be10..ac10781d3550 100644
--- a/drivers/media/dvb-frontends/drxk_hard.c
+++ b/drivers/media/dvb-frontends/drxk_hard.c
@@ -6744,13 +6744,13 @@ static const struct dvb_frontend_ops drxk_ops = {
 	/* .delsys will be filled dynamically */
 	.info = {
 		.name = "DRXK",
-		.frequency_min = 47000000,
-		.frequency_max = 865000000,
+		.frequency_min_hz =  47 * MHz,
+		.frequency_max_hz = 865 * MHz,
 		 /* For DVB-C */
-		.symbol_rate_min = 870000,
+		.symbol_rate_min =   870000,
 		.symbol_rate_max = 11700000,
 		/* For DVB-T */
-		.frequency_stepsize = 166667,
+		.frequency_stepsize_hz = 166667,
 
 		.caps = FE_CAN_QAM_16 | FE_CAN_QAM_32 | FE_CAN_QAM_64 |
 			FE_CAN_QAM_128 | FE_CAN_QAM_256 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c
index 2ff90e5eabce..46a55146cb07 100644
--- a/drivers/media/dvb-frontends/ds3000.c
+++ b/drivers/media/dvb-frontends/ds3000.c
@@ -1100,10 +1100,10 @@ static const struct dvb_frontend_ops ds3000_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name = "Montage Technology DS3000",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1011, /* kHz for QPSK frontends */
-		.frequency_tolerance = 5000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_stepsize_hz = 1011 * kHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/dvb_dummy_fe.c b/drivers/media/dvb-frontends/dvb_dummy_fe.c
index 6650d4f61ef2..a4cbcae7967d 100644
--- a/drivers/media/dvb-frontends/dvb_dummy_fe.c
+++ b/drivers/media/dvb-frontends/dvb_dummy_fe.c
@@ -170,9 +170,9 @@ static const struct dvb_frontend_ops dvb_dummy_fe_ofdm_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Dummy DVB-T",
-		.frequency_min		= 0,
-		.frequency_max		= 863250000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	= 0,
+		.frequency_max_hz	= 863250 * kHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 				FE_CAN_FEC_4_5 | FE_CAN_FEC_5_6 | FE_CAN_FEC_6_7 |
 				FE_CAN_FEC_7_8 | FE_CAN_FEC_8_9 | FE_CAN_FEC_AUTO |
@@ -201,11 +201,11 @@ static const struct dvb_frontend_ops dvb_dummy_fe_qam_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A },
 	.info = {
 		.name			= "Dummy DVB-C",
-		.frequency_stepsize	= 62500,
-		.frequency_min		= 51000000,
-		.frequency_max		= 858000000,
-		.symbol_rate_min	= (57840000/2)/64,     /* SACLK/64 == (XIN/2)/64 */
-		.symbol_rate_max	= (57840000/2)/4,      /* SACLK/4 */
+		.frequency_min_hz	=  51 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 62500,
+		.symbol_rate_min	= (57840000 / 2) / 64,  /* SACLK/64 == (XIN/2)/64 */
+		.symbol_rate_max	= (57840000 / 2) / 4,   /* SACLK/4 */
 		.caps = FE_CAN_QAM_16 | FE_CAN_QAM_32 | FE_CAN_QAM_64 |
 			FE_CAN_QAM_128 | FE_CAN_QAM_256 |
 			FE_CAN_FEC_AUTO | FE_CAN_INVERSION_AUTO
@@ -230,10 +230,10 @@ static const struct dvb_frontend_ops dvb_dummy_fe_qpsk_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "Dummy DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 250,           /* kHz for QPSK frontends */
-		.frequency_tolerance	= 29500,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	= 250 * kHz,
+		.frequency_tolerance_hz	= 29500 * kHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c
index a772ef8bfe1c..238f09aa72f2 100644
--- a/drivers/media/dvb-frontends/gp8psk-fe.c
+++ b/drivers/media/dvb-frontends/gp8psk-fe.c
@@ -355,9 +355,9 @@ static const struct dvb_frontend_ops gp8psk_fe_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "Genpix DVB-S",
-		.frequency_min		= 800000,
-		.frequency_max		= 2250000,
-		.frequency_stepsize	= 100,
+		.frequency_min_hz	=  800 * MHz,
+		.frequency_max_hz	= 2250 * MHz,
+		.frequency_stepsize_hz	=  100 * kHz,
 		.symbol_rate_min        = 1000000,
 		.symbol_rate_max        = 45000000,
 		.symbol_rate_tolerance  = 500,  /* ppm */
diff --git a/drivers/media/dvb-frontends/ix2505v.c b/drivers/media/dvb-frontends/ix2505v.c
index 9c055f72c416..a30707b61b1f 100644
--- a/drivers/media/dvb-frontends/ix2505v.c
+++ b/drivers/media/dvb-frontends/ix2505v.c
@@ -135,8 +135,8 @@ static int ix2505v_set_params(struct dvb_frontend *fe)
 	u8 gain, cc, ref, psc, local_osc, lpf;
 	u8 data[4] = {0};
 
-	if ((frequency < fe->ops.info.frequency_min)
-	||  (frequency > fe->ops.info.frequency_max))
+	if ((frequency < fe->ops.info.frequency_min_hz / kHz)
+	||  (frequency > fe->ops.info.frequency_max_hz / kHz))
 		return -EINVAL;
 
 	if (state->config->tuner_gain)
diff --git a/drivers/media/dvb-frontends/l64781.c b/drivers/media/dvb-frontends/l64781.c
index 249c18761e6e..9afb5bf6424b 100644
--- a/drivers/media/dvb-frontends/l64781.c
+++ b/drivers/media/dvb-frontends/l64781.c
@@ -575,10 +575,9 @@ static const struct dvb_frontend_ops l64781_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "LSI L64781 DVB-T",
-	/*	.frequency_min = ???,*/
-	/*	.frequency_max = ???,*/
-		.frequency_stepsize = 166666,
-	/*      .frequency_tolerance = ???,*/
+	/*	.frequency_min_hz = ???,*/
+	/*	.frequency_max_hz = ???,*/
+		.frequency_stepsize_hz = 166666,
 	/*      .symbol_rate_tolerance = ???,*/
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		      FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 |
diff --git a/drivers/media/dvb-frontends/lg2160.c b/drivers/media/dvb-frontends/lg2160.c
index 9854096839ae..408151e33fa7 100644
--- a/drivers/media/dvb-frontends/lg2160.c
+++ b/drivers/media/dvb-frontends/lg2160.c
@@ -1349,9 +1349,9 @@ static const struct dvb_frontend_ops lg2160_ops = {
 	.delsys = { SYS_ATSCMH },
 	.info = {
 		.name = "LG Electronics LG2160 ATSC/MH Frontend",
-		.frequency_min      = 54000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  54 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 	},
 	.i2c_gate_ctrl        = lg216x_i2c_gate_ctrl,
 #if 0
@@ -1375,9 +1375,9 @@ static const struct dvb_frontend_ops lg2161_ops = {
 	.delsys = { SYS_ATSCMH },
 	.info = {
 		.name = "LG Electronics LG2161 ATSC/MH Frontend",
-		.frequency_min      = 54000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  54 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 	},
 	.i2c_gate_ctrl        = lg216x_i2c_gate_ctrl,
 #if 0
diff --git a/drivers/media/dvb-frontends/lgdt3305.c b/drivers/media/dvb-frontends/lgdt3305.c
index 735d73060265..857e9b4d69b4 100644
--- a/drivers/media/dvb-frontends/lgdt3305.c
+++ b/drivers/media/dvb-frontends/lgdt3305.c
@@ -1164,9 +1164,9 @@ static const struct dvb_frontend_ops lgdt3304_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "LG Electronics LGDT3304 VSB/QAM Frontend",
-		.frequency_min      = 54000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  54 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 	.i2c_gate_ctrl        = lgdt3305_i2c_gate_ctrl,
@@ -1187,9 +1187,9 @@ static const struct dvb_frontend_ops lgdt3305_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "LG Electronics LGDT3305 VSB/QAM Frontend",
-		.frequency_min      = 54000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  54 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 	.i2c_gate_ctrl        = lgdt3305_i2c_gate_ctrl,
diff --git a/drivers/media/dvb-frontends/lgdt3306a.c b/drivers/media/dvb-frontends/lgdt3306a.c
index 32de824476db..0e1f5daaf20c 100644
--- a/drivers/media/dvb-frontends/lgdt3306a.c
+++ b/drivers/media/dvb-frontends/lgdt3306a.c
@@ -2157,9 +2157,9 @@ static const struct dvb_frontend_ops lgdt3306a_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "LG Electronics LGDT3306A VSB/QAM Frontend",
-		.frequency_min      = 54000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz      =  54 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_QAM_AUTO | FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 	.i2c_gate_ctrl        = lgdt3306a_i2c_gate_ctrl,
diff --git a/drivers/media/dvb-frontends/lgdt330x.c b/drivers/media/dvb-frontends/lgdt330x.c
index f6731738b073..10d584ce538d 100644
--- a/drivers/media/dvb-frontends/lgdt330x.c
+++ b/drivers/media/dvb-frontends/lgdt330x.c
@@ -944,9 +944,9 @@ static const struct dvb_frontend_ops lgdt3302_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "LG Electronics LGDT3302 VSB/QAM Frontend",
-		.frequency_min = 54000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz =  54 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min    = 5056941,	/* QAM 64 */
 		.symbol_rate_max    = 10762000,	/* VSB 8  */
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
@@ -966,9 +966,9 @@ static const struct dvb_frontend_ops lgdt3303_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "LG Electronics LGDT3303 VSB/QAM Frontend",
-		.frequency_min = 54000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz =  54 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min    = 5056941,	/* QAM 64 */
 		.symbol_rate_max    = 10762000,	/* VSB 8  */
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
diff --git a/drivers/media/dvb-frontends/lgs8gl5.c b/drivers/media/dvb-frontends/lgs8gl5.c
index f47e5a1af16d..07e5bcee9c1e 100644
--- a/drivers/media/dvb-frontends/lgs8gl5.c
+++ b/drivers/media/dvb-frontends/lgs8gl5.c
@@ -416,10 +416,9 @@ static const struct dvb_frontend_ops lgs8gl5_ops = {
 	.delsys = { SYS_DTMB },
 	.info = {
 		.name			= "Legend Silicon LGS-8GL5 DMB-TH",
-		.frequency_min		= 474000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 10000,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	= 474 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	=  10 * kHz,
 		.caps = FE_CAN_FEC_AUTO |
 			FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_32 |
 			FE_CAN_QAM_64 | FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/lgs8gxx.c b/drivers/media/dvb-frontends/lgs8gxx.c
index 84af8a12f26a..a6bcf1571d10 100644
--- a/drivers/media/dvb-frontends/lgs8gxx.c
+++ b/drivers/media/dvb-frontends/lgs8gxx.c
@@ -985,9 +985,9 @@ static const struct dvb_frontend_ops lgs8gxx_ops = {
 	.delsys = { SYS_DTMB },
 	.info = {
 		.name = "Legend Silicon LGS8913/LGS8GXX DMB-TH",
-		.frequency_min = 474000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 10000,
+		.frequency_min_hz = 474 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 10 * kHz,
 		.caps =
 			FE_CAN_FEC_AUTO |
 			FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index 65d157fe76d1..dffd2d4bf1c8 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -1300,9 +1300,9 @@ static const struct dvb_frontend_ops m88ds3103_ops = {
 	.delsys = {SYS_DVBS, SYS_DVBS2},
 	.info = {
 		.name = "Montage Technology M88DS3103",
-		.frequency_min =  950000,
-		.frequency_max = 2150000,
-		.frequency_tolerance = 5000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min =  1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c
index 496ce27fa63a..d5bc85501f9e 100644
--- a/drivers/media/dvb-frontends/m88rs2000.c
+++ b/drivers/media/dvb-frontends/m88rs2000.c
@@ -759,10 +759,10 @@ static const struct dvb_frontend_ops m88rs2000_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "M88RS2000 DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 1000,	 /* kHz for QPSK frontends */
-		.frequency_tolerance	= 5000,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	= 1 * MHz,
+		.frequency_tolerance_hz	= 5 * MHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,	/* ppm */
diff --git a/drivers/media/dvb-frontends/mb86a16.c b/drivers/media/dvb-frontends/mb86a16.c
index 377cd984b069..da505a5d035f 100644
--- a/drivers/media/dvb-frontends/mb86a16.c
+++ b/drivers/media/dvb-frontends/mb86a16.c
@@ -1808,10 +1808,9 @@ static const struct dvb_frontend_ops mb86a16_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "Fujitsu MB86A16 DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 3000,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=    3 * MHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,
diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c
index c3b1b88e2e7a..66fc77db0e75 100644
--- a/drivers/media/dvb-frontends/mb86a20s.c
+++ b/drivers/media/dvb-frontends/mb86a20s.c
@@ -2111,9 +2111,9 @@ static const struct dvb_frontend_ops mb86a20s_ops = {
 			FE_CAN_TRANSMISSION_MODE_AUTO | FE_CAN_QAM_AUTO |
 			FE_CAN_GUARD_INTERVAL_AUTO    | FE_CAN_HIERARCHY_AUTO,
 		/* Actually, those values depend on the used tuner */
-		.frequency_min = 45000000,
-		.frequency_max = 864000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz =  45 * MHz,
+		.frequency_max_hz = 864 * MHz,
+		.frequency_stepsize_hz = 62500,
 	},
 
 	.release = mb86a20s_release,
diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c
index e2a3fc521620..aad07adda37d 100644
--- a/drivers/media/dvb-frontends/mt312.c
+++ b/drivers/media/dvb-frontends/mt312.c
@@ -559,8 +559,8 @@ static int mt312_set_frontend(struct dvb_frontend *fe)
 
 	dprintk("%s: Freq %d\n", __func__, p->frequency);
 
-	if ((p->frequency < fe->ops.info.frequency_min)
-	    || (p->frequency > fe->ops.info.frequency_max))
+	if ((p->frequency < fe->ops.info.frequency_min_hz / kHz)
+	    || (p->frequency > fe->ops.info.frequency_max_hz / kHz))
 		return -EINVAL;
 
 	if (((int)p->inversion < INVERSION_OFF)
@@ -755,10 +755,10 @@ static const struct dvb_frontend_ops mt312_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name = "Zarlink ???? DVB-S",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 		/* FIXME: adjust freq to real used xtal */
-		.frequency_stepsize = (MT312_PLL_CLK / 1000) / 128,
+		.frequency_stepsize_hz = MT312_PLL_CLK / 128,
 		.symbol_rate_min = MT312_SYS_CLK / 128, /* FIXME as above */
 		.symbol_rate_max = MT312_SYS_CLK / 2,
 		.caps =
diff --git a/drivers/media/dvb-frontends/mt352.c b/drivers/media/dvb-frontends/mt352.c
index a440b76444d3..da3e466d50e2 100644
--- a/drivers/media/dvb-frontends/mt352.c
+++ b/drivers/media/dvb-frontends/mt352.c
@@ -567,10 +567,9 @@ static const struct dvb_frontend_ops mt352_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Zarlink MT352 DVB-T",
-		.frequency_min		= 174000000,
-		.frequency_max		= 862000000,
-		.frequency_stepsize	= 166667,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	= 174 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 |
 			FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/mxl5xx.c b/drivers/media/dvb-frontends/mxl5xx.c
index 274d8fca0763..295f37d5f10e 100644
--- a/drivers/media/dvb-frontends/mxl5xx.c
+++ b/drivers/media/dvb-frontends/mxl5xx.c
@@ -784,10 +784,8 @@ static struct dvb_frontend_ops mxl_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2, SYS_DSS },
 	.info = {
 		.name			= "MaxLinear MxL5xx DVB-S/S2 tuner-demodulator",
-		.frequency_min		= 300000,
-		.frequency_max		= 2350000,
-		.frequency_stepsize	= 0,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  300 * MHz,
+		.frequency_max_hz	= 2350 * MHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.caps			= FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c
index a6cc4952eb74..0961e686ff68 100644
--- a/drivers/media/dvb-frontends/nxt200x.c
+++ b/drivers/media/dvb-frontends/nxt200x.c
@@ -1212,9 +1212,9 @@ static const struct dvb_frontend_ops nxt200x_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name = "Nextwave NXT200X VSB/QAM frontend",
-		.frequency_min =  54000000,
-		.frequency_max = 860000000,
-		.frequency_stepsize = 166666,	/* stepsize is just a guess */
+		.frequency_min_hz =  54 * MHz,
+		.frequency_max_hz = 860 * MHz,
+		.frequency_stepsize_hz = 166666,	/* stepsize is just a guess */
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_8VSB | FE_CAN_QAM_64 | FE_CAN_QAM_256
diff --git a/drivers/media/dvb-frontends/nxt6000.c b/drivers/media/dvb-frontends/nxt6000.c
index 109a635d166a..72e447e8ba64 100644
--- a/drivers/media/dvb-frontends/nxt6000.c
+++ b/drivers/media/dvb-frontends/nxt6000.c
@@ -596,9 +596,9 @@ static const struct dvb_frontend_ops nxt6000_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "NxtWave NXT6000 DVB-T",
-		.frequency_min = 0,
-		.frequency_max = 863250000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz = 0,
+		.frequency_max_hz = 863250 * kHz,
+		.frequency_stepsize_hz = 62500,
 		/*.frequency_tolerance = *//* FIXME: 12% of SR */
 		.symbol_rate_min = 0,	/* FIXME */
 		.symbol_rate_max = 9360000,	/* FIXME */
diff --git a/drivers/media/dvb-frontends/or51132.c b/drivers/media/dvb-frontends/or51132.c
index b4c9aadcb552..fc35f37eb3c0 100644
--- a/drivers/media/dvb-frontends/or51132.c
+++ b/drivers/media/dvb-frontends/or51132.c
@@ -583,9 +583,9 @@ static const struct dvb_frontend_ops or51132_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name			= "Oren OR51132 VSB/QAM Frontend",
-		.frequency_min		= 44000000,
-		.frequency_max		= 958000000,
-		.frequency_stepsize	= 166666,
+		.frequency_min_hz	=  44 * MHz,
+		.frequency_max_hz	= 958 * MHz,
+		.frequency_stepsize_hz	= 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/or51211.c b/drivers/media/dvb-frontends/or51211.c
index b65ba34fd00a..a39bbd8ff1f0 100644
--- a/drivers/media/dvb-frontends/or51211.c
+++ b/drivers/media/dvb-frontends/or51211.c
@@ -530,10 +530,10 @@ struct dvb_frontend* or51211_attach(const struct or51211_config* config,
 static const struct dvb_frontend_ops or51211_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
-		.name               = "Oren OR51211 VSB Frontend",
-		.frequency_min      = 44000000,
-		.frequency_max      = 958000000,
-		.frequency_stepsize = 166666,
+		.name                  = "Oren OR51211 VSB Frontend",
+		.frequency_min_hz      =  44 * MHz,
+		.frequency_max_hz      = 958 * MHz,
+		.frequency_stepsize_hz = 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_8VSB
diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c
index 7bbfe11d11ed..adc9046d5a90 100644
--- a/drivers/media/dvb-frontends/rtl2830.c
+++ b/drivers/media/dvb-frontends/rtl2830.c
@@ -159,8 +159,8 @@ static int rtl2830_get_tune_settings(struct dvb_frontend *fe,
 				     struct dvb_frontend_tune_settings *s)
 {
 	s->min_delay_ms = 500;
-	s->step_size = fe->ops.info.frequency_stepsize * 2;
-	s->max_drift = (fe->ops.info.frequency_stepsize * 2) + 1;
+	s->step_size = fe->ops.info.frequency_stepsize_hz * 2;
+	s->max_drift = (fe->ops.info.frequency_stepsize_hz * 2) + 1;
 
 	return 0;
 }
diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c
index fa3b8169c1a5..2f1f5cbaf03c 100644
--- a/drivers/media/dvb-frontends/rtl2832.c
+++ b/drivers/media/dvb-frontends/rtl2832.c
@@ -408,8 +408,8 @@ static int rtl2832_get_tune_settings(struct dvb_frontend *fe,
 
 	dev_dbg(&client->dev, "\n");
 	s->min_delay_ms = 1000;
-	s->step_size = fe->ops.info.frequency_stepsize * 2;
-	s->max_drift = (fe->ops.info.frequency_stepsize * 2) + 1;
+	s->step_size = fe->ops.info.frequency_stepsize_hz * 2;
+	s->max_drift = (fe->ops.info.frequency_stepsize_hz * 2) + 1;
 	return 0;
 }
 
@@ -841,9 +841,9 @@ static const struct dvb_frontend_ops rtl2832_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "Realtek RTL2832 (DVB-T)",
-		.frequency_min	  = 174000000,
-		.frequency_max	  = 862000000,
-		.frequency_stepsize = 166667,
+		.frequency_min_hz	= 174 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_FEC_1_2 |
 			FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 |
diff --git a/drivers/media/dvb-frontends/s5h1409.c b/drivers/media/dvb-frontends/s5h1409.c
index a23ba8727218..ceeb0c3551ce 100644
--- a/drivers/media/dvb-frontends/s5h1409.c
+++ b/drivers/media/dvb-frontends/s5h1409.c
@@ -999,9 +999,9 @@ static const struct dvb_frontend_ops s5h1409_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name			= "Samsung S5H1409 QAM/8VSB Frontend",
-		.frequency_min		= 54000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	=  54 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 
diff --git a/drivers/media/dvb-frontends/s5h1411.c b/drivers/media/dvb-frontends/s5h1411.c
index af5962807f2c..98aeed1d2284 100644
--- a/drivers/media/dvb-frontends/s5h1411.c
+++ b/drivers/media/dvb-frontends/s5h1411.c
@@ -918,9 +918,9 @@ static const struct dvb_frontend_ops s5h1411_ops = {
 	.delsys = { SYS_ATSC, SYS_DVBC_ANNEX_B },
 	.info = {
 		.name			= "Samsung S5H1411 QAM/8VSB Frontend",
-		.frequency_min		= 54000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	=  54 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps = FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
 	},
 
diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c
index 8b2222530227..a65cdf8e8cd9 100644
--- a/drivers/media/dvb-frontends/s5h1420.c
+++ b/drivers/media/dvb-frontends/s5h1420.c
@@ -934,10 +934,10 @@ static const struct dvb_frontend_ops s5h1420_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name     = "Samsung S5H1420/PnpNetwork PN1010 DVB-S",
-		.frequency_min    = 950000,
-		.frequency_max    = 2150000,
-		.frequency_stepsize = 125,     /* kHz for QPSK frontends */
-		.frequency_tolerance  = 29500,
+		.frequency_min_hz    =  950 * MHz,
+		.frequency_max_hz    = 2150 * MHz,
+		.frequency_stepsize_hz = 125 * kHz,
+		.frequency_tolerance_hz  = 29500 * kHz,
 		.symbol_rate_min  = 1000000,
 		.symbol_rate_max  = 45000000,
 		/*  .symbol_rate_tolerance  = ???,*/
diff --git a/drivers/media/dvb-frontends/s5h1432.c b/drivers/media/dvb-frontends/s5h1432.c
index 740a60df0455..4dc3febc0e12 100644
--- a/drivers/media/dvb-frontends/s5h1432.c
+++ b/drivers/media/dvb-frontends/s5h1432.c
@@ -370,9 +370,9 @@ static const struct dvb_frontend_ops s5h1432_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		 .name = "Samsung s5h1432 DVB-T Frontend",
-		 .frequency_min = 177000000,
-		 .frequency_max = 858000000,
-		 .frequency_stepsize = 166666,
+		 .frequency_min_hz = 177 * MHz,
+		 .frequency_max_hz = 858 * MHz,
+		 .frequency_stepsize_hz = 166666,
 		 .caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		 FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 		 FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/s921.c b/drivers/media/dvb-frontends/s921.c
index 6c9015236655..79276871112a 100644
--- a/drivers/media/dvb-frontends/s921.c
+++ b/drivers/media/dvb-frontends/s921.c
@@ -510,15 +510,14 @@ static const struct dvb_frontend_ops s921_ops = {
 	/* Use dib8000 values per default */
 	.info = {
 		.name = "Sharp S921",
-		.frequency_min = 470000000,
+		.frequency_min_hz = 470 * MHz,
 		/*
 		 * Max should be 770MHz instead, according with Sharp docs,
 		 * but Leadership doc says it works up to 806 MHz. This is
 		 * required to get channel 69, used in Brazil
 		 */
-		.frequency_max = 806000000,
-		.frequency_tolerance = 0,
-		 .caps = FE_CAN_INVERSION_AUTO |
+		.frequency_max_hz = 806 * MHz,
+		.caps =  FE_CAN_INVERSION_AUTO |
 			 FE_CAN_FEC_1_2  | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			 FE_CAN_FEC_5_6  | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			 FE_CAN_QPSK     | FE_CAN_QAM_16 | FE_CAN_QAM_64 |
diff --git a/drivers/media/dvb-frontends/si2165.c b/drivers/media/dvb-frontends/si2165.c
index 2dd336f95cbf..feacd8da421d 100644
--- a/drivers/media/dvb-frontends/si2165.c
+++ b/drivers/media/dvb-frontends/si2165.c
@@ -1120,7 +1120,7 @@ static const struct dvb_frontend_ops si2165_ops = {
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 7200000,
 		/* For DVB-T */
-		.frequency_stepsize = 166667,
+		.frequency_stepsize_hz = 166667,
 		.caps = FE_CAN_FEC_1_2 |
 			FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 |
diff --git a/drivers/media/dvb-frontends/si21xx.c b/drivers/media/dvb-frontends/si21xx.c
index 9b32a1b3205e..8546a236d452 100644
--- a/drivers/media/dvb-frontends/si21xx.c
+++ b/drivers/media/dvb-frontends/si21xx.c
@@ -870,10 +870,9 @@ static const struct dvb_frontend_ops si21xx_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "SL SI21XX DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 125,	 /* kHz for QPSK frontends */
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=  125 * kHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,	/* ppm */
diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index 1d57a20093fc..8d31cf3f4f07 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -584,9 +584,9 @@ static const struct dvb_frontend_ops sp8870_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Spase SP8870 DVB-T",
-		.frequency_min		= 470000000,
-		.frequency_max		= 860000000,
-		.frequency_stepsize	= 166666,
+		.frequency_min_hz	= 470 * MHz,
+		.frequency_max_hz	= 860 * MHz,
+		.frequency_stepsize_hz	= 166666,
 		.caps			= FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 |
 					  FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 |
 					  FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/sp887x.c b/drivers/media/dvb-frontends/sp887x.c
index 57a0d0ae2b48..c02f50995df4 100644
--- a/drivers/media/dvb-frontends/sp887x.c
+++ b/drivers/media/dvb-frontends/sp887x.c
@@ -594,9 +594,9 @@ static const struct dvb_frontend_ops sp887x_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "Spase SP887x DVB-T",
-		.frequency_min =  50500000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 166666,
+		.frequency_min_hz =  50500 * kHz,
+		.frequency_max_hz = 858000 * kHz,
+		.frequency_stepsize_hz = 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 |
diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c
index 3c654ae16e78..874e9c9125d6 100644
--- a/drivers/media/dvb-frontends/stb0899_drv.c
+++ b/drivers/media/dvb-frontends/stb0899_drv.c
@@ -1584,10 +1584,8 @@ static const struct dvb_frontend_ops stb0899_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2, SYS_DSS },
 	.info = {
 		.name			= "STB0899 Multistandard",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 0,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
 		.symbol_rate_min	=  5000000,
 		.symbol_rate_max	= 45000000,
 
diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c
index f947ed947aae..c9a9fa4e2c1b 100644
--- a/drivers/media/dvb-frontends/stv0288.c
+++ b/drivers/media/dvb-frontends/stv0288.c
@@ -533,10 +533,9 @@ static const struct dvb_frontend_ops stv0288_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "ST STV0288 DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 1000,	 /* kHz for QPSK frontends */
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=    1 * MHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,	/* ppm */
diff --git a/drivers/media/dvb-frontends/stv0297.c b/drivers/media/dvb-frontends/stv0297.c
index b823c04e24d3..9a9915f71483 100644
--- a/drivers/media/dvb-frontends/stv0297.c
+++ b/drivers/media/dvb-frontends/stv0297.c
@@ -694,9 +694,9 @@ static const struct dvb_frontend_ops stv0297_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A },
 	.info = {
 		 .name = "ST STV0297 DVB-C",
-		 .frequency_min = 47000000,
-		 .frequency_max = 862000000,
-		 .frequency_stepsize = 62500,
+		 .frequency_min_hz = 470 * MHz,
+		 .frequency_max_hz = 862 * MHz,
+		 .frequency_stepsize_hz = 62500,
 		 .symbol_rate_min = 870000,
 		 .symbol_rate_max = 11700000,
 		 .caps = FE_CAN_QAM_16 | FE_CAN_QAM_32 | FE_CAN_QAM_64 |
diff --git a/drivers/media/dvb-frontends/stv0299.c b/drivers/media/dvb-frontends/stv0299.c
index 633b90e6fe86..4f466394a16c 100644
--- a/drivers/media/dvb-frontends/stv0299.c
+++ b/drivers/media/dvb-frontends/stv0299.c
@@ -717,10 +717,9 @@ static const struct dvb_frontend_ops stv0299_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "ST STV0299 DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 125,	 /* kHz for QPSK frontends */
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=  125 * kHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,	/* ppm */
diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c
index 5435c908e298..5b91e740e135 100644
--- a/drivers/media/dvb-frontends/stv0367.c
+++ b/drivers/media/dvb-frontends/stv0367.c
@@ -1693,10 +1693,9 @@ static const struct dvb_frontend_ops stv0367ter_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "ST STV0367 DVB-T",
-		.frequency_min		= 47000000,
-		.frequency_max		= 862000000,
-		.frequency_stepsize	= 15625,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  47 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 15625,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 |
 			FE_CAN_FEC_AUTO |
@@ -2867,9 +2866,9 @@ static const struct dvb_frontend_ops stv0367cab_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A },
 	.info = {
 		.name = "ST STV0367 DVB-C",
-		.frequency_min = 47000000,
-		.frequency_max = 862000000,
-		.frequency_stepsize = 62500,
+		.frequency_min_hz =  47 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min = 870000,
 		.symbol_rate_max = 11700000,
 		.caps = 0x400 |/* FE_CAN_QAM_4 */
@@ -3273,10 +3272,9 @@ static const struct dvb_frontend_ops stv0367ddb_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A, SYS_DVBT },
 	.info = {
 		.name			= "ST STV0367 DDB DVB-C/T",
-		.frequency_min		= 47000000,
-		.frequency_max		= 865000000,
-		.frequency_stepsize	= 166667,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  47 * MHz,
+		.frequency_max_hz	= 865 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.symbol_rate_min	= 870000,
 		.symbol_rate_max	= 11700000,
 		.caps = /* DVB-C */
diff --git a/drivers/media/dvb-frontends/stv0900_core.c b/drivers/media/dvb-frontends/stv0900_core.c
index 72f17b97ca04..254618a06140 100644
--- a/drivers/media/dvb-frontends/stv0900_core.c
+++ b/drivers/media/dvb-frontends/stv0900_core.c
@@ -1875,10 +1875,9 @@ static const struct dvb_frontend_ops stv0900_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2, SYS_DSS },
 	.info = {
 		.name			= "STV0900 frontend",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 125,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=  125 * kHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.symbol_rate_tolerance	= 500,
diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c
index 9133f65d4623..a0622bb71803 100644
--- a/drivers/media/dvb-frontends/stv090x.c
+++ b/drivers/media/dvb-frontends/stv090x.c
@@ -4905,10 +4905,8 @@ static const struct dvb_frontend_ops stv090x_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2, SYS_DSS },
 	.info = {
 		.name			= "STV090x Multistandard",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 0,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 		.caps			= FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/stv0910.c b/drivers/media/dvb-frontends/stv0910.c
index 91b21eb59531..4c86073f1a8d 100644
--- a/drivers/media/dvb-frontends/stv0910.c
+++ b/drivers/media/dvb-frontends/stv0910.c
@@ -1724,10 +1724,8 @@ static const struct dvb_frontend_ops stv0910_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2, SYS_DSS },
 	.info = {
 		.name			= "ST STV0910",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 0,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
 		.symbol_rate_min	= 100000,
 		.symbol_rate_max	= 70000000,
 		.caps			= FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/tc90522.c b/drivers/media/dvb-frontends/tc90522.c
index 7abf6b0916ed..2ad81a438d6a 100644
--- a/drivers/media/dvb-frontends/tc90522.c
+++ b/drivers/media/dvb-frontends/tc90522.c
@@ -714,8 +714,8 @@ static const struct dvb_frontend_ops tc90522_ops_sat = {
 	.delsys = { SYS_ISDBS },
 	.info = {
 		.name = "Toshiba TC90522 ISDB-S module",
-		.frequency_min =  950000,
-		.frequency_max = 2150000,
+		.frequency_min_hz =  950 * MHz,
+		.frequency_max_hz = 2150 * MHz,
 		.caps = FE_CAN_INVERSION_AUTO | FE_CAN_FEC_AUTO |
 			FE_CAN_QAM_AUTO | FE_CAN_TRANSMISSION_MODE_AUTO |
 			FE_CAN_GUARD_INTERVAL_AUTO | FE_CAN_HIERARCHY_AUTO,
@@ -734,9 +734,9 @@ static const struct dvb_frontend_ops tc90522_ops_ter = {
 	.delsys = { SYS_ISDBT },
 	.info = {
 		.name = "Toshiba TC90522 ISDB-T module",
-		.frequency_min = 470000000,
-		.frequency_max = 770000000,
-		.frequency_stepsize = 142857,
+		.frequency_min_hz = 470 * MHz,
+		.frequency_max_hz = 770 * MHz,
+		.frequency_stepsize_hz = 142857,
 		.caps = FE_CAN_INVERSION_AUTO |
 			FE_CAN_FEC_1_2  | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6  | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/tda10021.c b/drivers/media/dvb-frontends/tda10021.c
index 4f588ebde39d..5cd885d4ea04 100644
--- a/drivers/media/dvb-frontends/tda10021.c
+++ b/drivers/media/dvb-frontends/tda10021.c
@@ -487,11 +487,11 @@ static const struct dvb_frontend_ops tda10021_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A, SYS_DVBC_ANNEX_C },
 	.info = {
 		.name = "Philips TDA10021 DVB-C",
-		.frequency_stepsize = 62500,
-		.frequency_min = 47000000,
-		.frequency_max = 862000000,
-		.symbol_rate_min = (XIN/2)/64,     /* SACLK/64 == (XIN/2)/64 */
-		.symbol_rate_max = (XIN/2)/4,      /* SACLK/4 */
+		.frequency_min_hz =  47 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 62500,
+		.symbol_rate_min = (XIN / 2) / 64,     /* SACLK/64 == (XIN/2)/64 */
+		.symbol_rate_max = (XIN / 2) / 4,      /* SACLK/4 */
 	#if 0
 		.frequency_tolerance = ???,
 		.symbol_rate_tolerance = ???,  /* ppm */  /* == 8% (spec p. 5) */
diff --git a/drivers/media/dvb-frontends/tda10023.c b/drivers/media/dvb-frontends/tda10023.c
index 6c84916234e3..0a9a54563ebe 100644
--- a/drivers/media/dvb-frontends/tda10023.c
+++ b/drivers/media/dvb-frontends/tda10023.c
@@ -575,9 +575,9 @@ static const struct dvb_frontend_ops tda10023_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A, SYS_DVBC_ANNEX_C },
 	.info = {
 		.name = "Philips TDA10023 DVB-C",
-		.frequency_stepsize = 62500,
-		.frequency_min =  47000000,
-		.frequency_max = 862000000,
+		.frequency_min_hz =  47 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min = 0,  /* set in tda10023_attach */
 		.symbol_rate_max = 0,  /* set in tda10023_attach */
 		.caps = 0x400 | //FE_CAN_QAM_4
diff --git a/drivers/media/dvb-frontends/tda10048.c b/drivers/media/dvb-frontends/tda10048.c
index de82a2558e15..c01d60a88af2 100644
--- a/drivers/media/dvb-frontends/tda10048.c
+++ b/drivers/media/dvb-frontends/tda10048.c
@@ -1156,9 +1156,9 @@ static const struct dvb_frontend_ops tda10048_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "NXP TDA10048HN DVB-T",
-		.frequency_min		= 177000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 166666,
+		.frequency_min_hz	= 177 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 		FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO |
diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c
index 7dcfb4a4b2d0..d402e4b722ca 100644
--- a/drivers/media/dvb-frontends/tda1004x.c
+++ b/drivers/media/dvb-frontends/tda1004x.c
@@ -1249,9 +1249,9 @@ static const struct dvb_frontend_ops tda10045_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "Philips TDA10045H DVB-T",
-		.frequency_min = 51000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 166667,
+		.frequency_min_hz =  51 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 166667,
 		.caps =
 		    FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		    FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
@@ -1319,9 +1319,9 @@ static const struct dvb_frontend_ops tda10046_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "Philips TDA10046H DVB-T",
-		.frequency_min = 51000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 166667,
+		.frequency_min_hz =  51 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 166667,
 		.caps =
 		    FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		    FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c
index 1ed67c08e699..097c42d3f8c2 100644
--- a/drivers/media/dvb-frontends/tda10071.c
+++ b/drivers/media/dvb-frontends/tda10071.c
@@ -681,8 +681,8 @@ static int tda10071_set_frontend(struct dvb_frontend *fe)
 	cmd.args[5] = (c->frequency >>  0) & 0xff;
 	cmd.args[6] = ((c->symbol_rate / 1000) >> 8) & 0xff;
 	cmd.args[7] = ((c->symbol_rate / 1000) >> 0) & 0xff;
-	cmd.args[8] = (tda10071_ops.info.frequency_tolerance >> 8) & 0xff;
-	cmd.args[9] = (tda10071_ops.info.frequency_tolerance >> 0) & 0xff;
+	cmd.args[8] = ((tda10071_ops.info.frequency_tolerance_hz / 1000) >> 8) & 0xff;
+	cmd.args[9] = ((tda10071_ops.info.frequency_tolerance_hz / 1000) >> 0) & 0xff;
 	cmd.args[10] = rolloff;
 	cmd.args[11] = inversion;
 	cmd.args[12] = pilot;
@@ -1106,9 +1106,9 @@ static const struct dvb_frontend_ops tda10071_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name = "NXP TDA10071",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_tolerance = 5000,
+		.frequency_min_hz    =  950 * MHz,
+		.frequency_max_hz    = 2150 * MHz,
+		.frequency_tolerance_hz = 5 * MHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/tda10086.c b/drivers/media/dvb-frontends/tda10086.c
index 1a95c521e97f..8323e4e53d66 100644
--- a/drivers/media/dvb-frontends/tda10086.c
+++ b/drivers/media/dvb-frontends/tda10086.c
@@ -710,9 +710,9 @@ static const struct dvb_frontend_ops tda10086_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name     = "Philips TDA10086 DVB-S",
-		.frequency_min    = 950000,
-		.frequency_max    = 2150000,
-		.frequency_stepsize = 125,     /* kHz for QPSK frontends */
+		.frequency_min_hz      =  950 * MHz,
+		.frequency_max_hz      = 2150 * MHz,
+		.frequency_stepsize_hz =  125 * kHz,
 		.symbol_rate_min  = 1000000,
 		.symbol_rate_max  = 45000000,
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/dvb-frontends/tda8083.c b/drivers/media/dvb-frontends/tda8083.c
index 29b4f64c030c..53b26060db7e 100644
--- a/drivers/media/dvb-frontends/tda8083.c
+++ b/drivers/media/dvb-frontends/tda8083.c
@@ -453,10 +453,9 @@ static const struct dvb_frontend_ops tda8083_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "Philips TDA8083 DVB-S",
-		.frequency_min		= 920000,     /* TDA8060 */
-		.frequency_max		= 2200000,    /* TDA8060 */
-		.frequency_stepsize	= 125,   /* kHz for QPSK frontends */
-	/*      .frequency_tolerance	= ???,*/
+		.frequency_min_hz	=  920 * MHz,     /* TDA8060 */
+		.frequency_max_hz	= 2200 * MHz,    /* TDA8060 */
+		.frequency_stepsize_hz	=  125 * kHz,
 		.symbol_rate_min	= 12000000,
 		.symbol_rate_max	= 30000000,
 	/*      .symbol_rate_tolerance	= ???,*/
diff --git a/drivers/media/dvb-frontends/ves1820.c b/drivers/media/dvb-frontends/ves1820.c
index 17600989f121..eb1249d81310 100644
--- a/drivers/media/dvb-frontends/ves1820.c
+++ b/drivers/media/dvb-frontends/ves1820.c
@@ -412,9 +412,9 @@ static const struct dvb_frontend_ops ves1820_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A },
 	.info = {
 		.name = "VLSI VES1820 DVB-C",
-		.frequency_stepsize = 62500,
-		.frequency_min = 47000000,
-		.frequency_max = 862000000,
+		.frequency_min_hz =  47 * MHz,
+		.frequency_max_hz = 862 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.caps = FE_CAN_QAM_16 |
 			FE_CAN_QAM_32 |
 			FE_CAN_QAM_64 |
diff --git a/drivers/media/dvb-frontends/ves1x93.c b/drivers/media/dvb-frontends/ves1x93.c
index 0c7b3286b04d..ddc5bfd84cd5 100644
--- a/drivers/media/dvb-frontends/ves1x93.c
+++ b/drivers/media/dvb-frontends/ves1x93.c
@@ -516,10 +516,10 @@ static const struct dvb_frontend_ops ves1x93_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "VLSI VES1x93 DVB-S",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 125,		 /* kHz for QPSK frontends */
-		.frequency_tolerance	= 29500,
+		.frequency_min_hz	=   950 * MHz,
+		.frequency_max_hz	=  2150 * MHz,
+		.frequency_stepsize_hz	=   125 * kHz,
+		.frequency_tolerance_hz	= 29500 * kHz,
 		.symbol_rate_min	= 1000000,
 		.symbol_rate_max	= 45000000,
 	/*	.symbol_rate_tolerance	=	???,*/
diff --git a/drivers/media/dvb-frontends/zl10036.c b/drivers/media/dvb-frontends/zl10036.c
index e5a432fd84c3..f1c92338015d 100644
--- a/drivers/media/dvb-frontends/zl10036.c
+++ b/drivers/media/dvb-frontends/zl10036.c
@@ -311,8 +311,8 @@ static int zl10036_set_params(struct dvb_frontend *fe)
 
 	/* ensure correct values
 	 * maybe redundant as core already checks this */
-	if ((frequency < fe->ops.info.frequency_min)
-	||  (frequency > fe->ops.info.frequency_max))
+	if ((frequency < fe->ops.info.frequency_min_hz / kHz)
+	||  (frequency > fe->ops.info.frequency_max_hz / kHz))
 		return -EINVAL;
 
 	/*
diff --git a/drivers/media/dvb-frontends/zl10353.c b/drivers/media/dvb-frontends/zl10353.c
index c9901f45deb7..42e63a3fa121 100644
--- a/drivers/media/dvb-frontends/zl10353.c
+++ b/drivers/media/dvb-frontends/zl10353.c
@@ -635,10 +635,9 @@ static const struct dvb_frontend_ops zl10353_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Zarlink ZL10353 DVB-T",
-		.frequency_min		= 174000000,
-		.frequency_max		= 862000000,
-		.frequency_stepsize	= 166667,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	= 174 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 |
 			FE_CAN_FEC_3_4 | FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 |
 			FE_CAN_FEC_AUTO |
diff --git a/drivers/media/firewire/firedtv-fe.c b/drivers/media/firewire/firedtv-fe.c
index a2ef4ede8ebe..69087ae6c1d0 100644
--- a/drivers/media/firewire/firedtv-fe.c
+++ b/drivers/media/firewire/firedtv-fe.c
@@ -152,7 +152,7 @@ static int fdtv_set_frontend(struct dvb_frontend *fe)
 void fdtv_frontend_init(struct firedtv *fdtv, const char *name)
 {
 	struct dvb_frontend_ops *ops = &fdtv->fe.ops;
-	struct dvb_frontend_info *fi = &ops->info;
+	struct dvb_frontend_internal_info *fi = &ops->info;
 
 	ops->init			= fdtv_dvb_init;
 	ops->sleep			= fdtv_sleep;
@@ -174,9 +174,9 @@ void fdtv_frontend_init(struct firedtv *fdtv, const char *name)
 	case FIREDTV_DVB_S:
 		ops->delsys[0]		= SYS_DVBS;
 
-		fi->frequency_min	= 950000;
-		fi->frequency_max	= 2150000;
-		fi->frequency_stepsize	= 125;
+		fi->frequency_min_hz	=   950 * MHz;
+		fi->frequency_max_hz	=  2150 * MHz;
+		fi->frequency_stepsize_hz = 125 * kHz;
 		fi->symbol_rate_min	= 1000000;
 		fi->symbol_rate_max	= 40000000;
 
@@ -194,9 +194,9 @@ void fdtv_frontend_init(struct firedtv *fdtv, const char *name)
 		ops->delsys[0]		= SYS_DVBS;
 		ops->delsys[1]		= SYS_DVBS2;
 
-		fi->frequency_min	= 950000;
-		fi->frequency_max	= 2150000;
-		fi->frequency_stepsize	= 125;
+		fi->frequency_min_hz	=   950 * MHz;
+		fi->frequency_max_hz	=  2150 * MHz;
+		fi->frequency_stepsize_hz = 125 * kHz;
 		fi->symbol_rate_min	= 1000000;
 		fi->symbol_rate_max	= 40000000;
 
@@ -214,9 +214,9 @@ void fdtv_frontend_init(struct firedtv *fdtv, const char *name)
 	case FIREDTV_DVB_C:
 		ops->delsys[0]		= SYS_DVBC_ANNEX_A;
 
-		fi->frequency_min	= 47000000;
-		fi->frequency_max	= 866000000;
-		fi->frequency_stepsize	= 62500;
+		fi->frequency_min_hz	=      47 * MHz;
+		fi->frequency_max_hz	=     866 * MHz;
+		fi->frequency_stepsize_hz = 62500;
 		fi->symbol_rate_min	= 870000;
 		fi->symbol_rate_max	= 6900000;
 
@@ -232,9 +232,9 @@ void fdtv_frontend_init(struct firedtv *fdtv, const char *name)
 	case FIREDTV_DVB_T:
 		ops->delsys[0]		= SYS_DVBT;
 
-		fi->frequency_min	= 49000000;
-		fi->frequency_max	= 861000000;
-		fi->frequency_stepsize	= 62500;
+		fi->frequency_min_hz	=  49 * MHz;
+		fi->frequency_max_hz	= 861 * MHz;
+		fi->frequency_stepsize_hz = 62500;
 
 		fi->caps		= FE_CAN_INVERSION_AUTO		|
 					  FE_CAN_FEC_2_3		|
diff --git a/drivers/media/pci/bt8xx/dst.c b/drivers/media/pci/bt8xx/dst.c
index 2e33b7236672..b98de2a22f78 100644
--- a/drivers/media/pci/bt8xx/dst.c
+++ b/drivers/media/pci/bt8xx/dst.c
@@ -1739,9 +1739,9 @@ static const struct dvb_frontend_ops dst_dvbt_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name = "DST DVB-T",
-		.frequency_min = 137000000,
-		.frequency_max = 858000000,
-		.frequency_stepsize = 166667,
+		.frequency_min_hz = 137 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 166667,
 		.caps = FE_CAN_FEC_AUTO			|
 			FE_CAN_QAM_AUTO			|
 			FE_CAN_QAM_16			|
@@ -1768,10 +1768,10 @@ static const struct dvb_frontend_ops dst_dvbs_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name = "DST DVB-S",
-		.frequency_min = 950000,
-		.frequency_max = 2150000,
-		.frequency_stepsize = 1000,	/* kHz for QPSK frontends */
-		.frequency_tolerance = 29500,
+		.frequency_min_hz   =  950 * MHz,
+		.frequency_max_hz   = 2150 * MHz,
+		.frequency_stepsize_hz = 1 * MHz,
+		.frequency_tolerance_hz = 29500 * kHz,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 	/*     . symbol_rate_tolerance	=	???,*/
@@ -1797,9 +1797,9 @@ static const struct dvb_frontend_ops dst_dvbc_ops = {
 	.delsys = { SYS_DVBC_ANNEX_A },
 	.info = {
 		.name = "DST DVB-C",
-		.frequency_stepsize = 62500,
-		.frequency_min = 51000000,
-		.frequency_max = 858000000,
+		.frequency_min_hz =  51 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_FEC_AUTO |
@@ -1826,9 +1826,9 @@ static const struct dvb_frontend_ops dst_atsc_ops = {
 	.delsys = { SYS_ATSC },
 	.info = {
 		.name = "DST ATSC",
-		.frequency_stepsize = 62500,
-		.frequency_min = 510000000,
-		.frequency_max = 858000000,
+		.frequency_min_hz = 510 * MHz,
+		.frequency_max_hz = 858 * MHz,
+		.frequency_stepsize_hz = 62500,
 		.symbol_rate_min = 1000000,
 		.symbol_rate_max = 45000000,
 		.caps = FE_CAN_FEC_AUTO | FE_CAN_QAM_AUTO | FE_CAN_QAM_64 | FE_CAN_QAM_256 | FE_CAN_8VSB
diff --git a/drivers/media/pci/bt8xx/dvb-bt8xx.c b/drivers/media/pci/bt8xx/dvb-bt8xx.c
index 6b2a9e6eb6a8..2f810b7130e6 100644
--- a/drivers/media/pci/bt8xx/dvb-bt8xx.c
+++ b/drivers/media/pci/bt8xx/dvb-bt8xx.c
@@ -602,8 +602,8 @@ static void frontend_init(struct dvb_bt8xx_card *card, u32 type)
 
 		if (card->fe != NULL) {
 			card->fe->ops.tuner_ops.calc_regs = thomson_dtt7579_tuner_calc_regs;
-			card->fe->ops.info.frequency_min = 174000000;
-			card->fe->ops.info.frequency_max = 862000000;
+			card->fe->ops.info.frequency_min_hz = 174 * MHz;
+			card->fe->ops.info.frequency_max_hz = 862 * MHz;
 		}
 		break;
 
@@ -655,8 +655,8 @@ static void frontend_init(struct dvb_bt8xx_card *card, u32 type)
 		card->fe = dvb_attach(mt352_attach, &advbt771_samsung_tdtc9251dh0_config, card->i2c_adapter);
 		if (card->fe != NULL) {
 			card->fe->ops.tuner_ops.calc_regs = advbt771_samsung_tdtc9251dh0_tuner_calc_regs;
-			card->fe->ops.info.frequency_min = 174000000;
-			card->fe->ops.info.frequency_max = 862000000;
+			card->fe->ops.info.frequency_min_hz = 174 * MHz;
+			card->fe->ops.info.frequency_max_hz = 862 * MHz;
 		}
 		break;
 
diff --git a/drivers/media/pci/ddbridge/ddbridge-sx8.c b/drivers/media/pci/ddbridge/ddbridge-sx8.c
index 4418604258d1..64f05f5ee039 100644
--- a/drivers/media/pci/ddbridge/ddbridge-sx8.c
+++ b/drivers/media/pci/ddbridge/ddbridge-sx8.c
@@ -454,10 +454,8 @@ static struct dvb_frontend_ops sx8_ops = {
 	.delsys = { SYS_DVBS, SYS_DVBS2 },
 	.info = {
 		.name			= "Digital Devices MaxSX8 MCI DVB-S/S2/S2X",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 0,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
 		.symbol_rate_min	= 100000,
 		.symbol_rate_max	= 100000000,
 		.caps			= FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/pci/mantis/mantis_vp3030.c b/drivers/media/pci/mantis/mantis_vp3030.c
index 14f6e153000c..9797c9fd8259 100644
--- a/drivers/media/pci/mantis/mantis_vp3030.c
+++ b/drivers/media/pci/mantis/mantis_vp3030.c
@@ -42,8 +42,8 @@ static struct zl10353_config mantis_vp3030_config = {
 static struct tda665x_config env57h12d5_config = {
 	.name			= "ENV57H12D5 (ET-50DT)",
 	.addr			= 0x60,
-	.frequency_min		=  47000000,
-	.frequency_max		= 862000000,
+	.frequency_min		=  47 * MHz,
+	.frequency_max		= 862 * MHz,
 	.frequency_offst	=   3616667,
 	.ref_multiplier		= 6, /* 1/6 MHz */
 	.ref_divider		= 100000, /* 1/6 MHz */
diff --git a/drivers/media/tuners/mxl5007t.c b/drivers/media/tuners/mxl5007t.c
index 4081fd97c3b2..54d9226aaff7 100644
--- a/drivers/media/tuners/mxl5007t.c
+++ b/drivers/media/tuners/mxl5007t.c
@@ -59,8 +59,6 @@ MODULE_PARM_DESC(debug, "set debug level");
 
 /* ------------------------------------------------------------------------- */
 
-#define MHz 1000000
-
 enum mxl5007t_mode {
 	MxL_MODE_ISDBT     =    0,
 	MxL_MODE_DVBT      =    1,
diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
index 221cf46b4140..9f74453799a2 100644
--- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
+++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
@@ -554,9 +554,9 @@ static const struct dvb_frontend_ops mxl111sf_demod_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name               = "MaxLinear MxL111SF DVB-T demodulator",
-		.frequency_min      = 177000000,
-		.frequency_max      = 858000000,
-		.frequency_stepsize = 166666,
+		.frequency_min_hz      = 177 * MHz,
+		.frequency_max_hz      = 858 * MHz,
+		.frequency_stepsize_hz = 166666,
 		.caps = FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_QAM_64 |
diff --git a/drivers/media/usb/dvb-usb/af9005-fe.c b/drivers/media/usb/dvb-usb/af9005-fe.c
index 7fbbc954da16..09cc3a21af65 100644
--- a/drivers/media/usb/dvb-usb/af9005-fe.c
+++ b/drivers/media/usb/dvb-usb/af9005-fe.c
@@ -1455,9 +1455,9 @@ static const struct dvb_frontend_ops af9005_fe_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		 .name = "AF9005 USB DVB-T",
-		 .frequency_min = 44250000,
-		 .frequency_max = 867250000,
-		 .frequency_stepsize = 250000,
+		 .frequency_min_hz =    44250 * kHz,
+		 .frequency_max_hz =   867250 * kHz,
+		 .frequency_stepsize_hz = 250 * kHz,
 		 .caps = FE_CAN_INVERSION_AUTO |
 		 FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 		 FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
index 5a2f81311fb7..df71df7ed524 100644
--- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c
+++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
@@ -295,9 +295,9 @@ static const struct dvb_frontend_ops cinergyt2_fe_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= DRIVER_NAME,
-		.frequency_min		= 174000000,
-		.frequency_max		= 862000000,
-		.frequency_stepsize	= 166667,
+		.frequency_min_hz	= 174 * MHz,
+		.frequency_max_hz	= 862 * MHz,
+		.frequency_stepsize_hz	= 166667,
 		.caps = FE_CAN_INVERSION_AUTO | FE_CAN_FEC_1_2
 			| FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4
 			| FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8
diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c
index 7e75aae34fb8..1ca3a51b2ae3 100644
--- a/drivers/media/usb/dvb-usb/dtt200u-fe.c
+++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c
@@ -230,9 +230,9 @@ static const struct dvb_frontend_ops dtt200u_fe_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "WideView USB DVB-T",
-		.frequency_min		= 44250000,
-		.frequency_max		= 867250000,
-		.frequency_stepsize	= 250000,
+		.frequency_min_hz	=  44250 * kHz,
+		.frequency_max_hz	= 867250 * kHz,
+		.frequency_stepsize_hz	=    250 * kHz,
 		.caps = FE_CAN_INVERSION_AUTO |
 				FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 				FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/usb/dvb-usb/friio-fe.c b/drivers/media/usb/dvb-usb/friio-fe.c
index 932f262452eb..e6bd0ed8d789 100644
--- a/drivers/media/usb/dvb-usb/friio-fe.c
+++ b/drivers/media/usb/dvb-usb/friio-fe.c
@@ -133,10 +133,10 @@ static int jdvbt90502_pll_set_freq(struct jdvbt90502_state *state, u32 freq)
 	u32 f;
 
 	deb_fe("%s: freq=%d, step=%d\n", __func__, freq,
-	       state->frontend.ops.info.frequency_stepsize);
+	       state->frontend.ops.info.frequency_stepsize_hz);
 	/* freq -> oscilator frequency conversion. */
 	/* freq: 473,000,000 + n*6,000,000 [+ 142857 (center freq. shift)] */
-	f = freq / state->frontend.ops.info.frequency_stepsize;
+	f = freq / state->frontend.ops.info.frequency_stepsize_hz;
 	/* add 399[1/7 MHZ] = 57MHz for the IF  */
 	f += 399;
 	/* add center frequency shift if necessary */
@@ -413,10 +413,9 @@ static const struct dvb_frontend_ops jdvbt90502_ops = {
 	.delsys = { SYS_ISDBT },
 	.info = {
 		.name			= "Comtech JDVBT90502 ISDB-T",
-		.frequency_min		= 473000000, /* UHF 13ch, center */
-		.frequency_max		= 767142857, /* UHF 62ch, center */
-		.frequency_stepsize	= JDVBT90502_PLL_CLK / JDVBT90502_PLL_DIVIDER,
-		.frequency_tolerance	= 0,
+		.frequency_min_hz	= 473000000, /* UHF 13ch, center */
+		.frequency_max_hz	= 767142857, /* UHF 62ch, center */
+		.frequency_stepsize_hz	= JDVBT90502_PLL_CLK / JDVBT90502_PLL_DIVIDER,
 
 		/* NOTE: this driver ignores all parameters but frequency. */
 		.caps = FE_CAN_INVERSION_AUTO |
diff --git a/drivers/media/usb/dvb-usb/vp702x-fe.c b/drivers/media/usb/dvb-usb/vp702x-fe.c
index ae48146e005c..9eb811452f2e 100644
--- a/drivers/media/usb/dvb-usb/vp702x-fe.c
+++ b/drivers/media/usb/dvb-usb/vp702x-fe.c
@@ -349,10 +349,9 @@ static const struct dvb_frontend_ops vp702x_fe_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name           = "Twinhan DST-like frontend (VP7021/VP7020) DVB-S",
-		.frequency_min       = 950000,
-		.frequency_max       = 2150000,
-		.frequency_stepsize  = 1000,   /* kHz for QPSK frontends */
-		.frequency_tolerance = 0,
+		.frequency_min_hz       =  950 * MHz,
+		.frequency_max_hz       = 2150 * MHz,
+		.frequency_stepsize_hz  =    1 * MHz,
 		.symbol_rate_min     = 1000000,
 		.symbol_rate_max     = 45000000,
 		.symbol_rate_tolerance = 500,  /* ppm */
diff --git a/drivers/media/usb/dvb-usb/vp7045-fe.c b/drivers/media/usb/dvb-usb/vp7045-fe.c
index f86040173b8d..1173ae29885b 100644
--- a/drivers/media/usb/dvb-usb/vp7045-fe.c
+++ b/drivers/media/usb/dvb-usb/vp7045-fe.c
@@ -162,9 +162,9 @@ static const struct dvb_frontend_ops vp7045_fe_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "Twinhan VP7045/46 USB DVB-T",
-		.frequency_min		= 44250000,
-		.frequency_max		= 867250000,
-		.frequency_stepsize	= 1000,
+		.frequency_min_hz	=  44250 * kHz,
+		.frequency_max_hz	= 867250 * kHz,
+		.frequency_stepsize_hz	=      1 * kHz,
 		.caps = FE_CAN_INVERSION_AUTO |
 				FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 				FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
index 6ea05d909024..278bf6c5855b 100644
--- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c
+++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
@@ -247,9 +247,9 @@ static const struct dvb_frontend_ops ttusbdecfe_dvbt_ops = {
 	.delsys = { SYS_DVBT },
 	.info = {
 		.name			= "TechnoTrend/Hauppauge DEC2000-t Frontend",
-		.frequency_min		= 51000000,
-		.frequency_max		= 858000000,
-		.frequency_stepsize	= 62500,
+		.frequency_min_hz	=  51 * MHz,
+		.frequency_max_hz	= 858 * MHz,
+		.frequency_stepsize_hz	= 62500,
 		.caps =	FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
 			FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_QAM_16 | FE_CAN_QAM_64 | FE_CAN_QAM_AUTO |
@@ -270,9 +270,9 @@ static const struct dvb_frontend_ops ttusbdecfe_dvbs_ops = {
 	.delsys = { SYS_DVBS },
 	.info = {
 		.name			= "TechnoTrend/Hauppauge DEC3000-s Frontend",
-		.frequency_min		= 950000,
-		.frequency_max		= 2150000,
-		.frequency_stepsize	= 125,
+		.frequency_min_hz	=  950 * MHz,
+		.frequency_max_hz	= 2150 * MHz,
+		.frequency_stepsize_hz	=  125 * kHz,
 		.symbol_rate_min        = 1000000,  /* guessed */
 		.symbol_rate_max        = 45000000, /* guessed */
 		.caps =	FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h
index aebaec2dc725..6f7a85ab3541 100644
--- a/include/media/dvb_frontend.h
+++ b/include/media/dvb_frontend.h
@@ -317,6 +317,34 @@ struct analog_demod_ops {
 
 struct dtv_frontend_properties;
 
+/**
+ * struct dvb_frontend_internal_info - Frontend properties and capabilities
+ *
+ * @name:			Name of the frontend
+ * @frequency_min_hz:		Minimal frequency supported by the frontend.
+ * @frequency_max_hz:		Minimal frequency supported by the frontend.
+ * @frequency_stepsize_hz:	All frequencies are multiple of this value.
+ * @frequency_tolerance_hz:	Frequency tolerance.
+ * @symbol_rate_min:		Minimal symbol rate, in bauds
+ *				(for Cable/Satellite systems).
+ * @symbol_rate_max:		Maximal symbol rate, in bauds
+ *				(for Cable/Satellite systems).
+ * @symbol_rate_tolerance:	Maximal symbol rate tolerance, in ppm
+ *				(for Cable/Satellite systems).
+ * @caps:			Capabilities supported by the frontend,
+ *				as specified in &enum fe_caps.
+ */
+struct dvb_frontend_internal_info {
+	char	name[128];
+	u32	frequency_min_hz;
+	u32	frequency_max_hz;
+	u32	frequency_stepsize_hz;
+	u32	frequency_tolerance_hz;
+	u32	symbol_rate_min;
+	u32	symbol_rate_max;
+	u32	symbol_rate_tolerance;
+	enum fe_caps caps;
+};
 
 /**
  * struct dvb_frontend_ops - Demodulation information and callbacks for
@@ -404,7 +432,7 @@ struct dtv_frontend_properties;
  * @analog_ops:		pointer to &struct analog_demod_ops
  */
 struct dvb_frontend_ops {
-	struct dvb_frontend_info info;
+	struct dvb_frontend_internal_info info;
 
 	u8 delsys[MAX_DELSYS];
 
-- 
cgit v1.2.3


From 0a4c58f5702858822621fa1177c7d3475f181ccb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:17 -0700
Subject: bpf: add ability to charge bpf maps memory dynamically

This commits extends existing bpf maps memory charging API
to support dynamic charging/uncharging.

This is required to account memory used by maps,
if all entries are created dynamically after
the map initialization.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h  |  2 ++
 kernel/bpf/syscall.c | 58 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 45 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b5ad95cf339..5a4a256473c3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -435,6 +435,8 @@ struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
 void *bpf_map_area_alloc(size_t size, int numa_node);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a31a1ba0f8ea..7958252a4d29 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,32 +181,60 @@ int bpf_map_precharge_memlock(u32 pages)
 	return 0;
 }
 
-static int bpf_map_charge_memlock(struct bpf_map *map)
+static int bpf_charge_memlock(struct user_struct *user, u32 pages)
 {
-	struct user_struct *user = get_current_user();
-	unsigned long memlock_limit;
+	unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) {
+		atomic_long_sub(pages, &user->locked_vm);
+		return -EPERM;
+	}
+	return 0;
+}
 
-	atomic_long_add(map->pages, &user->locked_vm);
+static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
+{
+	atomic_long_sub(pages, &user->locked_vm);
+}
+
+static int bpf_map_init_memlock(struct bpf_map *map)
+{
+	struct user_struct *user = get_current_user();
+	int ret;
 
-	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
-		atomic_long_sub(map->pages, &user->locked_vm);
+	ret = bpf_charge_memlock(user, map->pages);
+	if (ret) {
 		free_uid(user);
-		return -EPERM;
+		return ret;
 	}
 	map->user = user;
-	return 0;
+	return ret;
 }
 
-static void bpf_map_uncharge_memlock(struct bpf_map *map)
+static void bpf_map_release_memlock(struct bpf_map *map)
 {
 	struct user_struct *user = map->user;
-
-	atomic_long_sub(map->pages, &user->locked_vm);
+	bpf_uncharge_memlock(user, map->pages);
 	free_uid(user);
 }
 
+int bpf_map_charge_memlock(struct bpf_map *map, u32 pages)
+{
+	int ret;
+
+	ret = bpf_charge_memlock(map->user, pages);
+	if (ret)
+		return ret;
+	map->pages += pages;
+	return ret;
+}
+
+void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages)
+{
+	bpf_uncharge_memlock(map->user, pages);
+	map->pages -= pages;
+}
+
 static int bpf_map_alloc_id(struct bpf_map *map)
 {
 	int id;
@@ -256,7 +284,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
 {
 	struct bpf_map *map = container_of(work, struct bpf_map, work);
 
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 	security_bpf_map_free(map);
 	/* implementation dependent freeing */
 	map->ops->map_free(map);
@@ -492,7 +520,7 @@ static int map_create(union bpf_attr *attr)
 	if (err)
 		goto free_map_nouncharge;
 
-	err = bpf_map_charge_memlock(map);
+	err = bpf_map_init_memlock(map);
 	if (err)
 		goto free_map_sec;
 
@@ -515,7 +543,7 @@ static int map_create(union bpf_attr *attr)
 	return err;
 
 free_map:
-	bpf_map_uncharge_memlock(map);
+	bpf_map_release_memlock(map);
 free_map_sec:
 	security_bpf_map_free(map);
 free_map_nouncharge:
-- 
cgit v1.2.3


From de9cbbaadba5adf88a19e46df61f7054000838f6 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:18 -0700
Subject: bpf: introduce cgroup storage maps

This commit introduces BPF_MAP_TYPE_CGROUP_STORAGE maps:
a special type of maps which are implementing the cgroup storage.

>From the userspace point of view it's almost a generic
hash map with the (cgroup inode id, attachment type) pair
used as a key.

The only difference is that some operations are restricted:
  1) a user can't create new entries,
  2) a user can't remove existing entries.

The lookup from userspace is o(log(n)).

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  38 +++++
 include/linux/bpf.h        |   1 +
 include/linux/bpf_types.h  |   3 +
 include/uapi/linux/bpf.h   |   6 +
 kernel/bpf/Makefile        |   1 +
 kernel/bpf/local_storage.c | 376 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c       |   3 +
 kernel/bpf/verifier.c      |  12 ++
 8 files changed, 440 insertions(+)
 create mode 100644 kernel/bpf/local_storage.c

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d50c2f0a655a..7d00d58869ed 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -4,19 +4,39 @@
 
 #include <linux/errno.h>
 #include <linux/jump_label.h>
+#include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
 struct sock;
 struct sockaddr;
 struct cgroup;
 struct sk_buff;
+struct bpf_map;
+struct bpf_prog;
 struct bpf_sock_ops_kern;
+struct bpf_cgroup_storage;
 
 #ifdef CONFIG_CGROUP_BPF
 
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
+struct bpf_cgroup_storage_map;
+
+struct bpf_storage_buffer {
+	struct rcu_head rcu;
+	char data[0];
+};
+
+struct bpf_cgroup_storage {
+	struct bpf_storage_buffer *buf;
+	struct bpf_cgroup_storage_map *map;
+	struct bpf_cgroup_storage_key key;
+	struct list_head list;
+	struct rb_node node;
+	struct rcu_head rcu;
+};
+
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
@@ -77,6 +97,15 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+			     struct cgroup *cgroup,
+			     enum bpf_attach_type type);
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -221,6 +250,15 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+					    struct bpf_map *map) { return 0; }
+static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
+					      struct bpf_map *map) {}
+static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
+	struct bpf_prog *prog) { return 0; }
+static inline void bpf_cgroup_storage_free(
+	struct bpf_cgroup_storage *storage) {}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5a4a256473c3..9d1e4727495e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -282,6 +282,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
+	struct bpf_map *cgroup_storage;
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c5700c2d5549..add08be53b6f 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -37,6 +37,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops)
 #ifdef CONFIG_CGROUPS
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
+#ifdef CONFIG_CGROUP_BPF
+BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+#endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ebaaf7f3568..b10118ee5afe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -75,6 +75,11 @@ struct bpf_lpm_trie_key {
 	__u8	data[0];	/* Arbitrary size */
 };
 
+struct bpf_cgroup_storage_key {
+	__u64	cgroup_inode_id;	/* cgroup inode id */
+	__u32	attach_type;		/* program attach type */
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
 	BPF_MAP_CREATE,
@@ -120,6 +125,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CPUMAP,
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
+	BPF_MAP_TYPE_CGROUP_STORAGE,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f27f5496d6fe..e8906cbad81f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,6 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
new file mode 100644
index 000000000000..f23d3fdeba23
--- /dev/null
+++ b/kernel/bpf/local_storage.c
@@ -0,0 +1,376 @@
+//SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf-cgroup.h>
+#include <linux/bpf.h>
+#include <linux/bug.h>
+#include <linux/filter.h>
+#include <linux/mm.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_CGROUP_BPF
+
+#define LOCAL_STORAGE_CREATE_FLAG_MASK					\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+struct bpf_cgroup_storage_map {
+	struct bpf_map map;
+
+	spinlock_t lock;
+	struct bpf_prog *prog;
+	struct rb_root root;
+	struct list_head list;
+};
+
+static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
+{
+	return container_of(map, struct bpf_cgroup_storage_map, map);
+}
+
+static int bpf_cgroup_storage_key_cmp(
+	const struct bpf_cgroup_storage_key *key1,
+	const struct bpf_cgroup_storage_key *key2)
+{
+	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
+		return -1;
+	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
+		return 1;
+	else if (key1->attach_type < key2->attach_type)
+		return -1;
+	else if (key1->attach_type > key2->attach_type)
+		return 1;
+	return 0;
+}
+
+static struct bpf_cgroup_storage *cgroup_storage_lookup(
+	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
+	bool locked)
+{
+	struct rb_root *root = &map->root;
+	struct rb_node *node;
+
+	if (!locked)
+		spin_lock_bh(&map->lock);
+
+	node = root->rb_node;
+	while (node) {
+		struct bpf_cgroup_storage *storage;
+
+		storage = container_of(node, struct bpf_cgroup_storage, node);
+
+		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
+		case -1:
+			node = node->rb_left;
+			break;
+		case 1:
+			node = node->rb_right;
+			break;
+		default:
+			if (!locked)
+				spin_unlock_bh(&map->lock);
+			return storage;
+		}
+	}
+
+	if (!locked)
+		spin_unlock_bh(&map->lock);
+
+	return NULL;
+}
+
+static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
+				 struct bpf_cgroup_storage *storage)
+{
+	struct rb_root *root = &map->root;
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+	while (*new) {
+		struct bpf_cgroup_storage *this;
+
+		this = container_of(*new, struct bpf_cgroup_storage, node);
+
+		parent = *new;
+		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
+		case -1:
+			new = &((*new)->rb_left);
+			break;
+		case 1:
+			new = &((*new)->rb_right);
+			break;
+		default:
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&storage->node, parent, new);
+	rb_insert_color(&storage->node, root);
+
+	return 0;
+}
+
+static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage)
+		return NULL;
+
+	return &READ_ONCE(storage->buf)->data[0];
+}
+
+static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
+				      void *value, u64 flags)
+{
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+	struct bpf_storage_buffer *new;
+
+	if (flags & BPF_NOEXIST)
+		return -EINVAL;
+
+	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
+					key, false);
+	if (!storage)
+		return -ENOENT;
+
+	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+			   map->value_size, __GFP_ZERO | GFP_USER,
+			   map->numa_node);
+	if (!new)
+		return -ENOMEM;
+
+	memcpy(&new->data[0], value, map->value_size);
+
+	new = xchg(&storage->buf, new);
+	kfree_rcu(new, rcu);
+
+	return 0;
+}
+
+static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
+				       void *_next_key)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage_key *next = _next_key;
+	struct bpf_cgroup_storage *storage;
+
+	spin_lock_bh(&map->lock);
+
+	if (list_empty(&map->list))
+		goto enoent;
+
+	if (key) {
+		storage = cgroup_storage_lookup(map, key, true);
+		if (!storage)
+			goto enoent;
+
+		storage = list_next_entry(storage, list);
+		if (!storage)
+			goto enoent;
+	} else {
+		storage = list_first_entry(&map->list,
+					 struct bpf_cgroup_storage, list);
+	}
+
+	spin_unlock_bh(&map->lock);
+	next->attach_type = storage->key.attach_type;
+	next->cgroup_inode_id = storage->key.cgroup_inode_id;
+	return 0;
+
+enoent:
+	spin_unlock_bh(&map->lock);
+	return -ENOENT;
+}
+
+static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
+{
+	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_cgroup_storage_map *map;
+
+	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
+		return ERR_PTR(-EINVAL);
+
+	if (attr->value_size > PAGE_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
+		/* reserved bits should not be used */
+		return ERR_PTR(-EINVAL);
+
+	if (attr->max_entries)
+		/* max_entries is not used and enforced to be 0 */
+		return ERR_PTR(-EINVAL);
+
+	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
+			   __GFP_ZERO | GFP_USER, numa_node);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
+				  PAGE_SIZE) >> PAGE_SHIFT;
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&map->map, attr);
+
+	spin_lock_init(&map->lock);
+	map->root = RB_ROOT;
+	INIT_LIST_HEAD(&map->list);
+
+	return &map->map;
+}
+
+static void cgroup_storage_map_free(struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+	WARN_ON(!RB_EMPTY_ROOT(&map->root));
+	WARN_ON(!list_empty(&map->list));
+
+	kfree(map);
+}
+
+static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+const struct bpf_map_ops cgroup_storage_map_ops = {
+	.map_alloc = cgroup_storage_map_alloc,
+	.map_free = cgroup_storage_map_free,
+	.map_get_next_key = cgroup_storage_get_next_key,
+	.map_lookup_elem = cgroup_storage_lookup_elem,
+	.map_update_elem = cgroup_storage_update_elem,
+	.map_delete_elem = cgroup_storage_delete_elem,
+};
+
+int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	int ret = -EBUSY;
+
+	spin_lock_bh(&map->lock);
+
+	if (map->prog && map->prog != prog)
+		goto unlock;
+	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
+		goto unlock;
+
+	map->prog = prog;
+	prog->aux->cgroup_storage = _map;
+	ret = 0;
+unlock:
+	spin_unlock_bh(&map->lock);
+
+	return ret;
+}
+
+void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+
+	spin_lock_bh(&map->lock);
+	if (map->prog == prog) {
+		WARN_ON(prog->aux->cgroup_storage != _map);
+		map->prog = NULL;
+		prog->aux->cgroup_storage = NULL;
+	}
+	spin_unlock_bh(&map->lock);
+}
+
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+{
+	struct bpf_cgroup_storage *storage;
+	struct bpf_map *map;
+	u32 pages;
+
+	map = prog->aux->cgroup_storage;
+	if (!map)
+		return NULL;
+
+	pages = round_up(sizeof(struct bpf_cgroup_storage) +
+			 sizeof(struct bpf_storage_buffer) +
+			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	if (bpf_map_charge_memlock(map, pages))
+		return ERR_PTR(-EPERM);
+
+	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
+			       __GFP_ZERO | GFP_USER, map->numa_node);
+	if (!storage) {
+		bpf_map_uncharge_memlock(map, pages);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
+				    map->value_size, __GFP_ZERO | GFP_USER,
+				    map->numa_node);
+	if (!storage->buf) {
+		bpf_map_uncharge_memlock(map, pages);
+		kfree(storage);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	storage->map = (struct bpf_cgroup_storage_map *)map;
+
+	return storage;
+}
+
+void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
+{
+	u32 pages;
+	struct bpf_map *map;
+
+	if (!storage)
+		return;
+
+	map = &storage->map->map;
+	pages = round_up(sizeof(struct bpf_cgroup_storage) +
+			 sizeof(struct bpf_storage_buffer) +
+			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	bpf_map_uncharge_memlock(map, pages);
+
+	kfree_rcu(storage->buf, rcu);
+	kfree_rcu(storage, rcu);
+}
+
+void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
+			     struct cgroup *cgroup,
+			     enum bpf_attach_type type)
+{
+	struct bpf_cgroup_storage_map *map;
+
+	if (!storage)
+		return;
+
+	storage->key.attach_type = type;
+	storage->key.cgroup_inode_id = cgroup->kn->id.id;
+
+	map = storage->map;
+
+	spin_lock_bh(&map->lock);
+	WARN_ON(cgroup_storage_insert(map, storage));
+	list_add(&storage->list, &map->list);
+	spin_unlock_bh(&map->lock);
+}
+
+void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
+{
+	struct bpf_cgroup_storage_map *map;
+	struct rb_root *root;
+
+	if (!storage)
+		return;
+
+	map = storage->map;
+
+	spin_lock_bh(&map->lock);
+	root = &map->root;
+	rb_erase(&storage->node, root);
+
+	list_del(&storage->list);
+	spin_unlock_bh(&map->lock);
+}
+
+#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7958252a4d29..5af4e9e2722d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -957,6 +957,9 @@ static void free_used_maps(struct bpf_prog_aux *aux)
 {
 	int i;
 
+	if (aux->cgroup_storage)
+		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+
 	for (i = 0; i < aux->used_map_cnt; i++)
 		bpf_map_put(aux->used_maps[i]);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e948303a0ea8..7e75434a9e54 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5154,6 +5154,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 			}
 			env->used_maps[env->used_map_cnt++] = map;
 
+			if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
+			    bpf_cgroup_storage_assign(env->prog, map)) {
+				verbose(env,
+					"only one cgroup storage is allowed\n");
+				fdput(f);
+				return -EBUSY;
+			}
+
 			fdput(f);
 next_insn:
 			insn++;
@@ -5180,6 +5188,10 @@ static void release_maps(struct bpf_verifier_env *env)
 {
 	int i;
 
+	if (env->prog->aux->cgroup_storage)
+		bpf_cgroup_storage_release(env->prog,
+					   env->prog->aux->cgroup_storage);
+
 	for (i = 0; i < env->used_map_cnt; i++)
 		bpf_map_put(env->used_maps[i]);
 }
-- 
cgit v1.2.3


From aa0ad5b0391e268bdecf6cda31268388844f8afd Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:19 -0700
Subject: bpf: pass a pointer to a cgroup storage using pcpu variable

This commit introduces the bpf_cgroup_storage_set() helper,
which will be used to pass a pointer to a cgroup storage
to the bpf helper.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 15 +++++++++++++++
 kernel/bpf/local_storage.c |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7d00d58869ed..9a144ddbbc8f 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -4,6 +4,7 @@
 
 #include <linux/errno.h>
 #include <linux/jump_label.h>
+#include <linux/percpu.h>
 #include <linux/rbtree.h>
 #include <uapi/linux/bpf.h>
 
@@ -21,6 +22,8 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
 struct bpf_cgroup_storage_map;
 
 struct bpf_storage_buffer {
@@ -97,6 +100,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+{
+	struct bpf_storage_buffer *buf;
+
+	if (!storage)
+		return;
+
+	buf = READ_ONCE(storage->buf);
+	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+}
+
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
@@ -250,6 +264,7 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 					    struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index f23d3fdeba23..fc4e37f68f2a 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,6 +7,8 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
+DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK					\
-- 
cgit v1.2.3


From d7bf2c10af053191e931b58704cd862fccb7f9de Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:20 -0700
Subject: bpf: allocate cgroup storage entries on attaching bpf programs

If a bpf program is using cgroup local storage, allocate
a bpf_cgroup_storage structure automatically on attaching the program
to a cgroup and save the pointer into the corresponding bpf_prog_list
entry.
Analogically, release the cgroup local storage on detaching
of the bpf program.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  1 +
 kernel/bpf/cgroup.c        | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9a144ddbbc8f..f91b0f8ff3a9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -43,6 +43,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *storage;
 };
 
 struct bpf_prog_array;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index badabb0b435c..935274c86bfe 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -34,6 +34,8 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 		list_for_each_entry_safe(pl, tmp, progs, node) {
 			list_del(&pl->node);
 			bpf_prog_put(pl->prog);
+			bpf_cgroup_storage_unlink(pl->storage);
+			bpf_cgroup_storage_free(pl->storage);
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
@@ -188,6 +190,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
+	struct bpf_cgroup_storage *storage, *old_storage = NULL;
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_list *pl;
 	bool pl_was_allocated;
@@ -210,31 +213,47 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
+	storage = bpf_cgroup_storage_alloc(prog);
+	if (IS_ERR(storage))
+		return -ENOMEM;
+
 	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node)
-			if (pl->prog == prog)
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog == prog) {
 				/* disallow attaching the same prog twice */
+				bpf_cgroup_storage_free(storage);
 				return -EINVAL;
+			}
+		}
 
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-		if (!pl)
+		if (!pl) {
+			bpf_cgroup_storage_free(storage);
 			return -ENOMEM;
+		}
+
 		pl_was_allocated = true;
 		pl->prog = prog;
+		pl->storage = storage;
 		list_add_tail(&pl->node, progs);
 	} else {
 		if (list_empty(progs)) {
 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl)
+			if (!pl) {
+				bpf_cgroup_storage_free(storage);
 				return -ENOMEM;
+			}
 			pl_was_allocated = true;
 			list_add_tail(&pl->node, progs);
 		} else {
 			pl = list_first_entry(progs, typeof(*pl), node);
 			old_prog = pl->prog;
+			old_storage = pl->storage;
+			bpf_cgroup_storage_unlink(old_storage);
 			pl_was_allocated = false;
 		}
 		pl->prog = prog;
+		pl->storage = storage;
 	}
 
 	cgrp->bpf.flags[type] = flags;
@@ -257,10 +276,13 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	}
 
 	static_branch_inc(&cgroup_bpf_enabled_key);
+	if (old_storage)
+		bpf_cgroup_storage_free(old_storage);
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
+	bpf_cgroup_storage_link(storage, cgrp, type);
 	return 0;
 
 cleanup:
@@ -276,6 +298,9 @@ cleanup:
 
 	/* and cleanup the prog list */
 	pl->prog = old_prog;
+	bpf_cgroup_storage_free(pl->storage);
+	pl->storage = old_storage;
+	bpf_cgroup_storage_link(old_storage, cgrp, type);
 	if (pl_was_allocated) {
 		list_del(&pl->node);
 		kfree(pl);
@@ -356,6 +381,8 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
 	/* now can actually delete it from this cgroup list */
 	list_del(&pl->node);
+	bpf_cgroup_storage_unlink(pl->storage);
+	bpf_cgroup_storage_free(pl->storage);
 	kfree(pl);
 	if (list_empty(progs))
 		/* last program was detached, reset flags to zero */
-- 
cgit v1.2.3


From 394e40a29788820c9c0526b1c3497c9e0ec2a126 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:21 -0700
Subject: bpf: extend bpf_prog_array to store pointers to the cgroup storage

This patch converts bpf_prog_array from an array of prog pointers
to the array of struct bpf_prog_array_item elements.

This allows to save a cgroup storage pointer for each bpf program
efficiently attached to a cgroup.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/media/rc/bpf-lirc.c | 10 +++---
 include/linux/bpf.h         | 19 ++++++++----
 kernel/bpf/cgroup.c         | 21 +++++++------
 kernel/bpf/core.c           | 76 +++++++++++++++++++++++----------------------
 4 files changed, 70 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fcfab6635f9c..8c26df9b96c1 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -195,14 +195,16 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
  */
 void lirc_bpf_free(struct rc_dev *rcdev)
 {
-	struct bpf_prog **progs;
+	struct bpf_prog_array_item *item;
 
 	if (!rcdev->raw->progs)
 		return;
 
-	progs = rcu_dereference(rcdev->raw->progs)->progs;
-	while (*progs)
-		bpf_prog_put(*progs++);
+	item = rcu_dereference(rcdev->raw->progs)->items;
+	while (item->prog) {
+		bpf_prog_put(item->prog);
+		item++;
+	}
 
 	bpf_prog_array_free(rcdev->raw->progs);
 }
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9d1e4727495e..16be67888c30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -349,9 +349,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  * The 'struct bpf_prog_array *' should only be replaced with xchg()
  * since other cpus are walking the array of pointers in parallel.
  */
+struct bpf_prog_array_item {
+	struct bpf_prog *prog;
+	struct bpf_cgroup_storage *cgroup_storage;
+};
+
 struct bpf_prog_array {
 	struct rcu_head rcu;
-	struct bpf_prog *progs[0];
+	struct bpf_prog_array_item items[0];
 };
 
 struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
@@ -372,7 +377,8 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 
 #define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null)	\
 	({						\
-		struct bpf_prog **_prog, *__prog;	\
+		struct bpf_prog_array_item *_item;	\
+		struct bpf_prog *_prog;			\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
 		preempt_disable();			\
@@ -380,10 +386,11 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
 			goto _out;			\
-		_prog = _array->progs;			\
-		while ((__prog = READ_ONCE(*_prog))) {	\
-			_ret &= func(__prog, ctx);	\
-			_prog++;			\
+		_item = &_array->items[0];		\
+		while ((_prog = READ_ONCE(_item->prog))) {		\
+			bpf_cgroup_storage_set(_item->cgroup_storage);	\
+			_ret &= func(_prog, ctx);	\
+			_item++;			\
 		}					\
 _out:							\
 		rcu_read_unlock();			\
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 935274c86bfe..ddfa6cc13e57 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -117,15 +117,18 @@ static int compute_effective_progs(struct cgroup *cgrp,
 	cnt = 0;
 	p = cgrp;
 	do {
-		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
-			list_for_each_entry(pl,
-					    &p->bpf.progs[type], node) {
-				if (!pl->prog)
-					continue;
-				progs->progs[cnt++] = pl->prog;
-			}
-		p = cgroup_parent(p);
-	} while (p);
+		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+			continue;
+
+		list_for_each_entry(pl, &p->bpf.progs[type], node) {
+			if (!pl->prog)
+				continue;
+
+			progs->items[cnt].prog = pl->prog;
+			progs->items[cnt].cgroup_storage = pl->storage;
+			cnt++;
+		}
+	} while ((p = cgroup_parent(p)));
 
 	rcu_assign_pointer(*array, progs);
 	return 0;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 253aa8e79c7b..9abcf25ebf9f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1542,7 +1542,8 @@ struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
 {
 	if (prog_cnt)
 		return kzalloc(sizeof(struct bpf_prog_array) +
-			       sizeof(struct bpf_prog *) * (prog_cnt + 1),
+			       sizeof(struct bpf_prog_array_item) *
+			       (prog_cnt + 1),
 			       flags);
 
 	return &empty_prog_array.hdr;
@@ -1556,43 +1557,45 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
 	kfree_rcu(progs, rcu);
 }
 
-int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+int bpf_prog_array_length(struct bpf_prog_array __rcu *array)
 {
-	struct bpf_prog **prog;
+	struct bpf_prog_array_item *item;
 	u32 cnt = 0;
 
 	rcu_read_lock();
-	prog = rcu_dereference(progs)->progs;
-	for (; *prog; prog++)
-		if (*prog != &dummy_bpf_prog.prog)
+	item = rcu_dereference(array)->items;
+	for (; item->prog; item++)
+		if (item->prog != &dummy_bpf_prog.prog)
 			cnt++;
 	rcu_read_unlock();
 	return cnt;
 }
 
-static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
+
+static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
 				     u32 *prog_ids,
 				     u32 request_cnt)
 {
+	struct bpf_prog_array_item *item;
 	int i = 0;
 
-	for (; *prog; prog++) {
-		if (*prog == &dummy_bpf_prog.prog)
+	item = rcu_dereference(array)->items;
+	for (; item->prog; item++) {
+		if (item->prog == &dummy_bpf_prog.prog)
 			continue;
-		prog_ids[i] = (*prog)->aux->id;
+		prog_ids[i] = item->prog->aux->id;
 		if (++i == request_cnt) {
-			prog++;
+			item++;
 			break;
 		}
 	}
 
-	return !!(*prog);
+	return !!(item->prog);
 }
 
-int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *array,
 				__u32 __user *prog_ids, u32 cnt)
 {
-	struct bpf_prog **prog;
 	unsigned long err = 0;
 	bool nospc;
 	u32 *ids;
@@ -1611,8 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	if (!ids)
 		return -ENOMEM;
 	rcu_read_lock();
-	prog = rcu_dereference(progs)->progs;
-	nospc = bpf_prog_array_copy_core(prog, ids, cnt);
+	nospc = bpf_prog_array_copy_core(array, ids, cnt);
 	rcu_read_unlock();
 	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
 	kfree(ids);
@@ -1623,14 +1625,14 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
 	return 0;
 }
 
-void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
+void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *array,
 				struct bpf_prog *old_prog)
 {
-	struct bpf_prog **prog = progs->progs;
+	struct bpf_prog_array_item *item = array->items;
 
-	for (; *prog; prog++)
-		if (*prog == old_prog) {
-			WRITE_ONCE(*prog, &dummy_bpf_prog.prog);
+	for (; item->prog; item++)
+		if (item->prog == old_prog) {
+			WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
 			break;
 		}
 }
@@ -1641,7 +1643,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 			struct bpf_prog_array **new_array)
 {
 	int new_prog_cnt, carry_prog_cnt = 0;
-	struct bpf_prog **existing_prog;
+	struct bpf_prog_array_item *existing;
 	struct bpf_prog_array *array;
 	bool found_exclude = false;
 	int new_prog_idx = 0;
@@ -1650,15 +1652,15 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 	 * the new array.
 	 */
 	if (old_array) {
-		existing_prog = old_array->progs;
-		for (; *existing_prog; existing_prog++) {
-			if (*existing_prog == exclude_prog) {
+		existing = old_array->items;
+		for (; existing->prog; existing++) {
+			if (existing->prog == exclude_prog) {
 				found_exclude = true;
 				continue;
 			}
-			if (*existing_prog != &dummy_bpf_prog.prog)
+			if (existing->prog != &dummy_bpf_prog.prog)
 				carry_prog_cnt++;
-			if (*existing_prog == include_prog)
+			if (existing->prog == include_prog)
 				return -EEXIST;
 		}
 	}
@@ -1684,15 +1686,17 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
 
 	/* Fill in the new prog array */
 	if (carry_prog_cnt) {
-		existing_prog = old_array->progs;
-		for (; *existing_prog; existing_prog++)
-			if (*existing_prog != exclude_prog &&
-			    *existing_prog != &dummy_bpf_prog.prog)
-				array->progs[new_prog_idx++] = *existing_prog;
+		existing = old_array->items;
+		for (; existing->prog; existing++)
+			if (existing->prog != exclude_prog &&
+			    existing->prog != &dummy_bpf_prog.prog) {
+				array->items[new_prog_idx++].prog =
+					existing->prog;
+			}
 	}
 	if (include_prog)
-		array->progs[new_prog_idx++] = include_prog;
-	array->progs[new_prog_idx] = NULL;
+		array->items[new_prog_idx++].prog = include_prog;
+	array->items[new_prog_idx].prog = NULL;
 	*new_array = array;
 	return 0;
 }
@@ -1701,7 +1705,6 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
 			     u32 *prog_ids, u32 request_cnt,
 			     u32 *prog_cnt)
 {
-	struct bpf_prog **prog;
 	u32 cnt = 0;
 
 	if (array)
@@ -1714,8 +1717,7 @@ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
 		return 0;
 
 	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
-	prog = rcu_dereference_check(array, 1)->progs;
-	return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
+	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ? -ENOSPC
 								     : 0;
 }
 
-- 
cgit v1.2.3


From 3e6a4b3e0289dc9540a2c1d8a20657f4707fbabb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:22 -0700
Subject: bpf/verifier: introduce BPF_PTR_TO_MAP_VALUE

BPF_MAP_TYPE_CGROUP_STORAGE maps are special in a way
that the access from the bpf program side is lookup-free.
That means the result is guaranteed to be a valid
pointer to the cgroup storage; no NULL-check is required.

This patch introduces BPF_PTR_TO_MAP_VALUE return type,
which is required to cause the verifier accept programs,
which are not checking the map value pointer for being NULL.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h   | 1 +
 kernel/bpf/verifier.c | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 16be67888c30..ca4ac2a39def 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -155,6 +155,7 @@ enum bpf_arg_type {
 enum bpf_return_type {
 	RET_INTEGER,			/* function returns integer */
 	RET_VOID,			/* function doesn't return anything */
+	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7e75434a9e54..1ede16c8bb40 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2545,8 +2545,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		mark_reg_unknown(env, regs, BPF_REG_0);
 	} else if (fn->ret_type == RET_VOID) {
 		regs[BPF_REG_0].type = NOT_INIT;
-	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
-		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
+		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+		if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+		else
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 		/* There is no offset yet applied, variable or fixed */
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].off = 0;
-- 
cgit v1.2.3


From cd3394317653837e2eb5c5d0904a8996102af9fc Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 2 Aug 2018 14:27:24 -0700
Subject: bpf: introduce the bpf_get_local_storage() helper function

The bpf_get_local_storage() helper function is used
to get a pointer to the bpf local storage from a bpf program.

It takes a pointer to a storage map and flags as arguments.
Right now it accepts only cgroup storage maps, and flags
argument has to be 0. Further it can be extended to support
other types of local storage: e.g. thread local storage etc.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h | 21 ++++++++++++++++++++-
 kernel/bpf/cgroup.c      |  2 ++
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 20 ++++++++++++++++++++
 kernel/bpf/verifier.c    | 18 ++++++++++++++++++
 net/core/filter.c        | 23 ++++++++++++++++++++++-
 7 files changed, 85 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca4ac2a39def..cd8790d2c6ed 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -788,6 +788,8 @@ extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
 
+extern const struct bpf_func_proto bpf_get_local_storage_proto;
+
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b10118ee5afe..dd5758dc35d3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2095,6 +2095,24 @@ union bpf_attr {
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
  * 		on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ *	Description
+ *		Get the pointer to the local storage area.
+ *		The type and the size of the local storage is defined
+ *		by the *map* argument.
+ *		The *flags* meaning is specific for each map type,
+ *		and has to be 0 for cgroup local storage.
+ *
+ *		Depending on the bpf program type, a local storage area
+ *		can be shared between multiple instances of the bpf program,
+ *		running simultaneously.
+ *
+ *		A user should care about the synchronization by himself.
+ *		For example, by using the BPF_STX_XADD instruction to alter
+ *		the shared data.
+ *	Return
+ *		Pointer to the local storage area.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2177,7 +2195,8 @@ union bpf_attr {
 	FN(rc_repeat),			\
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
-	FN(get_current_cgroup_id),
+	FN(get_current_cgroup_id),	\
+	FN(get_local_storage),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ddfa6cc13e57..0a4fe5a7dc91 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -684,6 +684,8 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9abcf25ebf9f..4d09e610777f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1795,6 +1795,7 @@ const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_sock_map_update_proto __weak;
 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
+const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 73065e2d23c2..1991466b8327 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -193,4 +193,24 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
+
+DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+
+BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
+{
+	/* map and flags arguments are not used now,
+	 * but provide an ability to extend the API
+	 * for other types of local storages.
+	 * verifier checks that their values are correct.
+	 */
+	return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+}
+
+const struct bpf_func_proto bpf_get_local_storage_proto = {
+	.func		= bpf_get_local_storage,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_MAP_VALUE,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+};
 #endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1ede16c8bb40..587468a9c37d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2127,6 +2127,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_CGROUP_STORAGE:
+		if (func_id != BPF_FUNC_get_local_storage)
+			goto error;
+		break;
 	/* devmap returns a pointer to a live net_device ifindex that we cannot
 	 * allow to be modified from bpf side. So do not allow lookup elements
 	 * for now.
@@ -2209,6 +2213,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
+	case BPF_FUNC_get_local_storage:
+		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2533,6 +2541,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	}
 
 	regs = cur_regs(env);
+
+	/* check that flags argument in get_local_storage(map, flags) is 0,
+	 * this is required because get_local_storage() can't return an error.
+	 */
+	if (func_id == BPF_FUNC_get_local_storage &&
+	    !register_is_null(&regs[BPF_REG_2])) {
+		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+		return -EINVAL;
+	}
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/net/core/filter.c b/net/core/filter.c
index 9bb9a4488e25..9f73aae2f089 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4820,6 +4820,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	 */
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4844,6 +4846,8 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		}
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_sock_addr_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -4866,6 +4870,17 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
+	default:
+		return sk_filter_func_proto(func_id, prog);
+	}
+}
+
 static const struct bpf_func_proto *
 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4988,6 +5003,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sock_hash_update_proto;
 	case BPF_FUNC_get_socket_cookie:
 		return &bpf_get_socket_cookie_sock_ops_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5007,6 +5024,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_msg_cork_bytes_proto;
 	case BPF_FUNC_msg_pull_data:
 		return &bpf_msg_pull_data_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -5034,6 +5053,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_redirect_map_proto;
 	case BPF_FUNC_sk_redirect_hash:
 		return &bpf_sk_redirect_hash_proto;
+	case BPF_FUNC_get_local_storage:
+		return &bpf_get_local_storage_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6838,7 +6859,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 };
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
-	.get_func_proto		= sk_filter_func_proto,
+	.get_func_proto		= cg_skb_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
-- 
cgit v1.2.3


From 0425e3e6e0c7f92f2c2a396d902871b7a81da0eb Mon Sep 17 00:00:00 2001
From: Yixian Liu <liuyixian@huawei.com>
Date: Thu, 2 Aug 2018 10:38:05 +0800
Subject: RDMA/hns: Support flush cqe for hip08 in kernel space

According to IB protocol, there are some cases that work requests must
return the flush error completion status through the completion queue. Due
to hardware limitation, the driver needs to assist the flush process.

This patch adds the support of flush cqe for hip08 in the cases that
needed, such as poll cqe, post send, post recv and aeqe handle.

The patch also considered the compatibility between kernel and user space.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_db.c     |   2 +
 drivers/infiniband/hw/hns/hns_roce_device.h |  18 ++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 190 +++++++++++++++++++++++++---
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  50 +++++++-
 include/uapi/rdma/hns-abi.h                 |   1 +
 5 files changed, 241 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index ebee2782a573..e2f93c1ce86a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 found:
 	db->dma = sg_dma_address(page->umem->sg_head.sgl) +
 		  (virt & ~PAGE_MASK);
+	page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK;
+	db->virt_addr = sg_virt(page->umem->sg_head.sgl);
 	db->u.user_page = page;
 	refcount_inc(&page->refcount);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 0eab5a2f45e5..9a24fd0ee3e7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -110,6 +110,7 @@
 
 enum {
 	HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
+	HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
 };
 
 enum {
@@ -190,7 +191,8 @@ enum {
 	HNS_ROCE_CAP_FLAG_REREG_MR		= BIT(0),
 	HNS_ROCE_CAP_FLAG_ROCE_V1_V2		= BIT(1),
 	HNS_ROCE_CAP_FLAG_RQ_INLINE		= BIT(2),
-	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3)
+	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3),
+	HNS_ROCE_CAP_FLAG_SQ_RECORD_DB		= BIT(4),
 };
 
 enum hns_roce_mtt_type {
@@ -385,6 +387,7 @@ struct hns_roce_db {
 		struct hns_roce_user_db_page *user_page;
 	} u;
 	dma_addr_t	dma;
+	void		*virt_addr;
 	int		index;
 	int		order;
 };
@@ -524,7 +527,9 @@ struct hns_roce_qp {
 	struct hns_roce_buf	hr_buf;
 	struct hns_roce_wq	rq;
 	struct hns_roce_db	rdb;
+	struct hns_roce_db	sdb;
 	u8			rdb_en;
+	u8			sdb_en;
 	u32			doorbell_qpn;
 	__le32			sq_signal_bits;
 	u32			sq_next_wqe;
@@ -641,6 +646,8 @@ struct hns_roce_eq {
 	int				shift;
 	dma_addr_t			cur_eqe_ba;
 	dma_addr_t			nxt_eqe_ba;
+	int				event_type;
+	int				sub_type;
 };
 
 struct hns_roce_eq_table {
@@ -727,6 +734,14 @@ struct hns_roce_caps {
 	u64		flags;
 };
 
+struct hns_roce_work {
+	struct hns_roce_dev *hr_dev;
+	struct work_struct work;
+	u32 qpn;
+	int event_type;
+	int sub_type;
+};
+
 struct hns_roce_hw {
 	int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
@@ -819,6 +834,7 @@ struct hns_roce_dev {
 	u32			tptr_size; /*only for hw v1*/
 	const struct hns_roce_hw *hw;
 	void			*priv;
+	struct workqueue_struct *irq_workq;
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 268d55bfca07..0218c0f8c2a7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -165,6 +165,11 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	return 0;
 }
 
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+				 const struct ib_qp_attr *attr,
+				 int attr_mask, enum ib_qp_state cur_state,
+				 enum ib_qp_state new_state);
+
 static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 				 const struct ib_send_wr *wr,
 				 const struct ib_send_wr **bad_wr)
@@ -176,12 +181,14 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
+	struct ib_qp_attr attr;
 	unsigned int sge_ind = 0;
 	unsigned int owner_bit;
 	unsigned long flags;
 	unsigned int ind;
 	void *wqe = NULL;
 	bool loopback;
+	int attr_mask;
 	u32 tmp_len;
 	int ret = 0;
 	u8 *smac;
@@ -524,6 +531,19 @@ out:
 
 		qp->sq_next_wqe = ind;
 		qp->next_sge = sge_ind;
+
+		if (qp->state == IB_QPS_ERR) {
+			attr_mask = IB_QP_STATE;
+			attr.qp_state = IB_QPS_ERR;
+
+			ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
+						    qp->state, IB_QPS_ERR);
+			if (ret) {
+				spin_unlock_irqrestore(&qp->sq.lock, flags);
+				*bad_wr = wr;
+				return ret;
+			}
+		}
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -540,8 +560,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct hns_roce_rinl_sge *sge_list;
 	struct device *dev = hr_dev->dev;
+	struct ib_qp_attr attr;
 	unsigned long flags;
 	void *wqe = NULL;
+	int attr_mask;
 	int ret = 0;
 	int nreq;
 	int ind;
@@ -610,6 +632,20 @@ out:
 		wmb();
 
 		*hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
+
+		if (hr_qp->state == IB_QPS_ERR) {
+			attr_mask = IB_QP_STATE;
+			attr.qp_state = IB_QPS_ERR;
+
+			ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
+						    attr_mask, hr_qp->state,
+						    IB_QPS_ERR);
+			if (ret) {
+				spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+				*bad_wr = wr;
+				return ret;
+			}
+		}
 	}
 	spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
 
@@ -1217,7 +1253,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->flags		= HNS_ROCE_CAP_FLAG_REREG_MR |
 				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
 				  HNS_ROCE_CAP_FLAG_RQ_INLINE |
-				  HNS_ROCE_CAP_FLAG_RECORD_DB;
+				  HNS_ROCE_CAP_FLAG_RECORD_DB |
+				  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
 	caps->pkey_table_len[0] = 1;
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
 	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
@@ -2009,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 	struct hns_roce_v2_cqe *cqe;
 	struct hns_roce_qp *hr_qp;
 	struct hns_roce_wq *wq;
+	struct ib_qp_attr attr;
+	int attr_mask;
 	int is_send;
 	u16 wqe_ctr;
 	u32 opcode;
@@ -2095,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		break;
 	}
 
-	/* CQE status error, directly return */
-	if (wc->status != IB_WC_SUCCESS)
+	/* flush cqe if wc status is error, excluding flush error */
+	if ((wc->status != IB_WC_SUCCESS) &&
+	    (wc->status != IB_WC_WR_FLUSH_ERR)) {
+		attr_mask = IB_QP_STATE;
+		attr.qp_state = IB_QPS_ERR;
+		return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
+					     &attr, attr_mask,
+					     (*cur_qp)->state, IB_QPS_ERR);
+	}
+
+	if (wc->status == IB_WC_WR_FLUSH_ERR)
 		return 0;
 
 	if (is_send) {
@@ -3450,6 +3498,24 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		goto out;
 	}
 
+	/* When QP state is err, SQ and RQ WQE should be flushed */
+	if (new_state == IB_QPS_ERR) {
+		roce_set_field(context->byte_160_sq_ci_pi,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+			       hr_qp->sq.head);
+		roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+		roce_set_field(context->byte_84_rq_ci_pi,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
+			       hr_qp->rq.head);
+		roce_set_field(qpc_mask->byte_84_rq_ci_pi,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+	}
+
 	if (attr_mask & IB_QP_AV) {
 		const struct ib_global_route *grh =
 					    rdma_ah_read_grh(&attr->ah_attr);
@@ -3806,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
 
 	if (is_user) {
+		if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1))
+			hns_roce_db_unmap_user(
+				to_hr_ucontext(hr_qp->ibqp.uobject->context),
+				&hr_qp->sdb);
+
 		if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
 			hns_roce_db_unmap_user(
 				to_hr_ucontext(hr_qp->ibqp.uobject->context),
@@ -3888,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 	return ret;
 }
 
+static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+	struct hns_roce_qp *hr_qp;
+	struct ib_qp_attr attr;
+	int attr_mask;
+	int ret;
+
+	hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+	if (!hr_qp) {
+		dev_warn(hr_dev->dev, "no hr_qp can be found!\n");
+		return;
+	}
+
+	if (hr_qp->ibqp.uobject) {
+		if (hr_qp->sdb_en == 1) {
+			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+			hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+		} else {
+			dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
+			return;
+		}
+	}
+
+	attr_mask = IB_QP_STATE;
+	attr.qp_state = IB_QPS_ERR;
+	ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+				    hr_qp->state, IB_QPS_ERR);
+	if (ret)
+		dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n",
+			qpn);
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+	struct hns_roce_work *irq_work =
+				container_of(work, struct hns_roce_work, work);
+	u32 qpn = irq_work->qpn;
+
+	switch (irq_work->event_type) {
+	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+		hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+		break;
+	default:
+		break;
+	}
+
+	kfree(irq_work);
+}
+
+static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
+				      struct hns_roce_eq *eq, u32 qpn)
+{
+	struct hns_roce_work *irq_work;
+
+	irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC);
+	if (!irq_work)
+		return;
+
+	INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
+	irq_work->hr_dev = hr_dev;
+	irq_work->qpn = qpn;
+	irq_work->event_type = eq->event_type;
+	irq_work->sub_type = eq->sub_type;
+	queue_work(hr_dev->irq_workq, &(irq_work->work));
+}
+
 static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 {
 	u32 doorbell[2];
@@ -3990,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
 
 static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
 				      struct hns_roce_aeqe *aeqe,
-				      int event_type)
+				      int event_type, u32 qpn)
 {
 	struct device *dev = hr_dev->dev;
-	u32 qpn;
-
-	qpn = roce_get_field(aeqe->event.qp_event.qp,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 	switch (event_type) {
 	case HNS_ROCE_EVENT_TYPE_COMM_EST:
@@ -4024,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
 
 static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
 				      struct hns_roce_aeqe *aeqe,
-				      int event_type)
+				      int event_type, u32 cqn)
 {
 	struct device *dev = hr_dev->dev;
-	u32 cqn;
-
-	cqn = roce_get_field(aeqe->event.cq_event.cq,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 	switch (event_type) {
 	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -4096,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 	struct hns_roce_aeqe *aeqe;
 	int aeqe_found = 0;
 	int event_type;
+	int sub_type;
+	u32 qpn;
+	u32 cqn;
 
 	while ((aeqe = next_aeqe_sw_v2(eq))) {
 
@@ -4107,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		event_type = roce_get_field(aeqe->asyn,
 					    HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
 					    HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
+		sub_type = roce_get_field(aeqe->asyn,
+					  HNS_ROCE_V2_AEQE_SUB_TYPE_M,
+					  HNS_ROCE_V2_AEQE_SUB_TYPE_S);
+		qpn = roce_get_field(aeqe->event.qp_event.qp,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+		cqn = roce_get_field(aeqe->event.cq_event.cq,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 		switch (event_type) {
 		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4120,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-			hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type);
+			hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
+						  qpn);
 			break;
 		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
 		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
@@ -4129,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			break;
 		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-			hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type);
+			hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
+						  cqn);
 			break;
 		case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
 			dev_warn(dev, "DB overflow.\n");
@@ -4152,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			break;
 		};
 
+		eq->event_type = event_type;
+		eq->sub_type = sub_type;
 		++eq->cons_index;
 		aeqe_found = 1;
 
@@ -4159,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			dev_warn(dev, "cons_index overflow, set back to 0.\n");
 			eq->cons_index = 0;
 		}
+		hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
 	}
 
 	set_eq_cons_index_v2(eq);
@@ -4975,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 		}
 	}
 
+	hr_dev->irq_workq =
+		create_singlethread_workqueue("hns_roce_irq_workqueue");
+	if (!hr_dev->irq_workq) {
+		dev_err(dev, "Create irq workqueue failed!\n");
+		goto err_request_irq_fail;
+	}
+
 	return 0;
 
 err_request_irq_fail:
@@ -5025,6 +5178,9 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
 		kfree(hr_dev->irq_names[i]);
 
 	kfree(eq_table->eq);
+
+	flush_workqueue(hr_dev->irq_workq);
+	destroy_workqueue(hr_dev->irq_workq);
 }
 
 static const struct hns_roce_hw hns_roce_hw_v2 = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index baaf906f7c2e..c1dbddcd58c9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -489,6 +489,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
+{
+	if (attr->qp_type == IB_QPT_XRC_TGT)
+		return 0;
+
+	return 1;
+}
+
 static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
 {
 	if (attr->qp_type == IB_QPT_XRC_INI ||
@@ -613,6 +621,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 			goto err_mtt;
 		}
 
+		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
+		    (udata->inlen >= sizeof(ucmd)) &&
+		    (udata->outlen >= sizeof(resp)) &&
+		    hns_roce_qp_has_sq(init_attr)) {
+			ret = hns_roce_db_map_user(
+					to_hr_ucontext(ib_pd->uobject->context),
+					ucmd.sdb_addr, &hr_qp->sdb);
+			if (ret) {
+				dev_err(dev, "sq record doorbell map failed!\n");
+				goto err_mtt;
+			}
+
+			/* indicate kernel supports sq record db */
+			resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
+			hr_qp->sdb_en = 1;
+		}
+
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr)) {
@@ -621,7 +646,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 					ucmd.db_addr, &hr_qp->rdb);
 			if (ret) {
 				dev_err(dev, "rq record doorbell map failed!\n");
-				goto err_mtt;
+				goto err_sq_dbmap;
 			}
 		}
 	} else {
@@ -734,7 +759,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
 		(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
 
-		/* indicate kernel supports record db */
+		/* indicate kernel supports rq record db */
 		resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
 		ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
 		if (ret)
@@ -770,6 +795,16 @@ err_wrid:
 		kfree(hr_qp->rq.wrid);
 	}
 
+err_sq_dbmap:
+	if (ib_pd->uobject)
+		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
+		    (udata->inlen >= sizeof(ucmd)) &&
+		    (udata->outlen >= sizeof(resp)) &&
+		    hns_roce_qp_has_sq(init_attr))
+			hns_roce_db_unmap_user(
+					to_hr_ucontext(ib_pd->uobject->context),
+					&hr_qp->sdb);
+
 err_mtt:
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
 
@@ -903,6 +938,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	new_state = attr_mask & IB_QP_STATE ?
 		    attr->qp_state : cur_state;
 
+	if (ibqp->uobject &&
+	    (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
+		if (hr_qp->sdb_en == 1) {
+			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+			hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+		} else {
+			dev_warn(dev, "flush cqe is not supported in userspace!\n");
+			goto out;
+		}
+	}
+
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
 				IB_LINK_LAYER_ETHERNET)) {
 		dev_err(dev, "ib_modify_qp_is_ok failed\n");
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 78613b609fa8..c1f87735514f 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -53,6 +53,7 @@ struct hns_roce_ib_create_qp {
 	__u8    log_sq_stride;
 	__u8    sq_no_prefetch;
 	__u8    reserved[5];
+	__aligned_u64 sdb_addr;
 };
 
 struct hns_roce_ib_create_qp_resp {
-- 
cgit v1.2.3


From b9855f4ca0fe582aabfdbd08f0d856b22486e157 Mon Sep 17 00:00:00 2001
From: Potnuri Bharat Teja <bharat@chelsio.com>
Date: Thu, 2 Aug 2018 11:33:03 +0530
Subject: iw_cxgb4: RDMA write with immediate support

Adds iw_cxgb4 functionality to support RDMA_WRITE_WITH_IMMEDATE opcode.

Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/cxgb4/cq.c          | 23 ++++++++++++++++---
 drivers/infiniband/hw/cxgb4/qp.c          | 37 ++++++++++++++++++++++++-------
 drivers/infiniband/hw/cxgb4/t4.h          | 16 ++++++++++++-
 drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 18 ++++++++++++---
 include/uapi/rdma/cxgb4-abi.h             |  3 ++-
 5 files changed, 81 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 0c13f2838c84..6d3042794094 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -791,15 +791,32 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
 			wc->byte_len = CQE_LEN(&cqe);
 		else
 			wc->byte_len = 0;
-		wc->opcode = IB_WC_RECV;
-		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
-		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
+
+		switch (CQE_OPCODE(&cqe)) {
+		case FW_RI_SEND:
+			wc->opcode = IB_WC_RECV;
+			break;
+		case FW_RI_SEND_WITH_INV:
+		case FW_RI_SEND_WITH_SE_INV:
+			wc->opcode = IB_WC_RECV;
 			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
 			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
 			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+			break;
+		case FW_RI_WRITE_IMMEDIATE:
+			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+			wc->ex.imm_data = CQE_IMM_DATA(&cqe);
+			wc->wc_flags |= IB_WC_WITH_IMM;
+			break;
+		default:
+			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
+			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
+			ret = -EINVAL;
+			goto out;
 		}
 	} else {
 		switch (CQE_OPCODE(&cqe)) {
+		case FW_RI_WRITE_IMMEDIATE:
 		case FW_RI_RDMA_WRITE:
 			wc->opcode = IB_WC_RDMA_WRITE;
 			break;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index dbd99370a0de..5d30cd14f795 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -555,7 +555,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
 
 	if (wr->num_sge > T4_MAX_SEND_SGE)
 		return -EINVAL;
-	wqe->write.r2 = 0;
+
+	/*
+	 * iWARP protocol supports 64 bit immediate data but rdma api
+	 * limits it to 32bit.
+	 */
+	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
+	else
+		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
 	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
 	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
 	if (wr->num_sge) {
@@ -848,6 +856,9 @@ static int ib_to_fw_opcode(int ib_opcode)
 	case IB_WR_RDMA_WRITE:
 		opcode = FW_RI_RDMA_WRITE;
 		break;
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		opcode = FW_RI_WRITE_IMMEDIATE;
+		break;
 	case IB_WR_RDMA_READ:
 	case IB_WR_RDMA_READ_WITH_INV:
 		opcode = FW_RI_READ_REQ;
@@ -970,6 +981,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	enum fw_wr_opcodes fw_opcode = 0;
 	enum fw_ri_wr_flags fw_flags;
 	struct c4iw_qp *qhp;
+	struct c4iw_dev *rhp;
 	union t4_wr *wqe = NULL;
 	u32 num_wrs;
 	struct t4_swsqe *swsqe;
@@ -977,6 +989,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	u16 idx = 0;
 
 	qhp = to_c4iw_qp(ibqp);
+	rhp = qhp->rhp;
 	spin_lock_irqsave(&qhp->lock, flag);
 
 	/*
@@ -1021,6 +1034,13 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 				swsqe->opcode = FW_RI_SEND_WITH_INV;
 			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
 			break;
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
+				err = -EINVAL;
+				break;
+			}
+			fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
+			/*FALLTHROUGH*/
 		case IB_WR_RDMA_WRITE:
 			fw_opcode = FW_RI_RDMA_WRITE_WR;
 			swsqe->opcode = FW_RI_RDMA_WRITE;
@@ -1031,8 +1051,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			fw_opcode = FW_RI_RDMA_READ_WR;
 			swsqe->opcode = FW_RI_READ_REQ;
 			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
-				c4iw_invalidate_mr(qhp->rhp,
-						   wr->sg_list[0].lkey);
+				c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
 			} else {
 				fw_flags = 0;
@@ -1048,7 +1067,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
 
 			swsqe->opcode = FW_RI_FAST_REGISTER;
-			if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
+			if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
 			    !mhp->attr.state && mhp->mpl_len <= 2) {
 				fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
 				build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
@@ -1057,7 +1076,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 				fw_opcode = FW_RI_FR_NSMR_WR;
 				err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
 				       mhp, &len16,
-				       qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
+				       rhp->rdev.lldi.ulptx_memwrite_dsgl);
 				if (err)
 					break;
 			}
@@ -1070,7 +1089,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			fw_opcode = FW_RI_INV_LSTAG_WR;
 			swsqe->opcode = FW_RI_LOCAL_INV;
 			err = build_inv_stag(wqe, wr, &len16);
-			c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
+			c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
 			break;
 		default:
 			pr_warn("%s post of type=%d TBD!\n", __func__,
@@ -1089,7 +1108,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		swsqe->wr_id = wr->wr_id;
 		if (c4iw_wr_log) {
 			swsqe->sge_ts = cxgb4_read_sge_timestamp(
-					qhp->rhp->rdev.lldi.ports[0]);
+					rhp->rdev.lldi.ports[0]);
 			swsqe->host_time = ktime_get();
 		}
 
@@ -1103,7 +1122,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 		t4_sq_produce(&qhp->wq, len16);
 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 	}
-	if (!qhp->rhp->rdev.status_page->db_off) {
+	if (!rhp->rdev.status_page->db_off) {
 		t4_ring_sq_db(&qhp->wq, idx, wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
@@ -2098,6 +2117,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 			}
 			uresp.flags = C4IW_QPF_ONCHIP;
 		}
+		if (rhp->rdev.lldi.write_w_imm_support)
+			uresp.flags |= C4IW_QPF_WRITE_W_IMM;
 		uresp.qid_mask = rhp->rdev.qpmask;
 		uresp.sqid = qhp->wq.sq.qid;
 		uresp.sq_size = qhp->wq.sq.size;
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 11d55fc2ded7..0fb3e55f37c1 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -190,7 +190,19 @@ struct t4_cqe {
 			__be32 abs_rqe_idx;
 		} srcqe;
 		struct {
-			__be64 imm_data;
+			__be32 mo;
+			__be32 msn;
+			/*
+			 * Use union for immediate data to be consistent with
+			 * stack's 32 bit data and iWARP spec's 64 bit data.
+			 */
+			union {
+				struct {
+					__be32 imm_data32;
+					u32 reserved;
+				} ib_imm_data;
+				__be64 imm_data64;
+			} iw_imm_data;
 		} imm_data_rcqe;
 
 		u64 drain_cookie;
@@ -253,6 +265,8 @@ struct t4_cqe {
 #define CQE_WRID_STAG(x)  (be32_to_cpu((x)->u.rcqe.stag))
 #define CQE_WRID_MSN(x)   (be32_to_cpu((x)->u.rcqe.msn))
 #define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx))
+#define CQE_IMM_DATA(x)( \
+	(x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32)
 
 /* used for SQ completion processing */
 #define CQE_WRID_SQ_IDX(x)	((x)->u.scqe.cidx)
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 0f4f86b004d6..62606e66ba20 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -50,7 +50,8 @@ enum fw_ri_wr_opcode {
 	FW_RI_BYPASS			= 0xd,
 	FW_RI_RECEIVE			= 0xe,
 
-	FW_RI_SGE_EC_CR_RETURN		= 0xf
+	FW_RI_SGE_EC_CR_RETURN		= 0xf,
+	FW_RI_WRITE_IMMEDIATE           = FW_RI_RDMA_INIT
 };
 
 enum fw_ri_wr_flags {
@@ -59,7 +60,8 @@ enum fw_ri_wr_flags {
 	FW_RI_SOLICITED_EVENT_FLAG	= 0x04,
 	FW_RI_READ_FENCE_FLAG		= 0x08,
 	FW_RI_LOCAL_FENCE_FLAG		= 0x10,
-	FW_RI_RDMA_READ_INVALIDATE	= 0x20
+	FW_RI_RDMA_READ_INVALIDATE	= 0x20,
+	FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40
 };
 
 enum fw_ri_mpa_attrs {
@@ -546,7 +548,17 @@ struct fw_ri_rdma_write_wr {
 	__u16  wrid;
 	__u8   r1[3];
 	__u8   len16;
-	__be64 r2;
+	/*
+	 * Use union for immediate data to be consistent with stack's 32 bit
+	 * data and iWARP spec's 64 bit data.
+	 */
+	union {
+		struct {
+			__be32 imm_data32;
+			u32 reserved;
+		} ib_imm_data;
+		__be64 imm_data64;
+	} iw_imm_data;
 	__be32 plen;
 	__be32 stag_sink;
 	__be64 to_sink;
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index d0b2d829471a..f85ec1a3f727 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -65,7 +65,8 @@ struct c4iw_create_cq_resp {
 };
 
 enum {
-	C4IW_QPF_ONCHIP = (1 << 0)
+	C4IW_QPF_ONCHIP	= (1 << 0),
+	C4IW_QPF_WRITE_W_IMM = (1 << 1)
 };
 
 struct c4iw_create_qp_resp {
-- 
cgit v1.2.3


From 9f49a5b5c21d58aa84e16cfdc5e99e49faefcb7a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Sun, 29 Jul 2018 11:34:56 +0300
Subject: RDMA/netdev: Use priv_destructor for netdev cleanup

Now that the unregister_netdev flow for IPoIB no longer relies on external
code we can now introduce the use of priv_destructor and
needs_free_netdev.

The rdma_netdev flow is switched to use the netdev common priv_destructor
instead of the special free_rdma_netdev and the IPOIB ULP adjusted:
 - priv_destructor needs to switch to point to the ULP's destructor
   which will then call the rdma_ndev's in the right order
 - We need to be careful around the error unwind of register_netdev
   as it sometimes calls priv_destructor on failure
 - ULPs need to use ndo_init/uninit to ensure proper ordering
   of failures around register_netdev

Switching to priv_destructor is a necessary pre-requisite to using
the rtnl new_link mechanism.

The VNIC user for rdma_netdev should also be revised, but that is left for
another patch.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |  10 --
 drivers/infiniband/ulp/ipoib/ipoib.h               |   2 +
 drivers/infiniband/ulp/ipoib/ipoib_main.c          | 101 +++++++++++++--------
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c          |  68 ++++++++------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  37 ++++----
 include/linux/mlx5/driver.h                        |   3 -
 include/rdma/ib_verbs.h                            |   6 +-
 7 files changed, 129 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 06d6309b719a..13744b4631b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5157,11 +5157,6 @@ done:
 	return num_counters;
 }
 
-static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
-{
-	return mlx5_rdma_netdev_free(netdev);
-}
-
 static struct net_device*
 mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  u8 port_num,
@@ -5171,17 +5166,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  void (*setup)(struct net_device *))
 {
 	struct net_device *netdev;
-	struct rdma_netdev *rn;
 
 	if (type != RDMA_NETDEV_IPOIB)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
 					name, setup);
-	if (likely(!IS_ERR_OR_NULL(netdev))) {
-		rn = netdev_priv(netdev);
-		rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
-	}
 	return netdev;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 02ad1a60dc80..d2cb0a8500e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -323,6 +323,7 @@ struct ipoib_dev_priv {
 	spinlock_t lock;
 
 	struct net_device *dev;
+	void (*next_priv_destructor)(struct net_device *dev);
 
 	struct napi_struct send_napi;
 	struct napi_struct recv_napi;
@@ -481,6 +482,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 	kref_put(&ah->ref, ipoib_free_ah);
 }
 int ipoib_open(struct net_device *dev);
+void ipoib_intf_free(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 67ab52eec3e9..73d917d57f93 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2062,6 +2062,13 @@ void ipoib_setup_common(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+
+	/*
+	 * unregister_netdev always frees the netdev, we use this mode
+	 * consistently to unify all the various unregister paths, including
+	 * those connected to rtnl_link_ops which require it.
+	 */
+	dev->needs_free_netdev = true;
 }
 
 static void ipoib_build_priv(struct net_device *dev)
@@ -2116,9 +2123,7 @@ static struct net_device
 	rn->send = ipoib_send;
 	rn->attach_mcast = ipoib_mcast_attach;
 	rn->detach_mcast = ipoib_mcast_detach;
-	rn->free_rdma_netdev = free_netdev;
 	rn->hca = hca;
-
 	dev->netdev_ops = &ipoib_netdev_default_pf;
 
 	return dev;
@@ -2173,6 +2178,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
 
 	rn = netdev_priv(dev);
 	rn->clnt_priv = priv;
+
+	/*
+	 * Only the child register_netdev flows can handle priv_destructor
+	 * being set, so we force it to NULL here and handle manually until it
+	 * is safe to turn on.
+	 */
+	priv->next_priv_destructor = dev->priv_destructor;
+	dev->priv_destructor = NULL;
+
 	ipoib_build_priv(dev);
 
 	return priv;
@@ -2181,6 +2195,27 @@ free_priv:
 	return NULL;
 }
 
+void ipoib_intf_free(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	dev->priv_destructor = priv->next_priv_destructor;
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
+
+	/*
+	 * There are some error flows around register_netdev failing that may
+	 * attempt to call priv_destructor twice, prevent that from happening.
+	 */
+	dev->priv_destructor = NULL;
+
+	/* unregister/destroy is very complicated. Make bugs more obvious. */
+	rn->clnt_priv = NULL;
+
+	kfree(priv);
+}
+
 static ssize_t show_pkey(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
@@ -2341,7 +2376,7 @@ static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
-	struct rdma_netdev *rn;
+	struct net_device *ndev;
 	int result;
 
 	priv = ipoib_intf_alloc(hca, port, format);
@@ -2349,6 +2384,7 @@ static struct net_device *ipoib_add_port(const char *format,
 		pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
 		return ERR_PTR(-ENOMEM);
 	}
+	ndev = priv->dev;
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
@@ -2357,38 +2393,43 @@ static struct net_device *ipoib_add_port(const char *format,
 	/* call event handler to ensure pkey in sync */
 	queue_work(ipoib_workqueue, &priv->flush_heavy);
 
-	result = register_netdev(priv->dev);
+	result = register_netdev(ndev);
 	if (result) {
 		pr_warn("%s: couldn't register ipoib port %d; error %d\n",
 			hca->name, port, result);
-		ipoib_parent_unregister_pre(priv->dev);
-		goto device_init_failed;
+
+		ipoib_parent_unregister_pre(ndev);
+		ipoib_intf_free(ndev);
+		free_netdev(ndev);
+
+		return ERR_PTR(result);
 	}
 
-	result = -ENOMEM;
-	if (ipoib_cm_add_mode_attr(priv->dev))
+	/*
+	 * We cannot set priv_destructor before register_netdev because we
+	 * need priv to be always valid during the error flow to execute
+	 * ipoib_parent_unregister_pre(). Instead handle it manually and only
+	 * enter priv_destructor mode once we are completely registered.
+	 */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	if (ipoib_cm_add_mode_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_pkey_attr(priv->dev))
+	if (ipoib_add_pkey_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_umcast_attr(priv->dev))
+	if (ipoib_add_umcast_attr(ndev))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
+	if (device_create_file(&ndev->dev, &dev_attr_create_child))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
+	if (device_create_file(&ndev->dev, &dev_attr_delete_child))
 		goto sysfs_failed;
 
-	return priv->dev;
+	return ndev;
 
 sysfs_failed:
-	ipoib_parent_unregister_pre(priv->dev);
-	unregister_netdev(priv->dev);
-
-device_init_failed:
-	rn = netdev_priv(priv->dev);
-	rn->free_rdma_netdev(priv->dev);
-	kfree(priv);
-
-	return ERR_PTR(result);
+	ipoib_parent_unregister_pre(ndev);
+	unregister_netdev(ndev);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void ipoib_add_one(struct ib_device *device)
@@ -2426,33 +2467,19 @@ static void ipoib_add_one(struct ib_device *device)
 
 static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
-	struct ipoib_dev_priv *priv, *tmp, *cpriv, *tcpriv;
+	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list = client_data;
 
 	if (!dev_list)
 		return;
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
-		struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
-
 		ipoib_parent_unregister_pre(priv->dev);
 
 		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
 		mutex_lock(&priv->sysfs_mutex);
 		unregister_netdev(priv->dev);
 		mutex_unlock(&priv->sysfs_mutex);
-
-		parent_rn->free_rdma_netdev(priv->dev);
-
-		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
-			struct rdma_netdev *child_rn;
-
-			child_rn = netdev_priv(cpriv->dev);
-			child_rn->free_rdma_netdev(cpriv->dev);
-			kfree(cpriv);
-		}
-
-		kfree(priv);
 	}
 
 	kfree(dev_list);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 3103729a73fd..7776334cf8c5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -50,31 +50,53 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
 }
 static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
 
+/*
+ * NOTE: If this function fails then the priv->dev will remain valid, however
+ * priv can have been freed and must not be touched by caller in the error
+ * case.
+ *
+ * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
+ * free the net_device (just as rtnl_newlink does) otherwise the net_device
+ * will be freed when the rtnl is unlocked.
+ */
 int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 		     u16 pkey, int type)
 {
+	struct net_device *ndev = priv->dev;
 	int result;
 
+	ASSERT_RTNL();
+
 	priv->parent = ppriv->dev;
 	priv->pkey = pkey;
 	priv->child_type = type;
 
-	result = register_netdevice(priv->dev);
+	/* We do not need to touch priv if register_netdevice fails */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	result = register_netdevice(ndev);
 	if (result) {
 		ipoib_warn(priv, "failed to initialize; error %i", result);
+
+		/*
+		 * register_netdevice sometimes calls priv_destructor,
+		 * sometimes not. Make sure it was done.
+		 */
+		if (ndev->priv_destructor)
+			ndev->priv_destructor(ndev);
 		return result;
 	}
 
 	/* RTNL childs don't need proprietary sysfs entries */
 	if (type == IPOIB_LEGACY_CHILD) {
-		if (ipoib_cm_add_mode_attr(priv->dev))
+		if (ipoib_cm_add_mode_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_pkey_attr(priv->dev))
+		if (ipoib_add_pkey_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_umcast_attr(priv->dev))
+		if (ipoib_add_umcast_attr(ndev))
 			goto sysfs_failed;
 
-		if (device_create_file(&priv->dev->dev, &dev_attr_parent))
+		if (device_create_file(&ndev->dev, &dev_attr_parent))
 			goto sysfs_failed;
 	}
 
@@ -91,6 +113,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 {
 	struct ipoib_dev_priv *ppriv, *priv;
 	char intf_name[IFNAMSIZ];
+	struct net_device *ndev;
 	struct ipoib_dev_priv *tpriv;
 	int result;
 
@@ -122,12 +145,6 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 		return restart_syscall();
 	}
 
-	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
-	if (!priv) {
-		result = -ENOMEM;
-		goto out;
-	}
-
 	/*
 	 * First ensure this isn't a duplicate. We check the parent device and
 	 * then all of the legacy child interfaces to make sure the Pkey
@@ -146,21 +163,23 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 		}
 	}
 
+	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+	if (!priv) {
+		result = -ENOMEM;
+		goto out;
+	}
+	ndev = priv->dev;
+
 	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
 
+	if (result && ndev->reg_state == NETREG_UNINITIALIZED)
+		free_netdev(ndev);
+
 out:
 	up_write(&ppriv->vlan_rwsem);
 	rtnl_unlock();
 	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (result && priv) {
-		struct rdma_netdev *rn;
-
-		rn = netdev_priv(priv->dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-	}
-
 	return result;
 }
 
@@ -212,14 +231,5 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 	rtnl_unlock();
 	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (dev) {
-		struct rdma_netdev *rn;
-
-		rn = netdev_priv(dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-		return 0;
-	}
-
-	return -ENODEV;
+	return (dev) ? 0 : -ENODEV;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index af3bb2f7a504..b8d150d2fd72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -580,6 +580,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	const struct mlx5e_profile *profile = priv->profile;
+
+	mlx5e_detach_netdev(priv);
+	profile->cleanup(priv);
+	destroy_workqueue(priv->wq);
+
+	if (!ipriv->sub_interface) {
+		mlx5i_pkey_qpn_ht_cleanup(netdev);
+		mlx5e_destroy_mdev_resources(priv->mdev);
+	}
+}
+
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
@@ -653,6 +669,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	rn->detach_mcast = mlx5i_detach_mcast;
 	rn->set_id = mlx5i_set_pkey_index;
 
+	netdev->priv_destructor = mlx5_rdma_netdev_free;
+	netdev->needs_free_netdev = 1;
+
 	return netdev;
 
 destroy_ht:
@@ -665,21 +684,3 @@ err_free_netdev:
 	return NULL;
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
-
-void mlx5_rdma_netdev_free(struct net_device *netdev)
-{
-	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
-	struct mlx5i_priv *ipriv = priv->ppriv;
-	const struct mlx5e_profile *profile = priv->profile;
-
-	mlx5e_detach_netdev(priv);
-	profile->cleanup(priv);
-	destroy_workqueue(priv->wq);
-
-	if (!ipriv->sub_interface) {
-		mlx5i_pkey_qpn_ht_cleanup(netdev);
-		mlx5e_destroy_mdev_resources(priv->mdev);
-	}
-	free_netdev(netdev);
-}
-EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 957199c20a0f..96498ff6beb6 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1218,14 +1218,11 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-
-static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
 #else
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
 					  void (*setup)(struct net_device *));
-void mlx5_rdma_netdev_free(struct net_device *netdev);
 #endif /* CONFIG_MLX5_CORE_IPOIB */
 
 struct mlx5_profile {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index dea770e5b9ae..4ffe3e11e8fb 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2203,7 +2203,11 @@ struct rdma_netdev {
 	struct ib_device  *hca;
 	u8                 port_num;
 
-	/* cleanup function must be specified */
+	/*
+	 * cleanup function must be specified.
+	 * FIXME: This is only used for OPA_VNIC and that usage should be
+	 * removed too.
+	 */
 	void (*free_rdma_netdev)(struct net_device *netdev);
 
 	/* control functions */
-- 
cgit v1.2.3


From 401c0d7712eb3189023efc9c0708a2ac984ed62e Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 3 Aug 2018 11:50:39 +0200
Subject: spi: spi-mem: Constify spi_mem->name

There is no reason to make spi_mem->name modifiable. Moreover,
spi_mem_ops->get_name() returns a const char *, which generates a gcc
warning when assigning the value returned by spi_mem_ops->get_name()
to spi_mem->name.

Fixes: 5d27a9c8ea9e ("spi: spi-mem: Extend the SPI mem interface to set a custom memory name")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 0d64ccc4e584..62722fb7472d 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -134,7 +134,7 @@ struct spi_mem_op {
 struct spi_mem {
 	struct spi_device *spi;
 	void *drvpriv;
-	char *name;
+	const char *name;
 };
 
 /**
-- 
cgit v1.2.3


From 43490e8046b5d273eb82710b04290c5997138adc Mon Sep 17 00:00:00 2001
From: Stephan Müller <smueller@chronox.de>
Date: Fri, 20 Jul 2018 19:42:01 +0200
Subject: crypto: drbg - in-place cipher operation for CTR

The cipher implementations of the kernel crypto API favor in-place
cipher operations. Thus, switch the CTR cipher operation in the DRBG to
perform in-place operations. This is implemented by using the output
buffer as input buffer and zeroizing it before the cipher operation to
implement a CTR encryption of a NULL buffer.

The speed improvement is quite visibile with the following comparison
using the LRNG implementation.

Without the patch set:

      16 bytes|           12.267661 MB/s|    61338304 bytes |  5000000213 ns
      32 bytes|           23.603770 MB/s|   118018848 bytes |  5000000073 ns
      64 bytes|           46.732262 MB/s|   233661312 bytes |  5000000241 ns
     128 bytes|           90.038042 MB/s|   450190208 bytes |  5000000244 ns
     256 bytes|          160.399616 MB/s|   801998080 bytes |  5000000393 ns
     512 bytes|          259.878400 MB/s|  1299392000 bytes |  5000001675 ns
    1024 bytes|          386.050662 MB/s|  1930253312 bytes |  5000001661 ns
    2048 bytes|          493.641728 MB/s|  2468208640 bytes |  5000001598 ns
    4096 bytes|          581.835981 MB/s|  2909179904 bytes |  5000003426 ns

With the patch set:

      16 bytes |         17.051142 MB/s |     85255712 bytes |  5000000854 ns
      32 bytes |         32.695898 MB/s |    163479488 bytes |  5000000544 ns
      64 bytes |         64.490739 MB/s |    322453696 bytes |  5000000954 ns
     128 bytes |        123.285043 MB/s |    616425216 bytes |  5000000201 ns
     256 bytes |        233.434573 MB/s |   1167172864 bytes |  5000000573 ns
     512 bytes |        384.405197 MB/s |   1922025984 bytes |  5000000671 ns
    1024 bytes |        566.313370 MB/s |   2831566848 bytes |  5000001080 ns
    2048 bytes |        744.518042 MB/s |   3722590208 bytes |  5000000926 ns
    4096 bytes |        867.501670 MB/s |   4337508352 bytes |  5000002181 ns

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c         | 34 ++++++++++++++--------------------
 include/crypto/drbg.h |  2 --
 2 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/crypto/drbg.c b/crypto/drbg.c
index ee302fd229ad..bc52d9562611 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -261,8 +261,7 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg);
 static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			      u8 *inbuf, u32 inbuflen,
 			      u8 *outbuf, u32 outlen);
-#define DRBG_CTR_NULL_LEN 128
-#define DRBG_OUTSCRATCHLEN DRBG_CTR_NULL_LEN
+#define DRBG_OUTSCRATCHLEN 256
 
 /* BCC function for CTR DRBG as defined in 10.4.3 */
 static int drbg_ctr_bcc(struct drbg_state *drbg,
@@ -555,8 +554,7 @@ static int drbg_ctr_generate(struct drbg_state *drbg,
 	}
 
 	/* 10.2.1.5.2 step 4.1 */
-	ret = drbg_kcapi_sym_ctr(drbg, drbg->ctr_null_value, DRBG_CTR_NULL_LEN,
-				 buf, len);
+	ret = drbg_kcapi_sym_ctr(drbg, NULL, 0, buf, len);
 	if (ret)
 		return ret;
 
@@ -1644,9 +1642,6 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg)
 		skcipher_request_free(drbg->ctr_req);
 	drbg->ctr_req = NULL;
 
-	kfree(drbg->ctr_null_value_buf);
-	drbg->ctr_null_value = NULL;
-
 	kfree(drbg->outscratchpadbuf);
 	drbg->outscratchpadbuf = NULL;
 
@@ -1697,15 +1692,6 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 					crypto_req_done, &drbg->ctr_wait);
 
 	alignmask = crypto_skcipher_alignmask(sk_tfm);
-	drbg->ctr_null_value_buf = kzalloc(DRBG_CTR_NULL_LEN + alignmask,
-					   GFP_KERNEL);
-	if (!drbg->ctr_null_value_buf) {
-		drbg_fini_sym_kernel(drbg);
-		return -ENOMEM;
-	}
-	drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf,
-					       alignmask + 1);
-
 	drbg->outscratchpadbuf = kmalloc(DRBG_OUTSCRATCHLEN + alignmask,
 					 GFP_KERNEL);
 	if (!drbg->outscratchpadbuf) {
@@ -1716,7 +1702,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
 					      alignmask + 1);
 
 	sg_init_table(&drbg->sg_in, 1);
-	sg_init_table(&drbg->sg_out, 1);
+	sg_init_one(&drbg->sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
 
 	return alignmask;
 }
@@ -1747,10 +1733,18 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			      u8 *outbuf, u32 outlen)
 {
 	struct scatterlist *sg_in = &drbg->sg_in, *sg_out = &drbg->sg_out;
+	u32 scratchpad_use = min_t(u32, outlen, DRBG_OUTSCRATCHLEN);
 	int ret;
 
-	sg_set_buf(sg_in, inbuf, inlen);
-	sg_set_buf(sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
+	if (inbuf) {
+		/* Use caller-provided input buffer */
+		sg_set_buf(sg_in, inbuf, inlen);
+	} else {
+		/* Use scratchpad for in-place operation */
+		inlen = scratchpad_use;
+		memset(drbg->outscratchpad, 0, scratchpad_use);
+		sg_set_buf(sg_in, drbg->outscratchpad, scratchpad_use);
+	}
 
 	while (outlen) {
 		u32 cryptlen = min3(inlen, outlen, (u32)DRBG_OUTSCRATCHLEN);
@@ -1766,6 +1760,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 		crypto_init_wait(&drbg->ctr_wait);
 
 		memcpy(outbuf, drbg->outscratchpad, cryptlen);
+		memzero_explicit(drbg->outscratchpad, cryptlen);
 
 		outlen -= cryptlen;
 		outbuf += cryptlen;
@@ -1773,7 +1768,6 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 	ret = 0;
 
 out:
-	memzero_explicit(drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
 	return ret;
 }
 #endif /* CONFIG_CRYPTO_DRBG_CTR */
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 54b9f5d375f5..3fb581bf3b87 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -122,8 +122,6 @@ struct drbg_state {
 
 	struct crypto_skcipher *ctr_handle;	/* CTR mode cipher handle */
 	struct skcipher_request *ctr_req;	/* CTR mode request handle */
-	__u8 *ctr_null_value_buf;		/* CTR mode unaligned buffer */
-	__u8 *ctr_null_value;			/* CTR mode aligned zero buf */
 	__u8 *outscratchpadbuf;			/* CTR mode output scratchpad */
         __u8 *outscratchpad;			/* CTR mode aligned outbuf */
 	struct crypto_wait ctr_wait;		/* CTR mode async wait obj */
-- 
cgit v1.2.3


From 8c30fbe63e4eff1b221f8db70ae5c48a9331eae0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:01:33 -0700
Subject: crypto: scatterwalk - remove 'chain' argument from
 scatterwalk_crypto_chain()

All callers pass chain=0 to scatterwalk_crypto_chain().

Remove this unneeded parameter.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/lrw.c                  | 4 ++--
 crypto/scatterwalk.c          | 2 +-
 crypto/xts.c                  | 4 ++--
 include/crypto/scatterwalk.h  | 8 +-------
 net/tls/tls_device_fallback.c | 2 +-
 5 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/crypto/lrw.c b/crypto/lrw.c
index 954a7064a179..393a782679c7 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -188,7 +188,7 @@ static int post_crypt(struct skcipher_request *req)
 	if (rctx->dst != sg) {
 		rctx->dst[0] = *sg;
 		sg_unmark_end(rctx->dst);
-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
 	}
 	rctx->dst[0].length -= offset - sg->offset;
 	rctx->dst[0].offset = offset;
@@ -265,7 +265,7 @@ static int pre_crypt(struct skcipher_request *req)
 	if (rctx->src != sg) {
 		rctx->src[0] = *sg;
 		sg_unmark_end(rctx->src);
-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
 	}
 	rctx->src[0].length -= offset - sg->offset;
 	rctx->src[0].offset = offset;
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index c16c94f88733..d0b92c1cd6e9 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -91,7 +91,7 @@ struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
 
 	sg_init_table(dst, 2);
 	sg_set_page(dst, sg_page(src), src->length - len, src->offset + len);
-	scatterwalk_crypto_chain(dst, sg_next(src), 0, 2);
+	scatterwalk_crypto_chain(dst, sg_next(src), 2);
 
 	return dst;
 }
diff --git a/crypto/xts.c b/crypto/xts.c
index 12284183bd20..ccf55fbb8bc2 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -138,7 +138,7 @@ static int post_crypt(struct skcipher_request *req)
 	if (rctx->dst != sg) {
 		rctx->dst[0] = *sg;
 		sg_unmark_end(rctx->dst);
-		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
 	}
 	rctx->dst[0].length -= offset - sg->offset;
 	rctx->dst[0].offset = offset;
@@ -204,7 +204,7 @@ static int pre_crypt(struct skcipher_request *req)
 	if (rctx->src != sg) {
 		rctx->src[0] = *sg;
 		sg_unmark_end(rctx->src);
-		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
 	}
 	rctx->src[0].length -= offset - sg->offset;
 	rctx->src[0].offset = offset;
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 880e6be9e95e..eac72840a7d2 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -22,14 +22,8 @@
 #include <linux/scatterlist.h>
 
 static inline void scatterwalk_crypto_chain(struct scatterlist *head,
-					    struct scatterlist *sg,
-					    int chain, int num)
+					    struct scatterlist *sg, int num)
 {
-	if (chain) {
-		head->length += sg->length;
-		sg = sg_next(sg);
-	}
-
 	if (sg)
 		sg_chain(head, num, sg);
 	else
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 748914abdb60..4e1ec12bc0fb 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -42,7 +42,7 @@ static void chain_to_walk(struct scatterlist *sg, struct scatter_walk *walk)
 	sg_set_page(sg, sg_page(src),
 		    src->length - diff, walk->offset);
 
-	scatterwalk_crypto_chain(sg, sg_next(src), 0, 2);
+	scatterwalk_crypto_chain(sg, sg_next(src), 2);
 }
 
 static int tls_enc_record(struct aead_request *aead_req,
-- 
cgit v1.2.3


From 3dd8cc00c34ab9cde4bcc808ebc607e7b9861280 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 23 Jul 2018 10:04:28 -0700
Subject: crypto: scatterwalk - remove scatterwalk_samebuf()

scatterwalk_samebuf() is never used.  Remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/scatterwalk.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include')

diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index eac72840a7d2..a66c127a20ed 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -30,13 +30,6 @@ static inline void scatterwalk_crypto_chain(struct scatterlist *head,
 		sg_mark_end(head);
 }
 
-static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in,
-						struct scatter_walk *walk_out)
-{
-	return !(((sg_page(walk_in->sg) - sg_page(walk_out->sg)) << PAGE_SHIFT) +
-		 (int)(walk_in->offset - walk_out->offset));
-}
-
 static inline unsigned int scatterwalk_pagelen(struct scatter_walk *walk)
 {
 	unsigned int len = walk->sg->offset + walk->sg->length - walk->offset;
-- 
cgit v1.2.3


From 2ad5157650b446f14a719280ba3670303bc4f439 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 11 Jul 2018 18:42:11 -0500
Subject: mailbox/omap: switch to SPDX license identifier

Use the appropriate SPDX license identifier in the OMAP Mailbox
driver source files and drop the previous boilerplate license text.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/omap-mailbox.c | 10 +---------
 include/linux/omap-mailbox.h   |  5 +----
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c
index e1e2c085e68e..97c7d9b7f46f 100644
--- a/drivers/mailbox/omap-mailbox.c
+++ b/drivers/mailbox/omap-mailbox.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * OMAP mailbox driver
  *
@@ -6,15 +7,6 @@
  *
  * Contact: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
  *          Suman Anna <s-anna@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #include <linux/interrupt.h>
diff --git a/include/linux/omap-mailbox.h b/include/linux/omap-mailbox.h
index c726bd833761..6dbcd2da0332 100644
--- a/include/linux/omap-mailbox.h
+++ b/include/linux/omap-mailbox.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * omap-mailbox: interprocessor communication module for OMAP
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef OMAP_MAILBOX_H
-- 
cgit v1.2.3


From 1c82407aa302774b24bf619e56973aa97cbf25bd Mon Sep 17 00:00:00 2001
From: Houlong Wei <houlong.wei@mediatek.com>
Date: Wed, 25 Jul 2018 09:26:39 +0800
Subject: dt-bindings: soc: Add documentation for the MediaTek GCE unit

This adds documentation for the MediaTek Global Command Engine (GCE) unit
found in MT8173 SoCs.

Signed-off-by: Houlong Wei <houlong.wei@mediatek.com>
Signed-off-by: HS Liao <hs.liao@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../devicetree/bindings/mailbox/mtk-gce.txt        | 57 ++++++++++++++++++++++
 include/dt-bindings/gce/mt8173-gce.h               | 44 +++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mailbox/mtk-gce.txt
 create mode 100644 include/dt-bindings/gce/mt8173-gce.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
new file mode 100644
index 000000000000..7d72b21c9e94
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
@@ -0,0 +1,57 @@
+MediaTek GCE
+===============
+
+The Global Command Engine (GCE) is used to help read/write registers with
+critical time limitation, such as updating display configuration during the
+vblank. The GCE can be used to implement the Command Queue (CMDQ) driver.
+
+CMDQ driver uses mailbox framework for communication. Please refer to
+mailbox.txt for generic information about mailbox device-tree bindings.
+
+Required properties:
+- compatible: Must be "mediatek,mt8173-gce"
+- reg: Address range of the GCE unit
+- interrupts: The interrupt signal from the GCE block
+- clock: Clocks according to the common clock binding
+- clock-names: Must be "gce" to stand for GCE clock
+- #mbox-cells: Should be 3.
+	<&phandle channel priority atomic_exec>
+	phandle: Label name of a gce node.
+	channel: Channel of mailbox. Be equal to the thread id of GCE.
+	priority: Priority of GCE thread.
+	atomic_exec: GCE processing continuous packets of commands in atomic
+		way.
+
+Required properties for a client device:
+- mboxes: Client use mailbox to communicate with GCE, it should have this
+  property and list of phandle, mailbox specifiers.
+- mediatek,gce-subsys: u32, specify the sub-system id which is corresponding
+  to the register address.
+
+Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h'. Such as
+sub-system ids, thread priority, event ids.
+
+Example:
+
+	gce: gce@10212000 {
+		compatible = "mediatek,mt8173-gce";
+		reg = <0 0x10212000 0 0x1000>;
+		interrupts = <GIC_SPI 135 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&infracfg CLK_INFRA_GCE>;
+		clock-names = "gce";
+		thread-num = CMDQ_THR_MAX_COUNT;
+		#mbox-cells = <3>;
+	};
+
+Example for a client device:
+
+	mmsys: clock-controller@14000000 {
+		compatible = "mediatek,mt8173-mmsys";
+		mboxes = <&gce 0 CMDQ_THR_PRIO_LOWEST 1>,
+			 <&gce 1 CMDQ_THR_PRIO_LOWEST 1>;
+		mediatek,gce-subsys = <SUBSYS_1400XXXX>;
+		mutex-event-eof = <CMDQ_EVENT_MUTEX0_STREAM_EOF
+				CMDQ_EVENT_MUTEX1_STREAM_EOF>;
+
+		...
+	};
diff --git a/include/dt-bindings/gce/mt8173-gce.h b/include/dt-bindings/gce/mt8173-gce.h
new file mode 100644
index 000000000000..ffcf94ba96c6
--- /dev/null
+++ b/include/dt-bindings/gce/mt8173-gce.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: Houlong Wei <houlong.wei@mediatek.com>
+ *
+ */
+
+#ifndef _DT_BINDINGS_GCE_MT8173_H
+#define _DT_BINDINGS_GCE_MT8173_H
+
+/* GCE HW thread priority */
+#define CMDQ_THR_PRIO_LOWEST	0
+#define CMDQ_THR_PRIO_HIGHEST	1
+
+/* GCE SUBSYS */
+#define SUBSYS_1400XXXX		1
+#define SUBSYS_1401XXXX		2
+#define SUBSYS_1402XXXX		3
+
+/* GCE HW EVENT */
+#define CMDQ_EVENT_DISP_OVL0_SOF		11
+#define CMDQ_EVENT_DISP_OVL1_SOF		12
+#define CMDQ_EVENT_DISP_RDMA0_SOF		13
+#define CMDQ_EVENT_DISP_RDMA1_SOF		14
+#define CMDQ_EVENT_DISP_RDMA2_SOF		15
+#define CMDQ_EVENT_DISP_WDMA0_SOF		16
+#define CMDQ_EVENT_DISP_WDMA1_SOF		17
+#define CMDQ_EVENT_DISP_OVL0_EOF		39
+#define CMDQ_EVENT_DISP_OVL1_EOF		40
+#define CMDQ_EVENT_DISP_RDMA0_EOF		41
+#define CMDQ_EVENT_DISP_RDMA1_EOF		42
+#define CMDQ_EVENT_DISP_RDMA2_EOF		43
+#define CMDQ_EVENT_DISP_WDMA0_EOF		44
+#define CMDQ_EVENT_DISP_WDMA1_EOF		45
+#define CMDQ_EVENT_MUTEX0_STREAM_EOF		53
+#define CMDQ_EVENT_MUTEX1_STREAM_EOF		54
+#define CMDQ_EVENT_MUTEX2_STREAM_EOF		55
+#define CMDQ_EVENT_MUTEX3_STREAM_EOF		56
+#define CMDQ_EVENT_MUTEX4_STREAM_EOF		57
+#define CMDQ_EVENT_DISP_RDMA0_UNDERRUN		63
+#define CMDQ_EVENT_DISP_RDMA1_UNDERRUN		64
+#define CMDQ_EVENT_DISP_RDMA2_UNDERRUN		65
+
+#endif
-- 
cgit v1.2.3


From 623a6143a845bd485b00ba684f0ccef11835edab Mon Sep 17 00:00:00 2001
From: Houlong Wei <houlong.wei@mediatek.com>
Date: Wed, 25 Jul 2018 09:26:40 +0800
Subject: mailbox: mediatek: Add Mediatek CMDQ driver

This patch is first version of Mediatek Command Queue(CMDQ) driver. The
CMDQ is used to help write registers with critical time limitation,
such as updating display configuration during the vblank. It controls
Global Command Engine (GCE) hardware to achieve this requirement.
Currently, CMDQ only supports display related hardwares, but we expect
it can be extended to other hardwares for future requirements.

Signed-off-by: Houlong Wei <houlong.wei@mediatek.com>
Signed-off-by: HS Liao <hs.liao@mediatek.com>
Signed-off-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/Kconfig                  |  10 +
 drivers/mailbox/Makefile                 |   2 +
 drivers/mailbox/mtk-cmdq-mailbox.c       | 569 +++++++++++++++++++++++++++++++
 include/linux/mailbox/mtk-cmdq-mailbox.h |  77 +++++
 4 files changed, 658 insertions(+)
 create mode 100644 drivers/mailbox/mtk-cmdq-mailbox.c
 create mode 100644 include/linux/mailbox/mtk-cmdq-mailbox.h

(limited to 'include')

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index e63d29a95e76..2bbabc907add 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -189,4 +189,14 @@ config STM32_IPCC
 	  Mailbox implementation for STMicroelectonics STM32 family chips
 	  with hardware for Inter-Processor Communication Controller (IPCC)
 	  between processors. Say Y here if you want to have this support.
+
+config MTK_CMDQ_MBOX
+	tristate "MediaTek CMDQ Mailbox Support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select MTK_INFRACFG
+	help
+	  Say yes here to add support for the MediaTek Command Queue (CMDQ)
+	  mailbox driver. The CMDQ is used to help read/write registers with
+	  critical time limitation, such as updating display configuration
+	  during the vblank.
 endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index 4d501bea7863..4b0080480661 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -40,3 +40,5 @@ obj-$(CONFIG_QCOM_APCS_IPC)	+= qcom-apcs-ipc-mailbox.o
 obj-$(CONFIG_TEGRA_HSP_MBOX)	+= tegra-hsp.o
 
 obj-$(CONFIG_STM32_IPCC) 	+= stm32-ipcc.o
+
+obj-$(CONFIG_MTK_CMDQ_MBOX)	+= mtk-cmdq-mailbox.o
diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
new file mode 100644
index 000000000000..6f92c5ee12f7
--- /dev/null
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox/mtk-cmdq-mailbox.h>
+#include <linux/of_device.h>
+
+#define CMDQ_OP_CODE_MASK		(0xff << CMDQ_OP_CODE_SHIFT)
+#define CMDQ_IRQ_MASK			0xffff
+#define CMDQ_NUM_CMD(t)			(t->cmd_buf_size / CMDQ_INST_SIZE)
+
+#define CMDQ_CURR_IRQ_STATUS		0x10
+#define CMDQ_THR_SLOT_CYCLES		0x30
+#define CMDQ_THR_BASE			0x100
+#define CMDQ_THR_SIZE			0x80
+#define CMDQ_THR_WARM_RESET		0x00
+#define CMDQ_THR_ENABLE_TASK		0x04
+#define CMDQ_THR_SUSPEND_TASK		0x08
+#define CMDQ_THR_CURR_STATUS		0x0c
+#define CMDQ_THR_IRQ_STATUS		0x10
+#define CMDQ_THR_IRQ_ENABLE		0x14
+#define CMDQ_THR_CURR_ADDR		0x20
+#define CMDQ_THR_END_ADDR		0x24
+#define CMDQ_THR_WAIT_TOKEN		0x30
+#define CMDQ_THR_PRIORITY		0x40
+
+#define CMDQ_THR_ACTIVE_SLOT_CYCLES	0x3200
+#define CMDQ_THR_ENABLED		0x1
+#define CMDQ_THR_DISABLED		0x0
+#define CMDQ_THR_SUSPEND		0x1
+#define CMDQ_THR_RESUME			0x0
+#define CMDQ_THR_STATUS_SUSPENDED	BIT(1)
+#define CMDQ_THR_DO_WARM_RESET		BIT(0)
+#define CMDQ_THR_IRQ_DONE		0x1
+#define CMDQ_THR_IRQ_ERROR		0x12
+#define CMDQ_THR_IRQ_EN			(CMDQ_THR_IRQ_ERROR | CMDQ_THR_IRQ_DONE)
+#define CMDQ_THR_IS_WAITING		BIT(31)
+
+#define CMDQ_JUMP_BY_OFFSET		0x10000000
+#define CMDQ_JUMP_BY_PA			0x10000001
+
+struct cmdq_thread {
+	struct mbox_chan	*chan;
+	void __iomem		*base;
+	struct list_head	task_busy_list;
+	u32			priority;
+	bool			atomic_exec;
+};
+
+struct cmdq_task {
+	struct cmdq		*cmdq;
+	struct list_head	list_entry;
+	dma_addr_t		pa_base;
+	struct cmdq_thread	*thread;
+	struct cmdq_pkt		*pkt; /* the packet sent from mailbox client */
+};
+
+struct cmdq {
+	struct mbox_controller	mbox;
+	void __iomem		*base;
+	u32			irq;
+	u32			thread_nr;
+	struct cmdq_thread	*thread;
+	struct clk		*clock;
+	bool			suspended;
+};
+
+static int cmdq_thread_suspend(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	u32 status;
+
+	writel(CMDQ_THR_SUSPEND, thread->base + CMDQ_THR_SUSPEND_TASK);
+
+	/* If already disabled, treat as suspended successful. */
+	if (!(readl(thread->base + CMDQ_THR_ENABLE_TASK) & CMDQ_THR_ENABLED))
+		return 0;
+
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_CURR_STATUS,
+			status, status & CMDQ_THR_STATUS_SUSPENDED, 0, 10)) {
+		dev_err(cmdq->mbox.dev, "suspend GCE thread 0x%x failed\n",
+			(u32)(thread->base - cmdq->base));
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void cmdq_thread_resume(struct cmdq_thread *thread)
+{
+	writel(CMDQ_THR_RESUME, thread->base + CMDQ_THR_SUSPEND_TASK);
+}
+
+static void cmdq_init(struct cmdq *cmdq)
+{
+	WARN_ON(clk_enable(cmdq->clock) < 0);
+	writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES);
+	clk_disable(cmdq->clock);
+}
+
+static int cmdq_thread_reset(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	u32 warm_reset;
+
+	writel(CMDQ_THR_DO_WARM_RESET, thread->base + CMDQ_THR_WARM_RESET);
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_WARM_RESET,
+			warm_reset, !(warm_reset & CMDQ_THR_DO_WARM_RESET),
+			0, 10)) {
+		dev_err(cmdq->mbox.dev, "reset GCE thread 0x%x failed\n",
+			(u32)(thread->base - cmdq->base));
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void cmdq_thread_disable(struct cmdq *cmdq, struct cmdq_thread *thread)
+{
+	cmdq_thread_reset(cmdq, thread);
+	writel(CMDQ_THR_DISABLED, thread->base + CMDQ_THR_ENABLE_TASK);
+}
+
+/* notify GCE to re-fetch commands by setting GCE thread PC */
+static void cmdq_thread_invalidate_fetched_data(struct cmdq_thread *thread)
+{
+	writel(readl(thread->base + CMDQ_THR_CURR_ADDR),
+	       thread->base + CMDQ_THR_CURR_ADDR);
+}
+
+static void cmdq_task_insert_into_thread(struct cmdq_task *task)
+{
+	struct device *dev = task->cmdq->mbox.dev;
+	struct cmdq_thread *thread = task->thread;
+	struct cmdq_task *prev_task = list_last_entry(
+			&thread->task_busy_list, typeof(*task), list_entry);
+	u64 *prev_task_base = prev_task->pkt->va_base;
+
+	/* let previous task jump to this task */
+	dma_sync_single_for_cpu(dev, prev_task->pa_base,
+				prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
+	prev_task_base[CMDQ_NUM_CMD(prev_task->pkt) - 1] =
+		(u64)CMDQ_JUMP_BY_PA << 32 | task->pa_base;
+	dma_sync_single_for_device(dev, prev_task->pa_base,
+				   prev_task->pkt->cmd_buf_size, DMA_TO_DEVICE);
+
+	cmdq_thread_invalidate_fetched_data(thread);
+}
+
+static bool cmdq_command_is_wfe(u64 cmd)
+{
+	u64 wfe_option = CMDQ_WFE_UPDATE | CMDQ_WFE_WAIT | CMDQ_WFE_WAIT_VALUE;
+	u64 wfe_op = (u64)(CMDQ_CODE_WFE << CMDQ_OP_CODE_SHIFT) << 32;
+	u64 wfe_mask = (u64)CMDQ_OP_CODE_MASK << 32 | 0xffffffff;
+
+	return ((cmd & wfe_mask) == (wfe_op | wfe_option));
+}
+
+/* we assume tasks in the same display GCE thread are waiting the same event. */
+static void cmdq_task_remove_wfe(struct cmdq_task *task)
+{
+	struct device *dev = task->cmdq->mbox.dev;
+	u64 *base = task->pkt->va_base;
+	int i;
+
+	dma_sync_single_for_cpu(dev, task->pa_base, task->pkt->cmd_buf_size,
+				DMA_TO_DEVICE);
+	for (i = 0; i < CMDQ_NUM_CMD(task->pkt); i++)
+		if (cmdq_command_is_wfe(base[i]))
+			base[i] = (u64)CMDQ_JUMP_BY_OFFSET << 32 |
+				  CMDQ_JUMP_PASS;
+	dma_sync_single_for_device(dev, task->pa_base, task->pkt->cmd_buf_size,
+				   DMA_TO_DEVICE);
+}
+
+static bool cmdq_thread_is_in_wfe(struct cmdq_thread *thread)
+{
+	return readl(thread->base + CMDQ_THR_WAIT_TOKEN) & CMDQ_THR_IS_WAITING;
+}
+
+static void cmdq_thread_wait_end(struct cmdq_thread *thread,
+				 unsigned long end_pa)
+{
+	struct device *dev = thread->chan->mbox->dev;
+	unsigned long curr_pa;
+
+	if (readl_poll_timeout_atomic(thread->base + CMDQ_THR_CURR_ADDR,
+			curr_pa, curr_pa == end_pa, 1, 20))
+		dev_err(dev, "GCE thread cannot run to end.\n");
+}
+
+static void cmdq_task_exec_done(struct cmdq_task *task, enum cmdq_cb_status sta)
+{
+	struct cmdq_task_cb *cb = &task->pkt->async_cb;
+	struct cmdq_cb_data data;
+
+	WARN_ON(cb->cb == (cmdq_async_flush_cb)NULL);
+	data.sta = sta;
+	data.data = cb->data;
+	cb->cb(data);
+
+	list_del(&task->list_entry);
+}
+
+static void cmdq_task_handle_error(struct cmdq_task *task)
+{
+	struct cmdq_thread *thread = task->thread;
+	struct cmdq_task *next_task;
+
+	dev_err(task->cmdq->mbox.dev, "task 0x%p error\n", task);
+	WARN_ON(cmdq_thread_suspend(task->cmdq, thread) < 0);
+	next_task = list_first_entry_or_null(&thread->task_busy_list,
+			struct cmdq_task, list_entry);
+	if (next_task)
+		writel(next_task->pa_base, thread->base + CMDQ_THR_CURR_ADDR);
+	cmdq_thread_resume(thread);
+}
+
+static void cmdq_thread_irq_handler(struct cmdq *cmdq,
+				    struct cmdq_thread *thread)
+{
+	struct cmdq_task *task, *tmp, *curr_task = NULL;
+	u32 curr_pa, irq_flag, task_end_pa;
+	bool err;
+
+	irq_flag = readl(thread->base + CMDQ_THR_IRQ_STATUS);
+	writel(~irq_flag, thread->base + CMDQ_THR_IRQ_STATUS);
+
+	/*
+	 * When ISR call this function, another CPU core could run
+	 * "release task" right before we acquire the spin lock, and thus
+	 * reset / disable this GCE thread, so we need to check the enable
+	 * bit of this GCE thread.
+	 */
+	if (!(readl(thread->base + CMDQ_THR_ENABLE_TASK) & CMDQ_THR_ENABLED))
+		return;
+
+	if (irq_flag & CMDQ_THR_IRQ_ERROR)
+		err = true;
+	else if (irq_flag & CMDQ_THR_IRQ_DONE)
+		err = false;
+	else
+		return;
+
+	curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR);
+
+	list_for_each_entry_safe(task, tmp, &thread->task_busy_list,
+				 list_entry) {
+		task_end_pa = task->pa_base + task->pkt->cmd_buf_size;
+		if (curr_pa >= task->pa_base && curr_pa < task_end_pa)
+			curr_task = task;
+
+		if (!curr_task || curr_pa == task_end_pa - CMDQ_INST_SIZE) {
+			cmdq_task_exec_done(task, CMDQ_CB_NORMAL);
+			kfree(task);
+		} else if (err) {
+			cmdq_task_exec_done(task, CMDQ_CB_ERROR);
+			cmdq_task_handle_error(curr_task);
+			kfree(task);
+		}
+
+		if (curr_task)
+			break;
+	}
+
+	if (list_empty(&thread->task_busy_list)) {
+		cmdq_thread_disable(cmdq, thread);
+		clk_disable(cmdq->clock);
+	}
+}
+
+static irqreturn_t cmdq_irq_handler(int irq, void *dev)
+{
+	struct cmdq *cmdq = dev;
+	unsigned long irq_status, flags = 0L;
+	int bit;
+
+	irq_status = readl(cmdq->base + CMDQ_CURR_IRQ_STATUS) & CMDQ_IRQ_MASK;
+	if (!(irq_status ^ CMDQ_IRQ_MASK))
+		return IRQ_NONE;
+
+	for_each_clear_bit(bit, &irq_status, fls(CMDQ_IRQ_MASK)) {
+		struct cmdq_thread *thread = &cmdq->thread[bit];
+
+		spin_lock_irqsave(&thread->chan->lock, flags);
+		cmdq_thread_irq_handler(cmdq, thread);
+		spin_unlock_irqrestore(&thread->chan->lock, flags);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int cmdq_suspend(struct device *dev)
+{
+	struct cmdq *cmdq = dev_get_drvdata(dev);
+	struct cmdq_thread *thread;
+	int i;
+	bool task_running = false;
+
+	cmdq->suspended = true;
+
+	for (i = 0; i < cmdq->thread_nr; i++) {
+		thread = &cmdq->thread[i];
+		if (!list_empty(&thread->task_busy_list)) {
+			task_running = true;
+			break;
+		}
+	}
+
+	if (task_running)
+		dev_warn(dev, "exist running task(s) in suspend\n");
+
+	clk_unprepare(cmdq->clock);
+
+	return 0;
+}
+
+static int cmdq_resume(struct device *dev)
+{
+	struct cmdq *cmdq = dev_get_drvdata(dev);
+
+	WARN_ON(clk_prepare(cmdq->clock) < 0);
+	cmdq->suspended = false;
+	return 0;
+}
+
+static int cmdq_remove(struct platform_device *pdev)
+{
+	struct cmdq *cmdq = platform_get_drvdata(pdev);
+
+	mbox_controller_unregister(&cmdq->mbox);
+	clk_unprepare(cmdq->clock);
+
+	if (cmdq->mbox.chans)
+		devm_kfree(&pdev->dev, cmdq->mbox.chans);
+
+	if (cmdq->thread)
+		devm_kfree(&pdev->dev, cmdq->thread);
+
+	devm_kfree(&pdev->dev, cmdq);
+
+	return 0;
+}
+
+static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
+{
+	struct cmdq_pkt *pkt = (struct cmdq_pkt *)data;
+	struct cmdq_thread *thread = (struct cmdq_thread *)chan->con_priv;
+	struct cmdq *cmdq = dev_get_drvdata(chan->mbox->dev);
+	struct cmdq_task *task;
+	unsigned long curr_pa, end_pa;
+
+	/* Client should not flush new tasks if suspended. */
+	WARN_ON(cmdq->suspended);
+
+	task = kzalloc(sizeof(*task), GFP_ATOMIC);
+	task->cmdq = cmdq;
+	INIT_LIST_HEAD(&task->list_entry);
+	task->pa_base = pkt->pa_base;
+	task->thread = thread;
+	task->pkt = pkt;
+
+	if (list_empty(&thread->task_busy_list)) {
+		WARN_ON(clk_enable(cmdq->clock) < 0);
+		WARN_ON(cmdq_thread_reset(cmdq, thread) < 0);
+
+		writel(task->pa_base, thread->base + CMDQ_THR_CURR_ADDR);
+		writel(task->pa_base + pkt->cmd_buf_size,
+		       thread->base + CMDQ_THR_END_ADDR);
+		writel(thread->priority, thread->base + CMDQ_THR_PRIORITY);
+		writel(CMDQ_THR_IRQ_EN, thread->base + CMDQ_THR_IRQ_ENABLE);
+		writel(CMDQ_THR_ENABLED, thread->base + CMDQ_THR_ENABLE_TASK);
+	} else {
+		WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
+		curr_pa = readl(thread->base + CMDQ_THR_CURR_ADDR);
+		end_pa = readl(thread->base + CMDQ_THR_END_ADDR);
+
+		/*
+		 * Atomic execution should remove the following wfe, i.e. only
+		 * wait event at first task, and prevent to pause when running.
+		 */
+		if (thread->atomic_exec) {
+			/* GCE is executing if command is not WFE */
+			if (!cmdq_thread_is_in_wfe(thread)) {
+				cmdq_thread_resume(thread);
+				cmdq_thread_wait_end(thread, end_pa);
+				WARN_ON(cmdq_thread_suspend(cmdq, thread) < 0);
+				/* set to this task directly */
+				writel(task->pa_base,
+				       thread->base + CMDQ_THR_CURR_ADDR);
+			} else {
+				cmdq_task_insert_into_thread(task);
+				cmdq_task_remove_wfe(task);
+				smp_mb(); /* modify jump before enable thread */
+			}
+		} else {
+			/* check boundary */
+			if (curr_pa == end_pa - CMDQ_INST_SIZE ||
+			    curr_pa == end_pa) {
+				/* set to this task directly */
+				writel(task->pa_base,
+				       thread->base + CMDQ_THR_CURR_ADDR);
+			} else {
+				cmdq_task_insert_into_thread(task);
+				smp_mb(); /* modify jump before enable thread */
+			}
+		}
+		writel(task->pa_base + pkt->cmd_buf_size,
+		       thread->base + CMDQ_THR_END_ADDR);
+		cmdq_thread_resume(thread);
+	}
+	list_move_tail(&task->list_entry, &thread->task_busy_list);
+
+	return 0;
+}
+
+static int cmdq_mbox_startup(struct mbox_chan *chan)
+{
+	return 0;
+}
+
+static void cmdq_mbox_shutdown(struct mbox_chan *chan)
+{
+}
+
+static const struct mbox_chan_ops cmdq_mbox_chan_ops = {
+	.send_data = cmdq_mbox_send_data,
+	.startup = cmdq_mbox_startup,
+	.shutdown = cmdq_mbox_shutdown,
+};
+
+static struct mbox_chan *cmdq_xlate(struct mbox_controller *mbox,
+		const struct of_phandle_args *sp)
+{
+	int ind = sp->args[0];
+	struct cmdq_thread *thread;
+
+	if (ind >= mbox->num_chans)
+		return ERR_PTR(-EINVAL);
+
+	thread = (struct cmdq_thread *)mbox->chans[ind].con_priv;
+	thread->priority = sp->args[1];
+	thread->atomic_exec = (sp->args[2] != 0);
+	thread->chan = &mbox->chans[ind];
+
+	return &mbox->chans[ind];
+}
+
+static int cmdq_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+	struct cmdq *cmdq;
+	int err, i;
+
+	cmdq = devm_kzalloc(dev, sizeof(*cmdq), GFP_KERNEL);
+	if (!cmdq)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cmdq->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cmdq->base)) {
+		dev_err(dev, "failed to ioremap gce\n");
+		return PTR_ERR(cmdq->base);
+	}
+
+	cmdq->irq = platform_get_irq(pdev, 0);
+	if (!cmdq->irq) {
+		dev_err(dev, "failed to get irq\n");
+		return -EINVAL;
+	}
+	err = devm_request_irq(dev, cmdq->irq, cmdq_irq_handler, IRQF_SHARED,
+			       "mtk_cmdq", cmdq);
+	if (err < 0) {
+		dev_err(dev, "failed to register ISR (%d)\n", err);
+		return err;
+	}
+
+	dev_dbg(dev, "cmdq device: addr:0x%p, va:0x%p, irq:%d\n",
+		dev, cmdq->base, cmdq->irq);
+
+	cmdq->clock = devm_clk_get(dev, "gce");
+	if (IS_ERR(cmdq->clock)) {
+		dev_err(dev, "failed to get gce clk\n");
+		return PTR_ERR(cmdq->clock);
+	}
+
+	cmdq->thread_nr = (u32)(unsigned long)of_device_get_match_data(dev);
+	cmdq->mbox.dev = dev;
+	cmdq->mbox.chans = devm_kcalloc(dev, cmdq->thread_nr,
+					sizeof(*cmdq->mbox.chans), GFP_KERNEL);
+	if (!cmdq->mbox.chans)
+		return -ENOMEM;
+
+	cmdq->mbox.num_chans = cmdq->thread_nr;
+	cmdq->mbox.ops = &cmdq_mbox_chan_ops;
+	cmdq->mbox.of_xlate = cmdq_xlate;
+
+	/* make use of TXDONE_BY_ACK */
+	cmdq->mbox.txdone_irq = false;
+	cmdq->mbox.txdone_poll = false;
+
+	cmdq->thread = devm_kcalloc(dev, cmdq->thread_nr,
+					sizeof(*cmdq->thread), GFP_KERNEL);
+	if (!cmdq->thread)
+		return -ENOMEM;
+
+	for (i = 0; i < cmdq->thread_nr; i++) {
+		cmdq->thread[i].base = cmdq->base + CMDQ_THR_BASE +
+				CMDQ_THR_SIZE * i;
+		INIT_LIST_HEAD(&cmdq->thread[i].task_busy_list);
+		cmdq->mbox.chans[i].con_priv = (void *)&cmdq->thread[i];
+	}
+
+	err = mbox_controller_register(&cmdq->mbox);
+	if (err < 0) {
+		dev_err(dev, "failed to register mailbox: %d\n", err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, cmdq);
+	WARN_ON(clk_prepare(cmdq->clock) < 0);
+
+	cmdq_init(cmdq);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cmdq_pm_ops = {
+	.suspend = cmdq_suspend,
+	.resume = cmdq_resume,
+};
+
+static const struct of_device_id cmdq_of_ids[] = {
+	{.compatible = "mediatek,mt8173-gce", .data = (void *)16},
+	{}
+};
+
+static struct platform_driver cmdq_drv = {
+	.probe = cmdq_probe,
+	.remove = cmdq_remove,
+	.driver = {
+		.name = "mtk_cmdq",
+		.pm = &cmdq_pm_ops,
+		.of_match_table = cmdq_of_ids,
+	}
+};
+
+static int __init cmdq_drv_init(void)
+{
+	return platform_driver_register(&cmdq_drv);
+}
+
+static void __exit cmdq_drv_exit(void)
+{
+	platform_driver_unregister(&cmdq_drv);
+}
+
+subsys_initcall(cmdq_drv_init);
+module_exit(cmdq_drv_exit);
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
new file mode 100644
index 000000000000..ccb73422c2fa
--- /dev/null
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ *
+ */
+
+#ifndef __MTK_CMDQ_MAILBOX_H__
+#define __MTK_CMDQ_MAILBOX_H__
+
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define CMDQ_INST_SIZE			8 /* instruction is 64-bit */
+#define CMDQ_SUBSYS_SHIFT		16
+#define CMDQ_OP_CODE_SHIFT		24
+#define CMDQ_JUMP_PASS			CMDQ_INST_SIZE
+
+#define CMDQ_WFE_UPDATE			BIT(31)
+#define CMDQ_WFE_WAIT			BIT(15)
+#define CMDQ_WFE_WAIT_VALUE		0x1
+
+/*
+ * CMDQ_CODE_MASK:
+ *   set write mask
+ *   format: op mask
+ * CMDQ_CODE_WRITE:
+ *   write value into target register
+ *   format: op subsys address value
+ * CMDQ_CODE_JUMP:
+ *   jump by offset
+ *   format: op offset
+ * CMDQ_CODE_WFE:
+ *   wait for event and clear
+ *   it is just clear if no wait
+ *   format: [wait]  op event update:1 to_wait:1 wait:1
+ *           [clear] op event update:1 to_wait:0 wait:0
+ * CMDQ_CODE_EOC:
+ *   end of command
+ *   format: op irq_flag
+ */
+enum cmdq_code {
+	CMDQ_CODE_MASK = 0x02,
+	CMDQ_CODE_WRITE = 0x04,
+	CMDQ_CODE_JUMP = 0x10,
+	CMDQ_CODE_WFE = 0x20,
+	CMDQ_CODE_EOC = 0x40,
+};
+
+enum cmdq_cb_status {
+	CMDQ_CB_NORMAL = 0,
+	CMDQ_CB_ERROR
+};
+
+struct cmdq_cb_data {
+	enum cmdq_cb_status	sta;
+	void			*data;
+};
+
+typedef void (*cmdq_async_flush_cb)(struct cmdq_cb_data data);
+
+struct cmdq_task_cb {
+	cmdq_async_flush_cb	cb;
+	void			*data;
+};
+
+struct cmdq_pkt {
+	void			*va_base;
+	dma_addr_t		pa_base;
+	size_t			cmd_buf_size; /* command occupied size */
+	size_t			buf_size; /* real buffer size */
+	struct cmdq_task_cb	cb;
+	struct cmdq_task_cb	async_cb;
+	void			*cl;
+};
+
+#endif /* __MTK_CMDQ_MAILBOX_H__ */
-- 
cgit v1.2.3


From 285189c78eeb6f684a024b86fb5997d10c6aa564 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Wed, 25 Jul 2018 15:52:13 +0800
Subject: netfilter: use kvmalloc_array to allocate memory for hashtable

nf_ct_alloc_hashtable is used to allocate memory for conntrack,
NAT bysrc and expectation hashtable. Assuming 64k bucket size,
which means 7th order page allocation, __get_free_pages, called
by nf_ct_alloc_hashtable, will trigger the direct memory reclaim
and stall for a long time, when system has lots of memory stress

so replace combination of __get_free_pages and vzalloc with
kvmalloc_array, which provides a overflow check and a fallback
if no high order memory is available, and do not retry to reclaim
memory, reduce stall

and remove nf_ct_free_hashtable, since it is just a kvfree

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Wang Li <wangli39@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h |  2 --
 net/netfilter/nf_conntrack_core.c    | 29 ++++++-----------------------
 net/netfilter/nf_conntrack_expect.c  |  2 +-
 net/netfilter/nf_conntrack_helper.c  |  4 ++--
 net/netfilter/nf_nat_core.c          |  4 ++--
 5 files changed, 11 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index a2b0ed025908..7e012312cd61 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -176,8 +176,6 @@ void nf_ct_netns_put(struct net *net, u8 nfproto);
  */
 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
 
-void nf_ct_free_hashtable(void *hash, unsigned int size);
-
 int nf_conntrack_hash_check_insert(struct nf_conn *ct);
 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8a113ca1eea2..a676d5f76bdc 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2022,16 +2022,6 @@ static int kill_all(struct nf_conn *i, void *data)
 	return net_eq(nf_ct_net(i), data);
 }
 
-void nf_ct_free_hashtable(void *hash, unsigned int size)
-{
-	if (is_vmalloc_addr(hash))
-		vfree(hash);
-	else
-		free_pages((unsigned long)hash,
-			   get_order(sizeof(struct hlist_head) * size));
-}
-EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-
 void nf_conntrack_cleanup_start(void)
 {
 	conntrack_gc_work.exiting = true;
@@ -2042,7 +2032,7 @@ void nf_conntrack_cleanup_end(void)
 {
 	RCU_INIT_POINTER(nf_ct_hook, NULL);
 	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+	kvfree(nf_conntrack_hash);
 
 	nf_conntrack_proto_fini();
 	nf_conntrack_seqadj_fini();
@@ -2108,7 +2098,6 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
 	struct hlist_nulls_head *hash;
 	unsigned int nr_slots, i;
-	size_t sz;
 
 	if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
 		return NULL;
@@ -2116,14 +2105,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
 	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
 
-	if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head)))
-		return NULL;
-
-	sz = nr_slots * sizeof(struct hlist_nulls_head);
-	hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
-					get_order(sz));
-	if (!hash)
-		hash = vzalloc(sz);
+	hash = kvmalloc_array(nr_slots, sizeof(struct hlist_nulls_head),
+			      GFP_KERNEL | __GFP_ZERO);
 
 	if (hash && nulls)
 		for (i = 0; i < nr_slots; i++)
@@ -2150,7 +2133,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 
 	old_size = nf_conntrack_htable_size;
 	if (old_size == hashsize) {
-		nf_ct_free_hashtable(hash, hashsize);
+		kvfree(hash);
 		return 0;
 	}
 
@@ -2186,7 +2169,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
 	local_bh_enable();
 
 	synchronize_net();
-	nf_ct_free_hashtable(old_hash, old_size);
+	kvfree(old_hash);
 	return 0;
 }
 
@@ -2350,7 +2333,7 @@ err_acct:
 err_expect:
 	kmem_cache_destroy(nf_conntrack_cachep);
 err_cachep:
-	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+	kvfree(nf_conntrack_hash);
 	return ret;
 }
 
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3f586ba23d92..27b84231db10 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -712,5 +712,5 @@ void nf_conntrack_expect_fini(void)
 {
 	rcu_barrier(); /* Wait for call_rcu() before destroy */
 	kmem_cache_destroy(nf_ct_expect_cachep);
-	nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize);
+	kvfree(nf_ct_expect_hash);
 }
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index d557a425289d..e24b762ffa1d 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -562,12 +562,12 @@ int nf_conntrack_helper_init(void)
 
 	return 0;
 out_extend:
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
+	kvfree(nf_ct_helper_hash);
 	return ret;
 }
 
 void nf_conntrack_helper_fini(void)
 {
 	nf_ct_extend_unregister(&helper_extend);
-	nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
+	kvfree(nf_ct_helper_hash);
 }
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6366f0c0b8c1..e2b196054dfc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1056,7 +1056,7 @@ static int __init nf_nat_init(void)
 
 	ret = nf_ct_extend_register(&nat_extend);
 	if (ret < 0) {
-		nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+		kvfree(nf_nat_bysource);
 		pr_err("Unable to register extension\n");
 		return ret;
 	}
@@ -1094,7 +1094,7 @@ static void __exit nf_nat_cleanup(void)
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
 	synchronize_net();
-	nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+	kvfree(nf_nat_bysource);
 	unregister_pernet_subsys(&nat_net_ops);
 }
 
-- 
cgit v1.2.3


From 7cca1ed0bb248b8d5768d17f5afe297a832d66c0 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 31 Jul 2018 20:25:00 +0200
Subject: netfilter: nf_osf: move nf_osf_fingers to non-uapi header file

All warnings (new ones prefixed by >>):

>> ./usr/include/linux/netfilter/nf_osf.h:73: userspace cannot reference function or variable defined in the kernel

Fixes: f9324952088f ("netfilter: nfnetlink_osf: extract nfnetlink_subsystem code from xt_osf.c")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h      | 2 ++
 include/uapi/linux/netfilter/nf_osf.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
index aee460fcbd31..3e455d6f94d5 100644
--- a/include/linux/netfilter/nf_osf.h
+++ b/include/linux/netfilter/nf_osf.h
@@ -25,6 +25,8 @@ enum osf_fmatch_states {
 	FMATCH_OPT_WRONG,
 };
 
+extern struct list_head nf_osf_fingers[2];
+
 struct nf_osf_finger {
 	struct rcu_head			rcu_head;
 	struct list_head		finger_entry;
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
index cc2487ff74f6..3b93fbb9fc24 100644
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ b/include/uapi/linux/netfilter/nf_osf.h
@@ -70,8 +70,6 @@ struct nf_osf_nlmsg {
 	struct tcphdr			tcp;
 };
 
-extern struct list_head nf_osf_fingers[2];
-
 /* Defines for IANA option kinds */
 enum iana_options {
 	OSFOPT_EOL = 0,		/* End of options */
-- 
cgit v1.2.3


From ddba40be59c9be4059288464f8e6f38fbba27495 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 31 Jul 2018 20:25:01 +0200
Subject: netfilter: nfnetlink_osf: rename nf_osf header file to nfnetlink_osf

The first client of the nf_osf.h userspace header is nft_osf, coming in
this batch, rename it to nfnetlink_osf.h as there are no userspace
clients for this yet, hence this looks consistent with other nfnetlink
subsystem.

Suggested-by: Jan Engelhardt <jengelh@inai.de>
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_osf.h             |  44 -----------
 include/linux/netfilter/nfnetlink_osf.h      |  44 +++++++++++
 include/uapi/linux/netfilter/nf_osf.h        | 106 ---------------------------
 include/uapi/linux/netfilter/nfnetlink_osf.h | 106 +++++++++++++++++++++++++++
 include/uapi/linux/netfilter/xt_osf.h        |   2 +-
 net/netfilter/nfnetlink_osf.c                |   2 +-
 net/netfilter/nft_osf.c                      |   2 +-
 7 files changed, 153 insertions(+), 153 deletions(-)
 delete mode 100644 include/linux/netfilter/nf_osf.h
 create mode 100644 include/linux/netfilter/nfnetlink_osf.h
 delete mode 100644 include/uapi/linux/netfilter/nf_osf.h
 create mode 100644 include/uapi/linux/netfilter/nfnetlink_osf.h

(limited to 'include')

diff --git a/include/linux/netfilter/nf_osf.h b/include/linux/netfilter/nf_osf.h
deleted file mode 100644
index 3e455d6f94d5..000000000000
--- a/include/linux/netfilter/nf_osf.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFOSF_H
-#define _NFOSF_H
-
-#include <uapi/linux/netfilter/nf_osf.h>
-
-/* Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum nf_osf_window_size_options {
-	OSF_WSS_PLAIN   = 0,
-	OSF_WSS_MSS,
-	OSF_WSS_MTU,
-	OSF_WSS_MODULO,
-	OSF_WSS_MAX,
-};
-
-enum osf_fmatch_states {
-	/* Packet does not match the fingerprint */
-	FMATCH_WRONG = 0,
-	/* Packet matches the fingerprint */
-	FMATCH_OK,
-	/* Options do not match the fingerprint, but header does */
-	FMATCH_OPT_WRONG,
-};
-
-extern struct list_head nf_osf_fingers[2];
-
-struct nf_osf_finger {
-	struct rcu_head			rcu_head;
-	struct list_head		finger_entry;
-	struct nf_osf_user_finger	finger;
-};
-
-bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
-		  int hooknum, struct net_device *in, struct net_device *out,
-		  const struct nf_osf_info *info, struct net *net,
-		  const struct list_head *nf_osf_fingers);
-
-const char *nf_osf_find(const struct sk_buff *skb,
-                        const struct list_head *nf_osf_fingers);
-
-#endif /* _NFOSF_H */
diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
new file mode 100644
index 000000000000..a7311bc03d3a
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NFOSF_H
+#define _NFOSF_H
+
+#include <uapi/linux/netfilter/nfnetlink_osf.h>
+
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+	OSF_WSS_PLAIN   = 0,
+	OSF_WSS_MSS,
+	OSF_WSS_MTU,
+	OSF_WSS_MODULO,
+	OSF_WSS_MAX,
+};
+
+enum osf_fmatch_states {
+	/* Packet does not match the fingerprint */
+	FMATCH_WRONG = 0,
+	/* Packet matches the fingerprint */
+	FMATCH_OK,
+	/* Options do not match the fingerprint, but header does */
+	FMATCH_OPT_WRONG,
+};
+
+extern struct list_head nf_osf_fingers[2];
+
+struct nf_osf_finger {
+	struct rcu_head			rcu_head;
+	struct list_head		finger_entry;
+	struct nf_osf_user_finger	finger;
+};
+
+bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
+		  int hooknum, struct net_device *in, struct net_device *out,
+		  const struct nf_osf_info *info, struct net *net,
+		  const struct list_head *nf_osf_fingers);
+
+const char *nf_osf_find(const struct sk_buff *skb,
+                        const struct list_head *nf_osf_fingers);
+
+#endif /* _NFOSF_H */
diff --git a/include/uapi/linux/netfilter/nf_osf.h b/include/uapi/linux/netfilter/nf_osf.h
deleted file mode 100644
index 3b93fbb9fc24..000000000000
--- a/include/uapi/linux/netfilter/nf_osf.h
+++ /dev/null
@@ -1,106 +0,0 @@
-#ifndef _NF_OSF_H
-#define _NF_OSF_H
-
-#include <linux/types.h>
-
-#define MAXGENRELEN	32
-
-#define NF_OSF_GENRE	(1 << 0)
-#define NF_OSF_TTL	(1 << 1)
-#define NF_OSF_LOG	(1 << 2)
-#define NF_OSF_INVERT	(1 << 3)
-
-#define NF_OSF_LOGLEVEL_ALL		0	/* log all matched fingerprints */
-#define NF_OSF_LOGLEVEL_FIRST		1	/* log only the first matced fingerprint */
-#define NF_OSF_LOGLEVEL_ALL_KNOWN	2	/* do not log unknown packets */
-
-#define NF_OSF_TTL_TRUE			0	/* True ip and fingerprint TTL comparison */
-
-/* Check if ip TTL is less than fingerprint one */
-#define NF_OSF_TTL_LESS			1
-
-/* Do not compare ip and fingerprint TTL at all */
-#define NF_OSF_TTL_NOCHECK		2
-
-#define NF_OSF_FLAGMASK		(NF_OSF_GENRE | NF_OSF_TTL | \
-				 NF_OSF_LOG | NF_OSF_INVERT)
-/* Wildcard MSS (kind of).
- * It is used to implement a state machine for the different wildcard values
- * of the MSS and window sizes.
- */
-struct nf_osf_wc {
-	__u32	wc;
-	__u32	val;
-};
-
-/* This struct represents IANA options
- * http://www.iana.org/assignments/tcp-parameters
- */
-struct nf_osf_opt {
-	__u16			kind, length;
-	struct nf_osf_wc	wc;
-};
-
-struct nf_osf_info {
-	char	genre[MAXGENRELEN];
-	__u32	len;
-	__u32	flags;
-	__u32	loglevel;
-	__u32	ttl;
-};
-
-struct nf_osf_user_finger {
-	struct nf_osf_wc	wss;
-
-	__u8	ttl, df;
-	__u16	ss, mss;
-	__u16	opt_num;
-
-	char	genre[MAXGENRELEN];
-	char	version[MAXGENRELEN];
-	char	subtype[MAXGENRELEN];
-
-	/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
-	struct nf_osf_opt	opt[MAX_IPOPTLEN];
-};
-
-struct nf_osf_nlmsg {
-	struct nf_osf_user_finger	f;
-	struct iphdr			ip;
-	struct tcphdr			tcp;
-};
-
-/* Defines for IANA option kinds */
-enum iana_options {
-	OSFOPT_EOL = 0,		/* End of options */
-	OSFOPT_NOP,		/* NOP */
-	OSFOPT_MSS,		/* Maximum segment size */
-	OSFOPT_WSO,		/* Window scale option */
-	OSFOPT_SACKP,		/* SACK permitted */
-	OSFOPT_SACK,		/* SACK */
-	OSFOPT_ECHO,
-	OSFOPT_ECHOREPLY,
-	OSFOPT_TS,		/* Timestamp option */
-	OSFOPT_POCP,		/* Partial Order Connection Permitted */
-	OSFOPT_POSP,		/* Partial Order Service Profile */
-
-	/* Others are not used in the current OSF */
-	OSFOPT_EMPTY = 255,
-};
-
-enum nf_osf_attr_type {
-	OSF_ATTR_UNSPEC,
-	OSF_ATTR_FINGER,
-	OSF_ATTR_MAX,
-};
-
-/*
- * Add/remove fingerprint from the kernel.
- */
-enum nf_osf_msg_types {
-	OSF_MSG_ADD,
-	OSF_MSG_REMOVE,
-	OSF_MSG_MAX,
-};
-
-#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
new file mode 100644
index 000000000000..3b93fbb9fc24
--- /dev/null
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -0,0 +1,106 @@
+#ifndef _NF_OSF_H
+#define _NF_OSF_H
+
+#include <linux/types.h>
+
+#define MAXGENRELEN	32
+
+#define NF_OSF_GENRE	(1 << 0)
+#define NF_OSF_TTL	(1 << 1)
+#define NF_OSF_LOG	(1 << 2)
+#define NF_OSF_INVERT	(1 << 3)
+
+#define NF_OSF_LOGLEVEL_ALL		0	/* log all matched fingerprints */
+#define NF_OSF_LOGLEVEL_FIRST		1	/* log only the first matced fingerprint */
+#define NF_OSF_LOGLEVEL_ALL_KNOWN	2	/* do not log unknown packets */
+
+#define NF_OSF_TTL_TRUE			0	/* True ip and fingerprint TTL comparison */
+
+/* Check if ip TTL is less than fingerprint one */
+#define NF_OSF_TTL_LESS			1
+
+/* Do not compare ip and fingerprint TTL at all */
+#define NF_OSF_TTL_NOCHECK		2
+
+#define NF_OSF_FLAGMASK		(NF_OSF_GENRE | NF_OSF_TTL | \
+				 NF_OSF_LOG | NF_OSF_INVERT)
+/* Wildcard MSS (kind of).
+ * It is used to implement a state machine for the different wildcard values
+ * of the MSS and window sizes.
+ */
+struct nf_osf_wc {
+	__u32	wc;
+	__u32	val;
+};
+
+/* This struct represents IANA options
+ * http://www.iana.org/assignments/tcp-parameters
+ */
+struct nf_osf_opt {
+	__u16			kind, length;
+	struct nf_osf_wc	wc;
+};
+
+struct nf_osf_info {
+	char	genre[MAXGENRELEN];
+	__u32	len;
+	__u32	flags;
+	__u32	loglevel;
+	__u32	ttl;
+};
+
+struct nf_osf_user_finger {
+	struct nf_osf_wc	wss;
+
+	__u8	ttl, df;
+	__u16	ss, mss;
+	__u16	opt_num;
+
+	char	genre[MAXGENRELEN];
+	char	version[MAXGENRELEN];
+	char	subtype[MAXGENRELEN];
+
+	/* MAX_IPOPTLEN is maximum if all options are NOPs or EOLs */
+	struct nf_osf_opt	opt[MAX_IPOPTLEN];
+};
+
+struct nf_osf_nlmsg {
+	struct nf_osf_user_finger	f;
+	struct iphdr			ip;
+	struct tcphdr			tcp;
+};
+
+/* Defines for IANA option kinds */
+enum iana_options {
+	OSFOPT_EOL = 0,		/* End of options */
+	OSFOPT_NOP,		/* NOP */
+	OSFOPT_MSS,		/* Maximum segment size */
+	OSFOPT_WSO,		/* Window scale option */
+	OSFOPT_SACKP,		/* SACK permitted */
+	OSFOPT_SACK,		/* SACK */
+	OSFOPT_ECHO,
+	OSFOPT_ECHOREPLY,
+	OSFOPT_TS,		/* Timestamp option */
+	OSFOPT_POCP,		/* Partial Order Connection Permitted */
+	OSFOPT_POSP,		/* Partial Order Service Profile */
+
+	/* Others are not used in the current OSF */
+	OSFOPT_EMPTY = 255,
+};
+
+enum nf_osf_attr_type {
+	OSF_ATTR_UNSPEC,
+	OSF_ATTR_FINGER,
+	OSF_ATTR_MAX,
+};
+
+/*
+ * Add/remove fingerprint from the kernel.
+ */
+enum nf_osf_msg_types {
+	OSF_MSG_ADD,
+	OSF_MSG_REMOVE,
+	OSF_MSG_MAX,
+};
+
+#endif /* _NF_OSF_H */
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index a90e90c27cef..c56c59605c2b 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -23,7 +23,7 @@
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 #define XT_OSF_GENRE		NF_OSF_GENRE
 #define XT_OSF_INVERT		NF_OSF_INVERT
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ba0fa11869ce..f9dba62c450f 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/x_tables.h>
 #include <net/netfilter/nf_log.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 /*
  * Indexed by dont-fragment bit.
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index bdacc4cffba4..9b2f3de7be4f 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -2,7 +2,7 @@
 #include <net/tcp.h>
 
 #include <net/netfilter/nf_tables.h>
-#include <linux/netfilter/nf_osf.h>
+#include <linux/netfilter/nfnetlink_osf.h>
 
 #define OSF_GENRE_SIZE		32
 
-- 
cgit v1.2.3


From e9697e2effad50c0081b3c72002d3975f8ab4347 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 3 Aug 2018 12:38:39 +0200
Subject: l2tp: ignore L2TP_ATTR_MTU

This attribute's handling is broken. It can only be used when creating
Ethernet pseudo-wires, in which case its value can be used as the
initial MTU for the l2tpeth device.
However, when handling update requests, L2TP_ATTR_MTU only modifies
session->mtu. This value is never propagated to the l2tpeth device.
Dump requests also return the value of session->mtu, which is not
synchronised anymore with the device MTU.

The same problem occurs if the device MTU is properly updated using the
generic IFLA_MTU attribute. In this case, session->mtu is not updated,
and L2TP_ATTR_MTU will report an invalid value again when dumping the
session.

It does not seem worthwhile to complexify l2tp_eth.c to synchronise
session->mtu with the device MTU. Even the ip-l2tp manpage advises to
use 'ip link' to initialise the MTU of l2tpeth devices (iproute2 does
not handle L2TP_ATTR_MTU at all anyway). So let's just ignore it
entirely.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/l2tp.h |  2 +-
 net/l2tp/l2tp_core.c      |  1 -
 net/l2tp/l2tp_core.h      |  2 --
 net/l2tp/l2tp_debugfs.c   |  3 +--
 net/l2tp/l2tp_eth.c       | 17 +++++++----------
 net/l2tp/l2tp_netlink.c   |  9 +--------
 6 files changed, 10 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 8bb8c7cfabe5..61158f5a1a5b 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -119,7 +119,7 @@ enum {
 	L2TP_ATTR_IP_DADDR,		/* u32 */
 	L2TP_ATTR_UDP_SPORT,		/* u16 */
 	L2TP_ATTR_UDP_DPORT,		/* u16 */
-	L2TP_ATTR_MTU,			/* u16 */
+	L2TP_ATTR_MTU,			/* u16 (not used) */
 	L2TP_ATTR_MRU,			/* u16 (not used) */
 	L2TP_ATTR_STATS,		/* nested */
 	L2TP_ATTR_IP6_SADDR,		/* struct in6_addr */
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index c61a467fd9b8..ac6a00bcec71 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1674,7 +1674,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 		if (cfg) {
 			session->pwtype = cfg->pw_type;
 			session->debug = cfg->debug;
-			session->mtu = cfg->mtu;
 			session->send_seq = cfg->send_seq;
 			session->recv_seq = cfg->recv_seq;
 			session->lns_mode = cfg->lns_mode;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 1ca39629031b..5804065dfbfb 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -64,7 +64,6 @@ struct l2tp_session_cfg {
 	int			peer_cookie_len; /* 0, 4 or 8 bytes */
 	int			reorder_timeout; /* configured reorder timeout
 						  * (in jiffies) */
-	int			mtu;
 	char			*ifname;
 };
 
@@ -108,7 +107,6 @@ struct l2tp_session {
 	int			reorder_timeout; /* configured reorder timeout
 						  * (in jiffies) */
 	int			reorder_skip;	/* set if skip to next nr */
-	int			mtu;
 	enum l2tp_pwtype	pwtype;
 	struct l2tp_stats	stats;
 	struct hlist_node	global_hlist;	/* Global hash list node */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index aee271741f5b..9821a1458555 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -191,8 +191,7 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 	if (session->send_seq || session->recv_seq)
 		seq_printf(m, "   nr %hu, ns %hu\n", session->nr, session->ns);
 	seq_printf(m, "   refcnt %d\n", refcount_read(&session->ref_count));
-	seq_printf(m, "   config %d/0/%c/%c/-/%s %08x %u\n",
-		   session->mtu,
+	seq_printf(m, "   config 0/0/%c/%c/-/%s %08x %u\n",
 		   session->recv_seq ? 'R' : '-',
 		   session->send_seq ? 'S' : '-',
 		   session->lns_mode ? "LNS" : "LAC",
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index cfca5e63ae31..3728986ec885 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -234,14 +234,11 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
 		overhead += sizeof(struct udphdr);
 		dev->needed_headroom += sizeof(struct udphdr);
 	}
-	if (session->mtu != 0) {
-		dev->mtu = session->mtu;
-		dev->needed_headroom += session->hdr_len;
-		return;
-	}
+
 	lock_sock(tunnel->sock);
 	l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
 	release_sock(tunnel->sock);
+
 	if (l3_overhead == 0) {
 		/* L3 Overhead couldn't be identified, this could be
 		 * because tunnel->sock was NULL or the socket's
@@ -255,12 +252,12 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
 	 */
 	overhead += session->hdr_len + ETH_HLEN + l3_overhead;
 
-	/* If PMTU discovery was enabled, use discovered MTU on L2TP device */
-	mtu = l2tp_tunnel_dst_mtu(tunnel);
-	if (mtu)
+	mtu = l2tp_tunnel_dst_mtu(tunnel) - overhead;
+	if (mtu < dev->min_mtu || mtu > dev->max_mtu)
+		dev->mtu = ETH_DATA_LEN - overhead;
+	else
 		dev->mtu = mtu;
-	session->mtu = dev->mtu - overhead;
-	dev->mtu = session->mtu;
+
 	dev->needed_headroom += session->hdr_len;
 }
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a7c409215336..2e1e92651545 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -608,9 +608,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
 		cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
 
-	if (info->attrs[L2TP_ATTR_MTU])
-		cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
-
 #ifdef CONFIG_MODULES
 	if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
 		genl_unlock();
@@ -698,9 +695,6 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
 		session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
 
-	if (info->attrs[L2TP_ATTR_MTU])
-		session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
-
 	ret = l2tp_session_notify(&l2tp_nl_family, info,
 				  session, L2TP_CMD_SESSION_MODIFY);
 
@@ -730,8 +724,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	    nla_put_u32(skb, L2TP_ATTR_PEER_SESSION_ID,
 			session->peer_session_id) ||
 	    nla_put_u32(skb, L2TP_ATTR_DEBUG, session->debug) ||
-	    nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype) ||
-	    nla_put_u16(skb, L2TP_ATTR_MTU, session->mtu))
+	    nla_put_u16(skb, L2TP_ATTR_PW_TYPE, session->pwtype))
 		goto nla_put_failure;
 
 	if ((session->ifname[0] &&
-- 
cgit v1.2.3


From af308b94a2a4a5a27bec9028354c4df444a7c8ba Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 2 Aug 2018 20:51:39 +0200
Subject: netfilter: nf_tables: add tunnel support

This patch implements the tunnel object type that can be used to
configure tunnels via metadata template through the existing lightweight
API from the ingress path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  69 ++++-
 net/core/dst.c                           |   1 +
 net/netfilter/Kconfig                    |   6 +
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nft_tunnel.c               | 458 +++++++++++++++++++++++++++++++
 5 files changed, 534 insertions(+), 1 deletion(-)
 create mode 100644 net/netfilter/nft_tunnel.c

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index f112ea52dc1a..3ee1198eeac1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1416,7 +1416,8 @@ enum nft_ct_helper_attributes {
 #define NFT_OBJECT_CT_HELPER	3
 #define NFT_OBJECT_LIMIT	4
 #define NFT_OBJECT_CONNLIMIT	5
-#define __NFT_OBJECT_MAX	6
+#define NFT_OBJECT_TUNNEL	6
+#define __NFT_OBJECT_MAX	7
 #define NFT_OBJECT_MAX		(__NFT_OBJECT_MAX - 1)
 
 /**
@@ -1580,4 +1581,70 @@ enum nft_ng_types {
 };
 #define NFT_NG_MAX	(__NFT_NG_MAX - 1)
 
+enum nft_tunnel_key_ip_attributes {
+	NFTA_TUNNEL_KEY_IP_UNSPEC,
+	NFTA_TUNNEL_KEY_IP_SRC,
+	NFTA_TUNNEL_KEY_IP_DST,
+	__NFTA_TUNNEL_KEY_IP_MAX
+};
+#define NFTA_TUNNEL_KEY_IP_MAX	(__NFTA_TUNNEL_KEY_IP_MAX - 1)
+
+enum nft_tunnel_ip6_attributes {
+	NFTA_TUNNEL_KEY_IP6_UNSPEC,
+	NFTA_TUNNEL_KEY_IP6_SRC,
+	NFTA_TUNNEL_KEY_IP6_DST,
+	NFTA_TUNNEL_KEY_IP6_FLOWLABEL,
+	__NFTA_TUNNEL_KEY_IP6_MAX
+};
+#define NFTA_TUNNEL_KEY_IP6_MAX	(__NFTA_TUNNEL_KEY_IP6_MAX - 1)
+
+enum nft_tunnel_opts_attributes {
+	NFTA_TUNNEL_KEY_OPTS_UNSPEC,
+	NFTA_TUNNEL_KEY_OPTS_VXLAN,
+	NFTA_TUNNEL_KEY_OPTS_ERSPAN,
+	__NFTA_TUNNEL_KEY_OPTS_MAX
+};
+#define NFTA_TUNNEL_KEY_OPTS_MAX	(__NFTA_TUNNEL_KEY_OPTS_MAX - 1)
+
+enum nft_tunnel_opts_vxlan_attributes {
+	NFTA_TUNNEL_KEY_VXLAN_UNSPEC,
+	NFTA_TUNNEL_KEY_VXLAN_GBP,
+	__NFTA_TUNNEL_KEY_VXLAN_MAX
+};
+#define NFTA_TUNNEL_KEY_VXLAN_MAX	(__NFTA_TUNNEL_KEY_VXLAN_MAX - 1)
+
+enum nft_tunnel_opts_erspan_attributes {
+	NFTA_TUNNEL_KEY_ERSPAN_UNSPEC,
+	NFTA_TUNNEL_KEY_ERSPAN_VERSION,
+	NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX,
+	NFTA_TUNNEL_KEY_ERSPAN_V2_HWID,
+	NFTA_TUNNEL_KEY_ERSPAN_V2_DIR,
+	__NFTA_TUNNEL_KEY_ERSPAN_MAX
+};
+#define NFTA_TUNNEL_KEY_ERSPAN_MAX	(__NFTA_TUNNEL_KEY_ERSPAN_MAX - 1)
+
+enum nft_tunnel_flags {
+	NFT_TUNNEL_F_ZERO_CSUM_TX	= (1 << 0),
+	NFT_TUNNEL_F_DONT_FRAGMENT	= (1 << 1),
+	NFT_TUNNEL_F_SEQ_NUMBER		= (1 << 2),
+};
+#define NFT_TUNNEL_F_MASK	(NFT_TUNNEL_F_ZERO_CSUM_TX | \
+				 NFT_TUNNEL_F_DONT_FRAGMENT | \
+				 NFT_TUNNEL_F_SEQ_NUMBER)
+
+enum nft_tunnel_key_attributes {
+	NFTA_TUNNEL_KEY_UNSPEC,
+	NFTA_TUNNEL_KEY_ID,
+	NFTA_TUNNEL_KEY_IP,
+	NFTA_TUNNEL_KEY_IP6,
+	NFTA_TUNNEL_KEY_FLAGS,
+	NFTA_TUNNEL_KEY_TOS,
+	NFTA_TUNNEL_KEY_TTL,
+	NFTA_TUNNEL_KEY_SPORT,
+	NFTA_TUNNEL_KEY_DPORT,
+	NFTA_TUNNEL_KEY_OPTS,
+	__NFTA_TUNNEL_KEY_MAX
+};
+#define NFTA_TUNNEL_KEY_MAX	(__NFTA_TUNNEL_KEY_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/net/core/dst.c b/net/core/dst.c
index 2d9b37f8944a..81ccf20e2826 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -307,6 +307,7 @@ void metadata_dst_free(struct metadata_dst *md_dst)
 #endif
 	kfree(md_dst);
 }
+EXPORT_SYMBOL_GPL(metadata_dst_free);
 
 struct metadata_dst __percpu *
 metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 55e399d5af10..654588088676 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -559,6 +559,12 @@ config NFT_NAT
 	  This option adds the "nat" expression that you can use to perform
 	  typical Network Address Translation (NAT) packet transformations.
 
+config NFT_TUNNEL
+	tristate "Netfilter nf_tables tunnel module"
+	help
+	  This option adds the "tunnel" expression that you can use to set
+	  tunneling policies.
+
 config NFT_OBJREF
 	tristate "Netfilter nf_tables stateful object reference module"
 	help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index cf61615cc529..16895e045b66 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
 obj-$(CONFIG_NFT_QUOTA)		+= nft_quota.o
 obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
 obj-$(CONFIG_NFT_REJECT_INET)	+= nft_reject_inet.o
+obj-$(CONFIG_NFT_TUNNEL)	+= nft_tunnel.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
new file mode 100644
index 000000000000..715613d99c20
--- /dev/null
+++ b/net/netfilter/nft_tunnel.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/dst_metadata.h>
+#include <net/ip_tunnels.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
+
+struct nft_tunnel_opts {
+	union {
+		struct vxlan_metadata	vxlan;
+		struct erspan_metadata	erspan;
+	} u;
+	u32	len;
+	u32	flags;
+};
+
+struct nft_tunnel_obj {
+	struct metadata_dst	*md;
+	struct nft_tunnel_opts	opts;
+};
+
+static const struct nla_policy nft_tunnel_ip_policy[NFTA_TUNNEL_KEY_IP_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_IP_SRC]	= { .type = NLA_U32 },
+	[NFTA_TUNNEL_KEY_IP_DST]	= { .type = NLA_U32 },
+};
+
+static int nft_tunnel_obj_ip_init(const struct nft_ctx *ctx,
+				  const struct nlattr *attr,
+				  struct ip_tunnel_info *info)
+{
+	struct nlattr *tb[NFTA_TUNNEL_KEY_IP_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP_MAX, attr,
+			       nft_tunnel_ip_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_TUNNEL_KEY_IP_DST])
+		return -EINVAL;
+
+	if (tb[NFTA_TUNNEL_KEY_IP_SRC])
+		info->key.u.ipv4.src = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_SRC]);
+	if (tb[NFTA_TUNNEL_KEY_IP_DST])
+		info->key.u.ipv4.dst = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP_DST]);
+
+	return 0;
+}
+
+static const struct nla_policy nft_tunnel_ip6_policy[NFTA_TUNNEL_KEY_IP6_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_IP6_SRC]	= { .len = sizeof(struct in6_addr), },
+	[NFTA_TUNNEL_KEY_IP6_DST]	= { .len = sizeof(struct in6_addr), },
+	[NFTA_TUNNEL_KEY_IP6_FLOWLABEL]	= { .type = NLA_U32, }
+};
+
+static int nft_tunnel_obj_ip6_init(const struct nft_ctx *ctx,
+				   const struct nlattr *attr,
+				   struct ip_tunnel_info *info)
+{
+	struct nlattr *tb[NFTA_TUNNEL_KEY_IP6_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_IP6_MAX, attr,
+			       nft_tunnel_ip6_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_TUNNEL_KEY_IP6_DST])
+		return -EINVAL;
+
+	if (tb[NFTA_TUNNEL_KEY_IP6_SRC]) {
+		memcpy(&info->key.u.ipv6.src,
+		       nla_data(tb[NFTA_TUNNEL_KEY_IP6_SRC]),
+		       sizeof(struct in6_addr));
+	}
+	if (tb[NFTA_TUNNEL_KEY_IP6_DST]) {
+		memcpy(&info->key.u.ipv6.dst,
+		       nla_data(tb[NFTA_TUNNEL_KEY_IP6_DST]),
+		       sizeof(struct in6_addr));
+	}
+	if (tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL])
+		info->key.label = nla_get_be32(tb[NFTA_TUNNEL_KEY_IP6_FLOWLABEL]);
+
+	info->mode |= IP_TUNNEL_INFO_IPV6;
+
+	return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_vxlan_policy[NFTA_TUNNEL_KEY_VXLAN_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_VXLAN_GBP]	= { .type = NLA_U32 },
+};
+
+static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
+				     struct nft_tunnel_opts *opts)
+{
+	struct nlattr *tb[NFTA_TUNNEL_KEY_VXLAN_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_VXLAN_MAX, attr,
+			       nft_tunnel_opts_vxlan_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFTA_TUNNEL_KEY_VXLAN_GBP])
+		return -EINVAL;
+
+	opts->u.vxlan.gbp = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_VXLAN_GBP]));
+
+	opts->len	= sizeof(struct vxlan_metadata);
+	opts->flags	= TUNNEL_VXLAN_OPT;
+
+	return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX]	= { .type = NLA_U32 },
+	[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR]	= { .type = NLA_U8 },
+	[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID]	= { .type = NLA_U8 },
+};
+
+static int nft_tunnel_obj_erspan_init(const struct nlattr *attr,
+				      struct nft_tunnel_opts *opts)
+{
+	struct nlattr *tb[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1];
+	uint8_t hwid, dir;
+	int err, version;
+
+	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_ERSPAN_MAX, attr,
+			       nft_tunnel_opts_erspan_policy, NULL);
+	if (err < 0)
+		return err;
+
+	version = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION]));
+	switch (version) {
+	case ERSPAN_VERSION:
+		if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX])
+			return -EINVAL;
+
+		opts->u.erspan.u.index =
+			nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX]);
+		break;
+	case ERSPAN_VERSION2:
+		if (!tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] ||
+		    !tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID])
+			return -EINVAL;
+
+		hwid = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID]);
+		dir = nla_get_u8(tb[NFTA_TUNNEL_KEY_ERSPAN_V2_DIR]);
+
+		set_hwid(&opts->u.erspan.u.md2, hwid);
+		opts->u.erspan.u.md2.dir = dir;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	opts->u.erspan.version = version;
+
+	opts->len	= sizeof(struct erspan_metadata);
+	opts->flags	= TUNNEL_ERSPAN_OPT;
+
+	return 0;
+}
+
+static const struct nla_policy nft_tunnel_opts_policy[NFTA_TUNNEL_KEY_OPTS_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_OPTS_VXLAN]	= { .type = NLA_NESTED, },
+	[NFTA_TUNNEL_KEY_OPTS_ERSPAN]	= { .type = NLA_NESTED, },
+};
+
+static int nft_tunnel_obj_opts_init(const struct nft_ctx *ctx,
+				    const struct nlattr *attr,
+				    struct ip_tunnel_info *info,
+				    struct nft_tunnel_opts *opts)
+{
+	struct nlattr *tb[NFTA_TUNNEL_KEY_OPTS_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_TUNNEL_KEY_OPTS_MAX, attr,
+			       nft_tunnel_opts_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_TUNNEL_KEY_OPTS_VXLAN]) {
+		err = nft_tunnel_obj_vxlan_init(tb[NFTA_TUNNEL_KEY_OPTS_VXLAN],
+						opts);
+	} else if (tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN]) {
+		err = nft_tunnel_obj_erspan_init(tb[NFTA_TUNNEL_KEY_OPTS_ERSPAN],
+						 opts);
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = {
+	[NFTA_TUNNEL_KEY_IP]	= { .type = NLA_NESTED, },
+	[NFTA_TUNNEL_KEY_IP6]	= { .type = NLA_NESTED, },
+	[NFTA_TUNNEL_KEY_ID]	= { .type = NLA_U32, },
+	[NFTA_TUNNEL_KEY_FLAGS]	= { .type = NLA_U32, },
+	[NFTA_TUNNEL_KEY_TOS]	= { .type = NLA_U8, },
+	[NFTA_TUNNEL_KEY_TTL]	= { .type = NLA_U8, },
+	[NFTA_TUNNEL_KEY_OPTS]	= { .type = NLA_NESTED, },
+};
+
+static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
+			       const struct nlattr * const tb[],
+			       struct nft_object *obj)
+{
+	struct nft_tunnel_obj *priv = nft_obj_data(obj);
+	struct ip_tunnel_info info;
+	struct metadata_dst *md;
+	int err;
+
+	if (!tb[NFTA_TUNNEL_KEY_ID])
+		return -EINVAL;
+
+	memset(&info, 0, sizeof(info));
+	info.mode		= IP_TUNNEL_INFO_TX;
+	info.key.tun_id		= key32_to_tunnel_id(nla_get_be32(tb[NFTA_TUNNEL_KEY_ID]));
+	info.key.tun_flags	= TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+
+	if (tb[NFTA_TUNNEL_KEY_IP]) {
+		err = nft_tunnel_obj_ip_init(ctx, tb[NFTA_TUNNEL_KEY_IP], &info);
+		if (err < 0)
+			return err;
+	} else if (tb[NFTA_TUNNEL_KEY_IP6]) {
+		err = nft_tunnel_obj_ip6_init(ctx, tb[NFTA_TUNNEL_KEY_IP6], &info);
+		if (err < 0)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_TUNNEL_KEY_SPORT]) {
+		info.key.tp_src =
+			ntohs(nla_get_be16(tb[NFTA_TUNNEL_KEY_SPORT]));
+	}
+	if (tb[NFTA_TUNNEL_KEY_DPORT]) {
+		info.key.tp_dst =
+			ntohs(nla_get_be16(tb[NFTA_TUNNEL_KEY_DPORT]));
+	}
+
+	if (tb[NFTA_TUNNEL_KEY_FLAGS]) {
+		u32 tun_flags;
+
+		tun_flags = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_FLAGS]));
+		if (tun_flags & ~NFT_TUNNEL_F_MASK)
+			return -EOPNOTSUPP;
+
+		if (tun_flags & NFT_TUNNEL_F_ZERO_CSUM_TX)
+			info.key.tun_flags &= ~TUNNEL_CSUM;
+		if (tun_flags & NFT_TUNNEL_F_DONT_FRAGMENT)
+			info.key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+		if (tun_flags & NFT_TUNNEL_F_SEQ_NUMBER)
+			info.key.tun_flags |= TUNNEL_SEQ;
+	}
+	if (tb[NFTA_TUNNEL_KEY_TOS])
+		info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
+	if (tb[NFTA_TUNNEL_KEY_TTL])
+		info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
+	else
+		info.key.ttl = U8_MAX;
+
+	if (tb[NFTA_TUNNEL_KEY_OPTS]) {
+		err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],
+					       &info, &priv->opts);
+		if (err < 0)
+			return err;
+	}
+
+	md = metadata_dst_alloc(priv->opts.len, METADATA_IP_TUNNEL, GFP_KERNEL);
+	if (!md)
+		return -ENOMEM;
+
+	memcpy(&md->u.tun_info, &info, sizeof(info));
+	ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len,
+				priv->opts.flags);
+	priv->md = md;
+
+	return 0;
+}
+
+static inline void nft_tunnel_obj_eval(struct nft_object *obj,
+				       struct nft_regs *regs,
+				       const struct nft_pktinfo *pkt)
+{
+	struct nft_tunnel_obj *priv = nft_obj_data(obj);
+	struct sk_buff *skb = pkt->skb;
+
+	skb_dst_drop(skb);
+	dst_hold((struct dst_entry *) priv->md);
+	skb_dst_set(skb, (struct dst_entry *) priv->md);
+}
+
+static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info)
+{
+	struct nlattr *nest;
+
+	if (info->mode & IP_TUNNEL_INFO_IPV6) {
+		nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP6);
+		if (!nest)
+			return -1;
+
+		if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC, &info->key.u.ipv6.src) < 0 ||
+		    nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST, &info->key.u.ipv6.dst) < 0 ||
+		    nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL, info->key.label))
+			return -1;
+
+		nla_nest_end(skb, nest);
+	} else {
+		nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_IP);
+		if (!nest)
+			return -1;
+
+		if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC, info->key.u.ipv4.src) < 0 ||
+		    nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST, info->key.u.ipv4.dst) < 0)
+			return -1;
+
+		nla_nest_end(skb, nest);
+	}
+
+	return 0;
+}
+
+static int nft_tunnel_opts_dump(struct sk_buff *skb,
+				struct nft_tunnel_obj *priv)
+{
+	struct nft_tunnel_opts *opts = &priv->opts;
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_TUNNEL_KEY_OPTS);
+	if (!nest)
+		return -1;
+
+	if (opts->flags & TUNNEL_VXLAN_OPT) {
+		if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP,
+				 htonl(opts->u.vxlan.gbp)))
+			return -1;
+	} else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+		switch (opts->u.erspan.version) {
+		case ERSPAN_VERSION:
+			if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX,
+					 opts->u.erspan.u.index))
+				return -1;
+			break;
+		case ERSPAN_VERSION2:
+			if (nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_HWID,
+				       get_hwid(&opts->u.erspan.u.md2)) ||
+			    nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_DIR,
+				       opts->u.erspan.u.md2.dir))
+				return -1;
+			break;
+		}
+	}
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
+static int nft_tunnel_ports_dump(struct sk_buff *skb,
+				 struct ip_tunnel_info *info)
+{
+	if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, htons(info->key.tp_src)) < 0 ||
+	    nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, htons(info->key.tp_dst)) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int nft_tunnel_flags_dump(struct sk_buff *skb,
+				 struct ip_tunnel_info *info)
+{
+	u32 flags = 0;
+
+	if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+		flags |= NFT_TUNNEL_F_DONT_FRAGMENT;
+	if (!(info->key.tun_flags & TUNNEL_CSUM))
+		flags |= NFT_TUNNEL_F_ZERO_CSUM_TX;
+	if (info->key.tun_flags & TUNNEL_SEQ)
+		flags |= NFT_TUNNEL_F_SEQ_NUMBER;
+
+	if (nla_put_be32(skb, NFTA_TUNNEL_KEY_FLAGS, htonl(flags)) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int nft_tunnel_obj_dump(struct sk_buff *skb,
+			       struct nft_object *obj, bool reset)
+{
+	struct nft_tunnel_obj *priv = nft_obj_data(obj);
+	struct ip_tunnel_info *info = &priv->md->u.tun_info;
+
+	if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ID,
+			 tunnel_id_to_key32(info->key.tun_id)) ||
+	    nft_tunnel_ip_dump(skb, info) < 0 ||
+	    nft_tunnel_ports_dump(skb, info) < 0 ||
+	    nft_tunnel_flags_dump(skb, info) < 0 ||
+	    nla_put_u8(skb, NFTA_TUNNEL_KEY_TOS, info->key.tos) ||
+	    nla_put_u8(skb, NFTA_TUNNEL_KEY_TTL, info->key.ttl) ||
+	    nft_tunnel_opts_dump(skb, priv) < 0)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static void nft_tunnel_obj_destroy(const struct nft_ctx *ctx,
+				   struct nft_object *obj)
+{
+	struct nft_tunnel_obj *priv = nft_obj_data(obj);
+
+	metadata_dst_free(priv->md);
+}
+
+static struct nft_object_type nft_tunnel_obj_type;
+static const struct nft_object_ops nft_tunnel_obj_ops = {
+	.type		= &nft_tunnel_obj_type,
+	.size		= sizeof(struct nft_tunnel_obj),
+	.eval		= nft_tunnel_obj_eval,
+	.init		= nft_tunnel_obj_init,
+	.destroy	= nft_tunnel_obj_destroy,
+	.dump		= nft_tunnel_obj_dump,
+};
+
+static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_TUNNEL,
+	.ops		= &nft_tunnel_obj_ops,
+	.maxattr	= NFTA_TUNNEL_KEY_MAX,
+	.policy		= nft_tunnel_key_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_tunnel_module_init(void)
+{
+	return nft_register_obj(&nft_tunnel_obj_type);
+}
+
+static void __exit nft_tunnel_module_exit(void)
+{
+	nft_unregister_obj(&nft_tunnel_obj_type);
+}
+
+module_init(nft_tunnel_module_init);
+module_exit(nft_tunnel_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
-- 
cgit v1.2.3


From aaecfdb5c5dd8bac2dfd112166844a9f2d5711f0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 2 Aug 2018 20:51:46 +0200
Subject: netfilter: nf_tables: match on tunnel metadata

This patch allows us to match on the tunnel metadata that is available
of the packet. We can use this to validate if the packet comes from/goes
to tunnel and the corresponding tunnel ID.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  15 +++++
 net/netfilter/nft_tunnel.c               | 112 ++++++++++++++++++++++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 3ee1198eeac1..357862d948de 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1647,4 +1647,19 @@ enum nft_tunnel_key_attributes {
 };
 #define NFTA_TUNNEL_KEY_MAX	(__NFTA_TUNNEL_KEY_MAX - 1)
 
+enum nft_tunnel_keys {
+	NFT_TUNNEL_PATH,
+	NFT_TUNNEL_ID,
+	__NFT_TUNNEL_MAX
+};
+#define NFT_TUNNEL_MAX	(__NFT_TUNNEL_MAX - 1)
+
+enum nft_tunnel_attributes {
+	NFTA_TUNNEL_UNSPEC,
+	NFTA_TUNNEL_KEY,
+	NFTA_TUNNEL_DREG,
+	__NFTA_TUNNEL_MAX
+};
+#define NFTA_TUNNEL_MAX	(__NFTA_TUNNEL_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 715613d99c20..9332d7933dd5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -12,6 +12,104 @@
 #include <net/vxlan.h>
 #include <net/erspan.h>
 
+struct nft_tunnel {
+	enum nft_tunnel_keys	key:8;
+	enum nft_registers	dreg:8;
+};
+
+static void nft_tunnel_get_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	const struct nft_tunnel *priv = nft_expr_priv(expr);
+	u32 *dest = &regs->data[priv->dreg];
+	struct ip_tunnel_info *tun_info;
+
+	tun_info = skb_tunnel_info(pkt->skb);
+
+	switch (priv->key) {
+	case NFT_TUNNEL_PATH:
+		nft_reg_store8(dest, !!tun_info);
+		break;
+	case NFT_TUNNEL_ID:
+		if (!tun_info) {
+			regs->verdict.code = NFT_BREAK;
+			return;
+		}
+		*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+		break;
+	default:
+		WARN_ON(1);
+		regs->verdict.code = NFT_BREAK;
+	}
+}
+
+static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
+	[NFTA_TUNNEL_KEY]	= { .type = NLA_U32 },
+	[NFTA_TUNNEL_DREG]	= { .type = NLA_U32 },
+};
+
+static int nft_tunnel_get_init(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr,
+			       const struct nlattr * const tb[])
+{
+	struct nft_tunnel *priv = nft_expr_priv(expr);
+	u32 len;
+
+	if (!tb[NFTA_TUNNEL_KEY] &&
+	    !tb[NFTA_TUNNEL_DREG])
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY]));
+	switch (priv->key) {
+	case NFT_TUNNEL_PATH:
+		len = sizeof(u8);
+		break;
+	case NFT_TUNNEL_ID:
+		len = sizeof(u32);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
+
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, len);
+}
+
+static int nft_tunnel_get_dump(struct sk_buff *skb,
+			       const struct nft_expr *expr)
+{
+	const struct nft_tunnel *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_TUNNEL_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_tunnel_type;
+static const struct nft_expr_ops nft_tunnel_get_ops = {
+	.type		= &nft_tunnel_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_tunnel)),
+	.eval		= nft_tunnel_get_eval,
+	.init		= nft_tunnel_get_init,
+	.dump		= nft_tunnel_get_dump,
+};
+
+static struct nft_expr_type nft_tunnel_type __read_mostly = {
+	.name		= "tunnel",
+	.ops		= &nft_tunnel_get_ops,
+	.policy		= nft_tunnel_policy,
+	.maxattr	= NFTA_TUNNEL_MAX,
+	.owner		= THIS_MODULE,
+};
+
 struct nft_tunnel_opts {
 	union {
 		struct vxlan_metadata	vxlan;
@@ -442,12 +540,23 @@ static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
 
 static int __init nft_tunnel_module_init(void)
 {
-	return nft_register_obj(&nft_tunnel_obj_type);
+	int err;
+
+	err = nft_register_expr(&nft_tunnel_type);
+	if (err < 0)
+		return err;
+
+	err = nft_register_obj(&nft_tunnel_obj_type);
+	if (err < 0)
+		nft_unregister_expr(&nft_tunnel_type);
+
+	return err;
 }
 
 static void __exit nft_tunnel_module_exit(void)
 {
 	nft_unregister_obj(&nft_tunnel_obj_type);
+	nft_unregister_expr(&nft_tunnel_type);
 }
 
 module_init(nft_tunnel_module_init);
@@ -455,4 +564,5 @@ module_exit(nft_tunnel_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("tunnel");
 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_TUNNEL);
-- 
cgit v1.2.3


From 94276fa8a2a4c08ccb2e9d55e88b95dc972ccea3 Mon Sep 17 00:00:00 2001
From: Máté Eckl <ecklm94@gmail.com>
Date: Fri, 3 Aug 2018 13:36:13 +0200
Subject: netfilter: bridge: Expose nf_tables bridge hook priorities through
 uapi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Netfilter exposes standard hook priorities in case of ipv4, ipv6 and
arp but not in case of bridge.

This patch exposes the hook priority values of the bridge family (which are
different from the formerly mentioned) via uapi so that they can be used by
user-space applications just like the others.

Signed-off-by: Máté Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h      | 11 -----------
 include/uapi/linux/netfilter_bridge.h | 11 +++++++++++
 net/bridge/br_netfilter_hooks.c       |  1 +
 net/bridge/netfilter/ebtable_filter.c |  1 +
 net/bridge/netfilter/ebtable_nat.c    |  1 +
 5 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index b671fdfd212b..fa0686500970 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -5,17 +5,6 @@
 #include <uapi/linux/netfilter_bridge.h>
 #include <linux/skbuff.h>
 
-enum nf_br_hook_priorities {
-	NF_BR_PRI_FIRST = INT_MIN,
-	NF_BR_PRI_NAT_DST_BRIDGED = -300,
-	NF_BR_PRI_FILTER_BRIDGED = -200,
-	NF_BR_PRI_BRNF = 0,
-	NF_BR_PRI_NAT_DST_OTHER = 100,
-	NF_BR_PRI_FILTER_OTHER = 200,
-	NF_BR_PRI_NAT_SRC = 300,
-	NF_BR_PRI_LAST = INT_MAX,
-};
-
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h
index 12fb77633f83..156ccd089df1 100644
--- a/include/uapi/linux/netfilter_bridge.h
+++ b/include/uapi/linux/netfilter_bridge.h
@@ -26,4 +26,15 @@
 #define NF_BR_BROUTING		5
 #define NF_BR_NUMHOOKS		6
 
+enum nf_br_hook_priorities {
+	NF_BR_PRI_FIRST = INT_MIN,
+	NF_BR_PRI_NAT_DST_BRIDGED = -300,
+	NF_BR_PRI_FILTER_BRIDGED = -200,
+	NF_BR_PRI_BRNF = 0,
+	NF_BR_PRI_NAT_DST_OTHER = 100,
+	NF_BR_PRI_FILTER_OTHER = 200,
+	NF_BR_PRI_NAT_SRC = 300,
+	NF_BR_PRI_LAST = INT_MAX,
+};
+
 #endif /* _UAPI__LINUX_BRIDGE_NETFILTER_H */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 9b16eaf33819..6e0dc6bcd32a 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -26,6 +26,7 @@
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
 #include <linux/netfilter_bridge.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c41da5fac84f..550324c516ee 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/netfilter_bridge/ebtables.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/module.h>
 
 #define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 08df7406ecb3..c0fb3ca518af 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/netfilter_bridge/ebtables.h>
+#include <uapi/linux/netfilter_bridge.h>
 #include <linux/module.h>
 
 #define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \
-- 
cgit v1.2.3


From eb9950eb31f56e57582a61c92073336d04a26542 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 3 Aug 2018 17:06:56 +0100
Subject: rxrpc: Push iov_iter up from rxrpc_kernel_recv_data() to caller

Push iov_iter up from rxrpc_kernel_recv_data() to its caller to allow
non-contiguous iovs to be passed down, thereby permitting file reading to
be simplified in the AFS filesystem in a future patch.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/rxrpc.c         | 28 +++++++++++++++++-----------
 include/net/af_rxrpc.h |  2 +-
 net/rxrpc/recvmsg.c    | 33 +++++++++++----------------------
 3 files changed, 29 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a1b18082991b..19db5f672a9d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -346,7 +346,6 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
-	size_t offset;
 	s64 tx_total_len;
 	int ret;
 
@@ -433,10 +432,10 @@ error_do_abort:
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
 	} else {
-		offset = 0;
-		rxrpc_kernel_recv_data(call->net->socket, rxcall, NULL,
-				       0, &offset, false, &call->abort_code,
-				       &call->service_id);
+		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+		rxrpc_kernel_recv_data(call->net->socket, rxcall,
+				       &msg.msg_iter, false,
+				       &call->abort_code, &call->service_id);
 		ac->abort_code = call->abort_code;
 		ac->responded = true;
 	}
@@ -467,13 +466,14 @@ static void afs_deliver_to_call(struct afs_call *call)
 	       state == AFS_CALL_SV_AWAIT_ACK
 	       ) {
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
-			size_t offset = 0;
+			struct iov_iter iter;
+
+			iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
-						     call->rxcall,
-						     NULL, 0, &offset, false,
+						     call->rxcall, &iter, false,
 						     &remote_abort,
 						     &call->service_id);
-			trace_afs_recv_data(call, 0, offset, false, ret);
+			trace_afs_recv_data(call, 0, 0, false, ret);
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
@@ -894,6 +894,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 		     bool want_more)
 {
 	struct afs_net *net = call->net;
+	struct iov_iter iter;
+	struct kvec iov;
 	enum afs_call_state state;
 	u32 remote_abort = 0;
 	int ret;
@@ -903,10 +905,14 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	ASSERTCMP(call->offset, <=, count);
 
-	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall,
-				     buf, count, &call->offset,
+	iov.iov_base = buf + call->offset;
+	iov.iov_len = count - call->offset;
+	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
 				     want_more, &remote_abort,
 				     &call->service_id);
+	call->offset += (count - call->offset) - iov_iter_count(&iter);
 	trace_afs_recv_data(call, count, call->offset, want_more, ret);
 	if (ret == 0 || ret == -EAGAIN)
 		return ret;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 8ae8ee004258..f53edb3754bc 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -61,7 +61,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 			   struct msghdr *, size_t,
 			   rxrpc_notify_end_tx_t);
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
-			   void *, size_t, size_t *, bool, u32 *, u16 *);
+			   struct iov_iter *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
 			     u32, int, const char *);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a57ea96c84ea..816b19a78809 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -611,9 +611,7 @@ wait_error:
  * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
  * @sock: The socket that the call exists on
  * @call: The call to send data through
- * @buf: The buffer to receive into
- * @size: The size of the buffer, including data already read
- * @_offset: The running offset into the buffer.
+ * @iter: The buffer to receive into
  * @want_more: True if more data is expected to be read
  * @_abort: Where the abort code is stored if -ECONNABORTED is returned
  * @_service: Where to store the actual service ID (may be upgraded)
@@ -626,39 +624,30 @@ wait_error:
  * Note that we may return -EAGAIN to drain empty packets at the end of the
  * data, even if we've already copied over the requested data.
  *
- * This function adds the amount it transfers to *_offset, so this should be
- * precleared as appropriate.  Note that the amount remaining in the buffer is
- * taken to be size - *_offset.
- *
  * *_abort should also be initialised to 0.
  */
 int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
-			   void *buf, size_t size, size_t *_offset,
+			   struct iov_iter *iter,
 			   bool want_more, u32 *_abort, u16 *_service)
 {
-	struct iov_iter iter;
-	struct kvec iov;
+	size_t offset = 0;
 	int ret;
 
-	_enter("{%d,%s},%zu/%zu,%d",
+	_enter("{%d,%s},%zu,%d",
 	       call->debug_id, rxrpc_call_states[call->state],
-	       *_offset, size, want_more);
+	       iov_iter_count(iter), want_more);
 
-	ASSERTCMP(*_offset, <=, size);
 	ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
 
-	iov.iov_base = buf + *_offset;
-	iov.iov_len = size - *_offset;
-	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
-
 	mutex_lock(&call->user_mutex);
 
 	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_CLIENT_RECV_REPLY:
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
 	case RXRPC_CALL_SERVER_ACK_REQUEST:
-		ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
-					 _offset);
+		ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
+					 iov_iter_count(iter), 0,
+					 &offset);
 		if (ret < 0)
 			goto out;
 
@@ -667,7 +656,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
 		 * full buffer or have been given -EAGAIN.
 		 */
 		if (ret == 1) {
-			if (*_offset < size)
+			if (iov_iter_count(iter) > 0)
 				goto short_data;
 			if (!want_more)
 				goto read_phase_complete;
@@ -704,7 +693,7 @@ out:
 	if (_service)
 		*_service = call->service_id;
 	mutex_unlock(&call->user_mutex);
-	_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+	_leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort);
 	return ret;
 
 short_data:
@@ -720,7 +709,7 @@ call_complete:
 	ret = call->error;
 	if (call->completion == RXRPC_CALL_SUCCEEDED) {
 		ret = 1;
-		if (size > 0)
+		if (iov_iter_count(iter) > 0)
 			ret = -ECONNRESET;
 	}
 	goto out;
-- 
cgit v1.2.3


From c2b6d621c4ffe9936adf7a55c8b1c769672c306f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 28 Jun 2018 15:53:17 -0400
Subject: new primitive: discard_new_inode()

	We don't want open-by-handle picking half-set-up in-core
struct inode from e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
on their way to extinction.  However, we can't just have them
unhashed - otherwise open-by-handle immediately *after* that would've
ended up creating a new in-core inode over the on-disk one that
is in process of being freed right under us.

	Solution: new flag (I_CREATING) set by insert_inode_locked() and
removed by unlock_new_inode() and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations.  That primitive unlocks new
inode, but leaves I_CREATING in place.

	iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
	insert_inode_locked() treats the same as instant -EBUSY.
	ilookup() treats those as icache miss.

[Fix by Dan Carpenter <dan.carpenter@oracle.com> folded in]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c        |  2 +-
 fs/inode.c         | 45 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/fs.h |  6 +++++-
 3 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index a7d9e7a4c283..11b753d29409 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..6cd2e7ba9f4d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -804,6 +804,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -831,6 +835,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
+		if (unlikely(inode->i_state & I_CREATING)) {
+			spin_unlock(&inode->i_lock);
+			return ERR_PTR(-ESTALE);
+		}
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
 		return inode;
@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
 	lockdep_annotate_inode_mutex_key(inode);
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
-	inode->i_state &= ~I_NEW;
+	inode->i_state &= ~I_NEW & ~I_CREATING;
 	smp_mb();
 	wake_up_bit(&inode->i_state, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
 
+void discard_new_inode(struct inode *inode)
+{
+	lockdep_annotate_inode_mutex_key(inode);
+	spin_lock(&inode->i_lock);
+	WARN_ON(!(inode->i_state & I_NEW));
+	inode->i_state &= ~I_NEW;
+	smp_mb();
+	wake_up_bit(&inode->i_state, __I_NEW);
+	spin_unlock(&inode->i_lock);
+	iput(inode);
+}
+EXPORT_SYMBOL(discard_new_inode);
+
 /**
  * lock_two_nondirectories - take two i_mutexes on non-directory objects
  *
@@ -1039,6 +1060,8 @@ again:
 		 * Use the old inode instead of the preallocated one.
 		 */
 		spin_unlock(&inode_hash_lock);
+		if (IS_ERR(old))
+			return NULL;
 		wait_on_inode(old);
 		if (unlikely(inode_unhashed(old))) {
 			iput(old);
@@ -1128,6 +1151,8 @@ again:
 	inode = find_inode_fast(sb, head, ino);
 	spin_unlock(&inode_hash_lock);
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1165,6 +1190,8 @@ again:
 		 */
 		spin_unlock(&inode_hash_lock);
 		destroy_inode(inode);
+		if (IS_ERR(old))
+			return NULL;
 		inode = old;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
 	inode = find_inode(sb, head, test, data);
 	spin_unlock(&inode_hash_lock);
 
-	return inode;
+	return IS_ERR(inode) ? NULL : inode;
 }
 EXPORT_SYMBOL(ilookup5_nowait);
 
@@ -1338,6 +1365,8 @@ again:
 	spin_unlock(&inode_hash_lock);
 
 	if (inode) {
+		if (IS_ERR(inode))
+			return NULL;
 		wait_on_inode(inode);
 		if (unlikely(inode_unhashed(inode))) {
 			iput(inode);
@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode)
 		}
 		if (likely(!old)) {
 			spin_lock(&inode->i_lock);
-			inode->i_state |= I_NEW;
+			inode->i_state |= I_NEW | I_CREATING;
 			hlist_add_head(&inode->i_hash, head);
 			spin_unlock(&inode->i_lock);
 			spin_unlock(&inode_hash_lock);
 			return 0;
 		}
+		if (unlikely(old->i_state & I_CREATING)) {
+			spin_unlock(&old->i_lock);
+			spin_unlock(&inode_hash_lock);
+			return -EBUSY;
+		}
 		__iget(old);
 		spin_unlock(&old->i_lock);
 		spin_unlock(&inode_hash_lock);
@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked);
 int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
-	struct inode *old = inode_insert5(inode, hashval, test, NULL, data);
+	struct inode *old;
+
+	inode->i_state |= I_CREATING;
+	old = inode_insert5(inode, hashval, test, NULL, data);
 
 	if (old != inode) {
 		iput(old);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5c91108846db..a42600565925 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
  *			and work dirs among overlayfs mounts.
  *
+ * I_CREATING		New object's inode in the middle of setting up.
+ *
  * Q: What is the difference between I_WILL_FREE and I_FREEING?
  */
 #define I_DIRTY_SYNC		(1 << 0)
@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 #define __I_DIRTY_TIME_EXPIRED	12
 #define I_DIRTY_TIME_EXPIRED	(1 << __I_DIRTY_TIME_EXPIRED)
 #define I_WB_SWITCH		(1 << 13)
-#define I_OVL_INUSE			(1 << 14)
+#define I_OVL_INUSE		(1 << 14)
+#define I_CREATING		(1 << 15)
 
 #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
 #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
 static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
 #endif
 extern void unlock_new_inode(struct inode *);
+extern void discard_new_inode(struct inode *);
 extern unsigned int get_next_ino(void);
 extern void evict_inodes(struct super_block *sb);
 
-- 
cgit v1.2.3


From e90561d40f830f1266d9531ae95eae8252dd8fa1 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Fri, 3 Aug 2018 07:37:29 -0400
Subject: media: vsp1: Support Interlaced display pipelines

Calculate the top and bottom fields for the interlaced frames and
utilise the extended display list command feature to implement the
auto-field operations. This allows the DU to update the VSP2 registers
dynamically based upon the currently processing field.

Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vsp1/vsp1_dl.c   | 10 +++++
 drivers/media/platform/vsp1/vsp1_drm.c  |  6 ++-
 drivers/media/platform/vsp1/vsp1_pipe.h |  2 +
 drivers/media/platform/vsp1/vsp1_regs.h |  3 ++
 drivers/media/platform/vsp1/vsp1_rpf.c  | 72 +++++++++++++++++++++++++++++++--
 include/media/vsp1.h                    |  2 +
 6 files changed, 90 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c
index 6021c43c53b4..9255b5ee2cb8 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -947,6 +947,8 @@ void vsp1_dl_list_commit(struct vsp1_dl_list *dl, bool internal)
  */
 unsigned int vsp1_dlm_irq_frame_end(struct vsp1_dl_manager *dlm)
 {
+	struct vsp1_device *vsp1 = dlm->vsp1;
+	u32 status = vsp1_read(vsp1, VI6_STATUS);
 	unsigned int flags = 0;
 
 	spin_lock(&dlm->lock);
@@ -971,6 +973,14 @@ unsigned int vsp1_dlm_irq_frame_end(struct vsp1_dl_manager *dlm)
 	if (vsp1_dl_list_hw_update_pending(dlm))
 		goto done;
 
+	/*
+	 * Progressive streams report only TOP fields. If we have a BOTTOM
+	 * field, we are interlaced, and expect the frame to complete on the
+	 * next frame end interrupt.
+	 */
+	if (status & VI6_STATUS_FLD_STD(dlm->index))
+		goto done;
+
 	/*
 	 * The device starts processing the queued display list right after the
 	 * frame end interrupt. The display list thus becomes active.
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index a16856856789..87dc3ddbe2d3 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -670,9 +670,11 @@ int vsp1_du_setup_lif(struct device *dev, unsigned int pipe_index,
 
 	drm_pipe->width = cfg->width;
 	drm_pipe->height = cfg->height;
+	pipe->interlaced = cfg->interlaced;
 
-	dev_dbg(vsp1->dev, "%s: configuring LIF%u with format %ux%u\n",
-		__func__, pipe_index, cfg->width, cfg->height);
+	dev_dbg(vsp1->dev, "%s: configuring LIF%u with format %ux%u%s\n",
+		__func__, pipe_index, cfg->width, cfg->height,
+		pipe->interlaced ? "i" : "");
 
 	mutex_lock(&vsp1->drm->lock);
 
diff --git a/drivers/media/platform/vsp1/vsp1_pipe.h b/drivers/media/platform/vsp1/vsp1_pipe.h
index 743d8f0db45c..ae646c9ef337 100644
--- a/drivers/media/platform/vsp1/vsp1_pipe.h
+++ b/drivers/media/platform/vsp1/vsp1_pipe.h
@@ -104,6 +104,7 @@ struct vsp1_partition {
  * @entities: list of entities in the pipeline
  * @stream_config: cached stream configuration for video pipelines
  * @configured: when false the @stream_config shall be written to the hardware
+ * @interlaced: True when the pipeline is configured in interlaced mode
  * @partitions: The number of partitions used to process one frame
  * @partition: The current partition for configuration to process
  * @part_table: The pre-calculated partitions used by the pipeline
@@ -142,6 +143,7 @@ struct vsp1_pipeline {
 
 	struct vsp1_dl_body *stream_config;
 	bool configured;
+	bool interlaced;
 
 	unsigned int partitions;
 	struct vsp1_partition *partition;
diff --git a/drivers/media/platform/vsp1/vsp1_regs.h b/drivers/media/platform/vsp1/vsp1_regs.h
index 5ea9f9070cf3..3738ff2f7b85 100644
--- a/drivers/media/platform/vsp1/vsp1_regs.h
+++ b/drivers/media/platform/vsp1/vsp1_regs.h
@@ -28,6 +28,7 @@
 #define VI6_SRESET_SRTS(n)		(1 << (n))
 
 #define VI6_STATUS			0x0038
+#define VI6_STATUS_FLD_STD(n)		(1 << ((n) + 28))
 #define VI6_STATUS_SYS_ACT(n)		(1 << ((n) + 8))
 
 #define VI6_WPF_IRQ_ENB(n)		(0x0048 + (n) * 12)
@@ -80,6 +81,8 @@
 #define VI6_DL_EXT_CTRL_EXPRI		(1 << 4)
 #define VI6_DL_EXT_CTRL_EXT		(1 << 0)
 
+#define VI6_DL_EXT_AUTOFLD_INT		BIT(0)
+
 #define VI6_DL_BODY_SIZE		0x0120
 #define VI6_DL_BODY_SIZE_UPD		(1 << 24)
 #define VI6_DL_BODY_SIZE_BS_MASK	(0x1ffff << 0)
diff --git a/drivers/media/platform/vsp1/vsp1_rpf.c b/drivers/media/platform/vsp1/vsp1_rpf.c
index 69e5fe6e6b50..f8005b60b9d2 100644
--- a/drivers/media/platform/vsp1/vsp1_rpf.c
+++ b/drivers/media/platform/vsp1/vsp1_rpf.c
@@ -20,6 +20,18 @@
 #define RPF_MAX_WIDTH				8190
 #define RPF_MAX_HEIGHT				8190
 
+/* Pre extended display list command data structure. */
+struct vsp1_extcmd_auto_fld_body {
+	u32 top_y0;
+	u32 bottom_y0;
+	u32 top_c0;
+	u32 bottom_c0;
+	u32 top_c1;
+	u32 bottom_c1;
+	u32 reserved0;
+	u32 reserved1;
+} __packed;
+
 /* -----------------------------------------------------------------------------
  * Device Access
  */
@@ -64,6 +76,14 @@ static void rpf_configure_stream(struct vsp1_entity *entity,
 		pstride |= format->plane_fmt[1].bytesperline
 			<< VI6_RPF_SRCM_PSTRIDE_C_SHIFT;
 
+	/*
+	 * pstride has both STRIDE_Y and STRIDE_C, but multiplying the whole
+	 * of pstride by 2 is conveniently OK here as we are multiplying both
+	 * values.
+	 */
+	if (pipe->interlaced)
+		pstride *= 2;
+
 	vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_PSTRIDE, pstride);
 
 	/* Format */
@@ -100,6 +120,9 @@ static void rpf_configure_stream(struct vsp1_entity *entity,
 		top = compose->top;
 	}
 
+	if (pipe->interlaced)
+		top /= 2;
+
 	vsp1_rpf_write(rpf, dlb, VI6_RPF_LOC,
 		       (left << VI6_RPF_LOC_HCOORD_SHIFT) |
 		       (top << VI6_RPF_LOC_VCOORD_SHIFT));
@@ -169,6 +192,36 @@ static void rpf_configure_stream(struct vsp1_entity *entity,
 
 }
 
+static void vsp1_rpf_configure_autofld(struct vsp1_rwpf *rpf,
+				       struct vsp1_dl_list *dl)
+{
+	const struct v4l2_pix_format_mplane *format = &rpf->format;
+	struct vsp1_dl_ext_cmd *cmd;
+	struct vsp1_extcmd_auto_fld_body *auto_fld;
+	u32 offset_y, offset_c;
+
+	cmd = vsp1_dl_get_pre_cmd(dl);
+	if (WARN_ONCE(!cmd, "Failed to obtain an autofld cmd"))
+		return;
+
+	/* Re-index our auto_fld to match the current RPF. */
+	auto_fld = cmd->data;
+	auto_fld = &auto_fld[rpf->entity.index];
+
+	auto_fld->top_y0 = rpf->mem.addr[0];
+	auto_fld->top_c0 = rpf->mem.addr[1];
+	auto_fld->top_c1 = rpf->mem.addr[2];
+
+	offset_y = format->plane_fmt[0].bytesperline;
+	offset_c = format->plane_fmt[1].bytesperline;
+
+	auto_fld->bottom_y0 = rpf->mem.addr[0] + offset_y;
+	auto_fld->bottom_c0 = rpf->mem.addr[1] + offset_c;
+	auto_fld->bottom_c1 = rpf->mem.addr[2] + offset_c;
+
+	cmd->flags |= VI6_DL_EXT_AUTOFLD_INT | BIT(16 + rpf->entity.index);
+}
+
 static void rpf_configure_frame(struct vsp1_entity *entity,
 				struct vsp1_pipeline *pipe,
 				struct vsp1_dl_list *dl,
@@ -221,6 +274,11 @@ static void rpf_configure_partition(struct vsp1_entity *entity,
 		crop.left += pipe->partition->rpf.left;
 	}
 
+	if (pipe->interlaced) {
+		crop.height = round_down(crop.height / 2, fmtinfo->vsub);
+		crop.top = round_down(crop.top / 2, fmtinfo->vsub);
+	}
+
 	vsp1_rpf_write(rpf, dlb, VI6_RPF_SRC_BSIZE,
 		       (crop.width << VI6_RPF_SRC_BSIZE_BHSIZE_SHIFT) |
 		       (crop.height << VI6_RPF_SRC_BSIZE_BVSIZE_SHIFT));
@@ -249,9 +307,17 @@ static void rpf_configure_partition(struct vsp1_entity *entity,
 	    fmtinfo->swap_uv)
 		swap(mem.addr[1], mem.addr[2]);
 
-	vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_Y, mem.addr[0]);
-	vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_C0, mem.addr[1]);
-	vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_C1, mem.addr[2]);
+	/*
+	 * Interlaced pipelines will use the extended pre-cmd to process
+	 * SRCM_ADDR_{Y,C0,C1}
+	 */
+	if (pipe->interlaced) {
+		vsp1_rpf_configure_autofld(rpf, dl);
+	} else {
+		vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_Y, mem.addr[0]);
+		vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_C0, mem.addr[1]);
+		vsp1_rpf_write(rpf, dlb, VI6_RPF_SRCM_ADDR_C1, mem.addr[2]);
+	}
 }
 
 static void rpf_partition(struct vsp1_entity *entity,
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index 678c24de1ac6..3093b9cb9067 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -25,6 +25,7 @@ int vsp1_du_init(struct device *dev);
  * struct vsp1_du_lif_config - VSP LIF configuration
  * @width: output frame width
  * @height: output frame height
+ * @interlaced: true for interlaced pipelines
  * @callback: frame completion callback function (optional). When a callback
  *	      is provided, the VSP driver guarantees that it will be called once
  *	      and only once for each vsp1_du_atomic_flush() call.
@@ -33,6 +34,7 @@ int vsp1_du_init(struct device *dev);
 struct vsp1_du_lif_config {
 	unsigned int width;
 	unsigned int height;
+	bool interlaced;
 
 	void (*callback)(void *data, bool completed, u32 crc);
 	void *callback_data;
-- 
cgit v1.2.3


From 5bef915104f32c9d0bb5df6e86a98e31cb524e9a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 29 Jun 2018 19:36:57 -0400
Subject: new helper: inode_fake_hash()

open-coded in a quite a few places...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c     |  2 +-
 fs/jfs/jfs_imap.c  |  8 +-------
 fs/jfs/super.c     |  2 +-
 fs/xfs/xfs_iops.c  |  2 +-
 include/linux/fs.h | 11 +++++++++++
 5 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2a16111d312f..a2dfa1b2a89c 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -541,7 +541,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
 	HFS_I(inode)->rsrc_inode = dir;
 	HFS_I(dir)->rsrc_inode = inode;
 	igrab(dir);
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mark_inode_dirty(inode);
 	dont_mount(dentry);
 out:
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f36ef68905a7..93e8c590ff5c 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -491,13 +491,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	/* release the page */
 	release_metapage(mp);
 
-	/*
-	 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
-	 * want special inodes in the fileset inode space, we make them
-	 * appear hashed, but do not put on any lists.  hlist_del()
-	 * will work fine and require no locking.
-	 */
-	hlist_add_fake(&ip->i_hash);
+	inode_fake_hash(ip);
 
 	return (ip);
 }
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 1b9264fd54b6..5403ece57dba 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -581,7 +581,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	inode->i_ino = 0;
 	inode->i_size = i_size_read(sb->s_bdev->bd_inode);
 	inode->i_mapping->a_ops = &jfs_metapage_aops;
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
 	sbi->direct_inode = inode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 0fa29f39d658..3a75de777843 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1253,7 +1253,7 @@ xfs_setup_inode(
 
 	inode_sb_list_add(inode);
 	/* make the inode look hashed for the writeback code */
-	hlist_add_fake(&inode->i_hash);
+	inode_fake_hash(inode);
 
 	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
 	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a42600565925..43941e230e2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -684,6 +684,17 @@ static inline int inode_unhashed(struct inode *inode)
 	return hlist_unhashed(&inode->i_hash);
 }
 
+/*
+ * __mark_inode_dirty expects inodes to be hashed.  Since we don't
+ * want special inodes in the fileset inode space, we make them
+ * appear hashed, but do not put on any lists.  hlist_del()
+ * will work fine and require no locking.
+ */
+static inline void inode_fake_hash(struct inode *inode)
+{
+	hlist_add_fake(&inode->i_hash);
+}
+
 /*
  * inode->i_mutex nesting subclasses for the lock validator:
  *
-- 
cgit v1.2.3


From 088fe47ce952542389e604e83f533811750aaf7c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 17:26:49 -0500
Subject: signal: Add calculate_sigpending()

Add a function calculate_sigpending to test to see if any signals are
pending for a new task immediately following fork.  Signals have to
happen either before or after fork.  Today our practice is to push
all of the signals to before the fork, but that has the downside that
frequent or periodic signals can make fork take much much longer than
normal or prevent fork from completing entirely.

So we need move signals that we can after the fork to prevent that.

This updates the code to set TIF_SIGPENDING on a new task if there
are signals or other activities that have moved so that they appear
to happen after the fork.

As the code today restarts if it sees any such activity this won't
immediately have an effect, as there will be no reason for it
to set TIF_SIGPENDING immediately after the fork.

Adding calculate_sigpending means the code in fork can safely be
changed to not always restart if a signal is pending.

The new calculate_sigpending function sets sigpending if there
are pending bits in jobctl, pending signals, the freezer needs
to freeze the new task or the live kernel patching framework
need the new thread to take the slow path to userspace.

I have verified that setting TIF_SIGPENDING does make a new process
take the slow path to userspace before it executes it's first userspace
instruction.

I have looked at the callers of signal_wake_up and the code paths
setting TIF_SIGPENDING and I don't see anything else that needs to be
handled.  The code probably doesn't need to set TIF_SIGPENDING for the
kernel live patching as it uses a separate thread flag as well.  But
at this point it seems safer reuse the recalc_sigpending logic and get
the kernel live patching folks to sort out their story later.

V2: I have moved the test into schedule_tail where siglock can
    be grabbed and recalc_sigpending can be reused directly.
    Further as the last action of setting up a new task this
    guarantees that TIF_SIGPENDING will be properly set in the
    new process.

    The helper calculate_sigpending takes the siglock and
    uncontitionally sets TIF_SIGPENDING and let's recalc_sigpending
    clear TIF_SIGPENDING if it is unnecessary.  This allows reusing
    the existing code and keeps maintenance of the conditions simple.

    Oleg Nesterov <oleg@redhat.com>  suggested the movement
    and pointed out the need to take siglock if this code
    was going to be called while the new task is discoverable.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/sched/core.c          |  2 ++
 kernel/signal.c              | 11 +++++++++++
 3 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 94558ffa82ab..b55fd293c1e5 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -372,6 +372,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
  */
 extern void recalc_sigpending_and_wake(struct task_struct *t);
 extern void recalc_sigpending(void);
+extern void calculate_sigpending(void);
 
 extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78d8facba456..3e4ed4b7aa2d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2813,6 +2813,8 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev)
 
 	if (current->set_child_tid)
 		put_user(task_pid_vnr(current), current->set_child_tid);
+
+	calculate_sigpending();
 }
 
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index dddbea558455..1e06f1eba363 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -172,6 +172,17 @@ void recalc_sigpending(void)
 
 }
 
+void calculate_sigpending(void)
+{
+	/* Have any signals or users of TIF_SIGPENDING been delayed
+	 * until after fork?
+	 */
+	spin_lock_irq(&current->sighand->siglock);
+	set_tsk_thread_flag(current, TIF_SIGPENDING);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+}
+
 /* Given the mask, find the first available signal that should be serviced. */
 
 #define SYNCHRONOUS_MASK \
-- 
cgit v1.2.3


From 4390e9eadbbb6774b7ba03fde0a0fdf3f07db4cd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 3 Aug 2018 20:10:54 -0500
Subject: fork: Skip setting TIF_SIGPENDING in ptrace_init_task

The code in calculate_sigpending will now handle this so
it is just redundant and possibly a little confusing
to continue setting TIF_SIGPENDING in ptrace_init_task.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/ptrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 037bf0ef1ae9..4f36431c380b 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -214,8 +214,6 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
 			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
 		else
 			sigaddset(&child->pending.signal, SIGSTOP);
-
-		set_tsk_thread_flag(child, TIF_SIGPENDING);
 	}
 	else
 		child->ptracer_cred = NULL;
-- 
cgit v1.2.3


From 924de3b8c9410c404c6eda7abffd282b97b3ff7f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 13:38:00 -0500
Subject: fork: Have new threads join on-going signal group stops

There are only two signals that are delivered to every member of a
signal group: SIGSTOP and SIGKILL.  Signal delivery requires every
signal appear to be delivered either before or after a clone syscall.
SIGKILL terminates the clone so does not need to be considered.  Which
leaves only SIGSTOP that needs to be considered when creating new
threads.

Today in the event of a group stop TIF_SIGPENDING will get set and the
fork will restart ensuring the fork syscall participates in the group
stop.

A fork (especially of a process with a lot of memory) is one of the
most expensive system so we really only want to restart a fork when
necessary.

It is easy so check to see if a SIGSTOP is ongoing and have the new
thread join it immediate after the clone completes.  Making it appear
the clone completed happened just before the SIGSTOP.

The calculate_sigpending function will see the bits set in jobctl and
set TIF_SIGPENDING to ensure the new task takes the slow path to userspace.

V2: The call to task_join_group_stop was moved before the new task is
    added to the thread group list.  This should not matter as
    sighand->siglock is held over both the addition of the threads,
    the call to task_join_group_stop and do_signal_stop.  But the change
    is trivial and it is one less thing to worry about when reading
    the code.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  2 ++
 kernel/fork.c                | 27 +++++++++++++++------------
 kernel/signal.c              | 14 ++++++++++++++
 3 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index b55fd293c1e5..ae2b0b81be25 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -385,6 +385,8 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
 	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
 }
 
+void task_join_group_stop(struct task_struct *task);
+
 #ifdef TIF_RESTORE_SIGMASK
 /*
  * Legacy restore_sigmask accessors.  These are inefficient on
diff --git a/kernel/fork.c b/kernel/fork.c
index 22d4cdb9a7ca..ab731e15a600 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1934,18 +1934,20 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
-	/*
-	 * Process group and session signals need to be delivered to just the
-	 * parent before the fork or both the parent and the child after the
-	 * fork. Restart if a signal comes in before we add the new process to
-	 * it's process group.
-	 * A fatal signal pending means that current will exit, so the new
-	 * thread can't slip out of an OOM kill (or normal SIGKILL).
-	*/
-	recalc_sigpending();
-	if (signal_pending(current)) {
-		retval = -ERESTARTNOINTR;
-		goto bad_fork_cancel_cgroup;
+	if (!(clone_flags & CLONE_THREAD)) {
+		/*
+		 * Process group and session signals need to be delivered to just the
+		 * parent before the fork or both the parent and the child after the
+		 * fork. Restart if a signal comes in before we add the new process to
+		 * it's process group.
+		 * A fatal signal pending means that current will exit, so the new
+		 * thread can't slip out of an OOM kill (or normal SIGKILL).
+		 */
+		recalc_sigpending();
+		if (signal_pending(current)) {
+			retval = -ERESTARTNOINTR;
+			goto bad_fork_cancel_cgroup;
+		}
 	}
 
 
@@ -1982,6 +1984,7 @@ static __latent_entropy struct task_struct *copy_process(
 			current->signal->nr_threads++;
 			atomic_inc(&current->signal->live);
 			atomic_inc(&current->signal->sigcnt);
+			task_join_group_stop(p);
 			list_add_tail_rcu(&p->thread_group,
 					  &p->group_leader->thread_group);
 			list_add_tail_rcu(&p->thread_node,
diff --git a/kernel/signal.c b/kernel/signal.c
index 1e06f1eba363..9f0eafb6d474 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -373,6 +373,20 @@ static bool task_participate_group_stop(struct task_struct *task)
 	return false;
 }
 
+void task_join_group_stop(struct task_struct *task)
+{
+	/* Have the new thread join an on-going signal group stop */
+	unsigned long jobctl = current->jobctl;
+	if (jobctl & JOBCTL_STOP_PENDING) {
+		struct signal_struct *sig = current->signal;
+		unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
+		unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+		if (task_set_jobctl_pending(task, signr | gstop)) {
+			sig->group_stop_count++;
+		}
+	}
+}
+
 /*
  * allocate a new signal queue record
  * - this may be called without locks if and only if t == current, otherwise an
-- 
cgit v1.2.3


From 31ba191bf5ab2975c1955f1adf771a5a0b57afaa Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 3 Aug 2018 14:53:15 +0800
Subject: include/net/bond_3ad: Simplify the code by using the ARRAY_SIZE

We prefer to ARRAY_SIZE rather than the open code to calculate size.

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bond_3ad.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index f358ad5e4214..fc3111515f5c 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -283,7 +283,7 @@ static inline const char *bond_3ad_churn_desc(churn_state_t state)
 		"none",
 		"unknown"
 	};
-	int max_size = sizeof(churn_description) / sizeof(churn_description[0]);
+	int max_size = ARRAY_SIZE(churn_description);
 
 	if (state >= max_size)
 		state = max_size - 1;
-- 
cgit v1.2.3


From d9cfe2ce246845b9cca0ec1b881e826965893c58 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 23 Jul 2018 22:26:05 +0200
Subject: i2c: quirks: add zero length checks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some adapters do not support a message length of 0. Add this as a quirk
so drivers don't have to open code it.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 6 ++++++
 include/linux/i2c.h         | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 02d6f27b19e4..a26b3e9cc441 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1839,9 +1839,15 @@ static int i2c_check_for_quirks(struct i2c_adapter *adap, struct i2c_msg *msgs,
 		if (msgs[i].flags & I2C_M_RD) {
 			if (do_len_check && i2c_quirk_exceeded(len, q->max_read_len))
 				return i2c_quirk_error(adap, &msgs[i], "msg too long");
+
+			if (q->flags & I2C_AQ_NO_ZERO_LEN_READ && len == 0)
+				return i2c_quirk_error(adap, &msgs[i], "no zero length");
 		} else {
 			if (do_len_check && i2c_quirk_exceeded(len, q->max_write_len))
 				return i2c_quirk_error(adap, &msgs[i], "msg too long");
+
+			if (q->flags & I2C_AQ_NO_ZERO_LEN_WRITE && len == 0)
+				return i2c_quirk_error(adap, &msgs[i], "no zero length");
 		}
 	}
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index bc8d42f8544f..2a98d0886d2e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -661,6 +661,10 @@ struct i2c_adapter_quirks {
 					 I2C_AQ_COMB_READ_SECOND | I2C_AQ_COMB_SAME_ADDR)
 /* clock stretching is not supported */
 #define I2C_AQ_NO_CLK_STRETCH		BIT(4)
+/* message cannot have length of 0 */
+#define I2C_AQ_NO_ZERO_LEN_READ		BIT(5)
+#define I2C_AQ_NO_ZERO_LEN_WRITE	BIT(6)
+#define I2C_AQ_NO_ZERO_LEN		(I2C_AQ_NO_ZERO_LEN_READ | I2C_AQ_NO_ZERO_LEN_WRITE)
 
 /*
  * i2c_adapter is the structure used to identify a physical i2c bus along
-- 
cgit v1.2.3


From d89d41556141a527030a15233135ba622ba3350d Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 4 Aug 2018 14:20:40 -0700
Subject: ethtool: Remove trailing semicolon for static inline

Android's header sanitization tool chokes on static inline functions having a
trailing semicolon, leading to an incorrectly parsed header file. While the
tool should obviously be fixed, also fix the header files for the two affected
functions: ethtool_get_flow_spec_ring() and ethtool_get_flow_spec_ring_vf().

Fixes: 8cf6f497de40 ("ethtool: Add helper routines to pass vf to rx_flow_spec")
Reporetd-by: Blair Prescott <blair.prescott@broadcom.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7363f18e65a5..813282cc8af6 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -902,13 +902,13 @@ struct ethtool_rx_flow_spec {
 static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
 {
 	return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
-};
+}
 
 static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
 {
 	return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
 				ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
-};
+}
 
 /**
  * struct ethtool_rxnfc - command to get or set RX flow classification rules
-- 
cgit v1.2.3


From 91305f2812624c0cf7ccbb44133b66d3b24676e4 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Wed, 1 Aug 2018 18:05:54 +0800
Subject: ptp_qoriq: support automatic configuration for ptp timer

This patch is to support automatic configuration for ptp timer.
If required ptp dts properties are not provided, driver could
try to calculate a set of default configurations to initialize
the ptp timer. This makes the driver work for many boards which
don't have the required ptp dts properties in current kernel.
Also the users could set dts properties by themselves according
to their requirement.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 111 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/fsl/ptp_qoriq.h |   6 ++-
 2 files changed, 113 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index a14c317b5a38..095c18532dc7 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -29,6 +29,7 @@
 #include <linux/of_platform.h>
 #include <linux/timex.h>
 #include <linux/slab.h>
+#include <linux/clk.h>
 
 #include <linux/fsl/ptp_qoriq.h>
 
@@ -317,6 +318,105 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
 	.enable		= ptp_qoriq_enable,
 };
 
+/**
+ * qoriq_ptp_nominal_freq - calculate nominal frequency according to
+ *			    reference clock frequency
+ *
+ * @clk_src: reference clock frequency
+ *
+ * The nominal frequency is the desired clock frequency.
+ * It should be less than the reference clock frequency.
+ * It should be a factor of 1000MHz.
+ *
+ * Return the nominal frequency
+ */
+static u32 qoriq_ptp_nominal_freq(u32 clk_src)
+{
+	u32 remainder = 0;
+
+	clk_src /= 1000000;
+	remainder = clk_src % 100;
+	if (remainder) {
+		clk_src -= remainder;
+		clk_src += 100;
+	}
+
+	do {
+		clk_src -= 100;
+
+	} while (1000 % clk_src);
+
+	return clk_src * 1000000;
+}
+
+/**
+ * qoriq_ptp_auto_config - calculate a set of default configurations
+ *
+ * @qoriq_ptp: pointer to qoriq_ptp
+ * @node: pointer to device_node
+ *
+ * If below dts properties are not provided, this function will be
+ * called to calculate a set of default configurations for them.
+ *   "fsl,tclk-period"
+ *   "fsl,tmr-prsc"
+ *   "fsl,tmr-add"
+ *   "fsl,tmr-fiper1"
+ *   "fsl,tmr-fiper2"
+ *   "fsl,max-adj"
+ *
+ * Return 0 if success
+ */
+static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
+				 struct device_node *node)
+{
+	struct clk *clk;
+	u64 freq_comp;
+	u64 max_adj;
+	u32 nominal_freq;
+	u32 clk_src = 0;
+
+	qoriq_ptp->cksel = DEFAULT_CKSEL;
+
+	clk = of_clk_get(node, 0);
+	if (!IS_ERR(clk)) {
+		clk_src = clk_get_rate(clk);
+		clk_put(clk);
+	}
+
+	if (clk_src <= 100000000UL) {
+		pr_err("error reference clock value, or lower than 100MHz\n");
+		return -EINVAL;
+	}
+
+	nominal_freq = qoriq_ptp_nominal_freq(clk_src);
+	if (!nominal_freq)
+		return -EINVAL;
+
+	qoriq_ptp->tclk_period = 1000000000UL / nominal_freq;
+	qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC;
+
+	/* Calculate initial frequency compensation value for TMR_ADD register.
+	 * freq_comp = ceil(2^32 / freq_ratio)
+	 * freq_ratio = reference_clock_freq / nominal_freq
+	 */
+	freq_comp = ((u64)1 << 32) * nominal_freq;
+	if (do_div(freq_comp, clk_src))
+		freq_comp++;
+
+	qoriq_ptp->tmr_add = freq_comp;
+	qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period;
+	qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period;
+
+	/* max_adj = 1000000000 * (freq_ratio - 1.0) - 1
+	 * freq_ratio = reference_clock_freq / nominal_freq
+	 */
+	max_adj = 1000000000ULL * (clk_src - nominal_freq);
+	max_adj = max_adj / nominal_freq - 1;
+	qoriq_ptp->caps.max_adj = max_adj;
+
+	return 0;
+}
+
 static int qoriq_ptp_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
@@ -332,7 +432,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	if (!qoriq_ptp)
 		goto no_memory;
 
-	err = -ENODEV;
+	err = -EINVAL;
 
 	qoriq_ptp->caps = ptp_qoriq_caps;
 
@@ -351,10 +451,14 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 				 "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) ||
 	    of_property_read_u32(node,
 				 "fsl,max-adj", &qoriq_ptp->caps.max_adj)) {
-		pr_err("device tree node missing required elements\n");
-		goto no_node;
+		pr_warn("device tree node missing required elements, try automatic configuration\n");
+
+		if (qoriq_ptp_auto_config(qoriq_ptp, node))
+			goto no_config;
 	}
 
+	err = -ENODEV;
+
 	qoriq_ptp->irq = platform_get_irq(dev, 0);
 
 	if (qoriq_ptp->irq < 0) {
@@ -436,6 +540,7 @@ no_ioremap:
 	release_resource(qoriq_ptp->rsrc);
 no_resource:
 	free_irq(qoriq_ptp->irq, qoriq_ptp);
+no_config:
 no_node:
 	kfree(qoriq_ptp);
 no_memory:
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index dc3dac40f069..c1f003aadcce 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -127,9 +127,13 @@ struct qoriq_ptp_registers {
 
 
 #define DRIVER		"ptp_qoriq"
-#define DEFAULT_CKSEL	1
 #define N_EXT_TS	2
 
+#define DEFAULT_CKSEL		1
+#define DEFAULT_TMR_PRSC	2
+#define DEFAULT_FIPER1_PERIOD	1000000000
+#define DEFAULT_FIPER2_PERIOD	100000
+
 struct qoriq_ptp {
 	void __iomem *base;
 	struct qoriq_ptp_registers regs;
-- 
cgit v1.2.3


From 7969e5c40dfd04799d4341f1b7cd266b6e47f227 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:37 +0000
Subject: ip: discard IPv4 datagrams with overlapping segments.

This behavior is required in IPv6, and there is little need
to tolerate overlapping fragments in IPv4. This change
simplifies the code and eliminates potential DDoS attack vectors.

Tested: ran ip_defrag selftest (not yet available uptream).

Suggested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h |  1 +
 net/ipv4/ip_fragment.c    | 75 ++++++++++++-----------------------------------
 net/ipv4/proc.c           |  1 +
 3 files changed, 21 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index e5ebc83827ab..f80135e5feaa 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -56,6 +56,7 @@ enum
 	IPSTATS_MIB_ECT1PKTS,			/* InECT1Pkts */
 	IPSTATS_MIB_ECT0PKTS,			/* InECT0Pkts */
 	IPSTATS_MIB_CEPKTS,			/* InCEPkts */
+	IPSTATS_MIB_REASM_OVERLAPS,		/* ReasmOverlaps */
 	__IPSTATS_MIB_MAX
 };
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d14d741fb05e..960bf5eab59f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -277,6 +277,7 @@ static int ip_frag_reinit(struct ipq *qp)
 /* Add new segment to existing queue. */
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
+	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
 	unsigned int fragsize;
@@ -357,65 +358,23 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	}
 
 found:
-	/* We found where to put this one.  Check for overlap with
-	 * preceding fragment, and, if needed, align things so that
-	 * any overlaps are eliminated.
+	/* RFC5722, Section 4, amended by Errata ID : 3089
+	 *                          When reassembling an IPv6 datagram, if
+	 *   one or more its constituent fragments is determined to be an
+	 *   overlapping fragment, the entire datagram (and any constituent
+	 *   fragments) MUST be silently discarded.
+	 *
+	 * We do the same here for IPv4.
 	 */
-	if (prev) {
-		int i = (prev->ip_defrag_offset + prev->len) - offset;
 
-		if (i > 0) {
-			offset += i;
-			err = -EINVAL;
-			if (end <= offset)
-				goto err;
-			err = -ENOMEM;
-			if (!pskb_pull(skb, i))
-				goto err;
-			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-				skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
+	/* Is there an overlap with the previous fragment? */
+	if (prev &&
+	    (prev->ip_defrag_offset + prev->len) > offset)
+		goto discard_qp;
 
-	err = -ENOMEM;
-
-	while (next && next->ip_defrag_offset < end) {
-		int i = end - next->ip_defrag_offset; /* overlap is 'i' bytes */
-
-		if (i < next->len) {
-			int delta = -next->truesize;
-
-			/* Eat head of the next overlapped fragment
-			 * and leave the loop. The next ones cannot overlap.
-			 */
-			if (!pskb_pull(next, i))
-				goto err;
-			delta += next->truesize;
-			if (delta)
-				add_frag_mem_limit(qp->q.net, delta);
-			next->ip_defrag_offset += i;
-			qp->q.meat -= i;
-			if (next->ip_summed != CHECKSUM_UNNECESSARY)
-				next->ip_summed = CHECKSUM_NONE;
-			break;
-		} else {
-			struct sk_buff *free_it = next;
-
-			/* Old fragment is completely overridden with
-			 * new one drop it.
-			 */
-			next = next->next;
-
-			if (prev)
-				prev->next = next;
-			else
-				qp->q.fragments = next;
-
-			qp->q.meat -= free_it->len;
-			sub_frag_mem_limit(qp->q.net, free_it->truesize);
-			kfree_skb(free_it);
-		}
-	}
+	/* Is there an overlap with the next fragment? */
+	if (next && next->ip_defrag_offset < end)
+		goto discard_qp;
 
 	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
 	dev = skb->dev;
@@ -463,6 +422,10 @@ found:
 	skb_dst_drop(skb);
 	return -EINPROGRESS;
 
+discard_qp:
+	inet_frag_kill(&qp->q);
+	err = -EINVAL;
+	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
 err:
 	kfree_skb(skb);
 	return err;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b46e4cf9a55a..70289682a670 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -119,6 +119,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
 	SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
 	SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
 	SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
+	SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3


From 385114dec8a49b5e5945e77ba7de6356106713f4 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:38 +0000
Subject: net: modify skb_rbtree_purge to return the truesize of all purged
 skbs.

Tested: see the next patch is the series.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 net/core/skbuff.c      | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fd3cb1b247df..47848367c816 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2585,7 +2585,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
-void skb_rbtree_purge(struct rb_root *root);
+unsigned int skb_rbtree_purge(struct rb_root *root);
 
 void *netdev_alloc_frag(unsigned int fragsz);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 51b0a9126e12..8d574a88125d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2858,23 +2858,27 @@ EXPORT_SYMBOL(skb_queue_purge);
 /**
  *	skb_rbtree_purge - empty a skb rbtree
  *	@root: root of the rbtree to empty
+ *	Return value: the sum of truesizes of all purged skbs.
  *
  *	Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
  *	the list and one reference dropped. This function does not take
  *	any lock. Synchronization should be handled by the caller (e.g., TCP
  *	out-of-order queue is protected by the socket lock).
  */
-void skb_rbtree_purge(struct rb_root *root)
+unsigned int skb_rbtree_purge(struct rb_root *root)
 {
 	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
 
 	while (p) {
 		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
 
 		p = rb_next(p);
 		rb_erase(&skb->rbnode, root);
+		sum += skb->truesize;
 		kfree_skb(skb);
 	}
+	return sum;
 }
 
 /**
-- 
cgit v1.2.3


From fa0f527358bd900ef92f925878ed6bfbd51305cc Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Thu, 2 Aug 2018 23:34:39 +0000
Subject: ip: use rb trees for IP frag queue.

Similar to TCP OOO RX queue, it makes sense to use rb trees to store
IP fragments, so that OOO fragments are inserted faster.

Tested:

- a follow-up patch contains a rather comprehensive ip defrag
  self-test (functional)
- ran neper `udp_stream -c -H <host> -F 100 -l 300 -T 20`:
    netstat --statistics
    Ip:
        282078937 total packets received
        0 forwarded
        0 incoming packets discarded
        946760 incoming packets delivered
        18743456 requests sent out
        101 fragments dropped after timeout
        282077129 reassemblies required
        944952 packets reassembled ok
        262734239 packet reassembles failed
   (The numbers/stats above are somewhat better re:
    reassemblies vs a kernel without this patchset. More
    comprehensive performance testing TBD).

Reported-by: Jann Horn <jannh@google.com>
Reported-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |   9 +-
 include/net/inet_frag.h                 |   3 +-
 net/ipv4/inet_fragment.c                |  16 +--
 net/ipv4/ip_fragment.c                  | 182 ++++++++++++++++++--------------
 net/ipv6/netfilter/nf_conntrack_reasm.c |   1 +
 net/ipv6/reassembly.c                   |   1 +
 6 files changed, 121 insertions(+), 91 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47848367c816..7ebdf158a795 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -676,13 +676,16 @@ struct sk_buff {
 				 * UDP receive path is one user.
 				 */
 				unsigned long		dev_scratch;
-				int			ip_defrag_offset;
 			};
 		};
-		struct rb_node		rbnode; /* used in netem & tcp stack */
+		struct rb_node		rbnode; /* used in netem, ip4 defrag, and tcp stack */
 		struct list_head	list;
 	};
-	struct sock		*sk;
+
+	union {
+		struct sock		*sk;
+		int			ip_defrag_offset;
+	};
 
 	union {
 		ktime_t		tstamp;
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index f4272a29dc44..b86d14528188 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -75,7 +75,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;
+	struct sk_buff		*fragments;  /* Used in IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index ccd140e4082d..6d258a5669e7 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -137,12 +137,16 @@ void inet_frag_destroy(struct inet_frag_queue *q)
 	fp = q->fragments;
 	nf = q->net;
 	f = nf->f;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		sum_truesize += fp->truesize;
-		kfree_skb(fp);
-		fp = xp;
+	if (fp) {
+		do {
+			struct sk_buff *xp = fp->next;
+
+			sum_truesize += fp->truesize;
+			kfree_skb(fp);
+			fp = xp;
+		} while (fp);
+	} else {
+		sum_truesize = skb_rbtree_purge(&q->rb_fragments);
 	}
 	sum = sum_truesize + f->qsize;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 960bf5eab59f..0e8f8de77e71 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -136,7 +136,7 @@ static void ip_expire(struct timer_list *t)
 {
 	struct inet_frag_queue *frag = from_timer(frag, t, timer);
 	const struct iphdr *iph;
-	struct sk_buff *head;
+	struct sk_buff *head = NULL;
 	struct net *net;
 	struct ipq *qp;
 	int err;
@@ -152,14 +152,31 @@ static void ip_expire(struct timer_list *t)
 
 	ipq_kill(qp);
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
-
-	head = qp->q.fragments;
-
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
 
-	if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !head)
+	if (!qp->q.flags & INET_FRAG_FIRST_IN)
 		goto out;
 
+	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
+	 * pull the head out of the tree in order to be able to
+	 * deal with head->dev.
+	 */
+	if (qp->q.fragments) {
+		head = qp->q.fragments;
+		qp->q.fragments = head->next;
+	} else {
+		head = skb_rb_first(&qp->q.rb_fragments);
+		if (!head)
+			goto out;
+		rb_erase(&head->rbnode, &qp->q.rb_fragments);
+		memset(&head->rbnode, 0, sizeof(head->rbnode));
+		barrier();
+	}
+	if (head == qp->q.fragments_tail)
+		qp->q.fragments_tail = NULL;
+
+	sub_frag_mem_limit(qp->q.net, head->truesize);
+
 	head->dev = dev_get_by_index_rcu(net, qp->iif);
 	if (!head->dev)
 		goto out;
@@ -179,16 +196,16 @@ static void ip_expire(struct timer_list *t)
 	    (skb_rtable(head)->rt_type != RTN_LOCAL))
 		goto out;
 
-	skb_get(head);
 	spin_unlock(&qp->q.lock);
 	icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
-	kfree_skb(head);
 	goto out_rcu_unlock;
 
 out:
 	spin_unlock(&qp->q.lock);
 out_rcu_unlock:
 	rcu_read_unlock();
+	if (head)
+		kfree_skb(head);
 	ipq_put(qp);
 }
 
@@ -231,7 +248,7 @@ static int ip_frag_too_far(struct ipq *qp)
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->q.fragments && (end - start) > max;
+	rc = qp->q.fragments_tail && (end - start) > max;
 
 	if (rc) {
 		struct net *net;
@@ -245,7 +262,6 @@ static int ip_frag_too_far(struct ipq *qp)
 
 static int ip_frag_reinit(struct ipq *qp)
 {
-	struct sk_buff *fp;
 	unsigned int sum_truesize = 0;
 
 	if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
@@ -253,20 +269,14 @@ static int ip_frag_reinit(struct ipq *qp)
 		return -ETIMEDOUT;
 	}
 
-	fp = qp->q.fragments;
-	do {
-		struct sk_buff *xp = fp->next;
-
-		sum_truesize += fp->truesize;
-		kfree_skb(fp);
-		fp = xp;
-	} while (fp);
+	sum_truesize = skb_rbtree_purge(&qp->q.rb_fragments);
 	sub_frag_mem_limit(qp->q.net, sum_truesize);
 
 	qp->q.flags = 0;
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 	qp->ecn = 0;
@@ -278,7 +288,8 @@ static int ip_frag_reinit(struct ipq *qp)
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
-	struct sk_buff *prev, *next;
+	struct rb_node **rbn, *parent;
+	struct sk_buff *skb1;
 	struct net_device *dev;
 	unsigned int fragsize;
 	int flags, offset;
@@ -341,58 +352,58 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (err)
 		goto err;
 
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = qp->q.fragments_tail;
-	if (!prev || prev->ip_defrag_offset < offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = qp->q.fragments; next != NULL; next = next->next) {
-		if (next->ip_defrag_offset >= offset)
-			break;	/* bingo! */
-		prev = next;
-	}
+	/* Note : skb->rbnode and skb->dev share the same location. */
+	dev = skb->dev;
+	/* Makes sure compiler wont do silly aliasing games */
+	barrier();
 
-found:
 	/* RFC5722, Section 4, amended by Errata ID : 3089
 	 *                          When reassembling an IPv6 datagram, if
 	 *   one or more its constituent fragments is determined to be an
 	 *   overlapping fragment, the entire datagram (and any constituent
 	 *   fragments) MUST be silently discarded.
 	 *
-	 * We do the same here for IPv4.
+	 * We do the same here for IPv4 (and increment an snmp counter).
 	 */
 
-	/* Is there an overlap with the previous fragment? */
-	if (prev &&
-	    (prev->ip_defrag_offset + prev->len) > offset)
-		goto discard_qp;
-
-	/* Is there an overlap with the next fragment? */
-	if (next && next->ip_defrag_offset < end)
-		goto discard_qp;
+	/* Find out where to put this fragment.  */
+	skb1 = qp->q.fragments_tail;
+	if (!skb1) {
+		/* This is the first fragment we've received. */
+		rb_link_node(&skb->rbnode, NULL, &qp->q.rb_fragments.rb_node);
+		qp->q.fragments_tail = skb;
+	} else if ((skb1->ip_defrag_offset + skb1->len) < end) {
+		/* This is the common/special case: skb goes to the end. */
+		/* Detect and discard overlaps. */
+		if (offset < (skb1->ip_defrag_offset + skb1->len))
+			goto discard_qp;
+		/* Insert after skb1. */
+		rb_link_node(&skb->rbnode, &skb1->rbnode, &skb1->rbnode.rb_right);
+		qp->q.fragments_tail = skb;
+	} else {
+		/* Binary search. Note that skb can become the first fragment, but
+		 * not the last (covered above). */
+		rbn = &qp->q.rb_fragments.rb_node;
+		do {
+			parent = *rbn;
+			skb1 = rb_to_skb(parent);
+			if (end <= skb1->ip_defrag_offset)
+				rbn = &parent->rb_left;
+			else if (offset >= skb1->ip_defrag_offset + skb1->len)
+				rbn = &parent->rb_right;
+			else /* Found an overlap with skb1. */
+				goto discard_qp;
+		} while (*rbn);
+		/* Here we have parent properly set, and rbn pointing to
+		 * one of its NULL left/right children. Insert skb. */
+		rb_link_node(&skb->rbnode, parent, rbn);
+	}
+	rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
 
-	/* Note : skb->ip_defrag_offset and skb->dev share the same location */
-	dev = skb->dev;
 	if (dev)
 		qp->iif = dev->ifindex;
-	/* Makes sure compiler wont do silly aliasing games */
-	barrier();
 	skb->ip_defrag_offset = offset;
 
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		qp->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		qp->q.fragments = skb;
-
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
 	qp->ecn |= ecn;
@@ -414,7 +425,7 @@ found:
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		err = ip_frag_reasm(qp, prev, dev);
+		err = ip_frag_reasm(qp, skb, dev);
 		skb->_skb_refdst = orefdst;
 		return err;
 	}
@@ -431,15 +442,15 @@ err:
 	return err;
 }
 
-
 /* Build a new IP datagram from all its fragments. */
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			 struct net_device *dev)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = qp->q.fragments;
+	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+	struct sk_buff **nextp; /* To build frag_list. */
+	struct rb_node *rbn;
 	int len;
 	int ihlen;
 	int err;
@@ -453,25 +464,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_fail;
 	}
 	/* Make the one we just received the head. */
-	if (prev) {
-		head = prev->next;
-		fp = skb_clone(head, GFP_ATOMIC);
+	if (head != skb) {
+		fp = skb_clone(skb, GFP_ATOMIC);
 		if (!fp)
 			goto out_nomem;
-
-		fp->next = head->next;
-		if (!fp->next)
+		rb_replace_node(&skb->rbnode, &fp->rbnode, &qp->q.rb_fragments);
+		if (qp->q.fragments_tail == skb)
 			qp->q.fragments_tail = fp;
-		prev->next = fp;
-
-		skb_morph(head, qp->q.fragments);
-		head->next = qp->q.fragments->next;
-
-		consume_skb(qp->q.fragments);
-		qp->q.fragments = head;
+		skb_morph(skb, head);
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&qp->q.rb_fragments);
+		consume_skb(head);
+		head = skb;
 	}
 
-	WARN_ON(!head);
 	WARN_ON(head->ip_defrag_offset != 0);
 
 	/* Allocate a new buffer for the datagram. */
@@ -496,24 +502,35 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		clone = alloc_skb(0, GFP_ATOMIC);
 		if (!clone)
 			goto out_nomem;
-		clone->next = head->next;
-		head->next = clone;
 		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
 		skb_frag_list_init(head);
 		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
 			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
 		clone->len = clone->data_len = head->data_len - plen;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
+		skb->truesize += clone->truesize;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
 		add_frag_mem_limit(qp->q.net, clone->truesize);
+		skb_shinfo(head)->frag_list = clone;
+		nextp = &clone->next;
+	} else {
+		nextp = &skb_shinfo(head)->frag_list;
 	}
 
-	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
 
-	for (fp=head->next; fp; fp = fp->next) {
+	/* Traverse the tree in order, to build frag_list. */
+	rbn = rb_next(&head->rbnode);
+	rb_erase(&head->rbnode, &qp->q.rb_fragments);
+	while (rbn) {
+		struct rb_node *rbnext = rb_next(rbn);
+		fp = rb_to_skb(rbn);
+		rb_erase(rbn, &qp->q.rb_fragments);
+		rbn = rbnext;
+		*nextp = fp;
+		nextp = &fp->next;
+		fp->prev = NULL;
+		memset(&fp->rbnode, 0, sizeof(fp->rbnode));
 		head->data_len += fp->len;
 		head->len += fp->len;
 		if (head->ip_summed != fp->ip_summed)
@@ -524,7 +541,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	}
 	sub_frag_mem_limit(qp->q.net, head->truesize);
 
+	*nextp = NULL;
 	head->next = NULL;
+	head->prev = NULL;
 	head->dev = dev;
 	head->tstamp = qp->q.stamp;
 	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
@@ -552,6 +571,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	return 0;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0610bdab721c..38d69ef516d5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -463,6 +463,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 					  head->csum);
 
 	fq->q.fragments = NULL;
+	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 
 	return true;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6edd2ac8ae4b..b4e558ab39fa 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -405,6 +405,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	return 1;
 
-- 
cgit v1.2.3


From bfe4037e722ec672c9dafd5730d9132afeeb76e9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 16 Jul 2018 09:08:20 +0200
Subject: aio: implement IOCB_CMD_POLL

Simple one-shot poll through the io_submit() interface.  To poll for
a file descriptor the application should submit an iocb of type
IOCB_CMD_POLL.  It will poll the fd for the events specified in the
the first 32 bits of the aio_buf field of the iocb.

Unlike poll or epoll without EPOLLONESHOT this interface always works
in one shot mode, that is once the iocb is completed, it will have to be
resubmitted.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Avi Kivity <avi@scylladb.com>
---
 fs/aio.c                     | 178 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/aio_abi.h |   6 +-
 2 files changed, 180 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index fe2018ada32c..2fd19521d8a8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
  *	Implements an efficient asynchronous io interface.
  *
  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
+ *	Copyright 2018 Christoph Hellwig.
  *
  *	See ../COPYING for licensing terms.
  */
@@ -165,10 +166,21 @@ struct fsync_iocb {
 	bool			datasync;
 };
 
+struct poll_iocb {
+	struct file		*file;
+	struct wait_queue_head	*head;
+	__poll_t		events;
+	bool			woken;
+	bool			cancelled;
+	struct wait_queue_entry	wait;
+	struct work_struct	work;
+};
+
 struct aio_kiocb {
 	union {
 		struct kiocb		rw;
 		struct fsync_iocb	fsync;
+		struct poll_iocb	poll;
 	};
 
 	struct kioctx		*ki_ctx;
@@ -1601,6 +1613,169 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
 	return 0;
 }
 
+static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+	struct file *file = iocb->poll.file;
+
+	aio_complete(iocb, mangle_poll(mask), 0);
+	fput(file);
+}
+
+static void aio_poll_complete_work(struct work_struct *work)
+{
+	struct poll_iocb *req = container_of(work, struct poll_iocb, work);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct poll_table_struct pt = { ._key = req->events };
+	struct kioctx *ctx = iocb->ki_ctx;
+	__poll_t mask = 0;
+
+	if (!READ_ONCE(req->cancelled))
+		mask = vfs_poll(req->file, &pt) & req->events;
+
+	/*
+	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
+	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
+	 * synchronize with them.  In the cancellation case the list_del_init
+	 * itself is not actually needed, but harmless so we keep it in to
+	 * avoid further branches in the fast path.
+	 */
+	spin_lock_irq(&ctx->ctx_lock);
+	if (!mask && !READ_ONCE(req->cancelled)) {
+		add_wait_queue(req->head, &req->wait);
+		spin_unlock_irq(&ctx->ctx_lock);
+		return;
+	}
+	list_del_init(&iocb->ki_list);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+	aio_poll_complete(iocb, mask);
+}
+
+/* assumes we are called with irqs disabled */
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+	struct poll_iocb *req = &aiocb->poll;
+
+	spin_lock(&req->head->lock);
+	WRITE_ONCE(req->cancelled, true);
+	if (!list_empty(&req->wait.entry)) {
+		list_del_init(&req->wait.entry);
+		schedule_work(&aiocb->poll.work);
+	}
+	spin_unlock(&req->head->lock);
+
+	return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+		void *key)
+{
+	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	__poll_t mask = key_to_poll(key);
+
+	req->woken = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & req->events))
+		return 0;
+
+	list_del_init(&req->wait.entry);
+	schedule_work(&req->work);
+	return 1;
+}
+
+struct aio_poll_table {
+	struct poll_table_struct	pt;
+	struct aio_kiocb		*iocb;
+	int				error;
+};
+
+static void
+aio_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+		struct poll_table_struct *p)
+{
+	struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt);
+
+	/* multiple wait queues per file are not supported */
+	if (unlikely(pt->iocb->poll.head)) {
+		pt->error = -EINVAL;
+		return;
+	}
+
+	pt->error = 0;
+	pt->iocb->poll.head = head;
+	add_wait_queue(head, &pt->iocb->poll.wait);
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+	struct kioctx *ctx = aiocb->ki_ctx;
+	struct poll_iocb *req = &aiocb->poll;
+	struct aio_poll_table apt;
+	__poll_t mask;
+
+	/* reject any unknown events outside the normal event mask. */
+	if ((u16)iocb->aio_buf != iocb->aio_buf)
+		return -EINVAL;
+	/* reject fields that are not defined for poll */
+	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+		return -EINVAL;
+
+	INIT_WORK(&req->work, aio_poll_complete_work);
+	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+
+	apt.pt._qproc = aio_poll_queue_proc;
+	apt.pt._key = req->events;
+	apt.iocb = aiocb;
+	apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+	/* initialized the list so that we can do list_empty checks */
+	INIT_LIST_HEAD(&req->wait.entry);
+	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+
+	/* one for removal from waitqueue, one for this function */
+	refcount_set(&aiocb->ki_refcnt, 2);
+
+	mask = vfs_poll(req->file, &apt.pt) & req->events;
+	if (unlikely(!req->head)) {
+		/* we did not manage to set up a waitqueue, done */
+		goto out;
+	}
+
+	spin_lock_irq(&ctx->ctx_lock);
+	spin_lock(&req->head->lock);
+	if (req->woken) {
+		/* wake_up context handles the rest */
+		mask = 0;
+		apt.error = 0;
+	} else if (mask || apt.error) {
+		/* if we get an error or a mask we are done */
+		WARN_ON_ONCE(list_empty(&req->wait.entry));
+		list_del_init(&req->wait.entry);
+	} else {
+		/* actually waiting for an event */
+		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+		aiocb->ki_cancel = aio_poll_cancel;
+	}
+	spin_unlock(&req->head->lock);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+out:
+	if (unlikely(apt.error)) {
+		fput(req->file);
+		return apt.error;
+	}
+
+	if (mask)
+		aio_poll_complete(aiocb, mask);
+	iocb_put(aiocb);
+	return 0;
+}
+
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 bool compat)
 {
@@ -1674,6 +1849,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	case IOCB_CMD_FDSYNC:
 		ret = aio_fsync(&req->fsync, &iocb, true);
 		break;
+	case IOCB_CMD_POLL:
+		ret = aio_poll(req, &iocb);
+		break;
 	default:
 		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
 		ret = -EINVAL;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index d4593a6062ef..ce43d340f010 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -38,10 +38,8 @@ enum {
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
-	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
-	 */
+	/* 4 was the experimental IOCB_CMD_PREADX */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
-- 
cgit v1.2.3


From 55f2503c3b69328735e88031ff8d6ba291bd952b Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Tue, 31 Jul 2018 16:51:32 +0800
Subject: PM / reboot: Eliminate race between reboot and suspend

At present, "systemctl suspend" and "shutdown" can run in parrallel. A
system can suspend after devices_shutdown(), and resume. Then the shutdown
task goes on to power off. This causes many devices are not really shut
off. Hence replacing reboot_mutex with system_transition_mutex (renamed
from pm_mutex) to achieve the exclusion. The renaming of pm_mutex as
system_transition_mutex can be better to reflect the purpose of the mutex.

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/freezing-of-tasks.txt      | 12 ++++++------
 Documentation/power/suspend-and-cpuhotplug.txt |  6 +++---
 include/linux/suspend.h                        |  2 +-
 kernel/freezer.c                               |  4 +++-
 kernel/power/hibernate.c                       | 15 ++++++++-------
 kernel/power/main.c                            | 12 ++++++------
 kernel/power/suspend.c                         |  4 ++--
 kernel/power/user.c                            |  4 ++--
 kernel/reboot.c                                |  6 +++---
 mm/page_alloc.c                                | 11 ++++++-----
 10 files changed, 40 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index af005770e767..cd283190855a 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -204,26 +204,26 @@ VI. Are there any precautions to be taken to prevent freezing failures?
 
 Yes, there are.
 
-First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
 from system-wide sleep such as suspend/hibernation is not encouraged.
 If possible, that piece of code must instead hook onto the suspend/hibernation
 notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
 (kernel/cpu.c) for an example.
 
-However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
+However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
 that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'pm_mutex' lock, and hence that other entity failed
+successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
 to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
 state. As a consequence, the freezer would not be able to freeze that task,
 leading to freezing failure.
 
 However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
 since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'pm_mutex', which will be released
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
 only after the entire suspend/hibernation sequence is complete.
 So, to summarize, use [un]lock_system_sleep() instead of directly using
-mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
 
 V. Miscellaneous
 /sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
index 6f55eb960a6d..a8751b8df10e 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.txt
@@ -32,7 +32,7 @@ More details follow:
                                     sysfs file
                                         |
                                         v
-                               Acquire pm_mutex lock
+                               Acquire system_transition_mutex lock
                                         |
                                         v
                              Send PM_SUSPEND_PREPARE
@@ -96,10 +96,10 @@ execution during resume):
 
 * thaw tasks
 * send PM_POST_SUSPEND notifications
-* Release pm_mutex lock.
+* Release system_transition_mutex lock.
 
 
-It is to be noted here that the pm_mutex lock is acquired at the very
+It is to be noted here that the system_transition_mutex lock is acquired at the very
 beginning, when we are just starting out to suspend, and then released only
 after the entire cycle is complete (i.e., suspend + resume).
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 440b62f7502e..5a28ac9284f0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -414,7 +414,7 @@ static inline bool hibernation_available(void) { return false; }
 #define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
 #define PM_POST_RESTORE		0x0006 /* Restore failed */
 
-extern struct mutex pm_mutex;
+extern struct mutex system_transition_mutex;
 
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 6f56a9e219fa..b162b74611e4 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -15,7 +15,9 @@
 atomic_t system_freezing_cnt = ATOMIC_INIT(0);
 EXPORT_SYMBOL(system_freezing_cnt);
 
-/* indicate whether PM freezing is in effect, protected by pm_mutex */
+/* indicate whether PM freezing is in effect, protected by
+ * system_transition_mutex
+ */
 bool pm_freezing;
 bool pm_nosig_freezing;
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 38f3f96b4b12..abef759de7c8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -338,7 +338,7 @@ static int create_image(int platform_mode)
  * hibernation_snapshot - Quiesce devices and create a hibernation image.
  * @platform_mode: If set, use platform driver to prepare for the transition.
  *
- * This routine must be called with pm_mutex held.
+ * This routine must be called with system_transition_mutex held.
  */
 int hibernation_snapshot(int platform_mode)
 {
@@ -500,8 +500,9 @@ static int resume_target_kernel(bool platform_mode)
  * hibernation_restore - Quiesce devices and restore from a hibernation image.
  * @platform_mode: If set, use platform driver to prepare for the transition.
  *
- * This routine must be called with pm_mutex held.  If it is successful, control
- * reappears in the restored target kernel in hibernation_snapshot().
+ * This routine must be called with system_transition_mutex held.  If it is
+ * successful, control reappears in the restored target kernel in
+ * hibernation_snapshot().
  */
 int hibernation_restore(int platform_mode)
 {
@@ -806,13 +807,13 @@ static int software_resume(void)
 	 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
 	 * is configured into the kernel. Since the regular hibernate
 	 * trigger path is via sysfs which takes a buffer mutex before
-	 * calling hibernate functions (which take pm_mutex) this can
-	 * cause lockdep to complain about a possible ABBA deadlock
+	 * calling hibernate functions (which take system_transition_mutex)
+	 * this can cause lockdep to complain about a possible ABBA deadlock
 	 * which cannot happen since we're in the boot code here and
 	 * sysfs can't be invoked yet. Therefore, we use a subclass
 	 * here to avoid lockdep complaining.
 	 */
-	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
 
 	if (swsusp_resume_device)
 		goto Check_image;
@@ -900,7 +901,7 @@ static int software_resume(void)
 	atomic_inc(&snapshot_device_available);
 	/* For success case, the suspend path will release the lock */
  Unlock:
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 	pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
 	return error;
  Close_Finish:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d9706da10930..35b50823d83b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,17 +15,16 @@
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 
 #include "power.h"
 
-DEFINE_MUTEX(pm_mutex);
-
 #ifdef CONFIG_PM_SLEEP
 
 void lock_system_sleep(void)
 {
 	current->flags |= PF_FREEZER_SKIP;
-	mutex_lock(&pm_mutex);
+	mutex_lock(&system_transition_mutex);
 }
 EXPORT_SYMBOL_GPL(lock_system_sleep);
 
@@ -37,8 +36,9 @@ void unlock_system_sleep(void)
 	 *
 	 * Reason:
 	 * Fundamentally, we just don't need it, because freezing condition
-	 * doesn't come into effect until we release the pm_mutex lock,
-	 * since the freezer always works with pm_mutex held.
+	 * doesn't come into effect until we release the
+	 * system_transition_mutex lock, since the freezer always works with
+	 * system_transition_mutex held.
 	 *
 	 * More importantly, in the case of hibernation,
 	 * unlock_system_sleep() gets called in snapshot_read() and
@@ -47,7 +47,7 @@ void unlock_system_sleep(void)
 	 * enter the refrigerator, thus causing hibernation to lockup.
 	 */
 	current->flags &= ~PF_FREEZER_SKIP;
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 }
 EXPORT_SYMBOL_GPL(unlock_system_sleep);
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 87331565e505..9e13afe65a14 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -556,7 +556,7 @@ static int enter_state(suspend_state_t state)
 	} else if (!valid_state(state)) {
 		return -EINVAL;
 	}
-	if (!mutex_trylock(&pm_mutex))
+	if (!mutex_trylock(&system_transition_mutex))
 		return -EBUSY;
 
 	if (state == PM_SUSPEND_TO_IDLE)
@@ -590,7 +590,7 @@ static int enter_state(suspend_state_t state)
 	pm_pr_dbg("Finishing wakeup.\n");
 	suspend_finish();
  Unlock:
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 	return error;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index abd225550271..2d8b60a3c86b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -216,7 +216,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!mutex_trylock(&pm_mutex))
+	if (!mutex_trylock(&system_transition_mutex))
 		return -EBUSY;
 
 	lock_device_hotplug();
@@ -394,7 +394,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 	}
 
 	unlock_device_hotplug();
-	mutex_unlock(&pm_mutex);
+	mutex_unlock(&system_transition_mutex);
 
 	return error;
 }
diff --git a/kernel/reboot.c b/kernel/reboot.c
index e4ced883d8de..8fb44dec9ad7 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -294,7 +294,7 @@ void kernel_power_off(void)
 }
 EXPORT_SYMBOL_GPL(kernel_power_off);
 
-static DEFINE_MUTEX(reboot_mutex);
+DEFINE_MUTEX(system_transition_mutex);
 
 /*
  * Reboot system call: for obvious reasons only root may call it,
@@ -338,7 +338,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
 		cmd = LINUX_REBOOT_CMD_HALT;
 
-	mutex_lock(&reboot_mutex);
+	mutex_lock(&system_transition_mutex);
 	switch (cmd) {
 	case LINUX_REBOOT_CMD_RESTART:
 		kernel_restart(NULL);
@@ -389,7 +389,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
 		ret = -EINVAL;
 		break;
 	}
-	mutex_unlock(&reboot_mutex);
+	mutex_unlock(&system_transition_mutex);
 	return ret;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100f1e63..e47f9e467408 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -155,16 +155,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
  * while devices are suspended.  To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
  */
 
 static gfp_t saved_gfp_mask;
 
 void pm_restore_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	if (saved_gfp_mask) {
 		gfp_allowed_mask = saved_gfp_mask;
 		saved_gfp_mask = 0;
@@ -173,7 +174,7 @@ void pm_restore_gfp_mask(void)
 
 void pm_restrict_gfp_mask(void)
 {
-	WARN_ON(!mutex_is_locked(&pm_mutex));
+	WARN_ON(!mutex_is_locked(&system_transition_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
 	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
-- 
cgit v1.2.3


From 5636cf7d6dc86f47fd66757dae088e35014da464 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Wed, 23 May 2018 11:58:36 -0400
Subject: btrfs: remove the logged extents infrastructure

This is no longer used anywhere, remove all of it.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c      | 123 -------------------------------------------
 fs/btrfs/ordered-data.h      |  20 ++-----
 fs/btrfs/tree-log.c          |  16 ------
 include/trace/events/btrfs.h |   1 -
 4 files changed, 3 insertions(+), 157 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2e1a1694a33d..c35d2aed5ff8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -421,129 +421,6 @@ out:
 	return ret == 0;
 }
 
-/* Needs to either be called under a log transaction or the log_mutex */
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
-			      struct list_head *logged_list,
-			      const loff_t start,
-			      const loff_t end)
-{
-	struct btrfs_ordered_inode_tree *tree;
-	struct btrfs_ordered_extent *ordered;
-	struct rb_node *n;
-	struct rb_node *prev;
-
-	tree = &inode->ordered_tree;
-	spin_lock_irq(&tree->lock);
-	n = __tree_search(&tree->tree, end, &prev);
-	if (!n)
-		n = prev;
-	for (; n; n = rb_prev(n)) {
-		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
-		if (ordered->file_offset > end)
-			continue;
-		if (entry_end(ordered) <= start)
-			break;
-		if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
-			continue;
-		list_add(&ordered->log_list, logged_list);
-		refcount_inc(&ordered->refs);
-	}
-	spin_unlock_irq(&tree->lock);
-}
-
-void btrfs_put_logged_extents(struct list_head *logged_list)
-{
-	struct btrfs_ordered_extent *ordered;
-
-	while (!list_empty(logged_list)) {
-		ordered = list_first_entry(logged_list,
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		btrfs_put_ordered_extent(ordered);
-	}
-}
-
-void btrfs_submit_logged_extents(struct list_head *logged_list,
-				 struct btrfs_root *log)
-{
-	int index = log->log_transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	list_splice_tail(logged_list, &log->logged_list[index]);
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *log, u64 transid)
-{
-	struct btrfs_ordered_extent *ordered;
-	int index = transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	while (!list_empty(&log->logged_list[index])) {
-		struct inode *inode;
-		ordered = list_first_entry(&log->logged_list[index],
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		inode = ordered->inode;
-		spin_unlock_irq(&log->log_extents_lock[index]);
-
-		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			u64 start = ordered->file_offset;
-			u64 end = ordered->file_offset + ordered->len - 1;
-
-			WARN_ON(!inode);
-			filemap_fdatawrite_range(inode->i_mapping, start, end);
-		}
-		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
-						   &ordered->flags));
-
-		/*
-		 * In order to keep us from losing our ordered extent
-		 * information when committing the transaction we have to make
-		 * sure that any logged extents are completed when we go to
-		 * commit the transaction.  To do this we simply increase the
-		 * current transactions pending_ordered counter and decrement it
-		 * when the ordered extent completes.
-		 */
-		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
-			struct btrfs_ordered_inode_tree *tree;
-
-			tree = &BTRFS_I(inode)->ordered_tree;
-			spin_lock_irq(&tree->lock);
-			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
-				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
-				atomic_inc(&trans->transaction->pending_ordered);
-			}
-			spin_unlock_irq(&tree->lock);
-		}
-		btrfs_put_ordered_extent(ordered);
-		spin_lock_irq(&log->log_extents_lock[index]);
-	}
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
-{
-	struct btrfs_ordered_extent *ordered;
-	int index = transid % 2;
-
-	spin_lock_irq(&log->log_extents_lock[index]);
-	while (!list_empty(&log->logged_list[index])) {
-		ordered = list_first_entry(&log->logged_list[index],
-					   struct btrfs_ordered_extent,
-					   log_list);
-		list_del_init(&ordered->log_list);
-		spin_unlock_irq(&log->log_extents_lock[index]);
-		btrfs_put_ordered_extent(ordered);
-		spin_lock_irq(&log->log_extents_lock[index]);
-	}
-	spin_unlock_irq(&log->log_extents_lock[index]);
-}
-
 /*
  * used to drop a reference on an ordered extent.  This will free
  * the extent if the last reference is dropped
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3be443fb3001..b2d3f6a091f7 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -54,15 +54,11 @@ struct btrfs_ordered_sum {
 #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
 				       * has done its due diligence in updating
 				       * the isize. */
-#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
-				       ordered extent */
-#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
+#define BTRFS_ORDERED_TRUNCATED 8 /* Set when we have to truncate an extent */
 
-#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
-				 * in the logging code. */
-#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+#define BTRFS_ORDERED_PENDING 9 /* We are waiting for this ordered extent to
 				  * complete in the current transaction. */
-#define BTRFS_ORDERED_REGULAR 12 /* Regular IO for COW */
+#define BTRFS_ORDERED_REGULAR 10 /* Regular IO for COW */
 
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
@@ -193,16 +189,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
 			       const u64 range_start, const u64 range_len);
 u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
 			      const u64 range_start, const u64 range_len);
-void btrfs_get_logged_extents(struct btrfs_inode *inode,
-			      struct list_head *logged_list,
-			      const loff_t start,
-			      const loff_t end);
-void btrfs_put_logged_extents(struct list_head *logged_list);
-void btrfs_submit_logged_extents(struct list_head *logged_list,
-				 struct btrfs_root *log);
-void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
-			       struct btrfs_root *log, u64 transid);
-void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
 int __init ordered_data_init(void);
 void __cold ordered_data_exit(void);
 
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index dce499071c86..daf32dc94dc3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2933,7 +2933,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	/* bail out if we need to do a full commit */
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		ret = -EAGAIN;
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&root->log_mutex);
 		goto out;
 	}
@@ -2951,7 +2950,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		blk_finish_plug(&plug);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		btrfs_set_log_full_commit(fs_info, trans);
 		mutex_unlock(&root->log_mutex);
 		goto out;
@@ -3002,7 +3000,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out;
@@ -3020,7 +3017,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (atomic_read(&log_root_tree->log_commit[index2])) {
 		blk_finish_plug(&plug);
 		ret = btrfs_wait_tree_log_extents(log, mark);
-		btrfs_wait_logged_extents(trans, log, log_transid);
 		wait_log_commit(log_root_tree,
 				root_log_ctx.log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3041,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		blk_finish_plug(&plug);
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out_wake_log_root;
@@ -3058,7 +3053,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
@@ -3068,11 +3062,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 						  EXTENT_NEW | EXTENT_DIRTY);
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
-	btrfs_wait_logged_extents(trans, log, log_transid);
 
 	btrfs_set_super_log_root(fs_info->super_for_commit,
 				 log_root_tree->node->start);
@@ -3159,14 +3151,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 				  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
 	}
 
-	/*
-	 * We may have short-circuited the log tree with the full commit logic
-	 * and left ordered extents on our list, so clear these out to keep us
-	 * from leaking inodes and memory.
-	 */
-	btrfs_free_logged_extents(log, 0);
-	btrfs_free_logged_extents(log, 1);
-
 	free_extent_buffer(log->node);
 	kfree(log);
 }
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 39b94ec965be..7057cc99d267 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -433,7 +433,6 @@ DEFINE_EVENT(
 		{ (1 << BTRFS_ORDERED_DIRECT),	 	"DIRECT" 	}, \
 		{ (1 << BTRFS_ORDERED_IOERR), 		"IOERR" 	}, \
 		{ (1 << BTRFS_ORDERED_UPDATED_ISIZE), 	"UPDATED_ISIZE"	}, \
-		{ (1 << BTRFS_ORDERED_LOGGED_CSUM), 	"LOGGED_CSUM"	}, \
 		{ (1 << BTRFS_ORDERED_TRUNCATED), 	"TRUNCATED"	})
 
 
-- 
cgit v1.2.3


From e41ca5897489b1c18af75ff0cc8f5c80260b3281 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 6 Jun 2018 15:41:49 +0800
Subject: btrfs: Get rid of the confusing btrfs_file_extent_inline_len

We used to call btrfs_file_extent_inline_len() to get the uncompressed
data size of an inlined extent.

However this function is hiding evil, for compressed extent, it has no
choice but to directly read out ram_bytes from btrfs_file_extent_item.
While for uncompressed extent, it uses item size to calculate the real
data size, and ignoring ram_bytes completely.

In fact, for corrupted ram_bytes, due to above behavior kernel
btrfs_print_leaf() can't even print correct ram_bytes to expose the bug.

Since we have the tree-checker to verify all EXTENT_DATA, such mismatch
can be detected pretty easily, thus we can trust ram_bytes without the
evil btrfs_file_extent_inline_len().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h             | 26 --------------------------
 fs/btrfs/file-item.c         |  2 +-
 fs/btrfs/file.c              |  3 +--
 fs/btrfs/inode.c             | 12 ++++++------
 fs/btrfs/print-tree.c        |  4 ++--
 fs/btrfs/send.c              | 17 +++++++----------
 fs/btrfs/tree-log.c          | 12 ++++--------
 include/trace/events/btrfs.h |  2 +-
 8 files changed, 22 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 12cb327cd16e..41ba770b9db9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2428,32 +2428,6 @@ static inline u32 btrfs_file_extent_inline_item_len(
 	return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START;
 }
 
-/* this returns the number of file bytes represented by the inline item.
- * If an item is compressed, this is the uncompressed size
- */
-static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb,
-					int slot,
-					const struct btrfs_file_extent_item *fi)
-{
-	struct btrfs_map_token token;
-
-	btrfs_init_map_token(&token);
-	/*
-	 * return the space used on disk if this item isn't
-	 * compressed or encoded
-	 */
-	if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 &&
-	    btrfs_token_file_extent_encryption(eb, fi, &token) == 0 &&
-	    btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) {
-		return btrfs_file_extent_inline_item_len(eb,
-							 btrfs_item_nr(slot));
-	}
-
-	/* otherwise use the ram bytes field */
-	return btrfs_token_file_extent_ram_bytes(eb, fi, &token);
-}
-
-
 /* btrfs_dev_stats_item */
 static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
 					const struct btrfs_dev_stats_item *ptr,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f9dd6d1836a3..8c3cd7072caf 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -942,7 +942,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
 			btrfs_file_extent_num_bytes(leaf, fi);
 	} else if (type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
-		size = btrfs_file_extent_inline_len(leaf, slot, fi);
+		size = btrfs_file_extent_ram_bytes(leaf, fi);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
 	}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 975c590c50d8..4cd8af14f915 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -833,8 +833,7 @@ next_slot:
 				btrfs_file_extent_num_bytes(leaf, fi);
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = key.offset +
-				btrfs_file_extent_inline_len(leaf,
-						     path->slots[0], fi);
+				btrfs_file_extent_ram_bytes(leaf, fi);
 		} else {
 			/* can't happen */
 			BUG();
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eba61bcb9bb3..1ade43c02b81 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1443,8 +1443,7 @@ next_slot:
 			nocow = 1;
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = found_key.offset +
-				btrfs_file_extent_inline_len(leaf,
-						     path->slots[0], fi);
+				btrfs_file_extent_ram_bytes(leaf, fi);
 			extent_end = ALIGN(extent_end,
 					   fs_info->sectorsize);
 		} else {
@@ -4643,8 +4642,8 @@ search_again:
 					BTRFS_I(inode), leaf, fi,
 					found_key.offset);
 			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-				item_end += btrfs_file_extent_inline_len(leaf,
-							 path->slots[0], fi);
+				item_end += btrfs_file_extent_ram_bytes(leaf,
+									fi);
 
 				trace_btrfs_truncate_show_fi_inline(
 					BTRFS_I(inode), leaf, fi, path->slots[0],
@@ -6943,7 +6942,8 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 						       extent_start);
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
 		size_t size;
-		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+
+		size = btrfs_file_extent_ram_bytes(leaf, item);
 		extent_end = ALIGN(extent_start + size,
 				   fs_info->sectorsize);
 
@@ -6994,7 +6994,7 @@ next:
 		if (new_inline)
 			goto out;
 
-		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
+		size = btrfs_file_extent_ram_bytes(leaf, item);
 		extent_offset = page_offset(page) + pg_offset - extent_start;
 		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
 				  size - extent_offset);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index a4e11cf04671..59efcf2e0de8 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -267,8 +267,8 @@ void btrfs_print_leaf(struct extent_buffer *l)
 					    struct btrfs_file_extent_item);
 			if (btrfs_file_extent_type(l, fi) ==
 			    BTRFS_FILE_EXTENT_INLINE) {
-				pr_info("\t\tinline extent data size %u\n",
-				       btrfs_file_extent_inline_len(l, i, fi));
+				pr_info("\t\tinline extent data size %llu\n",
+				       btrfs_file_extent_ram_bytes(l, fi));
 				break;
 			}
 			pr_info("\t\textent data disk bytenr %llu nr %llu\n",
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c47f62b19226..6ff7a1315e52 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1500,7 +1500,7 @@ static int read_symlink(struct btrfs_root *root,
 	BUG_ON(compression);
 
 	off = btrfs_file_extent_inline_start(ei);
-	len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei);
+	len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
 
 	ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
 
@@ -5160,7 +5160,7 @@ static int clone_range(struct send_ctx *sctx,
 		ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 		type = btrfs_file_extent_type(leaf, ei);
 		if (type == BTRFS_FILE_EXTENT_INLINE) {
-			ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
+			ext_len = btrfs_file_extent_ram_bytes(leaf, ei);
 			ext_len = PAGE_ALIGN(ext_len);
 		} else {
 			ext_len = btrfs_file_extent_num_bytes(leaf, ei);
@@ -5236,8 +5236,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
 			struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], ei);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		len = btrfs_file_extent_inline_len(path->nodes[0],
-						   path->slots[0], ei);
+		len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
 		/*
 		 * it is possible the inline item won't cover the whole page,
 		 * but there may be items after this page.  Make
@@ -5375,7 +5374,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
 		}
 
 		if (right_type == BTRFS_FILE_EXTENT_INLINE) {
-			right_len = btrfs_file_extent_inline_len(eb, slot, ei);
+			right_len = btrfs_file_extent_ram_bytes(eb, ei);
 			right_len = PAGE_ALIGN(right_len);
 		} else {
 			right_len = btrfs_file_extent_num_bytes(eb, ei);
@@ -5496,8 +5495,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
 			    struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], fi);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
-							path->slots[0], fi);
+		u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
 		extent_end = ALIGN(key.offset + size,
 				   sctx->send_root->fs_info->sectorsize);
 	} else {
@@ -5560,7 +5558,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
 		fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(leaf, fi) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			u64 size = btrfs_file_extent_inline_len(leaf, slot, fi);
+			u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
 
 			extent_end = ALIGN(key.offset + size,
 					   root->fs_info->sectorsize);
@@ -5606,8 +5604,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
 			    struct btrfs_file_extent_item);
 	type = btrfs_file_extent_type(path->nodes[0], fi);
 	if (type == BTRFS_FILE_EXTENT_INLINE) {
-		u64 size = btrfs_file_extent_inline_len(path->nodes[0],
-							path->slots[0], fi);
+		u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
 		extent_end = ALIGN(key->offset + size,
 				   sctx->send_root->fs_info->sectorsize);
 	} else {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index b3b1d424f2d8..6bca8f88ade0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -597,7 +597,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
 			nbytes = 0;
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-		size = btrfs_file_extent_inline_len(eb, slot, item);
+		size = btrfs_file_extent_ram_bytes(eb, item);
 		nbytes = btrfs_file_extent_ram_bytes(eb, item);
 		extent_end = ALIGN(start + size,
 				   fs_info->sectorsize);
@@ -3920,9 +3920,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 					struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src,
-							   src_path->slots[0],
-							   extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			*last_extent = ALIGN(key.offset + len,
 					     fs_info->sectorsize);
 		} else {
@@ -3987,7 +3985,7 @@ fill_holes:
 		extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src, i, extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			extent_end = ALIGN(key.offset + len,
 					   fs_info->sectorsize);
 		} else {
@@ -4572,9 +4570,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 
 		if (btrfs_file_extent_type(leaf, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(leaf,
-							   path->slots[0],
-							   extent);
+			len = btrfs_file_extent_ram_bytes(leaf, extent);
 			ASSERT(len == i_size ||
 			       (len == fs_info->sectorsize &&
 				btrfs_file_extent_compression(leaf, extent) !=
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 7057cc99d267..b401c4e36394 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -374,7 +374,7 @@ DECLARE_EVENT_CLASS(
 		__entry->extent_type	= btrfs_file_extent_type(l, fi);
 		__entry->compression	= btrfs_file_extent_compression(l, fi);
 		__entry->extent_start	= start;
-		__entry->extent_end	= (start + btrfs_file_extent_inline_len(l, slot, fi));
+		__entry->extent_end	= (start + btrfs_file_extent_ram_bytes(l, fi));
 	),
 
 	TP_printk_btrfs(
-- 
cgit v1.2.3


From 86dafed50e2b9b18198cc663775ece819ff113ba Mon Sep 17 00:00:00 2001
From: KarimAllah Ahmed <karahmed@amazon.de>
Date: Tue, 10 Jul 2018 11:27:19 +0200
Subject: KVM: Switch 'requests' to be 64-bit (explicitly)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch 'requests' to be explicitly 64-bit and update BUILD_BUG_ON check to
use the size of "requests" instead of the hard-coded '32'.

That gives us a bit more room again for arch-specific requests as we
already ran out of space for x86 due to the hard-coded check.

The only exception here is ARM32 as it is still 32-bits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5f138b40e433..2840ac74a3d7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -130,7 +130,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
-	BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \
+	BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
 	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
 })
 #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
@@ -224,7 +224,7 @@ struct kvm_vcpu {
 	int vcpu_id;
 	int srcu_idx;
 	int mode;
-	unsigned long requests;
+	u64 requests;
 	unsigned long guest_debug;
 
 	int pre_pcpu;
@@ -1131,7 +1131,7 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	 * caller.  Paired with the smp_mb__after_atomic in kvm_check_request.
 	 */
 	smp_wmb();
-	set_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
@@ -1141,12 +1141,12 @@ static inline bool kvm_request_pending(struct kvm_vcpu *vcpu)
 
 static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu)
 {
-	return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu)
 {
-	clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests);
+	clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests);
 }
 
 static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 8fcc4b5923af5de58b80b53a069453b135693304 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 10 Jul 2018 11:27:20 +0200
Subject: kvm: nVMX: Introduce KVM_CAP_NESTED_STATE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For nested virtualization L0 KVM is managing a bit of state for L2 guests,
this state can not be captured through the currently available IOCTLs. In
fact the state captured through all of these IOCTLs is usually a mix of L1
and L2 state. It is also dependent on whether the L2 guest was running at
the moment when the process was interrupted to save its state.

With this capability, there are two new vcpu ioctls: KVM_GET_NESTED_STATE
and KVM_SET_NESTED_STATE. These can be used for saving and restoring a VM
that is in VMX operation.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Jim Mattson <jmattson@google.com>
[karahmed@ - rename structs and functions and make them ready for AMD and
             address previous comments.
           - handle nested.smm state.
           - rebase & a bit of refactoring.
           - Merge 7/8 and 8/8 into one patch. ]
Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  56 ++++++++++++
 arch/x86/include/asm/kvm_host.h   |   6 ++
 arch/x86/include/uapi/asm/kvm.h   |  37 ++++++++
 arch/x86/kvm/vmx.c                | 175 +++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c                |  54 ++++++++++++
 include/uapi/linux/kvm.h          |   4 +
 6 files changed, 330 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index cb8db4f9d097..7b83b176c662 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3561,6 +3561,62 @@ Returns: 0 on success,
 	-ENOENT on deassign if the conn_id isn't registered
 	-EEXIST on assign if the conn_id is already registered
 
+4.114 KVM_GET_NESTED_STATE
+
+Capability: KVM_CAP_NESTED_STATE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_nested_state (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  E2BIG:     the total state size (including the fixed-size part of struct
+             kvm_nested_state) exceeds the value of 'size' specified by
+             the user; the size required will be written into size.
+
+struct kvm_nested_state {
+	__u16 flags;
+	__u16 format;
+	__u32 size;
+	union {
+		struct kvm_vmx_nested_state vmx;
+		struct kvm_svm_nested_state svm;
+		__u8 pad[120];
+	};
+	__u8 data[0];
+};
+
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+
+struct kvm_vmx_nested_state {
+	__u64 vmxon_pa;
+	__u64 vmcs_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+This ioctl copies the vcpu's nested virtualization state from the kernel to
+userspace.
+
+The maximum size of the state, including the fixed-size part of struct
+kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
+the KVM_CHECK_EXTENSION ioctl().
+
+4.115 KVM_SET_NESTED_STATE
+
+Capability: KVM_CAP_NESTED_STATE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_nested_state (in)
+Returns: 0 on success, -1 on error
+
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.  For
+the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index da957725992d..bd287b348751 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1086,6 +1086,12 @@ struct kvm_x86_ops {
 
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
+	int (*get_nested_state)(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				unsigned user_data_size);
+	int (*set_nested_state)(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				struct kvm_nested_state *kvm_state);
 	void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
 
 	int (*smi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..86299efa804a 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
 #define KVM_X86_QUIRK_CD_NW_CLEARED	(1 << 1)
 
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+
+struct kvm_vmx_nested_state {
+	__u64 vmxon_pa;
+	__u64 vmcs_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+	/* KVM_STATE_* flags */
+	__u16 flags;
+
+	/* 0 for VMX, 1 for SVM.  */
+	__u16 format;
+
+	/* 128 for SVM, 128 + VMCS size for VMX.  */
+	__u32 size;
+
+	union {
+		/* VMXON, VMCS */
+		struct kvm_vmx_nested_state vmx;
+
+		/* Pad the header to 128 bytes.  */
+		__u8 pad[120];
+	};
+
+	__u8 data[0];
+};
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fee44e4c5c79..4be6486173b7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7589,6 +7589,11 @@ static __init int hardware_setup(void)
 	else
 		kvm_disable_tdp();
 
+	if (!nested) {
+		kvm_x86_ops->get_nested_state = NULL;
+		kvm_x86_ops->set_nested_state = NULL;
+	}
+
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
 	 * and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -11775,8 +11780,8 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 }
 
 /*
- * If exit_qual is NULL, this is being called from RSM.
- * Otherwise it's called from vmlaunch/vmresume.
+ * If exit_qual is NULL, this is being called from state restore (either RSM
+ * or KVM_SET_NESTED_STATE).  Otherwise it's called from vmlaunch/vmresume.
  */
 static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 {
@@ -13016,6 +13021,170 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				u32 user_data_size)
+{
+	struct vcpu_vmx *vmx;
+	struct vmcs12 *vmcs12;
+	struct kvm_nested_state kvm_state = {
+		.flags = 0,
+		.format = 0,
+		.size = sizeof(kvm_state),
+		.vmx.vmxon_pa = -1ull,
+		.vmx.vmcs_pa = -1ull,
+	};
+
+	if (!vcpu)
+		return kvm_state.size + 2 * VMCS12_SIZE;
+
+	vmx = to_vmx(vcpu);
+	vmcs12 = get_vmcs12(vcpu);
+	if (nested_vmx_allowed(vcpu) &&
+	    (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
+		kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
+		kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
+
+		if (vmx->nested.current_vmptr != -1ull)
+			kvm_state.size += VMCS12_SIZE;
+
+		if (vmx->nested.smm.vmxon)
+			kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
+
+		if (vmx->nested.smm.guest_mode)
+			kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
+
+		if (is_guest_mode(vcpu)) {
+			kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
+
+			if (vmx->nested.nested_run_pending)
+				kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
+		}
+	}
+
+	if (user_data_size < kvm_state.size)
+		goto out;
+
+	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
+		return -EFAULT;
+
+	if (vmx->nested.current_vmptr == -1ull)
+		goto out;
+
+	/*
+	 * When running L2, the authoritative vmcs12 state is in the
+	 * vmcs02. When running L1, the authoritative vmcs12 state is
+	 * in the shadow vmcs linked to vmcs01, unless
+	 * sync_shadow_vmcs is set, in which case, the authoritative
+	 * vmcs12 state is in the vmcs12 already.
+	 */
+	if (is_guest_mode(vcpu))
+		sync_vmcs12(vcpu, vmcs12);
+	else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs)
+		copy_shadow_to_vmcs12(vmx);
+
+	if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12)))
+		return -EFAULT;
+
+out:
+	return kvm_state.size;
+}
+
+static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
+				struct kvm_nested_state __user *user_kvm_nested_state,
+				struct kvm_nested_state *kvm_state)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vmcs12 *vmcs12;
+	u32 exit_qual;
+	int ret;
+
+	if (kvm_state->format != 0)
+		return -EINVAL;
+
+	if (!nested_vmx_allowed(vcpu))
+		return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
+
+	if (kvm_state->vmx.vmxon_pa == -1ull) {
+		if (kvm_state->vmx.smm.flags)
+			return -EINVAL;
+
+		if (kvm_state->vmx.vmcs_pa != -1ull)
+			return -EINVAL;
+
+		vmx_leave_nested(vcpu);
+		return 0;
+	}
+
+	if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
+		return -EINVAL;
+
+	if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+		return -EINVAL;
+
+	if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
+	    !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+		return -EINVAL;
+
+	if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+	    (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+		return -EINVAL;
+
+	if (kvm_state->vmx.smm.flags &
+	    ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
+		return -EINVAL;
+
+	if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+	    !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
+		return -EINVAL;
+
+	vmx_leave_nested(vcpu);
+	if (kvm_state->vmx.vmxon_pa == -1ull)
+		return 0;
+
+	vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
+	ret = enter_vmx_operation(vcpu);
+	if (ret)
+		return ret;
+
+	set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+
+	if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
+		vmx->nested.smm.vmxon = true;
+		vmx->nested.vmxon = false;
+
+		if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
+			vmx->nested.smm.guest_mode = true;
+	}
+
+	vmcs12 = get_vmcs12(vcpu);
+	if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
+		return -EFAULT;
+
+	if (vmcs12->revision_id != VMCS12_REVISION)
+		return -EINVAL;
+
+	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
+		return 0;
+
+	vmx->nested.nested_run_pending =
+		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
+	if (check_vmentry_prereqs(vcpu, vmcs12) ||
+	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
+		return -EINVAL;
+
+	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
+		vmx->nested.nested_run_pending = 1;
+
+	vmx->nested.dirty_vmcs12 = true;
+	ret = enter_vmx_non_root_mode(vcpu, NULL);
+	if (ret)
+		return -EINVAL;
+
+	return 0;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -13150,6 +13319,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 	.setup_mce = vmx_setup_mce,
 
+	.get_nested_state = vmx_get_nested_state,
+	.set_nested_state = vmx_set_nested_state,
 	.get_vmcs12_pages = nested_get_vmcs12_pages,
 
 	.smi_allowed = vmx_smi_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fbd59ad047b0..1b14c4a654c3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2947,6 +2947,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_X2APIC_API:
 		r = KVM_X2APIC_API_VALID_FLAGS;
 		break;
+	case KVM_CAP_NESTED_STATE:
+		r = kvm_x86_ops->get_nested_state ?
+			kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
+		break;
 	default:
 		break;
 	}
@@ -3963,6 +3967,56 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_GET_NESTED_STATE: {
+		struct kvm_nested_state __user *user_kvm_nested_state = argp;
+		u32 user_data_size;
+
+		r = -EINVAL;
+		if (!kvm_x86_ops->get_nested_state)
+			break;
+
+		BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
+		if (get_user(user_data_size, &user_kvm_nested_state->size))
+			return -EFAULT;
+
+		r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
+						  user_data_size);
+		if (r < 0)
+			return r;
+
+		if (r > user_data_size) {
+			if (put_user(r, &user_kvm_nested_state->size))
+				return -EFAULT;
+			return -E2BIG;
+		}
+		r = 0;
+		break;
+	}
+	case KVM_SET_NESTED_STATE: {
+		struct kvm_nested_state __user *user_kvm_nested_state = argp;
+		struct kvm_nested_state kvm_state;
+
+		r = -EINVAL;
+		if (!kvm_x86_ops->set_nested_state)
+			break;
+
+		if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
+			return -EFAULT;
+
+		if (kvm_state.size < sizeof(kvm_state))
+			return -EINVAL;
+
+		if (kvm_state.flags &
+		    ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
+			return -EINVAL;
+
+		/* nested_run_pending implies guest_mode.  */
+		if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
+			return -EINVAL;
+
+		r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b955b986b341..3cf632839337 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -950,6 +950,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_EVENTFD 154
 #define KVM_CAP_HYPERV_TLBFLUSH 155
 #define KVM_CAP_S390_HPAGE_1M 156
+#define KVM_CAP_NESTED_STATE 157
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1392,6 +1393,9 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_HYPERV_EVENTFD */
 #define KVM_HYPERV_EVENTFD        _IOW(KVMIO,  0xbd, struct kvm_hyperv_eventfd)
 
+/* Available with KVM_CAP_NESTED_STATE */
+#define KVM_GET_NESTED_STATE         _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
+#define KVM_SET_NESTED_STATE         _IOW(KVMIO,  0xbf, struct kvm_nested_state)
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
-- 
cgit v1.2.3


From b08660e59dbdb600c55953787ed2265a0b510f77 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Thu, 19 Jul 2018 08:40:17 +0000
Subject: KVM: x86: Add tlb remote flush callback in kvm_x86_ops.

This patch is to provide a way for platforms to register hv tlb remote
flush callback and this helps to optimize operation of tlb flush
among vcpus for nested virtualization case.

Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++++++
 include/linux/kvm_host.h        |  7 +++++++
 virt/kvm/kvm_main.c             |  3 ++-
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 75ab578c3f6a..150937e64f63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -985,6 +985,7 @@ struct kvm_x86_ops {
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+	int  (*tlb_remote_flush)(struct kvm *kvm);
 
 	/*
 	 * Flush any TLB entries associated with the given GVA.
@@ -1145,6 +1146,16 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 	return kvm_x86_ops->vm_free(kvm);
 }
 
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+	if (kvm_x86_ops->tlb_remote_flush &&
+	    !kvm_x86_ops->tlb_remote_flush(kvm))
+		return 0;
+	else
+		return -ENOTSUPP;
+}
+
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2840ac74a3d7..7c7362dd2faa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -834,6 +834,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm)
 }
 #endif
 
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
+static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+{
+	return -ENOTSUPP;
+}
+#endif
+
 #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
 void kvm_arch_register_noncoherent_dma(struct kvm *kvm);
 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8f461e0ed382..74544d20bbf8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -273,7 +273,8 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
 	 * barrier here.
 	 */
-	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (!kvm_arch_flush_remote_tlb(kvm)
+	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
-- 
cgit v1.2.3


From 4180bf1b655a791a0a6ef93a2ffffc762722c782 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Mon, 23 Jul 2018 14:39:54 +0800
Subject: KVM: X86: Implement "send IPI" hypercall
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using hypercall to send IPIs by one vmexit instead of one by one for
xAPIC/x2APIC physical mode and one vmexit per-cluster for x2APIC cluster
mode. Intel guest can enter x2apic cluster mode when interrupt remmaping
is enabled in qemu, however, latest AMD EPYC still just supports xapic
mode which can get great improvement by Exit-less IPIs. This patchset
lets a guest send multicast IPIs, with at most 128 destinations per
hypercall in 64-bit mode and 64 vCPUs per hypercall in 32-bit mode.

Hardware: Xeon Skylake 2.5GHz, 2 sockets, 40 cores, 80 threads, the VM
is 80 vCPUs, IPI microbenchmark(https://lkml.org/lkml/2017/12/19/141):

x2apic cluster mode, vanilla

 Dry-run:                         0,            2392199 ns
 Self-IPI:                  6907514,           15027589 ns
 Normal IPI:              223910476,          251301666 ns
 Broadcast IPI:                   0,         9282161150 ns
 Broadcast lock:                  0,         8812934104 ns

x2apic cluster mode, pv-ipi

 Dry-run:                         0,            2449341 ns
 Self-IPI:                  6720360,           15028732 ns
 Normal IPI:              228643307,          255708477 ns
 Broadcast IPI:                   0,         7572293590 ns  => 22% performance boost
 Broadcast lock:                  0,         8316124651 ns

x2apic physical mode, vanilla

 Dry-run:                         0,            3135933 ns
 Self-IPI:                  8572670,           17901757 ns
 Normal IPI:              226444334,          255421709 ns
 Broadcast IPI:                   0,        19845070887 ns
 Broadcast lock:                  0,        19827383656 ns

x2apic physical mode, pv-ipi

 Dry-run:                         0,            2446381 ns
 Self-IPI:                  6788217,           15021056 ns
 Normal IPI:              219454441,          249583458 ns
 Broadcast IPI:                   0,         7806540019 ns  => 154% performance boost
 Broadcast lock:                  0,         9143618799 ns

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/cpuid.txt      |  4 ++++
 Documentation/virtual/kvm/hypercalls.txt | 20 ++++++++++++++++
 arch/x86/include/asm/kvm_host.h          |  4 ++++
 arch/x86/kvm/cpuid.c                     |  3 ++-
 arch/x86/kvm/lapic.c                     | 40 ++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c                       |  3 +++
 include/uapi/linux/kvm_para.h            |  1 +
 7 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index ab022dcd0911..97ca1940a0dc 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
                                    ||       || can be enabled by setting bit 2
                                    ||       || when writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || send IPIs.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index a890529c63ed..da24c138c8d1 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Send IPIs to multiple vCPUs.
+
+a0: lower part of the bitmap of destination APIC IDs
+a1: higher part of the bitmap of destination APIC IDs
+a2: the lowest APIC ID in bitmap
+a3: APIC ICR
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+128 destinations per hypercall in 64-bit mode and 64 vCPUs per
+hypercall in 32-bit mode.  The destinations are represented by a
+bitmap contained in the first two arguments (a0 and a1). Bit 0 of
+a0 corresponds to the APIC ID in the third argument (a2), bit 1
+corresponds to the APIC ID a2+1, and so on.
+
+Returns the number of CPUs to which the IPIs were delivered successfully.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 150937e64f63..c18958ef17d2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1457,6 +1457,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long icr, int op_64_bit);
+
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3d47fd..7bcfa61375c0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+			     (1 << KVM_FEATURE_PV_SEND_IPI);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b5cd8465d44f..f0d693122c24 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 			irq->level, irq->trig_mode, dest_map);
 }
 
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long icr, int op_64_bit)
+{
+	int i;
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	struct kvm_lapic_irq irq = {0};
+	int cluster_size = op_64_bit ? 64 : 32;
+	int count = 0;
+
+	irq.vector = icr & APIC_VECTOR_MASK;
+	irq.delivery_mode = icr & APIC_MODE_MASK;
+	irq.level = (icr & APIC_INT_ASSERT) != 0;
+	irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+	if (icr & APIC_DEST_MASK)
+		return -KVM_EINVAL;
+	if (icr & APIC_SHORT_MASK)
+		return -KVM_EINVAL;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	/* Bits above cluster_size are masked in the caller.  */
+	for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
+		vcpu = map->phys_map[min + i]->vcpu;
+		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	}
+
+	min += cluster_size;
+	for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
+		vcpu = map->phys_map[min + i]->vcpu;
+		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	}
+
+	rcu_read_unlock();
+	return count;
+}
+
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 {
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b974802cadb..3c83711c0ebe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6802,6 +6802,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+	case KVM_HC_SEND_IPI:
+		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+		break;
 #endif
 	default:
 		ret = -KVM_ENOSYS;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index dcf629dd2889..f3893ef82b65 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -13,6 +13,7 @@
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
 #define KVM_EFAULT		EFAULT
+#define KVM_EINVAL		EINVAL
 #define KVM_E2BIG		E2BIG
 #define KVM_EPERM		EPERM
 #define KVM_EOPNOTSUPP		95
-- 
cgit v1.2.3


From aaffcfd1e82d3378538408d0310b7424b98d8f81 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Mon, 23 Jul 2018 14:39:52 +0800
Subject: KVM: X86: Implement PV IPIs in linux guest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement paravirtual apic hooks to enable PV IPIs for KVM if the "send IPI"
hypercall is available.  The hypercall lets a guest send IPIs, with
at most 128 destinations per hypercall in 64-bit mode and 64 vCPUs per
hypercall in 32-bit mode.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm_para.h |  1 +
 arch/x86/kernel/kvm.c                | 96 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h        |  1 +
 3 files changed, 98 insertions(+)

(limited to 'include')

diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 0ede697c3961..19980ec1a316 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -28,6 +28,7 @@
 #define KVM_FEATURE_PV_UNHALT		7
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
+#define KVM_FEATURE_PV_SEND_IPI	11
 
 #define KVM_HINTS_REALTIME      0
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 39d79720380f..62cbd089f709 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -454,6 +454,98 @@ static void __init sev_map_percpu_data(void)
 }
 
 #ifdef CONFIG_SMP
+#define KVM_IPI_CLUSTER_SIZE	(2 * BITS_PER_LONG)
+
+static void __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	unsigned long flags;
+	int cpu, apic_id, icr;
+	int min = 0, max = 0;
+#ifdef CONFIG_X86_64
+	__uint128_t ipi_bitmap = 0;
+#else
+	u64 ipi_bitmap = 0;
+#endif
+
+	if (cpumask_empty(mask))
+		return;
+
+	local_irq_save(flags);
+
+	switch (vector) {
+	default:
+		icr = APIC_DM_FIXED | vector;
+		break;
+	case NMI_VECTOR:
+		icr = APIC_DM_NMI;
+		break;
+	}
+
+	for_each_cpu(cpu, mask) {
+		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
+		if (!ipi_bitmap) {
+			min = max = apic_id;
+		} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
+			ipi_bitmap <<= min - apic_id;
+			min = apic_id;
+		} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
+			max = apic_id < max ? max : apic_id;
+		} else {
+			kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+			min = max = apic_id;
+			ipi_bitmap = 0;
+		}
+		__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
+	}
+
+	if (ipi_bitmap) {
+		kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
+			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
+	}
+
+	local_irq_restore(flags);
+}
+
+static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	__send_ipi_mask(mask, vector);
+}
+
+static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpumask new_mask;
+	const struct cpumask *local_mask;
+
+	cpumask_copy(&new_mask, mask);
+	cpumask_clear_cpu(this_cpu, &new_mask);
+	local_mask = &new_mask;
+	__send_ipi_mask(local_mask, vector);
+}
+
+static void kvm_send_ipi_allbutself(int vector)
+{
+	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void kvm_send_ipi_all(int vector)
+{
+	__send_ipi_mask(cpu_online_mask, vector);
+}
+
+/*
+ * Set the IPI entry points
+ */
+static void kvm_setup_pv_ipi(void)
+{
+	apic->send_IPI_mask = kvm_send_ipi_mask;
+	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
+	apic->send_IPI_all = kvm_send_ipi_all;
+	pr_info("KVM setup pv IPIs\n");
+}
+
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
 	native_smp_prepare_cpus(max_cpus);
@@ -626,6 +718,10 @@ static uint32_t __init kvm_detect(void)
 
 static void __init kvm_apic_init(void)
 {
+#if defined(CONFIG_SMP)
+	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI))
+		kvm_setup_pv_ipi();
+#endif
 }
 
 static void __init kvm_init_platform(void)
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index f3893ef82b65..6c0ce49931e5 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -27,6 +27,7 @@
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 #define KVM_HC_CLOCK_PAIRING		9
+#define KVM_HC_SEND_IPI		10
 
 /*
  * hypercalls use architecture specific
-- 
cgit v1.2.3


From c883da313ebf459efd33d262ca963e3a5f0ac024 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Mon, 30 Jul 2018 07:54:56 -0400
Subject: locks: add tracepoint in flock codepath

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/locks.c                      | 1 +
 include/trace/events/filelock.h | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/locks.c b/fs/locks.c
index e533623e2e99..6138a9bcd924 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -990,6 +990,7 @@ out:
 	if (new_fl)
 		locks_free_lock(new_fl);
 	locks_dispose_list(&dispose);
+	trace_flock_lock_inode(inode, request, error);
 	return error;
 }
 
diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index d1faf3597b9d..68b17c116907 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -112,8 +112,11 @@ DEFINE_EVENT(filelock_lock, locks_remove_posix,
 		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
 		TP_ARGS(inode, fl, ret));
 
-DECLARE_EVENT_CLASS(filelock_lease,
+DEFINE_EVENT(filelock_lock, flock_lock_inode,
+		TP_PROTO(struct inode *inode, struct file_lock *fl, int ret),
+		TP_ARGS(inode, fl, ret));
 
+DECLARE_EVENT_CLASS(filelock_lease,
 	TP_PROTO(struct inode *inode, struct file_lock *fl),
 
 	TP_ARGS(inode, fl),
-- 
cgit v1.2.3


From eac7e072d7e99fad1b6e817c608b03c48205241e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 6 Aug 2018 10:22:35 -0700
Subject: Revert "ata: ahci_platform: allow disabling of hotplug to save power"

This reverts commit aece27a2f01be4bb7683790f69cd1bed3a0929a2.

Causes boot failure on some devices.

 http://lore.kernel.org/r/CA+G9fYuKW_jCFZPqG4tz=QY9ROfHO38KiCp9XTA+KaDOFVtcqQ@mail.gmail.com

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_platform.c    | 11 ++----
 drivers/ata/libahci_platform.c | 82 +++++++++---------------------------------
 include/linux/ahci_platform.h  |  2 --
 3 files changed, 19 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 757729376eda..99f9a895a459 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -68,13 +68,8 @@ disable_resources:
 	return rc;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static const struct dev_pm_ops ahci_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(ahci_platform_suspend, ahci_platform_resume)
-	SET_RUNTIME_PM_OPS(ahci_platform_runtime_suspend,
-			ahci_platform_runtime_resume, NULL)
-};
-#endif
+static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend,
+			 ahci_platform_resume);
 
 static const struct of_device_id ahci_of_match[] = {
 	{ .compatible = "generic-ahci", },
@@ -103,9 +98,7 @@ static struct platform_driver ahci_driver = {
 		.name = DRV_NAME,
 		.of_match_table = ahci_of_match,
 		.acpi_match_table = ahci_acpi_match,
-#ifdef CONFIG_PM_SLEEP
 		.pm = &ahci_pm_ops,
-#endif
 	},
 };
 module_platform_driver(ahci_driver);
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 2ceebaf2ed56..8fbb532b62dd 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -257,7 +257,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res)
 	int c;
 
 	if (hpriv->got_runtime_pm) {
-		pm_runtime_allow(dev);
+		pm_runtime_put_sync(dev);
 		pm_runtime_disable(dev);
 	}
 
@@ -477,10 +477,8 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
 		if (rc == -EPROBE_DEFER)
 			goto err_out;
 	}
-
-	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
-	pm_runtime_forbid(dev);
+	pm_runtime_get_sync(dev);
 	hpriv->got_runtime_pm = true;
 
 	devres_remove_group(dev, NULL);
@@ -709,21 +707,6 @@ int ahci_platform_resume_host(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(ahci_platform_resume_host);
 
-static int _ahci_platform_suspend(struct device *dev)
-{
-	struct ata_host *host = dev_get_drvdata(dev);
-	struct ahci_host_priv *hpriv = host->private_data;
-	int rc;
-
-	rc = ahci_platform_suspend_host(dev);
-	if (rc)
-		return rc;
-
-	ahci_platform_disable_resources(hpriv);
-
-	return 0;
-}
-
 /**
  * ahci_platform_suspend - Suspend an ahci-platform device
  * @dev: the platform device to suspend
@@ -735,45 +718,20 @@ static int _ahci_platform_suspend(struct device *dev)
  * 0 on success otherwise a negative error code
  */
 int ahci_platform_suspend(struct device *dev)
-{
-	return _ahci_platform_suspend(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_suspend);
-
-/**
- * ahci_platform_runtime_suspend - Runtime suspend an ahci-platform device
- * @dev: the platform device to suspend
- *
- * This function suspends the host associated with the device, followed by
- * disabling all the resources of the device.
- *
- * RETURNS:
- * 0 on success otherwise a negative error code
- */
-int ahci_platform_runtime_suspend(struct device *dev)
-{
-	return _ahci_platform_suspend(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_runtime_suspend);
-
-static int _ahci_platform_resume(struct device *dev)
 {
 	struct ata_host *host = dev_get_drvdata(dev);
 	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = ahci_platform_enable_resources(hpriv);
+	rc = ahci_platform_suspend_host(dev);
 	if (rc)
 		return rc;
 
-	rc = ahci_platform_resume_host(dev);
-	if (rc) {
-		ahci_platform_disable_resources(hpriv);
-		return rc;
-	}
+	ahci_platform_disable_resources(hpriv);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ahci_platform_suspend);
 
 /**
  * ahci_platform_resume - Resume an ahci-platform device
@@ -787,37 +745,31 @@ static int _ahci_platform_resume(struct device *dev)
  */
 int ahci_platform_resume(struct device *dev)
 {
+	struct ata_host *host = dev_get_drvdata(dev);
+	struct ahci_host_priv *hpriv = host->private_data;
 	int rc;
 
-	rc = _ahci_platform_resume(dev);
+	rc = ahci_platform_enable_resources(hpriv);
 	if (rc)
 		return rc;
 
+	rc = ahci_platform_resume_host(dev);
+	if (rc)
+		goto disable_resources;
+
 	/* We resumed so update PM runtime state */
 	pm_runtime_disable(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
 	return 0;
-}
-EXPORT_SYMBOL_GPL(ahci_platform_resume);
 
-/**
- * ahci_platform_runtime_resume - Runtime resume an ahci-platform device
- * @dev: the platform device to resume
- *
- * This function enables all the resources of the device followed by
- * resuming the host associated with the device.
- *
- * RETURNS:
- * 0 on success otherwise a negative error code
- */
-int ahci_platform_runtime_resume(struct device *dev)
-{
-	return _ahci_platform_resume(dev);
-}
-EXPORT_SYMBOL_GPL(ahci_platform_runtime_resume);
+disable_resources:
+	ahci_platform_disable_resources(hpriv);
 
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ahci_platform_resume);
 #endif
 
 MODULE_DESCRIPTION("AHCI SATA platform library");
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6396e6982103..1b0a17b22cd3 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -42,7 +42,5 @@ int ahci_platform_suspend_host(struct device *dev);
 int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
-int ahci_platform_runtime_suspend(struct device *dev);
-int ahci_platform_runtime_resume(struct device *dev);
 
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 429711aec282c4b5fe5bbd7b2f0bbbff4110ffb2 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 6 Aug 2018 11:17:47 +0800
Subject: vhost: switch to use new message format

We use to have message like:

struct vhost_msg {
	int type;
	union {
		struct vhost_iotlb_msg iotlb;
		__u8 padding[64];
	};
};

Unfortunately, there will be a hole of 32bit in 64bit machine because
of the alignment. This leads a different formats between 32bit API and
64bit API. What's more it will break 32bit program running on 64bit
machine.

So fixing this by introducing a new message type with an explicit
32bit reserved field after type like:

struct vhost_msg_v2 {
	__u32 type;
	__u32 reserved;
	union {
		struct vhost_iotlb_msg iotlb;
		__u8 padding[64];
	};
};

We will have a consistent ABI after switching to use this. To enable
this capability, introduce a new ioctl (VHOST_SET_BAKCEND_FEATURE) for
userspace to enable this feature (VHOST_BACKEND_F_IOTLB_V2).

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/net.c        | 30 ++++++++++++++++++++
 drivers/vhost/vhost.c      | 71 ++++++++++++++++++++++++++++++++++------------
 drivers/vhost/vhost.h      | 11 ++++++-
 include/uapi/linux/vhost.h | 18 ++++++++++++
 4 files changed, 111 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 367d8023b54d..4e656f89cb22 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -77,6 +77,10 @@ enum {
 			 (1ULL << VIRTIO_F_IOMMU_PLATFORM)
 };
 
+enum {
+	VHOST_NET_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2)
+};
+
 enum {
 	VHOST_NET_VQ_RX = 0,
 	VHOST_NET_VQ_TX = 1,
@@ -1399,6 +1403,21 @@ done:
 	return err;
 }
 
+static int vhost_net_set_backend_features(struct vhost_net *n, u64 features)
+{
+	int i;
+
+	mutex_lock(&n->dev.mutex);
+	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+		mutex_lock(&n->vqs[i].vq.mutex);
+		n->vqs[i].vq.acked_backend_features = features;
+		mutex_unlock(&n->vqs[i].vq.mutex);
+	}
+	mutex_unlock(&n->dev.mutex);
+
+	return 0;
+}
+
 static int vhost_net_set_features(struct vhost_net *n, u64 features)
 {
 	size_t vhost_hlen, sock_hlen, hdr_len;
@@ -1489,6 +1508,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 		if (features & ~VHOST_NET_FEATURES)
 			return -EOPNOTSUPP;
 		return vhost_net_set_features(n, features);
+	case VHOST_GET_BACKEND_FEATURES:
+		features = VHOST_NET_BACKEND_FEATURES;
+		if (copy_to_user(featurep, &features, sizeof(features)))
+			return -EFAULT;
+		return 0;
+	case VHOST_SET_BACKEND_FEATURES:
+		if (copy_from_user(&features, featurep, sizeof(features)))
+			return -EFAULT;
+		if (features & ~VHOST_NET_BACKEND_FEATURES)
+			return -EOPNOTSUPP;
+		return vhost_net_set_backend_features(n, features);
 	case VHOST_RESET_OWNER:
 		return vhost_net_reset_owner(n);
 	case VHOST_SET_OWNER:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index a502f1af4a21..6f6c42d5e4be 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -315,6 +315,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->log_addr = -1ull;
 	vq->private_data = NULL;
 	vq->acked_features = 0;
+	vq->acked_backend_features = 0;
 	vq->log_base = NULL;
 	vq->error_ctx = NULL;
 	vq->kick = NULL;
@@ -1027,28 +1028,40 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
 			     struct iov_iter *from)
 {
-	struct vhost_msg_node node;
-	unsigned size = sizeof(struct vhost_msg);
-	size_t ret;
-	int err;
+	struct vhost_iotlb_msg msg;
+	size_t offset;
+	int type, ret;
 
-	if (iov_iter_count(from) < size)
-		return 0;
-	ret = copy_from_iter(&node.msg, size, from);
-	if (ret != size)
+	ret = copy_from_iter(&type, sizeof(type), from);
+	if (ret != sizeof(type))
 		goto done;
 
-	switch (node.msg.type) {
+	switch (type) {
 	case VHOST_IOTLB_MSG:
-		err = vhost_process_iotlb_msg(dev, &node.msg.iotlb);
-		if (err)
-			ret = err;
+		/* There maybe a hole after type for V1 message type,
+		 * so skip it here.
+		 */
+		offset = offsetof(struct vhost_msg, iotlb) - sizeof(int);
+		break;
+	case VHOST_IOTLB_MSG_V2:
+		offset = sizeof(__u32);
 		break;
 	default:
 		ret = -EINVAL;
-		break;
+		goto done;
+	}
+
+	iov_iter_advance(from, offset);
+	ret = copy_from_iter(&msg, sizeof(msg), from);
+	if (ret != sizeof(msg))
+		goto done;
+	if (vhost_process_iotlb_msg(dev, &msg)) {
+		ret = -EFAULT;
+		goto done;
 	}
 
+	ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) :
+	      sizeof(struct vhost_msg_v2);
 done:
 	return ret;
 }
@@ -1107,13 +1120,28 @@ ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
 		finish_wait(&dev->wait, &wait);
 
 	if (node) {
-		ret = copy_to_iter(&node->msg, size, to);
+		struct vhost_iotlb_msg *msg;
+		void *start = &node->msg;
+
+		switch (node->msg.type) {
+		case VHOST_IOTLB_MSG:
+			size = sizeof(node->msg);
+			msg = &node->msg.iotlb;
+			break;
+		case VHOST_IOTLB_MSG_V2:
+			size = sizeof(node->msg_v2);
+			msg = &node->msg_v2.iotlb;
+			break;
+		default:
+			BUG();
+			break;
+		}
 
-		if (ret != size || node->msg.type != VHOST_IOTLB_MISS) {
+		ret = copy_to_iter(start, size, to);
+		if (ret != size || msg->type != VHOST_IOTLB_MISS) {
 			kfree(node);
 			return ret;
 		}
-
 		vhost_enqueue_msg(dev, &dev->pending_list, node);
 	}
 
@@ -1126,12 +1154,19 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
 	struct vhost_dev *dev = vq->dev;
 	struct vhost_msg_node *node;
 	struct vhost_iotlb_msg *msg;
+	bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2);
 
-	node = vhost_new_msg(vq, VHOST_IOTLB_MISS);
+	node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG);
 	if (!node)
 		return -ENOMEM;
 
-	msg = &node->msg.iotlb;
+	if (v2) {
+		node->msg_v2.type = VHOST_IOTLB_MSG_V2;
+		msg = &node->msg_v2.iotlb;
+	} else {
+		msg = &node->msg.iotlb;
+	}
+
 	msg->type = VHOST_IOTLB_MISS;
 	msg->iova = iova;
 	msg->perm = access;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 6c844b90a168..466ef7542291 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -132,6 +132,7 @@ struct vhost_virtqueue {
 	struct vhost_umem *iotlb;
 	void *private_data;
 	u64 acked_features;
+	u64 acked_backend_features;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
@@ -147,7 +148,10 @@ struct vhost_virtqueue {
 };
 
 struct vhost_msg_node {
-  struct vhost_msg msg;
+  union {
+	  struct vhost_msg msg;
+	  struct vhost_msg_v2 msg_v2;
+  };
   struct vhost_virtqueue *vq;
   struct list_head node;
 };
@@ -238,6 +242,11 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
 	return vq->acked_features & (1ULL << bit);
 }
 
+static inline bool vhost_backend_has_feature(struct vhost_virtqueue *vq, int bit)
+{
+	return vq->acked_backend_features & (1ULL << bit);
+}
+
 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
 static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
 {
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index c51f8e5cc608..b1e22c40c4b6 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
 };
 
 #define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
 
 struct vhost_msg {
 	int type;
@@ -74,6 +75,15 @@ struct vhost_msg {
 	};
 };
 
+struct vhost_msg_v2 {
+	__u32 type;
+	__u32 reserved;
+	union {
+		struct vhost_iotlb_msg iotlb;
+		__u8 padding[64];
+	};
+};
+
 struct vhost_memory_region {
 	__u64 guest_phys_addr;
 	__u64 memory_size; /* bytes */
@@ -160,6 +170,14 @@ struct vhost_memory {
 #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24,	\
 					 struct vhost_vring_state)
 
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.
-- 
cgit v1.2.3


From e4cc5a1873ac1297615962185f94adbbfaf6456b Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 6 Aug 2018 17:58:40 +0200
Subject: Bluetooth: btqca: Introduce HCI_EV_VENDOR and use it

Using HCI_VENDOR_PKT for vendor specific events does work since it has
also the value 0xff, but it is actually the packet type indicator
constant and not the event constant. So introduce HCI_EV_VENDOR and
use it.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/btqca.c   | 6 +++---
 include/net/bluetooth/hci.h | 2 ++
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 488f5e7521dd..ec9e03a6b778 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -39,7 +39,7 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version)
 
 	cmd = EDL_PATCH_VER_REQ_CMD;
 	skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, EDL_PATCH_CMD_LEN,
-				&cmd, HCI_VENDOR_PKT, HCI_INIT_TIMEOUT);
+				&cmd, HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		bt_dev_err(hdev, "Reading QCA version information failed (%d)",
@@ -229,7 +229,7 @@ static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size,
 				      cmd);
 
 	skb = __hci_cmd_sync_ev(hdev, EDL_PATCH_CMD_OPCODE, seg_size + 2, cmd,
-				HCI_VENDOR_PKT, HCI_INIT_TIMEOUT);
+				HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		bt_dev_err(hdev, "QCA Failed to send TLV segment (%d)", err);
@@ -318,7 +318,7 @@ int qca_set_bdaddr_rome(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 	cmd[2] = sizeof(bdaddr_t);	/* size */
 	memcpy(cmd + 3, bdaddr, sizeof(bdaddr_t));
 	skb = __hci_cmd_sync_ev(hdev, EDL_NVM_ACCESS_OPCODE, sizeof(cmd), cmd,
-				HCI_VENDOR_PKT, HCI_INIT_TIMEOUT);
+				HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
 	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		bt_dev_err(hdev, "QCA Change address command failed (%d)", err);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7f008097552e..4619a79b1bbb 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -2176,6 +2176,8 @@ struct hci_evt_le_ext_adv_set_term {
 	__u8	num_evts;
 } __packed;
 
+#define HCI_EV_VENDOR			0xff
+
 /* Internal events generated by Bluetooth stack */
 #define HCI_EV_STACK_INTERNAL	0xfd
 struct hci_ev_stack_internal {
-- 
cgit v1.2.3


From bc2d8d262cba5736332cbc866acb11b1c5748aa9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 7 Aug 2018 08:19:57 +0200
Subject: cpu/hotplug: Fix SMT supported evaluation

Josh reported that the late SMT evaluation in cpu_smt_state_init() sets
cpu_smt_control to CPU_SMT_NOT_SUPPORTED in case that 'nosmt' was supplied
on the kernel command line as it cannot differentiate between SMT disabled
by BIOS and SMT soft disable via 'nosmt'. That wreckages the state and
makes the sysfs interface unusable.

Rework this so that during bringup of the non boot CPUs the availability of
SMT is determined in cpu_smt_allowed(). If a newly booted CPU is not a
'primary' thread then set the local cpu_smt_available marker and evaluate
this explicitely right after the initial SMP bringup has finished.

SMT evaulation on x86 is a trainwreck as the firmware has all the
information _before_ booting the kernel, but there is no interface to query
it.

Fixes: 73d5e2b47264 ("cpu/hotplug: detect SMT disabled by BIOS")
Reported-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/bugs.c |  2 +-
 include/linux/cpu.h        |  2 ++
 kernel/cpu.c               | 41 ++++++++++++++++++++++++++++-------------
 kernel/smp.c               |  2 ++
 4 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 86f4549b7c9f..be79b7599dda 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -63,7 +63,7 @@ void __init check_bugs(void)
 	 * identify_boot_cpu() initialized SMT support information, let the
 	 * core code know.
 	 */
-	cpu_smt_check_topology();
+	cpu_smt_check_topology_early();
 
 	if (!IS_ENABLED(CONFIG_SMP)) {
 		pr_info("CPU: ");
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ccdf3a67ce56..b216bd5bfd20 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -178,10 +178,12 @@ enum cpuhp_smt_control {
 #if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
+extern void cpu_smt_check_topology_early(void);
 extern void cpu_smt_check_topology(void);
 #else
 # define cpu_smt_control		(CPU_SMT_ENABLED)
 static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_check_topology_early(void) { }
 static inline void cpu_smt_check_topology(void) { }
 #endif
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2bc3d2f5b2a5..9d2512dd263c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -347,6 +347,8 @@ EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
+static bool cpu_smt_available __read_mostly;
+
 void __init cpu_smt_disable(bool force)
 {
 	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
@@ -363,14 +365,28 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code.
+ * CPU identification. Called from architecture code before non boot CPUs
+ * are brought up.
  */
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_check_topology_early(void)
 {
 	if (!topology_smt_supported())
 		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
 
+/*
+ * If SMT was disabled by BIOS, detect it here, after the CPUs have been
+ * brought online. This ensures the smt/l1tf sysfs entries are consistent
+ * with reality. cpu_smt_available is set to true during the bringup of non
+ * boot CPUs when a SMT sibling is detected. Note, this may overwrite
+ * cpu_smt_control's previous setting.
+ */
+void __init cpu_smt_check_topology(void)
+{
+	if (!cpu_smt_available)
+		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+}
+
 static int __init smt_cmdline_disable(char *str)
 {
 	cpu_smt_disable(str && !strcmp(str, "force"));
@@ -380,10 +396,18 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+	if (topology_is_primary_thread(cpu))
 		return true;
 
-	if (topology_is_primary_thread(cpu))
+	/*
+	 * If the CPU is not a 'primary' thread and the booted_once bit is
+	 * set then the processor has SMT support. Store this information
+	 * for the late check of SMT support in cpu_smt_check_topology().
+	 */
+	if (per_cpu(cpuhp_state, cpu).booted_once)
+		cpu_smt_available = true;
+
+	if (cpu_smt_control == CPU_SMT_ENABLED)
 		return true;
 
 	/*
@@ -2125,15 +2149,6 @@ static const struct attribute_group cpuhp_smt_attr_group = {
 
 static int __init cpu_smt_state_init(void)
 {
-	/*
-	 * If SMT was disabled by BIOS, detect it here, after the CPUs have
-	 * been brought online.  This ensures the smt/l1tf sysfs entries are
-	 * consistent with reality.  Note this may overwrite cpu_smt_control's
-	 * previous setting.
-	 */
-	if (topology_max_smt_threads() == 1)
-		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-
 	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
 				  &cpuhp_smt_attr_group);
 }
diff --git a/kernel/smp.c b/kernel/smp.c
index 084c8b3a2681..d86eec5f51c1 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -584,6 +584,8 @@ void __init smp_init(void)
 		num_nodes, (num_nodes > 1 ? "s" : ""),
 		num_cpus,  (num_cpus  > 1 ? "s" : ""));
 
+	/* Final decision about SMT support */
+	cpu_smt_check_topology();
 	/* Any cleanup work */
 	smp_cpus_done(setup_max_cpus);
 }
-- 
cgit v1.2.3


From 4231aba000f5a4583dd9f67057aadb68c3eca99d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 27 Jul 2018 21:48:17 +1000
Subject: powerpc/64s: Fix page table fragment refcount race vs speculative
 references

The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a
speculative reference, and then the speculative failure handler
decrements the new reference, and the underflow eventually pops when
the page tables are freed.

Fix this by using a dedicated field in the struct page for the page
table fragment allocator.

Fixes: 5c1f6ee9a31c ("powerpc: Reduce PTE table memory wastage")
Cc: stable@vger.kernel.org # v3.10+
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/mmu_context_book3s64.c |  8 ++++----
 arch/powerpc/mm/pgtable-book3s64.c     | 17 +++++++++++------
 include/linux/mm_types.h               |  5 ++++-
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 8b24168ea8c4..4a892d894a0f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -200,9 +200,9 @@ static void pte_frag_destroy(void *pte_frag)
 	/* drop all the pending references */
 	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
-	if (page_ref_sub_and_test(page, PTE_FRAG_NR - count)) {
+	if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
 		pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
@@ -215,9 +215,9 @@ static void pmd_frag_destroy(void *pmd_frag)
 	/* drop all the pending references */
 	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
-	if (page_ref_sub_and_test(page, PMD_FRAG_NR - count)) {
+	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
 		pgtable_pmd_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 4afbfbb64bfd..78d0b3d5ebad 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -270,6 +270,8 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
 		return NULL;
 	}
 
+	atomic_set(&page->pt_frag_refcount, 1);
+
 	ret = page_address(page);
 	/*
 	 * if we support only one fragment just return the
@@ -285,7 +287,7 @@ static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm)
 	 * count.
 	 */
 	if (likely(!mm->context.pmd_frag)) {
-		set_page_count(page, PMD_FRAG_NR);
+		atomic_set(&page->pt_frag_refcount, PMD_FRAG_NR);
 		mm->context.pmd_frag = ret + PMD_FRAG_SIZE;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -308,9 +310,10 @@ void pmd_fragment_free(unsigned long *pmd)
 {
 	struct page *page = virt_to_page(pmd);
 
-	if (put_page_testzero(page)) {
+	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		pgtable_pmd_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
@@ -352,6 +355,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 			return NULL;
 	}
 
+	atomic_set(&page->pt_frag_refcount, 1);
 
 	ret = page_address(page);
 	/*
@@ -367,7 +371,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 	 * count.
 	 */
 	if (likely(!mm->context.pte_frag)) {
-		set_page_count(page, PTE_FRAG_NR);
+		atomic_set(&page->pt_frag_refcount, PTE_FRAG_NR);
 		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -390,10 +394,11 @@ void pte_fragment_free(unsigned long *table, int kernel)
 {
 	struct page *page = virt_to_page(table);
 
-	if (put_page_testzero(page)) {
+	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
+	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		if (!kernel)
 			pgtable_page_dtor(page);
-		free_unref_page(page);
+		__free_page(page);
 	}
 }
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 99ce070e7dcb..22651e124071 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -139,7 +139,10 @@ struct page {
 			unsigned long _pt_pad_1;	/* compound_head */
 			pgtable_t pmd_huge_pte; /* protected by page->ptl */
 			unsigned long _pt_pad_2;	/* mapping */
-			struct mm_struct *pt_mm;	/* x86 pgds only */
+			union {
+				struct mm_struct *pt_mm; /* x86 pgds only */
+				atomic_t pt_frag_refcount; /* powerpc */
+			};
 #if ALLOC_SPLIT_PTLOCKS
 			spinlock_t *ptl;
 #else
-- 
cgit v1.2.3


From 35a8a3bd1c2e29bb6baec501c6f56abaaa10a48a Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 7 Aug 2018 11:43:02 +0200
Subject: netfilter: nft_osf: use NFT_OSF_MAXGENRELEN instead of IFNAMSIZ

As no "genre" on pf.os exceed 16 bytes of length, we reduce
NFT_OSF_MAXGENRELEN parameter to 16 bytes and use it instead of IFNAMSIZ.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 1 +
 net/netfilter/nft_osf.c                  | 8 +++-----
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 357862d948de..94657c701f22 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -8,6 +8,7 @@
 #define NFT_SET_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_OBJ_MAXNAMELEN	NFT_NAME_MAXLEN
 #define NFT_USERDATA_MAXLEN	256
+#define NFT_OSF_MAXGENRELEN	16
 
 /**
  * enum nft_registers - nf_tables registers
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 9b2f3de7be4f..5af74b37f423 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -4,8 +4,6 @@
 #include <net/netfilter/nf_tables.h>
 #include <linux/netfilter/nfnetlink_osf.h>
 
-#define OSF_GENRE_SIZE		32
-
 struct nft_osf {
 	enum nft_registers	dreg:8;
 };
@@ -37,9 +35,9 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
 	os_name = nf_osf_find(skb, nf_osf_fingers);
 	if (!os_name)
-		strncpy((char *)dest, "unknown", IFNAMSIZ);
+		strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
 	else
-		strncpy((char *)dest, os_name, IFNAMSIZ);
+		strncpy((char *)dest, os_name, NFT_OSF_MAXGENRELEN);
 }
 
 static int nft_osf_init(const struct nft_ctx *ctx,
@@ -51,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
 
 	priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
 	err = nft_validate_register_store(ctx, priv->dreg, NULL,
-					  NFTA_DATA_VALUE, OSF_GENRE_SIZE);
+					  NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 4e665afbd7bee29b44b5d22821b56207f8459e39 Mon Sep 17 00:00:00 2001
From: Harsha Sharma <harshasharmaiitr@gmail.com>
Date: Tue, 7 Aug 2018 17:14:10 +0200
Subject: netfilter: cttimeout: move ctnl_untimeout to nf_conntrack

As, ctnl_untimeout is required by nft_ct, so move ctnl_timeout from
nfnetlink_cttimeout to nf_conntrack_timeout and rename as nf_ct_timeout.

Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h |  1 +
 net/netfilter/nf_conntrack_timeout.c         | 17 +++++++++++++++++
 net/netfilter/nfnetlink_cttimeout.c          | 20 ++------------------
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 80ceb3d0291d..7a21bc0f00eb 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -83,6 +83,7 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 int nf_conntrack_timeout_init(void);
 void nf_conntrack_timeout_fini(void);
+void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout);
 #else
 static inline int nf_conntrack_timeout_init(void)
 {
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 46aee65f339b..401c2cce4a61 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -31,6 +31,23 @@ EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
 void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook);
 
+static int untimeout(struct nf_conn *ct, void *timeout)
+{
+	struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+	if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
+		RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+
+	/* We are not intended to delete this conntrack. */
+	return 0;
+}
+
+void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout)
+{
+	nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
+}
+EXPORT_SYMBOL_GPL(nf_ct_untimeout);
+
 static const struct nf_ct_ext_type timeout_extend = {
 	.len	= sizeof(struct nf_conn_timeout),
 	.align	= __alignof__(struct nf_conn_timeout),
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 4199e5300575..df53aef2d642 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -297,22 +297,6 @@ static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
 	return ret;
 }
 
-static int untimeout(struct nf_conn *ct, void *timeout)
-{
-	struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
-
-	if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
-		RCU_INIT_POINTER(timeout_ext->timeout, NULL);
-
-	/* We are not intended to delete this conntrack. */
-	return 0;
-}
-
-static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
-{
-	nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
-}
-
 /* try to delete object, fail if it is still in use. */
 static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 {
@@ -325,7 +309,7 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&timeout->head);
 		nf_ct_l4proto_put(timeout->l4proto);
-		ctnl_untimeout(net, timeout);
+		nf_ct_untimeout(net, timeout);
 		kfree_rcu(timeout, rcu_head);
 	} else {
 		ret = -EBUSY;
@@ -573,7 +557,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 	struct ctnl_timeout *cur, *tmp;
 
 	nf_ct_unconfirmed_destroy(net);
-	ctnl_untimeout(net, NULL);
+	nf_ct_untimeout(net, NULL);
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
-- 
cgit v1.2.3


From 6c1fd7dc489d9bf64196f5b0fa33e059f64460c8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Aug 2018 17:14:15 +0200
Subject: netfilter: cttimeout: decouple timeout policy from
 nfnetlink_cttimeout object

The timeout policy is currently embedded into the nfnetlink_cttimeout
object, move the policy into an independent object. This allows us to
reuse part of the existing conntrack timeout extension from nf_tables
without adding dependencies with the nfnetlink_cttimeout object layout.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h | 22 ++++++++++-------
 net/netfilter/nf_conntrack_timeout.c         |  6 ++---
 net/netfilter/nfnetlink_cttimeout.c          | 37 ++++++++++++++++------------
 net/netfilter/xt_CT.c                        |  4 +--
 4 files changed, 39 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 7a21bc0f00eb..d5f62cc6c2ae 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -11,24 +11,28 @@
 
 #define CTNL_TIMEOUT_NAME_MAX	32
 
+struct nf_ct_timeout {
+	__u16			l3num;
+	const struct nf_conntrack_l4proto *l4proto;
+	char			data[0];
+};
+
 struct ctnl_timeout {
 	struct list_head	head;
 	struct rcu_head		rcu_head;
 	refcount_t		refcnt;
 	char			name[CTNL_TIMEOUT_NAME_MAX];
-	__u16			l3num;
-	const struct nf_conntrack_l4proto *l4proto;
-	char			data[0];
+	struct nf_ct_timeout	timeout;
 };
 
 struct nf_conn_timeout {
-	struct ctnl_timeout __rcu *timeout;
+	struct nf_ct_timeout __rcu *timeout;
 };
 
 static inline unsigned int *
 nf_ct_timeout_data(struct nf_conn_timeout *t)
 {
-	struct ctnl_timeout *timeout;
+	struct nf_ct_timeout *timeout;
 
 	timeout = rcu_dereference(t->timeout);
 	if (timeout == NULL)
@@ -49,7 +53,7 @@ struct nf_conn_timeout *nf_ct_timeout_find(const struct nf_conn *ct)
 
 static inline
 struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
-					      struct ctnl_timeout *timeout,
+					      struct nf_ct_timeout *timeout,
 					      gfp_t gfp)
 {
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
@@ -83,7 +87,7 @@ static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 int nf_conntrack_timeout_init(void);
 void nf_conntrack_timeout_fini(void);
-void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout);
+void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout);
 #else
 static inline int nf_conntrack_timeout_init(void)
 {
@@ -97,8 +101,8 @@ static inline void nf_conntrack_timeout_fini(void)
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
-extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout);
+extern struct nf_ct_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
+extern void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout);
 #endif
 
 #endif /* _NF_CONNTRACK_TIMEOUT_H */
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 401c2cce4a61..91fbd183da2d 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -24,11 +24,11 @@
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 
-struct ctnl_timeout *
+struct nf_ct_timeout *
 (*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
 
-void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
+void (*nf_ct_timeout_put_hook)(struct nf_ct_timeout *timeout) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_timeout_put_hook);
 
 static int untimeout(struct nf_conn *ct, void *timeout)
@@ -42,7 +42,7 @@ static int untimeout(struct nf_conn *ct, void *timeout)
 	return 0;
 }
 
-void nf_ct_untimeout(struct net *net, struct ctnl_timeout *timeout)
+void nf_ct_untimeout(struct net *net, struct nf_ct_timeout *timeout)
 {
 	nf_ct_iterate_cleanup_net(net, untimeout, timeout, 0, 0);
 }
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index df53aef2d642..d46a236cdf31 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -113,13 +113,13 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 			/* You cannot replace one timeout policy by another of
 			 * different kind, sorry.
 			 */
-			if (matching->l3num != l3num ||
-			    matching->l4proto->l4proto != l4num)
+			if (matching->timeout.l3num != l3num ||
+			    matching->timeout.l4proto->l4proto != l4num)
 				return -EINVAL;
 
-			return ctnl_timeout_parse_policy(&matching->data,
-							 matching->l4proto, net,
-							 cda[CTA_TIMEOUT_DATA]);
+			return ctnl_timeout_parse_policy(&matching->timeout.data,
+							 matching->timeout.l4proto,
+							 net, cda[CTA_TIMEOUT_DATA]);
 		}
 
 		return -EBUSY;
@@ -140,14 +140,14 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 		goto err_proto_put;
 	}
 
-	ret = ctnl_timeout_parse_policy(&timeout->data, l4proto, net,
+	ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, net,
 					cda[CTA_TIMEOUT_DATA]);
 	if (ret < 0)
 		goto err;
 
 	strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
-	timeout->l3num = l3num;
-	timeout->l4proto = l4proto;
+	timeout->timeout.l3num = l3num;
+	timeout->timeout.l4proto = l4proto;
 	refcount_set(&timeout->refcnt, 1);
 	list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
 
@@ -166,7 +166,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	unsigned int flags = portid ? NLM_F_MULTI : 0;
-	const struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
+	const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
 
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
@@ -179,8 +179,9 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	nfmsg->res_id = 0;
 
 	if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
-	    nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
-	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
+	    nla_put_be16(skb, CTA_TIMEOUT_L3PROTO,
+			 htons(timeout->timeout.l3num)) ||
+	    nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) ||
 	    nla_put_be32(skb, CTA_TIMEOUT_USE,
 			 htonl(refcount_read(&timeout->refcnt))))
 		goto nla_put_failure;
@@ -194,7 +195,8 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		if (!nest_parms)
 			goto nla_put_failure;
 
-		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
+		ret = l4proto->ctnl_timeout.obj_to_nlattr(skb,
+							&timeout->timeout.data);
 		if (ret < 0)
 			goto nla_put_failure;
 
@@ -308,8 +310,8 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 	if (refcount_dec_if_one(&timeout->refcnt)) {
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&timeout->head);
-		nf_ct_l4proto_put(timeout->l4proto);
-		nf_ct_untimeout(net, timeout);
+		nf_ct_l4proto_put(timeout->timeout.l4proto);
+		nf_ct_untimeout(net, &timeout->timeout);
 		kfree_rcu(timeout, rcu_head);
 	} else {
 		ret = -EBUSY;
@@ -510,8 +512,11 @@ err:
 	return matching;
 }
 
-static void ctnl_timeout_put(struct ctnl_timeout *timeout)
+static void ctnl_timeout_put(struct nf_ct_timeout *t)
 {
+	struct ctnl_timeout *timeout =
+		container_of(t, struct ctnl_timeout, timeout);
+
 	if (refcount_dec_and_test(&timeout->refcnt))
 		kfree_rcu(timeout, rcu_head);
 
@@ -561,7 +566,7 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
-		nf_ct_l4proto_put(cur->l4proto);
+		nf_ct_l4proto_put(cur->timeout.l4proto);
 
 		if (refcount_dec_and_test(&cur->refcnt))
 			kfree_rcu(cur, rcu_head);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 7ba454e9e3fa..89457efd2e00 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -104,7 +104,7 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout)
+static void __xt_ct_tg_timeout_put(struct nf_ct_timeout *timeout)
 {
 	typeof(nf_ct_timeout_put_hook) timeout_put;
 
@@ -121,7 +121,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
 	const struct nf_conntrack_l4proto *l4proto;
-	struct ctnl_timeout *timeout;
+	struct nf_ct_timeout *timeout;
 	struct nf_conn_timeout *timeout_ext;
 	const char *errmsg = NULL;
 	int ret = 0;
-- 
cgit v1.2.3


From ad83f2a9ce37a264202f48f4fd8889ee9056b703 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 7 Aug 2018 17:14:19 +0200
Subject: netfilter: remove ifdef around cttimeout in struct
 nf_conntrack_l4proto

Simplify this, include it inconditionally in this structure layout as we
do with ctnetlink.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6068c6da3eac..8465263b297d 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -77,7 +77,6 @@ struct nf_conntrack_l4proto {
 			       struct nf_conntrack_tuple *t);
 	const struct nla_policy *nla_policy;
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
 	struct {
 		int (*nlattr_to_obj)(struct nlattr *tb[],
 				     struct net *net, void *data);
@@ -87,7 +86,6 @@ struct nf_conntrack_l4proto {
 		u16 nlattr_max;
 		const struct nla_policy *nla_policy;
 	} ctnl_timeout;
-#endif
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	/* Print out the private part of the conntrack. */
 	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
-- 
cgit v1.2.3


From 7e0b2b57f01d183e1c84114f1f2287737358d748 Mon Sep 17 00:00:00 2001
From: Harsha Sharma <harshasharmaiitr@gmail.com>
Date: Tue, 7 Aug 2018 17:14:23 +0200
Subject: netfilter: nft_ct: add ct timeout support

This patch allows to add, list and delete connection tracking timeout
policies via nft objref infrastructure and assigning these timeout
via nft rule.

%./libnftnl/examples/nft-ct-timeout-add ip raw cttime tcp

Ruleset:

table ip raw {
   ct timeout cttime {
       protocol tcp;
       policy = {established: 111, close: 13 }
   }

   chain output {
       type filter hook output priority -300; policy accept;
       ct timeout set "cttime"
   }
}

%./libnftnl/examples/nft-rule-ct-timeout-add ip raw output cttime

%conntrack -E
[NEW] tcp      6 111 ESTABLISHED src=172.16.19.128 dst=172.16.19.1
sport=22 dport=41360 [UNREPLIED] src=172.16.19.1 dst=172.16.19.128
sport=41360 dport=22

%nft delete rule ip raw output handle <handle>
%./libnftnl/examples/nft-ct-timeout-del ip raw cttime

Joint work with Pablo Neira.

Signed-off-by: Harsha Sharma <harshasharmaiitr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  14 ++-
 net/netfilter/nft_ct.c                   | 204 ++++++++++++++++++++++++++++++-
 2 files changed, 216 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 94657c701f22..e23290ffdc77 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -958,6 +958,7 @@ enum nft_socket_keys {
  * @NFT_CT_DST_IP: conntrack layer 3 protocol destination (IPv4 address)
  * @NFT_CT_SRC_IP6: conntrack layer 3 protocol source (IPv6 address)
  * @NFT_CT_DST_IP6: conntrack layer 3 protocol destination (IPv6 address)
+ * @NFT_CT_TIMEOUT: connection tracking timeout policy assigned to conntrack
  */
 enum nft_ct_keys {
 	NFT_CT_STATE,
@@ -983,6 +984,7 @@ enum nft_ct_keys {
 	NFT_CT_DST_IP,
 	NFT_CT_SRC_IP6,
 	NFT_CT_DST_IP6,
+	NFT_CT_TIMEOUT,
 	__NFT_CT_MAX
 };
 #define NFT_CT_MAX		(__NFT_CT_MAX - 1)
@@ -1411,6 +1413,15 @@ enum nft_ct_helper_attributes {
 };
 #define NFTA_CT_HELPER_MAX	(__NFTA_CT_HELPER_MAX - 1)
 
+enum nft_ct_timeout_timeout_attributes {
+	NFTA_CT_TIMEOUT_UNSPEC,
+	NFTA_CT_TIMEOUT_L3PROTO,
+	NFTA_CT_TIMEOUT_L4PROTO,
+	NFTA_CT_TIMEOUT_DATA,
+	__NFTA_CT_TIMEOUT_MAX,
+};
+#define NFTA_CT_TIMEOUT_MAX	(__NFTA_CT_TIMEOUT_MAX - 1)
+
 #define NFT_OBJECT_UNSPEC	0
 #define NFT_OBJECT_COUNTER	1
 #define NFT_OBJECT_QUOTA	2
@@ -1418,7 +1429,8 @@ enum nft_ct_helper_attributes {
 #define NFT_OBJECT_LIMIT	4
 #define NFT_OBJECT_CONNLIMIT	5
 #define NFT_OBJECT_TUNNEL	6
-#define __NFT_OBJECT_MAX	7
+#define NFT_OBJECT_CT_TIMEOUT	7
+#define __NFT_OBJECT_MAX	8
 #define NFT_OBJECT_MAX		(__NFT_OBJECT_MAX - 1)
 
 /**
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 3bc82ee5464d..4788458a0931 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -22,6 +22,8 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_timeout.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 
 struct nft_ct {
 	enum nft_ct_keys	key:8;
@@ -765,6 +767,194 @@ static struct nft_expr_type nft_notrack_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+static int
+nft_ct_timeout_parse_policy(void *timeouts,
+			    const struct nf_conntrack_l4proto *l4proto,
+			    struct net *net, const struct nlattr *attr)
+{
+	struct nlattr **tb;
+	int ret = 0;
+
+	if (!l4proto->ctnl_timeout.nlattr_to_obj)
+		return 0;
+
+	tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb),
+		     GFP_KERNEL);
+
+	if (!tb)
+		return -ENOMEM;
+
+	ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max,
+			       attr, l4proto->ctnl_timeout.nla_policy,
+			       NULL);
+	if (ret < 0)
+		goto err;
+
+	ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeouts);
+
+err:
+	kfree(tb);
+	return ret;
+}
+
+struct nft_ct_timeout_obj {
+	struct nf_conn		*tmpl;
+	u8			l4proto;
+};
+
+static void nft_ct_timeout_obj_eval(struct nft_object *obj,
+				    struct nft_regs *regs,
+				    const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+	struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
+	struct sk_buff *skb = pkt->skb;
+
+	if (ct ||
+	    priv->l4proto != pkt->tprot)
+		return;
+
+	nf_ct_set(skb, priv->tmpl, IP_CT_NEW);
+}
+
+static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
+				   const struct nlattr * const tb[],
+				   struct nft_object *obj)
+{
+	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+	struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+	const struct nf_conntrack_l4proto *l4proto;
+	struct nf_conn_timeout *timeout_ext;
+	struct nf_ct_timeout *timeout;
+	int l3num = ctx->family;
+	struct nf_conn *tmpl;
+	__u8 l4num;
+	int ret;
+
+	if (!tb[NFTA_CT_TIMEOUT_L3PROTO] ||
+	    !tb[NFTA_CT_TIMEOUT_L4PROTO] ||
+	    !tb[NFTA_CT_TIMEOUT_DATA])
+		return -EINVAL;
+
+	l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO]));
+	l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
+	priv->l4proto = l4num;
+
+	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+	if (l4proto->l4proto != l4num) {
+		ret = -EOPNOTSUPP;
+		goto err_proto_put;
+	}
+
+	timeout = kzalloc(sizeof(struct nf_ct_timeout) +
+			  l4proto->ctnl_timeout.obj_size, GFP_KERNEL);
+	if (timeout == NULL) {
+		ret = -ENOMEM;
+		goto err_proto_put;
+	}
+
+	ret = nft_ct_timeout_parse_policy(&timeout->data, l4proto, ctx->net,
+					  tb[NFTA_CT_TIMEOUT_DATA]);
+	if (ret < 0)
+		goto err_free_timeout;
+
+	timeout->l3num = l3num;
+	timeout->l4proto = l4proto;
+	tmpl = nf_ct_tmpl_alloc(ctx->net, zone, GFP_ATOMIC);
+	if (!tmpl) {
+		ret = -ENOMEM;
+		goto err_free_timeout;
+	}
+
+	timeout_ext = nf_ct_timeout_ext_add(tmpl, timeout, GFP_ATOMIC);
+	if (!timeout_ext) {
+		ret = -ENOMEM;
+		goto err_free_tmpl;
+	}
+
+	ret = nf_ct_netns_get(ctx->net, ctx->family);
+	if (ret < 0)
+		goto err_free_tmpl;
+
+	priv->tmpl = tmpl;
+
+	return 0;
+
+err_free_tmpl:
+	nf_ct_tmpl_free(tmpl);
+err_free_timeout:
+	kfree(timeout);
+err_proto_put:
+	nf_ct_l4proto_put(l4proto);
+	return ret;
+}
+
+static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
+				       struct nft_object *obj)
+{
+	struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+	struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
+	struct nf_ct_timeout *timeout;
+
+	timeout = rcu_dereference_raw(t->timeout);
+	nf_ct_untimeout(ctx->net, timeout);
+	nf_ct_l4proto_put(timeout->l4proto);
+	nf_ct_netns_put(ctx->net, ctx->family);
+	nf_ct_tmpl_free(priv->tmpl);
+}
+
+static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
+				   struct nft_object *obj, bool reset)
+{
+	const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
+	const struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
+	const struct nf_ct_timeout *timeout = rcu_dereference_raw(t->timeout);
+	struct nlattr *nest_params;
+	int ret;
+
+	if (nla_put_u8(skb, NFTA_CT_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
+	    nla_put_be16(skb, NFTA_CT_TIMEOUT_L3PROTO, htons(timeout->l3num)))
+		return -1;
+
+	nest_params = nla_nest_start(skb, NFTA_CT_TIMEOUT_DATA | NLA_F_NESTED);
+	if (!nest_params)
+		return -1;
+
+	ret = timeout->l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data);
+	if (ret < 0)
+		return -1;
+	nla_nest_end(skb, nest_params);
+	return 0;
+}
+
+static const struct nla_policy nft_ct_timeout_policy[NFTA_CT_TIMEOUT_MAX + 1] = {
+	[NFTA_CT_TIMEOUT_L3PROTO] = {.type = NLA_U16 },
+	[NFTA_CT_TIMEOUT_L4PROTO] = {.type = NLA_U8 },
+	[NFTA_CT_TIMEOUT_DATA]	  = {.type = NLA_NESTED },
+};
+
+static struct nft_object_type nft_ct_timeout_obj_type;
+
+static const struct nft_object_ops nft_ct_timeout_obj_ops = {
+	.type		= &nft_ct_timeout_obj_type,
+	.size		= sizeof(struct nft_ct_timeout_obj),
+	.eval		= nft_ct_timeout_obj_eval,
+	.init		= nft_ct_timeout_obj_init,
+	.destroy	= nft_ct_timeout_obj_destroy,
+	.dump		= nft_ct_timeout_obj_dump,
+};
+
+static struct nft_object_type nft_ct_timeout_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_CT_TIMEOUT,
+	.ops		= &nft_ct_timeout_obj_ops,
+	.maxattr	= NFTA_CT_TIMEOUT_MAX,
+	.policy		= nft_ct_timeout_policy,
+	.owner		= THIS_MODULE,
+};
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+
 static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
 				  const struct nlattr * const tb[],
 				  struct nft_object *obj)
@@ -949,9 +1139,17 @@ static int __init nft_ct_module_init(void)
 	err = nft_register_obj(&nft_ct_helper_obj_type);
 	if (err < 0)
 		goto err2;
-
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+	err = nft_register_obj(&nft_ct_timeout_obj_type);
+	if (err < 0)
+		goto err3;
+#endif
 	return 0;
 
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+err3:
+	nft_unregister_obj(&nft_ct_helper_obj_type);
+#endif
 err2:
 	nft_unregister_expr(&nft_notrack_type);
 err1:
@@ -961,6 +1159,9 @@ err1:
 
 static void __exit nft_ct_module_exit(void)
 {
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+	nft_unregister_obj(&nft_ct_timeout_obj_type);
+#endif
 	nft_unregister_obj(&nft_ct_helper_obj_type);
 	nft_unregister_expr(&nft_notrack_type);
 	nft_unregister_expr(&nft_ct_type);
@@ -974,3 +1175,4 @@ MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS_NFT_EXPR("ct");
 MODULE_ALIAS_NFT_EXPR("notrack");
 MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_HELPER);
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CT_TIMEOUT);
-- 
cgit v1.2.3


From 6cfef793b558eee47bac720574aff0d36b89d20a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 7 Aug 2018 10:50:20 -0700
Subject: ethtool: Add WAKE_FILTER and RX_CLS_FLOW_WAKE

Add the ability to specify through ethtool::rxnfc that a rule location is
special and will be used to participate in Wake-on-LAN, by e.g: having a
specific pattern be matched. When this is the case, fs->ring_cookie must
be set to the special value RX_CLS_FLOW_WAKE.

We also define an additional ethtool::wolinfo flag: WAKE_FILTER which
can be used to configure an Ethernet adapter to allow Wake-on-LAN using
previously programmed filters.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 813282cc8af6..dc69391d2bba 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -870,7 +870,8 @@ struct ethtool_flow_ext {
  *	includes the %FLOW_EXT or %FLOW_MAC_EXT flag
  *	(see &struct ethtool_flow_ext description).
  * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC
- *	if packets should be discarded
+ *	if packets should be discarded, or %RX_CLS_FLOW_WAKE if the
+ *	packets should be used for Wake-on-LAN with %WAKE_FILTER
  * @location: Location of rule in the table.  Locations must be
  *	numbered such that a flow matching multiple rules will be
  *	classified according to the first (lowest numbered) rule.
@@ -1634,6 +1635,7 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 #define WAKE_ARP		(1 << 4)
 #define WAKE_MAGIC		(1 << 5)
 #define WAKE_MAGICSECURE	(1 << 6) /* only meaningful if WAKE_MAGIC */
+#define WAKE_FILTER		(1 << 7)
 
 /* L2-L4 network traffic flow types */
 #define	TCP_V4_FLOW	0x01	/* hash or spec (tcp_ip4_spec) */
@@ -1671,6 +1673,7 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 #define	RXH_DISCARD	(1 << 31)
 
 #define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
+#define RX_CLS_FLOW_WAKE	0xfffffffffffffffeULL
 
 /* Special RX classification rule insert location values */
 #define RX_CLS_LOC_SPECIAL	0x80000000	/* flag */
-- 
cgit v1.2.3


From 92e2c4053623f21d61a683f7ef7bd61c8300ac7d Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Tue, 7 Aug 2018 17:36:00 +0200
Subject: flow_dissector: allow dissection of tunnel options from metadata

Allow the existing 'dissection' of tunnel metadata to 'dissect'
options already present in tunnel metadata. This dissection is
controlled by a new dissector key, FLOW_DISSECTOR_KEY_ENC_OPTS.

This dissection only occurs when skb_flow_dissect_tunnel_info()
is called, currently only the Flower classifier makes that call.
So there should be no impact on other users of the flow dissector.

This is in preparation for allowing the flower classifier to
match on Geneve options.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_dissector.h | 17 +++++++++++++++++
 net/core/flow_dissector.c    | 19 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 2a17f041f7a1..6a4586dcdede 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -57,6 +57,21 @@ struct flow_dissector_key_mpls {
 		mpls_label:20;
 };
 
+#define FLOW_DIS_TUN_OPTS_MAX 255
+/**
+ * struct flow_dissector_key_enc_opts:
+ * @data: tunnel option data
+ * @len: length of tunnel option data
+ * @dst_opt_type: tunnel option type
+ */
+struct flow_dissector_key_enc_opts {
+	u8 data[FLOW_DIS_TUN_OPTS_MAX];	/* Using IP_TUNNEL_OPTS_MAX is desired
+					 * here but seems difficult to #include
+					 */
+	u8 len;
+	__be16 dst_opt_type;
+};
+
 struct flow_dissector_key_keyid {
 	__be32	keyid;
 };
@@ -208,6 +223,8 @@ enum flow_dissector_key_id {
 	FLOW_DISSECTOR_KEY_IP, /* struct flow_dissector_key_ip */
 	FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_flow_vlan */
 	FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */
+	FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */
+
 	FLOW_DISSECTOR_KEY_MAX,
 };
 
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 08a5184f4b34..ce9eeeb7c024 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,7 +154,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 	    !dissector_uses_key(flow_dissector,
 				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
 	    !dissector_uses_key(flow_dissector,
-				FLOW_DISSECTOR_KEY_ENC_IP))
+				FLOW_DISSECTOR_KEY_ENC_IP) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_OPTS))
 		return;
 
 	info = skb_tunnel_info(skb);
@@ -224,6 +226,21 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 		ip->tos = key->tos;
 		ip->ttl = key->ttl;
 	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+		struct flow_dissector_key_enc_opts *enc_opt;
+
+		enc_opt = skb_flow_dissector_target(flow_dissector,
+						    FLOW_DISSECTOR_KEY_ENC_OPTS,
+						    target_container);
+
+		if (info->options_len) {
+			enc_opt->len = info->options_len;
+			ip_tunnel_info_opts_get(enc_opt->data, info);
+			enc_opt->dst_opt_type = info->key.tun_flags &
+						TUNNEL_OPTIONS_PRESENT;
+		}
+	}
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 
-- 
cgit v1.2.3


From 0a6e77784f490912d81b92cfd48424541c04691e Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 7 Aug 2018 17:36:01 +0200
Subject: net/sched: allow flower to match tunnel options

Allow matching on options in Geneve tunnel headers.
This makes use of existing tunnel metadata support.

The options can be described in the form
CLASS:TYPE:DATA/CLASS_MASK:TYPE_MASK:DATA_MASK, where CLASS is
represented as a 16bit hexadecimal value, TYPE as an 8bit
hexadecimal value and DATA as a variable length hexadecimal value.

e.g.
 # ip link add name geneve0 type geneve dstport 0 external
 # tc qdisc add dev geneve0 ingress
 # tc filter add dev geneve0 protocol ip parent ffff: \
     flower \
       enc_src_ip 10.0.99.192 \
       enc_dst_ip 10.0.99.193 \
       enc_key_id 11 \
       geneve_opts 0102:80:1122334421314151/ffff:ff:ffffffffffffffff \
       ip_proto udp \
       action mirred egress redirect dev eth1

This patch adds support for matching Geneve options in the order
supplied by the user. This leads to an efficient implementation in
the software datapath (and in our opinion hardware datapaths that
offload this feature). It is also compatible with Geneve options
matching provided by the Open vSwitch kernel datapath which is
relevant here as the Flower classifier may be used as a mechanism
to program flows into hardware as a form of Open vSwitch datapath
offload (sometimes referred to as OVS-TC). The netlink
Kernel/Userspace API may be extended, for example by adding a flag,
if other matching options are desired, for example matching given
options in any order. This would require an implementation in the
TC software datapath. And be done in a way that drivers that
facilitate offload of the Flower classifier can reject or accept
such flows based on hardware datapath capabilities.

This approach was discussed and agreed on at Netconf 2017 in Seoul.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  26 +++++
 net/sched/cls_flower.c       | 244 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 269 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 48e5b5d49a34..be382fb0592d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -480,11 +480,37 @@ enum {
 	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */
 	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */
 
+	TCA_FLOWER_KEY_ENC_OPTS,
+	TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
 	__TCA_FLOWER_MAX,
 };
 
 #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
 
+enum {
+	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+					 * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+					 * attributes
+					 */
+	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+		(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index a3b69bb6f4b0..9da244235170 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -24,6 +24,7 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/flow_dissector.h>
+#include <net/geneve.h>
 
 #include <net/dst.h>
 #include <net/dst_metadata.h>
@@ -53,6 +54,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_tcp tcp;
 	struct flow_dissector_key_ip ip;
 	struct flow_dissector_key_ip enc_ip;
+	struct flow_dissector_key_enc_opts enc_opts;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_OPTS]	= { .type = NLA_NESTED },
+	[TCA_FLOWER_KEY_ENC_OPTS_MASK]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+	[TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
+						       .len = 128 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,
 	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
 }
 
+static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
+			     int depth, int option_len,
+			     struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
+	struct nlattr *class = NULL, *type = NULL, *data = NULL;
+	struct geneve_opt *opt;
+	int err, data_len = 0;
+
+	if (option_len > sizeof(struct geneve_opt))
+		data_len = option_len - sizeof(struct geneve_opt);
+
+	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
+	memset(opt, 0xff, option_len);
+	opt->length = data_len / 4;
+	opt->r1 = 0;
+	opt->r2 = 0;
+	opt->r3 = 0;
+
+	/* If no mask has been prodived we assume an exact match. */
+	if (!depth)
+		return sizeof(struct geneve_opt) + data_len;
+
+	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
+		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+			       nla, geneve_opt_policy, extack);
+	if (err < 0)
+		return err;
+
+	/* We are not allowed to omit any of CLASS, TYPE or DATA
+	 * fields from the key.
+	 */
+	if (!option_len &&
+	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
+	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
+	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
+		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+		return -EINVAL;
+	}
+
+	/* Omitting any of CLASS, TYPE or DATA fields is allowed
+	 * for the mask.
+	 */
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
+		int new_len = key->enc_opts.len;
+
+		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
+		data_len = nla_len(data);
+		if (data_len < 4) {
+			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+			return -ERANGE;
+		}
+		if (data_len % 4) {
+			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+			return -ERANGE;
+		}
+
+		new_len += sizeof(struct geneve_opt) + data_len;
+		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
+		if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
+			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+			return -ERANGE;
+		}
+		opt->length = data_len / 4;
+		memcpy(opt->opt_data, nla_data(data), data_len);
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
+		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
+		opt->opt_class = nla_get_be16(class);
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
+		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
+		opt->type = nla_get_u8(type);
+	}
+
+	return sizeof(struct geneve_opt) + data_len;
+}
+
+static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
+			  struct fl_flow_key *mask,
+			  struct netlink_ext_ack *extack)
+{
+	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
+	int option_len, key_depth, msk_depth = 0;
+
+	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
+
+	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
+	}
+
+	nla_for_each_attr(nla_opt_key, nla_enc_key,
+			  nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
+		switch (nla_type(nla_opt_key)) {
+		case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+			option_len = 0;
+			key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+			option_len = fl_set_geneve_opt(nla_opt_key, key,
+						       key_depth, option_len,
+						       extack);
+			if (option_len < 0)
+				return option_len;
+
+			key->enc_opts.len += option_len;
+			/* At the same time we need to parse through the mask
+			 * in order to verify exact and mask attribute lengths.
+			 */
+			mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
+			option_len = fl_set_geneve_opt(nla_opt_msk, mask,
+						       msk_depth, option_len,
+						       extack);
+			if (option_len < 0)
+				return option_len;
+
+			mask->enc_opts.len += option_len;
+			if (key->enc_opts.len != mask->enc_opts.len) {
+				NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+				return -EINVAL;
+			}
+
+			if (msk_depth)
+				nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask,
 		      struct netlink_ext_ack *extack)
@@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 
 	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
 
+	if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
+		ret = fl_set_enc_opt(tb, key, mask, extack);
+		if (ret)
+			return ret;
+	}
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
 	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
+	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
 
 	skb_flow_dissector_init(dissector, keys, cnt);
 }
@@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
 	return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
 }
 
+static int fl_dump_key_geneve_opt(struct sk_buff *skb,
+				  struct flow_dissector_key_enc_opts *enc_opts)
+{
+	struct geneve_opt *opt;
+	struct nlattr *nest;
+	int opt_off = 0;
+
+	nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
+	if (!nest)
+		goto nla_put_failure;
+
+	while (enc_opts->len > opt_off) {
+		opt = (struct geneve_opt *)&enc_opts->data[opt_off];
+
+		if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
+				 opt->opt_class))
+			goto nla_put_failure;
+		if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
+			       opt->type))
+			goto nla_put_failure;
+		if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
+			    opt->length * 4, opt->opt_data))
+			goto nla_put_failure;
+
+		opt_off += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
+			       struct flow_dissector_key_enc_opts *enc_opts)
+{
+	struct nlattr *nest;
+	int err;
+
+	if (!enc_opts->len)
+		return 0;
+
+	nest = nla_nest_start(skb, enc_opt_type);
+	if (!nest)
+		goto nla_put_failure;
+
+	switch (enc_opts->dst_opt_type) {
+	case TUNNEL_GENEVE_OPT:
+		err = fl_dump_key_geneve_opt(skb, enc_opts);
+		if (err)
+			goto nla_put_failure;
+		break;
+	default:
+		goto nla_put_failure;
+	}
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int fl_dump_key_enc_opt(struct sk_buff *skb,
+			       struct flow_dissector_key_enc_opts *key_opts,
+			       struct flow_dissector_key_enc_opts *msk_opts)
+{
+	int err;
+
+	err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
+	if (err)
+		return err;
+
+	return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
+}
+
 static int fl_dump_key(struct sk_buff *skb, struct net *net,
 		       struct fl_flow_key *key, struct fl_flow_key *mask)
 {
@@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
 			    &mask->enc_tp.dst,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
 			    sizeof(key->enc_tp.dst)) ||
-	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
+	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
+	    fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
 		goto nla_put_failure;
 
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
-- 
cgit v1.2.3


From 455f05ecd2b219e9a216050796d30c830d9bc393 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 6 Aug 2018 11:06:02 -0700
Subject: vsock: split dwork to avoid reinitializations

syzbot reported that we reinitialize an active delayed
work in vsock_stream_connect():

	ODEBUG: init active (active state 0) object type: timer_list hint:
	delayed_work_timer_fn+0x0/0x90 kernel/workqueue.c:1414
	WARNING: CPU: 1 PID: 11518 at lib/debugobjects.c:329
	debug_print_object+0x16a/0x210 lib/debugobjects.c:326

The pattern is apparently wrong, we should only initialize
the dealyed work once and could repeatly schedule it. So we
have to move out the initializations to allocation side.
And to avoid confusion, we can split the shared dwork
into two, instead of re-using the same one.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Reported-by: <syzbot+8a9b1bd330476a4f3db6@syzkaller.appspotmail.com>
Cc: Andy king <acking@vmware.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_vsock.h         |  4 ++--
 net/vmw_vsock/af_vsock.c       | 15 ++++++++-------
 net/vmw_vsock/vmci_transport.c |  3 +--
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 9324ac2d9ff2..43913ae79f64 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -64,7 +64,8 @@ struct vsock_sock {
 	struct list_head pending_links;
 	struct list_head accept_queue;
 	bool rejected;
-	struct delayed_work dwork;
+	struct delayed_work connect_work;
+	struct delayed_work pending_work;
 	struct delayed_work close_work;
 	bool close_work_scheduled;
 	u32 peer_shutdown;
@@ -77,7 +78,6 @@ struct vsock_sock {
 
 s64 vsock_stream_has_data(struct vsock_sock *vsk);
 s64 vsock_stream_has_space(struct vsock_sock *vsk);
-void vsock_pending_work(struct work_struct *work);
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c19b858..ab27a2872935 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -451,14 +451,14 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
 	return transport->shutdown(vsock_sk(sk), mode);
 }
 
-void vsock_pending_work(struct work_struct *work)
+static void vsock_pending_work(struct work_struct *work)
 {
 	struct sock *sk;
 	struct sock *listener;
 	struct vsock_sock *vsk;
 	bool cleanup;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, pending_work.work);
 	sk = sk_vsock(vsk);
 	listener = vsk->listener;
 	cleanup = true;
@@ -498,7 +498,6 @@ out:
 	sock_put(sk);
 	sock_put(listener);
 }
-EXPORT_SYMBOL_GPL(vsock_pending_work);
 
 /**** SOCKET OPERATIONS ****/
 
@@ -597,6 +596,8 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 	return retval;
 }
 
+static void vsock_connect_timeout(struct work_struct *work);
+
 struct sock *__vsock_create(struct net *net,
 			    struct socket *sock,
 			    struct sock *parent,
@@ -638,6 +639,8 @@ struct sock *__vsock_create(struct net *net,
 	vsk->sent_request = false;
 	vsk->ignore_connecting_rst = false;
 	vsk->peer_shutdown = 0;
+	INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
+	INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);
 
 	psk = parent ? vsock_sk(parent) : NULL;
 	if (parent) {
@@ -1117,7 +1120,7 @@ static void vsock_connect_timeout(struct work_struct *work)
 	struct vsock_sock *vsk;
 	int cancel = 0;
 
-	vsk = container_of(work, struct vsock_sock, dwork.work);
+	vsk = container_of(work, struct vsock_sock, connect_work.work);
 	sk = sk_vsock(vsk);
 
 	lock_sock(sk);
@@ -1221,9 +1224,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
 			 * timeout fires.
 			 */
 			sock_hold(sk);
-			INIT_DELAYED_WORK(&vsk->dwork,
-					  vsock_connect_timeout);
-			schedule_delayed_work(&vsk->dwork, timeout);
+			schedule_delayed_work(&vsk->connect_work, timeout);
 
 			/* Skip ahead to preserve error code set above. */
 			goto out_wait;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index a7a73ffe675b..cb332adb84cd 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1094,8 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
 	vpending->listener = sk;
 	sock_hold(sk);
 	sock_hold(pending);
-	INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
-	schedule_delayed_work(&vpending->dwork, HZ);
+	schedule_delayed_work(&vpending->pending_work, HZ);
 
 out:
 	return err;
-- 
cgit v1.2.3


From 6fdecfe32f903d9f5f35ee4464fd32ede470dcad Mon Sep 17 00:00:00 2001
From: Arun Parameswaran <arun.parameswaran@broadcom.com>
Date: Tue, 7 Aug 2018 10:02:44 -0700
Subject: net: phy: Add support for Broadcom Omega internal Combo GPHY

Add support for the Broadcom Omega SoC internal Combo Ethernet
GPHY to the bcm7xxx phy driver.

Signed-off-by: Arun Parameswaran <arun.parameswaran@broadcom.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 2 ++
 include/linux/brcmphy.h   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 01d2ff2f6241..b2b6307d64a4 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -229,6 +229,7 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
 	phy_read(phydev, MII_BMSR);
 
 	switch (rev) {
+	case 0xa0:
 	case 0xb0:
 		ret = bcm7xxx_28nm_b0_afe_config_init(phydev);
 		break;
@@ -659,6 +660,7 @@ static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"),
+	BCM7XXX_28NM_GPHY(PHY_ID_BCM_OMEGA, "Broadcom Omega Combo GPHY"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7346, "Broadcom BCM7346"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7362, "Broadcom BCM7362"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"),
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index daa9234a9baf..949e9af8d9d6 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -45,6 +45,7 @@
 #define PHY_ID_BCM7445			0x600d8510
 
 #define PHY_ID_BCM_CYGNUS		0xae025200
+#define PHY_ID_BCM_OMEGA		0xae025100
 
 #define PHY_BCM_OUI_MASK		0xfffffc00
 #define PHY_BCM_OUI_1			0x00206000
-- 
cgit v1.2.3


From 0dcb82254d65f72333aa50ad626d1e9665ad093b Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 7 Aug 2018 12:41:38 -0700
Subject: llc: use refcount_inc_not_zero() for llc_sap_find()

llc_sap_put() decreases the refcnt before deleting sap
from the global list. Therefore, there is a chance
llc_sap_find() could find a sap with zero refcnt
in this global list.

Close this race condition by checking if refcnt is zero
or not in llc_sap_find(), if it is zero then it is being
removed so we can just treat it as gone.

Reported-by: <syzbot+278893f3f7803871f7ce@syzkaller.appspotmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc.h  | 5 +++++
 net/llc/llc_core.c | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/llc.h b/include/net/llc.h
index dc35f25eb679..890a87318014 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -116,6 +116,11 @@ static inline void llc_sap_hold(struct llc_sap *sap)
 	refcount_inc(&sap->refcnt);
 }
 
+static inline bool llc_sap_hold_safe(struct llc_sap *sap)
+{
+	return refcount_inc_not_zero(&sap->refcnt);
+}
+
 void llc_sap_close(struct llc_sap *sap);
 
 static inline void llc_sap_put(struct llc_sap *sap)
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 89041260784c..260b3dc1b4a2 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -73,8 +73,8 @@ struct llc_sap *llc_sap_find(unsigned char sap_value)
 
 	rcu_read_lock_bh();
 	sap = __llc_sap_find(sap_value);
-	if (sap)
-		llc_sap_hold(sap);
+	if (!sap || !llc_sap_hold_safe(sap))
+		sap = NULL;
 	rcu_read_unlock_bh();
 	return sap;
 }
-- 
cgit v1.2.3


From d88e61faad526a5850e9330c846641b91cf971e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 30 Jul 2018 09:36:26 +0200
Subject: iommu: Remove the ->map_sg indirection

All iommu drivers use the default_iommu_map_sg implementation, and there
is no good reason to ever override it.  Just expose it as iommu_map_sg
directly and remove the indirection, specially in our post-spectre world
where indirect calls are horribly expensive.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c      |  1 -
 drivers/iommu/arm-smmu-v3.c    |  1 -
 drivers/iommu/arm-smmu.c       |  1 -
 drivers/iommu/exynos-iommu.c   |  1 -
 drivers/iommu/intel-iommu.c    |  1 -
 drivers/iommu/iommu.c          |  6 +++---
 drivers/iommu/ipmmu-vmsa.c     |  1 -
 drivers/iommu/msm_iommu.c      |  1 -
 drivers/iommu/mtk_iommu.c      |  1 -
 drivers/iommu/mtk_iommu_v1.c   |  1 -
 drivers/iommu/omap-iommu.c     |  1 -
 drivers/iommu/qcom_iommu.c     |  1 -
 drivers/iommu/rockchip-iommu.c |  1 -
 drivers/iommu/tegra-gart.c     |  1 -
 drivers/iommu/tegra-smmu.c     |  1 -
 include/linux/iommu.h          | 16 ++--------------
 16 files changed, 5 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 596b95c50051..a23c6a4014a5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3192,7 +3192,6 @@ const struct iommu_ops amd_iommu_ops = {
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
 	.unmap = amd_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.add_device = amd_iommu_add_device,
 	.remove_device = amd_iommu_remove_device,
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 1d647104bccc..f1dc294f8e08 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1997,7 +1997,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.flush_iotlb_all	= arm_smmu_iotlb_sync,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7a96bcf94a6..644fd7ec8ac7 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1562,7 +1562,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.flush_iotlb_all	= arm_smmu_iotlb_sync,
 	.iotlb_sync		= arm_smmu_iotlb_sync,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 85879cfec52f..19e55cf6a9dd 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1332,7 +1332,6 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.detach_dev = exynos_iommu_detach_device,
 	.map = exynos_iommu_map,
 	.unmap = exynos_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
 	.device_group = generic_device_group,
 	.add_device = exynos_iommu_add_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 14e4b3722428..afb6c872f203 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5308,7 +5308,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.detach_dev		= intel_iommu_detach_device,
 	.map			= intel_iommu_map,
 	.unmap			= intel_iommu_unmap,
-	.map_sg			= default_iommu_map_sg,
 	.iova_to_phys		= intel_iommu_iova_to_phys,
 	.add_device		= intel_iommu_add_device,
 	.remove_device		= intel_iommu_remove_device,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f3698006cb53..8c15c5980299 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1673,8 +1673,8 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
-size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-			 struct scatterlist *sg, unsigned int nents, int prot)
+size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+		    struct scatterlist *sg, unsigned int nents, int prot)
 {
 	struct scatterlist *s;
 	size_t mapped = 0;
@@ -1714,7 +1714,7 @@ out_err:
 	return 0;
 
 }
-EXPORT_SYMBOL_GPL(default_iommu_map_sg);
+EXPORT_SYMBOL_GPL(iommu_map_sg);
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 			       phys_addr_t paddr, u64 size, int prot)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 40ae6e87cb88..c9ce27cfa7e8 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -889,7 +889,6 @@ static const struct iommu_ops ipmmu_ops = {
 	.unmap = ipmmu_unmap,
 	.flush_iotlb_all = ipmmu_iotlb_sync,
 	.iotlb_sync = ipmmu_iotlb_sync,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = ipmmu_iova_to_phys,
 	.add_device = ipmmu_add_device,
 	.remove_device = ipmmu_remove_device,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 0d3350463a3f..cdf6ab22ede9 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -708,7 +708,6 @@ static struct iommu_ops msm_iommu_ops = {
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.add_device = msm_iommu_add_device,
 	.remove_device = msm_iommu_remove_device,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f2832a10fcea..f9f69f7111a9 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -495,7 +495,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.detach_dev	= mtk_iommu_detach_device,
 	.map		= mtk_iommu_map,
 	.unmap		= mtk_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.flush_iotlb_all = mtk_iommu_iotlb_sync,
 	.iotlb_sync	= mtk_iommu_iotlb_sync,
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index a7c2a973784f..676c029494e4 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -531,7 +531,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.detach_dev	= mtk_iommu_detach_device,
 	.map		= mtk_iommu_map,
 	.unmap		= mtk_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
 	.add_device	= mtk_iommu_add_device,
 	.remove_device	= mtk_iommu_remove_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index af4a8e7fcd27..86d64ff507fa 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1548,7 +1548,6 @@ static const struct iommu_ops omap_iommu_ops = {
 	.detach_dev	= omap_iommu_detach_dev,
 	.map		= omap_iommu_map,
 	.unmap		= omap_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= omap_iommu_iova_to_phys,
 	.add_device	= omap_iommu_add_device,
 	.remove_device	= omap_iommu_remove_device,
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index fe88a4880d3a..6e914b3a2b4b 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -590,7 +590,6 @@ static const struct iommu_ops qcom_iommu_ops = {
 	.detach_dev	= qcom_iommu_detach_dev,
 	.map		= qcom_iommu_map,
 	.unmap		= qcom_iommu_unmap,
-	.map_sg		= default_iommu_map_sg,
 	.flush_iotlb_all = qcom_iommu_iotlb_sync,
 	.iotlb_sync	= qcom_iommu_iotlb_sync,
 	.iova_to_phys	= qcom_iommu_iova_to_phys,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..90625cdd3b66 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1110,7 +1110,6 @@ static const struct iommu_ops rk_iommu_ops = {
 	.detach_dev = rk_iommu_detach_device,
 	.map = rk_iommu_map,
 	.unmap = rk_iommu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.add_device = rk_iommu_add_device,
 	.remove_device = rk_iommu_remove_device,
 	.iova_to_phys = rk_iommu_iova_to_phys,
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index a004f6da35f2..7b1361d57a17 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -377,7 +377,6 @@ static const struct iommu_ops gart_iommu_ops = {
 	.remove_device	= gart_iommu_remove_device,
 	.device_group	= generic_device_group,
 	.map		= gart_iommu_map,
-	.map_sg		= default_iommu_map_sg,
 	.unmap		= gart_iommu_unmap,
 	.iova_to_phys	= gart_iommu_iova_to_phys,
 	.pgsize_bitmap	= GART_IOMMU_PGSIZES,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 44d40bc771b5..0d03341317c4 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -876,7 +876,6 @@ static const struct iommu_ops tegra_smmu_ops = {
 	.device_group = tegra_smmu_device_group,
 	.map = tegra_smmu_map,
 	.unmap = tegra_smmu_unmap,
-	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = tegra_smmu_iova_to_phys,
 	.of_xlate = tegra_smmu_of_xlate,
 	.pgsize_bitmap = SZ_4K,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7447b0b0579a..87994c265bf5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -166,8 +166,6 @@ struct iommu_resv_region {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
- * @map_sg: map a scatter-gather list of physically contiguous memory chunks
- *          to an iommu domain
  * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
  * @tlb_range_add: Add a given iova range to the flush queue for this domain
  * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
@@ -201,8 +199,6 @@ struct iommu_ops {
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
-	size_t (*map_sg)(struct iommu_domain *domain, unsigned long iova,
-			 struct scatterlist *sg, unsigned int nents, int prot);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_range_add)(struct iommu_domain *domain,
 				unsigned long iova, size_t size);
@@ -303,9 +299,8 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 			  size_t size);
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
 			       unsigned long iova, size_t size);
-extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
-				struct scatterlist *sg,unsigned int nents,
-				int prot);
+extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			   struct scatterlist *sg,unsigned int nents, int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
 			iommu_fault_handler_t handler, void *token);
@@ -378,13 +373,6 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
 		domain->ops->iotlb_sync(domain);
 }
 
-static inline size_t iommu_map_sg(struct iommu_domain *domain,
-				  unsigned long iova, struct scatterlist *sg,
-				  unsigned int nents, int prot)
-{
-	return domain->ops->map_sg(domain, iova, sg, nents, prot);
-}
-
 /* PCI device grouping function */
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
-- 
cgit v1.2.3


From 5e9384c705fcfa6bf6d3937967df3ab1f5e414cb Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 7 Aug 2018 18:18:25 +0200
Subject: regulator: maxim: Add SPDX license identifiers

Replace GPL v2.0 and v2.0+ license statements with SPDX license
identifiers.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max14577-regulator.c         | 22 +++++------------
 drivers/regulator/max77686-regulator.c         | 32 +++++++-----------------
 drivers/regulator/max77693-regulator.c         | 32 +++++++-----------------
 drivers/regulator/max77802-regulator.c         | 34 +++++++++-----------------
 drivers/regulator/max8997-regulator.c          | 30 ++++++-----------------
 drivers/regulator/max8998.c                    | 28 ++++++---------------
 include/dt-bindings/regulator/maxim,max77802.h |  5 +---
 7 files changed, 52 insertions(+), 131 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c
index 0db288ce319c..bc7f4751bf9c 100644
--- a/drivers/regulator/max14577-regulator.c
+++ b/drivers/regulator/max14577-regulator.c
@@ -1,19 +1,9 @@
-/*
- * max14577.c - Regulator driver for the Maxim 14577/77836
- *
- * Copyright (C) 2013,2014 Samsung Electronics
- * Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max14577.c - Regulator driver for the Maxim 14577/77836
+//
+// Copyright (C) 2013,2014 Samsung Electronics
+// Krzysztof Kozlowski <krzk@kernel.org>
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
diff --git a/drivers/regulator/max77686-regulator.c b/drivers/regulator/max77686-regulator.c
index c301f3733475..bee060937f56 100644
--- a/drivers/regulator/max77686-regulator.c
+++ b/drivers/regulator/max77686-regulator.c
@@ -1,26 +1,12 @@
-/*
- * max77686.c - Regulator driver for the Maxim 77686
- *
- * Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@samsung.com>
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8997.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max77686.c - Regulator driver for the Maxim 77686
+//
+// Copyright (C) 2012 Samsung Electronics
+// Chiwoong Byun <woong.byun@samsung.com>
+// Jonghwa Lee <jonghwa3.lee@samsung.com>
+//
+// This driver is based on max8997.c
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
diff --git a/drivers/regulator/max77693-regulator.c b/drivers/regulator/max77693-regulator.c
index e7000e777292..077ecbbfdf76 100644
--- a/drivers/regulator/max77693-regulator.c
+++ b/drivers/regulator/max77693-regulator.c
@@ -1,26 +1,12 @@
-/*
- * max77693.c - Regulator driver for the Maxim 77693 and 77843
- *
- * Copyright (C) 2013-2015 Samsung Electronics
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- * Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max77686.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max77693.c - Regulator driver for the Maxim 77693 and 77843
+//
+// Copyright (C) 2013-2015 Samsung Electronics
+// Jonghwa Lee <jonghwa3.lee@samsung.com>
+// Krzysztof Kozlowski <krzk@kernel.org>
+//
+// This driver is based on max77686.c
 
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index b6261903818c..c30cf5c9f2de 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -1,25 +1,15 @@
-/*
- * max77802.c - Regulator driver for the Maxim 77802
- *
- * Copyright (C) 2013-2014 Google, Inc
- * Simon Glass <sjg@chromium.org>
- *
- * Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@samsung.com>
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver is based on max8997.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max77802.c - Regulator driver for the Maxim 77802
+//
+// Copyright (C) 2013-2014 Google, Inc
+// Simon Glass <sjg@chromium.org>
+//
+// Copyright (C) 2012 Samsung Electronics
+// Chiwoong Byun <woong.byun@samsung.com>
+// Jonghwa Lee <jonghwa3.lee@samsung.com>
+//
+// This driver is based on max8997.c
 
 #include <linux/kernel.h>
 #include <linux/bug.h>
diff --git a/drivers/regulator/max8997-regulator.c b/drivers/regulator/max8997-regulator.c
index 029a6ee426ab..ad0c806b0737 100644
--- a/drivers/regulator/max8997-regulator.c
+++ b/drivers/regulator/max8997-regulator.c
@@ -1,25 +1,11 @@
-/*
- * max8997.c - Regulator driver for the Maxim 8997/8966
- *
- * Copyright (C) 2011 Samsung Electronics
- * MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8998.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max8997.c - Regulator driver for the Maxim 8997/8966
+//
+// Copyright (C) 2011 Samsung Electronics
+// MyungJoo Ham <myungjoo.ham@samsung.com>
+//
+// This driver is based on max8998.c
 
 #include <linux/bug.h>
 #include <linux/err.h>
diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c
index 6b9f262ebbb0..271bb736f3f5 100644
--- a/drivers/regulator/max8998.c
+++ b/drivers/regulator/max8998.c
@@ -1,24 +1,10 @@
-/*
- * max8998.c - Voltage regulator driver for the Maxim 8998
- *
- *  Copyright (C) 2009-2010 Samsung Electronics
- *  Kyungmin Park <kyungmin.park@samsung.com>
- *  Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max8998.c - Voltage regulator driver for the Maxim 8998
+//
+//  Copyright (C) 2009-2010 Samsung Electronics
+//  Kyungmin Park <kyungmin.park@samsung.com>
+//  Marek Szyprowski <m.szyprowski@samsung.com>
 
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/include/dt-bindings/regulator/maxim,max77802.h b/include/dt-bindings/regulator/maxim,max77802.h
index cf28631d7109..d0baba1973d4 100644
--- a/include/dt-bindings/regulator/maxim,max77802.h
+++ b/include/dt-bindings/regulator/maxim,max77802.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2014 Google, Inc
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Device Tree binding constants for the Maxim 77802 PMIC regulators
  */
 
-- 
cgit v1.2.3


From 8b92d0e3d400390660a26ef7f475524700fb86cf Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Wed, 8 Aug 2018 08:35:29 +0200
Subject: nvme.h: fixup ANA group descriptor format

ANA Phase 3 draft had the 'reserved' field in the group descriptor
format set to '23:17' (so that the first namespace identifier started
at byte 24), but that got move with the approved TP to '31:17'
(so that the first namespace identifier started at byte 32).

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 64c9175723de..a661861e9d56 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -446,7 +446,7 @@ struct nvme_ana_group_desc {
 	__le32	nnsids;
 	__le64	chgcnt;
 	__u8	state;
-	__u8	rsvd17[7];
+	__u8	rsvd17[15];
 	__le32	nsids[];
 };
 
-- 
cgit v1.2.3


From 93045d5942da60801e71764597d448cf37a798c1 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Tue, 7 Aug 2018 23:01:05 -0700
Subject: nvme.h: add support for ns write protect definitions

Add various definitions from NVMe 1.3 TP 4005.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a661861e9d56..68e91ef5494c 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -259,7 +259,7 @@ struct nvme_id_ctrl {
 	__le16			awun;
 	__le16			awupf;
 	__u8			nvscc;
-	__u8			rsvd531;
+	__u8			nwpc;
 	__le16			acwu;
 	__u8			rsvd534[2];
 	__le32			sgls;
@@ -320,7 +320,9 @@ struct nvme_id_ns {
 	__u8			nvmcap[16];
 	__u8			rsvd64[28];
 	__le32			anagrpid;
-	__u8			rsvd96[8];
+	__u8			rsvd96[3];
+	__u8			nsattr;
+	__u8			rsvd100[4];
 	__u8			nguid[16];
 	__u8			eui64[8];
 	struct nvme_lbaf	lbaf[16];
@@ -794,6 +796,7 @@ enum {
 	NVME_FEAT_HOST_ID	= 0x81,
 	NVME_FEAT_RESV_MASK	= 0x82,
 	NVME_FEAT_RESV_PERSIST	= 0x83,
+	NVME_FEAT_WRITE_PROTECT	= 0x84,
 	NVME_LOG_ERROR		= 0x01,
 	NVME_LOG_SMART		= 0x02,
 	NVME_LOG_FW_SLOT	= 0x03,
@@ -807,6 +810,14 @@ enum {
 	NVME_FWACT_ACTV		= (2 << 3),
 };
 
+/* NVMe Namespace Write Protect State */
+enum {
+	NVME_NS_NO_WRITE_PROTECT = 0,
+	NVME_NS_WRITE_PROTECT,
+	NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+	NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
 #define NVME_MAX_CHANGED_NAMESPACES	1024
 
 struct nvme_identify {
@@ -1153,6 +1164,8 @@ enum {
 	NVME_SC_SGL_INVALID_OFFSET	= 0x16,
 	NVME_SC_SGL_INVALID_SUBTYPE	= 0x17,
 
+	NVME_SC_NS_WRITE_PROTECTED	= 0x20,
+
 	NVME_SC_LBA_RANGE		= 0x80,
 	NVME_SC_CAP_EXCEEDED		= 0x81,
 	NVME_SC_NS_NOT_READY		= 0x82,
-- 
cgit v1.2.3


From 0c66847793d1982d1083dc6f7adad60fa265ce9c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 1 Aug 2018 14:25:39 -0700
Subject: overflow.h: Add arithmetic shift helper

Add shift_overflow() helper to assist driver authors in ensuring that
shift operations don't cause overflows or other odd conditions.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
[kees: tweaked comments and commit log, dropped unneeded assignment]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/overflow.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 8712ff70995f..40b48e2133cb 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -202,6 +202,37 @@
 
 #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
 
+/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+ *
+ * @a: Value to be shifted
+ * @s: How many bits left to shift
+ * @d: Pointer to where to store the result
+ *
+ * Computes *@d = (@a << @s)
+ *
+ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * make sense. Example conditions:
+ * - 'a << s' causes bits to be lost when stored in *d.
+ * - 's' is garbage (e.g. negative) or so large that the result of
+ *   'a << s' is guaranteed to be 0.
+ * - 'a' is negative.
+ * - 'a << s' sets the sign bit, if any, in '*d'.
+ *
+ * '*d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if false is returned.
+ */
+#define check_shl_overflow(a, s, d) ({					\
+	typeof(a) _a = a;						\
+	typeof(s) _s = s;						\
+	typeof(d) _d = d;						\
+	u64 _a_full = _a;						\
+	unsigned int _to_shift =					\
+		_s >= 0 && _s < 8 * sizeof(*d) ? _s : 0;		\
+	*_d = (_a_full << _to_shift);					\
+	(_to_shift != _s || *_d < 0 || _a < 0 ||			\
+		(*_d >> _to_shift) != _a);				\
+})
+
 /**
  * array_size() - Calculate size of 2-dimensional array.
  *
-- 
cgit v1.2.3


From 212dfd909ea8b630e5d6fa4d25aeec9c4b4b14a5 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Tue, 7 Aug 2018 22:06:52 +0200
Subject: netfilter: nfnetlink_osf: add missing enum in nfnetlink_osf uapi
 header

xt_osf_window_size_options was originally part of
include/uapi/linux/netfilter/xt_osf.h, restore it.

Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf")
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_osf.h      | 12 ------------
 include/uapi/linux/netfilter/nfnetlink_osf.h | 12 ++++++++++++
 include/uapi/linux/netfilter/xt_osf.h        |  1 +
 3 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index a7311bc03d3a..ecf7dab81e9e 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -4,18 +4,6 @@
 
 #include <uapi/linux/netfilter/nfnetlink_osf.h>
 
-/* Initial window size option state machine: multiple of mss, mtu or
- * plain numeric value. Can also be made as plain numeric value which
- * is not a multiple of specified value.
- */
-enum nf_osf_window_size_options {
-	OSF_WSS_PLAIN   = 0,
-	OSF_WSS_MSS,
-	OSF_WSS_MTU,
-	OSF_WSS_MODULO,
-	OSF_WSS_MAX,
-};
-
 enum osf_fmatch_states {
 	/* Packet does not match the fingerprint */
 	FMATCH_WRONG = 0,
diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
index 3b93fbb9fc24..76a3527df5dd 100644
--- a/include/uapi/linux/netfilter/nfnetlink_osf.h
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -88,6 +88,18 @@ enum iana_options {
 	OSFOPT_EMPTY = 255,
 };
 
+/* Initial window size option state machine: multiple of mss, mtu or
+ * plain numeric value. Can also be made as plain numeric value which
+ * is not a multiple of specified value.
+ */
+enum nf_osf_window_size_options {
+	OSF_WSS_PLAIN	= 0,
+	OSF_WSS_MSS,
+	OSF_WSS_MTU,
+	OSF_WSS_MODULO,
+	OSF_WSS_MAX,
+};
+
 enum nf_osf_attr_type {
 	OSF_ATTR_UNSPEC,
 	OSF_ATTR_FINGER,
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index c56c59605c2b..24102b5286ec 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -46,6 +46,7 @@
 #define xt_osf_finger		nf_osf_finger
 #define xt_osf_nlmsg		nf_osf_nlmsg
 
+#define xt_osf_window_size_options	nf_osf_window_size_options
 #define xt_osf_attr_type	nf_osf_attr_type
 #define xt_osf_msg_types	nf_osf_msg_types
 
-- 
cgit v1.2.3


From 4717be73c2843a3d6d8546872177a19358f6b7b5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 25 Jul 2018 17:39:25 +0300
Subject: i2c: core: Parse SDA hold time from firmware

There are two drivers already using the SDA hold time setting.
It might be more in the future, thus, make I2C core to parse the setting
for us if provided by firmware.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 2 ++
 include/linux/i2c.h         | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index a26b3e9cc441..043c4aadaa44 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1576,6 +1576,8 @@ void i2c_parse_fw_timings(struct device *dev, struct i2c_timings *t, bool use_de
 	ret = device_property_read_u32(dev, "i2c-sda-falling-time-ns", &t->sda_fall_ns);
 	if (ret && use_defaults)
 		t->sda_fall_ns = t->scl_fall_ns;
+
+	device_property_read_u32(dev, "i2c-sda-hold-time-ns", &t->sda_hold_ns);
 }
 EXPORT_SYMBOL_GPL(i2c_parse_fw_timings);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2a98d0886d2e..36f357ecdf67 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -564,6 +564,7 @@ struct i2c_lock_operations {
  * @scl_fall_ns: time SCL signal takes to fall in ns; t(f) in the I2C specification
  * @scl_int_delay_ns: time IP core additionally needs to setup SCL in ns
  * @sda_fall_ns: time SDA signal takes to fall in ns; t(f) in the I2C specification
+ * @sda_hold_ns: time IP core additionally needs to hold SDA in ns
  */
 struct i2c_timings {
 	u32 bus_freq_hz;
@@ -571,6 +572,7 @@ struct i2c_timings {
 	u32 scl_fall_ns;
 	u32 scl_int_delay_ns;
 	u32 sda_fall_ns;
+	u32 sda_hold_ns;
 };
 
 /**
-- 
cgit v1.2.3


From 342ac8448f1fb213908656ae5581d0f37a5954e8 Mon Sep 17 00:00:00 2001
From: Denis Drozdov <denisd@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:48 -0700
Subject: net/mlx5: Use max_num_eqs for calculation of required MSIX vectors

New firmware has defined new HCA capability field called "max_num_eqs",
that is the number of available EQs after subtracting reserved FW EQs.

Before this capability the FW reported the EQ number in "log_max_eqs",
the reported value also contained FW reserved EQs, but the driver might
be failing to load on 320 cpus systems due to the fact that FW
reserved EQs were not available to the driver.

Now the driver has to obtain max_num_eqs value from new FW to get real
number of EQs available.

Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +++-
 include/linux/mlx5/mlx5_ifc.h                  | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 03b9c6733eed..cf3e4a659052 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -323,7 +323,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
 	struct mlx5_priv *priv = &dev->priv;
 	struct mlx5_eq_table *table = &priv->eq_table;
-	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
+	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+		      MLX5_CAP_GEN(dev, max_num_eqs) :
+		      1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int nvec;
 	int err;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 60c2308fe062..63e1ce4c4826 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1133,7 +1133,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         general_obj_types[0x40];
 
-	u8         reserved_at_440[0x40];
+	u8         reserved_at_440[0x20];
+
+	u8         reserved_at_460[0x10];
+	u8         max_num_eqs[0x10];
 
 	u8         reserved_at_480[0x3];
 	u8         log_max_l2_table[0x5];
-- 
cgit v1.2.3


From cc9c82a8668cf98748bfbaa83c5c092d5115c25b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:49 -0700
Subject: net/mlx5: Rename modify/query_vport state related enums

Modify and query vport state commands share the same admin_state and
op_mod values, rename the enums to fit them both.

In addition, remove the esw prefix from the admin state enum as this
also applied for vnic.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c     | 12 ++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c      |  8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c     | 10 +++++-----
 include/linux/mlx5/device.h                           |  6 +++---
 include/linux/mlx5/mlx5_ifc.h                         |  4 ++--
 6 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a2fb21ca5767..2731ba2d8049 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -228,7 +228,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 	u8 port_state;
 
 	port_state = mlx5_query_vport_state(mdev,
-					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT,
+					    MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
 					    0);
 
 	if (port_state == VPORT_STATE_UP) {
@@ -3916,9 +3916,9 @@ static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
 static int mlx5_vport_link2ifla(u8 esw_link)
 {
 	switch (esw_link) {
-	case MLX5_ESW_VPORT_ADMIN_STATE_DOWN:
+	case MLX5_VPORT_ADMIN_STATE_DOWN:
 		return IFLA_VF_LINK_STATE_DISABLE;
-	case MLX5_ESW_VPORT_ADMIN_STATE_UP:
+	case MLX5_VPORT_ADMIN_STATE_UP:
 		return IFLA_VF_LINK_STATE_ENABLE;
 	}
 	return IFLA_VF_LINK_STATE_AUTO;
@@ -3928,11 +3928,11 @@ static int mlx5_ifla_link2vport(u8 ifla_link)
 {
 	switch (ifla_link) {
 	case IFLA_VF_LINK_STATE_DISABLE:
-		return MLX5_ESW_VPORT_ADMIN_STATE_DOWN;
+		return MLX5_VPORT_ADMIN_STATE_DOWN;
 	case IFLA_VF_LINK_STATE_ENABLE:
-		return MLX5_ESW_VPORT_ADMIN_STATE_UP;
+		return MLX5_VPORT_ADMIN_STATE_UP;
 	}
-	return MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+	return MLX5_VPORT_ADMIN_STATE_AUTO;
 }
 
 static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8e3c5b4b90ab..c9cc9747d21d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -698,8 +698,8 @@ static int mlx5e_rep_open(struct net_device *dev)
 		goto unlock;
 
 	if (!mlx5_modify_vport_admin_state(priv->mdev,
-					   MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-					   rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP))
+					   MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+					   rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
 		netif_carrier_on(dev);
 
 unlock:
@@ -716,8 +716,8 @@ static int mlx5e_rep_close(struct net_device *dev)
 
 	mutex_lock(&priv->state_lock);
 	mlx5_modify_vport_admin_state(priv->mdev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-				      rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
+				      rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
 	ret = mlx5e_close_locked(dev);
 	mutex_unlock(&priv->state_lock);
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 4d316cc9b008..35ded91203f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -74,7 +74,7 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv)
 	if (!netif_carrier_ok(priv->netdev))
 		return 1;
 
-	port_state = mlx5_query_vport_state(priv->mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
+	port_state = mlx5_query_vport_state(priv->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
 	return port_state == VPORT_STATE_UP ? 0 : 1;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 40dba9e8af92..cd0760a8d45a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1469,7 +1469,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 		return;
 
 	mlx5_modify_vport_admin_state(esw->dev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 				      vport_num,
 				      vport->info.link_state);
 	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
@@ -1582,9 +1582,9 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 	esw_vport_disable_qos(esw, vport_num);
 	if (vport_num && esw->mode == SRIOV_LEGACY) {
 		mlx5_modify_vport_admin_state(esw->dev,
-					      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 					      vport_num,
-					      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+					      MLX5_VPORT_ADMIN_STATE_DOWN);
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
 		esw_vport_destroy_drop_counters(vport);
@@ -1736,7 +1736,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		struct mlx5_vport *vport = &esw->vports[vport_num];
 
 		vport->vport = vport_num;
-		vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
+		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 		vport->dev = dev;
 		INIT_WORK(&vport->vport_change_handler,
 			  esw_vport_change_handler);
@@ -1860,7 +1860,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 	evport = &esw->vports[vport];
 
 	err = mlx5_modify_vport_admin_state(esw->dev,
-					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					    MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 					    vport, link_state);
 	if (err) {
 		mlx5_core_warn(esw->dev,
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index d489494b0a84..11fa4e66afc5 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -946,9 +946,9 @@ enum {
 };
 
 enum {
-	MLX5_ESW_VPORT_ADMIN_STATE_DOWN  = 0x0,
-	MLX5_ESW_VPORT_ADMIN_STATE_UP    = 0x1,
-	MLX5_ESW_VPORT_ADMIN_STATE_AUTO  = 0x2,
+	MLX5_VPORT_ADMIN_STATE_DOWN  = 0x0,
+	MLX5_VPORT_ADMIN_STATE_UP    = 0x1,
+	MLX5_VPORT_ADMIN_STATE_AUTO  = 0x2,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 63e1ce4c4826..6ead9c1a5396 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3764,8 +3764,8 @@ struct mlx5_ifc_query_vport_state_out_bits {
 };
 
 enum {
-	MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT  = 0x0,
-	MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT   = 0x1,
+	MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT  = 0x0,
+	MLX5_VPORT_STATE_OP_MOD_ESW_VPORT   = 0x1,
 };
 
 struct mlx5_ifc_query_vport_state_in_bits {
-- 
cgit v1.2.3


From 29d8ebd44de8999e292dbb4de76744d4a41039ec Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 8 Aug 2018 16:23:51 -0700
Subject: net/mlx5: Remove unused mlx5_query_vport_admin_state

mlx5_query_vport_admin_state() is not used anywhere. Remove it.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 10 ----------
 include/linux/mlx5/vport.h                      |  2 --
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 7eecd5b07bb1..fa6fa171b94c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -64,16 +64,6 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_vport_state);
 
-u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
-	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
-
-	_mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
-
-	return MLX5_GET(query_vport_state_out, out, admin_state);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
-
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state)
 {
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 9208cb8809ac..7e7c6dfcfb09 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -43,8 +43,6 @@ enum {
 };
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
-u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-				u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
-- 
cgit v1.2.3


From 1035a0783523d8c730c44944a71b254f3a649e25 Mon Sep 17 00:00:00 2001
From: Dongjiu Geng <gengdongjiu@huawei.com>
Date: Tue, 7 Aug 2018 12:26:15 -0400
Subject: arm64 / ACPI: clean the additional checks before calling
 ghes_notify_sea()

In order to remove the additional check before calling the
ghes_notify_sea(), make stub definition when !CONFIG_ACPI_APEI_SEA.

After this cleanup, we can simply call the ghes_notify_sea() to let
APEI driver handle the SEA notification.

Signed-off-by: Dongjiu Geng <gengdongjiu@huawei.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm64/mm/fault.c | 7 +------
 include/acpi/ghes.h   | 4 ++++
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b8eecc7b9531..9ffe01d7042a 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -727,12 +727,7 @@ static const struct fault_info fault_info[] = {
 
 int handle_guest_sea(phys_addr_t addr, unsigned int esr)
 {
-	int ret = -ENOENT;
-
-	if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
-		ret = ghes_notify_sea();
-
-	return ret;
+	return ghes_notify_sea();
 }
 
 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 1624e2be485c..82cb4eb225a4 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -118,6 +118,10 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata)
 	     (void *)section - (void *)(estatus + 1) < estatus->data_length; \
 	     section = acpi_hest_get_next(section))
 
+#ifdef CONFIG_ACPI_APEI_SEA
 int ghes_notify_sea(void);
+#else
+static inline int ghes_notify_sea(void) { return -ENOENT; }
+#endif
 
 #endif /* GHES_H */
-- 
cgit v1.2.3


From 74fe7b551f3385fa585d92616c85b3a575b2b2cb Mon Sep 17 00:00:00 2001
From: Crestez Dan Leonard <leonard.crestez@intel.com>
Date: Tue, 7 Aug 2018 17:52:17 +0300
Subject: regmap: Add regmap_noinc_read API

The regmap API usually assumes that bulk read operations will read a
range of registers but some I2C/SPI devices have certain registers for
which a such a read operation will return data from an internal FIFO
instead. Add an explicit API to support bulk read without range semantics.

Some linux drivers use regmap_bulk_read or regmap_raw_read for such
registers, for example mpu6050 or bmi150 from IIO. This only happens to
work because when caching is disabled a single regmap read op will map
to a single bus read op (as desired). This breaks if caching is enabled and
reg+1 happens to be a cacheable register.

Without regmap support refactoring a driver to enable regmap caching
requires separate I2C and SPI paths. This is exactly what regmap is
supposed to help avoid.

Suggested-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Crestez Dan Leonard <leonard.crestez@intel.com>
Signed-off-by: Stefan Popa <stefan.popa@analog.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h |  3 ++
 drivers/base/regmap/regmap.c   | 79 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/regmap.h         | 19 ++++++++++
 3 files changed, 100 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 53785e0e297a..a6bf34d6394e 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -94,10 +94,12 @@ struct regmap {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 	const struct regmap_access_table *wr_table;
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *rd_noinc_table;
 
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
 	int (*reg_write)(void *context, unsigned int reg, unsigned int val);
@@ -181,6 +183,7 @@ bool regmap_writeable(struct regmap *map, unsigned int reg);
 bool regmap_readable(struct regmap *map, unsigned int reg);
 bool regmap_volatile(struct regmap *map, unsigned int reg);
 bool regmap_precious(struct regmap *map, unsigned int reg);
+bool regmap_readable_noinc(struct regmap *map, unsigned int reg);
 
 int _regmap_write(struct regmap *map, unsigned int reg,
 		  unsigned int val);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 3bc84885eb91..0360a90ad6b6 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -168,6 +168,17 @@ bool regmap_precious(struct regmap *map, unsigned int reg)
 	return false;
 }
 
+bool regmap_readable_noinc(struct regmap *map, unsigned int reg)
+{
+	if (map->readable_noinc_reg)
+		return map->readable_noinc_reg(map->dev, reg);
+
+	if (map->rd_noinc_table)
+		return regmap_check_range_table(map, reg, map->rd_noinc_table);
+
+	return true;
+}
+
 static bool regmap_volatile_range(struct regmap *map, unsigned int reg,
 	size_t num)
 {
@@ -766,10 +777,12 @@ struct regmap *__regmap_init(struct device *dev,
 	map->rd_table = config->rd_table;
 	map->volatile_table = config->volatile_table;
 	map->precious_table = config->precious_table;
+	map->rd_noinc_table = config->rd_noinc_table;
 	map->writeable_reg = config->writeable_reg;
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
 	spin_lock_init(&map->async_lock);
@@ -1285,6 +1298,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
 	regmap_debugfs_init(map, config->name);
@@ -2564,7 +2578,70 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 EXPORT_SYMBOL_GPL(regmap_raw_read);
 
 /**
- * regmap_field_read() - Read a value to a single register field
+ * regmap_noinc_read(): Read data from a register without incrementing the
+ *			register number
+ *
+ * @map: Register map to read from
+ * @reg: Register to read from
+ * @val: Pointer to data buffer
+ * @val_len: Length of output buffer in bytes.
+ *
+ * The regmap API usually assumes that bulk bus read operations will read a
+ * range of registers. Some devices have certain registers for which a read
+ * operation read will read from an internal FIFO.
+ *
+ * The target register must be volatile but registers after it can be
+ * completely unrelated cacheable registers.
+ *
+ * This will attempt multiple reads as required to read val_len bytes.
+ *
+ * A value of zero will be returned on success, a negative errno will be
+ * returned in error cases.
+ */
+int regmap_noinc_read(struct regmap *map, unsigned int reg,
+		      void *val, size_t val_len)
+{
+	size_t read_len;
+	int ret;
+
+	if (!map->bus)
+		return -EINVAL;
+	if (!map->bus->read)
+		return -ENOTSUPP;
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (!IS_ALIGNED(reg, map->reg_stride))
+		return -EINVAL;
+	if (val_len == 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	if (!regmap_volatile(map, reg) || !regmap_readable_noinc(map, reg)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	while (val_len) {
+		if (map->max_raw_read && map->max_raw_read < val_len)
+			read_len = map->max_raw_read;
+		else
+			read_len = val_len;
+		ret = _regmap_raw_read(map, reg, val, read_len);
+		if (ret)
+			goto out_unlock;
+		val = ((u8 *)val) + read_len;
+		val_len -= read_len;
+	}
+
+out_unlock:
+	map->unlock(map->lock_arg);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_noinc_read);
+
+/**
+ * regmap_field_read(): Read a value to a single register field
  *
  * @field: Register field to read from
  * @val: Pointer to store read value
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 4f38068ffb71..19df946499d2 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -268,6 +268,13 @@ typedef void (*regmap_unlock)(void *);
  *                field is NULL but precious_table (see below) is not, the
  *                check is performed on such table (a register is precious if
  *                it belongs to one of the ranges specified by precious_table).
+ * @readable_noinc_reg: Optional callback returning true if the register
+ *			supports multiple read operations without incrementing
+ *			the register number. If this field is NULL but
+ *			rd_noinc_table (see below) is not, the check is
+ *			performed on such table (a register is no increment
+ *			readable if it belongs to one of the ranges specified
+ *			by rd_noinc_table).
  * @disable_locking: This regmap is either protected by external means or
  *                   is guaranteed not be be accessed from multiple threads.
  *                   Don't use any locking mechanisms.
@@ -295,6 +302,7 @@ typedef void (*regmap_unlock)(void *);
  * @rd_table:     As above, for read access.
  * @volatile_table: As above, for volatile registers.
  * @precious_table: As above, for precious registers.
+ * @rd_noinc_table: As above, for no increment readable registers.
  * @reg_defaults: Power on reset values for registers (for use with
  *                register cache support).
  * @num_reg_defaults: Number of elements in reg_defaults.
@@ -344,6 +352,7 @@ struct regmap_config {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 
 	bool disable_locking;
 	regmap_lock lock;
@@ -360,6 +369,7 @@ struct regmap_config {
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *rd_noinc_table;
 	const struct reg_default *reg_defaults;
 	unsigned int num_reg_defaults;
 	enum regcache_type cache_type;
@@ -946,6 +956,8 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
 int regmap_raw_read(struct regmap *map, unsigned int reg,
 		    void *val, size_t val_len);
+int regmap_noinc_read(struct regmap *map, unsigned int reg,
+		      void *val, size_t val_len);
 int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val,
 		     size_t val_count);
 int regmap_update_bits_base(struct regmap *map, unsigned int reg,
@@ -1196,6 +1208,13 @@ static inline int regmap_raw_read(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_noinc_read(struct regmap *map, unsigned int reg,
+				    void *val, size_t val_len)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_bulk_read(struct regmap *map, unsigned int reg,
 				   void *val, size_t val_count)
 {
-- 
cgit v1.2.3


From b1f4267cc5448d20ae0c515a74141e74365e78a3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 9 Aug 2018 07:47:28 -0700
Subject: block: Remove two superfluous #include directives

Commit 12f5b9314545 ("blk-mq: Remove generation seqeunce") removed the
only seqcount_t and u64_stats_sync instances from <linux/blkdev.h> but
did not remove the corresponding #include directives. Since these
include directives are no longer needed, remove them.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.com>,
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 050d599f5ea9..d6869e0e2b64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,8 +27,6 @@
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/seqlock.h>
-#include <linux/u64_stats_sync.h>
 
 struct module;
 struct scsi_ioctl_command;
-- 
cgit v1.2.3


From 6bad9b210a228d2fe0e0efe26d9b115348529cee Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Thu, 9 Aug 2018 07:53:36 -0700
Subject: blkcg: Introduce blkg_root_lookup()

This new function will be used in a later patch to verify whether a
queue has been dissociated from the cgroup controller before being
released.

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alexandru Moise <00moses.alexander00@gmail.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index f7b910768306..1361cfc9b878 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -341,6 +341,23 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 	return __blkg_lookup(blkcg, q, false);
 }
 
+/**
+ * blkg_lookup - look up blkg for the specified request queue
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for @q at the root level. See also blkg_lookup().
+ */
+static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q)
+{
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+	blkg = blkg_lookup(&blkcg_root, q);
+	rcu_read_unlock();
+
+	return blkg;
+}
+
 /**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
@@ -864,6 +881,7 @@ static inline bool blk_cgroup_congested(void) { return false; }
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-- 
cgit v1.2.3


From 209b43759d65b2cc99ce7757249aacc82b03c4e2 Mon Sep 17 00:00:00 2001
From: Michael Büsch <m@bues.ch>
Date: Tue, 31 Jul 2018 22:15:09 +0200
Subject: ssb: Remove SSB_WARN_ON, SSB_BUG_ON and SSB_DEBUG

Use the standard WARN_ON instead.
If a small kernel is desired, WARN_ON can be disabled globally.

Also remove SSB_DEBUG. Besides WARN_ON it only adds a tiny debug check.
Include this check unconditionally.

Signed-off-by: Michael Buesch <m@bues.ch>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 arch/mips/configs/bcm47xx_defconfig |  1 -
 arch/powerpc/configs/wii_defconfig  |  1 -
 drivers/ssb/Kconfig                 |  9 ---------
 drivers/ssb/driver_chipcommon.c     |  8 ++++----
 drivers/ssb/driver_chipcommon_pmu.c | 10 +++++-----
 drivers/ssb/driver_gpio.c           |  4 ++--
 drivers/ssb/driver_pcicore.c        |  6 +++---
 drivers/ssb/embedded.c              | 10 +++++-----
 drivers/ssb/host_soc.c              | 12 ++++++------
 drivers/ssb/main.c                  | 38 +++++++++++++++++--------------------
 drivers/ssb/pci.c                   | 19 ++++++-------------
 drivers/ssb/pcmcia.c                | 14 +++++++-------
 drivers/ssb/scan.c                  |  4 ++--
 drivers/ssb/sdio.c                  | 12 ++++++------
 drivers/ssb/ssb_private.h           |  9 ---------
 include/linux/ssb/ssb.h             |  2 --
 16 files changed, 63 insertions(+), 96 deletions(-)

(limited to 'include')

diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index fad8e964f14c..ba800a892384 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -66,7 +66,6 @@ CONFIG_HW_RANDOM=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_WATCHDOG=y
 CONFIG_BCM47XX_WDT=y
-CONFIG_SSB_DEBUG=y
 CONFIG_SSB_DRIVER_GIGE=y
 CONFIG_BCMA_DRIVER_GMAC_CMN=y
 CONFIG_USB=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 10940533da71..f5c366b02828 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -78,7 +78,6 @@ CONFIG_GPIO_HLWD=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_GPIO=y
 # CONFIG_HWMON is not set
-CONFIG_SSB_DEBUG=y
 CONFIG_FB=y
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 6c438c819eb9..df30e1323252 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -89,15 +89,6 @@ config SSB_HOST_SOC
 
 	  If unsure, say N
 
-config SSB_DEBUG
-	bool "SSB debugging"
-	depends on SSB
-	help
-	  This turns on additional runtime checks and debugging
-	  messages. Turn this on for SSB troubleshooting.
-
-	  If unsure, say N
-
 config SSB_SERIAL
 	bool
 	depends on SSB
diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 48050c6fd847..99a4656d113d 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -56,7 +56,7 @@ void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc,
 
 	if (cc->capabilities & SSB_CHIPCO_CAP_PMU)
 		return; /* PMU controls clockmode, separated function needed */
-	SSB_WARN_ON(ccdev->id.revision >= 20);
+	WARN_ON(ccdev->id.revision >= 20);
 
 	/* chipcommon cores prior to rev6 don't support dynamic clock control */
 	if (ccdev->id.revision < 6)
@@ -111,7 +111,7 @@ void ssb_chipco_set_clockmode(struct ssb_chipcommon *cc,
 		}
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 
@@ -164,7 +164,7 @@ static int chipco_pctl_clockfreqlimit(struct ssb_chipcommon *cc, int get_max)
 			divisor = 32;
 			break;
 		default:
-			SSB_WARN_ON(1);
+			WARN_ON(1);
 		}
 	} else if (cc->dev->id.revision < 10) {
 		switch (clocksrc) {
@@ -277,7 +277,7 @@ static void calc_fast_powerup_delay(struct ssb_chipcommon *cc)
 	minfreq = chipco_pctl_clockfreqlimit(cc, 0);
 	pll_on_delay = chipco_read32(cc, SSB_CHIPCO_PLLONDELAY);
 	tmp = (((pll_on_delay + 2) * 1000000) + (minfreq - 1)) / minfreq;
-	SSB_WARN_ON(tmp & ~0xFFFF);
+	WARN_ON(tmp & ~0xFFFF);
 
 	cc->fast_pwrup_delay = tmp;
 }
diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c
index e28682a53cdf..0f60e90ded26 100644
--- a/drivers/ssb/driver_chipcommon_pmu.c
+++ b/drivers/ssb/driver_chipcommon_pmu.c
@@ -128,7 +128,7 @@ static void ssb_pmu0_pllinit_r0(struct ssb_chipcommon *cc,
 			      ~(1 << SSB_PMURES_5354_BB_PLL_PU));
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	for (i = 1500; i; i--) {
 		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
@@ -265,7 +265,7 @@ static void ssb_pmu1_pllinit_r0(struct ssb_chipcommon *cc,
 		buffer_strength = 0x222222;
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	for (i = 1500; i; i--) {
 		tmp = chipco_read32(cc, SSB_CHIPCO_CLKCTLST);
@@ -501,7 +501,7 @@ static void ssb_pmu_resources_init(struct ssb_chipcommon *cc)
 					      ~(depend_tab[i].depend));
 				break;
 			default:
-				SSB_WARN_ON(1);
+				WARN_ON(1);
 			}
 		}
 	}
@@ -568,12 +568,12 @@ void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc,
 			mask = 0x3F;
 			break;
 		default:
-			SSB_WARN_ON(1);
+			WARN_ON(1);
 			return;
 		}
 		break;
 	case 0x4312:
-		if (SSB_WARN_ON(id != LDO_PAREF))
+		if (WARN_ON(id != LDO_PAREF))
 			return;
 		addr = 0;
 		shift = 21;
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 6ce4abf7d473..e809dae4c470 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -461,7 +461,7 @@ int ssb_gpio_init(struct ssb_bus *bus)
 	else if (ssb_extif_available(&bus->extif))
 		return ssb_gpio_extif_init(bus);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 
 	return -1;
 }
@@ -473,7 +473,7 @@ int ssb_gpio_unregister(struct ssb_bus *bus)
 		gpiochip_remove(&bus->gpio);
 		return 0;
 	} else {
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return -1;
diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c
index ae80b3171523..6a5622e0ded5 100644
--- a/drivers/ssb/driver_pcicore.c
+++ b/drivers/ssb/driver_pcicore.c
@@ -115,7 +115,7 @@ static int ssb_extpci_read_config(struct ssb_pcicore *pc,
 	u32 addr, val;
 	void __iomem *mmio;
 
-	SSB_WARN_ON(!pc->hostmode);
+	WARN_ON(!pc->hostmode);
 	if (unlikely(len != 1 && len != 2 && len != 4))
 		goto out;
 	addr = get_cfgspace_addr(pc, bus, dev, func, off);
@@ -161,7 +161,7 @@ static int ssb_extpci_write_config(struct ssb_pcicore *pc,
 	u32 addr, val = 0;
 	void __iomem *mmio;
 
-	SSB_WARN_ON(!pc->hostmode);
+	WARN_ON(!pc->hostmode);
 	if (unlikely(len != 1 && len != 2 && len != 4))
 		goto out;
 	addr = get_cfgspace_addr(pc, bus, dev, func, off);
@@ -702,7 +702,7 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc,
 		/* Calculate the "coremask" for the device. */
 		coremask = (1 << dev->core_index);
 
-		SSB_WARN_ON(bus->bustype != SSB_BUSTYPE_PCI);
+		WARN_ON(bus->bustype != SSB_BUSTYPE_PCI);
 		err = pci_read_config_dword(bus->host_pci, SSB_PCI_IRQMASK, &tmp);
 		if (err)
 			goto out;
diff --git a/drivers/ssb/embedded.c b/drivers/ssb/embedded.c
index c39ddf8988c1..8254ed25e063 100644
--- a/drivers/ssb/embedded.c
+++ b/drivers/ssb/embedded.c
@@ -77,7 +77,7 @@ u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_in(&bus->extif, mask);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -95,7 +95,7 @@ u32 ssb_gpio_out(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_out(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -113,7 +113,7 @@ u32 ssb_gpio_outen(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_outen(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -145,7 +145,7 @@ u32 ssb_gpio_intmask(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_intmask(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
@@ -163,7 +163,7 @@ u32 ssb_gpio_polarity(struct ssb_bus *bus, u32 mask, u32 value)
 	else if (ssb_extif_available(&bus->extif))
 		res = ssb_extif_gpio_polarity(&bus->extif, mask, value);
 	else
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	spin_unlock_irqrestore(&bus->gpio_lock, flags);
 
 	return res;
diff --git a/drivers/ssb/host_soc.c b/drivers/ssb/host_soc.c
index eadaedf466f6..3b438480515c 100644
--- a/drivers/ssb/host_soc.c
+++ b/drivers/ssb/host_soc.c
@@ -61,7 +61,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u16): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -72,7 +72,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u32): {
 		__le32 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			*buf = (__force __le32)__raw_readl(addr);
 			buf++;
@@ -81,7 +81,7 @@ static void ssb_host_soc_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
@@ -134,7 +134,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u16): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -145,7 +145,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u32): {
 		const __le32 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			__raw_writel((__force u32)(*buf), addr);
 			buf++;
@@ -154,7 +154,7 @@ static void ssb_host_soc_block_write(struct ssb_device *dev, const void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 9da56d2fdbd3..0a26984acb2c 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -209,7 +209,7 @@ int ssb_devices_freeze(struct ssb_bus *bus, struct ssb_freeze_context *ctx)
 
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->bus = bus;
-	SSB_WARN_ON(bus->nr_devices > ARRAY_SIZE(ctx->device_frozen));
+	WARN_ON(bus->nr_devices > ARRAY_SIZE(ctx->device_frozen));
 
 	for (i = 0; i < bus->nr_devices; i++) {
 		sdev = ssb_device_get(&bus->devices[i]);
@@ -220,7 +220,7 @@ int ssb_devices_freeze(struct ssb_bus *bus, struct ssb_freeze_context *ctx)
 			continue;
 		}
 		sdrv = drv_to_ssb_drv(sdev->dev->driver);
-		if (SSB_WARN_ON(!sdrv->remove))
+		if (WARN_ON(!sdrv->remove))
 			continue;
 		sdrv->remove(sdev);
 		ctx->device_frozen[i] = 1;
@@ -248,10 +248,10 @@ int ssb_devices_thaw(struct ssb_freeze_context *ctx)
 			continue;
 		sdev = &bus->devices[i];
 
-		if (SSB_WARN_ON(!sdev->dev || !sdev->dev->driver))
+		if (WARN_ON(!sdev->dev || !sdev->dev->driver))
 			continue;
 		sdrv = drv_to_ssb_drv(sdev->dev->driver);
-		if (SSB_WARN_ON(!sdrv || !sdrv->probe))
+		if (WARN_ON(!sdrv || !sdrv->probe))
 			continue;
 
 		err = sdrv->probe(sdev, &sdev->id);
@@ -861,13 +861,13 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 	case SSB_PLLTYPE_2: /* 48Mhz, 4 dividers */
 		n1 += SSB_CHIPCO_CLK_T2_BIAS;
 		n2 += SSB_CHIPCO_CLK_T2_BIAS;
-		SSB_WARN_ON(!((n1 >= 2) && (n1 <= 7)));
-		SSB_WARN_ON(!((n2 >= 5) && (n2 <= 23)));
+		WARN_ON(!((n1 >= 2) && (n1 <= 7)));
+		WARN_ON(!((n2 >= 5) && (n2 <= 23)));
 		break;
 	case SSB_PLLTYPE_5: /* 25Mhz, 4 dividers */
 		return 100000000;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	switch (plltype) {
@@ -916,9 +916,9 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 		m1 += SSB_CHIPCO_CLK_T2_BIAS;
 		m2 += SSB_CHIPCO_CLK_T2M2_BIAS;
 		m3 += SSB_CHIPCO_CLK_T2_BIAS;
-		SSB_WARN_ON(!((m1 >= 2) && (m1 <= 7)));
-		SSB_WARN_ON(!((m2 >= 3) && (m2 <= 10)));
-		SSB_WARN_ON(!((m3 >= 2) && (m3 <= 7)));
+		WARN_ON(!((m1 >= 2) && (m1 <= 7)));
+		WARN_ON(!((m2 >= 3) && (m2 <= 10)));
+		WARN_ON(!((m3 >= 2) && (m3 <= 7)));
 
 		if (!(mc & SSB_CHIPCO_CLK_T2MC_M1BYP))
 			clock /= m1;
@@ -928,7 +928,7 @@ u32 ssb_calc_clock_rate(u32 plltype, u32 n, u32 m)
 			clock /= m3;
 		return clock;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	return 0;
 }
@@ -1169,9 +1169,7 @@ int ssb_bus_may_powerdown(struct ssb_bus *bus)
 	if (err)
 		goto error;
 out:
-#ifdef CONFIG_SSB_DEBUG
 	bus->powered_up = 0;
-#endif
 	return err;
 error:
 	pr_err("Bus powerdown failed\n");
@@ -1188,9 +1186,7 @@ int ssb_bus_powerup(struct ssb_bus *bus, bool dynamic_pctl)
 	if (err)
 		goto error;
 
-#ifdef CONFIG_SSB_DEBUG
 	bus->powered_up = 1;
-#endif
 
 	mode = dynamic_pctl ? SSB_CLKMODE_DYNAMIC : SSB_CLKMODE_FAST;
 	ssb_chipco_set_clockmode(&bus->chipco, mode);
@@ -1242,15 +1238,15 @@ u32 ssb_admatch_base(u32 adm)
 		base = (adm & SSB_ADM_BASE0);
 		break;
 	case SSB_ADM_TYPE1:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		base = (adm & SSB_ADM_BASE1);
 		break;
 	case SSB_ADM_TYPE2:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		base = (adm & SSB_ADM_BASE2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return base;
@@ -1266,15 +1262,15 @@ u32 ssb_admatch_size(u32 adm)
 		size = ((adm & SSB_ADM_SZ0) >> SSB_ADM_SZ0_SHIFT);
 		break;
 	case SSB_ADM_TYPE1:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		size = ((adm & SSB_ADM_SZ1) >> SSB_ADM_SZ1_SHIFT);
 		break;
 	case SSB_ADM_TYPE2:
-		SSB_WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
+		WARN_ON(adm & SSB_ADM_NEG); /* unsupported */
 		size = ((adm & SSB_ADM_SZ2) >> SSB_ADM_SZ2_SHIFT);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	size = (1 << (size + 1));
 
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index ad4308529eba..84807a9b4b13 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -946,7 +946,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_SSB_DEBUG
 static int ssb_pci_assert_buspower(struct ssb_bus *bus)
 {
 	if (likely(bus->powered_up))
@@ -960,12 +959,6 @@ static int ssb_pci_assert_buspower(struct ssb_bus *bus)
 
 	return -ENODEV;
 }
-#else /* DEBUG */
-static inline int ssb_pci_assert_buspower(struct ssb_bus *bus)
-{
-	return 0;
-}
-#endif /* DEBUG */
 
 static u8 ssb_pci_read8(struct ssb_device *dev, u16 offset)
 {
@@ -1024,15 +1017,15 @@ static void ssb_pci_block_read(struct ssb_device *dev, void *buffer,
 		ioread8_rep(addr, buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		ioread16_rep(addr, buffer, count >> 1);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		ioread32_rep(addr, buffer, count >> 2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 
 	return;
@@ -1098,15 +1091,15 @@ static void ssb_pci_block_write(struct ssb_device *dev, const void *buffer,
 		iowrite8_rep(addr, buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		iowrite16_rep(addr, buffer, count >> 1);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		iowrite32_rep(addr, buffer, count >> 2);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 }
 #endif /* CONFIG_SSB_BLOCKIO */
diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c
index 20f63cc88e70..567013f8a8be 100644
--- a/drivers/ssb/pcmcia.c
+++ b/drivers/ssb/pcmcia.c
@@ -169,7 +169,7 @@ int ssb_pcmcia_switch_segment(struct ssb_bus *bus, u8 seg)
 	int err;
 	u8 val;
 
-	SSB_WARN_ON((seg != 0) && (seg != 1));
+	WARN_ON((seg != 0) && (seg != 1));
 	while (1) {
 		err = ssb_pcmcia_cfg_write(bus, SSB_PCMCIA_MEMSEG, seg);
 		if (err)
@@ -299,7 +299,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u16): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -310,7 +310,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 	case sizeof(u32): {
 		__le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			*buf = (__force __le16)__raw_readw(addr);
 			buf++;
@@ -321,7 +321,7 @@ static void ssb_pcmcia_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 unlock:
 	spin_unlock_irqrestore(&bus->bar_lock, flags);
@@ -399,7 +399,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u16): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -410,7 +410,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 	case sizeof(u32): {
 		const __le16 *buf = buffer;
 
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		while (count) {
 			__raw_writew((__force u16)(*buf), addr);
 			buf++;
@@ -421,7 +421,7 @@ static void ssb_pcmcia_block_write(struct ssb_device *dev, const void *buffer,
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 unlock:
 	mmiowb();
diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c
index 71fbb4b3eb6a..6ceee98ed6ff 100644
--- a/drivers/ssb/scan.c
+++ b/drivers/ssb/scan.c
@@ -210,7 +210,7 @@ void ssb_iounmap(struct ssb_bus *bus)
 #ifdef CONFIG_SSB_PCIHOST
 		pci_iounmap(bus->host_pci, bus->mmio);
 #else
-		SSB_BUG_ON(1); /* Can't reach this code. */
+		WARN_ON(1); /* Can't reach this code. */
 #endif
 		break;
 	case SSB_BUSTYPE_SDIO:
@@ -236,7 +236,7 @@ static void __iomem *ssb_ioremap(struct ssb_bus *bus,
 #ifdef CONFIG_SSB_PCIHOST
 		mmio = pci_iomap(bus->host_pci, 0, ~0UL);
 #else
-		SSB_BUG_ON(1); /* Can't reach this code. */
+		WARN_ON(1); /* Can't reach this code. */
 #endif
 		break;
 	case SSB_BUSTYPE_SDIO:
diff --git a/drivers/ssb/sdio.c b/drivers/ssb/sdio.c
index 1aedc5fbb62f..7fe0afb42234 100644
--- a/drivers/ssb/sdio.c
+++ b/drivers/ssb/sdio.c
@@ -316,18 +316,18 @@ static void ssb_sdio_block_read(struct ssb_device *dev, void *buffer,
 		break;
 	}
 	case sizeof(u16): {
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		error = sdio_readsb(bus->host_sdio, buffer, offset, count);
 		break;
 	}
 	case sizeof(u32): {
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		offset |= SBSDIO_SB_ACCESS_2_4B_FLAG;	/* 32 bit data access */
 		error = sdio_readsb(bus->host_sdio, buffer, offset, count);
 		break;
 	}
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	if (!error)
 		goto out;
@@ -423,18 +423,18 @@ static void ssb_sdio_block_write(struct ssb_device *dev, const void *buffer,
 				     (void *)buffer, count);
 		break;
 	case sizeof(u16):
-		SSB_WARN_ON(count & 1);
+		WARN_ON(count & 1);
 		error = sdio_writesb(bus->host_sdio, offset,
 				     (void *)buffer, count);
 		break;
 	case sizeof(u32):
-		SSB_WARN_ON(count & 3);
+		WARN_ON(count & 3);
 		offset |= SBSDIO_SB_ACCESS_2_4B_FLAG;	/* 32 bit data access */
 		error = sdio_writesb(bus->host_sdio, offset,
 				     (void *)buffer, count);
 		break;
 	default:
-		SSB_WARN_ON(1);
+		WARN_ON(1);
 	}
 	if (!error)
 		goto out;
diff --git a/drivers/ssb/ssb_private.h b/drivers/ssb/ssb_private.h
index 885e278c4487..5f31bdfbe77f 100644
--- a/drivers/ssb/ssb_private.h
+++ b/drivers/ssb/ssb_private.h
@@ -9,15 +9,6 @@
 #include <linux/types.h>
 #include <linux/bcm47xx_wdt.h>
 
-#ifdef CONFIG_SSB_DEBUG
-# define SSB_WARN_ON(x)		WARN_ON(x)
-# define SSB_BUG_ON(x)		BUG_ON(x)
-#else
-static inline int __ssb_do_nothing(int x) { return x; }
-# define SSB_WARN_ON(x)		__ssb_do_nothing(unlikely(!!(x)))
-# define SSB_BUG_ON(x)		__ssb_do_nothing(unlikely(!!(x)))
-#endif
-
 
 /* pci.c */
 #ifdef CONFIG_SSB_PCIHOST
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 3b43655cabe6..0d5a2691e7e9 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -499,11 +499,9 @@ struct ssb_bus {
 
 	/* Internal-only stuff follows. Do not touch. */
 	struct list_head list;
-#ifdef CONFIG_SSB_DEBUG
 	/* Is the bus already powered up? */
 	bool powered_up;
 	int power_warn_count;
-#endif /* DEBUG */
 };
 
 enum ssb_quirks {
-- 
cgit v1.2.3


From cb95deea0b4aa5c7c7423f4e075a3ddcd59e710b Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:29 -0400
Subject: NFS OFFLOAD_CANCEL xdr

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42xdr.c         | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         |  1 +
 fs/nfs/nfs4xdr.c          |  1 +
 include/linux/nfs4.h      |  1 +
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   | 12 +++++++++
 6 files changed, 84 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5966e1e7b1f5..09a540d035b8 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -26,6 +26,9 @@
 					 NFS42_WRITE_RES_SIZE + \
 					 1 /* cr_consecutive */ + \
 					 1 /* cr_synchronous */)
+#define encode_offload_cancel_maxsz	(op_encode_hdr_maxsz + \
+					 XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_offload_cancel_maxsz	(op_decode_hdr_maxsz)
 #define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
 					 encode_fallocate_maxsz)
 #define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
@@ -75,6 +78,12 @@
 					 decode_putfh_maxsz + \
 					 decode_copy_maxsz + \
 					 decode_commit_maxsz)
+#define NFS4_enc_offload_cancel_sz	(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_offload_cancel_maxsz)
+#define NFS4_dec_offload_cancel_sz	(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_offload_cancel_maxsz)
 #define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_deallocate_maxsz + \
@@ -145,6 +154,14 @@ static void encode_copy(struct xdr_stream *xdr,
 	encode_uint32(xdr, 0); /* src server list */
 }
 
+static void encode_offload_cancel(struct xdr_stream *xdr,
+				  const struct nfs42_offload_status_args *args,
+				  struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_OFFLOAD_CANCEL, decode_offload_cancel_maxsz, hdr);
+	encode_nfs4_stateid(xdr, &args->osa_stateid);
+}
+
 static void encode_deallocate(struct xdr_stream *xdr,
 			      const struct nfs42_falloc_args *args,
 			      struct compound_hdr *hdr)
@@ -260,6 +277,25 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode OFFLOAD_CANEL request
+ */
+static void nfs4_xdr_enc_offload_cancel(struct rpc_rqst *req,
+					struct xdr_stream *xdr,
+					const void *data)
+{
+	const struct nfs42_offload_status_args *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->osa_seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->osa_seq_args, &hdr);
+	encode_putfh(xdr, args->osa_src_fh, &hdr);
+	encode_offload_cancel(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
 /*
  * Encode DEALLOCATE request
  */
@@ -413,6 +449,12 @@ static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
 	return decode_copy_requirements(xdr, res);
 }
 
+static int decode_offload_cancel(struct xdr_stream *xdr,
+				 struct nfs42_offload_status_res *res)
+{
+	return decode_op_hdr(xdr, OP_OFFLOAD_CANCEL);
+}
+
 static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
 	return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -512,6 +554,32 @@ out:
 	return status;
 }
 
+/*
+ * Decode OFFLOAD_CANCEL response
+ */
+static int nfs4_xdr_dec_offload_cancel(struct rpc_rqst *rqstp,
+				       struct xdr_stream *xdr,
+				       void *data)
+{
+	struct nfs42_offload_status_res *res = data;
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->osr_seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_offload_cancel(xdr, res);
+
+out:
+	return status;
+}
+
 /*
  * Decode DEALLOCATE request
  */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 124a9f97c366..b22b41eff9c0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9620,6 +9620,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_LGOPEN
 		| NFS_CAP_ALLOCATE
 		| NFS_CAP_COPY
+		| NFS_CAP_OFFLOAD_CANCEL
 		| NFS_CAP_DEALLOCATE
 		| NFS_CAP_SEEK
 		| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cd41d2577a04..b7bde12d8cd5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7789,6 +7789,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC42(LAYOUTSTATS,	enc_layoutstats,	dec_layoutstats),
 	PROC42(CLONE,		enc_clone,		dec_clone),
 	PROC42(COPY,		enc_copy,		dec_copy),
+	PROC42(OFFLOAD_CANCEL,	enc_offload_cancel,	dec_offload_cancel),
 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
 };
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 57ffaa20d564..c44b87293229 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -527,6 +527,7 @@ enum {
 	NFSPROC4_CLNT_LAYOUTSTATS,
 	NFSPROC4_CLNT_CLONE,
 	NFSPROC4_CLNT_COPY,
+	NFSPROC4_CLNT_OFFLOAD_CANCEL,
 
 	NFSPROC4_CLNT_LOOKUPP,
 };
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2c18d618604e..fbc735f08d7e 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -255,5 +255,6 @@ struct nfs_server {
 #define NFS_CAP_LAYOUTSTATS	(1U << 22)
 #define NFS_CAP_CLONE		(1U << 23)
 #define NFS_CAP_COPY		(1U << 24)
+#define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3b7325cfb291..85e928a56cef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1403,6 +1403,18 @@ struct nfs42_copy_res {
 	struct nfs_commitres		commit_res;
 };
 
+struct nfs42_offload_status_args {
+	struct nfs4_sequence_args	osa_seq_args;
+	struct nfs_fh			*osa_src_fh;
+	nfs4_stateid			osa_stateid;
+};
+
+struct nfs42_offload_status_res {
+	struct nfs4_sequence_res	osr_seq_res;
+	uint64_t			osr_count;
+	int				osr_status;
+};
+
 struct nfs42_seek_args {
 	struct nfs4_sequence_args	seq_args;
 
-- 
cgit v1.2.3


From 67aa7444c4beb40aafedd8d2c60bbcc54987adda Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:30 -0400
Subject: NFS COPY xdr handle async reply

If server returns async reply, it must include a callback stateid,
wr_callback_id in the write_response4.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42xdr.c       | 22 ++++++++++++----------
 include/linux/nfs_xdr.h |  1 +
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 09a540d035b8..205c3567ea08 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -389,21 +389,23 @@ static int decode_write_response(struct xdr_stream *xdr,
 				 struct nfs42_write_res *res)
 {
 	__be32 *p;
+	int status, count;
 
-	p = xdr_inline_decode(xdr, 4 + 8 + 4);
+	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
 		goto out_overflow;
-
-	/*
-	 * We never use asynchronous mode, so warn if a server returns
-	 * a stateid.
-	 */
-	if (unlikely(*p != 0)) {
-		pr_err_once("%s: server has set unrequested "
-				"asynchronous mode\n", __func__);
+	count = be32_to_cpup(p);
+	if (count > 1)
 		return -EREMOTEIO;
+	else if (count == 1) {
+		status = decode_opaque_fixed(xdr, &res->stateid,
+				NFS4_STATEID_SIZE);
+		if (unlikely(status))
+			goto out_overflow;
 	}
-	p++;
+	p = xdr_inline_decode(xdr, 8 + 4);
+	if (unlikely(!p))
+		goto out_overflow;
 	p = xdr_decode_hyper(p, &res->count);
 	res->verifier.committed = be32_to_cpup(p);
 	return decode_verifier(xdr, &res->verifier.verifier);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 85e928a56cef..06ddfa31cbef 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1391,6 +1391,7 @@ struct nfs42_copy_args {
 };
 
 struct nfs42_write_res {
+	nfs4_stateid		stateid;
 	u64			count;
 	struct nfs_writeverf	verifier;
 };
-- 
cgit v1.2.3


From 62164f317972fcd36590578888f33a1994dda519 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:31 -0400
Subject: NFS add support for asynchronous COPY

Change xdr to always send COPY asynchronously.

Keep the list copies send in a list under a server structure.
Once copy is sent, it waits on a completion structure that will
be signalled by the callback thread that receives CB_OFFLOAD.

If CB_OFFLOAD returned an error and even if it returned partial
bytes, ignore them (as we can't commit without a verifier to
match) and return an error.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_proc.c    | 38 +++++++++++++++++++++++++++++++-
 fs/nfs/client.c           |  1 +
 fs/nfs/nfs42proc.c        | 55 ++++++++++++++++++++++++++++++++++++++++++-----
 fs/nfs/nfs42xdr.c         |  8 ++++---
 include/linux/nfs_fs.h    |  9 ++++++++
 include/linux/nfs_fs_sb.h |  1 +
 include/linux/nfs_xdr.h   |  1 +
 7 files changed, 104 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d6f45bd176a9..acdda259912e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -662,9 +662,45 @@ __be32 nfs4_callback_notify_lock(void *argp, void *resp,
 }
 #endif /* CONFIG_NFS_V4_1 */
 #ifdef CONFIG_NFS_V4_2
-__be32 nfs4_callback_offload(void *args, void *dummy,
+static void nfs4_copy_cb_args(struct nfs4_copy_state *cp_state,
+				struct cb_offloadargs *args)
+{
+	cp_state->count = args->wr_count;
+	cp_state->error = args->error;
+	if (!args->error) {
+		cp_state->verf.committed = args->wr_writeverf.committed;
+		memcpy(&cp_state->verf.verifier.data[0],
+			&args->wr_writeverf.verifier.data[0],
+			NFS4_VERIFIER_SIZE);
+	}
+}
+
+__be32 nfs4_callback_offload(void *data, void *dummy,
 			     struct cb_process_state *cps)
 {
+	struct cb_offloadargs *args = data;
+	struct nfs_server *server;
+	struct nfs4_copy_state *copy;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
+				client_link) {
+		spin_lock(&server->nfs_client->cl_lock);
+		list_for_each_entry(copy, &server->ss_copies, copies) {
+			if (memcmp(args->coa_stateid.other,
+					copy->stateid.other,
+					sizeof(args->coa_stateid.other)))
+				continue;
+			nfs4_copy_cb_args(copy, args);
+			complete(&copy->completion);
+			spin_unlock(&server->nfs_client->cl_lock);
+			goto out;
+		}
+		spin_unlock(&server->nfs_client->cl_lock);
+	}
+out:
+	rcu_read_unlock();
+
 	return 0;
 }
 #endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 377a61654a88..96d5f8135eb9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -886,6 +886,7 @@ struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->delegations);
 	INIT_LIST_HEAD(&server->layouts);
 	INIT_LIST_HEAD(&server->state_owners_lru);
+	INIT_LIST_HEAD(&server->ss_copies);
 
 	atomic_set(&server->active, 0);
 
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 5f59b6f65a42..023aea8f6cf1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -130,6 +130,37 @@ out_unlock:
 	return err;
 }
 
+static int handle_async_copy(struct nfs42_copy_res *res,
+			     struct nfs_server *server,
+			     struct file *src,
+			     struct file *dst,
+			     nfs4_stateid *src_stateid)
+{
+	struct nfs4_copy_state *copy;
+	int status = NFS4_OK;
+
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return -ENOMEM;
+	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
+	init_completion(&copy->completion);
+
+	spin_lock(&server->nfs_client->cl_lock);
+	list_add_tail(&copy->copies, &server->ss_copies);
+	spin_unlock(&server->nfs_client->cl_lock);
+
+	wait_for_completion_interruptible(&copy->completion);
+	spin_lock(&server->nfs_client->cl_lock);
+	list_del_init(&copy->copies);
+	spin_unlock(&server->nfs_client->cl_lock);
+	res->write_res.count = copy->count;
+	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
+	status = -copy->error;
+
+	kfree(copy);
+	return status;
+}
+
 static ssize_t _nfs42_proc_copy(struct file *src,
 				struct nfs_lock_context *src_lock,
 				struct file *dst,
@@ -168,9 +199,13 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		return status;
 
-	res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
-	if (!res->commit_res.verf)
-		return -ENOMEM;
+	res->commit_res.verf = NULL;
+	if (args->sync) {
+		res->commit_res.verf =
+			kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+		if (!res->commit_res.verf)
+			return -ENOMEM;
+	}
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
@@ -178,18 +213,27 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 	if (status)
 		goto out;
 
-	if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
+	if (args->sync &&
+		nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
 				    &res->commit_res.verf->verifier)) {
 		status = -EAGAIN;
 		goto out;
 	}
 
+	if (!res->synchronous) {
+		status = handle_async_copy(res, server, src, dst,
+				&args->src_stateid);
+		if (status)
+			return status;
+	}
+
 	truncate_pagecache_range(dst_inode, pos_dst,
 				 pos_dst + res->write_res.count);
 
 	status = res->write_res.count;
 out:
-	kfree(res->commit_res.verf);
+	if (args->sync)
+		kfree(res->commit_res.verf);
 	return status;
 }
 
@@ -206,6 +250,7 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
 		.dst_fh		= NFS_FH(file_inode(dst)),
 		.dst_pos	= pos_dst,
 		.count		= count,
+		.sync		= false,
 	};
 	struct nfs42_copy_res res;
 	struct nfs4_exception src_exception = {
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 205c3567ea08..69f72ed2bf87 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -150,7 +150,7 @@ static void encode_copy(struct xdr_stream *xdr,
 	encode_uint64(xdr, args->count);
 
 	encode_uint32(xdr, 1); /* consecutive = true */
-	encode_uint32(xdr, 1); /* synchronous = true */
+	encode_uint32(xdr, args->sync);
 	encode_uint32(xdr, 0); /* src server list */
 }
 
@@ -273,7 +273,8 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
 	encode_savefh(xdr, &hdr);
 	encode_putfh(xdr, args->dst_fh, &hdr);
 	encode_copy(xdr, args, &hdr);
-	encode_copy_commit(xdr, args, &hdr);
+	if (args->sync)
+		encode_copy_commit(xdr, args, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -551,7 +552,8 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
 	status = decode_copy(xdr, res);
 	if (status)
 		goto out;
-	status = decode_commit(xdr, &res->commit_res);
+	if (res->commit_res.verf)
+		status = decode_commit(xdr, &res->commit_res);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2f129bbfaae8..645ad8e342f6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,6 +185,15 @@ struct nfs_inode {
 	struct inode		vfs_inode;
 };
 
+struct nfs4_copy_state {
+	struct list_head	copies;
+	nfs4_stateid		stateid;
+	struct completion	completion;
+	uint64_t		count;
+	struct nfs_writeverf	verf;
+	int			error;
+};
+
 /*
  * Access bit flags
  */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbc735f08d7e..f88952d7b9fb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -208,6 +208,7 @@ struct nfs_server {
 	struct list_head	state_owners_lru;
 	struct list_head	layouts;
 	struct list_head	delegations;
+	struct list_head	ss_copies;
 
 	unsigned long		mig_gen;
 	unsigned long		mig_status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 06ddfa31cbef..bd1c889a9ed9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1388,6 +1388,7 @@ struct nfs42_copy_args {
 	u64				dst_pos;
 
 	u64				count;
+	bool				sync;
 };
 
 struct nfs42_write_res {
-- 
cgit v1.2.3


From bc0c9079b48ddcf1f8a6e1aaa277288b263c78d8 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 9 Jul 2018 15:13:32 -0400
Subject: NFS handle COPY reply CB_OFFLOAD call race

It's possible that server replies back with CB_OFFLOAD call and
COPY reply at the same time such that client will process
CB_OFFLOAD before reply to COPY. For that keep a list of pending
callback stateids received and then before waiting on completion
check the pending list.

Cleanup any pending copies on the client shutdown.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_proc.c    | 17 ++++++++++++++---
 fs/nfs/nfs42proc.c        | 22 ++++++++++++++++++++--
 fs/nfs/nfs4client.c       | 15 +++++++++++++++
 include/linux/nfs_fs_sb.h |  1 +
 4 files changed, 50 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index acdda259912e..cd733649646b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -681,11 +681,12 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 	struct cb_offloadargs *args = data;
 	struct nfs_server *server;
 	struct nfs4_copy_state *copy;
+	bool found = false;
 
+	spin_lock(&cps->clp->cl_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
 				client_link) {
-		spin_lock(&server->nfs_client->cl_lock);
 		list_for_each_entry(copy, &server->ss_copies, copies) {
 			if (memcmp(args->coa_stateid.other,
 					copy->stateid.other,
@@ -693,13 +694,23 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 				continue;
 			nfs4_copy_cb_args(copy, args);
 			complete(&copy->completion);
-			spin_unlock(&server->nfs_client->cl_lock);
+			found = true;
 			goto out;
 		}
-		spin_unlock(&server->nfs_client->cl_lock);
 	}
 out:
 	rcu_read_unlock();
+	if (!found) {
+		copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+		if (!copy) {
+			spin_unlock(&cps->clp->cl_lock);
+			return htonl(NFS4ERR_SERVERFAULT);
+		}
+		memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
+		nfs4_copy_cb_args(copy, args);
+		list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
+	}
+	spin_unlock(&cps->clp->cl_lock);
 
 	return 0;
 }
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 023aea8f6cf1..c7d31f72070e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -138,14 +138,31 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 {
 	struct nfs4_copy_state *copy;
 	int status = NFS4_OK;
+	bool found_pending = false;
+
+	spin_lock(&server->nfs_client->cl_lock);
+	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+				copies) {
+		if (memcmp(&res->write_res.stateid, &copy->stateid,
+				NFS4_STATEID_SIZE))
+			continue;
+		found_pending = true;
+		list_del(&copy->copies);
+		break;
+	}
+	if (found_pending) {
+		spin_unlock(&server->nfs_client->cl_lock);
+		goto out;
+	}
 
 	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-	if (!copy)
+	if (!copy) {
+		spin_unlock(&server->nfs_client->cl_lock);
 		return -ENOMEM;
+	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
 
-	spin_lock(&server->nfs_client->cl_lock);
 	list_add_tail(&copy->copies, &server->ss_copies);
 	spin_unlock(&server->nfs_client->cl_lock);
 
@@ -153,6 +170,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	spin_lock(&server->nfs_client->cl_lock);
 	list_del_init(&copy->copies);
 	spin_unlock(&server->nfs_client->cl_lock);
+out:
 	res->write_res.count = copy->count;
 	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
 	status = -copy->error;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d7124fb12041..146e30862234 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -156,9 +156,23 @@ nfs4_shutdown_ds_clients(struct nfs_client *clp)
 	}
 }
 
+static void
+nfs4_cleanup_callback(struct nfs_client *clp)
+{
+	struct nfs4_copy_state *cp_state;
+
+	while (!list_empty(&clp->pending_cb_stateids)) {
+		cp_state = list_entry(clp->pending_cb_stateids.next,
+					struct nfs4_copy_state, copies);
+		list_del(&cp_state->copies);
+		kfree(cp_state);
+	}
+}
+
 void nfs41_shutdown_client(struct nfs_client *clp)
 {
 	if (nfs4_has_session(clp)) {
+		nfs4_cleanup_callback(clp);
 		nfs4_shutdown_ds_clients(clp);
 		nfs4_destroy_session(clp->cl_session);
 		nfs4_destroy_clientid(clp);
@@ -202,6 +216,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
 #if IS_ENABLED(CONFIG_NFS_V4_1)
 	init_waitqueue_head(&clp->cl_lock_waitq);
 #endif
+	INIT_LIST_HEAD(&clp->pending_cb_stateids);
 	return clp;
 
 error:
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index f88952d7b9fb..bf39d9c92201 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -121,6 +121,7 @@ struct nfs_client {
 #endif
 
 	struct net		*cl_net;
+	struct list_head	pending_cb_stateids;
 };
 
 /*
-- 
cgit v1.2.3


From c3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 23 Jul 2018 15:20:37 -0500
Subject: signal: Don't restart fork when signals come in.

Wen Yang <wen.yang99@zte.com.cn> and majiang <ma.jiang@zte.com.cn>
report that a periodic signal received during fork can cause fork to
continually restart preventing an application from making progress.

The code was being overly pessimistic.  Fork needs to guarantee that a
signal sent to multiple processes is logically delivered before the
fork and just to the forking process or logically delivered after the
fork to both the forking process and it's newly spawned child.  For
signals like periodic timers that are always delivered to a single
process fork can safely complete and let them appear to logically
delivered after the fork().

While examining this issue I also discovered that fork today will miss
signals delivered to multiple processes during the fork and handled by
another thread.  Similarly the current code will also miss blocked
signals that are delivered to multiple process, as those signals will
not appear pending during fork.

Add a list of each thread that is currently forking, and keep on that
list a signal set that records all of the signals sent to multiple
processes.  When fork completes initialize the new processes
shared_pending signal set with it.  The calculate_sigpending function
will see those signals and set TIF_SIGPENDING causing the new task to
take the slow path to userspace to handle those signals.  Making it
appear as if those signals were received immediately after the fork.

It is not possible to send real time signals to multiple processes and
exceptions don't go to multiple processes, which means that that are
no signals sent to multiple processes that require siginfo.  This
means it is safe to not bother collecting siginfo on signals sent
during fork.

The sigaction of a child of fork is initially the same as the
sigaction of the parent process.  So a signal the parent ignores the
child will also initially ignore.  Therefore it is safe to ignore
signals sent to multiple processes and ignored by the forking process.

Signals sent to only a single process or only a single thread and delivered
during fork are treated as if they are received after the fork, and generally
not dealt with.  They won't cause any problems.

V2: Added removal from the multiprocess list on failure.
V3: Use -ERESTARTNOINTR directly
V4: - Don't queue both SIGCONT and SIGSTOP
    - Initialize signal_struct.multiprocess in init_task
    - Move setting of shared_pending to before the new task
      is visible to signals.  This prevents signals from comming
      in before shared_pending.signal is set to delayed.signal
      and being lost.
V5: - rework list add and delete to account for idle threads
v6: - Use sigdelsetmask when removing stop signals

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447
Reported-by: Wen Yang <wen.yang99@zte.com.cn> and
Reported-by: majiang <ma.jiang@zte.com.cn>
Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  8 ++++++++
 init/init_task.c             |  1 +
 kernel/fork.c                | 43 +++++++++++++++++++++++++------------------
 kernel/signal.c              | 15 +++++++++++++++
 4 files changed, 49 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae2b0b81be25..4e9b77fb702d 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -69,6 +69,11 @@ struct thread_group_cputimer {
 	bool checking_timer;
 };
 
+struct multiprocess_signals {
+	sigset_t signal;
+	struct hlist_node node;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -90,6 +95,9 @@ struct signal_struct {
 	/* shared signal handling: */
 	struct sigpending	shared_pending;
 
+	/* For collecting multiprocess signals during fork */
+	struct hlist_head	multiprocess;
+
 	/* thread group exit support */
 	int			group_exit_code;
 	/* overloaded:
diff --git a/init/init_task.c b/init/init_task.c
index 4f97846256d7..5aebe3be4d7c 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,6 +22,7 @@ static struct signal_struct init_signals = {
 		.list = LIST_HEAD_INIT(init_signals.shared_pending.list),
 		.signal =  {{0}}
 	},
+	.multiprocess	= HLIST_HEAD_INIT,
 	.rlim		= INIT_RLIMITS,
 	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
 #ifdef CONFIG_POSIX_TIMERS
diff --git a/kernel/fork.c b/kernel/fork.c
index ab731e15a600..411e34acace7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1456,6 +1456,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	init_waitqueue_head(&sig->wait_chldexit);
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
+	INIT_HLIST_HEAD(&sig->multiprocess);
 	seqlock_init(&sig->stats_lock);
 	prev_cputime_init(&sig->prev_cputime);
 
@@ -1602,6 +1603,7 @@ static __latent_entropy struct task_struct *copy_process(
 {
 	int retval;
 	struct task_struct *p;
+	struct multiprocess_signals delayed;
 
 	/*
 	 * Don't allow sharing the root directory with processes in a different
@@ -1649,6 +1651,24 @@ static __latent_entropy struct task_struct *copy_process(
 			return ERR_PTR(-EINVAL);
 	}
 
+	/*
+	 * Force any signals received before this point to be delivered
+	 * before the fork happens.  Collect up signals sent to multiple
+	 * processes that happen during the fork and delay them so that
+	 * they appear to happen after the fork.
+	 */
+	sigemptyset(&delayed.signal);
+	INIT_HLIST_NODE(&delayed.node);
+
+	spin_lock_irq(&current->sighand->siglock);
+	if (!(clone_flags & CLONE_THREAD))
+		hlist_add_head(&delayed.node, &current->signal->multiprocess);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+	retval = -ERESTARTNOINTR;
+	if (signal_pending(current))
+		goto fork_out;
+
 	retval = -ENOMEM;
 	p = dup_task_struct(current, node);
 	if (!p)
@@ -1934,22 +1954,6 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cancel_cgroup;
 	}
 
-	if (!(clone_flags & CLONE_THREAD)) {
-		/*
-		 * Process group and session signals need to be delivered to just the
-		 * parent before the fork or both the parent and the child after the
-		 * fork. Restart if a signal comes in before we add the new process to
-		 * it's process group.
-		 * A fatal signal pending means that current will exit, so the new
-		 * thread can't slip out of an OOM kill (or normal SIGKILL).
-		 */
-		recalc_sigpending();
-		if (signal_pending(current)) {
-			retval = -ERESTARTNOINTR;
-			goto bad_fork_cancel_cgroup;
-		}
-	}
-
 
 	init_task_pid_links(p);
 	if (likely(p->pid)) {
@@ -1965,7 +1969,7 @@ static __latent_entropy struct task_struct *copy_process(
 				ns_of_pid(pid)->child_reaper = p;
 				p->signal->flags |= SIGNAL_UNKILLABLE;
 			}
-
+			p->signal->shared_pending.signal = delayed.signal;
 			p->signal->tty = tty_kref_get(current->signal->tty);
 			/*
 			 * Inherit has_child_subreaper flag under the same
@@ -1993,8 +1997,8 @@ static __latent_entropy struct task_struct *copy_process(
 		attach_pid(p, PIDTYPE_PID);
 		nr_threads++;
 	}
-
 	total_forks++;
+	hlist_del_init(&delayed.node);
 	spin_unlock(&current->sighand->siglock);
 	syscall_tracepoint_update(p);
 	write_unlock_irq(&tasklist_lock);
@@ -2059,6 +2063,9 @@ bad_fork_free:
 	put_task_stack(p);
 	free_task(p);
 fork_out:
+	spin_lock_irq(&current->sighand->siglock);
+	hlist_del_init(&delayed.node);
+	spin_unlock_irq(&current->sighand->siglock);
 	return ERR_PTR(retval);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f0eafb6d474..cfa9d10e731a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1121,6 +1121,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 out_set:
 	signalfd_notify(t, sig);
 	sigaddset(&pending->signal, sig);
+
+	/* Let multiprocess signals appear after on-going forks */
+	if (type > PIDTYPE_TGID) {
+		struct multiprocess_signals *delayed;
+		hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
+			sigset_t *signal = &delayed->signal;
+			/* Can't queue both a stop and a continue signal */
+			if (sig == SIGCONT)
+				sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
+			else if (sig_kernel_stop(sig))
+				sigdelset(signal, SIGCONT);
+			sigaddset(signal, sig);
+		}
+	}
+
 	complete_signal(sig, t, type);
 ret:
 	trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);
-- 
cgit v1.2.3


From 624c0f0239f04cce78c86afa95eb2841c84fbab1 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 9 Aug 2018 15:38:38 +0200
Subject: phylink: add helper for configuring 2500BaseX modes

Add a helper for MAC drivers to use in their validate callback to deal
with 2500BaseX vs 1000BaseX modes, where the hardware supports both
but it is not possible to automatically select between them.

This helper defaults to 1000BaseX, as that is the 802.3 standard, and
will allow users to select 2500BaseX either by forcing the speed if
AN is disabled, or by changing the advertising mask if AN is enabled.
Disabling AN is not recommended as it is only the speed that we're
interested in controlling, not the duplex or pause mode parameters.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 30 ++++++++++++++++++++++++++++++
 include/linux/phylink.h   |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index af4dc4425be2..3ba5cf2a8a5f 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1688,4 +1688,34 @@ static const struct sfp_upstream_ops sfp_phylink_ops = {
 	.disconnect_phy = phylink_sfp_disconnect_phy,
 };
 
+/* Helpers for MAC drivers */
+
+/**
+ * phylink_helper_basex_speed() - 1000BaseX/2500BaseX helper
+ * @state: a pointer to a &struct phylink_link_state
+ *
+ * Inspect the interface mode, advertising mask or forced speed and
+ * decide whether to run at 2.5Gbit or 1Gbit appropriately, switching
+ * the interface mode to suit.  @state->interface is appropriately
+ * updated, and the advertising mask has the "other" baseX_Full flag
+ * cleared.
+ */
+void phylink_helper_basex_speed(struct phylink_link_state *state)
+{
+	if (phy_interface_mode_is_8023z(state->interface)) {
+		bool want_2500 = state->an_enabled ?
+			phylink_test(state->advertising, 2500baseX_Full) :
+			state->speed == SPEED_2500;
+
+		if (want_2500) {
+			phylink_clear(state->advertising, 1000baseX_Full);
+			state->interface = PHY_INTERFACE_MODE_2500BASEX;
+		} else {
+			phylink_clear(state->advertising, 2500baseX_Full);
+			state->interface = PHY_INTERFACE_MODE_1000BASEX;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(phylink_helper_basex_speed);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 50eeae025f1e..021fc6595856 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -234,5 +234,6 @@ int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
 #define phylink_test(bm, mode)	__phylink_do_bit(test_bit, bm, mode)
 
 void phylink_set_port_modes(unsigned long *bits);
+void phylink_helper_basex_speed(struct phylink_link_state *state);
 
 #endif
-- 
cgit v1.2.3


From 48ae5554a076c1bca31448d60263e4038def9f6f Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 8 Aug 2018 09:04:29 +0100
Subject: net: stmmac: Add XGMAC 2.10 HWIF entry

Add a new entry to HWIF table for XGMAC 2.10. For now we fill it with
empty callbacks which will be added in posterior patches.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h | 14 +++++++------
 drivers/net/ethernet/stmicro/stmmac/hwif.c   | 31 ++++++++++++++++++++++++++--
 include/linux/stmmac.h                       |  1 +
 3 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 78fd0f8b8e81..3fb81acbd274 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -36,12 +36,14 @@
 #include "mmc.h"
 
 /* Synopsys Core versions */
-#define	DWMAC_CORE_3_40	0x34
-#define	DWMAC_CORE_3_50	0x35
-#define	DWMAC_CORE_4_00	0x40
-#define DWMAC_CORE_4_10	0x41
-#define DWMAC_CORE_5_00 0x50
-#define DWMAC_CORE_5_10 0x51
+#define	DWMAC_CORE_3_40		0x34
+#define	DWMAC_CORE_3_50		0x35
+#define	DWMAC_CORE_4_00		0x40
+#define DWMAC_CORE_4_10		0x41
+#define DWMAC_CORE_5_00		0x50
+#define DWMAC_CORE_5_10		0x51
+#define DWXGMAC_CORE_2_10	0x21
+
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
 /* These need to be power of two, and >= 4 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 1f50e83cafb2..24f5ff175aa4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -72,6 +72,7 @@ static int stmmac_dwmac4_quirks(struct stmmac_priv *priv)
 static const struct stmmac_hwif_entry {
 	bool gmac;
 	bool gmac4;
+	bool xgmac;
 	u32 min_id;
 	const struct stmmac_regs_off regs;
 	const void *desc;
@@ -87,6 +88,7 @@ static const struct stmmac_hwif_entry {
 	{
 		.gmac = false,
 		.gmac4 = false,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -103,6 +105,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = true,
 		.gmac4 = false,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC3_X_OFFSET,
@@ -119,6 +122,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = 0,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -135,6 +139,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_4_00,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -151,6 +156,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_4_10,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -167,6 +173,7 @@ static const struct stmmac_hwif_entry {
 	}, {
 		.gmac = false,
 		.gmac4 = true,
+		.xgmac = false,
 		.min_id = DWMAC_CORE_5_10,
 		.regs = {
 			.ptp_off = PTP_GMAC4_OFFSET,
@@ -180,11 +187,29 @@ static const struct stmmac_hwif_entry {
 		.tc = &dwmac510_tc_ops,
 		.setup = dwmac4_setup,
 		.quirks = NULL,
-	}
+	}, {
+		.gmac = false,
+		.gmac4 = false,
+		.xgmac = true,
+		.min_id = DWXGMAC_CORE_2_10,
+		.regs = {
+			.ptp_off = 0,
+			.mmc_off = 0,
+		},
+		.desc = NULL,
+		.dma = NULL,
+		.mac = NULL,
+		.hwtimestamp = NULL,
+		.mode = NULL,
+		.tc = NULL,
+		.setup = NULL,
+		.quirks = NULL,
+	},
 };
 
 int stmmac_hwif_init(struct stmmac_priv *priv)
 {
+	bool needs_xgmac = priv->plat->has_xgmac;
 	bool needs_gmac4 = priv->plat->has_gmac4;
 	bool needs_gmac = priv->plat->has_gmac;
 	const struct stmmac_hwif_entry *entry;
@@ -195,7 +220,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 
 	if (needs_gmac) {
 		id = stmmac_get_id(priv, GMAC_VERSION);
-	} else if (needs_gmac4) {
+	} else if (needs_gmac4 || needs_xgmac) {
 		id = stmmac_get_id(priv, GMAC4_VERSION);
 	} else {
 		id = 0;
@@ -229,6 +254,8 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
 			continue;
 		if (needs_gmac4 ^ entry->gmac4)
 			continue;
+		if (needs_xgmac ^ entry->xgmac)
+			continue;
 		/* Use synopsys_id var because some setups can override this */
 		if (priv->synopsys_id < entry->min_id)
 			continue;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 32feac5bbd75..c43e9a01b892 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -190,5 +190,6 @@ struct plat_stmmacenet_data {
 	bool tso_en;
 	int mac_port_sel_speed;
 	bool en_tx_lpi_clockgating;
+	int has_xgmac;
 };
 #endif
-- 
cgit v1.2.3


From 64bed6cbe38bc95689fb9399872d9ce250192f90 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 13 Jul 2018 17:22:24 +0300
Subject: nfsd: fix leaked file lock with nfs exported overlayfs

nfsd and lockd call vfs_lock_file() to lock/unlock the inode
returned by locks_inode(file).

Many places in nfsd/lockd code use the inode returned by
file_inode(file) for lock manipulation. With Overlayfs, file_inode()
(the underlying inode) is not the same object as locks_inode() (the
overlay inode). This can result in "Leaked POSIX lock" messages
and eventually to a kernel crash as reported by Eddie Horng:
https://marc.info/?l=linux-unionfs&m=153086643202072&w=2

Fix all the call sites in nfsd/lockd that should use locks_inode().
This is a correctness bug that manifested when overlayfs gained
NFS export support in v4.16.

Reported-by: Eddie Horng <eddiehorng.tw@gmail.com>
Tested-by: Eddie Horng <eddiehorng.tw@gmail.com>
Cc: Jeff Layton <jlayton@kernel.org>
Fixes: 8383f1748829 ("ovl: wire up NFS export operations")
Cc: stable@vger.kernel.org
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clntlock.c         |  2 +-
 fs/lockd/clntproc.c         |  2 +-
 fs/lockd/svclock.c          | 16 ++++++++--------
 fs/lockd/svcsubs.c          |  4 ++--
 fs/nfsd/nfs4state.c         |  2 +-
 include/linux/lockd/lockd.h |  4 ++--
 6 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 96c1d14c18f1..c2a128678e6e 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -187,7 +187,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
 			continue;
 		if (!rpc_cmp_addr(nlm_addr(block->b_host), addr))
 			continue;
-		if (nfs_compare_fh(NFS_FH(file_inode(fl_blocked->fl_file)) ,fh) != 0)
+		if (nfs_compare_fh(NFS_FH(locks_inode(fl_blocked->fl_file)), fh) != 0)
 			continue;
 		/* Alright, we found a lock. Set the return status
 		 * and wake up the caller
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a2c0dfc6fdc0..d20b92f271c2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -128,7 +128,7 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 	char *nodename = req->a_host->h_rpcclnt->cl_nodename;
 
 	nlmclnt_next_cookie(&argp->cookie);
-	memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
+	memcpy(&lock->fh, NFS_FH(locks_inode(fl->fl_file)), sizeof(struct nfs_fh));
 	lock->caller  = nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3701bccab478..74330daeab71 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -405,8 +405,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type, lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end,
@@ -511,8 +511,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
 	__be32			ret;
 
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_type,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -566,8 +566,8 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
 	int	error;
 
 	dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
@@ -595,8 +595,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
 	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
-				file_inode(file->f_file)->i_sb->s_id,
-				file_inode(file->f_file)->i_ino,
+				locks_inode(file->f_file)->i_sb->s_id,
+				locks_inode(file->f_file)->i_ino,
 				lock->fl.fl_pid,
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 4ec3d6e03e76..899360ba3b84 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -44,7 +44,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 
 static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
 {
-	struct inode *inode = file_inode(file->f_file);
+	struct inode *inode = locks_inode(file->f_file);
 
 	dprintk("lockd: %s %s/%ld\n",
 		msg, inode->i_sb->s_id, inode->i_ino);
@@ -414,7 +414,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
 {
 	struct super_block *sb = datap;
 
-	return sb == file_inode(file->f_file)->i_sb;
+	return sb == locks_inode(file->f_file)->i_sb;
 }
 
 /**
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 19c4d29917ee..c8e4a8f42bbe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6322,7 +6322,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
 		return status;
 	}
 
-	inode = file_inode(filp);
+	inode = locks_inode(filp);
 	flctx = inode->i_flctx;
 
 	if (flctx && !list_empty_careful(&flctx->flc_posix)) {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 4fd95dbeb52f..b065ef406770 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -299,7 +299,7 @@ int           nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
 
 static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
 {
-	return file_inode(file->f_file);
+	return locks_inode(file->f_file);
 }
 
 static inline int __nlm_privileged_request4(const struct sockaddr *sap)
@@ -359,7 +359,7 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp)
 static inline int nlm_compare_locks(const struct file_lock *fl1,
 				    const struct file_lock *fl2)
 {
-	return file_inode(fl1->fl_file) == file_inode(fl2->fl_file)
+	return locks_inode(fl1->fl_file) == locks_inode(fl2->fl_file)
 	     && fl1->fl_pid   == fl2->fl_pid
 	     && fl1->fl_owner == fl2->fl_owner
 	     && fl1->fl_start == fl2->fl_start
-- 
cgit v1.2.3


From 3fd9557aec919e2db99365ad5a2c00d04ae8893c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:05 -0400
Subject: NFSD: Refactor the generic write vector fill helper

fill_in_write_vector() is nearly the same logic as
svc_fill_write_vector(), but there are a few differences so that
the former can handle multiple WRITE payloads in a single COMPOUND.

svc_fill_write_vector() can be adjusted so that it can be used in
the NFSv4 WRITE code path too. Instead of assuming the pages are
coming from rq_args.pages, have the caller pass in the page list.

The immediate benefit is a reduction of code duplication. It also
prevents the NFSv4 WRITE decoder from passing an empty vector
element when the transport has provided the payload in the xdr_buf's
page array.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3proc.c         |  3 ++-
 fs/nfsd/nfs4proc.c         | 23 ++++-------------------
 fs/nfsd/nfsproc.c          |  3 ++-
 include/linux/sunrpc/svc.h |  1 +
 net/sunrpc/svc.c           | 11 ++++-------
 5 files changed, 13 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 6259a4b8579f..8d1c2d1a159b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -202,7 +202,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->committed = argp->stable;
-	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
 	if (!nvecs)
 		RETURN_STATUS(nfserr_io);
 	nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3652f9b1fb68..b7bc6e1a85ac 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -986,24 +986,6 @@ out:
 	return status;
 }
 
-static int fill_in_write_vector(struct kvec *vec, struct nfsd4_write *write)
-{
-        int i = 1;
-        int buflen = write->wr_buflen;
-
-        vec[0].iov_base = write->wr_head.iov_base;
-        vec[0].iov_len = min_t(int, buflen, write->wr_head.iov_len);
-        buflen -= vec[0].iov_len;
-
-        while (buflen) {
-                vec[i].iov_base = page_address(write->wr_pagelist[i - 1]);
-                vec[i].iov_len = min_t(int, PAGE_SIZE, buflen);
-                buflen -= vec[i].iov_len;
-                i++;
-        }
-        return i;
-}
-
 static __be32
 nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	    union nfsd4_op_u *u)
@@ -1031,7 +1013,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	write->wr_how_written = write->wr_stable_how;
 	gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
 
-	nvecs = fill_in_write_vector(rqstp->rq_vec, write);
+	nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
+				      &write->wr_head, write->wr_buflen);
+	if (!nvecs)
+		return nfserr_io;
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
 	status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index f107f9fa8e15..a6faee562b31 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -218,7 +218,8 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
-	nvecs = svc_fill_write_vector(rqstp, &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
+				      &argp->first, cnt);
 	if (!nvecs)
 		return nfserr_io;
 	nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 574368e8a16f..43f88bd7b601 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -496,6 +496,7 @@ void		   svc_reserve(struct svc_rqst *rqstp, int space);
 struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
+					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, size_t total);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 30a4226baf03..2194ed507991 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1537,16 +1537,16 @@ EXPORT_SYMBOL_GPL(svc_max_payload);
 /**
  * svc_fill_write_vector - Construct data argument for VFS write call
  * @rqstp: svc_rqst to operate on
+ * @pages: list of pages containing data payload
  * @first: buffer containing first section of write payload
  * @total: total number of bytes of write payload
  *
- * Returns the number of elements populated in the data argument array.
+ * Fills in rqstp::rq_vec, and returns the number of elements.
  */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
-				   size_t total)
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
+				   struct kvec *first, size_t total)
 {
 	struct kvec *vec = rqstp->rq_vec;
-	struct page **pages;
 	unsigned int i;
 
 	/* Some types of transport can present the write payload
@@ -1560,14 +1560,11 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct kvec *first,
 		++i;
 	}
 
-	WARN_ON_ONCE(rqstp->rq_arg.page_base != 0);
-	pages = rqstp->rq_arg.pages;
 	while (total) {
 		vec[i].iov_base = page_address(*pages);
 		vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
 		total -= vec[i].iov_len;
 		++i;
-
 		++pages;
 	}
 
-- 
cgit v1.2.3


From 11b4d66ea3313d9b03a83b80458ddee64990e3c3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 27 Jul 2018 11:19:10 -0400
Subject: NFSD: Handle full-length symlinks

I've given up on the idea of zero-copy handling of SYMLINK on the
server side. This is because the Linux VFS symlink API requires the
symlink pathname to be in a NUL-terminated kmalloc'd buffer. The
NUL-termination is going to be problematic (watching out for
landing on a page boundary and dealing with a 4096-byte pathname).

I don't believe that SYMLINK creation is on a performance path or is
requested frequently enough that it will cause noticeable CPU cache
pollution due to data copies.

There will be two places where a transport callout will be necessary
to fill in the rqstp: one will be in the svc_fill_symlink_pathname()
helper that is used by NFSv2 and NFSv3, and the other will be in
nfsd4_decode_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3proc.c         |  2 ++
 fs/nfsd/nfsproc.c          |  2 ++
 include/linux/sunrpc/svc.h |  3 ++-
 net/sunrpc/svc.c           | 67 +++++++++++++++++-----------------------------
 4 files changed, 31 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 8d1c2d1a159b..9eb8086ea841 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -290,6 +290,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 		RETURN_STATUS(nfserr_nametoolong);
 
 	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						page_address(rqstp->rq_arg.pages[0]),
 						argp->tlen);
 	if (IS_ERR(argp->tname))
 		RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
@@ -303,6 +304,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
 	fh_init(&resp->fh, NFS3_FHSIZE);
 	nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
 						   argp->tname, &resp->fh);
+	kfree(argp->tname);
 	RETURN_STATUS(nfserr);
 }
 
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a6faee562b31..0d20fd161225 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -454,6 +454,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 		return nfserr_nametoolong;
 
 	argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
+						page_address(rqstp->rq_arg.pages[0]),
 						argp->tlen);
 	if (IS_ERR(argp->tname))
 		return nfserrno(PTR_ERR(argp->tname));
@@ -466,6 +467,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
 	nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
 						 argp->tname, &newfh);
 
+	kfree(argp->tname);
 	fh_put(&argp->ffh);
 	fh_put(&newfh);
 	return nfserr;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 43f88bd7b601..73e130a840ce 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -499,7 +499,8 @@ unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
 					 struct page **pages,
 					 struct kvec *first, size_t total);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
-					     struct kvec *first, size_t total);
+					     struct kvec *first, void *p,
+					     size_t total);
 
 #define	RPC_MAX_ADDRBUFLEN	(63U)
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2194ed507991..d13e05f1a990 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1577,65 +1577,48 @@ EXPORT_SYMBOL_GPL(svc_fill_write_vector);
  * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
  * @rqstp: svc_rqst to operate on
  * @first: buffer containing first section of pathname
+ * @p: buffer containing remaining section of pathname
  * @total: total length of the pathname argument
  *
- * Returns pointer to a NUL-terminated string, or an ERR_PTR. The buffer is
- * released automatically when @rqstp is recycled.
+ * The VFS symlink API demands a NUL-terminated pathname in mapped memory.
+ * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free
+ * the returned string.
  */
 char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first,
-				size_t total)
+				void *p, size_t total)
 {
-	struct xdr_buf *arg = &rqstp->rq_arg;
-	struct page **pages;
-	char *result;
-
-	/* VFS API demands a NUL-terminated pathname. This function
-	 * uses a page from @rqstp as the pathname buffer, to enable
-	 * direct placement. Thus the total buffer size is PAGE_SIZE.
-	 * Space in this buffer for NUL-termination requires that we
-	 * cap the size of the returned symlink pathname just a
-	 * little early.
-	 */
-	if (total > PAGE_SIZE - 1)
-		return ERR_PTR(-ENAMETOOLONG);
+	size_t len, remaining;
+	char *result, *dst;
 
-	/* Some types of transport can present the pathname entirely
-	 * in rq_arg.pages. If not, then copy the pathname into one
-	 * page.
-	 */
-	pages = arg->pages;
-	WARN_ON_ONCE(arg->page_base != 0);
-	if (first->iov_base == 0) {
-		result = page_address(*pages);
-		result[total] = '\0';
-	} else {
-		size_t len, remaining;
-		char *dst;
+	result = kmalloc(total + 1, GFP_KERNEL);
+	if (!result)
+		return ERR_PTR(-ESERVERFAULT);
 
-		result = page_address(*(rqstp->rq_next_page++));
-		dst = result;
-		remaining = total;
+	dst = result;
+	remaining = total;
 
-		len = min_t(size_t, total, first->iov_len);
+	len = min_t(size_t, total, first->iov_len);
+	if (len) {
 		memcpy(dst, first->iov_base, len);
 		dst += len;
 		remaining -= len;
+	}
 
-		/* No more than one page left */
-		if (remaining) {
-			len = min_t(size_t, remaining, PAGE_SIZE);
-			memcpy(dst, page_address(*pages), len);
-			dst += len;
-		}
-
-		*dst = '\0';
+	if (remaining) {
+		len = min_t(size_t, remaining, PAGE_SIZE);
+		memcpy(dst, p, len);
+		dst += len;
 	}
 
-	/* Sanity check: we don't allow the pathname argument to
+	*dst = '\0';
+
+	/* Sanity check: Linux doesn't allow the pathname argument to
 	 * contain a NUL byte.
 	 */
-	if (strlen(result) != total)
+	if (strlen(result) != total) {
+		kfree(result);
 		return ERR_PTR(-EINVAL);
+	}
 	return result;
 }
 EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname);
-- 
cgit v1.2.3


From 2d2917f7747805a1f4188672f308d82a8ba01700 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 9 Aug 2018 14:04:14 -0600
Subject: PCI: Export pcie_has_flr()

pcie_flr() suggests pcie_has_flr() to ensure that PCIe FLR support is
present prior to calling.  pcie_flr() is exported while pcie_has_flr()
is not.  Resolve this.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 3 ++-
 include/linux/pci.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d123c2b173da..85e5b80a69a7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4039,7 +4039,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
  * Returns true if the device advertises support for PCIe function level
  * resets.
  */
-static bool pcie_has_flr(struct pci_dev *dev)
+bool pcie_has_flr(struct pci_dev *dev)
 {
 	u32 cap;
 
@@ -4049,6 +4049,7 @@ static bool pcie_has_flr(struct pci_dev *dev)
 	pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap);
 	return cap & PCI_EXP_DEVCAP_FLR;
 }
+EXPORT_SYMBOL_GPL(pcie_has_flr);
 
 /**
  * pcie_flr - initiate a PCIe function level reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 307b336496f6..bace761deff2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1090,6 +1090,7 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev,
 			     enum pci_bus_speed *speed,
 			     enum pcie_link_width *width);
 void pcie_print_link_status(struct pci_dev *dev);
+bool pcie_has_flr(struct pci_dev *dev);
 int pcie_flr(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
-- 
cgit v1.2.3


From 5e7baf0fcb2a3aef7329f3c7543d4695a46bd321 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manish.chopra@cavium.com>
Date: Thu, 9 Aug 2018 11:13:49 -0700
Subject: qed/qede: Multi CoS support.

This patch adds support for tc mqprio offload,
using this different traffic classes on the adapter
can be utilized based on configured priority to tc map.

For example -

tc qdisc add dev eth0 root mqprio num_tc 4 map 0 1 2 3

This will cause SKBs with priority 0,1,2,3 to transmit
over tc 0,1,2,3 hardware queues respectively.

Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c        |   9 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c      |   5 +-
 drivers/net/ethernet/qlogic/qede/qede.h         |  13 +++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  48 ++++++--
 drivers/net/ethernet/qlogic/qede/qede_fp.c      |  29 +++--
 drivers/net/ethernet/qlogic/qede/qede_main.c    | 139 +++++++++++++++++++-----
 include/linux/qed/qed_eth_if.h                  |   6 +
 7 files changed, 200 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 5ede6408649d..82a1bd1f8a8c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2188,16 +2188,17 @@ out:
 static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 				 struct qed_dev_eth_info *info)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	int i;
 
 	memset(info, 0, sizeof(*info));
 
-	info->num_tc = 1;
-
 	if (IS_PF(cdev)) {
 		int max_vf_vlan_filters = 0;
 		int max_vf_mac_filters = 0;
 
+		info->num_tc = p_hwfn->hw_info.num_hw_tc;
+
 		if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
 			u16 num_queues = 0;
 
@@ -2248,6 +2249,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 	} else {
 		u16 total_cids = 0;
 
+		info->num_tc = 1;
+
 		/* Determine queues &  XDP support */
 		for_each_hwfn(cdev, i) {
 			struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -2554,7 +2557,7 @@ static int qed_start_txq(struct qed_dev *cdev,
 
 	rc = qed_eth_tx_queue_start(p_hwfn,
 				    p_hwfn->hw_info.opaque_fid,
-				    p_params, 0,
+				    p_params, p_params->tc,
 				    pbl_addr, pbl_size, ret_params);
 
 	if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index dbe81310c0b6..2094d86a7a08 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -948,13 +948,14 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 		params->eth_pf_params.num_arfs_filters = 0;
 
 	/* In case we might support RDMA, don't allow qede to be greedy
-	 * with the L2 contexts. Allow for 64 queues [rx, tx, xdp] per hwfn.
+	 * with the L2 contexts. Allow for 64 queues [rx, tx cos, xdp]
+	 * per hwfn.
 	 */
 	if (QED_IS_RDMA_PERSONALITY(QED_LEADING_HWFN(cdev))) {
 		u16 *num_cons;
 
 		num_cons = &params->eth_pf_params.num_cons;
-		*num_cons = min_t(u16, *num_cons, 192);
+		*num_cons = min_t(u16, *num_cons, QED_MAX_L2_CONS);
 	}
 
 	for (i = 0; i < cdev->num_hwfns; i++) {
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index d7ed0d3dbf71..e90c60a8fb01 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -386,6 +386,15 @@ struct qede_tx_queue {
 #define QEDE_TXQ_XDP_TO_IDX(edev, txq)	((txq)->index - \
 					 QEDE_MAX_TSS_CNT(edev))
 #define QEDE_TXQ_IDX_TO_XDP(edev, idx)	((idx) + QEDE_MAX_TSS_CNT(edev))
+#define QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)	((edev)->fp_num_rx + \
+						 ((idx) % QEDE_TSS_COUNT(edev)))
+#define QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)	((idx) / QEDE_TSS_COUNT(edev))
+#define QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq)	((QEDE_TSS_COUNT(edev) * \
+						 (txq)->cos) + (txq)->index)
+#define QEDE_NDEV_TXQ_ID_TO_TXQ(edev, idx)	\
+	(&((edev)->fp_array[QEDE_NDEV_TXQ_ID_TO_FP_ID(edev, idx)].txq \
+	[QEDE_NDEV_TXQ_ID_TO_TXQ_COS(edev, idx)]))
+#define QEDE_FP_TC0_TXQ(fp)	(&((fp)->txq[0]))
 
 	/* Regular Tx requires skb + metadata for release purpose,
 	 * while XDP requires the pages and the mapped address.
@@ -399,6 +408,8 @@ struct qede_tx_queue {
 
 	/* Slowpath; Should be kept in end [unless missing padding] */
 	void *handle;
+	u16 cos;
+	u16 ndev_txq_id;
 };
 
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -541,5 +552,7 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 #define QEDE_RX_HDR_SIZE		256
 #define QEDE_MAX_JUMBO_PACKET_SIZE	9600
 #define	for_each_queue(i) for (i = 0; i < edev->num_queues; i++)
+#define for_each_cos_in_txq(edev, var) \
+	for ((var) = 0; (var) < (edev)->dev_info.num_tc; (var)++)
 
 #endif /* _QEDE_H_ */
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index b37857f3f950..2bd84d6d9b15 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -222,7 +222,7 @@ static void qede_get_strings_stats_txq(struct qede_dev *edev,
 				QEDE_TXQ_XDP_TO_IDX(edev, txq),
 				qede_tqstats_arr[i].string);
 		else
-			sprintf(*buf, "%d: %s", txq->index,
+			sprintf(*buf, "%d_%d: %s", txq->index, txq->cos,
 				qede_tqstats_arr[i].string);
 		*buf += ETH_GSTRING_LEN;
 	}
@@ -262,8 +262,13 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 		if (fp->type & QEDE_FASTPATH_XDP)
 			qede_get_strings_stats_txq(edev, fp->xdp_tx, &buf);
 
-		if (fp->type & QEDE_FASTPATH_TX)
-			qede_get_strings_stats_txq(edev, fp->txq, &buf);
+		if (fp->type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos)
+				qede_get_strings_stats_txq(edev,
+							   &fp->txq[cos], &buf);
+		}
 	}
 
 	/* Account for non-queue statistics */
@@ -338,8 +343,12 @@ static void qede_get_ethtool_stats(struct net_device *dev,
 		if (fp->type & QEDE_FASTPATH_XDP)
 			qede_get_ethtool_stats_txq(fp->xdp_tx, &buf);
 
-		if (fp->type & QEDE_FASTPATH_TX)
-			qede_get_ethtool_stats_txq(fp->txq, &buf);
+		if (fp->type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos)
+				qede_get_ethtool_stats_txq(&fp->txq[cos], &buf);
+		}
 	}
 
 	for (i = 0; i < QEDE_NUM_STATS; i++) {
@@ -366,7 +375,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
 				num_stats--;
 
 		/* Account for the Regular Tx statistics */
-		num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+		num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS *
+				edev->dev_info.num_tc;
 
 		/* Account for the Regular Rx statistics */
 		num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS;
@@ -741,9 +751,17 @@ static int qede_get_coalesce(struct net_device *dev,
 		}
 
 		for_each_queue(i) {
+			struct qede_tx_queue *txq;
+
 			fp = &edev->fp_array[i];
+
+			/* All TX queues of given fastpath uses same
+			 * coalescing value, so no need to iterate over
+			 * all TCs, TC0 txq should suffice.
+			 */
 			if (fp->type & QEDE_FASTPATH_TX) {
-				tx_handle = fp->txq->handle;
+				txq = QEDE_FP_TC0_TXQ(fp);
+				tx_handle = txq->handle;
 				break;
 			}
 		}
@@ -801,9 +819,17 @@ static int qede_set_coalesce(struct net_device *dev,
 		}
 
 		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			struct qede_tx_queue *txq;
+
+			/* All TX queues of given fastpath uses same
+			 * coalescing value, so no need to iterate over
+			 * all TCs, TC0 txq should suffice.
+			 */
+			txq = QEDE_FP_TC0_TXQ(fp);
+
 			rc = edev->ops->common->set_coalesce(edev->cdev,
 							     0, txc,
-							     fp->txq->handle);
+							     txq->handle);
 			if (rc) {
 				DP_INFO(edev,
 					"Set TX coalesce error, rc = %d\n", rc);
@@ -1385,8 +1411,10 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 	u16 val;
 
 	for_each_queue(i) {
-		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			txq = edev->fp_array[i].txq;
+		struct qede_fastpath *fp = &edev->fp_array[i];
+
+		if (fp->type & QEDE_FASTPATH_TX) {
+			txq = QEDE_FP_TC0_TXQ(fp);
 			break;
 		}
 	}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 8c9e95ba9917..1a78027de071 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -408,12 +408,12 @@ static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 
 static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
+	unsigned int pkts_compl = 0, bytes_compl = 0;
 	struct netdev_queue *netdev_txq;
 	u16 hw_bd_cons;
-	unsigned int pkts_compl = 0, bytes_compl = 0;
 	int rc;
 
-	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->index);
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
 
 	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
 	barrier();
@@ -1365,9 +1365,14 @@ static bool qede_poll_is_more_work(struct qede_fastpath *fp)
 		if (qede_txq_has_work(fp->xdp_tx))
 			return true;
 
-	if (likely(fp->type & QEDE_FASTPATH_TX))
-		if (qede_txq_has_work(fp->txq))
-			return true;
+	if (likely(fp->type & QEDE_FASTPATH_TX)) {
+		int cos;
+
+		for_each_cos_in_txq(fp->edev, cos) {
+			if (qede_txq_has_work(&fp->txq[cos]))
+				return true;
+		}
+	}
 
 	return false;
 }
@@ -1382,8 +1387,14 @@ int qede_poll(struct napi_struct *napi, int budget)
 	struct qede_dev *edev = fp->edev;
 	int rx_work_done = 0;
 
-	if (likely(fp->type & QEDE_FASTPATH_TX) && qede_txq_has_work(fp->txq))
-		qede_tx_int(edev, fp->txq);
+	if (likely(fp->type & QEDE_FASTPATH_TX)) {
+		int cos;
+
+		for_each_cos_in_txq(fp->edev, cos) {
+			if (qede_txq_has_work(&fp->txq[cos]))
+				qede_tx_int(edev, &fp->txq[cos]);
+		}
+	}
 
 	if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
 		qede_xdp_tx_int(edev, fp->xdp_tx);
@@ -1444,8 +1455,8 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 	/* Get tx-queue context and netdev index */
 	txq_index = skb_get_queue_mapping(skb);
-	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
-	txq = edev->fp_array[edev->fp_num_rx + txq_index].txq;
+	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev) * edev->dev_info.num_tc);
+	txq = QEDE_NDEV_TXQ_ID_TO_TXQ(edev, txq_index);
 	netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
 	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 6a796040a32c..d7299afa902c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -536,6 +536,43 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return 0;
 }
 
+int qede_setup_tc(struct net_device *ndev, u8 num_tc)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+	int cos, count, offset;
+
+	if (num_tc > edev->dev_info.num_tc)
+		return -EINVAL;
+
+	netdev_reset_tc(ndev);
+	netdev_set_num_tc(ndev, num_tc);
+
+	for_each_cos_in_txq(edev, cos) {
+		count = QEDE_TSS_COUNT(edev);
+		offset = cos * QEDE_TSS_COUNT(edev);
+		netdev_set_tc_queue(ndev, cos, count, offset);
+	}
+
+	return 0;
+}
+
+static int
+qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type,
+		      void *type_data)
+{
+	struct tc_mqprio_qopt *mqprio;
+
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		mqprio = type_data;
+
+		mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+		return qede_setup_tc(dev, mqprio->num_tc);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops qede_netdev_ops = {
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
@@ -568,6 +605,7 @@ static const struct net_device_ops qede_netdev_ops = {
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer = qede_rx_flow_steer,
 #endif
+	.ndo_setup_tc = qede_setup_tc_offload,
 };
 
 static const struct net_device_ops qede_netdev_vf_ops = {
@@ -621,7 +659,8 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 	struct qede_dev *edev;
 
 	ndev = alloc_etherdev_mqs(sizeof(*edev),
-				  info->num_queues, info->num_queues);
+				  info->num_queues * info->num_tc,
+				  info->num_queues);
 	if (!ndev) {
 		pr_err("etherdev allocation failed\n");
 		return NULL;
@@ -830,7 +869,8 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			fp->txq = kzalloc(sizeof(*fp->txq), GFP_KERNEL);
+			fp->txq = kcalloc(edev->dev_info.num_tc,
+					  sizeof(*fp->txq), GFP_KERNEL);
 			if (!fp->txq)
 				goto err;
 		}
@@ -879,10 +919,15 @@ static void qede_sp_task(struct work_struct *work)
 static void qede_update_pf_params(struct qed_dev *cdev)
 {
 	struct qed_pf_params pf_params;
+	u16 num_cons;
 
 	/* 64 rx + 64 tx + 64 XDP */
 	memset(&pf_params, 0, sizeof(struct qed_pf_params));
-	pf_params.eth_pf_params.num_cons = (MAX_SB_PER_PF_MIMD - 1) * 3;
+
+	/* 1 rx + 1 xdp + max tx cos */
+	num_cons = QED_MIN_L2_CONS;
+
+	pf_params.eth_pf_params.num_cons = (MAX_SB_PER_PF_MIMD - 1) * num_cons;
 
 	/* Same for VFs - make sure they'll have sufficient connections
 	 * to support XDP Tx queues.
@@ -1363,8 +1408,12 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 	if (fp->type & QEDE_FASTPATH_XDP)
 		qede_free_mem_txq(edev, fp->xdp_tx);
 
-	if (fp->type & QEDE_FASTPATH_TX)
-		qede_free_mem_txq(edev, fp->txq);
+	if (fp->type & QEDE_FASTPATH_TX) {
+		int cos;
+
+		for_each_cos_in_txq(edev, cos)
+			qede_free_mem_txq(edev, &fp->txq[cos]);
+	}
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
@@ -1391,9 +1440,13 @@ static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 	}
 
 	if (fp->type & QEDE_FASTPATH_TX) {
-		rc = qede_alloc_mem_txq(edev, fp->txq);
-		if (rc)
-			goto out;
+		int cos;
+
+		for_each_cos_in_txq(edev, cos) {
+			rc = qede_alloc_mem_txq(edev, &fp->txq[cos]);
+			if (rc)
+				goto out;
+		}
 	}
 
 out:
@@ -1466,10 +1519,23 @@ static void qede_init_fp(struct qede_dev *edev)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			fp->txq->index = txq_index++;
-			if (edev->dev_info.is_legacy)
-				fp->txq->is_legacy = 1;
-			fp->txq->dev = &edev->pdev->dev;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				struct qede_tx_queue *txq = &fp->txq[cos];
+				u16 ndev_tx_id;
+
+				txq->cos = cos;
+				txq->index = txq_index;
+				ndev_tx_id = QEDE_TXQ_TO_NDEV_TXQ_ID(edev, txq);
+				txq->ndev_txq_id = ndev_tx_id;
+
+				if (edev->dev_info.is_legacy)
+					txq->is_legacy = 1;
+				txq->dev = &edev->pdev->dev;
+			}
+
+			txq_index++;
 		}
 
 		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
@@ -1483,7 +1549,9 @@ static int qede_set_real_num_queues(struct qede_dev *edev)
 {
 	int rc = 0;
 
-	rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev));
+	rc = netif_set_real_num_tx_queues(edev->ndev,
+					  QEDE_TSS_COUNT(edev) *
+					  edev->dev_info.num_tc);
 	if (rc) {
 		DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
 		return rc;
@@ -1685,9 +1753,13 @@ static int qede_stop_queues(struct qede_dev *edev)
 		fp = &edev->fp_array[i];
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_drain_txq(edev, fp->txq, true);
-			if (rc)
-				return rc;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_drain_txq(edev, &fp->txq[cos], true);
+				if (rc)
+					return rc;
+			}
 		}
 
 		if (fp->type & QEDE_FASTPATH_XDP) {
@@ -1703,9 +1775,13 @@ static int qede_stop_queues(struct qede_dev *edev)
 
 		/* Stop the Tx Queue(s) */
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_stop_txq(edev, fp->txq, i);
-			if (rc)
-				return rc;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_stop_txq(edev, &fp->txq[cos], i);
+				if (rc)
+					return rc;
+			}
 		}
 
 		/* Stop the Rx Queue */
@@ -1758,6 +1834,7 @@ static int qede_start_txq(struct qede_dev *edev,
 
 	params.p_sb = fp->sb_info;
 	params.sb_idx = sb_idx;
+	params.tc = txq->cos;
 
 	rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
 				   page_cnt, &ret_params);
@@ -1877,9 +1954,14 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			rc = qede_start_txq(edev, fp, fp->txq, i, TX_PI(0));
-			if (rc)
-				goto out;
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				rc = qede_start_txq(edev, fp, &fp->txq[cos], i,
+						    TX_PI(cos));
+				if (rc)
+					goto out;
+			}
 		}
 	}
 
@@ -1973,6 +2055,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 		     bool is_locked)
 {
 	struct qed_link_params link_params;
+	u8 num_tc;
 	int rc;
 
 	DP_INFO(edev, "Starting qede load\n");
@@ -2019,6 +2102,10 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
 		goto err4;
 	DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
 
+	num_tc = netdev_get_num_tc(edev->ndev);
+	num_tc = num_tc ? num_tc : edev->dev_info.num_tc;
+	qede_setup_tc(edev->ndev, num_tc);
+
 	/* Program un-configured VLANs */
 	qede_configure_vlan_filters(edev);
 
@@ -2143,7 +2230,7 @@ static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
 
-	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->index);
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
 	if (netif_xmit_stopped(netdev_txq))
 		return true;
 
@@ -2208,9 +2295,11 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
 	for_each_queue(i) {
 		fp = &edev->fp_array[i];
 		if (fp->type & QEDE_FASTPATH_TX) {
-			if (fp->txq->sw_tx_cons != fp->txq->sw_tx_prod)
+			struct qede_tx_queue *txq = QEDE_FP_TC0_TXQ(fp);
+
+			if (txq->sw_tx_cons != txq->sw_tx_prod)
 				etlv->txqs_empty = false;
-			if (qede_is_txq_full(edev, fp->txq))
+			if (qede_is_txq_full(edev, txq))
 				etlv->num_txqs_full++;
 		}
 		if (fp->type & QEDE_FASTPATH_RX) {
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 2978fa4add42..a1310482c4ed 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -39,6 +39,10 @@
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_iov_if.h>
 
+/* 64 max queues * (1 rx + 4 tx-cos + 1 xdp) */
+#define QED_MIN_L2_CONS (2 + NUM_PHYS_TCS_4PORT_K2)
+#define QED_MAX_L2_CONS (64 * (QED_MIN_L2_CONS))
+
 struct qed_queue_start_common_params {
 	/* Should always be relative to entity sending this. */
 	u8 vport_id;
@@ -49,6 +53,8 @@ struct qed_queue_start_common_params {
 
 	struct qed_sb_info *p_sb;
 	u8 sb_idx;
+
+	u8 tc;
 };
 
 struct qed_rxq_start_ret_params {
-- 
cgit v1.2.3


From 15693fd37fc3a49da356164ed15b571c175efc34 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 8 Aug 2018 19:40:31 +0800
Subject: net: skbuff.h: fix using plain integer as NULL warning

Fixes the following sparse warning:
./include/linux/skbuff.h:2365:58: warning: Using plain integer as NULL pointer

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ebdf158a795..7e237a63a70c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2362,7 +2362,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 	if (skb_transport_header_was_set(skb))
 		return;
 
-	if (skb_flow_dissect_flow_keys_basic(skb, &keys, 0, 0, 0, 0, 0))
+	if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
 	else
 		skb_set_transport_header(skb, offset_hint);
-- 
cgit v1.2.3


From bd2e9567db72e37f7f4b90faa5133bc7365b5f65 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 9 Aug 2018 16:19:52 -0500
Subject: PCI: Hide ACS quirk declarations inside PCI core

Move declarations for these functions:

  pci_dev_specific_acs_enabled()
  pci_dev_specific_enable_acs()

from include/linux/pci.h to drivers/pci/pci.h because nothing outside the
PCI core needs to use them.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 14 ++++++++++++++
 include/linux/pci.h | 11 -----------
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c358e7a07f3f..a1224fef3409 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -352,6 +352,20 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev,
 }
 
 void pci_enable_acs(struct pci_dev *dev);
+#ifdef CONFIG_PCI_QUIRKS
+int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
+int pci_dev_specific_enable_acs(struct pci_dev *dev);
+#else
+static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
+					       u16 acs_flags)
+{
+	return -ENOTTY;
+}
+static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
+{
+	return -ENOTTY;
+}
+#endif
 
 /* PCI error reporting and recovery */
 void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..1e1ed049dd1c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1876,20 +1876,9 @@ enum pci_fixup_pass {
 
 #ifdef CONFIG_PCI_QUIRKS
 void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
-int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags);
-int pci_dev_specific_enable_acs(struct pci_dev *dev);
 #else
 static inline void pci_fixup_device(enum pci_fixup_pass pass,
 				    struct pci_dev *dev) { }
-static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev,
-					       u16 acs_flags)
-{
-	return -ENOTTY;
-}
-static inline int pci_dev_specific_enable_acs(struct pci_dev *dev)
-{
-	return -ENOTTY;
-}
 #endif
 
 void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
-- 
cgit v1.2.3


From b0768a86585d4d951a30ff565f19598dbbd67897 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:09 +0900
Subject: net: Export skb_headers_offset_update

This is needed for veth XDP which does skb_copy_expand()-like operation.

v2:
- Drop skb_copy_header part because it has already been exported now.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skbuff.h | 1 +
 net/core/skbuff.c      | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ebdf158a795..e93b157f526c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1038,6 +1038,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 }
 
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
+void skb_headers_offset_update(struct sk_buff *skb, int off);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
 void skb_copy_header(struct sk_buff *new, const struct sk_buff *old);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8d574a88125d..c996c09d095f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1291,7 +1291,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(skb_clone);
 
-static void skb_headers_offset_update(struct sk_buff *skb, int off)
+void skb_headers_offset_update(struct sk_buff *skb, int off)
 {
 	/* Only adjust this if it actually is csum_start rather than csum */
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -1305,6 +1305,7 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off)
 	skb->inner_network_header += off;
 	skb->inner_mac_header += off;
 }
+EXPORT_SYMBOL(skb_headers_offset_update);
 
 void skb_copy_header(struct sk_buff *new, const struct sk_buff *old)
 {
-- 
cgit v1.2.3


From a8d5b4ab353738e16e5f9d21ab1e3d44b37983d0 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:12 +0900
Subject: xdp: Helper function to clear kernel pointers in xdp_frame

xdp_frame has kernel pointers which should not be readable from bpf
programs. When we want to reuse xdp_frame region but it may be read by
bpf programs later, we can use this helper to clear kernel pointers.
This is more efficient than calling memset() for the entire struct.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index fcb033f51d8c..76b95256c266 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -84,6 +84,13 @@ struct xdp_frame {
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+/* Clear kernel pointers in xdp_frame */
+static inline void xdp_scrub_frame(struct xdp_frame *frame)
+{
+	frame->data = NULL;
+	frame->dev_rx = NULL;
+}
+
 /* Convert xdp_buff to xdp_frame */
 static inline
 struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
-- 
cgit v1.2.3


From 0b19cc0a8694d4295383f88bc3441765875a57bc Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:15 +0900
Subject: bpf: Make redirect_info accessible from modules

We are going to add kern_flags field in redirect_info for kernel
internal use.
In order to avoid function call to access the flags, make redirect_info
accessible from modules. Also as it is now non-static, add prefix bpf_
to redirect_info.

v6:
- Fix sparse warning around EXPORT_SYMBOL.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 10 ++++++++++
 net/core/filter.c      | 29 +++++++++++------------------
 2 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c73dd7396886..4717af8b95e6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -537,6 +537,16 @@ struct sk_msg_buff {
 	struct list_head list;
 };
 
+struct bpf_redirect_info {
+	u32 ifindex;
+	u32 flags;
+	struct bpf_map *map;
+	struct bpf_map *map_to_flush;
+	unsigned long   map_owner;
+};
+
+DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
diff --git a/net/core/filter.c b/net/core/filter.c
index 587bbfbd7db3..2de7dd9f2a57 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2082,19 +2082,12 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = {
 	.arg3_type      = ARG_ANYTHING,
 };
 
-struct redirect_info {
-	u32 ifindex;
-	u32 flags;
-	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
-};
-
-static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
 
 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return TC_ACT_SHOT;
@@ -2107,7 +2100,7 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 
 int skb_do_redirect(struct sk_buff *skb)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *dev;
 
 	dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
@@ -3200,7 +3193,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 
 void xdp_do_flush_map(void)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct bpf_map *map = ri->map_to_flush;
 
 	ri->map_to_flush = NULL;
@@ -3245,7 +3238,7 @@ static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
 			       struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
@@ -3285,7 +3278,7 @@ err:
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	struct net_device *fwd;
 	u32 index = ri->ifindex;
 	int err;
@@ -3317,7 +3310,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct xdp_buff *xdp,
 				       struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	unsigned long map_owner = ri->map_owner;
 	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
@@ -3368,7 +3361,7 @@ err:
 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
@@ -3399,7 +3392,7 @@ EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
 
 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags))
 		return XDP_ABORTED;
@@ -3423,7 +3416,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
 	   unsigned long, map_owner)
 {
-	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
 	if (unlikely(flags))
 		return XDP_ABORTED;
-- 
cgit v1.2.3


From 2539650fadbf63a431e76535a9de7bff6ea5e409 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Date: Fri, 3 Aug 2018 16:58:16 +0900
Subject: xdp: Helpers for disabling napi_direct of xdp_return_frame

We need some mechanism to disable napi_direct on calling
xdp_return_frame_rx_napi() from some context.
When veth gets support of XDP_REDIRECT, it will redirects packets which
are redirected from other devices. On redirection veth will reuse
xdp_mem_info of the redirection source device to make return_frame work.
But in this case .ndo_xdp_xmit() called from veth redirection uses
xdp_mem_info which is not guarded by NAPI, because the .ndo_xdp_xmit()
is not called directly from the rxq which owns the xdp_mem_info.

This approach introduces a flag in bpf_redirect_info to indicate that
napi_direct should be disabled even when _rx_napi variant is used as
well as helper functions to use it.

A NAPI handler who wants to use this flag needs to call
xdp_set_return_frame_no_direct() before processing packets, and call
xdp_clear_return_frame_no_direct() after xdp_do_flush_map() before
exiting NAPI.

v4:
- Use bpf_redirect_info for storing the flag instead of xdp_mem_info to
  avoid per-frame copy cost.

Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 25 +++++++++++++++++++++++++
 net/core/xdp.c         |  6 ++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4717af8b95e6..2b072dab32c0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,10 +543,14 @@ struct bpf_redirect_info {
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
 	unsigned long   map_owner;
+	u32 kern_flags;
 };
 
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
 
+/* flags for bpf_redirect_info kern_flags */
+#define BPF_RI_F_RF_NO_DIRECT	BIT(0)	/* no napi_direct on return_frame */
+
 /* Compute the linear packet data range [data, data_end) which
  * will be accessed by various program types (cls_bpf, act_bpf,
  * lwt, ...). Subsystems allowing direct data access must (!)
@@ -775,6 +779,27 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+static inline bool xdp_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_set_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT;
+}
+
+static inline void xdp_clear_return_frame_no_direct(void)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+	ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT;
+}
+
 static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
 				 unsigned int pktlen)
 {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 57285383ed00..3dd99e1c04f5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -330,10 +330,12 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
-		if (xa)
+		if (xa) {
+			napi_direct &= !xdp_return_frame_no_direct();
 			page_pool_put_page(xa->page_pool, page, napi_direct);
-		else
+		} else {
 			put_page(page);
+		}
 		rcu_read_unlock();
 		break;
 	case MEM_TYPE_PAGE_SHARED:
-- 
cgit v1.2.3


From aa12af77aae05008b3e637b85944dcd512f75eba Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Tue, 7 Aug 2018 13:16:35 +0530
Subject: Bluetooth: Add definitions for LE set address resolution

Add the definitions for LE address resolution enable HCI commands.
When the LE address resolution enable gets changed via HCI commands
make sure that flag gets updated.

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  3 +++
 net/bluetooth/hci_event.c   | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 4619a79b1bbb..cdd9f1fe7cfa 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -269,6 +269,7 @@ enum {
 	HCI_VENDOR_DIAG,
 	HCI_FORCE_BREDR_SMP,
 	HCI_FORCE_STATIC_ADDR,
+	HCI_LL_RPA_RESOLUTION,
 
 	__HCI_NUM_FLAGS,
 };
@@ -1524,6 +1525,8 @@ struct hci_rp_le_read_resolv_list_size {
 	__u8	size;
 } __packed;
 
+#define HCI_OP_LE_SET_ADDR_RESOLV_ENABLE 0x202d
+
 #define HCI_OP_LE_READ_MAX_DATA_LEN	0x202f
 struct hci_rp_le_read_max_data_len {
 	__u8	status;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 8078587572fe..f12555f23a49 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1480,6 +1480,30 @@ static void hci_cc_le_read_resolv_list_size(struct hci_dev *hdev,
 	hdev->le_resolv_list_size = rp->size;
 }
 
+static void hci_cc_le_set_addr_resolution_enable(struct hci_dev *hdev,
+						struct sk_buff *skb)
+{
+	__u8 *sent, status = *((__u8 *) skb->data);
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE);
+	if (!sent)
+		return;
+
+	hci_dev_lock(hdev);
+
+	if (*sent)
+		hci_dev_set_flag(hdev, HCI_LL_RPA_RESOLUTION);
+	else
+		hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);
+
+	hci_dev_unlock(hdev);
+}
+
 static void hci_cc_le_read_max_data_len(struct hci_dev *hdev,
 					struct sk_buff *skb)
 {
@@ -3263,6 +3287,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_le_read_resolv_list_size(hdev, skb);
 		break;
 
+	case HCI_OP_LE_SET_ADDR_RESOLV_ENABLE:
+		hci_cc_le_set_addr_resolution_enable(hdev, skb);
+		break;
+
 	case HCI_OP_LE_READ_MAX_DATA_LEN:
 		hci_cc_le_read_max_data_len(hdev, skb);
 		break;
-- 
cgit v1.2.3


From 0db021f7a2731cc559f0c5beb19ff3f677ff8626 Mon Sep 17 00:00:00 2001
From: David Collins <collinsd@codeaurora.org>
Date: Fri, 13 Jul 2018 18:50:58 -0700
Subject: regulator: dt-bindings: add QCOM RPMh regulator bindings

Introduce bindings for RPMh regulator devices found on some
Qualcomm Technlogies, Inc. SoCs.  These devices allow a given
processor within the SoC to make PMIC regulator requests which
are aggregated within the RPMh hardware block along with requests
from other processors in the SoC to determine the final PMIC
regulator hardware state.

Signed-off-by: David Collins <collinsd@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/qcom,rpmh-regulator.txt     | 160 +++++++++++++++++++++
 .../dt-bindings/regulator/qcom,rpmh-regulator.h    |  36 +++++
 2 files changed, 196 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
 create mode 100644 include/dt-bindings/regulator/qcom,rpmh-regulator.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
new file mode 100644
index 000000000000..7ef2dbe48e8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt
@@ -0,0 +1,160 @@
+Qualcomm Technologies, Inc. RPMh Regulators
+
+rpmh-regulator devices support PMIC regulator management via the Voltage
+Regulator Manager (VRM) and Oscillator Buffer (XOB) RPMh accelerators.  The APPS
+processor communicates with these hardware blocks via a Resource State
+Coordinator (RSC) using command packets.  The VRM allows changing three
+parameters for a given regulator: enable state, output voltage, and operating
+mode.  The XOB allows changing only a single parameter for a given regulator:
+its enable state.  Despite its name, the XOB is capable of controlling the
+enable state of any PMIC peripheral.  It is used for clock buffers, low-voltage
+switches, and LDO/SMPS regulators which have a fixed voltage and mode.
+
+=======================
+Required Node Structure
+=======================
+
+RPMh regulators must be described in two levels of device nodes.  The first
+level describes the PMIC containing the regulators and must reside within an
+RPMh device node.  The second level describes each regulator within the PMIC
+which is to be used on the board.  Each of these regulators maps to a single
+RPMh resource.
+
+The names used for regulator nodes must match those supported by a given PMIC.
+Supported regulator node names:
+	PM8998:		smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
+	PMI8998:	bob
+	PM8005:		smps1 - smps4
+
+========================
+First Level Nodes - PMIC
+========================
+
+- compatible
+	Usage:      required
+	Value type: <string>
+	Definition: Must be one of: "qcom,pm8998-rpmh-regulators",
+		    "qcom,pmi8998-rpmh-regulators" or
+		    "qcom,pm8005-rpmh-regulators".
+
+- qcom,pmic-id
+	Usage:      required
+	Value type: <string>
+	Definition: RPMh resource name suffix used for the regulators found on
+		    this PMIC.  Typical values: "a", "b", "c", "d", "e", "f".
+
+- vdd-s1-supply
+- vdd-s2-supply
+- vdd-s3-supply
+- vdd-s4-supply
+	Usage:      optional (PM8998 and PM8005 only)
+	Value type: <phandle>
+	Definition: phandle of the parent supply regulator of one or more of the
+		    regulators for this PMIC.
+
+- vdd-s5-supply
+- vdd-s6-supply
+- vdd-s7-supply
+- vdd-s8-supply
+- vdd-s9-supply
+- vdd-s10-supply
+- vdd-s11-supply
+- vdd-s12-supply
+- vdd-s13-supply
+- vdd-l1-l27-supply
+- vdd-l2-l8-l17-supply
+- vdd-l3-l11-supply
+- vdd-l4-l5-supply
+- vdd-l6-supply
+- vdd-l7-l12-l14-l15-supply
+- vdd-l9-supply
+- vdd-l10-l23-l25-supply
+- vdd-l13-l19-l21-supply
+- vdd-l16-l28-supply
+- vdd-l18-l22-supply
+- vdd-l20-l24-supply
+- vdd-l26-supply
+- vin-lvs-1-2-supply
+	Usage:      optional (PM8998 only)
+	Value type: <phandle>
+	Definition: phandle of the parent supply regulator of one or more of the
+		    regulators for this PMIC.
+
+- vdd-bob-supply
+	Usage:      optional (PMI8998 only)
+	Value type: <phandle>
+	Definition: BOB regulator parent supply phandle
+
+===============================
+Second Level Nodes - Regulators
+===============================
+
+- qcom,always-wait-for-ack
+	Usage:      optional
+	Value type: <empty>
+	Definition: Boolean flag which indicates that the application processor
+		    must wait for an ACK or a NACK from RPMh for every request
+		    sent for this regulator including those which are for a
+		    strictly lower power state.
+
+Other properties defined in Documentation/devicetree/bindings/regulator.txt
+may also be used.  regulator-initial-mode and regulator-allowed-modes may be
+specified for VRM regulators using mode values from
+include/dt-bindings/regulator/qcom,rpmh-regulator.h.  regulator-allow-bypass
+may be specified for BOB type regulators managed via VRM.
+regulator-allow-set-load may be specified for LDO type regulators managed via
+VRM.
+
+========
+Examples
+========
+
+#include <dt-bindings/regulator/qcom,rpmh-regulator.h>
+
+&apps_rsc {
+	pm8998-rpmh-regulators {
+		compatible = "qcom,pm8998-rpmh-regulators";
+		qcom,pmic-id = "a";
+
+		vdd-l7-l12-l14-l15-supply = <&pm8998_s5>;
+
+		smps2 {
+			regulator-min-microvolt = <1100000>;
+			regulator-max-microvolt = <1100000>;
+		};
+
+		pm8998_s5: smps5 {
+			regulator-min-microvolt = <1904000>;
+			regulator-max-microvolt = <2040000>;
+		};
+
+		ldo7 {
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <1800000>;
+			regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
+			regulator-allowed-modes =
+				<RPMH_REGULATOR_MODE_LPM
+				 RPMH_REGULATOR_MODE_HPM>;
+			regulator-allow-set-load;
+		};
+
+		lvs1 {
+			regulator-min-microvolt = <1800000>;
+			regulator-max-microvolt = <1800000>;
+		};
+	};
+
+	pmi8998-rpmh-regulators {
+		compatible = "qcom,pmi8998-rpmh-regulators";
+		qcom,pmic-id = "b";
+
+		bob {
+			regulator-min-microvolt = <3312000>;
+			regulator-max-microvolt = <3600000>;
+			regulator-allowed-modes =
+				<RPMH_REGULATOR_MODE_AUTO
+				 RPMH_REGULATOR_MODE_HPM>;
+			regulator-initial-mode = <RPMH_REGULATOR_MODE_AUTO>;
+		};
+	};
+};
diff --git a/include/dt-bindings/regulator/qcom,rpmh-regulator.h b/include/dt-bindings/regulator/qcom,rpmh-regulator.h
new file mode 100644
index 000000000000..86713dcf9e02
--- /dev/null
+++ b/include/dt-bindings/regulator/qcom,rpmh-regulator.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#ifndef __QCOM_RPMH_REGULATOR_H
+#define __QCOM_RPMH_REGULATOR_H
+
+/*
+ * These mode constants may be used to specify modes for various RPMh regulator
+ * device tree properties (e.g. regulator-initial-mode).  Each type of regulator
+ * supports a subset of the possible modes.
+ *
+ * %RPMH_REGULATOR_MODE_RET:	Retention mode in which only an extremely small
+ *				load current is allowed.  This mode is supported
+ *				by LDO and SMPS type regulators.
+ * %RPMH_REGULATOR_MODE_LPM:	Low power mode in which a small load current is
+ *				allowed.  This mode corresponds to PFM for SMPS
+ *				and BOB type regulators.  This mode is supported
+ *				by LDO, HFSMPS, BOB, and PMIC4 FTSMPS type
+ *				regulators.
+ * %RPMH_REGULATOR_MODE_AUTO:	Auto mode in which the regulator hardware
+ *				automatically switches between LPM and HPM based
+ *				upon the real-time load current.  This mode is
+ *				supported by HFSMPS, BOB, and PMIC4 FTSMPS type
+ *				regulators.
+ * %RPMH_REGULATOR_MODE_HPM:	High power mode in which the full rated current
+ *				of the regulator is allowed.  This mode
+ *				corresponds to PWM for SMPS and BOB type
+ *				regulators.  This mode is supported by all types
+ *				of regulators.
+ */
+#define RPMH_REGULATOR_MODE_RET		0
+#define RPMH_REGULATOR_MODE_LPM		1
+#define RPMH_REGULATOR_MODE_AUTO	2
+#define RPMH_REGULATOR_MODE_HPM		3
+
+#endif
-- 
cgit v1.2.3


From c9fbb2d25295a566b97d62e6904741e8e1702d83 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Fri, 10 Aug 2018 10:47:43 +0200
Subject: net: Provide stub for __netif_set_xps_queue if there is no CONFIG_XPS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building virtio_net driver without CONFIG_XPS fails with:

    drivers/net/virtio_net.c: In function ‘virtnet_set_affinity’:
    drivers/net/virtio_net.c:1910:3: error: implicit declaration of function ‘__netif_set_xps_queue’ [-Werror=implicit-function-declaration]
       __netif_set_xps_queue(vi->dev, mask, i, false);
       ^
Fixes: 4d99f6602cb5 ("net: allow to call netif_reset_xps_queues() under cpus_read_lock")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 282e2e95ad5b..ca5ab98053c8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3412,6 +3412,13 @@ static inline int netif_set_xps_queue(struct net_device *dev,
 {
 	return 0;
 }
+
+static inline int __netif_set_xps_queue(struct net_device *dev,
+					const unsigned long *mask,
+					u16 index, bool is_rxqs_map)
+{
+	return 0;
+}
 #endif
 
 /**
-- 
cgit v1.2.3


From bff1b208a5d1dbb2355822ef859edcb9be0379e4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 6 Aug 2018 15:50:58 -0400
Subject: tracing: Partial revert of "tracing: Centralize preemptirq
 tracepoints and unify their usage"

Joel Fernandes created a nice patch that cleaned up the duplicate hooks used
by lockdep and irqsoff latency tracer. It made both use tracepoints. But it
caused lockdep to trigger several false positives. We have not figured out
why yet, but removing lockdep from using the trace event hooks and just call
its helper functions directly (like it use to), makes the problem go away.

This is a partial revert of the clean up patch c3bc8fd637a9 ("tracing:
Centralize preemptirq tracepoints and unify their usage") that adds direct
calls for lockdep, but also keeps most of the clean up done to get rid of
the horrible preprocessor if statements.

Link: http://lkml.kernel.org/r/20180806155058.5ee875f4@gandalf.local.home

Cc: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Fixes: c3bc8fd637a9 ("tracing: Centralize preemptirq tracepoints and unify their usage")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/irqflags.h        |  8 ++++++--
 include/linux/lockdep.h         |  2 --
 init/main.c                     |  2 --
 kernel/locking/lockdep.c        | 14 ++------------
 kernel/trace/trace_preemptirq.c | 36 ++++++++++++++++++++----------------
 5 files changed, 28 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 50edb9cbbd26..21619c92c377 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -19,9 +19,13 @@
 #ifdef CONFIG_PROVE_LOCKING
   extern void trace_softirqs_on(unsigned long ip);
   extern void trace_softirqs_off(unsigned long ip);
+  extern void lockdep_hardirqs_on(unsigned long ip);
+  extern void lockdep_hardirqs_off(unsigned long ip);
 #else
-# define trace_softirqs_on(ip)	do { } while (0)
-# define trace_softirqs_off(ip)	do { } while (0)
+  static inline void trace_softirqs_on(unsigned long ip) { }
+  static inline void trace_softirqs_off(unsigned long ip) { }
+  static inline void lockdep_hardirqs_on(unsigned long ip) { }
+  static inline void lockdep_hardirqs_off(unsigned long ip) { }
 #endif
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a8113357ceeb..b0d0b51c4d85 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -267,7 +267,6 @@ struct held_lock {
  * Initialization, self-test and debugging-output methods:
  */
 extern void lockdep_init(void);
-extern void lockdep_init_early(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
@@ -408,7 +407,6 @@ static inline void lockdep_on(void)
 # define lock_set_class(l, n, k, s, i)		do { } while (0)
 # define lock_set_subclass(l, s, i)		do { } while (0)
 # define lockdep_init()				do { } while (0)
-# define lockdep_init_early()			do { } while (0)
 # define lockdep_init_map(lock, name, key, sub) \
 		do { (void)(name); (void)(key); } while (0)
 # define lockdep_set_class(lock, key)		do { (void)(key); } while (0)
diff --git a/init/main.c b/init/main.c
index 44fe43be84c1..5d42e577643a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -649,8 +649,6 @@ asmlinkage __visible void __init start_kernel(void)
 	call_function_init();
 	WARN(!irqs_disabled(), "Interrupts were enabled early\n");
 
-	lockdep_init_early();
-
 	early_boot_irqs_disabled = false;
 	local_irq_enable();
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 03bfaeb9f4e6..e406c5fdb41e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2840,8 +2840,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip)
 	debug_atomic_inc(hardirqs_on_events);
 }
 
-static void lockdep_hardirqs_on(void *none, unsigned long ignore,
-				unsigned long ip)
+void lockdep_hardirqs_on(unsigned long ip)
 {
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
@@ -2885,8 +2884,7 @@ static void lockdep_hardirqs_on(void *none, unsigned long ignore,
 /*
  * Hardirqs were disabled:
  */
-static void lockdep_hardirqs_off(void *none, unsigned long ignore,
-				 unsigned long ip)
+void lockdep_hardirqs_off(unsigned long ip)
 {
 	struct task_struct *curr = current;
 
@@ -4315,14 +4313,6 @@ out_restore:
 	raw_local_irq_restore(flags);
 }
 
-void __init lockdep_init_early(void)
-{
-#ifdef CONFIG_PROVE_LOCKING
-	register_trace_prio_irq_disable(lockdep_hardirqs_off, NULL, INT_MAX);
-	register_trace_prio_irq_enable(lockdep_hardirqs_on, NULL, INT_MIN);
-#endif
-}
-
 void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c
index e76b78bf258e..fa656b25f427 100644
--- a/kernel/trace/trace_preemptirq.c
+++ b/kernel/trace/trace_preemptirq.c
@@ -19,41 +19,45 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu);
 
 void trace_hardirqs_on(void)
 {
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
+	if (this_cpu_read(tracing_irq_cpu)) {
+		trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+		this_cpu_write(tracing_irq_cpu, 0);
+	}
 
-	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
-	this_cpu_write(tracing_irq_cpu, 0);
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on);
 
 void trace_hardirqs_off(void)
 {
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
+	if (!this_cpu_read(tracing_irq_cpu)) {
+		this_cpu_write(tracing_irq_cpu, 1);
+		trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	}
 
-	this_cpu_write(tracing_irq_cpu, 1);
-	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
+	lockdep_hardirqs_off(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_off);
 
 __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
 {
-	if (!this_cpu_read(tracing_irq_cpu))
-		return;
+	if (this_cpu_read(tracing_irq_cpu)) {
+		trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
+		this_cpu_write(tracing_irq_cpu, 0);
+	}
 
-	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
-	this_cpu_write(tracing_irq_cpu, 0);
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
 __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
 {
-	if (this_cpu_read(tracing_irq_cpu))
-		return;
+	if (!this_cpu_read(tracing_irq_cpu)) {
+		this_cpu_write(tracing_irq_cpu, 1);
+		trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+	}
 
-	this_cpu_write(tracing_irq_cpu, 1);
-	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
+	lockdep_hardirqs_off(CALLER_ADDR0);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 #endif /* CONFIG_TRACE_IRQFLAGS */
-- 
cgit v1.2.3


From d799a4de0a250f1bdd99765bb8e55a5e2f469a1f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 3 Aug 2018 00:52:18 +0200
Subject: gpio: mmio: Fix up inverted direction registers

The bgpio_init() takes one of two arguments to specify a register
to set the direction of the GPIO line: either dirout that
indicates that a 1 in the bit in that register sets the
corresponding line to output, or dirin which indicates that
a 1 in the bit in that register sets the corresponding line to
input. Conversely setting the bit to 0 on these will turn the
line into input and output respectively. One of these can
be defined but not both.

This means that a platform that sets a bit to 1 for output
only defines dirout and a platform that sets a bit to 0 for
output only defines dirin. In short this defines the polarity
of the direction register.

Both can also be left as NULL meaning the GPIO chip is either
input only or output only.

Tomer Maimon discovered that for get/set chips (those where the
get and set registers are defined but no separate clear register,
and specifying BGPIOF_READ_OUTPUT_REG_SET so that we say we
want to read the output value from the SET register)
we are unconditionally reading the value from the SET register
when the direction bit is 1 and from the DAT register when the
direction bit is 0, not taking the direction bit polarity into
account.

It would be expected that when the direction bit is inverted
(dirin is defined but not dirout) we read the current value from
the DAT register when the bit is 1 and from the SET register
when the bit is 0.

Currently only some versions of ATH79, brcmstb, some versions of
CLP711x, GE, IOP and Loongson use the dirin mode (a 1 in the
register means input). They are unaffected because
BGPIOF_READ_OUTPUT_REG_SET is not set on any of them. (They
do not read back the SET register to figure out the output
value.) So this is no regression with current drivers.

However the behaviour is wrong and does not work with Tomer's
new driver where he needs to use the BGIOF_READ_OUTPUT_REG_SET.
This fixes the above issue by:

- Instead of defining separate functions for the inverted case,
  set up a flag in the gpio_chip that indicates that the
  direction is inverted.
- Remove the special inverted functions for setting
  input/output and getting the direction, rely on the flag
  instead.
- Respect this flag in bgpio_get_set() and
  bgpio_get_set_multiple()

Reported-by: Tomer Maimon <tmaimon77@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mmio.c    | 108 ++++++++++++++++++++++++++------------------
 include/linux/gpio/driver.h |   3 ++
 2 files changed, 66 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-mmio.c b/drivers/gpio/gpio-mmio.c
index 7b14d6280e44..935292a30c99 100644
--- a/drivers/gpio/gpio-mmio.c
+++ b/drivers/gpio/gpio-mmio.c
@@ -136,8 +136,20 @@ static unsigned long bgpio_line2mask(struct gpio_chip *gc, unsigned int line)
 static int bgpio_get_set(struct gpio_chip *gc, unsigned int gpio)
 {
 	unsigned long pinmask = bgpio_line2mask(gc, gpio);
+	bool dir = !!(gc->bgpio_dir & pinmask);
 
-	if (gc->bgpio_dir & pinmask)
+	/*
+	 * If the direction is OUT we read the value from the SET
+	 * register, and if the direction is IN we read the value
+	 * from the DAT register.
+	 *
+	 * If the direction bits are inverted, naturally this gets
+	 * inverted too.
+	 */
+	if (gc->bgpio_dir_inverted)
+		dir = !dir;
+
+	if (dir)
 		return !!(gc->read_reg(gc->reg_set) & pinmask);
 	else
 		return !!(gc->read_reg(gc->reg_dat) & pinmask);
@@ -157,8 +169,13 @@ static int bgpio_get_set_multiple(struct gpio_chip *gc, unsigned long *mask,
 	*bits &= ~*mask;
 
 	/* Exploit the fact that we know which directions are set */
-	set_mask = *mask & gc->bgpio_dir;
-	get_mask = *mask & ~gc->bgpio_dir;
+	if (gc->bgpio_dir_inverted) {
+		set_mask = *mask & ~gc->bgpio_dir;
+		get_mask = *mask & gc->bgpio_dir;
+	} else {
+		set_mask = *mask & gc->bgpio_dir;
+		get_mask = *mask & ~gc->bgpio_dir;
+	}
 
 	if (set_mask)
 		*bits |= gc->read_reg(gc->reg_set) & set_mask;
@@ -359,7 +376,10 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 
 	spin_lock_irqsave(&gc->bgpio_lock, flags);
 
-	gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	if (gc->bgpio_dir_inverted)
+		gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
+	else
+		gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
 	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
 
 	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
@@ -370,7 +390,10 @@ static int bgpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
 static int bgpio_get_dir(struct gpio_chip *gc, unsigned int gpio)
 {
 	/* Return 0 if output, 1 of input */
-	return !(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
+	if (gc->bgpio_dir_inverted)
+		return !!(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
+	else
+		return !(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
 }
 
 static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
@@ -381,37 +404,10 @@ static int bgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
 
 	spin_lock_irqsave(&gc->bgpio_lock, flags);
 
-	gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
-	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
-
-	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
-
-	return 0;
-}
-
-static int bgpio_dir_in_inv(struct gpio_chip *gc, unsigned int gpio)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&gc->bgpio_lock, flags);
-
-	gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
-	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
-
-	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
-
-	return 0;
-}
-
-static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	unsigned long flags;
-
-	gc->set(gc, gpio, val);
-
-	spin_lock_irqsave(&gc->bgpio_lock, flags);
-
-	gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	if (gc->bgpio_dir_inverted)
+		gc->bgpio_dir &= ~bgpio_line2mask(gc, gpio);
+	else
+		gc->bgpio_dir |= bgpio_line2mask(gc, gpio);
 	gc->write_reg(gc->reg_dir, gc->bgpio_dir);
 
 	spin_unlock_irqrestore(&gc->bgpio_lock, flags);
@@ -419,12 +415,6 @@ static int bgpio_dir_out_inv(struct gpio_chip *gc, unsigned int gpio, int val)
 	return 0;
 }
 
-static int bgpio_get_dir_inv(struct gpio_chip *gc, unsigned int gpio)
-{
-	/* Return 0 if output, 1 if input */
-	return !!(gc->read_reg(gc->reg_dir) & bgpio_line2mask(gc, gpio));
-}
-
 static int bgpio_setup_accessors(struct device *dev,
 				 struct gpio_chip *gc,
 				 bool byte_be)
@@ -560,9 +550,10 @@ static int bgpio_setup_direction(struct gpio_chip *gc,
 		gc->get_direction = bgpio_get_dir;
 	} else if (dirin) {
 		gc->reg_dir = dirin;
-		gc->direction_output = bgpio_dir_out_inv;
-		gc->direction_input = bgpio_dir_in_inv;
-		gc->get_direction = bgpio_get_dir_inv;
+		gc->direction_output = bgpio_dir_out;
+		gc->direction_input = bgpio_dir_in;
+		gc->get_direction = bgpio_get_dir;
+		gc->bgpio_dir_inverted = true;
 	} else {
 		if (flags & BGPIOF_NO_OUTPUT)
 			gc->direction_output = bgpio_dir_out_err;
@@ -582,6 +573,33 @@ static int bgpio_request(struct gpio_chip *chip, unsigned gpio_pin)
 	return -EINVAL;
 }
 
+/**
+ * bgpio_init() - Initialize generic GPIO accessor functions
+ * @gc: the GPIO chip to set up
+ * @dev: the parent device of the new GPIO chip (compulsory)
+ * @sz: the size (width) of the MMIO registers in bytes, typically 1, 2 or 4
+ * @dat: MMIO address for the register to READ the value of the GPIO lines, it
+ *	is expected that a 1 in the corresponding bit in this register means the
+ *	line is asserted
+ * @set: MMIO address for the register to SET the value of the GPIO lines, it is
+ *	expected that we write the line with 1 in this register to drive the GPIO line
+ *	high.
+ * @clr: MMIO address for the register to CLEAR the value of the GPIO lines, it is
+ *	expected that we write the line with 1 in this register to drive the GPIO line
+ *	low. It is allowed to leave this address as NULL, in that case the SET register
+ *	will be assumed to also clear the GPIO lines, by actively writing the line
+ *	with 0.
+ * @dirout: MMIO address for the register to set the line as OUTPUT. It is assumed
+ *	that setting a line to 1 in this register will turn that line into an
+ *	output line. Conversely, setting the line to 0 will turn that line into
+ *	an input. Either this or @dirin can be defined, but never both.
+ * @dirin: MMIO address for the register to set this line as INPUT. It is assumed
+ *	that setting a line to 1 in this register will turn that line into an
+ *	input line. Conversely, setting the line to 0 will turn that line into
+ *	an output. Either this or @dirout can be defined, but never both.
+ * @flags: Different flags that will affect the behaviour of the device, such as
+ *	endianness etc.
+ */
 int bgpio_init(struct gpio_chip *gc, struct device *dev,
 	       unsigned long sz, void __iomem *dat, void __iomem *set,
 	       void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 5382b5183b7e..0ea328e71ec9 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -201,6 +201,8 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip)
  * @reg_set: output set register (out=high) for generic GPIO
  * @reg_clr: output clear register (out=low) for generic GPIO
  * @reg_dir: direction setting register for generic GPIO
+ * @bgpio_dir_inverted: indicates that the direction register is inverted
+ *	(gpiolib private state variable)
  * @bgpio_bits: number of register bits used for a generic GPIO i.e.
  *	<register width> * 8
  * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep
@@ -267,6 +269,7 @@ struct gpio_chip {
 	void __iomem *reg_set;
 	void __iomem *reg_clr;
 	void __iomem *reg_dir;
+	bool bgpio_dir_inverted;
 	int bgpio_bits;
 	spinlock_t bgpio_lock;
 	unsigned long bgpio_data;
-- 
cgit v1.2.3


From 7d96c9b17636b6148534617ddf95dead18617776 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:35 -0600
Subject: IB/uverbs: Have the core code create the uverbs_root_spec

There is no reason for drivers to do this, the core code should take of
everything. The drivers will provide their information from rodata to
describe their modifications to the core's base uapi specification.

The core uses this to build up the runtime uapi for each device.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl_merge.c |  2 --
 drivers/infiniband/core/uverbs_main.c        | 50 ++++++++++++++++++----------
 drivers/infiniband/core/uverbs_std_types.c   |  1 -
 drivers/infiniband/hw/mlx5/main.c            | 45 +++++++++----------------
 drivers/infiniband/hw/mlx5/mlx5_ib.h         |  1 +
 include/rdma/ib_verbs.h                      |  2 +-
 6 files changed, 51 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index f81aa888ce5c..16b575929915 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -556,7 +556,6 @@ void uverbs_free_spec_tree(struct uverbs_root_spec *root)
 
 	kfree(root);
 }
-EXPORT_SYMBOL(uverbs_free_spec_tree);
 
 struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
 						const struct uverbs_object_tree_def **trees)
@@ -661,4 +660,3 @@ free_root:
 	uverbs_free_spec_tree(root_spec);
 	return ERR_PTR(res);
 }
-EXPORT_SYMBOL(uverbs_alloc_spec_tree);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6f62146e9738..20003594b5d6 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -994,6 +994,36 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 			 __stringify(IB_USER_VERBS_ABI_VERSION));
 
+static int ib_uverbs_create_uapi(struct ib_device *device,
+				 struct ib_uverbs_device *uverbs_dev)
+{
+	const struct uverbs_object_tree_def **specs;
+	struct uverbs_root_spec *specs_root;
+	unsigned int num_specs = 1;
+	unsigned int i;
+
+	if (device->driver_specs)
+		for (i = 0; device->driver_specs[i]; i++)
+			num_specs++;
+
+	specs = kmalloc_array(num_specs, sizeof(*specs), GFP_KERNEL);
+	if (!specs)
+		return -ENOMEM;
+
+	specs[0] = uverbs_default_get_objects();
+	if (device->driver_specs)
+		for (i = 0; device->driver_specs[i]; i++)
+			specs[i+1] = device->driver_specs[i];
+
+	specs_root = uverbs_alloc_spec_tree(num_specs, specs);
+	kfree(specs);
+	if (IS_ERR(specs_root))
+		return PTR_ERR(specs_root);
+
+	uverbs_dev->specs_root = specs_root;
+	return 0;
+}
+
 static void ib_uverbs_add_one(struct ib_device *device)
 {
 	int devnum;
@@ -1036,6 +1066,9 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	rcu_assign_pointer(uverbs_dev->ib_dev, device);
 	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
+	if (ib_uverbs_create_uapi(device, uverbs_dev))
+		goto err;
+
 	cdev_init(&uverbs_dev->cdev, NULL);
 	uverbs_dev->cdev.owner = THIS_MODULE;
 	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
@@ -1055,23 +1088,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
 		goto err_class;
 
-	if (!device->driver_specs_root) {
-		const struct uverbs_object_tree_def *default_root[] = {
-			uverbs_default_get_objects()};
-
-		uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
-								default_root);
-		if (IS_ERR(uverbs_dev->specs_root))
-			goto err_class;
-	} else {
-		uverbs_dev->specs_root = device->driver_specs_root;
-		/*
-		 * Take responsibility to free the specs allocated by the
-		 * driver.
-		 */
-		device->driver_specs_root = NULL;
-	}
-
 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
 
 	return;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 3aa7c7deac74..7f22b820a21b 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -316,4 +316,3 @@ const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
 {
 	return &uverbs_default_objects;
 }
-EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 13744b4631b4..f86d831ee27c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5523,37 +5523,29 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 			     enum mlx5_ib_uapi_flow_action_flags));
 
-#define NUM_TREES	5
 static int populate_specs_root(struct mlx5_ib_dev *dev)
 {
-	const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
-		uverbs_default_get_objects()};
-	size_t num_trees = 1;
+	const struct uverbs_object_tree_def **trees = dev->driver_trees;
+	size_t num_trees = 0;
 
-	if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
-	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
-		default_root[num_trees++] = &mlx5_ib_flow_action;
+	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+	    MLX5_ACCEL_IPSEC_CAP_DEVICE)
+		trees[num_trees++] = &mlx5_ib_flow_action;
 
-	if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
-	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
-		default_root[num_trees++] = &mlx5_ib_dm;
+	if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
+		trees[num_trees++] = &mlx5_ib_dm;
 
 	if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
-			    MLX5_GENERAL_OBJ_TYPES_CAP_UCTX &&
-	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
-		default_root[num_trees++] = mlx5_ib_get_devx_tree();
+	    MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
+		trees[num_trees++] = mlx5_ib_get_devx_tree();
 
-	num_trees += mlx5_ib_get_flow_trees(default_root + num_trees);
+	num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
 
-	dev->ib_dev.driver_specs_root =
-		uverbs_alloc_spec_tree(num_trees, default_root);
+	WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
+	trees[num_trees] = NULL;
+	dev->ib_dev.driver_specs = trees;
 
-	return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root);
-}
-
-static void depopulate_specs_root(struct mlx5_ib_dev *dev)
-{
-	uverbs_free_spec_tree(dev->ib_dev.driver_specs_root);
+	return 0;
 }
 
 static int mlx5_ib_read_counters(struct ib_counters *counters,
@@ -6092,11 +6084,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 	return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
-{
-	depopulate_specs_root(dev);
-}
-
 void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
 	destroy_umrc_res(dev);
@@ -6231,7 +6218,7 @@ static const struct mlx5_ib_profile pf_profile = {
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 		     mlx5_ib_stage_populate_specs,
-		     mlx5_ib_stage_depopulate_specs),
+		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
@@ -6279,7 +6266,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 		     mlx5_ib_stage_populate_specs,
-		     mlx5_ib_stage_depopulate_specs),
+		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b75754efc663..320d4dfe8c2f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -860,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs)
 
 struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
+	const struct uverbs_object_tree_def *driver_trees[6];
 	struct mlx5_core_dev		*mdev;
 	struct mlx5_roce		roce[MLX5_MAX_PORTS];
 	int				num_ports;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4ffe3e11e8fb..3b07201b9a80 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2580,7 +2580,7 @@ struct ib_device {
 	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
 						     int comp_vector);
 
-	struct uverbs_root_spec		*driver_specs_root;
+	const struct uverbs_object_tree_def *const *driver_specs;
 	enum rdma_driver_id		driver_id;
 };
 
-- 
cgit v1.2.3


From 9ed3e5f447723a41de6bcc29633e9f7e6246d2f7 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:36 -0600
Subject: IB/uverbs: Build the specs into a radix tree at runtime

This radix tree datastructure is intended to replace the 'hash' structure
used today for parsing ioctl methods during system calls. This first
commit introduces the structure and builds it from the existing .rodata
descriptions.

The so-called hash arrangement is actually a 5 level open coded radix tree.
This new version uses a 3 level radix tree built using the radix tree
library.

Overall this is much less code and much easier to build as the radix tree
API allows for dynamic modification during the building. There is a small
memory penalty to pay for this, but since the radix tree is allocated on
a per device basis, a few kb of RAM seems immaterial considering the
gained simplicity.

The radix tree is similar to the existing tree, but also has a 'attr_bkey'
concept, which is a small value'd index for each method attribute. This is
used to simplify and improve performance of everything in the next
patches.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
---
 drivers/infiniband/core/Makefile      |   3 +-
 drivers/infiniband/core/rdma_core.h   |  50 +++++
 drivers/infiniband/core/uverbs.h      |   1 +
 drivers/infiniband/core/uverbs_main.c |  14 +-
 drivers/infiniband/core/uverbs_uapi.c | 343 ++++++++++++++++++++++++++++++++++
 include/rdma/uverbs_ioctl.h           | 137 ++++++++++++++
 6 files changed, 545 insertions(+), 3 deletions(-)
 create mode 100644 drivers/infiniband/core/uverbs_uapi.c

(limited to 'include')

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 61667705d746..d934cf617841 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -37,4 +37,5 @@ ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
 				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
 				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
-				uverbs_std_types_mr.o uverbs_std_types_counters.o
+				uverbs_std_types_mr.o uverbs_std_types_counters.o \
+				uverbs_uapi.o
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index b2e85ce65b78..55a687285b1d 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -43,6 +43,8 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mutex.h>
 
+struct ib_uverbs_device;
+
 int uverbs_ns_idx(u16 *id, unsigned int ns_count);
 const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
 						   uint16_t object);
@@ -113,4 +115,52 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
 void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
 
+/*
+ * This is the runtime description of the uverbs API, used by the syscall
+ * machinery to validate and dispatch calls.
+ */
+
+/*
+ * Depending on ID the slot pointer in the radix tree points at one of these
+ * structs.
+ */
+struct uverbs_api_object {
+	const struct uverbs_obj_type *type_attrs;
+	const struct uverbs_obj_type_class *type_class;
+};
+
+struct uverbs_api_ioctl_method {
+	int (__rcu *handler)(struct ib_uverbs_file *ufile,
+			     struct uverbs_attr_bundle *ctx);
+	DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+	u8 driver_method:1;
+	u8 key_bitmap_len;
+	u8 destroy_bkey;
+};
+
+struct uverbs_api_attr {
+	struct uverbs_attr_spec spec;
+};
+
+struct uverbs_api_object;
+struct uverbs_api {
+	/* radix tree contains struct uverbs_api_* pointers */
+	struct radix_tree_root radix;
+	enum rdma_driver_id driver_id;
+};
+
+static inline const struct uverbs_api_object *
+uapi_get_object(struct uverbs_api *uapi, u16 object_id)
+{
+	return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+}
+
+char *uapi_key_format(char *S, unsigned int key);
+struct uverbs_api *uverbs_alloc_api(
+	const struct uverbs_object_tree_def *const *driver_specs,
+	enum rdma_driver_id driver_id);
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
+void uverbs_disassociate_api(struct uverbs_api *uapi);
+void uverbs_destroy_api(struct uverbs_api *uapi);
+
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 0fa32009908c..879be0d1fd99 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -112,6 +112,7 @@ struct ib_uverbs_device {
 	struct list_head			uverbs_file_list;
 	struct list_head			uverbs_events_file_list;
 	struct uverbs_root_spec			*specs_root;
+	struct uverbs_api			*uapi;
 };
 
 struct ib_uverbs_event_queue {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 20003594b5d6..0fab083cafef 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -174,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
 	struct ib_uverbs_device *dev =
 		container_of(kobj, struct ib_uverbs_device, kobj);
 
+	uverbs_destroy_api(dev->uapi);
 	cleanup_srcu_struct(&dev->disassociate_srcu);
 	uverbs_free_spec_tree(dev->specs_root);
 	kfree(dev);
@@ -1000,6 +1001,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
 	const struct uverbs_object_tree_def **specs;
 	struct uverbs_root_spec *specs_root;
 	unsigned int num_specs = 1;
+	struct uverbs_api *uapi;
 	unsigned int i;
 
 	if (device->driver_specs)
@@ -1020,7 +1022,14 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
 	if (IS_ERR(specs_root))
 		return PTR_ERR(specs_root);
 
+	uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+	if (IS_ERR(uapi)) {
+		uverbs_free_spec_tree(specs_root);
+		return PTR_ERR(uapi);
+	}
+
 	uverbs_dev->specs_root = specs_root;
+	uverbs_dev->uapi = uapi;
 	return 0;
 }
 
@@ -1115,7 +1124,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 	struct ib_event event;
 
 	/* Pending running commands to terminate */
-	synchronize_srcu(&uverbs_dev->disassociate_srcu);
+	uverbs_disassociate_api_pre(uverbs_dev);
 	event.event = IB_EVENT_DEVICE_FATAL;
 	event.element.port_num = 0;
 	event.device = ib_dev;
@@ -1161,6 +1170,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
 	}
 	mutex_unlock(&uverbs_dev->lists_mutex);
+
+	uverbs_disassociate_api(uverbs_dev->uapi);
 }
 
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1188,7 +1199,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 		 * cdev was deleted, however active clients can still issue
 		 * commands and close their open files.
 		 */
-		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
 		ib_uverbs_free_hw_resources(uverbs_dev, device);
 		wait_clients = 0;
 	}
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
new file mode 100644
index 000000000000..21c0de034511
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ */
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
+{
+	void *elm;
+	int rc;
+
+	if (key == UVERBS_API_KEY_ERR)
+		return ERR_PTR(-EOVERFLOW);
+
+	elm = kzalloc(alloc_size, GFP_KERNEL);
+	rc = radix_tree_insert(&uapi->radix, key, elm);
+	if (rc) {
+		kfree(elm);
+		return ERR_PTR(rc);
+	}
+
+	return elm;
+}
+
+static int uapi_merge_method(struct uverbs_api *uapi,
+			     struct uverbs_api_object *obj_elm, u32 obj_key,
+			     const struct uverbs_method_def *method,
+			     bool is_driver)
+{
+	u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
+	struct uverbs_api_ioctl_method *method_elm;
+	unsigned int i;
+
+	if (!method->attrs)
+		return 0;
+
+	method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
+	if (IS_ERR(method_elm)) {
+		if (method_elm != ERR_PTR(-EEXIST))
+			return PTR_ERR(method_elm);
+
+		/*
+		 * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
+		 */
+		if (WARN_ON(method->handler))
+			return -EINVAL;
+		method_elm = radix_tree_lookup(&uapi->radix, method_key);
+		if (WARN_ON(!method_elm))
+			return -EINVAL;
+	} else {
+		WARN_ON(!method->handler);
+		rcu_assign_pointer(method_elm->handler, method->handler);
+		if (method->handler != uverbs_destroy_def_handler)
+			method_elm->driver_method = is_driver;
+	}
+
+	for (i = 0; i != method->num_attrs; i++) {
+		const struct uverbs_attr_def *attr = (*method->attrs)[i];
+		struct uverbs_api_attr *attr_slot;
+
+		if (!attr)
+			continue;
+
+		/*
+		 * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
+		 * so if it is specified by a driver then it always makes this
+		 * into a driver method.
+		 */
+		if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
+			method_elm->driver_method |= is_driver;
+
+		attr_slot =
+			uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
+				     sizeof(*attr_slot));
+		/* Attributes are not allowed to be modified by drivers */
+		if (IS_ERR(attr_slot))
+			return PTR_ERR(attr_slot);
+
+		attr_slot->spec = attr->attr;
+	}
+
+	return 0;
+}
+
+static int uapi_merge_tree(struct uverbs_api *uapi,
+			   const struct uverbs_object_tree_def *tree,
+			   bool is_driver)
+{
+	unsigned int i, j;
+	int rc;
+
+	if (!tree->objects)
+		return 0;
+
+	for (i = 0; i != tree->num_objects; i++) {
+		const struct uverbs_object_def *obj = (*tree->objects)[i];
+		struct uverbs_api_object *obj_elm;
+		u32 obj_key;
+
+		if (!obj)
+			continue;
+
+		obj_key = uapi_key_obj(obj->id);
+		obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
+		if (IS_ERR(obj_elm)) {
+			if (obj_elm != ERR_PTR(-EEXIST))
+				return PTR_ERR(obj_elm);
+
+			/* This occurs when a driver uses ADD_UVERBS_METHODS */
+			if (WARN_ON(obj->type_attrs))
+				return -EINVAL;
+			obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
+			if (WARN_ON(!obj_elm))
+				return -EINVAL;
+		} else {
+			obj_elm->type_attrs = obj->type_attrs;
+			if (obj->type_attrs) {
+				obj_elm->type_class =
+					obj->type_attrs->type_class;
+				/*
+				 * Today drivers are only permitted to use
+				 * idr_class types. They cannot use FD types
+				 * because we currently have no way to revoke
+				 * the fops pointer after device
+				 * disassociation.
+				 */
+				if (WARN_ON(is_driver &&
+					    obj->type_attrs->type_class !=
+						    &uverbs_idr_class))
+					return -EINVAL;
+			}
+		}
+
+		if (!obj->methods)
+			continue;
+
+		for (j = 0; j != obj->num_methods; j++) {
+			const struct uverbs_method_def *method =
+				(*obj->methods)[j];
+			if (!method)
+				continue;
+
+			rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+					       is_driver);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static int
+uapi_finalize_ioctl_method(struct uverbs_api *uapi,
+			   struct uverbs_api_ioctl_method *method_elm,
+			   u32 method_key)
+{
+	struct radix_tree_iter iter;
+	unsigned int max_bkey = 0;
+	bool single_uobj = false;
+	void __rcu **slot;
+
+	method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter,
+				  uapi_key_attrs_start(method_key)) {
+		struct uverbs_api_attr *elm =
+			rcu_dereference_protected(*slot, true);
+		u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
+		u32 attr_bkey = uapi_bkey_attr(attr_key);
+		u8 type = elm->spec.type;
+
+		if (uapi_key_attr_to_method(iter.index) !=
+		    uapi_key_attr_to_method(method_key))
+			break;
+
+		if (elm->spec.mandatory)
+			__set_bit(attr_bkey, method_elm->attr_mandatory);
+
+		if (type == UVERBS_ATTR_TYPE_IDR ||
+		    type == UVERBS_ATTR_TYPE_FD) {
+			u8 access = elm->spec.u.obj.access;
+
+			/*
+			 * Verbs specs may only have one NEW/DESTROY, we don't
+			 * have the infrastructure to abort multiple NEW's or
+			 * cope with multiple DESTROY failure.
+			 */
+			if (access == UVERBS_ACCESS_NEW ||
+			    access == UVERBS_ACCESS_DESTROY) {
+				if (WARN_ON(single_uobj))
+					return -EINVAL;
+
+				single_uobj = true;
+				if (WARN_ON(!elm->spec.mandatory))
+					return -EINVAL;
+			}
+
+			if (access == UVERBS_ACCESS_DESTROY)
+				method_elm->destroy_bkey = attr_bkey;
+		}
+
+		max_bkey = max(max_bkey, attr_bkey);
+	}
+
+	method_elm->key_bitmap_len = max_bkey + 1;
+	WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
+
+	return 0;
+}
+
+static int uapi_finalize(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	int rc;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		struct uverbs_api_ioctl_method *method_elm =
+			rcu_dereference_protected(*slot, true);
+
+		if (uapi_key_is_ioctl_method(iter.index)) {
+			rc = uapi_finalize_ioctl_method(uapi, method_elm,
+							iter.index);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	if (!uapi)
+		return;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		kfree(rcu_dereference_protected(*slot, true));
+		radix_tree_iter_delete(&uapi->radix, &iter, slot);
+	}
+}
+
+struct uverbs_api *uverbs_alloc_api(
+	const struct uverbs_object_tree_def *const *driver_specs,
+	enum rdma_driver_id driver_id)
+{
+	struct uverbs_api *uapi;
+	int rc;
+
+	uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
+	if (!uapi)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
+	uapi->driver_id = driver_id;
+
+	rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+	if (rc)
+		goto err;
+
+	for (; driver_specs && *driver_specs; driver_specs++) {
+		rc = uapi_merge_tree(uapi, *driver_specs, true);
+		if (rc)
+			goto err;
+	}
+
+	rc = uapi_finalize(uapi);
+	if (rc)
+		goto err;
+
+	return uapi;
+err:
+	if (rc != -ENOMEM)
+		pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+		       rc);
+
+	uverbs_destroy_api(uapi);
+	return ERR_PTR(rc);
+}
+
+/*
+ * The pre version is done before destroying the HW objects, it only blocks
+ * off method access. All methods that require the ib_dev or the module data
+ * must test one of these assignments prior to continuing.
+ */
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
+{
+	struct uverbs_api *uapi = uverbs_dev->uapi;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		if (uapi_key_is_ioctl_method(iter.index)) {
+			struct uverbs_api_ioctl_method *method_elm =
+				rcu_dereference_protected(*slot, true);
+
+			if (method_elm->driver_method)
+				rcu_assign_pointer(method_elm->handler, NULL);
+		}
+	}
+
+	synchronize_srcu(&uverbs_dev->disassociate_srcu);
+}
+
+/*
+ * Called when a driver disassociates from the ib_uverbs_device. The
+ * assumption is that the driver module will unload after. Replace everything
+ * related to the driver with NULL as a safety measure.
+ */
+void uverbs_disassociate_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		if (uapi_key_is_object(iter.index)) {
+			struct uverbs_api_object *object_elm =
+				rcu_dereference_protected(*slot, true);
+
+			/*
+			 * Some type_attrs are in the driver module. We don't
+			 * bother to keep track of which since there should be
+			 * no use of this after disassociate.
+			 */
+			object_elm->type_attrs = NULL;
+		} else if (uapi_key_is_attr(iter.index)) {
+			struct uverbs_api_attr *elm =
+				rcu_dereference_protected(*slot, true);
+
+			if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
+				elm->spec.u2.enum_def.ids = NULL;
+		}
+	}
+}
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 8d71b7a7f147..339996e80c16 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -154,6 +154,143 @@ struct uverbs_root_spec {
 	struct uverbs_object_spec_hash		*object_buckets[0];
 };
 
+/*
+ * Information about the API is loaded into a radix tree. For IOCTL we start
+ * with a tuple of:
+ *  object_id, attr_id, method_id
+ *
+ * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based
+ * on the current kernel support this is compressed into 16 bit key for the
+ * radix tree. Since this compression is entirely internal to the kernel the
+ * below limits can be revised if the kernel gains additional data.
+ *
+ * With 64 leafs per node this is a 3 level radix tree.
+ *
+ * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
+ * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ */
+enum uapi_radix_data {
+	UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
+
+	UVERBS_API_ATTR_KEY_BITS = 6,
+	UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
+	UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+
+	UVERBS_API_METHOD_KEY_BITS = 5,
+	UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
+	UVERBS_API_METHOD_KEY_NUM_CORE = 24,
+	UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
+					   UVERBS_API_METHOD_KEY_NUM_CORE,
+	UVERBS_API_METHOD_KEY_MASK = GENMASK(
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
+		UVERBS_API_METHOD_KEY_SHIFT),
+
+	UVERBS_API_OBJ_KEY_BITS = 5,
+	UVERBS_API_OBJ_KEY_SHIFT =
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_OBJ_KEY_NUM_CORE = 24,
+	UVERBS_API_OBJ_KEY_NUM_DRIVER =
+		(1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
+	UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
+
+	/* This id guaranteed to not exist in the radix tree */
+	UVERBS_API_KEY_ERR = 0xFFFFFFFF,
+};
+
+static inline __attribute_const__ u32 uapi_key_obj(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_OBJ_KEY_NUM_CORE;
+	} else {
+		if (id >= UVERBS_API_OBJ_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
+
+	return id << UVERBS_API_OBJ_KEY_SHIFT;
+}
+
+static inline __attribute_const__ bool uapi_key_is_object(u32 key)
+{
+	return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_METHOD_KEY_NUM_CORE;
+	} else {
+		id++;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
+
+	return id << UVERBS_API_METHOD_KEY_SHIFT;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+{
+	return attr_key &
+	       (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
+}
+
+static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
+{
+	/* 0 is the method slot itself */
+	return ioctl_method_key + 1;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr(u32 id)
+{
+	/*
+	 * The attr is designed to fit in the typical single radix tree node
+	 * of 64 entries. Since allmost all methods have driver attributes we
+	 * organize things so that the driver and core attributes interleave to
+	 * reduce the length of the attributes array in typical cases.
+	 */
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		id++;
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 0;
+	} else {
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 1;
+	}
+
+	return id;
+}
+
+static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) != 0;
+}
+
+/*
+ * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN),
+ * basically it undoes the reservation of 0 in the ID numbering. attr_key
+ * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of
+ * uapi_key_attr().
+ */
+static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
+{
+	return attr_key - 1;
+}
+
 /*
  * =======================================
  *	Verbs definitions
-- 
cgit v1.2.3


From 6b0d08f4a27134e6fb49aa33ceb53356081bc92e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:37 -0600
Subject: IB/uverbs: Use uverbs_api to manage the object type inside the
 uobject

Currently the struct uverbs_obj_type stored in the ib_uobject is part of
the .rodata segment of the module that defines the object. This is a
problem if drivers define new uapi objects as we will be left with a
dangling pointer after device disassociation.

Switch the uverbs_obj_type for struct uverbs_api_object, which is
allocated memory that is part of the uverbs_api and is guaranteed to
always exist. Further this moves the 'type_class' into this memory which
means access to the IDR/FD function pointers is also guaranteed. Drivers
cannot define new types.

This makes it safe to continue to use all uobjects, including driver
defined ones, after disassociation.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c    | 100 ++++++++++++++++++---------------
 drivers/infiniband/core/rdma_core.h    |   2 +-
 drivers/infiniband/core/uverbs_ioctl.c |   6 +-
 include/rdma/ib_verbs.h                |   2 +-
 include/rdma/uverbs_std_types.h        |  30 +++++-----
 include/rdma/uverbs_types.h            |   9 +--
 6 files changed, 79 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 4235b9ddc2ad..2814228ead39 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -97,7 +97,7 @@ static void uverbs_uobject_free(struct kref *ref)
 	struct ib_uobject *uobj =
 		container_of(ref, struct ib_uobject, ref);
 
-	if (uobj->type->type_class->needs_kfree_rcu)
+	if (uobj->uapi_object->type_class->needs_kfree_rcu)
 		kfree_rcu(uobj, rcu);
 	else
 		kfree(uobj);
@@ -180,7 +180,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
 
 	if (uobj->object) {
-		ret = uobj->type->type_class->destroy_hw(uobj, reason);
+		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
 		if (ret) {
 			if (ib_is_destroy_retryable(ret, reason, uobj))
 				return ret;
@@ -197,7 +197,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	if (reason == RDMA_REMOVE_ABORT) {
 		WARN_ON(!list_empty(&uobj->list));
 		WARN_ON(!uobj->context);
-		uobj->type->type_class->alloc_abort(uobj);
+		uobj->uapi_object->type_class->alloc_abort(uobj);
 	}
 
 	uobj->context = NULL;
@@ -210,7 +210,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
 	if (reason != RDMA_REMOVE_DESTROY)
 		atomic_set(&uobj->usecnt, 0);
 	else
-		uobj->type->type_class->remove_handle(uobj);
+		uobj->uapi_object->type_class->remove_handle(uobj);
 
 	if (!list_empty(&uobj->list)) {
 		spin_lock_irqsave(&ufile->uobjects_lock, flags);
@@ -268,13 +268,13 @@ out_unlock:
  * with a NULL object pointer. The caller must pair this with
  * uverbs_put_destroy.
  */
-struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
 				      u32 id, struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = rdma_lookup_get_uobject(type, ufile, id, UVERBS_LOOKUP_DESTROY);
+	uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -292,27 +292,22 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
  * on success (negative errno on failure). For use by callers that do not need
  * the uobj.
  */
-int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
 			   struct ib_uverbs_file *ufile, int success_res)
 {
 	struct ib_uobject *uobj;
 
-	uobj = __uobj_get_destroy(type, id, ufile);
+	uobj = __uobj_get_destroy(obj, id, ufile);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	/*
-	 * FIXME: After destroy this is not safe. We no longer hold the rwsem
-	 * so disassociation could have completed and unloaded the module that
-	 * backs the uobj->type pointer.
-	 */
 	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 	return success_res;
 }
 
 /* alloc_uobj must be undone by uverbs_destroy_uobject() */
 static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
-				     const struct uverbs_obj_type *type)
+				     const struct uverbs_api_object *obj)
 {
 	struct ib_uobject *uobj;
 	struct ib_ucontext *ucontext;
@@ -321,7 +316,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 	if (IS_ERR(ucontext))
 		return ERR_CAST(ucontext);
 
-	uobj = kzalloc(type->obj_size, GFP_KERNEL);
+	uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
 	if (!uobj)
 		return ERR_PTR(-ENOMEM);
 	/*
@@ -331,7 +326,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
 	uobj->ufile = ufile;
 	uobj->context = ucontext;
 	INIT_LIST_HEAD(&uobj->list);
-	uobj->type = type;
+	uobj->uapi_object = obj;
 	/*
 	 * Allocated objects start out as write locked to deny any other
 	 * syscalls from accessing them until they are committed. See
@@ -368,7 +363,7 @@ static int idr_add_uobj(struct ib_uobject *uobj)
 
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
 static struct ib_uobject *
-lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
 		       struct ib_uverbs_file *ufile, s64 id,
 		       enum rdma_lookup_mode mode)
 {
@@ -401,15 +396,14 @@ free:
 }
 
 static struct ib_uobject *
-lookup_get_fd_uobject(const struct uverbs_obj_type *type,
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
 		      struct ib_uverbs_file *ufile, s64 id,
 		      enum rdma_lookup_mode mode)
 {
+	const struct uverbs_obj_fd_type *fd_type;
 	struct file *f;
 	struct ib_uobject *uobject;
 	int fdno = id;
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(type, struct uverbs_obj_fd_type, type);
 
 	if (fdno != id)
 		return ERR_PTR(-EINVAL);
@@ -417,6 +411,11 @@ lookup_get_fd_uobject(const struct uverbs_obj_type *type,
 	if (mode != UVERBS_LOOKUP_READ)
 		return ERR_PTR(-EOPNOTSUPP);
 
+	if (!obj->type_attrs)
+		return ERR_PTR(-EIO);
+	fd_type =
+		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
 	f = fget(fdno);
 	if (!f)
 		return ERR_PTR(-EBADF);
@@ -436,18 +435,21 @@ lookup_get_fd_uobject(const struct uverbs_obj_type *type,
 	return uobject;
 }
 
-struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 					   struct ib_uverbs_file *ufile, s64 id,
 					   enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = type->type_class->lookup_get(type, ufile, id, mode);
+	if (!obj)
+		return ERR_PTR(-EINVAL);
+
+	uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
 	if (IS_ERR(uobj))
 		return uobj;
 
-	if (uobj->type != type) {
+	if (uobj->uapi_object != obj) {
 		ret = -EINVAL;
 		goto free;
 	}
@@ -469,18 +471,19 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
 
 	return uobj;
 free:
-	uobj->type->type_class->lookup_put(uobj, mode);
+	obj->type_class->lookup_put(uobj, mode);
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
 }
 
-static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
-						  struct ib_uverbs_file *ufile)
+static struct ib_uobject *
+alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
+			struct ib_uverbs_file *ufile)
 {
 	int ret;
 	struct ib_uobject *uobj;
 
-	uobj = alloc_uobj(ufile, type);
+	uobj = alloc_uobj(ufile, obj);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -504,8 +507,9 @@ uobj_put:
 	return ERR_PTR(ret);
 }
 
-static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
-						 struct ib_uverbs_file *ufile)
+static struct ib_uobject *
+alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
+		       struct ib_uverbs_file *ufile)
 {
 	int new_fd;
 	struct ib_uobject *uobj;
@@ -514,7 +518,7 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 	if (new_fd < 0)
 		return ERR_PTR(new_fd);
 
-	uobj = alloc_uobj(ufile, type);
+	uobj = alloc_uobj(ufile, obj);
 	if (IS_ERR(uobj)) {
 		put_unused_fd(new_fd);
 		return uobj;
@@ -526,11 +530,14 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
 	return uobj;
 }
 
-struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
 					    struct ib_uverbs_file *ufile)
 {
 	struct ib_uobject *ret;
 
+	if (!obj)
+		return ERR_PTR(-EINVAL);
+
 	/*
 	 * The hw_destroy_rwsem is held across the entire object creation and
 	 * released during rdma_alloc_commit_uobject or
@@ -539,7 +546,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
 		return ERR_PTR(-EIO);
 
-	ret = type->type_class->alloc_begin(type, ufile);
+	ret = obj->type_class->alloc_begin(obj, ufile);
 	if (IS_ERR(ret)) {
 		up_read(&ufile->hw_destroy_rwsem);
 		return ret;
@@ -561,8 +568,8 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
 					       enum rdma_remove_reason why)
 {
 	const struct uverbs_obj_idr_type *idr_type =
-		container_of(uobj->type, struct uverbs_obj_idr_type,
-			     type);
+		container_of(uobj->uapi_object->type_attrs,
+			     struct uverbs_obj_idr_type, type);
 	int ret = idr_type->destroy_object(uobj, why);
 
 	/*
@@ -599,8 +606,8 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
 static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
 					      enum rdma_remove_reason why)
 {
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(uobj->type, struct uverbs_obj_fd_type, type);
+	const struct uverbs_obj_fd_type *fd_type = container_of(
+		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
 	int ret = fd_type->context_closed(uobj, why);
 
 	if (ib_is_destroy_retryable(ret, why, uobj))
@@ -633,8 +640,8 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
 
 static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
 {
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(uobj->type, struct uverbs_obj_fd_type, type);
+	const struct uverbs_obj_fd_type *fd_type = container_of(
+		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
 	int fd = uobj->id;
 	struct file *filp;
 
@@ -679,7 +686,7 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
 	int ret;
 
 	/* alloc_commit consumes the uobj kref */
-	ret = uobj->type->type_class->alloc_commit(uobj);
+	ret = uobj->uapi_object->type_class->alloc_commit(uobj);
 	if (ret) {
 		uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
 		up_read(&ufile->hw_destroy_rwsem);
@@ -735,7 +742,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 			     enum rdma_lookup_mode mode)
 {
 	assert_uverbs_usecnt(uobj, mode);
-	uobj->type->type_class->lookup_put(uobj, mode);
+	uobj->uapi_object->type_class->lookup_put(uobj, mode);
 	/*
 	 * In order to unlock an object, either decrease its usecnt for
 	 * read access or zero it in case of exclusive access. See
@@ -995,23 +1002,26 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
 EXPORT_SYMBOL(uverbs_fd_class);
 
 struct ib_uobject *
-uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+uverbs_get_uobject_from_file(u16 object_id,
 			     struct ib_uverbs_file *ufile,
 			     enum uverbs_obj_access access, s64 id)
 {
+	const struct uverbs_api_object *obj =
+		uapi_get_object(ufile->device->uapi, object_id);
+
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+		return rdma_lookup_get_uobject(obj, ufile, id,
 					       UVERBS_LOOKUP_READ);
 	case UVERBS_ACCESS_DESTROY:
 		/* Actual destruction is done inside uverbs_handle_method */
-		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+		return rdma_lookup_get_uobject(obj, ufile, id,
 					       UVERBS_LOOKUP_DESTROY);
 	case UVERBS_ACCESS_WRITE:
-		return rdma_lookup_get_uobject(type_attrs, ufile, id,
+		return rdma_lookup_get_uobject(obj, ufile, id,
 					       UVERBS_LOOKUP_WRITE);
 	case UVERBS_ACCESS_NEW:
-		return rdma_alloc_begin_uobject(type_attrs, ufile);
+		return rdma_alloc_begin_uobject(obj, ufile);
 	default:
 		WARN_ON(true);
 		return ERR_PTR(-EOPNOTSUPP);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 55a687285b1d..d89569d87b1c 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -89,7 +89,7 @@ void uverbs_close_fd(struct file *f);
  * uverbs_finalize_objects are called.
  */
 struct ib_uobject *
-uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+uverbs_get_uobject_from_file(u16 object_id,
 			     struct ib_uverbs_file *ufile,
 			     enum uverbs_obj_access access, s64 id);
 
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 23ff698ab08e..8a052d0fdf2c 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -57,7 +57,6 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 	const struct uverbs_attr_spec *spec;
 	const struct uverbs_attr_spec *val_spec;
 	struct uverbs_attr *e;
-	const struct uverbs_object_spec *object;
 	struct uverbs_obj_attr *o_attr;
 	struct uverbs_attr *elements = attr_bundle_h->attrs;
 
@@ -145,9 +144,6 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
-		object = uverbs_get_object(ufile, spec->u.obj.obj_type);
-		if (!object)
-			return -EINVAL;
 
 		/* specs are allowed to have only one destroy attribute */
 		WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY &&
@@ -162,7 +158,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		 * IDR implementation today rejects negative IDs
 		 */
 		o_attr->uobject = uverbs_get_uobject_from_file(
-					object->type_attrs,
+					spec->u.obj.obj_type,
 					ufile,
 					spec->u.obj.access,
 					uattr->data_s64);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3b07201b9a80..5d404c20b49f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1524,7 +1524,7 @@ struct ib_uobject {
 	atomic_t		usecnt;		/* protects exclusive access */
 	struct rcu_head		rcu;		/* kfree_rcu() overhead */
 
-	const struct uverbs_obj_type *type;
+	const struct uverbs_api_object *uapi_object;
 };
 
 struct ib_udata {
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 64ee2545dd3d..3b00231cc084 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -54,14 +54,15 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
  */
 #define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
 
-#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
+#define uobj_get_type(_ufile, _object)                                         \
+	uapi_get_object((_ufile)->device->uapi, _object)
 
 #define uobj_get_read(_type, _id, _ufile)                                      \
-	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
 				_uobj_check_id(_id), UVERBS_LOOKUP_READ)
 
 #define ufd_get_read(_type, _fdnum, _ufile)                                    \
-	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
 				(_fdnum)*typecheck(s32, _fdnum),               \
 				UVERBS_LOOKUP_READ)
 
@@ -76,20 +77,21 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 		uobj_get_read(_type, _id, _ufile)))
 
 #define uobj_get_write(_type, _id, _ufile)                                     \
-	rdma_lookup_get_uobject(uobj_get_type(_type), _ufile,                  \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
 				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
 
-int __uobj_perform_destroy(const struct uverbs_obj_type *type, u32 id,
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
 			   struct ib_uverbs_file *ufile, int success_res);
 #define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
-	__uobj_perform_destroy(uobj_get_type(_type), _uobj_check_id(_id),      \
-			       _ufile, _success_res)
+	__uobj_perform_destroy(uobj_get_type(_ufile, _type),                   \
+			       _uobj_check_id(_id), _ufile, _success_res)
 
-struct ib_uobject *__uobj_get_destroy(const struct uverbs_obj_type *type,
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
 				      u32 id, struct ib_uverbs_file *ufile);
 
 #define uobj_get_destroy(_type, _id, _ufile)                                   \
-	__uobj_get_destroy(uobj_get_type(_type), _uobj_check_id(_id), _ufile)
+	__uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id),  \
+			   _ufile)
 
 static inline void uobj_put_destroy(struct ib_uobject *uobj)
 {
@@ -124,11 +126,11 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 	rdma_alloc_abort_uobject(uobj);
 }
 
-static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
-					      struct ib_uverbs_file *ufile,
-					      struct ib_device **ib_dev)
+static inline struct ib_uobject *
+__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
+	     struct ib_device **ib_dev)
 {
-	struct ib_uobject *uobj = rdma_alloc_begin_uobject(type, ufile);
+	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
 
 	if (!IS_ERR(uobj))
 		*ib_dev = uobj->context->device;
@@ -136,7 +138,7 @@ static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type
 }
 
 #define uobj_alloc(_type, _ufile, _ib_dev)                                     \
-	__uobj_alloc(uobj_get_type(_type), _ufile, _ib_dev)
+	__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
 
 #endif
 
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index 1ab9a85eebd9..acb1bfa3cc99 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -37,6 +37,7 @@
 #include <rdma/ib_verbs.h>
 
 struct uverbs_obj_type;
+struct uverbs_api_object;
 
 enum rdma_lookup_mode {
 	UVERBS_LOOKUP_READ,
@@ -81,14 +82,14 @@ enum rdma_lookup_mode {
  * alloc_abort returns.
  */
 struct uverbs_obj_type_class {
-	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
+	struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
 					  struct ib_uverbs_file *ufile);
 	/* This consumes the kref on uobj */
 	int (*alloc_commit)(struct ib_uobject *uobj);
 	/* This does not consume the kref on uobj */
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
-	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
+	struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj,
 					 struct ib_uverbs_file *ufile, s64 id,
 					 enum rdma_lookup_mode mode);
 	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
@@ -128,12 +129,12 @@ struct uverbs_obj_idr_type {
 					   enum rdma_remove_reason why);
 };
 
-struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
 					   struct ib_uverbs_file *ufile, s64 id,
 					   enum rdma_lookup_mode mode);
 void rdma_lookup_put_uobject(struct ib_uobject *uobj,
 			     enum rdma_lookup_mode mode);
-struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
 					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
 int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
-- 
cgit v1.2.3


From 4b3dd2bbf0818ccb23e7f2831f2ca4a86789cd1f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:38 -0600
Subject: IB/uverbs: Provide implementation private memory for the
 uverbs_attr_bundle

This already existed as the anonymous 'ctx' structure, but this was not
really a useful form. Hoist this struct into bundle_priv and rework the
internal things to use it instead.

Move a bunch of the processing internal state into the priv and reduce the
excessive use of function arguments.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c | 112 +++++++++++++++++----------------
 include/rdma/uverbs_ioctl.h            |   1 +
 2 files changed, 58 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8a052d0fdf2c..efb7adcc21fb 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,18 @@
 #include "rdma_core.h"
 #include "uverbs.h"
 
+struct bundle_priv {
+	struct ib_uverbs_attr __user *user_attrs;
+	struct ib_uverbs_attr *uattrs;
+	struct uverbs_obj_attr *destroy_attr;
+
+	/*
+	 * Must be last. bundle ends in a flex array which overlaps
+	 * internal_buffer.
+	 */
+	struct uverbs_attr_bundle bundle;
+};
+
 static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 				   u16 len)
 {
@@ -46,12 +58,11 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 			   0, uattr->len - len);
 }
 
-static int uverbs_process_attr(struct ib_uverbs_file *ufile,
+static int uverbs_process_attr(struct bundle_priv *pbundle,
 			       const struct ib_uverbs_attr *uattr,
 			       u16 attr_id,
 			       const struct uverbs_attr_spec_hash *attr_spec_bucket,
 			       struct uverbs_attr_bundle_hash *attr_bundle_h,
-			       struct uverbs_obj_attr **destroy_attr,
 			       struct ib_uverbs_attr __user *uattr_ptr)
 {
 	const struct uverbs_attr_spec *spec;
@@ -147,9 +158,9 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 
 		/* specs are allowed to have only one destroy attribute */
 		WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY &&
-			*destroy_attr);
+			pbundle->destroy_attr);
 		if (spec->u.obj.access == UVERBS_ACCESS_DESTROY)
-			*destroy_attr = o_attr;
+			pbundle->destroy_attr = o_attr;
 
 		/*
 		 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
@@ -159,7 +170,7 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 		 */
 		o_attr->uobject = uverbs_get_uobject_from_file(
 					spec->u.obj.obj_type,
-					ufile,
+					pbundle->bundle.ufile,
 					spec->u.obj.access,
 					uattr->data_s64);
 
@@ -187,10 +198,11 @@ static int uverbs_process_attr(struct ib_uverbs_file *ufile,
 	return 0;
 }
 
-static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
+static int uverbs_finalize_attrs(struct bundle_priv *pbundle,
 				 struct uverbs_attr_spec_hash *const *spec_hash,
 				 size_t num, bool commit)
 {
+	struct uverbs_attr_bundle *attrs_bundle = &pbundle->bundle;
 	unsigned int i;
 	int ret = 0;
 
@@ -233,27 +245,25 @@ static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
 	return ret;
 }
 
-static int uverbs_uattrs_process(struct ib_uverbs_file *ufile,
-				 const struct ib_uverbs_attr *uattrs,
-				 size_t num_uattrs,
+static int uverbs_uattrs_process(size_t num_uattrs,
 				 const struct uverbs_method_spec *method,
-				 struct uverbs_attr_bundle *attr_bundle,
-				 struct uverbs_obj_attr **destroy_attr,
-				 struct ib_uverbs_attr __user *uattr_ptr)
+				 struct bundle_priv *pbundle)
 {
+	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
+	struct ib_uverbs_attr __user *uattr_ptr = pbundle->user_attrs;
 	size_t i;
 	int ret = 0;
 	int num_given_buckets = 0;
 
 	for (i = 0; i < num_uattrs; i++) {
-		const struct ib_uverbs_attr *uattr = &uattrs[i];
+		const struct ib_uverbs_attr *uattr = &pbundle->uattrs[i];
 		u16 attr_id = uattr->attr_id;
 		struct uverbs_attr_spec_hash *attr_spec_bucket;
 
 		ret = uverbs_ns_idx(&attr_id, method->num_buckets);
 		if (ret < 0 || !method->attr_buckets[ret]) {
 			if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
-				uverbs_finalize_attrs(attr_bundle,
+				uverbs_finalize_attrs(pbundle,
 						      method->attr_buckets,
 						      num_given_buckets,
 						      false);
@@ -270,12 +280,13 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile,
 			num_given_buckets = ret + 1;
 
 		attr_spec_bucket = method->attr_buckets[ret];
-		ret = uverbs_process_attr(ufile, uattr, attr_id,
+		ret = uverbs_process_attr(pbundle,
+					  uattr, attr_id,
 					  attr_spec_bucket,
-					  &attr_bundle->hash[ret], destroy_attr,
+					  &attr_bundle->hash[ret],
 					  uattr_ptr++);
 		if (ret) {
-			uverbs_finalize_attrs(attr_bundle,
+			uverbs_finalize_attrs(pbundle,
 					      method->attr_buckets,
 					      num_given_buckets,
 					      false);
@@ -287,8 +298,9 @@ static int uverbs_uattrs_process(struct ib_uverbs_file *ufile,
 }
 
 static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec,
-					    struct uverbs_attr_bundle *attr_bundle)
+					    struct bundle_priv *pbundle)
 {
+	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
 	unsigned int i;
 
 	for (i = 0; i < attr_bundle->num_buckets; i++) {
@@ -316,27 +328,22 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met
 	return 0;
 }
 
-static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
-				const struct ib_uverbs_attr *uattrs,
-				size_t num_uattrs,
-				struct ib_device *ibdev,
-				struct ib_uverbs_file *ufile,
+static int uverbs_handle_method(size_t num_uattrs,
 				const struct uverbs_method_spec *method_spec,
-				struct uverbs_attr_bundle *attr_bundle)
+				struct bundle_priv *pbundle)
 {
+	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
 	int ret;
 	int finalize_ret;
 	int num_given_buckets;
-	struct uverbs_obj_attr *destroy_attr = NULL;
 
 	num_given_buckets =
-		uverbs_uattrs_process(ufile, uattrs, num_uattrs, method_spec,
-				      attr_bundle, &destroy_attr, uattr_ptr);
+		uverbs_uattrs_process(num_uattrs, method_spec, pbundle);
 	if (num_given_buckets <= 0)
 		return -EINVAL;
 
 	attr_bundle->num_buckets = num_given_buckets;
-	ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle);
+	ret = uverbs_validate_kernel_mandatory(method_spec, pbundle);
 	if (ret)
 		goto cleanup;
 
@@ -344,21 +351,21 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
 	 * We destroy the HW object before invoking the handler, handlers do
 	 * not get to manipulate the HW objects.
 	 */
-	if (destroy_attr) {
-		ret = uobj_destroy(destroy_attr->uobject);
+	if (pbundle->destroy_attr) {
+		ret = uobj_destroy(pbundle->destroy_attr->uobject);
 		if (ret)
 			goto cleanup;
 	}
 
-	ret = method_spec->handler(ufile, attr_bundle);
+	ret = method_spec->handler(pbundle->bundle.ufile, attr_bundle);
 
-	if (destroy_attr) {
-		uobj_put_destroy(destroy_attr->uobject);
-		destroy_attr->uobject = NULL;
+	if (pbundle->destroy_attr) {
+		uobj_put_destroy(pbundle->destroy_attr->uobject);
+		pbundle->destroy_attr->uobject = NULL;
 	}
 
 cleanup:
-	finalize_ret = uverbs_finalize_attrs(attr_bundle,
+	finalize_ret = uverbs_finalize_attrs(pbundle,
 					     method_spec->attr_buckets,
 					     attr_bundle->num_buckets,
 					     !ret);
@@ -370,16 +377,13 @@ cleanup:
 static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 				struct ib_uverbs_file *file,
 				struct ib_uverbs_ioctl_hdr *hdr,
-				void __user *buf)
+				struct ib_uverbs_attr __user *user_attrs)
 {
 	const struct uverbs_object_spec *object_spec;
 	const struct uverbs_method_spec *method_spec;
 	long err = 0;
 	unsigned int i;
-	struct {
-		struct ib_uverbs_attr		*uattrs;
-		struct uverbs_attr_bundle	*uverbs_attr_bundle;
-	} *ctx = NULL;
+	struct bundle_priv *ctx;
 	struct uverbs_attr *curr_attr;
 	unsigned long *curr_bitmap;
 	size_t ctx_size;
@@ -397,12 +401,11 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 		return -EPROTONOSUPPORT;
 
 	ctx_size = sizeof(*ctx) +
-		   sizeof(struct uverbs_attr_bundle) +
 		   sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
 		   sizeof(*ctx->uattrs) * hdr->num_attrs +
-		   sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) *
+		   sizeof(*ctx->bundle.hash[0].attrs) *
 		   method_spec->num_child_attrs +
-		   sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) *
+		   sizeof(*ctx->bundle.hash[0].valid_bitmap) *
 			(method_spec->num_child_attrs / BITS_PER_LONG +
 			 method_spec->num_buckets);
 
@@ -413,10 +416,8 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx);
-	ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) +
-			      (sizeof(ctx->uverbs_attr_bundle->hash[0]) *
-			       method_spec->num_buckets);
+	ctx->uattrs = (void *)(ctx + 1) +
+		      (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets);
 	curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
 	curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
 
@@ -432,23 +433,25 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 
 		curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
 
-		ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
+		ctx->bundle.hash[i].attrs = curr_attr;
 		curr_attr += curr_num_attrs;
-		ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs;
-		ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap;
+		ctx->bundle.hash[i].num_attrs = curr_num_attrs;
+		ctx->bundle.hash[i].valid_bitmap = curr_bitmap;
 		bitmap_zero(curr_bitmap, curr_num_attrs);
 		curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
 	}
 
-	err = copy_from_user(ctx->uattrs, buf,
+	err = copy_from_user(ctx->uattrs, user_attrs,
 			     sizeof(*ctx->uattrs) * hdr->num_attrs);
 	if (err) {
 		err = -EFAULT;
 		goto out;
 	}
 
-	err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
-				   file, method_spec, ctx->uverbs_attr_bundle);
+	ctx->destroy_attr = NULL;
+	ctx->bundle.ufile = file;
+	ctx->user_attrs = user_attrs;
+	err = uverbs_handle_method(hdr->num_attrs, method_spec, ctx);
 
 	/*
 	 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
@@ -499,8 +502,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto out;
 		}
 
-		err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr,
-					  (__user void *)arg + sizeof(hdr));
+		err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, user_hdr->attrs);
 	} else {
 		err = -ENOIOCTLCMD;
 	}
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 339996e80c16..3b497d9ed395 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -494,6 +494,7 @@ struct uverbs_attr_bundle_hash {
 };
 
 struct uverbs_attr_bundle {
+	struct ib_uverbs_file *ufile;
 	size_t				num_buckets;
 	struct uverbs_attr_bundle_hash  hash[];
 };
-- 
cgit v1.2.3


From 6a1f444fefeba392d1232b408aaf5902e33e0982 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:39 -0600
Subject: IB/uverbs: Remove the ib_uverbs_attr pointer from each attr

Memory in the bundle is valuable, do not waste it holding an 8 byte
pointer for the rare case of writing to a PTR_OUT. We can compute the
pointer by storing a small 1 byte array offset and the base address of the
uattr memory in the bundle private memory.

This also means we can access the kernel's copy of the ib_uverbs_attr, so
drop the copy of flags as well.

Since the uattr base should be private bundle information this also
de-inlines the already too big uverbs_copy_to inline and moves
create_udata into uverbs_ioctl.c so they can see the private struct
definition.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c     | 67 ++++++++++++++++++++++++++++--
 drivers/infiniband/core/uverbs_std_types.c | 32 --------------
 include/rdma/uverbs_ioctl.h                | 36 ++++------------
 3 files changed, 72 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index efb7adcc21fb..f355e938a0b1 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -84,7 +84,6 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 	spec = &attr_spec_bucket->attrs[attr_id];
 	val_spec = spec;
 	e = &elements[attr_id];
-	e->uattr = uattr_ptr;
 
 	switch (spec->type) {
 	case UVERBS_ATTR_TYPE_ENUM_IN:
@@ -124,8 +123,8 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 		    uattr->attr_data.reserved)
 			return -EINVAL;
 
+		e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
 		e->ptr_attr.len = uattr->len;
-		e->ptr_attr.flags = uattr->flags;
 
 		if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
 			void *p;
@@ -181,7 +180,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 			s64 id = o_attr->uobject->id;
 
 			/* Copy the allocated id to the user-space */
-			if (put_user(id, &e->uattr->data)) {
+			if (put_user(id, &uattr_ptr->data)) {
 				uverbs_finalize_object(o_attr->uobject,
 						       UVERBS_ACCESS_NEW,
 						       false);
@@ -562,3 +561,65 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
 	return 0;
 }
 EXPORT_SYMBOL(uverbs_get_flags32);
+
+/*
+ * This is for ease of conversion. The purpose is to convert all drivers to
+ * use uverbs_attr_bundle instead of ib_udata.  Assume attr == 0 is input and
+ * attr == 1 is output.
+ */
+void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	const struct uverbs_attr *uhw_in =
+		uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
+	const struct uverbs_attr *uhw_out =
+		uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
+
+	if (!IS_ERR(uhw_in)) {
+		udata->inlen = uhw_in->ptr_attr.len;
+		if (uverbs_attr_ptr_is_inline(uhw_in))
+			udata->inbuf =
+				&pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
+					 .data;
+		else
+			udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
+	} else {
+		udata->inbuf = NULL;
+		udata->inlen = 0;
+	}
+
+	if (!IS_ERR(uhw_out)) {
+		udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
+		udata->outlen = uhw_out->ptr_attr.len;
+	} else {
+		udata->outbuf = NULL;
+		udata->outlen = 0;
+	}
+}
+
+int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
+		   const void *from, size_t size)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+	u16 flags;
+	size_t min_size;
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	min_size = min_t(size_t, attr->ptr_attr.len, size);
+	if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+		return -EFAULT;
+
+	flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
+		UVERBS_ATTR_F_VALID_OUTPUT;
+	if (put_user(flags,
+		     &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_copy_to);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 7f22b820a21b..203cc96ac6f5 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -217,38 +217,6 @@ int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 }
 EXPORT_SYMBOL(uverbs_destroy_def_handler);
 
-void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
-{
-	/*
-	 * This is for ease of conversion. The purpose is to convert all drivers
-	 * to use uverbs_attr_bundle instead of ib_udata.
-	 * Assume attr == 0 is input and attr == 1 is output.
-	 */
-	const struct uverbs_attr *uhw_in =
-		uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
-	const struct uverbs_attr *uhw_out =
-		uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
-
-	if (!IS_ERR(uhw_in)) {
-		udata->inlen = uhw_in->ptr_attr.len;
-		if (uverbs_attr_ptr_is_inline(uhw_in))
-			udata->inbuf = &uhw_in->uattr->data;
-		else
-			udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
-	} else {
-		udata->inbuf = NULL;
-		udata->inlen = 0;
-	}
-
-	if (!IS_ERR(uhw_out)) {
-		udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
-		udata->outlen = uhw_out->ptr_attr.len;
-	} else {
-		udata->outbuf = NULL;
-		udata->outlen = 0;
-	}
-}
-
 DECLARE_UVERBS_NAMED_OBJECT(
 	UVERBS_OBJECT_COMP_CHANNEL,
 	UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 3b497d9ed395..ecf028446cdf 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -461,8 +461,7 @@ struct uverbs_ptr_attr {
 		u64 data;
 	};
 	u16		len;
-	/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
-	u16		flags;
+	u16		uattr_idx;
 	u8		enum_id;
 };
 
@@ -471,11 +470,6 @@ struct uverbs_obj_attr {
 };
 
 struct uverbs_attr {
-	/*
-	 * pointer to the user-space given attribute, in order to write the
-	 * new uobject's id or update flags.
-	 */
-	struct ib_uverbs_attr __user	*uattr;
 	union {
 		struct uverbs_ptr_attr	ptr_attr;
 		struct uverbs_obj_attr	obj_attr;
@@ -575,27 +569,6 @@ uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
 	return attr->ptr_attr.len;
 }
 
-static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
-				 size_t idx, const void *from, size_t size)
-{
-	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
-	u16 flags;
-	size_t min_size;
-
-	if (IS_ERR(attr))
-		return PTR_ERR(attr);
-
-	min_size = min_t(size_t, attr->ptr_attr.len, size);
-	if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
-		return -EFAULT;
-
-	flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
-	if (put_user(flags, &attr->uattr->flags))
-		return -EFAULT;
-
-	return 0;
-}
-
 static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
 {
 	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
@@ -676,6 +649,8 @@ int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
 		       size_t idx, u64 allowed_bits);
 int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
 		       size_t idx, u64 allowed_bits);
+int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
+		   const void *from, size_t size);
 #else
 static inline int
 uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -689,6 +664,11 @@ uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
 {
 	return -EINVAL;
 }
+static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
+				 size_t idx, const void *from, size_t size)
+{
+	return -EINVAL;
+}
 #endif
 
 /* =================================================
-- 
cgit v1.2.3


From 40a1227ea845a37ab197dd1caffb60b047fa36b1 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:21 -0700
Subject: tcp: Avoid TCP syncookie rejected by SO_REUSEPORT socket

Although the actual cookie check "__cookie_v[46]_check()" does
not involve sk specific info, it checks whether the sk has recent
synq overflow event in "tcp_synq_no_recent_overflow()".  The
tcp_sk(sk)->rx_opt.ts_recent_stamp is updated every second
when it has sent out a syncookie (through "tcp_synq_overflow()").

The above per sk "recent synq overflow event timestamp" works well
for non SO_REUSEPORT use case.  However, it may cause random
connection request reject/discard when SO_REUSEPORT is used with
syncookie because it fails the "tcp_synq_no_recent_overflow()"
test.

When SO_REUSEPORT is used, it usually has multiple listening
socks serving TCP connection requests destinated to the same local IP:PORT.
There are cases that the TCP-ACK-COOKIE may not be received
by the same sk that sent out the syncookie.  For example,
if reuse->socks[] began with {sk0, sk1},
1) sk1 sent out syncookies and tcp_sk(sk1)->rx_opt.ts_recent_stamp
   was updated.
2) the reuse->socks[] became {sk1, sk2} later.  e.g. sk0 was first closed
   and then sk2 was added.  Here, sk2 does not have ts_recent_stamp set.
   There are other ordering that will trigger the similar situation
   below but the idea is the same.
3) When the TCP-ACK-COOKIE comes back, sk2 was selected.
   "tcp_synq_no_recent_overflow(sk2)" returns true. In this case,
   all syncookies sent by sk1 will be handled (and rejected)
   by sk2 while sk1 is still alive.

The userspace may create and remove listening SO_REUSEPORT sockets
as it sees fit.  E.g. Adding new thread (and SO_REUSEPORT sock) to handle
incoming requests, old process stopping and new process starting...etc.
With or without SO_ATTACH_REUSEPORT_[CB]BPF,
the sockets leaving and joining a reuseport group makes picking
the same sk to check the syncookie very difficult (if not impossible).

The later patches will allow bpf prog more flexibility in deciding
where a sk should be located in a bpf map and selecting a particular
SO_REUSEPORT sock as it sees fit.  e.g. Without closing any sock,
replace the whole bpf reuseport_array in one map_update() by using
map-in-map.  Getting the syncookie check working smoothly across
socks in the same "reuse->socks[]" is important.

A partial solution is to set the newly added sk's ts_recent_stamp
to the max ts_recent_stamp of a reuseport group but that will require
to iterate through reuse->socks[]  OR
pessimistically set it to "now - TCP_SYNCOOKIE_VALID" when a sk is
joining a reuseport group.  However, neither of them will solve the
existing sk getting moved around the reuse->socks[] and that
sk may not have ts_recent_stamp updated, unlikely under continuous
synflood but not impossible.

This patch opts to treat the reuseport group as a whole when
considering the last synq overflow timestamp since
they are serving the same IP:PORT from the userspace
(and BPF program) perspective.

"synq_overflow_ts" is added to "struct sock_reuseport".
The tcp_synq_overflow() and tcp_synq_no_recent_overflow()
will update/check reuse->synq_overflow_ts if the sk is
in a reuseport group.  Similar to the reuseport decision in
__inet_lookup_listener(), both sk->sk_reuseport and
sk->sk_reuseport_cb are tested for SO_REUSEPORT usage.
Update on "synq_overflow_ts" happens at roughly once
every second.

A synflood test was done with a 16 rx-queues and 16 reuseport sockets.
No meaningful performance change is observed.  Before and
after the change is ~9Mpps in IPv4.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock_reuseport.h |  4 ++++
 include/net/tcp.h            | 30 ++++++++++++++++++++++++++++--
 net/core/sock_reuseport.c    |  1 +
 3 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..6bef7a0052f2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -12,6 +12,10 @@ struct sock_reuseport {
 
 	u16			max_socks;	/* length of socks */
 	u16			num_socks;	/* elements in socks */
+	/* The last synq overflow event timestamp of this
+	 * reuse->socks[] group.
+	 */
+	unsigned int		synq_overflow_ts;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d769dc20359b..d196901c9dba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -36,6 +36,7 @@
 #include <net/inet_hashtables.h>
 #include <net/checksum.h>
 #include <net/request_sock.h>
+#include <net/sock_reuseport.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 #include <net/ip.h>
@@ -473,9 +474,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
  */
 static inline void tcp_synq_overflow(const struct sock *sk)
 {
-	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow;
 	unsigned int now = jiffies;
 
+	if (sk->sk_reuseport) {
+		struct sock_reuseport *reuse;
+
+		reuse = rcu_dereference(sk->sk_reuseport_cb);
+		if (likely(reuse)) {
+			last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+			if (time_after32(now, last_overflow + HZ))
+				WRITE_ONCE(reuse->synq_overflow_ts, now);
+			return;
+		}
+	}
+
+	last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
 	if (time_after32(now, last_overflow + HZ))
 		tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
 }
@@ -483,9 +497,21 @@ static inline void tcp_synq_overflow(const struct sock *sk)
 /* syncookies: no recent synqueue overflow on this listening socket? */
 static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
 {
-	unsigned int last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+	unsigned int last_overflow;
 	unsigned int now = jiffies;
 
+	if (sk->sk_reuseport) {
+		struct sock_reuseport *reuse;
+
+		reuse = rcu_dereference(sk->sk_reuseport_cb);
+		if (likely(reuse)) {
+			last_overflow = READ_ONCE(reuse->synq_overflow_ts);
+			return time_after32(now, last_overflow +
+					    TCP_SYNCOOKIE_VALID);
+		}
+	}
+
+	last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
 	return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
 }
 
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 064acb04be0f..3f188fad0162 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -81,6 +81,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
+	more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
 
 	for (i = 0; i < reuse->num_socks; ++i)
 		rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
-- 
cgit v1.2.3


From 736b46027eb4a4c602d3b8b93d2f48c9facbd915 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:22 -0700
Subject: net: Add ID (if needed) to sock_reuseport and expose reuseport_lock

A later patch will introduce a BPF_MAP_TYPE_REUSEPORT_ARRAY which
allows a SO_REUSEPORT sk to be added to a bpf map.  When a sk
is removed from reuse->socks[], it also needs to be removed from
the bpf map.  Also, when adding a sk to a bpf map, the bpf
map needs to ensure it is indeed in a reuse->socks[].
Hence, reuseport_lock is needed by the bpf map to ensure its
map_update_elem() and map_delete_elem() operations are in-sync with
the reuse->socks[].  The BPF_MAP_TYPE_REUSEPORT_ARRAY map will only
acquire the reuseport_lock after ensuring the adding sk is already
in a reuseport group (i.e. reuse->socks[]).  The map_lookup_elem()
will be lockless.

This patch also adds an ID to sock_reuseport.  A later patch
will introduce BPF_PROG_TYPE_SK_REUSEPORT which allows
a bpf prog to select a sk from a bpf map.  It is inflexible to
statically enforce a bpf map can only contain the sk belonging to
a particular reuse->socks[] (i.e. same IP:PORT) during the bpf
verification time. For example, think about the the map-in-map situation
where the inner map can be dynamically changed in runtime and the outer
map may have inner maps belonging to different reuseport groups.
Hence, when the bpf prog (in the new BPF_PROG_TYPE_SK_REUSEPORT
type) selects a sk,  this selected sk has to be checked to ensure it
belongs to the requesting reuseport group (i.e. the group serving
that IP:PORT).

The "sk->sk_reuseport_cb" pointer cannot be used for this checking
purpose because the pointer value will change after reuseport_grow().
Instead of saving all checking conditions like the ones
preced calling "reuseport_add_sock()" and compare them everytime a
bpf_prog is run, a 32bits ID is introduced to survive the
reuseport_grow().  The ID is only acquired if any of the
reuse->socks[] is added to the newly introduced
"BPF_MAP_TYPE_REUSEPORT_ARRAY" map.

If "BPF_MAP_TYPE_REUSEPORT_ARRAY" is not used,  the changes in this
patch is a no-op.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/sock_reuseport.h |  6 ++++++
 net/core/sock_reuseport.c    | 27 ++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 6bef7a0052f2..e1a7681856f7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -5,8 +5,11 @@
 #include <linux/filter.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
+#include <linux/spinlock.h>
 #include <net/sock.h>
 
+extern spinlock_t reuseport_lock;
+
 struct sock_reuseport {
 	struct rcu_head		rcu;
 
@@ -16,6 +19,8 @@ struct sock_reuseport {
 	 * reuse->socks[] group.
 	 */
 	unsigned int		synq_overflow_ts;
+	/* ID stays the same even after the size of socks[] grows. */
+	unsigned int		reuseport_id;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -29,5 +34,6 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  int hdr_len);
 extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
 					      struct bpf_prog *prog);
+int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 3f188fad0162..cf2e4d305af9 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -8,11 +8,33 @@
 
 #include <net/sock_reuseport.h>
 #include <linux/bpf.h>
+#include <linux/idr.h>
 #include <linux/rcupdate.h>
 
 #define INIT_SOCKS 128
 
-static DEFINE_SPINLOCK(reuseport_lock);
+DEFINE_SPINLOCK(reuseport_lock);
+
+#define REUSEPORT_MIN_ID 1
+static DEFINE_IDA(reuseport_ida);
+
+int reuseport_get_id(struct sock_reuseport *reuse)
+{
+	int id;
+
+	if (reuse->reuseport_id)
+		return reuse->reuseport_id;
+
+	id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0,
+			    /* Called under reuseport_lock */
+			    GFP_ATOMIC);
+	if (id < 0)
+		return id;
+
+	reuse->reuseport_id = id;
+
+	return reuse->reuseport_id;
+}
 
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
@@ -78,6 +100,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->max_socks = more_socks_size;
 	more_reuse->num_socks = reuse->num_socks;
 	more_reuse->prog = reuse->prog;
+	more_reuse->reuseport_id = reuse->reuseport_id;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -102,6 +125,8 @@ static void reuseport_free_rcu(struct rcu_head *head)
 	reuse = container_of(head, struct sock_reuseport, rcu);
 	if (reuse->prog)
 		bpf_prog_destroy(reuse->prog);
+	if (reuse->reuseport_id)
+		ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
 	kfree(reuse);
 }
 
-- 
cgit v1.2.3


From 5dc4c4b7d4e8115e7cde96a030f98cb3ab2e458c Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:24 -0700
Subject: bpf: Introduce BPF_MAP_TYPE_REUSEPORT_SOCKARRAY

This patch introduces a new map type BPF_MAP_TYPE_REUSEPORT_SOCKARRAY.

To unleash the full potential of a bpf prog, it is essential for the
userspace to be capable of directly setting up a bpf map which can then
be consumed by the bpf prog to make decision.  In this case, decide which
SO_REUSEPORT sk to serve the incoming request.

By adding BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, the userspace has total control
and visibility on where a SO_REUSEPORT sk should be located in a bpf map.
The later patch will introduce BPF_PROG_TYPE_SK_REUSEPORT such that
the bpf prog can directly select a sk from the bpf map.  That will
raise the programmability of the bpf prog attached to a reuseport
group (a group of sk serving the same IP:PORT).

For example, in UDP, the bpf prog can peek into the payload (e.g.
through the "data" pointer introduced in the later patch) to learn
the application level's connection information and then decide which sk
to pick from a bpf map.  The userspace can tightly couple the sk's location
in a bpf map with the application logic in generating the UDP payload's
connection information.  This connection info contact/API stays within the
userspace.

Also, when used with map-in-map, the userspace can switch the
old-server-process's inner map to a new-server-process's inner map
in one call "bpf_map_update_elem(outer_map, &index, &new_reuseport_array)".
The bpf prog will then direct incoming requests to the new process instead
of the old process.  The old process can finish draining the pending
requests (e.g. by "accept()") before closing the old-fds.  [Note that
deleting a fd from a bpf map does not necessary mean the fd is closed]

During map_update_elem(),
Only SO_REUSEPORT sk (i.e. which has already been added
to a reuse->socks[]) can be used.  That means a SO_REUSEPORT sk that is
"bind()" for UDP or "bind()+listen()" for TCP.  These conditions are
ensured in "reuseport_array_update_check()".

A SO_REUSEPORT sk can only be added once to a map (i.e. the
same sk cannot be added twice even to the same map).  SO_REUSEPORT
already allows another sk to be created for the same IP:PORT.
There is no need to re-create a similar usage in the BPF side.

When a SO_REUSEPORT is deleted from the "reuse->socks[]" (e.g. "close()"),
it will notify the bpf map to remove it from the map also.  It is
done through "bpf_sk_reuseport_detach()" and it will only be called
if >=1 of the "reuse->sock[]" has ever been added to a bpf map.

The map_update()/map_delete() has to be in-sync with the
"reuse->socks[]".  Hence, the same "reuseport_lock" used
by "reuse->socks[]" has to be used here also. Care has
been taken to ensure the lock is only acquired when the
adding sk passes some strict tests. and
freeing the map does not require the reuseport_lock.

The reuseport_array will also support lookup from the syscall
side.  It will return a sock_gen_cookie().  The sock_gen_cookie()
is on-demand (i.e. a sk's cookie is not generated until the very
first map_lookup_elem()).

The lookup cookie is 64bits but it goes against the logical userspace
expectation on 32bits sizeof(fd) (and as other fd based bpf maps do also).
It may catch user in surprise if we enforce value_size=8 while
userspace still pass a 32bits fd during update.  Supporting different
value_size between lookup and update seems unintuitive also.

We also need to consider what if other existing fd based maps want
to return 64bits value from syscall's lookup in the future.
Hence, reuseport_array supports both value_size 4 and 8, and
assuming user will usually use value_size=4.  The syscall's lookup
will return ENOSPC on value_size=4.  It will will only
return 64bits value from sock_gen_cookie() when user consciously
choose value_size=8 (as a signal that lookup is desired) which then
requires a 64bits value in both lookup and update.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          |  28 ++++
 include/linux/bpf_types.h    |   3 +
 include/uapi/linux/bpf.h     |   1 +
 kernel/bpf/Makefile          |   3 +
 kernel/bpf/arraymap.c        |   2 +-
 kernel/bpf/reuseport_array.c | 363 +++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c         |   6 +
 net/core/sock_reuseport.c    |   8 +
 8 files changed, 413 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/reuseport_array.c

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..db11662faea6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -524,6 +524,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
 }
 
 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+int array_map_alloc_check(union bpf_attr *attr);
 
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -769,6 +770,33 @@ static inline void __xsk_map_flush(struct bpf_map *map)
 }
 #endif
 
+#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
+void bpf_sk_reuseport_detach(struct sock *sk);
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value);
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags);
+#else
+static inline void bpf_sk_reuseport_detach(struct sock *sk)
+{
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
+						     void *key, void *value)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
+						     void *key, void *value,
+						     u64 map_flags)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_SYSCALL */
+#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+
 /* verifier prototypes for helper functions called from eBPF programs */
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index add08be53b6f..14fd6c02d258 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -60,4 +60,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 #endif
+#ifdef CONFIG_INET
+BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
+#endif
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..40f584bc7da0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -126,6 +126,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_XSKMAP,
 	BPF_MAP_TYPE_SOCKHASH,
 	BPF_MAP_TYPE_CGROUP_STORAGE,
+	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e8906cbad81f..0488b8258321 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -23,3 +23,6 @@ ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
 obj-$(CONFIG_CGROUP_BPF) += cgroup.o
+ifeq ($(CONFIG_INET),y)
+obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
+endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2aa55d030c77..f6ca3e712831 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -54,7 +54,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array)
 }
 
 /* Called from syscall */
-static int array_map_alloc_check(union bpf_attr *attr)
+int array_map_alloc_check(union bpf_attr *attr)
 {
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int numa_node = bpf_map_attr_numa_node(attr);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
new file mode 100644
index 000000000000..18e225de80ff
--- /dev/null
+++ b/kernel/bpf/reuseport_array.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Facebook
+ */
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/sock_diag.h>
+#include <net/sock_reuseport.h>
+
+struct reuseport_array {
+	struct bpf_map map;
+	struct sock __rcu *ptrs[];
+};
+
+static struct reuseport_array *reuseport_array(struct bpf_map *map)
+{
+	return (struct reuseport_array *)map;
+}
+
+/* The caller must hold the reuseport_lock */
+void bpf_sk_reuseport_detach(struct sock *sk)
+{
+	struct sock __rcu **socks;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	socks = sk->sk_user_data;
+	if (socks) {
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		/*
+		 * Do not move this NULL assignment outside of
+		 * sk->sk_callback_lock because there is
+		 * a race with reuseport_array_free()
+		 * which does not hold the reuseport_lock.
+		 */
+		RCU_INIT_POINTER(*socks, NULL);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static int reuseport_array_alloc_check(union bpf_attr *attr)
+{
+	if (attr->value_size != sizeof(u32) &&
+	    attr->value_size != sizeof(u64))
+		return -EINVAL;
+
+	return array_map_alloc_check(attr);
+}
+
+static void *reuseport_array_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return rcu_dereference(array->ptrs[index]);
+}
+
+/* Called from syscall only */
+static int reuseport_array_delete_elem(struct bpf_map *map, void *key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = *(u32 *)key;
+	struct sock *sk;
+	int err;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (!rcu_access_pointer(array->ptrs[index]))
+		return -ENOENT;
+
+	spin_lock_bh(&reuseport_lock);
+
+	sk = rcu_dereference_protected(array->ptrs[index],
+				       lockdep_is_held(&reuseport_lock));
+	if (sk) {
+		write_lock_bh(&sk->sk_callback_lock);
+		WRITE_ONCE(sk->sk_user_data, NULL);
+		RCU_INIT_POINTER(array->ptrs[index], NULL);
+		write_unlock_bh(&sk->sk_callback_lock);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+
+	return err;
+}
+
+static void reuseport_array_free(struct bpf_map *map)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *sk;
+	u32 i;
+
+	synchronize_rcu();
+
+	/*
+	 * ops->map_*_elem() will not be able to access this
+	 * array now. Hence, this function only races with
+	 * bpf_sk_reuseport_detach() which was triggerred by
+	 * close() or disconnect().
+	 *
+	 * This function and bpf_sk_reuseport_detach() are
+	 * both removing sk from "array".  Who removes it
+	 * first does not matter.
+	 *
+	 * The only concern here is bpf_sk_reuseport_detach()
+	 * may access "array" which is being freed here.
+	 * bpf_sk_reuseport_detach() access this "array"
+	 * through sk->sk_user_data _and_ with sk->sk_callback_lock
+	 * held which is enough because this "array" is not freed
+	 * until all sk->sk_user_data has stopped referencing this "array".
+	 *
+	 * Hence, due to the above, taking "reuseport_lock" is not
+	 * needed here.
+	 */
+
+	/*
+	 * Since reuseport_lock is not taken, sk is accessed under
+	 * rcu_read_lock()
+	 */
+	rcu_read_lock();
+	for (i = 0; i < map->max_entries; i++) {
+		sk = rcu_dereference(array->ptrs[i]);
+		if (sk) {
+			write_lock_bh(&sk->sk_callback_lock);
+			/*
+			 * No need for WRITE_ONCE(). At this point,
+			 * no one is reading it without taking the
+			 * sk->sk_callback_lock.
+			 */
+			sk->sk_user_data = NULL;
+			write_unlock_bh(&sk->sk_callback_lock);
+			RCU_INIT_POINTER(array->ptrs[i], NULL);
+		}
+	}
+	rcu_read_unlock();
+
+	/*
+	 * Once reaching here, all sk->sk_user_data is not
+	 * referenceing this "array".  "array" can be freed now.
+	 */
+	bpf_map_area_free(array);
+}
+
+static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
+{
+	int err, numa_node = bpf_map_attr_numa_node(attr);
+	struct reuseport_array *array;
+	u64 cost, array_size;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	array_size = sizeof(*array);
+	array_size += (u64)attr->max_entries * sizeof(struct sock *);
+
+	/* make sure there is no u32 overflow later in round_up() */
+	cost = array_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-ENOMEM);
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	err = bpf_map_precharge_memlock(cost);
+	if (err)
+		return ERR_PTR(err);
+
+	/* allocate all map elements and zero-initialize them */
+	array = bpf_map_area_alloc(array_size, numa_node);
+	if (!array)
+		return ERR_PTR(-ENOMEM);
+
+	/* copy mandatory map attributes */
+	bpf_map_init_from_attr(&array->map, attr);
+	array->map.pages = cost;
+
+	return &array->map;
+}
+
+int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
+				       void *value)
+{
+	struct sock *sk;
+	int err;
+
+	if (map->value_size != sizeof(u64))
+		return -ENOSPC;
+
+	rcu_read_lock();
+	sk = reuseport_array_lookup_elem(map, key);
+	if (sk) {
+		*(u64 *)value = sock_gen_cookie(sk);
+		err = 0;
+	} else {
+		err = -ENOENT;
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+
+static int
+reuseport_array_update_check(const struct reuseport_array *array,
+			     const struct sock *nsk,
+			     const struct sock *osk,
+			     const struct sock_reuseport *nsk_reuse,
+			     u32 map_flags)
+{
+	if (osk && map_flags == BPF_NOEXIST)
+		return -EEXIST;
+
+	if (!osk && map_flags == BPF_EXIST)
+		return -ENOENT;
+
+	if (nsk->sk_protocol != IPPROTO_UDP && nsk->sk_protocol != IPPROTO_TCP)
+		return -ENOTSUPP;
+
+	if (nsk->sk_family != AF_INET && nsk->sk_family != AF_INET6)
+		return -ENOTSUPP;
+
+	if (nsk->sk_type != SOCK_STREAM && nsk->sk_type != SOCK_DGRAM)
+		return -ENOTSUPP;
+
+	/*
+	 * sk must be hashed (i.e. listening in the TCP case or binded
+	 * in the UDP case) and
+	 * it must also be a SO_REUSEPORT sk (i.e. reuse cannot be NULL).
+	 *
+	 * Also, sk will be used in bpf helper that is protected by
+	 * rcu_read_lock().
+	 */
+	if (!sock_flag(nsk, SOCK_RCU_FREE) || !sk_hashed(nsk) || !nsk_reuse)
+		return -EINVAL;
+
+	/* READ_ONCE because the sk->sk_callback_lock may not be held here */
+	if (READ_ONCE(nsk->sk_user_data))
+		return -EBUSY;
+
+	return 0;
+}
+
+/*
+ * Called from syscall only.
+ * The "nsk" in the fd refcnt.
+ * The "osk" and "reuse" are protected by reuseport_lock.
+ */
+int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	struct sock *free_osk = NULL, *osk, *nsk;
+	struct sock_reuseport *reuse;
+	u32 index = *(u32 *)key;
+	struct socket *socket;
+	int err, fd;
+
+	if (map_flags > BPF_EXIST)
+		return -EINVAL;
+
+	if (index >= map->max_entries)
+		return -E2BIG;
+
+	if (map->value_size == sizeof(u64)) {
+		u64 fd64 = *(u64 *)value;
+
+		if (fd64 > S32_MAX)
+			return -EINVAL;
+		fd = fd64;
+	} else {
+		fd = *(int *)value;
+	}
+
+	socket = sockfd_lookup(fd, &err);
+	if (!socket)
+		return err;
+
+	nsk = socket->sk;
+	if (!nsk) {
+		err = -EINVAL;
+		goto put_file;
+	}
+
+	/* Quick checks before taking reuseport_lock */
+	err = reuseport_array_update_check(array, nsk,
+					   rcu_access_pointer(array->ptrs[index]),
+					   rcu_access_pointer(nsk->sk_reuseport_cb),
+					   map_flags);
+	if (err)
+		goto put_file;
+
+	spin_lock_bh(&reuseport_lock);
+	/*
+	 * Some of the checks only need reuseport_lock
+	 * but it is done under sk_callback_lock also
+	 * for simplicity reason.
+	 */
+	write_lock_bh(&nsk->sk_callback_lock);
+
+	osk = rcu_dereference_protected(array->ptrs[index],
+					lockdep_is_held(&reuseport_lock));
+	reuse = rcu_dereference_protected(nsk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	err = reuseport_array_update_check(array, nsk, osk, reuse, map_flags);
+	if (err)
+		goto put_file_unlock;
+
+	/* Ensure reuse->reuseport_id is set */
+	err = reuseport_get_id(reuse);
+	if (err < 0)
+		goto put_file_unlock;
+
+	WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]);
+	rcu_assign_pointer(array->ptrs[index], nsk);
+	free_osk = osk;
+	err = 0;
+
+put_file_unlock:
+	write_unlock_bh(&nsk->sk_callback_lock);
+
+	if (free_osk) {
+		write_lock_bh(&free_osk->sk_callback_lock);
+		WRITE_ONCE(free_osk->sk_user_data, NULL);
+		write_unlock_bh(&free_osk->sk_callback_lock);
+	}
+
+	spin_unlock_bh(&reuseport_lock);
+put_file:
+	fput(socket->file);
+	return err;
+}
+
+/* Called from syscall */
+static int reuseport_array_get_next_key(struct bpf_map *map, void *key,
+					void *next_key)
+{
+	struct reuseport_array *array = reuseport_array(map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = (u32 *)next_key;
+
+	if (index >= array->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == array->map.max_entries - 1)
+		return -ENOENT;
+
+	*next = index + 1;
+	return 0;
+}
+
+const struct bpf_map_ops reuseport_array_ops = {
+	.map_alloc_check = reuseport_array_alloc_check,
+	.map_alloc = reuseport_array_alloc,
+	.map_free = reuseport_array_free,
+	.map_lookup_elem = reuseport_array_lookup_elem,
+	.map_get_next_key = reuseport_array_get_next_key,
+	.map_delete_elem = reuseport_array_delete_elem,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5af4e9e2722d..57f4d076141b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -684,6 +684,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -790,6 +792,10 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
 						  attr->flags);
 		rcu_read_unlock();
+	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
+		/* rcu_read_lock() is not needed */
+		err = bpf_fd_reuseport_array_update_elem(map, key, value,
+							 attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index cf2e4d305af9..8235f2439816 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -186,6 +186,14 @@ void reuseport_detach_sock(struct sock *sk)
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
+
+	/* At least one of the sk in this reuseport group is added to
+	 * a bpf map.  Notify the bpf side.  The bpf map logic will
+	 * remove the sk if it is indeed added to a bpf map.
+	 */
+	if (reuse->reuseport_id)
+		bpf_sk_reuseport_detach(sk);
+
 	rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
 
 	for (i = 0; i < reuse->num_socks; i++) {
-- 
cgit v1.2.3


From 2dbb9b9e6df67d444fbe425c7f6014858d337adf Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:25 -0700
Subject: bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT

This patch adds a BPF_PROG_TYPE_SK_REUSEPORT which can select
a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY.  Like other
non SK_FILTER/CGROUP_SKB program, it requires CAP_SYS_ADMIN.

BPF_PROG_TYPE_SK_REUSEPORT introduces "struct sk_reuseport_kern"
to store the bpf context instead of using the skb->cb[48].

At the SO_REUSEPORT sk lookup time, it is in the middle of transiting
from a lower layer (ipv4/ipv6) to a upper layer (udp/tcp).  At this
point,  it is not always clear where the bpf context can be appended
in the skb->cb[48] to avoid saving-and-restoring cb[].  Even putting
aside the difference between ipv4-vs-ipv6 and udp-vs-tcp.  It is not
clear if the lower layer is only ipv4 and ipv6 in the future and
will it not touch the cb[] again before transiting to the upper
layer.

For example, in udp_gro_receive(), it uses the 48 byte NAPI_GRO_CB
instead of IP[6]CB and it may still modify the cb[] after calling
the udp[46]_lib_lookup_skb().  Because of the above reason, if
sk->cb is used for the bpf ctx, saving-and-restoring is needed
and likely the whole 48 bytes cb[] has to be saved and restored.

Instead of saving, setting and restoring the cb[], this patch opts
to create a new "struct sk_reuseport_kern" and setting the needed
values in there.

The new BPF_PROG_TYPE_SK_REUSEPORT and "struct sk_reuseport_(kern|md)"
will serve all ipv4/ipv6 + udp/tcp combinations.  There is no protocol
specific usage at this point and it is also inline with the current
sock_reuseport.c implementation (i.e. no protocol specific requirement).

In "struct sk_reuseport_md", this patch exposes data/data_end/len
with semantic similar to other existing usages.  Together
with "bpf_skb_load_bytes()" and "bpf_skb_load_bytes_relative()",
the bpf prog can peek anywhere in the skb.  The "bind_inany" tells
the bpf prog that the reuseport group is bind-ed to a local
INANY address which cannot be learned from skb.

The new "bind_inany" is added to "struct sock_reuseport" which will be
used when running the new "BPF_PROG_TYPE_SK_REUSEPORT" bpf prog in order
to avoid repeating the "bind INANY" test on
"sk_v6_rcv_saddr/sk->sk_rcv_saddr" every time a bpf prog is run.  It can
only be properly initialized when a "sk->sk_reuseport" enabled sk is
adding to a hashtable (i.e. during "reuseport_alloc()" and
"reuseport_add_sock()").

The new "sk_select_reuseport()" is the main helper that the
bpf prog will use to select a SO_REUSEPORT sk.  It is the only function
that can use the new BPF_MAP_TYPE_REUSEPORT_ARRAY.  As mentioned in
the earlier patch, the validity of a selected sk is checked in
run time in "sk_select_reuseport()".  Doing the check in
verification time is difficult and inflexible (consider the map-in-map
use case).  The runtime check is to compare the selected sk's reuseport_id
with the reuseport_id that we want.  This helper will return -EXXX if the
selected sk cannot serve the incoming request (e.g. reuseport_id
not match).  The bpf prog can decide if it wants to do SK_DROP as its
discretion.

When the bpf prog returns SK_PASS, the kernel will check if a
valid sk has been selected (i.e. "reuse_kern->selected_sk != NULL").
If it does , it will use the selected sk.  If not, the kernel
will select one from "reuse->socks[]" (as before this patch).

The SK_DROP and SK_PASS handling logic will be in the next patch.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_types.h       |   3 +
 include/linux/filter.h          |  15 +++
 include/net/addrconf.h          |   1 +
 include/net/sock_reuseport.h    |   6 +-
 include/uapi/linux/bpf.h        |  36 +++++-
 kernel/bpf/verifier.c           |   9 ++
 net/core/filter.c               | 269 +++++++++++++++++++++++++++++++++++++++-
 net/core/sock_reuseport.c       |  20 ++-
 net/ipv4/inet_connection_sock.c |   9 ++
 net/ipv4/inet_hashtables.c      |   5 +-
 net/ipv4/udp.c                  |   5 +-
 11 files changed, 365 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 14fd6c02d258..cd26c090e7c0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -29,6 +29,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
 #ifdef CONFIG_BPF_LIRC_MODE2
 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #endif
+#ifdef CONFIG_INET
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+#endif
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b072dab32c0..70e9d57677fe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -32,6 +32,7 @@ struct seccomp_data;
 struct bpf_prog_aux;
 struct xdp_rxq_info;
 struct xdp_buff;
+struct sock_reuseport;
 
 /* ArgX, context and stack frame pointer register positions. Note,
  * Arg1, Arg2, Arg3, etc are used as argument mappings of function
@@ -833,6 +834,20 @@ void bpf_warn_invalid_xdp_action(u32 act);
 struct sock *do_sk_redirect_map(struct sk_buff *skb);
 struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 
+#ifdef CONFIG_INET
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash);
+#else
+static inline struct sock *
+bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+		     struct bpf_prog *prog, struct sk_buff *skb,
+		     u32 hash)
+{
+	return NULL;
+}
+#endif
+
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 5f43f7a70fe6..6def0351bcc3 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -108,6 +108,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		    u32 banned_flags);
 bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 			  bool match_wildcard);
+bool inet_rcv_saddr_any(const struct sock *sk);
 void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
 void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
 
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index e1a7681856f7..73b569556be6 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,12 +21,14 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
+	bool			bind_inany;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
-extern int reuseport_alloc(struct sock *sk);
-extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
+extern int reuseport_alloc(struct sock *sk, bool bind_inany);
+extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
+			      bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 40f584bc7da0..3102a2a23c31 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -151,6 +151,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
+	BPF_PROG_TYPE_SK_REUSEPORT,
 };
 
 enum bpf_attach_type {
@@ -2114,6 +2115,14 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Select a SO_REUSEPORT sk from a	BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ *		It checks the selected sk is matching the incoming
+ *		request in the skb.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2197,7 +2206,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(sk_select_reuseport),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2414,6 +2424,30 @@ struct sk_msg_md {
 	__u32 local_port;	/* stored in host byte order */
 };
 
+struct sk_reuseport_md {
+	/*
+	 * Start of directly accessible data. It begins from
+	 * the tcp/udp header.
+	 */
+	void *data;
+	void *data_end;		/* End of directly accessible data */
+	/*
+	 * Total length of packet (starting from the tcp/udp header).
+	 * Note that the directly accessible bytes (data_end - data)
+	 * could be less than this "len".  Those bytes could be
+	 * indirectly read by a helper "bpf_skb_load_bytes()".
+	 */
+	__u32 len;
+	/*
+	 * Eth protocol in the mac header (network byte order). e.g.
+	 * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+	 */
+	__u32 eth_protocol;
+	__u32 ip_protocol;	/* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+	__u32 bind_inany;	/* Is sock bound to an INANY address? */
+	__u32 hash;		/* A hash of the packet 4 tuples */
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 587468a9c37d..ca90679a7fe5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1310,6 +1310,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_IN:
 	case BPF_PROG_TYPE_LWT_OUT:
 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SK_REUSEPORT:
 		/* dst_input() and dst_output() can't write for now */
 		if (t == BPF_WRITE)
 			return false;
@@ -2166,6 +2167,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_msg_redirect_hash)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+		if (func_id != BPF_FUNC_sk_select_reuseport)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2217,6 +2222,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
 			goto error;
 		break;
+	case BPF_FUNC_sk_select_reuseport:
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			goto error;
+		break;
 	default:
 		break;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index 2de7dd9f2a57..142595b4e0d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1462,7 +1462,7 @@ static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
 		return -ENOMEM;
 
 	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		err = reuseport_alloc(sk);
+		err = reuseport_alloc(sk, false);
 		if (err)
 			return err;
 	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -7013,3 +7013,270 @@ out:
 	release_sock(sk);
 	return ret;
 }
+
+#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+	struct sk_buff *skb;
+	struct sock *sk;
+	struct sock *selected_sk;
+	void *data_end;
+	u32 hash;
+	u32 reuseport_id;
+	bool bind_inany;
+};
+
+static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
+				    struct sock_reuseport *reuse,
+				    struct sock *sk, struct sk_buff *skb,
+				    u32 hash)
+{
+	reuse_kern->skb = skb;
+	reuse_kern->sk = sk;
+	reuse_kern->selected_sk = NULL;
+	reuse_kern->data_end = skb->data + skb_headlen(skb);
+	reuse_kern->hash = hash;
+	reuse_kern->reuseport_id = reuse->reuseport_id;
+	reuse_kern->bind_inany = reuse->bind_inany;
+}
+
+struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
+				  struct bpf_prog *prog, struct sk_buff *skb,
+				  u32 hash)
+{
+	struct sk_reuseport_kern reuse_kern;
+	enum sk_action action;
+
+	bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+	action = BPF_PROG_RUN(prog, &reuse_kern);
+
+	if (action == SK_PASS)
+		return reuse_kern.selected_sk;
+	else
+		return ERR_PTR(-ECONNREFUSED);
+}
+
+BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
+	   struct bpf_map *, map, void *, key, u32, flags)
+{
+	struct sock_reuseport *reuse;
+	struct sock *selected_sk;
+
+	selected_sk = map->ops->map_lookup_elem(map, key);
+	if (!selected_sk)
+		return -ENOENT;
+
+	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
+	if (!reuse)
+		/* selected_sk is unhashed (e.g. by close()) after the
+		 * above map_lookup_elem().  Treat selected_sk has already
+		 * been removed from the map.
+		 */
+		return -ENOENT;
+
+	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
+		struct sock *sk;
+
+		if (unlikely(!reuse_kern->reuseport_id))
+			/* There is a small race between adding the
+			 * sk to the map and setting the
+			 * reuse_kern->reuseport_id.
+			 * Treat it as the sk has not been added to
+			 * the bpf map yet.
+			 */
+			return -ENOENT;
+
+		sk = reuse_kern->sk;
+		if (sk->sk_protocol != selected_sk->sk_protocol)
+			return -EPROTOTYPE;
+		else if (sk->sk_family != selected_sk->sk_family)
+			return -EAFNOSUPPORT;
+
+		/* Catch all. Likely bound to a different sockaddr. */
+		return -EBADFD;
+	}
+
+	reuse_kern->selected_sk = selected_sk;
+
+	return 0;
+}
+
+static const struct bpf_func_proto sk_select_reuseport_proto = {
+	.func           = sk_select_reuseport,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(sk_reuseport_load_bytes,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len)
+{
+	return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
+	.func		= sk_reuseport_load_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(sk_reuseport_load_bytes_relative,
+	   const struct sk_reuseport_kern *, reuse_kern, u32, offset,
+	   void *, to, u32, len, u32, start_header)
+{
+	return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
+					       len, start_header);
+}
+
+static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
+	.func		= sk_reuseport_load_bytes_relative,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *
+sk_reuseport_func_proto(enum bpf_func_id func_id,
+			const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_sk_select_reuseport:
+		return &sk_select_reuseport_proto;
+	case BPF_FUNC_skb_load_bytes:
+		return &sk_reuseport_load_bytes_proto;
+	case BPF_FUNC_skb_load_bytes_relative:
+		return &sk_reuseport_load_bytes_relative_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+static bool
+sk_reuseport_is_valid_access(int off, int size,
+			     enum bpf_access_type type,
+			     const struct bpf_prog *prog,
+			     struct bpf_insn_access_aux *info)
+{
+	const u32 size_default = sizeof(__u32);
+
+	if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
+	    off % size || type != BPF_READ)
+		return false;
+
+	switch (off) {
+	case offsetof(struct sk_reuseport_md, data):
+		info->reg_type = PTR_TO_PACKET;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		return size == sizeof(__u64);
+
+	case offsetof(struct sk_reuseport_md, hash):
+		return size == size_default;
+
+	/* Fields that allow narrowing */
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+			return false;
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+	case offsetof(struct sk_reuseport_md, bind_inany):
+	case offsetof(struct sk_reuseport_md, len):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+
+	default:
+		return false;
+	}
+}
+
+#define SK_REUSEPORT_LOAD_FIELD(F) ({					\
+	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+			      si->dst_reg, si->src_reg,			\
+			      bpf_target_off(struct sk_reuseport_kern, F, \
+					     FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+					     target_size));		\
+	})
+
+#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD)				\
+	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
+				    struct sk_buff,			\
+				    skb,				\
+				    SKB_FIELD)
+
+#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
+	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
+					     struct sock,		\
+					     sk,			\
+					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
+
+static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
+					   const struct bpf_insn *si,
+					   struct bpf_insn *insn_buf,
+					   struct bpf_prog *prog,
+					   u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct sk_reuseport_md, data):
+		SK_REUSEPORT_LOAD_SKB_FIELD(data);
+		break;
+
+	case offsetof(struct sk_reuseport_md, len):
+		SK_REUSEPORT_LOAD_SKB_FIELD(len);
+		break;
+
+	case offsetof(struct sk_reuseport_md, eth_protocol):
+		SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
+		break;
+
+	case offsetof(struct sk_reuseport_md, ip_protocol):
+		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
+						    BPF_W, 0);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
+					SK_FL_PROTO_SHIFT);
+		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
+		 * aware.  No further narrowing or masking is needed.
+		 */
+		*target_size = 1;
+		break;
+
+	case offsetof(struct sk_reuseport_md, data_end):
+		SK_REUSEPORT_LOAD_FIELD(data_end);
+		break;
+
+	case offsetof(struct sk_reuseport_md, hash):
+		SK_REUSEPORT_LOAD_FIELD(hash);
+		break;
+
+	case offsetof(struct sk_reuseport_md, bind_inany):
+		SK_REUSEPORT_LOAD_FIELD(bind_inany);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
+	.get_func_proto		= sk_reuseport_func_proto,
+	.is_valid_access	= sk_reuseport_is_valid_access,
+	.convert_ctx_access	= sk_reuseport_convert_ctx_access,
+};
+
+const struct bpf_prog_ops sk_reuseport_prog_ops = {
+};
+#endif /* CONFIG_INET */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 8235f2439816..d260167f5f77 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -51,7 +51,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 	return reuse;
 }
 
-int reuseport_alloc(struct sock *sk)
+int reuseport_alloc(struct sock *sk, bool bind_inany)
 {
 	struct sock_reuseport *reuse;
 
@@ -63,9 +63,17 @@ int reuseport_alloc(struct sock *sk)
 	/* Allocation attempts can occur concurrently via the setsockopt path
 	 * and the bind/hash path.  Nothing to do when we lose the race.
 	 */
-	if (rcu_dereference_protected(sk->sk_reuseport_cb,
-				      lockdep_is_held(&reuseport_lock)))
+	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+					  lockdep_is_held(&reuseport_lock));
+	if (reuse) {
+		/* Only set reuse->bind_inany if the bind_inany is true.
+		 * Otherwise, it will overwrite the reuse->bind_inany
+		 * which was set by the bind/hash path.
+		 */
+		if (bind_inany)
+			reuse->bind_inany = bind_inany;
 		goto out;
+	}
 
 	reuse = __reuseport_alloc(INIT_SOCKS);
 	if (!reuse) {
@@ -75,6 +83,7 @@ int reuseport_alloc(struct sock *sk)
 
 	reuse->socks[0] = sk;
 	reuse->num_socks = 1;
+	reuse->bind_inany = bind_inany;
 	rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -101,6 +110,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 	more_reuse->num_socks = reuse->num_socks;
 	more_reuse->prog = reuse->prog;
 	more_reuse->reuseport_id = reuse->reuseport_id;
+	more_reuse->bind_inany = reuse->bind_inany;
 
 	memcpy(more_reuse->socks, reuse->socks,
 	       reuse->num_socks * sizeof(struct sock *));
@@ -136,12 +146,12 @@ static void reuseport_free_rcu(struct rcu_head *head)
  *  @sk2: Socket belonging to the existing reuseport group.
  *  May return ENOMEM and not add socket to group under memory pressure.
  */
-int reuseport_add_sock(struct sock *sk, struct sock *sk2)
+int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 {
 	struct sock_reuseport *old_reuse, *reuse;
 
 	if (!rcu_access_pointer(sk2->sk_reuseport_cb)) {
-		int err = reuseport_alloc(sk2);
+		int err = reuseport_alloc(sk2, bind_inany);
 
 		if (err)
 			return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 33a88e045efd..dfd5009f96ef 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -107,6 +107,15 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
 }
 EXPORT_SYMBOL(inet_rcv_saddr_equal);
 
+bool inet_rcv_saddr_any(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+#endif
+	return !sk->sk_rcv_saddr;
+}
+
 void inet_get_local_port_range(struct net *net, int *low, int *high)
 {
 	unsigned int seq;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3647167c8fa3..370e24463fb7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -567,10 +567,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
 		    inet_csk(sk2)->icsk_bind_hash == tb &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false))
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 060e841dde40..038dd7909051 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -221,11 +221,12 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
 		    (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
 		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
 		    inet_rcv_saddr_equal(sk, sk2, false)) {
-			return reuseport_add_sock(sk, sk2);
+			return reuseport_add_sock(sk, sk2,
+						  inet_rcv_saddr_any(sk));
 		}
 	}
 
-	return reuseport_alloc(sk);
+	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
 }
 
 /**
-- 
cgit v1.2.3


From 8217ca653ec601246832d562207bc24bdf652d2f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 8 Aug 2018 01:01:26 -0700
Subject: bpf: Enable BPF_PROG_TYPE_SK_REUSEPORT bpf prog in reuseport
 selection

This patch allows a BPF_PROG_TYPE_SK_REUSEPORT bpf prog to select a
SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY introduced in
the earlier patch.  "bpf_run_sk_reuseport()" will return -ECONNREFUSED
when the BPF_PROG_TYPE_SK_REUSEPORT prog returns SK_DROP.
The callers, in inet[6]_hashtable.c and ipv[46]/udp.c, are modified to
handle this case and return NULL immediately instead of continuing the
sk search from its hashtable.

It re-uses the existing SO_ATTACH_REUSEPORT_EBPF setsockopt to attach
BPF_PROG_TYPE_SK_REUSEPORT.  The "sk_reuseport_attach_bpf()" will check
if the attaching bpf prog is in the new SK_REUSEPORT or the existing
SOCKET_FILTER type and then check different things accordingly.

One level of "__reuseport_attach_prog()" call is removed.  The
"sk_unhashed() && ..." and "sk->sk_reuseport_cb" tests are pushed
back to "reuseport_attach_prog()" in sock_reuseport.c.  sock_reuseport.c
seems to have more knowledge on those test requirements than filter.c.
In "reuseport_attach_prog()", after new_prog is attached to reuse->prog,
the old_prog (if any) is also directly freed instead of returning the
old_prog to the caller and asking the caller to free.

The sysctl_optmem_max check is moved back to the
"sk_reuseport_attach_filter()" and "sk_reuseport_attach_bpf()".
As of other bpf prog types, the new BPF_PROG_TYPE_SK_REUSEPORT is only
bounded by the usual "bpf_prog_charge_memlock()" during load time
instead of bounded by both bpf_prog_charge_memlock and sysctl_optmem_max.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h       |  1 +
 include/net/sock_reuseport.h |  3 +-
 net/core/filter.c            | 87 ++++++++++++++++++++++++++------------------
 net/core/sock_reuseport.c    | 36 +++++++++++++-----
 net/ipv4/inet_hashtables.c   | 14 ++++---
 net/ipv4/udp.c               |  4 ++
 net/ipv6/inet6_hashtables.c  | 14 ++++---
 net/ipv6/udp.c               |  4 ++
 8 files changed, 106 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 70e9d57677fe..5d565c50bcb2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -753,6 +753,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
+void sk_reuseport_prog_free(struct bpf_prog *prog);
 int sk_detach_filter(struct sock *sk);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 73b569556be6..8a5f70c7cdf2 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -34,8 +34,7 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 					  u32 hash,
 					  struct sk_buff *skb,
 					  int hdr_len);
-extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
-					      struct bpf_prog *prog);
+extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/filter.c b/net/core/filter.c
index 142595b4e0d1..22906b31d43f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1453,30 +1453,6 @@ static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
 	return 0;
 }
 
-static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
-{
-	struct bpf_prog *old_prog;
-	int err;
-
-	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
-		return -ENOMEM;
-
-	if (sk_unhashed(sk) && sk->sk_reuseport) {
-		err = reuseport_alloc(sk, false);
-		if (err)
-			return err;
-	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
-		/* The socket wasn't bound with SO_REUSEPORT */
-		return -EINVAL;
-	}
-
-	old_prog = reuseport_attach_prog(sk, prog);
-	if (old_prog)
-		bpf_prog_destroy(old_prog);
-
-	return 0;
-}
-
 static
 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
 {
@@ -1550,13 +1526,15 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __reuseport_attach_prog(prog, sk);
-	if (err < 0) {
+	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+		err = -ENOMEM;
+	else
+		err = reuseport_attach_prog(sk, prog);
+
+	if (err)
 		__bpf_prog_release(prog);
-		return err;
-	}
 
-	return 0;
+	return err;
 }
 
 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
@@ -1586,19 +1564,58 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 
 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
 {
-	struct bpf_prog *prog = __get_bpf(ufd, sk);
+	struct bpf_prog *prog;
 	int err;
 
+	if (sock_flag(sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+
+	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
+	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
+		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
-	err = __reuseport_attach_prog(prog, sk);
-	if (err < 0) {
-		bpf_prog_put(prog);
-		return err;
+	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
+		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
+		 * bpf prog (e.g. sockmap).  It depends on the
+		 * limitation imposed by bpf_prog_load().
+		 * Hence, sysctl_optmem_max is not checked.
+		 */
+		if ((sk->sk_type != SOCK_STREAM &&
+		     sk->sk_type != SOCK_DGRAM) ||
+		    (sk->sk_protocol != IPPROTO_UDP &&
+		     sk->sk_protocol != IPPROTO_TCP) ||
+		    (sk->sk_family != AF_INET &&
+		     sk->sk_family != AF_INET6)) {
+			err = -ENOTSUPP;
+			goto err_prog_put;
+		}
+	} else {
+		/* BPF_PROG_TYPE_SOCKET_FILTER */
+		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+			err = -ENOMEM;
+			goto err_prog_put;
+		}
 	}
 
-	return 0;
+	err = reuseport_attach_prog(sk, prog);
+err_prog_put:
+	if (err)
+		bpf_prog_put(prog);
+
+	return err;
+}
+
+void sk_reuseport_prog_free(struct bpf_prog *prog)
+{
+	if (!prog)
+		return;
+
+	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+		bpf_prog_put(prog);
+	else
+		bpf_prog_destroy(prog);
 }
 
 struct bpf_scratchpad {
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index d260167f5f77..ba5cba56f574 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -9,6 +9,7 @@
 #include <net/sock_reuseport.h>
 #include <linux/bpf.h>
 #include <linux/idr.h>
+#include <linux/filter.h>
 #include <linux/rcupdate.h>
 
 #define INIT_SOCKS 128
@@ -133,8 +134,7 @@ static void reuseport_free_rcu(struct rcu_head *head)
 	struct sock_reuseport *reuse;
 
 	reuse = container_of(head, struct sock_reuseport, rcu);
-	if (reuse->prog)
-		bpf_prog_destroy(reuse->prog);
+	sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
 	if (reuse->reuseport_id)
 		ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
 	kfree(reuse);
@@ -219,9 +219,9 @@ void reuseport_detach_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
 
-static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
-			    struct bpf_prog *prog, struct sk_buff *skb,
-			    int hdr_len)
+static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
+				   struct bpf_prog *prog, struct sk_buff *skb,
+				   int hdr_len)
 {
 	struct sk_buff *nskb = NULL;
 	u32 index;
@@ -282,9 +282,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
 		/* paired with smp_wmb() in reuseport_add_sock() */
 		smp_rmb();
 
-		if (prog && skb)
-			sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+		if (!prog || !skb)
+			goto select_by_hash;
+
+		if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
+			sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+		else
+			sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
 
+select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
 		if (!sk2)
 			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
@@ -296,12 +302,21 @@ out:
 }
 EXPORT_SYMBOL(reuseport_select_sock);
 
-struct bpf_prog *
-reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
+int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 {
 	struct sock_reuseport *reuse;
 	struct bpf_prog *old_prog;
 
+	if (sk_unhashed(sk) && sk->sk_reuseport) {
+		int err = reuseport_alloc(sk, false);
+
+		if (err)
+			return err;
+	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
+		/* The socket wasn't bound with SO_REUSEPORT */
+		return -EINVAL;
+	}
+
 	spin_lock_bh(&reuseport_lock);
 	reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
 					  lockdep_is_held(&reuseport_lock));
@@ -310,6 +325,7 @@ reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 	rcu_assign_pointer(reuse->prog, prog);
 	spin_unlock_bh(&reuseport_lock);
 
-	return old_prog;
+	sk_reuseport_prog_free(old_prog);
+	return 0;
 }
 EXPORT_SYMBOL(reuseport_attach_prog);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 370e24463fb7..f5c9ef2586de 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -328,7 +328,7 @@ struct sock *__inet_lookup_listener(struct net *net,
 				    saddr, sport, daddr, hnum,
 				    dif, sdif);
 	if (result)
-		return result;
+		goto done;
 
 	/* Lookup lhash2 with INADDR_ANY */
 
@@ -337,9 +337,10 @@ struct sock *__inet_lookup_listener(struct net *net,
 	if (ilb2->count > ilb->count)
 		goto port_lookup;
 
-	return inet_lhash2_lookup(net, ilb2, skb, doff,
-				  saddr, sport, daddr, hnum,
-				  dif, sdif);
+	result = inet_lhash2_lookup(net, ilb2, skb, doff,
+				    saddr, sport, daddr, hnum,
+				    dif, sdif);
+	goto done;
 
 port_lookup:
 	sk_for_each_rcu(sk, &ilb->head) {
@@ -352,12 +353,15 @@ port_lookup:
 				result = reuseport_select_sock(sk, phash,
 							       skb, doff);
 				if (result)
-					return result;
+					goto done;
 			}
 			result = sk;
 			hiscore = score;
 		}
 	}
+done:
+	if (unlikely(IS_ERR(result)))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 038dd7909051..f4e35b2ff8b8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -499,6 +499,8 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 						  daddr, hnum, dif, sdif,
 						  exact_dif, hslot2, skb);
 		}
+		if (unlikely(IS_ERR(result)))
+			return NULL;
 		return result;
 	}
 begin:
@@ -513,6 +515,8 @@ begin:
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
+				if (unlikely(IS_ERR(result)))
+					return NULL;
 				if (result)
 					return result;
 			}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 595ad408dba0..3d7c7460a0c5 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -191,7 +191,7 @@ struct sock *inet6_lookup_listener(struct net *net,
 				     saddr, sport, daddr, hnum,
 				     dif, sdif);
 	if (result)
-		return result;
+		goto done;
 
 	/* Lookup lhash2 with in6addr_any */
 
@@ -200,9 +200,10 @@ struct sock *inet6_lookup_listener(struct net *net,
 	if (ilb2->count > ilb->count)
 		goto port_lookup;
 
-	return inet6_lhash2_lookup(net, ilb2, skb, doff,
-				   saddr, sport, daddr, hnum,
-				   dif, sdif);
+	result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+				     saddr, sport, daddr, hnum,
+				     dif, sdif);
+	goto done;
 
 port_lookup:
 	sk_for_each(sk, &ilb->head) {
@@ -214,12 +215,15 @@ port_lookup:
 				result = reuseport_select_sock(sk, phash,
 							       skb, doff);
 				if (result)
-					return result;
+					goto done;
 			}
 			result = sk;
 			hiscore = score;
 		}
 	}
+done:
+	if (unlikely(IS_ERR(result)))
+		return NULL;
 	return result;
 }
 EXPORT_SYMBOL_GPL(inet6_lookup_listener);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6b96956a8ed..83f4c77c79d8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -235,6 +235,8 @@ struct sock *__udp6_lib_lookup(struct net *net,
 						  exact_dif, hslot2,
 						  skb);
 		}
+		if (unlikely(IS_ERR(result)))
+			return NULL;
 		return result;
 	}
 begin:
@@ -249,6 +251,8 @@ begin:
 						    saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
+				if (unlikely(IS_ERR(result)))
+					return NULL;
 				if (result)
 					return result;
 			}
-- 
cgit v1.2.3


From 466466dc6c28ca9dc401f10e235b9cde9a7c9162 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Thu, 9 Aug 2018 09:38:09 -0700
Subject: tcp: mandate a one-time immediate ACK

Add a new flag to indicate a one-time immediate ACK. This flag is
occasionaly set under specific TCP protocol states in addition to
the more common quickack mechanism for interactive application.

In several cases in the TCP code we want to force an immediate ACK
but do not want to call tcp_enter_quickack_mode() because we do
not want to forget the icsk_ack.pingpong or icsk_ack.ato state.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 net/ipv4/tcp_input.c               | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 0a6c9e0f2b5a..fa43b82607d9 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -167,7 +167,8 @@ enum inet_csk_ack_state_t {
 	ICSK_ACK_SCHED	= 1,
 	ICSK_ACK_TIMER  = 2,
 	ICSK_ACK_PUSHED = 4,
-	ICSK_ACK_PUSHED2 = 8
+	ICSK_ACK_PUSHED2 = 8,
+	ICSK_ACK_NOW = 16	/* Send the next ACK immediately (once) */
 };
 
 void inet_csk_init_xmit_timers(struct sock *sk,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 715d541b52dd..b8849588c440 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5179,7 +5179,9 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
 	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
 	    /* We ACK each frame or... */
-	    tcp_in_quickack_mode(sk)) {
+	    tcp_in_quickack_mode(sk) ||
+	    /* Protocol state mandates a one-time immediate ACK */
+	    inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) {
 send_now:
 		tcp_send_ack(sk);
 		return;
-- 
cgit v1.2.3


From 19e226e8cc5da02f17ed119f9137036c0f0f5d80 Mon Sep 17 00:00:00 2001
From: Caleb Raitto <caraitto@google.com>
Date: Thu, 9 Aug 2018 18:18:28 -0700
Subject: virtio: Make vp_set_vq_affinity() take a mask.

Make vp_set_vq_affinity() take a cpumask instead of taking a single CPU.

If there are fewer queues than cores, queue affinity should be able to
map to multiple cores.

Link: https://patchwork.ozlabs.org/patch/948149/
Suggested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Caleb Raitto <caraitto@google.com>
Acked-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/virtio/virtio_crypto_core.c | 4 ++--
 drivers/net/virtio_net.c                   | 8 ++++----
 drivers/virtio/virtio_pci_common.c         | 7 +++----
 drivers/virtio/virtio_pci_common.h         | 2 +-
 include/linux/virtio_config.h              | 7 ++++---
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index 83326986c113..7c7198553699 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -146,7 +146,7 @@ static void virtcrypto_clean_affinity(struct virtio_crypto *vi, long hcpu)
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_data_queues; i++)
-			virtqueue_set_affinity(vi->data_vq[i].vq, -1);
+			virtqueue_set_affinity(vi->data_vq[i].vq, NULL);
 
 		vi->affinity_hint_set = false;
 	}
@@ -173,7 +173,7 @@ static void virtcrypto_set_affinity(struct virtio_crypto *vcrypto)
 	 *
 	 */
 	for_each_online_cpu(cpu) {
-		virtqueue_set_affinity(vcrypto->data_vq[i].vq, cpu);
+		virtqueue_set_affinity(vcrypto->data_vq[i].vq, cpumask_of(cpu));
 		if (++i >= vcrypto->max_data_queues)
 			break;
 	}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 39a7f4452587..43fabc0eb4d2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1878,8 +1878,8 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 
 	if (vi->affinity_hint_set) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
-			virtqueue_set_affinity(vi->rq[i].vq, -1);
-			virtqueue_set_affinity(vi->sq[i].vq, -1);
+			virtqueue_set_affinity(vi->rq[i].vq, NULL);
+			virtqueue_set_affinity(vi->sq[i].vq, NULL);
 		}
 
 		vi->affinity_hint_set = false;
@@ -1905,8 +1905,8 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 	for_each_online_cpu(cpu) {
 		const unsigned long *mask = cpumask_bits(cpumask_of(cpu));
 
-		virtqueue_set_affinity(vi->rq[i].vq, cpu);
-		virtqueue_set_affinity(vi->sq[i].vq, cpu);
+		virtqueue_set_affinity(vi->rq[i].vq, cpumask_of(cpu));
+		virtqueue_set_affinity(vi->sq[i].vq, cpumask_of(cpu));
 		__netif_set_xps_queue(vi->dev, mask, i, false);
 		i++;
 	}
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 705aebd74e56..465a6f5142cc 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -421,7 +421,7 @@ const char *vp_bus_name(struct virtio_device *vdev)
  * - OR over all affinities for shared MSI
  * - ignore the affinity request if we're using INTX
  */
-int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
+int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
 {
 	struct virtio_device *vdev = vq->vdev;
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
@@ -435,11 +435,10 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 	if (vp_dev->msix_enabled) {
 		mask = vp_dev->msix_affinity_masks[info->msix_vector];
 		irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
-		if (cpu == -1)
+		if (!cpu_mask)
 			irq_set_affinity_hint(irq, NULL);
 		else {
-			cpumask_clear(mask);
-			cpumask_set_cpu(cpu, mask);
+			cpumask_copy(mask, cpu_mask);
 			irq_set_affinity_hint(irq, mask);
 		}
 	}
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 135ee3cf7175..02271002c2f3 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -141,7 +141,7 @@ const char *vp_bus_name(struct virtio_device *vdev);
  * - OR over all affinities for shared MSI
  * - ignore the affinity request if we're using INTX
  */
-int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
+int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask);
 
 const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index);
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 5559a2d31c46..32baf8e26735 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -79,7 +79,8 @@ struct virtio_config_ops {
 	u64 (*get_features)(struct virtio_device *vdev);
 	int (*finalize_features)(struct virtio_device *vdev);
 	const char *(*bus_name)(struct virtio_device *vdev);
-	int (*set_vq_affinity)(struct virtqueue *vq, int cpu);
+	int (*set_vq_affinity)(struct virtqueue *vq,
+			       const struct cpumask *cpu_mask);
 	const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev,
 			int index);
 };
@@ -236,11 +237,11 @@ const char *virtio_bus_name(struct virtio_device *vdev)
  *
  */
 static inline
-int virtqueue_set_affinity(struct virtqueue *vq, int cpu)
+int virtqueue_set_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask)
 {
 	struct virtio_device *vdev = vq->vdev;
 	if (vdev->config->set_vq_affinity)
-		return vdev->config->set_vq_affinity(vq, cpu);
+		return vdev->config->set_vq_affinity(vq, cpu_mask);
 	return 0;
 }
 
-- 
cgit v1.2.3


From b0e29063dcb3bf14f515f95e748b60e4bab45e7c Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 10 Aug 2018 13:22:01 +0200
Subject: l2tp: remove pppol2tp_session_ioctl()

pppol2tp_ioctl() has everything in place for handling PPPIOCGL2TPSTATS
on session sockets. We just need to copy the stats and set ->session_id.

As a side effect of sharing session and tunnel code, ->using_ipsec is
properly set even when the request was made using a session socket.

Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ppp-ioctl.h |  2 +-
 net/l2tp/l2tp_ppp.c            | 50 +++---------------------------------------
 2 files changed, 4 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h
index 784c2e3e572e..88b5f9990320 100644
--- a/include/uapi/linux/ppp-ioctl.h
+++ b/include/uapi/linux/ppp-ioctl.h
@@ -68,7 +68,7 @@ struct ppp_option_data {
 struct pppol2tp_ioc_stats {
 	__u16		tunnel_id;	/* redundant */
 	__u16		session_id;	/* if zero, get tunnel stats */
-	__u32		using_ipsec:1;	/* valid only for session_id == 0 */
+	__u32		using_ipsec:1;
 	__aligned_u64	tx_packets;
 	__aligned_u64	tx_bytes;
 	__aligned_u64	tx_errors;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 2afd3ab8a551..bdfbd3ed7e14 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1068,52 +1068,6 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats,
 	return 0;
 }
 
-/* Session ioctl helper.
- */
-static int pppol2tp_session_ioctl(struct l2tp_session *session,
-				  unsigned int cmd, unsigned long arg)
-{
-	int err = 0;
-	struct sock *sk;
-	struct l2tp_tunnel *tunnel = session->tunnel;
-	struct pppol2tp_ioc_stats stats;
-
-	l2tp_dbg(session, L2TP_MSG_CONTROL,
-		 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
-		 session->name, cmd, arg);
-
-	sk = pppol2tp_session_get_sock(session);
-	if (!sk)
-		return -EBADR;
-
-	switch (cmd) {
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		memset(&stats, 0, sizeof(stats));
-		stats.tunnel_id = tunnel->tunnel_id;
-		stats.session_id = session->session_id;
-		pppol2tp_copy_stats(&stats, &session->stats);
-		if (copy_to_user((void __user *) arg, &stats,
-				 sizeof(stats)))
-			break;
-		l2tp_info(session, L2TP_MSG_CONTROL, "%s: get L2TP stats\n",
-			  session->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
 static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
 			  unsigned long arg)
 {
@@ -1172,7 +1126,9 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
 
 			stats.session_id = session_id;
 		} else {
-			return pppol2tp_session_ioctl(session, cmd, arg);
+			memset(&stats, 0, sizeof(stats));
+			pppol2tp_copy_stats(&stats, &session->stats);
+			stats.session_id = session->session_id;
 		}
 		stats.tunnel_id = session->tunnel->tunnel_id;
 		stats.using_ipsec = l2tp_tunnel_uses_xfrm(session->tunnel);
-- 
cgit v1.2.3


From 05364ca03cfd419caecb292fede20eb39667eaae Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 10 Aug 2018 20:11:42 +0300
Subject: net/sctp: Make wrappers for accessing in/out streams

This patch introduces wrappers for accessing in/out streams indirectly.
This will enable to replace physically contiguous memory arrays
of streams with flexible arrays (or maybe any other appropriate
mechanism) which do memory allocation on a per-page basis.

Signed-off-by: Oleg Babin <obabin@virtuozzo.com>
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h   | 35 +++++++++++++++++-------
 net/sctp/chunk.c             |  6 ++--
 net/sctp/outqueue.c          | 11 ++++----
 net/sctp/socket.c            |  4 +--
 net/sctp/stream.c            | 65 +++++++++++++++++++++++---------------------
 net/sctp/stream_interleave.c | 20 +++++++-------
 net/sctp/stream_sched.c      | 13 +++++----
 net/sctp/stream_sched_prio.c | 22 +++++++--------
 net/sctp/stream_sched_rr.c   |  8 +++---
 9 files changed, 103 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ab869e0d8326..6b2b8df8a1d2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -398,37 +398,35 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
 
 /* What is the current SSN number for this stream? */
 #define sctp_ssn_peek(stream, type, sid) \
-	((stream)->type[sid].ssn)
+	(sctp_stream_##type((stream), (sid))->ssn)
 
 /* Return the next SSN number for this stream.	*/
 #define sctp_ssn_next(stream, type, sid) \
-	((stream)->type[sid].ssn++)
+	(sctp_stream_##type((stream), (sid))->ssn++)
 
 /* Skip over this ssn and all below. */
 #define sctp_ssn_skip(stream, type, sid, ssn) \
-	((stream)->type[sid].ssn = ssn + 1)
+	(sctp_stream_##type((stream), (sid))->ssn = ssn + 1)
 
 /* What is the current MID number for this stream? */
 #define sctp_mid_peek(stream, type, sid) \
-	((stream)->type[sid].mid)
+	(sctp_stream_##type((stream), (sid))->mid)
 
 /* Return the next MID number for this stream.  */
 #define sctp_mid_next(stream, type, sid) \
-	((stream)->type[sid].mid++)
+	(sctp_stream_##type((stream), (sid))->mid++)
 
 /* Skip over this mid and all below. */
 #define sctp_mid_skip(stream, type, sid, mid) \
-	((stream)->type[sid].mid = mid + 1)
-
-#define sctp_stream_in(asoc, sid) (&(asoc)->stream.in[sid])
+	(sctp_stream_##type((stream), (sid))->mid = mid + 1)
 
 /* What is the current MID_uo number for this stream? */
 #define sctp_mid_uo_peek(stream, type, sid) \
-	((stream)->type[sid].mid_uo)
+	(sctp_stream_##type((stream), (sid))->mid_uo)
 
 /* Return the next MID_uo number for this stream.  */
 #define sctp_mid_uo_next(stream, type, sid) \
-	((stream)->type[sid].mid_uo++)
+	(sctp_stream_##type((stream), (sid))->mid_uo++)
 
 /*
  * Pointers to address related SCTP functions.
@@ -1463,6 +1461,23 @@ struct sctp_stream {
 	struct sctp_stream_interleave *si;
 };
 
+static inline struct sctp_stream_out *sctp_stream_out(
+	const struct sctp_stream *stream,
+	__u16 sid)
+{
+	return ((struct sctp_stream_out *)(stream->out)) + sid;
+}
+
+static inline struct sctp_stream_in *sctp_stream_in(
+	const struct sctp_stream *stream,
+	__u16 sid)
+{
+	return ((struct sctp_stream_in *)(stream->in)) + sid;
+}
+
+#define SCTP_SO(s, i) sctp_stream_out((s), (i))
+#define SCTP_SI(s, i) sctp_stream_in((s), (i))
+
 #define SCTP_STREAM_CLOSED		0x00
 #define SCTP_STREAM_OPEN		0x01
 
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index bfb9f812e2ef..ce8087846f05 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -325,7 +325,8 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
 	    time_after(jiffies, chunk->msg->expires_at)) {
 		struct sctp_stream_out *streamout =
-			&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
+			SCTP_SO(&chunk->asoc->stream,
+				chunk->sinfo.sinfo_stream);
 
 		if (chunk->sent_count) {
 			chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
@@ -339,7 +340,8 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
 		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
 		struct sctp_stream_out *streamout =
-			&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
+			SCTP_SO(&chunk->asoc->stream,
+				chunk->sinfo.sinfo_stream);
 
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index d68aa33485a9..d74d00b29942 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -80,7 +80,7 @@ static inline void sctp_outq_head_data(struct sctp_outq *q,
 	q->out_qlen += ch->skb->len;
 
 	stream = sctp_chunk_stream_no(ch);
-	oute = q->asoc->stream.out[stream].ext;
+	oute = SCTP_SO(&q->asoc->stream, stream)->ext;
 	list_add(&ch->stream_list, &oute->outq);
 }
 
@@ -101,7 +101,7 @@ static inline void sctp_outq_tail_data(struct sctp_outq *q,
 	q->out_qlen += ch->skb->len;
 
 	stream = sctp_chunk_stream_no(ch);
-	oute = q->asoc->stream.out[stream].ext;
+	oute = SCTP_SO(&q->asoc->stream, stream)->ext;
 	list_add_tail(&ch->stream_list, &oute->outq);
 }
 
@@ -372,7 +372,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
 		sctp_insert_list(&asoc->outqueue.abandoned,
 				 &chk->transmitted_list);
 
-		streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
+		streamout = SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
 		asoc->sent_cnt_removable--;
 		asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
 		streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
@@ -416,7 +416,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
 		asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
 			struct sctp_stream_out *streamout =
-				&asoc->stream.out[chk->sinfo.sinfo_stream];
+				SCTP_SO(&asoc->stream, chk->sinfo.sinfo_stream);
 
 			streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
 		}
@@ -1082,6 +1082,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx,
 	/* Finally, transmit new packets.  */
 	while ((chunk = sctp_outq_dequeue_data(ctx->q)) != NULL) {
 		__u32 sid = ntohs(chunk->subh.data_hdr->stream);
+		__u8 stream_state = SCTP_SO(&ctx->asoc->stream, sid)->state;
 
 		/* Has this chunk expired? */
 		if (sctp_chunk_abandoned(chunk)) {
@@ -1091,7 +1092,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx,
 			continue;
 		}
 
-		if (ctx->asoc->stream.out[sid].state == SCTP_STREAM_CLOSED) {
+		if (stream_state == SCTP_STREAM_CLOSED) {
 			sctp_outq_head_data(ctx->q, chunk);
 			break;
 		}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 502c0d7cb105..e96b15a66aba 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1911,7 +1911,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		goto err;
 	}
 
-	if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+	if (unlikely(!SCTP_SO(&asoc->stream, sinfo->sinfo_stream)->ext)) {
 		err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
 		if (err)
 			goto err;
@@ -7154,7 +7154,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 	if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
 		goto out;
 
-	streamoute = asoc->stream.out[params.sprstat_sid].ext;
+	streamoute = SCTP_SO(&asoc->stream, params.sprstat_sid)->ext;
 	if (!streamoute) {
 		/* Not allocated yet, means all stats are 0 */
 		params.sprstat_abandoned_unsent = 0;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index f1f1d1b232ba..7ca6fe4e7882 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -162,7 +162,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 
 	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].state = SCTP_STREAM_OPEN;
+		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
 	sched->init(stream);
 
@@ -193,7 +193,7 @@ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
 	soute = kzalloc(sizeof(*soute), GFP_KERNEL);
 	if (!soute)
 		return -ENOMEM;
-	stream->out[sid].ext = soute;
+	SCTP_SO(stream, sid)->ext = soute;
 
 	return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
 }
@@ -205,7 +205,7 @@ void sctp_stream_free(struct sctp_stream *stream)
 
 	sched->free(stream);
 	for (i = 0; i < stream->outcnt; i++)
-		kfree(stream->out[i].ext);
+		kfree(SCTP_SO(stream, i)->ext);
 	kfree(stream->out);
 	kfree(stream->in);
 }
@@ -215,12 +215,12 @@ void sctp_stream_clear(struct sctp_stream *stream)
 	int i;
 
 	for (i = 0; i < stream->outcnt; i++) {
-		stream->out[i].mid = 0;
-		stream->out[i].mid_uo = 0;
+		SCTP_SO(stream, i)->mid = 0;
+		SCTP_SO(stream, i)->mid_uo = 0;
 	}
 
 	for (i = 0; i < stream->incnt; i++)
-		stream->in[i].mid = 0;
+		SCTP_SI(stream, i)->mid = 0;
 }
 
 void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
@@ -273,8 +273,8 @@ static bool sctp_stream_outq_is_empty(struct sctp_stream *stream,
 	for (i = 0; i < str_nums; i++) {
 		__u16 sid = ntohs(str_list[i]);
 
-		if (stream->out[sid].ext &&
-		    !list_empty(&stream->out[sid].ext->outq))
+		if (SCTP_SO(stream, sid)->ext &&
+		    !list_empty(&SCTP_SO(stream, sid)->ext->outq))
 			return false;
 	}
 
@@ -361,11 +361,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
 	if (out) {
 		if (str_nums)
 			for (i = 0; i < str_nums; i++)
-				stream->out[str_list[i]].state =
+				SCTP_SO(stream, str_list[i])->state =
 						       SCTP_STREAM_CLOSED;
 		else
 			for (i = 0; i < stream->outcnt; i++)
-				stream->out[i].state = SCTP_STREAM_CLOSED;
+				SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED;
 	}
 
 	asoc->strreset_chunk = chunk;
@@ -380,11 +380,11 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
 
 		if (str_nums)
 			for (i = 0; i < str_nums; i++)
-				stream->out[str_list[i]].state =
+				SCTP_SO(stream, str_list[i])->state =
 						       SCTP_STREAM_OPEN;
 		else
 			for (i = 0; i < stream->outcnt; i++)
-				stream->out[i].state = SCTP_STREAM_OPEN;
+				SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
 		goto out;
 	}
@@ -418,7 +418,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 
 	/* Block further xmit of data until this request is completed */
 	for (i = 0; i < stream->outcnt; i++)
-		stream->out[i].state = SCTP_STREAM_CLOSED;
+		SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED;
 
 	asoc->strreset_chunk = chunk;
 	sctp_chunk_hold(asoc->strreset_chunk);
@@ -429,7 +429,7 @@ int sctp_send_reset_assoc(struct sctp_association *asoc)
 		asoc->strreset_chunk = NULL;
 
 		for (i = 0; i < stream->outcnt; i++)
-			stream->out[i].state = SCTP_STREAM_OPEN;
+			SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
 		return retval;
 	}
@@ -609,10 +609,10 @@ struct sctp_chunk *sctp_process_strreset_outreq(
 		}
 
 		for (i = 0; i < nums; i++)
-			stream->in[ntohs(str_p[i])].mid = 0;
+			SCTP_SI(stream, ntohs(str_p[i]))->mid = 0;
 	} else {
 		for (i = 0; i < stream->incnt; i++)
-			stream->in[i].mid = 0;
+			SCTP_SI(stream, i)->mid = 0;
 	}
 
 	result = SCTP_STRRESET_PERFORMED;
@@ -683,11 +683,11 @@ struct sctp_chunk *sctp_process_strreset_inreq(
 
 	if (nums)
 		for (i = 0; i < nums; i++)
-			stream->out[ntohs(str_p[i])].state =
+			SCTP_SO(stream, ntohs(str_p[i]))->state =
 					       SCTP_STREAM_CLOSED;
 	else
 		for (i = 0; i < stream->outcnt; i++)
-			stream->out[i].state = SCTP_STREAM_CLOSED;
+			SCTP_SO(stream, i)->state = SCTP_STREAM_CLOSED;
 
 	asoc->strreset_chunk = chunk;
 	asoc->strreset_outstanding = 1;
@@ -786,11 +786,11 @@ struct sctp_chunk *sctp_process_strreset_tsnreq(
 	 *      incoming and outgoing streams.
 	 */
 	for (i = 0; i < stream->outcnt; i++) {
-		stream->out[i].mid = 0;
-		stream->out[i].mid_uo = 0;
+		SCTP_SO(stream, i)->mid = 0;
+		SCTP_SO(stream, i)->mid_uo = 0;
 	}
 	for (i = 0; i < stream->incnt; i++)
-		stream->in[i].mid = 0;
+		SCTP_SI(stream, i)->mid = 0;
 
 	result = SCTP_STRRESET_PERFORMED;
 
@@ -979,15 +979,18 @@ struct sctp_chunk *sctp_process_strreset_resp(
 		       sizeof(__u16);
 
 		if (result == SCTP_STRRESET_PERFORMED) {
+			struct sctp_stream_out *sout;
 			if (nums) {
 				for (i = 0; i < nums; i++) {
-					stream->out[ntohs(str_p[i])].mid = 0;
-					stream->out[ntohs(str_p[i])].mid_uo = 0;
+					sout = SCTP_SO(stream, ntohs(str_p[i]));
+					sout->mid = 0;
+					sout->mid_uo = 0;
 				}
 			} else {
 				for (i = 0; i < stream->outcnt; i++) {
-					stream->out[i].mid = 0;
-					stream->out[i].mid_uo = 0;
+					sout = SCTP_SO(stream, i);
+					sout->mid = 0;
+					sout->mid_uo = 0;
 				}
 			}
 
@@ -995,7 +998,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 		}
 
 		for (i = 0; i < stream->outcnt; i++)
-			stream->out[i].state = SCTP_STREAM_OPEN;
+			SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
 		*evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
 			nums, str_p, GFP_ATOMIC);
@@ -1050,15 +1053,15 @@ struct sctp_chunk *sctp_process_strreset_resp(
 			asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
 
 			for (i = 0; i < stream->outcnt; i++) {
-				stream->out[i].mid = 0;
-				stream->out[i].mid_uo = 0;
+				SCTP_SO(stream, i)->mid = 0;
+				SCTP_SO(stream, i)->mid_uo = 0;
 			}
 			for (i = 0; i < stream->incnt; i++)
-				stream->in[i].mid = 0;
+				SCTP_SI(stream, i)->mid = 0;
 		}
 
 		for (i = 0; i < stream->outcnt; i++)
-			stream->out[i].state = SCTP_STREAM_OPEN;
+			SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
 		*evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
 			stsn, rtsn, GFP_ATOMIC);
@@ -1072,7 +1075,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
 
 		if (result == SCTP_STRRESET_PERFORMED)
 			for (i = number; i < stream->outcnt; i++)
-				stream->out[i].state = SCTP_STREAM_OPEN;
+				SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 		else
 			stream->outcnt = number;
 
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index d3764c181299..0a78cdf86463 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -197,7 +197,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_partial(
 	__u32 next_fsn = 0;
 	int is_last = 0;
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 
 	skb_queue_walk(&ulpq->reasm, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
@@ -278,7 +278,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
 	__u32 pd_len = 0;
 	__u32 mid = 0;
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 
 	skb_queue_walk(&ulpq->reasm, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
@@ -368,7 +368,7 @@ static struct sctp_ulpevent *sctp_intl_reasm(struct sctp_ulpq *ulpq,
 
 	sctp_intl_store_reasm(ulpq, event);
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 	if (sin->pd_mode && event->mid == sin->mid &&
 	    event->fsn == sin->fsn)
 		retval = sctp_intl_retrieve_partial(ulpq, event);
@@ -575,7 +575,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_partial_uo(
 	__u32 next_fsn = 0;
 	int is_last = 0;
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 
 	skb_queue_walk(&ulpq->reasm_uo, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
@@ -659,7 +659,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
 	__u32 pd_len = 0;
 	__u32 mid = 0;
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 
 	skb_queue_walk(&ulpq->reasm_uo, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
@@ -750,7 +750,7 @@ static struct sctp_ulpevent *sctp_intl_reasm_uo(struct sctp_ulpq *ulpq,
 
 	sctp_intl_store_reasm_uo(ulpq, event);
 
-	sin = sctp_stream_in(ulpq->asoc, event->stream);
+	sin = sctp_stream_in(&ulpq->asoc->stream, event->stream);
 	if (sin->pd_mode_uo && event->mid == sin->mid_uo &&
 	    event->fsn == sin->fsn_uo)
 		retval = sctp_intl_retrieve_partial_uo(ulpq, event);
@@ -774,7 +774,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq)
 	skb_queue_walk(&ulpq->reasm_uo, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
 
-		csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+		csin = sctp_stream_in(&ulpq->asoc->stream, cevent->stream);
 		if (csin->pd_mode_uo)
 			continue;
 
@@ -875,7 +875,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq)
 	skb_queue_walk(&ulpq->reasm, pos) {
 		struct sctp_ulpevent *cevent = sctp_skb2event(pos);
 
-		csin = sctp_stream_in(ulpq->asoc, cevent->stream);
+		csin = sctp_stream_in(&ulpq->asoc->stream, cevent->stream);
 		if (csin->pd_mode)
 			continue;
 
@@ -1053,7 +1053,7 @@ static void sctp_intl_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
 	__u16 sid;
 
 	for (sid = 0; sid < stream->incnt; sid++) {
-		struct sctp_stream_in *sin = &stream->in[sid];
+		struct sctp_stream_in *sin = SCTP_SI(stream, sid);
 		__u32 mid;
 
 		if (sin->pd_mode_uo) {
@@ -1247,7 +1247,7 @@ static void sctp_handle_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk)
 static void sctp_intl_skip(struct sctp_ulpq *ulpq, __u16 sid, __u32 mid,
 			   __u8 flags)
 {
-	struct sctp_stream_in *sin = sctp_stream_in(ulpq->asoc, sid);
+	struct sctp_stream_in *sin = sctp_stream_in(&ulpq->asoc->stream, sid);
 	struct sctp_stream *stream  = &ulpq->asoc->stream;
 
 	if (flags & SCTP_FTSN_U_BIT) {
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index f5fcd425232a..a6c04a94b08f 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -161,7 +161,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
 
 		/* Give the next scheduler a clean slate. */
 		for (i = 0; i < asoc->stream.outcnt; i++) {
-			void *p = asoc->stream.out[i].ext;
+			void *p = SCTP_SO(&asoc->stream, i)->ext;
 
 			if (!p)
 				continue;
@@ -175,7 +175,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
 	asoc->outqueue.sched = n;
 	n->init(&asoc->stream);
 	for (i = 0; i < asoc->stream.outcnt; i++) {
-		if (!asoc->stream.out[i].ext)
+		if (!SCTP_SO(&asoc->stream, i)->ext)
 			continue;
 
 		ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
@@ -217,7 +217,7 @@ int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
 	if (sid >= asoc->stream.outcnt)
 		return -EINVAL;
 
-	if (!asoc->stream.out[sid].ext) {
+	if (!SCTP_SO(&asoc->stream, sid)->ext) {
 		int ret;
 
 		ret = sctp_stream_init_ext(&asoc->stream, sid);
@@ -234,7 +234,7 @@ int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
 	if (sid >= asoc->stream.outcnt)
 		return -EINVAL;
 
-	if (!asoc->stream.out[sid].ext)
+	if (!SCTP_SO(&asoc->stream, sid)->ext)
 		return 0;
 
 	return asoc->outqueue.sched->get(&asoc->stream, sid, value);
@@ -252,7 +252,7 @@ void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
 		 * priority stream comes in.
 		 */
 		sid = sctp_chunk_stream_no(ch);
-		sout = &q->asoc->stream.out[sid];
+		sout = SCTP_SO(&q->asoc->stream, sid);
 		q->asoc->stream.out_curr = sout;
 		return;
 	}
@@ -272,8 +272,9 @@ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
 int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
 {
 	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+	struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext;
 
-	INIT_LIST_HEAD(&stream->out[sid].ext->outq);
+	INIT_LIST_HEAD(&ext->outq);
 	return sched->init_sid(stream, sid, gfp);
 }
 
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 7997d35dd0fd..2245083a98f2 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -75,10 +75,10 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head(
 
 	/* No luck. So we search on all streams now. */
 	for (i = 0; i < stream->outcnt; i++) {
-		if (!stream->out[i].ext)
+		if (!SCTP_SO(stream, i)->ext)
 			continue;
 
-		p = stream->out[i].ext->prio_head;
+		p = SCTP_SO(stream, i)->ext->prio_head;
 		if (!p)
 			/* Means all other streams won't be initialized
 			 * as well.
@@ -165,7 +165,7 @@ static void sctp_sched_prio_sched(struct sctp_stream *stream,
 static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
 			       __u16 prio, gfp_t gfp)
 {
-	struct sctp_stream_out *sout = &stream->out[sid];
+	struct sctp_stream_out *sout = SCTP_SO(stream, sid);
 	struct sctp_stream_out_ext *soute = sout->ext;
 	struct sctp_stream_priorities *prio_head, *old;
 	bool reschedule = false;
@@ -186,7 +186,7 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
 		return 0;
 
 	for (i = 0; i < stream->outcnt; i++) {
-		soute = stream->out[i].ext;
+		soute = SCTP_SO(stream, i)->ext;
 		if (soute && soute->prio_head == old)
 			/* It's still in use, nothing else to do here. */
 			return 0;
@@ -201,7 +201,7 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
 static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
 			       __u16 *value)
 {
-	*value = stream->out[sid].ext->prio_head->prio;
+	*value = SCTP_SO(stream, sid)->ext->prio_head->prio;
 	return 0;
 }
 
@@ -215,7 +215,7 @@ static int sctp_sched_prio_init(struct sctp_stream *stream)
 static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
 				    gfp_t gfp)
 {
-	INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
+	INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->prio_list);
 	return sctp_sched_prio_set(stream, sid, 0, gfp);
 }
 
@@ -233,9 +233,9 @@ static void sctp_sched_prio_free(struct sctp_stream *stream)
 	 */
 	sctp_sched_prio_unsched_all(stream);
 	for (i = 0; i < stream->outcnt; i++) {
-		if (!stream->out[i].ext)
+		if (!SCTP_SO(stream, i)->ext)
 			continue;
-		prio = stream->out[i].ext->prio_head;
+		prio = SCTP_SO(stream, i)->ext->prio_head;
 		if (prio && list_empty(&prio->prio_sched))
 			list_add(&prio->prio_sched, &list);
 	}
@@ -255,7 +255,7 @@ static void sctp_sched_prio_enqueue(struct sctp_outq *q,
 	ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
 	sid = sctp_chunk_stream_no(ch);
 	stream = &q->asoc->stream;
-	sctp_sched_prio_sched(stream, stream->out[sid].ext);
+	sctp_sched_prio_sched(stream, SCTP_SO(stream, sid)->ext);
 }
 
 static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
@@ -297,7 +297,7 @@ static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
 	 * this priority.
 	 */
 	sid = sctp_chunk_stream_no(ch);
-	soute = q->asoc->stream.out[sid].ext;
+	soute = SCTP_SO(&q->asoc->stream, sid)->ext;
 	prio = soute->prio_head;
 
 	sctp_sched_prio_next_stream(prio);
@@ -317,7 +317,7 @@ static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
 		__u16 sid;
 
 		sid = sctp_chunk_stream_no(ch);
-		sout = &stream->out[sid];
+		sout = SCTP_SO(stream, sid);
 		if (sout->ext)
 			sctp_sched_prio_sched(stream, sout->ext);
 	}
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
index 1155692448f1..52ba743fa7a7 100644
--- a/net/sctp/stream_sched_rr.c
+++ b/net/sctp/stream_sched_rr.c
@@ -100,7 +100,7 @@ static int sctp_sched_rr_init(struct sctp_stream *stream)
 static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
 				  gfp_t gfp)
 {
-	INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
+	INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->rr_list);
 
 	return 0;
 }
@@ -120,7 +120,7 @@ static void sctp_sched_rr_enqueue(struct sctp_outq *q,
 	ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
 	sid = sctp_chunk_stream_no(ch);
 	stream = &q->asoc->stream;
-	sctp_sched_rr_sched(stream, stream->out[sid].ext);
+	sctp_sched_rr_sched(stream, SCTP_SO(stream, sid)->ext);
 }
 
 static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
@@ -154,7 +154,7 @@ static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
 
 	/* Last chunk on that msg, move to the next stream */
 	sid = sctp_chunk_stream_no(ch);
-	soute = q->asoc->stream.out[sid].ext;
+	soute = SCTP_SO(&q->asoc->stream, sid)->ext;
 
 	sctp_sched_rr_next_stream(&q->asoc->stream);
 
@@ -173,7 +173,7 @@ static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
 		__u16 sid;
 
 		sid = sctp_chunk_stream_no(ch);
-		soute = stream->out[sid].ext;
+		soute = SCTP_SO(stream, sid)->ext;
 		if (soute)
 			sctp_sched_rr_sched(stream, soute);
 	}
-- 
cgit v1.2.3


From 0d493b4d0be352b5e361e4fa0bc3efe952d8b10e Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@virtuozzo.com>
Date: Fri, 10 Aug 2018 20:11:43 +0300
Subject: net/sctp: Replace in/out stream arrays with flex_array

This path replaces physically contiguous memory arrays
allocated using kmalloc_array() with flexible arrays.
This enables to avoid memory allocation failures on the
systems under a memory stress.

Signed-off-by: Oleg Babin <obabin@virtuozzo.com>
Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  9 ++---
 net/sctp/stream.c          | 88 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 71 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6b2b8df8a1d2..28a7c8e44636 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -57,6 +57,7 @@
 #include <linux/atomic.h>		/* This gets us atomic counters.  */
 #include <linux/skbuff.h>	/* We need sk_buff_head. */
 #include <linux/workqueue.h>	/* We need tq_struct.	 */
+#include <linux/flex_array.h>	/* We need flex_array.   */
 #include <linux/sctp.h>		/* We need sctp* header structs.  */
 #include <net/sctp/auth.h>	/* We need auth specific structs */
 #include <net/ip.h>		/* For inet_skb_parm */
@@ -1438,8 +1439,8 @@ struct sctp_stream_in {
 };
 
 struct sctp_stream {
-	struct sctp_stream_out *out;
-	struct sctp_stream_in *in;
+	struct flex_array *out;
+	struct flex_array *in;
 	__u16 outcnt;
 	__u16 incnt;
 	/* Current stream being sent, if any */
@@ -1465,14 +1466,14 @@ static inline struct sctp_stream_out *sctp_stream_out(
 	const struct sctp_stream *stream,
 	__u16 sid)
 {
-	return ((struct sctp_stream_out *)(stream->out)) + sid;
+	return flex_array_get(stream->out, sid);
 }
 
 static inline struct sctp_stream_in *sctp_stream_in(
 	const struct sctp_stream *stream,
 	__u16 sid)
 {
-	return ((struct sctp_stream_in *)(stream->in)) + sid;
+	return flex_array_get(stream->in, sid);
 }
 
 #define SCTP_SO(s, i) sctp_stream_out((s), (i))
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 7ca6fe4e7882..ffb940d3b57c 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -37,6 +37,53 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/stream_sched.h>
 
+static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count,
+				   gfp_t gfp)
+{
+	struct flex_array *result;
+	int err;
+
+	result = flex_array_alloc(elem_size, elem_count, gfp);
+	if (result) {
+		err = flex_array_prealloc(result, 0, elem_count, gfp);
+		if (err) {
+			flex_array_free(result);
+			result = NULL;
+		}
+	}
+
+	return result;
+}
+
+static void fa_free(struct flex_array *fa)
+{
+	if (fa)
+		flex_array_free(fa);
+}
+
+static void fa_copy(struct flex_array *fa, struct flex_array *from,
+		    size_t index, size_t count)
+{
+	void *elem;
+
+	while (count--) {
+		elem = flex_array_get(from, index);
+		flex_array_put(fa, index, elem, 0);
+		index++;
+	}
+}
+
+static void fa_zero(struct flex_array *fa, size_t index, size_t count)
+{
+	void *elem;
+
+	while (count--) {
+		elem = flex_array_get(fa, index);
+		memset(elem, 0, fa->element_size);
+		index++;
+	}
+}
+
 /* Migrates chunks from stream queues to new stream queues if needed,
  * but not across associations. Also, removes those chunks to streams
  * higher than the new max.
@@ -78,34 +125,33 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
 		 * sctp_stream_update will swap ->out pointers.
 		 */
 		for (i = 0; i < outcnt; i++) {
-			kfree(new->out[i].ext);
-			new->out[i].ext = stream->out[i].ext;
-			stream->out[i].ext = NULL;
+			kfree(SCTP_SO(new, i)->ext);
+			SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext;
+			SCTP_SO(stream, i)->ext = NULL;
 		}
 	}
 
 	for (i = outcnt; i < stream->outcnt; i++)
-		kfree(stream->out[i].ext);
+		kfree(SCTP_SO(stream, i)->ext);
 }
 
 static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 				 gfp_t gfp)
 {
-	struct sctp_stream_out *out;
+	struct flex_array *out;
+	size_t elem_size = sizeof(struct sctp_stream_out);
 
-	out = kmalloc_array(outcnt, sizeof(*out), gfp);
+	out = fa_alloc(elem_size, outcnt, gfp);
 	if (!out)
 		return -ENOMEM;
 
 	if (stream->out) {
-		memcpy(out, stream->out, min(outcnt, stream->outcnt) *
-					 sizeof(*out));
-		kfree(stream->out);
+		fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
+		fa_free(stream->out);
 	}
 
 	if (outcnt > stream->outcnt)
-		memset(out + stream->outcnt, 0,
-		       (outcnt - stream->outcnt) * sizeof(*out));
+		fa_zero(out, stream->outcnt, (outcnt - stream->outcnt));
 
 	stream->out = out;
 
@@ -115,22 +161,20 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
 				gfp_t gfp)
 {
-	struct sctp_stream_in *in;
-
-	in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
+	struct flex_array *in;
+	size_t elem_size = sizeof(struct sctp_stream_in);
 
+	in = fa_alloc(elem_size, incnt, gfp);
 	if (!in)
 		return -ENOMEM;
 
 	if (stream->in) {
-		memcpy(in, stream->in, min(incnt, stream->incnt) *
-				       sizeof(*in));
-		kfree(stream->in);
+		fa_copy(in, stream->in, 0, min(incnt, stream->incnt));
+		fa_free(stream->in);
 	}
 
 	if (incnt > stream->incnt)
-		memset(in + stream->incnt, 0,
-		       (incnt - stream->incnt) * sizeof(*in));
+		fa_zero(in, stream->incnt, (incnt - stream->incnt));
 
 	stream->in = in;
 
@@ -174,7 +218,7 @@ in:
 	ret = sctp_stream_alloc_in(stream, incnt, gfp);
 	if (ret) {
 		sched->free(stream);
-		kfree(stream->out);
+		fa_free(stream->out);
 		stream->out = NULL;
 		stream->outcnt = 0;
 		goto out;
@@ -206,8 +250,8 @@ void sctp_stream_free(struct sctp_stream *stream)
 	sched->free(stream);
 	for (i = 0; i < stream->outcnt; i++)
 		kfree(SCTP_SO(stream, i)->ext);
-	kfree(stream->out);
-	kfree(stream->in);
+	fa_free(stream->out);
+	fa_free(stream->in);
 }
 
 void sctp_stream_clear(struct sctp_stream *stream)
-- 
cgit v1.2.3


From 84a75b329be84c108a21ab9c02a52a9bf9e5a919 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 10 Aug 2018 20:51:52 +0300
Subject: net: sched: extend action ops with put_dev callback

As a preparation for removing dependency on rtnl lock from rules update
path, all users of shared objects must take reference while working with
them.

Extend action ops with put_dev() API to be used on net device returned by
get_dev().

Modify mirred action (only action that implements get_dev callback):
- Take reference to net device in get_dev.
- Implement put_dev API that releases reference to net device.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h  |  1 +
 net/sched/act_mirred.c | 12 +++++++++++-
 net/sched/cls_api.c    |  1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 8c9bc02d05e1..1ad5b19e83a9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
 	void	(*stats_update)(struct tc_action *, u64, u32, u64);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
+	void	(*put_dev)(struct net_device *dev);
 	int     (*delete)(struct net *net, u32 index);
 };
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b26d060da08e..7a045cc7fe3b 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -358,8 +358,17 @@ static struct notifier_block mirred_device_notifier = {
 static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 {
 	struct tcf_mirred *m = to_mirred(a);
+	struct net_device *dev = rtnl_dereference(m->tcfm_dev);
+
+	if (dev)
+		dev_hold(dev);
 
-	return rtnl_dereference(m->tcfm_dev);
+	return dev;
+}
+
+static void tcf_mirred_put_dev(struct net_device *dev)
+{
+	dev_put(dev);
 }
 
 static int tcf_mirred_delete(struct net *net, u32 index)
@@ -382,6 +391,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
+	.put_dev	=	tcf_mirred_put_dev,
 	.delete		=	tcf_mirred_delete,
 };
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f922ce27ed5e..31bd1439cf60 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2176,6 +2176,7 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
 		if (!dev)
 			continue;
 		ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+		a->ops->put_dev(dev);
 		if (ret < 0)
 			return ret;
 		ok_count += ret;
-- 
cgit v1.2.3


From 51a9f5ae653979ac4bdbd81778a10431f0177e3c Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 10 Aug 2018 20:51:54 +0300
Subject: net: core: protect rate estimator statistics pointer with lock

Extend gen_new_estimator() to also take stats_lock when re-assigning rate
estimator statistics pointer. (to be used by unlocked actions)

Rename 'stats_lock' to 'lock' and change argument description to explain
that it is now also used for control path.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h  |  4 ++--
 net/core/gen_estimator.c | 21 +++++++++++++--------
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 0304ba2ae353..883bb9085f15 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -59,13 +59,13 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
-		      spinlock_t *stats_lock,
+		      spinlock_t *lock,
 		      seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **ptr,
-			  spinlock_t *stats_lock,
+			  spinlock_t *lock,
 			  seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
 bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 98fd12721221..e4e442d70c2d 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -112,7 +112,7 @@ static void est_timer(struct timer_list *t)
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
- * @stats_lock: statistics lock
+ * @lock: lock for statistics and control path
  * @running: qdisc running seqcount
  * @opt: rate estimator configuration TLV
  *
@@ -128,7 +128,7 @@ static void est_timer(struct timer_list *t)
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
-		      spinlock_t *stats_lock,
+		      spinlock_t *lock,
 		      seqcount_t *running,
 		      struct nlattr *opt)
 {
@@ -154,19 +154,22 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	seqcount_init(&est->seq);
 	intvl_log = parm->interval + 2;
 	est->bstats = bstats;
-	est->stats_lock = stats_lock;
+	est->stats_lock = lock;
 	est->running  = running;
 	est->ewma_log = parm->ewma_log;
 	est->intvl_log = intvl_log;
 	est->cpu_bstats = cpu_bstats;
 
-	if (stats_lock)
+	if (lock)
 		local_bh_disable();
 	est_fetch_counters(est, &b);
-	if (stats_lock)
+	if (lock)
 		local_bh_enable();
 	est->last_bytes = b.bytes;
 	est->last_packets = b.packets;
+
+	if (lock)
+		spin_lock_bh(lock);
 	old = rcu_dereference_protected(*rate_est, 1);
 	if (old) {
 		del_timer_sync(&old->timer);
@@ -179,6 +182,8 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	mod_timer(&est->timer, est->next_jiffies);
 
 	rcu_assign_pointer(*rate_est, est);
+	if (lock)
+		spin_unlock_bh(lock);
 	if (old)
 		kfree_rcu(old, rcu);
 	return 0;
@@ -209,7 +214,7 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * @bstats: basic statistics
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
- * @stats_lock: statistics lock
+ * @lock: lock for statistics and control path
  * @running: qdisc running seqcount (might be NULL)
  * @opt: rate estimator configuration TLV
  *
@@ -221,11 +226,11 @@ EXPORT_SYMBOL(gen_kill_estimator);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **rate_est,
-			  spinlock_t *stats_lock,
+			  spinlock_t *lock,
 			  seqcount_t *running, struct nlattr *opt)
 {
 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
-				 stats_lock, running, opt);
+				 lock, running, opt);
 }
 EXPORT_SYMBOL(gen_replace_estimator);
 
-- 
cgit v1.2.3


From b86d865cb1cae1e61527ea0b8977078bbf694328 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 10 Aug 2018 13:28:07 -0700
Subject: blkcg: Make blkg_root_lookup() work for queues in bypass mode

For legacy queues the only call of blkg_root_lookup() happens after
bypass mode has been enabled. Since blkg_lookup() returns NULL for
queues in bypass mode, modify the blkg_root_lookup() such that it
no longer depends on bypass mode. Rename the function into
blk_queue_root_blkg() as suggested by Tejun.

Suggested-by: Tejun Heo <tj@kernel.org>
Fixes: 6bad9b210a22 ("blkcg: Introduce blkg_root_lookup()")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c          |  2 +-
 include/linux/blk-cgroup.h | 15 +++++----------
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 380bc284ced1..bb109bb0a055 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -813,7 +813,7 @@ static void __blk_release_queue(struct work_struct *work)
 		blk_exit_queue(q);
 	}
 
-	WARN(blkg_root_lookup(q),
+	WARN(blk_queue_root_blkg(q),
 	     "request queue %p is being released but it has not yet been removed from the blkcg controller\n",
 	     q);
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1361cfc9b878..34aec30e06c7 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -342,20 +342,14 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 }
 
 /**
- * blkg_lookup - look up blkg for the specified request queue
+ * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
  * @q: request_queue of interest
  *
  * Lookup blkg for @q at the root level. See also blkg_lookup().
  */
-static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q)
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
 {
-	struct blkcg_gq *blkg;
-
-	rcu_read_lock();
-	blkg = blkg_lookup(&blkcg_root, q);
-	rcu_read_unlock();
-
-	return blkg;
+	return q->root_blkg;
 }
 
 /**
@@ -881,7 +875,8 @@ static inline bool blk_cgroup_congested(void) { return false; }
 static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
-static inline struct blkcg_gq *blkg_root_lookup(struct request_queue *q) { return NULL; }
+static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
+{ return NULL; }
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
-- 
cgit v1.2.3


From 6f10f7d1b02b1bbc305f88d7696445dd38b13881 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 11 Aug 2018 13:19:44 +0800
Subject: bcache: style fix to replace 'unsigned' by 'unsigned int'

This patch fixes warning reported by checkpatch.pl by replacing 'unsigned'
with 'unsigned int'.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Shenghui Wang <shhuiw@foxmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c     |  36 +++++++------
 drivers/md/bcache/bcache.h    | 107 +++++++++++++++++++--------------------
 drivers/md/bcache/bset.c      | 114 ++++++++++++++++++++++--------------------
 drivers/md/bcache/bset.h      |  34 ++++++-------
 drivers/md/bcache/btree.c     |  50 +++++++++---------
 drivers/md/bcache/btree.h     |   4 +-
 drivers/md/bcache/closure.h   |   2 +-
 drivers/md/bcache/debug.c     |   6 +--
 drivers/md/bcache/extents.c   |  22 ++++----
 drivers/md/bcache/io.c        |  18 +++----
 drivers/md/bcache/journal.c   |  20 ++++----
 drivers/md/bcache/journal.h   |   8 +--
 drivers/md/bcache/movinggc.c  |  12 ++---
 drivers/md/bcache/request.c   |  42 ++++++++--------
 drivers/md/bcache/request.h   |  18 +++----
 drivers/md/bcache/stats.c     |  12 ++---
 drivers/md/bcache/stats.h     |   2 +-
 drivers/md/bcache/super.c     |  34 ++++++-------
 drivers/md/bcache/sysfs.c     |  18 +++----
 drivers/md/bcache/util.h      |   9 ++--
 drivers/md/bcache/writeback.c |  19 +++----
 drivers/md/bcache/writeback.h |  12 ++---
 include/uapi/linux/bcache.h   |   6 +--
 23 files changed, 309 insertions(+), 296 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 7fa2631b422c..89f663d22551 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -87,8 +87,8 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned next = c->nbuckets * c->sb.bucket_size / 1024;
-	unsigned i;
+	unsigned int next = c->nbuckets * c->sb.bucket_size / 1024;
+	unsigned int i;
 	int r;
 
 	atomic_sub(sectors, &c->rescale);
@@ -169,7 +169,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 
 #define bucket_prio(b)							\
 ({									\
-	unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;	\
+	unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;	\
 									\
 	(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);	\
 })
@@ -301,7 +301,7 @@ do {									\
 
 static int bch_allocator_push(struct cache *ca, long bucket)
 {
-	unsigned i;
+	unsigned int i;
 
 	/* Prios/gens are actually the most important reserve */
 	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
@@ -385,7 +385,7 @@ out:
 
 /* Allocation */
 
-long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
 {
 	DEFINE_WAIT(w);
 	struct bucket *b;
@@ -421,7 +421,7 @@ out:
 	if (expensive_debug_checks(ca->set)) {
 		size_t iter;
 		long i;
-		unsigned j;
+		unsigned int j;
 
 		for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
 			BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
@@ -470,14 +470,14 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
 
 void bch_bucket_free(struct cache_set *c, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		__bch_bucket_free(PTR_CACHE(c, k, i),
 				  PTR_BUCKET(c, k, i));
 }
 
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
 			   struct bkey *k, int n, bool wait)
 {
 	int i;
@@ -510,7 +510,7 @@ err:
 	return -1;
 }
 
-int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
 			 struct bkey *k, int n, bool wait)
 {
 	int ret;
@@ -524,8 +524,8 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 
 struct open_bucket {
 	struct list_head	list;
-	unsigned		last_write_point;
-	unsigned		sectors_free;
+	unsigned int		last_write_point;
+	unsigned int		sectors_free;
 	BKEY_PADDED(key);
 };
 
@@ -556,7 +556,7 @@ struct open_bucket {
  */
 static struct open_bucket *pick_data_bucket(struct cache_set *c,
 					    const struct bkey *search,
-					    unsigned write_point,
+					    unsigned int write_point,
 					    struct bkey *alloc)
 {
 	struct open_bucket *ret, *ret_task = NULL;
@@ -595,12 +595,16 @@ found:
  *
  * If s->writeback is true, will not fail.
  */
-bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
-		       unsigned write_point, unsigned write_prio, bool wait)
+bool bch_alloc_sectors(struct cache_set *c,
+		       struct bkey *k,
+		       unsigned int sectors,
+		       unsigned int write_point,
+		       unsigned int write_prio,
+		       bool wait)
 {
 	struct open_bucket *b;
 	BKEY_PADDED(key) alloc;
-	unsigned i;
+	unsigned int i;
 
 	/*
 	 * We might have to allocate a new bucket, which we can't do with a
@@ -613,7 +617,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
 	spin_lock(&c->data_bucket_lock);
 
 	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
-		unsigned watermark = write_prio
+		unsigned int watermark = write_prio
 			? RESERVE_MOVINGGC
 			: RESERVE_NONE;
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 05f82ff6f016..1ebd2d9d90d5 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -252,7 +252,7 @@ struct bcache_device {
 	struct kobject		kobj;
 
 	struct cache_set	*c;
-	unsigned		id;
+	unsigned int		id;
 #define BCACHEDEVNAME_SIZE	12
 	char			name[BCACHEDEVNAME_SIZE];
 
@@ -264,18 +264,18 @@ struct bcache_device {
 #define BCACHE_DEV_UNLINK_DONE		2
 #define BCACHE_DEV_WB_RUNNING		3
 #define BCACHE_DEV_RATE_DW_RUNNING	4
-	unsigned		nr_stripes;
-	unsigned		stripe_size;
+	unsigned int		nr_stripes;
+	unsigned int		stripe_size;
 	atomic_t		*stripe_sectors_dirty;
 	unsigned long		*full_dirty_stripes;
 
 	struct bio_set		bio_split;
 
-	unsigned		data_csum:1;
+	unsigned int		data_csum:1;
 
 	int (*cache_miss)(struct btree *, struct search *,
-			  struct bio *, unsigned);
-	int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);
+			  struct bio *, unsigned int);
+	int (*ioctl) (struct bcache_device *, fmode_t, unsigned int, unsigned long);
 };
 
 struct io {
@@ -284,7 +284,7 @@ struct io {
 	struct list_head	lru;
 
 	unsigned long		jiffies;
-	unsigned		sequential;
+	unsigned int		sequential;
 	sector_t		last;
 };
 
@@ -358,18 +358,18 @@ struct cached_dev {
 	struct cache_accounting	accounting;
 
 	/* The rest of this all shows up in sysfs */
-	unsigned		sequential_cutoff;
-	unsigned		readahead;
+	unsigned int		sequential_cutoff;
+	unsigned int		readahead;
 
-	unsigned		io_disable:1;
-	unsigned		verify:1;
-	unsigned		bypass_torture_test:1;
+	unsigned int		io_disable:1;
+	unsigned int		verify:1;
+	unsigned int		bypass_torture_test:1;
 
-	unsigned		partial_stripes_expensive:1;
-	unsigned		writeback_metadata:1;
-	unsigned		writeback_running:1;
+	unsigned int		partial_stripes_expensive:1;
+	unsigned int		writeback_metadata:1;
+	unsigned int		writeback_running:1;
 	unsigned char		writeback_percent;
-	unsigned		writeback_delay;
+	unsigned int		writeback_delay;
 
 	uint64_t		writeback_rate_target;
 	int64_t			writeback_rate_proportional;
@@ -377,16 +377,16 @@ struct cached_dev {
 	int64_t			writeback_rate_integral_scaled;
 	int32_t			writeback_rate_change;
 
-	unsigned		writeback_rate_update_seconds;
-	unsigned		writeback_rate_i_term_inverse;
-	unsigned		writeback_rate_p_term_inverse;
-	unsigned		writeback_rate_minimum;
+	unsigned int		writeback_rate_update_seconds;
+	unsigned int		writeback_rate_i_term_inverse;
+	unsigned int		writeback_rate_p_term_inverse;
+	unsigned int		writeback_rate_minimum;
 
 	enum stop_on_failure	stop_when_cache_set_failed;
 #define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
 	atomic_t		io_errors;
-	unsigned		error_limit;
-	unsigned		offline_seconds;
+	unsigned int		error_limit;
+	unsigned int		offline_seconds;
 
 	char			backing_dev_name[BDEVNAME_SIZE];
 };
@@ -447,7 +447,7 @@ struct cache {
 	 * until a gc finishes - otherwise we could pointlessly burn a ton of
 	 * cpu
 	 */
-	unsigned		invalidate_needs_gc;
+	unsigned int		invalidate_needs_gc;
 
 	bool			discard; /* Get rid of? */
 
@@ -472,7 +472,7 @@ struct gc_stat {
 
 	size_t			nkeys;
 	uint64_t		data;	/* sectors */
-	unsigned		in_use; /* percent */
+	unsigned int		in_use; /* percent */
 };
 
 /*
@@ -518,7 +518,7 @@ struct cache_set {
 	int			caches_loaded;
 
 	struct bcache_device	**devices;
-	unsigned		devices_max_used;
+	unsigned int		devices_max_used;
 	atomic_t		attached_dev_nr;
 	struct list_head	cached_devs;
 	uint64_t		cached_dev_sectors;
@@ -548,7 +548,7 @@ struct cache_set {
 	 * Default number of pages for a new btree node - may be less than a
 	 * full bucket
 	 */
-	unsigned		btree_pages;
+	unsigned int		btree_pages;
 
 	/*
 	 * Lists of struct btrees; lru is the list for structs that have memory
@@ -571,7 +571,7 @@ struct cache_set {
 	struct list_head	btree_cache_freed;
 
 	/* Number of elements in btree_cache + btree_cache_freeable lists */
-	unsigned		btree_cache_used;
+	unsigned int		btree_cache_used;
 
 	/*
 	 * If we need to allocate memory for a new btree node and that
@@ -649,7 +649,7 @@ struct cache_set {
 	struct mutex		verify_lock;
 #endif
 
-	unsigned		nr_uuids;
+	unsigned int		nr_uuids;
 	struct uuid_entry	*uuids;
 	BKEY_PADDED(uuid_bucket);
 	struct closure		uuid_write;
@@ -670,12 +670,12 @@ struct cache_set {
 	struct journal		journal;
 
 #define CONGESTED_MAX		1024
-	unsigned		congested_last_us;
+	unsigned int		congested_last_us;
 	atomic_t		congested;
 
 	/* The rest of this all shows up in sysfs */
-	unsigned		congested_read_threshold_us;
-	unsigned		congested_write_threshold_us;
+	unsigned int		congested_read_threshold_us;
+	unsigned int		congested_write_threshold_us;
 
 	struct time_stats	btree_gc_time;
 	struct time_stats	btree_split_time;
@@ -694,16 +694,16 @@ struct cache_set {
 		ON_ERROR_PANIC,
 	}			on_error;
 #define DEFAULT_IO_ERROR_LIMIT 8
-	unsigned		error_limit;
-	unsigned		error_decay;
+	unsigned int		error_limit;
+	unsigned int		error_decay;
 
 	unsigned short		journal_delay_ms;
 	bool			expensive_debug_checks;
-	unsigned		verify:1;
-	unsigned		key_merging_disabled:1;
-	unsigned		gc_always_rewrite:1;
-	unsigned		shrinker_disabled:1;
-	unsigned		copy_gc_enabled:1;
+	unsigned int		verify:1;
+	unsigned int		key_merging_disabled:1;
+	unsigned int		gc_always_rewrite:1;
+	unsigned int		shrinker_disabled:1;
+	unsigned int		copy_gc_enabled:1;
 
 #define BUCKET_HASH_BITS	12
 	struct hlist_head	bucket_hash[1 << BUCKET_HASH_BITS];
@@ -712,7 +712,7 @@ struct cache_set {
 };
 
 struct bbio {
-	unsigned		submit_time_us;
+	unsigned int		submit_time_us;
 	union {
 		struct bkey	key;
 		uint64_t	_pad[3];
@@ -729,10 +729,10 @@ struct bbio {
 
 #define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
 #define btree_blocks(b)							\
-	((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
+	((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
 
 #define btree_default_blocks(c)						\
-	((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
+	((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
 
 #define bucket_pages(c)		((c)->sb.bucket_size / PAGE_SECTORS)
 #define bucket_bytes(c)		((c)->sb.bucket_size << 9)
@@ -761,21 +761,21 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
 
 static inline struct cache *PTR_CACHE(struct cache_set *c,
 				      const struct bkey *k,
-				      unsigned ptr)
+				      unsigned int ptr)
 {
 	return c->cache[PTR_DEV(k, ptr)];
 }
 
 static inline size_t PTR_BUCKET_NR(struct cache_set *c,
 				   const struct bkey *k,
-				   unsigned ptr)
+				   unsigned int ptr)
 {
 	return sector_to_bucket(c, PTR_OFFSET(k, ptr));
 }
 
 static inline struct bucket *PTR_BUCKET(struct cache_set *c,
 					const struct bkey *k,
-					unsigned ptr)
+					unsigned int ptr)
 {
 	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
 }
@@ -787,13 +787,13 @@ static inline uint8_t gen_after(uint8_t a, uint8_t b)
 }
 
 static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
-				unsigned i)
+				unsigned int i)
 {
 	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
 }
 
 static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
-				 unsigned i)
+				 unsigned int i)
 {
 	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
 }
@@ -888,7 +888,7 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
 static inline void wake_up_allocators(struct cache_set *c)
 {
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	for_each_cache(ca, c, i)
 		wake_up_process(ca->alloc_thread);
@@ -933,7 +933,8 @@ void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
 void __bch_submit_bbio(struct bio *, struct cache_set *);
-void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
+void bch_submit_bbio(struct bio *, struct cache_set *,
+		     struct bkey *, unsigned int);
 
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
@@ -944,13 +945,13 @@ void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
 void __bch_bucket_free(struct cache *, struct bucket *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
 
-long bch_bucket_alloc(struct cache *, unsigned, bool);
-int __bch_bucket_alloc_set(struct cache_set *, unsigned,
+long bch_bucket_alloc(struct cache *, unsigned int, bool);
+int __bch_bucket_alloc_set(struct cache_set *, unsigned int,
 			   struct bkey *, int, bool);
-int bch_bucket_alloc_set(struct cache_set *, unsigned,
+int bch_bucket_alloc_set(struct cache_set *, unsigned int,
 			 struct bkey *, int, bool);
-bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
-		       unsigned, unsigned, bool);
+bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned int,
+		       unsigned int, unsigned int, bool);
 bool bch_cached_dev_error(struct cached_dev *dc);
 
 __printf(2, 3)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 596c93b44e9b..dfda7e9efc3e 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -18,7 +18,7 @@
 
 #ifdef CONFIG_BCACHE_DEBUG
 
-void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
 {
 	struct bkey *k, *next;
 
@@ -26,7 +26,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
 		next = bkey_next(k);
 
 		printk(KERN_ERR "block %u key %u/%u: ", set,
-		       (unsigned) ((u64 *) k - i->d), i->keys);
+		       (unsigned int) ((u64 *) k - i->d), i->keys);
 
 		if (b->ops->key_dump)
 			b->ops->key_dump(b, k);
@@ -42,7 +42,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
 
 void bch_dump_bucket(struct btree_keys *b)
 {
-	unsigned i;
+	unsigned int i;
 
 	console_lock();
 	for (i = 0; i <= b->nsets; i++)
@@ -53,7 +53,7 @@ void bch_dump_bucket(struct btree_keys *b)
 
 int __bch_count_data(struct btree_keys *b)
 {
-	unsigned ret = 0;
+	unsigned int ret = 0;
 	struct btree_iter iter;
 	struct bkey *k;
 
@@ -128,7 +128,7 @@ static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
 
 /* Keylists */
 
-int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
 {
 	size_t oldsize = bch_keylist_nkeys(l);
 	size_t newsize = oldsize + u64s;
@@ -180,7 +180,7 @@ void bch_keylist_pop_front(struct keylist *l)
 /* Key/pointer manipulation */
 
 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
-			      unsigned i)
+			      unsigned int i)
 {
 	BUG_ON(i > KEY_PTRS(src));
 
@@ -194,7 +194,7 @@ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
 
 bool __bch_cut_front(const struct bkey *where, struct bkey *k)
 {
-	unsigned i, len = 0;
+	unsigned int i, len = 0;
 
 	if (bkey_cmp(where, &START_KEY(k)) <= 0)
 		return false;
@@ -214,7 +214,7 @@ bool __bch_cut_front(const struct bkey *where, struct bkey *k)
 
 bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 {
-	unsigned len = 0;
+	unsigned int len = 0;
 
 	if (bkey_cmp(where, k) >= 0)
 		return false;
@@ -240,9 +240,9 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
 #define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
 
 struct bkey_float {
-	unsigned	exponent:BKEY_EXPONENT_BITS;
-	unsigned	m:BKEY_MID_BITS;
-	unsigned	mantissa:BKEY_MANTISSA_BITS;
+	unsigned int	exponent:BKEY_EXPONENT_BITS;
+	unsigned int	m:BKEY_MID_BITS;
+	unsigned int	mantissa:BKEY_MANTISSA_BITS;
 } __packed;
 
 /*
@@ -311,7 +311,7 @@ void bch_btree_keys_free(struct btree_keys *b)
 }
 EXPORT_SYMBOL(bch_btree_keys_free);
 
-int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
+int bch_btree_keys_alloc(struct btree_keys *b, unsigned int page_order, gfp_t gfp)
 {
 	struct bset_tree *t = b->set;
 
@@ -345,7 +345,7 @@ EXPORT_SYMBOL(bch_btree_keys_alloc);
 void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
 			 bool *expensive_debug_checks)
 {
-	unsigned i;
+	unsigned int i;
 
 	b->ops = ops;
 	b->expensive_debug_checks = expensive_debug_checks;
@@ -370,7 +370,7 @@ EXPORT_SYMBOL(bch_btree_keys_init);
  * return array index next to j when does in-order traverse
  * of a binary tree which is stored in a linear array
  */
-static unsigned inorder_next(unsigned j, unsigned size)
+static unsigned int inorder_next(unsigned int j, unsigned int size)
 {
 	if (j * 2 + 1 < size) {
 		j = j * 2 + 1;
@@ -387,7 +387,7 @@ static unsigned inorder_next(unsigned j, unsigned size)
  * return array index previous to j when does in-order traverse
  * of a binary tree which is stored in a linear array
  */
-static unsigned inorder_prev(unsigned j, unsigned size)
+static unsigned int inorder_prev(unsigned int j, unsigned int size)
 {
 	if (j * 2 < size) {
 		j = j * 2;
@@ -413,10 +413,12 @@ static unsigned inorder_prev(unsigned j, unsigned size)
  * extra is a function of size:
  *   extra = (size - rounddown_pow_of_two(size - 1)) << 1;
  */
-static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+static unsigned int __to_inorder(unsigned int j,
+				  unsigned int size,
+				  unsigned int extra)
 {
-	unsigned b = fls(j);
-	unsigned shift = fls(size - 1) - b;
+	unsigned int b = fls(j);
+	unsigned int shift = fls(size - 1) - b;
 
 	j  ^= 1U << (b - 1);
 	j <<= 1;
@@ -433,14 +435,16 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
  * Return the cacheline index in bset_tree->data, where j is index
  * from a linear array which stores the auxiliar binary tree
  */
-static unsigned to_inorder(unsigned j, struct bset_tree *t)
+static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
 {
 	return __to_inorder(j, t->size, t->extra);
 }
 
-static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+static unsigned int __inorder_to_tree(unsigned int j,
+				      unsigned int size,
+				      unsigned int extra)
 {
-	unsigned shift;
+	unsigned int shift;
 
 	if (j > extra)
 		j += j - extra;
@@ -457,7 +461,7 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
  * Return an index from a linear array which stores the auxiliar binary
  * tree, j is the cacheline index of t->data.
  */
-static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
 {
 	return __inorder_to_tree(j, t->size, t->extra);
 }
@@ -468,11 +472,11 @@ void inorder_test(void)
 	unsigned long done = 0;
 	ktime_t start = ktime_get();
 
-	for (unsigned size = 2;
+	for (unsigned int size = 2;
 	     size < 65536000;
 	     size++) {
-		unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
-		unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+		unsigned int extra = (size - rounddown_pow_of_two(size - 1)) << 1;
+		unsigned int i = 1, j = rounddown_pow_of_two(size - 1);
 
 		if (!(size % 4096))
 			printk(KERN_NOTICE "loop %u, %llu per us\n", size,
@@ -518,30 +522,31 @@ void inorder_test(void)
  * of the previous key so we can walk backwards to it from t->tree[j]'s key.
  */
 
-static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
-				      unsigned offset)
+static struct bkey *cacheline_to_bkey(struct bset_tree *t,
+				      unsigned int cacheline,
+				      unsigned int offset)
 {
 	return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
 }
 
-static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
 {
 	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
 }
 
-static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
-					 unsigned cacheline,
+static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
+					 unsigned int cacheline,
 					 struct bkey *k)
 {
 	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
 }
 
-static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
 {
 	return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
 }
 
-static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
 {
 	return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
 }
@@ -550,7 +555,7 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
  * For the write set - the one we're currently inserting keys into - we don't
  * maintain a full search tree, we just keep a simple lookup table in t->prev.
  */
-static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
+static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
 {
 	return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
 }
@@ -576,14 +581,14 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
  * See make_bfloat() to check when most significant bit of f->exponent
  * is set or not.
  */
-static inline unsigned bfloat_mantissa(const struct bkey *k,
+static inline unsigned int bfloat_mantissa(const struct bkey *k,
 				       struct bkey_float *f)
 {
 	const uint64_t *p = &k->low - (f->exponent >> 6);
 	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
 }
 
-static void make_bfloat(struct bset_tree *t, unsigned j)
+static void make_bfloat(struct bset_tree *t, unsigned int j)
 {
 	struct bkey_float *f = &t->tree[j];
 	struct bkey *m = tree_to_bkey(t, j);
@@ -631,7 +636,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
 static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
 {
 	if (t != b->set) {
-		unsigned j = roundup(t[-1].size,
+		unsigned int j = roundup(t[-1].size,
 				     64 / sizeof(struct bkey_float));
 
 		t->tree = t[-1].tree + j;
@@ -686,13 +691,13 @@ void bch_bset_build_written_tree(struct btree_keys *b)
 {
 	struct bset_tree *t = bset_tree_last(b);
 	struct bkey *prev = NULL, *k = t->data->start;
-	unsigned j, cacheline = 1;
+	unsigned int j, cacheline = 1;
 
 	b->last_set_unwritten = 0;
 
 	bset_alloc_tree(b, t);
 
-	t->size = min_t(unsigned,
+	t->size = min_t(unsigned int,
 			bkey_to_cacheline(t, bset_bkey_last(t->data)),
 			b->set->tree + btree_keys_cachelines(b) - t->tree);
 
@@ -732,7 +737,7 @@ EXPORT_SYMBOL(bch_bset_build_written_tree);
 void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
 {
 	struct bset_tree *t;
-	unsigned inorder, j = 1;
+	unsigned int inorder, j = 1;
 
 	for (t = b->set; t <= bset_tree_last(b); t++)
 		if (k < bset_bkey_last(t->data))
@@ -779,8 +784,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
 				      struct bset_tree *t,
 				      struct bkey *k)
 {
-	unsigned shift = bkey_u64s(k);
-	unsigned j = bkey_to_cacheline(t, k);
+	unsigned int shift = bkey_u64s(k);
+	unsigned int j = bkey_to_cacheline(t, k);
 
 	/* We're getting called from btree_split() or btree_gc, just bail out */
 	if (!t->size)
@@ -867,10 +872,10 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
 }
 EXPORT_SYMBOL(bch_bset_insert);
 
-unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
 			      struct bkey *replace_key)
 {
-	unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
+	unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
 	struct bset *i = bset_tree_last(b)->data;
 	struct bkey *m, *prev = NULL;
 	struct btree_iter iter;
@@ -922,10 +927,10 @@ struct bset_search_iter {
 static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
 						     const struct bkey *search)
 {
-	unsigned li = 0, ri = t->size;
+	unsigned int li = 0, ri = t->size;
 
 	while (li + 1 != ri) {
-		unsigned m = (li + ri) >> 1;
+		unsigned int m = (li + ri) >> 1;
 
 		if (bkey_cmp(table_to_bkey(t, m), search) > 0)
 			ri = m;
@@ -944,7 +949,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 {
 	struct bkey *l, *r;
 	struct bkey_float *f;
-	unsigned inorder, j, n = 1;
+	unsigned int inorder, j, n = 1;
 
 	do {
 		/*
@@ -958,7 +963,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 		 *		p = 0;
 		 * but a branch instruction is avoided.
 		 */
-		unsigned p = n << 4;
+		unsigned int p = n << 4;
 		p &= ((int) (p - t->size)) >> 31;
 
 		prefetch(&t->tree[p]);
@@ -978,7 +983,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 		 * to work  - that's done in make_bfloat()
 		 */
 		if (likely(f->exponent != 127))
-			n = j * 2 + (((unsigned)
+			n = j * 2 + (((unsigned int)
 				      (f->mantissa -
 				       bfloat_mantissa(search, f))) >> 31);
 		else
@@ -1184,7 +1189,8 @@ void bch_bset_sort_state_free(struct bset_sort_state *state)
 	mempool_exit(&state->pool);
 }
 
-int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+			     unsigned int page_order)
 {
 	spin_lock_init(&state->time.lock);
 
@@ -1237,7 +1243,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
 }
 
 static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
-			 unsigned start, unsigned order, bool fixup,
+			 unsigned int start, unsigned int order, bool fixup,
 			 struct bset_sort_state *state)
 {
 	uint64_t start_time;
@@ -1288,7 +1294,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 		bch_time_stats_update(&state->time, start_time);
 }
 
-void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
 			    struct bset_sort_state *state)
 {
 	size_t order = b->page_order, keys = 0;
@@ -1298,7 +1304,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
 	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
 
 	if (start) {
-		unsigned i;
+		unsigned int i;
 
 		for (i = start; i <= b->nsets; i++)
 			keys += b->set[i].data->keys;
@@ -1338,7 +1344,7 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
 
 void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
 {
-	unsigned crit = SORT_CRIT;
+	unsigned int crit = SORT_CRIT;
 	int i;
 
 	/* Don't sort if nothing to do */
@@ -1367,7 +1373,7 @@ EXPORT_SYMBOL(bch_btree_sort_lazy);
 
 void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i <= b->nsets; i++) {
 		struct bset_tree *t = &b->set[i];
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index b867f2200495..fdc296103113 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -163,10 +163,10 @@ struct bset_tree {
 	 */
 
 	/* size of the binary tree and prev array */
-	unsigned		size;
+	unsigned int		size;
 
 	/* function of size - precalculated for to_inorder() */
-	unsigned		extra;
+	unsigned int		extra;
 
 	/* copy of the last key in the set */
 	struct bkey		end;
@@ -211,7 +211,7 @@ struct btree_keys {
 	const struct btree_keys_ops	*ops;
 	uint8_t			page_order;
 	uint8_t			nsets;
-	unsigned		last_set_unwritten:1;
+	unsigned int		last_set_unwritten:1;
 	bool			*expensive_debug_checks;
 
 	/*
@@ -239,12 +239,12 @@ static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
 	return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
 }
 
-static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_byte_offset(struct btree_keys *b, struct bset *i)
 {
 	return ((size_t) i) - ((size_t) b->set->data);
 }
 
-static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_sector_offset(struct btree_keys *b, struct bset *i)
 {
 	return bset_byte_offset(b, i) >> 9;
 }
@@ -273,7 +273,7 @@ static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
 }
 
 static inline struct bset *bset_next_set(struct btree_keys *b,
-					 unsigned block_bytes)
+					 unsigned int block_bytes)
 {
 	struct bset *i = bset_tree_last(b)->data;
 
@@ -281,7 +281,7 @@ static inline struct bset *bset_next_set(struct btree_keys *b,
 }
 
 void bch_btree_keys_free(struct btree_keys *);
-int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
+int bch_btree_keys_alloc(struct btree_keys *, unsigned int, gfp_t);
 void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
 			 bool *);
 
@@ -290,7 +290,7 @@ void bch_bset_build_written_tree(struct btree_keys *);
 void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
 bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
 void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
-unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
+unsigned int bch_btree_insert_key(struct btree_keys *, struct bkey *,
 			      struct bkey *);
 
 enum {
@@ -349,20 +349,20 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
 struct bset_sort_state {
 	mempool_t		pool;
 
-	unsigned		page_order;
-	unsigned		crit_factor;
+	unsigned int		page_order;
+	unsigned int		crit_factor;
 
 	struct time_stats	time;
 };
 
 void bch_bset_sort_state_free(struct bset_sort_state *);
-int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
+int bch_bset_sort_state_init(struct bset_sort_state *, unsigned int);
 void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
 void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
 			 struct bset_sort_state *);
 void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
 				    struct bset_sort_state *);
-void bch_btree_sort_partial(struct btree_keys *, unsigned,
+void bch_btree_sort_partial(struct btree_keys *, unsigned int,
 			    struct bset_sort_state *);
 
 static inline void bch_btree_sort(struct btree_keys *b,
@@ -383,7 +383,7 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
 
 #define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)
 
-static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
 {
 	return bkey_idx(i->start, idx);
 }
@@ -402,7 +402,7 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
 }
 
 void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
-			      unsigned);
+			      unsigned int);
 bool __bch_cut_front(const struct bkey *, struct bkey *);
 bool __bch_cut_back(const struct bkey *, struct bkey *);
 
@@ -524,7 +524,7 @@ static inline size_t bch_keylist_bytes(struct keylist *l)
 
 struct bkey *bch_keylist_pop(struct keylist *);
 void bch_keylist_pop_front(struct keylist *);
-int __bch_keylist_realloc(struct keylist *, unsigned);
+int __bch_keylist_realloc(struct keylist *, unsigned int);
 
 /* Debug stuff */
 
@@ -532,7 +532,7 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
 
 int __bch_count_data(struct btree_keys *);
 void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned int);
 void bch_dump_bucket(struct btree_keys *);
 
 #else
@@ -541,7 +541,7 @@ static inline int __bch_count_data(struct btree_keys *b) { return -1; }
 static inline void __printf(2, 3)
 	__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
 static inline void bch_dump_bucket(struct btree_keys *b) {}
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned int);
 
 #endif
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index c19f7716df88..96c39a8db895 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -183,7 +183,7 @@ static void bch_btree_init_next(struct btree *b)
 
 void bkey_put(struct cache_set *c, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i))
@@ -479,7 +479,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
 
 void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
-	unsigned nsets = b->keys.nsets;
+	unsigned int nsets = b->keys.nsets;
 
 	lockdep_assert_held(&b->lock);
 
@@ -581,7 +581,7 @@ static void mca_bucket_free(struct btree *b)
 	list_move(&b->list, &b->c->btree_cache_freeable);
 }
 
-static unsigned btree_order(struct bkey *k)
+static unsigned int btree_order(struct bkey *k)
 {
 	return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
 }
@@ -589,7 +589,7 @@ static unsigned btree_order(struct bkey *k)
 static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
 {
 	if (!bch_btree_keys_alloc(&b->keys,
-				  max_t(unsigned,
+				  max_t(unsigned int,
 					ilog2(b->c->btree_pages),
 					btree_order(k)),
 				  gfp)) {
@@ -620,7 +620,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 	return b;
 }
 
-static int mca_reap(struct btree *b, unsigned min_order, bool flush)
+static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
 {
 	struct closure cl;
 
@@ -786,7 +786,7 @@ void bch_btree_cache_free(struct cache_set *c)
 
 int bch_btree_cache_alloc(struct cache_set *c)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < mca_reserve(c); i++)
 		if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
@@ -1136,7 +1136,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
 
 static void make_btree_freeing_key(struct btree *b, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	mutex_lock(&b->c->bucket_lock);
 
@@ -1157,7 +1157,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
 {
 	struct cache_set *c = b->c;
 	struct cache *ca;
-	unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
+	unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
 
 	mutex_lock(&c->bucket_lock);
 
@@ -1181,7 +1181,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 				    struct bkey *k)
 {
 	uint8_t stale = 0;
-	unsigned i;
+	unsigned int i;
 	struct bucket *g;
 
 	/*
@@ -1219,7 +1219,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 			SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
 
 		/* guard against overflow */
-		SET_GC_SECTORS_USED(g, min_t(unsigned,
+		SET_GC_SECTORS_USED(g, min_t(unsigned int,
 					     GC_SECTORS_USED(g) + KEY_SIZE(k),
 					     MAX_GC_SECTORS_USED));
 
@@ -1233,7 +1233,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 
 void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i) &&
@@ -1259,7 +1259,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
 static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
 {
 	uint8_t stale = 0;
-	unsigned keys = 0, good_keys = 0;
+	unsigned int keys = 0, good_keys = 0;
 	struct bkey *k;
 	struct btree_iter iter;
 	struct bset_tree *t;
@@ -1302,7 +1302,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
 
 struct gc_merge_info {
 	struct btree	*b;
-	unsigned	keys;
+	unsigned int	keys;
 };
 
 static int bch_btree_insert_node(struct btree *, struct btree_op *,
@@ -1311,7 +1311,7 @@ static int bch_btree_insert_node(struct btree *, struct btree_op *,
 static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 			     struct gc_stat *gc, struct gc_merge_info *r)
 {
-	unsigned i, nodes = 0, keys = 0, blocks;
+	unsigned int i, nodes = 0, keys = 0, blocks;
 	struct btree *new_nodes[GC_MERGE_NODES];
 	struct keylist keylist;
 	struct closure cl;
@@ -1511,11 +1511,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
 	return -EINTR;
 }
 
-static unsigned btree_gc_count_keys(struct btree *b)
+static unsigned int btree_gc_count_keys(struct btree *b)
 {
 	struct bkey *k;
 	struct btree_iter iter;
-	unsigned ret = 0;
+	unsigned int ret = 0;
 
 	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
 		ret += bkey_u64s(k);
@@ -1678,7 +1678,7 @@ static void btree_gc_start(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned i;
+	unsigned int i;
 
 	if (!c->gc_mark_valid)
 		return;
@@ -1704,7 +1704,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
 {
 	struct bucket *b;
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	mutex_lock(&c->bucket_lock);
 
@@ -1722,7 +1722,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
 		struct bcache_device *d = c->devices[i];
 		struct cached_dev *dc;
 		struct keybuf_key *w, *n;
-		unsigned j;
+		unsigned int j;
 
 		if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
 			continue;
@@ -1814,7 +1814,7 @@ static void bch_btree_gc(struct cache_set *c)
 static bool gc_should_run(struct cache_set *c)
 {
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	for_each_cache(ca, c, i)
 		if (ca->invalidate_needs_gc)
@@ -1905,7 +1905,7 @@ void bch_initial_gc_finish(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned i;
+	unsigned int i;
 
 	bch_btree_gc_finish(c);
 
@@ -1945,7 +1945,7 @@ void bch_initial_gc_finish(struct cache_set *c)
 static bool btree_insert_key(struct btree *b, struct bkey *k,
 			     struct bkey *replace_key)
 {
-	unsigned status;
+	unsigned int status;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 
@@ -2044,7 +2044,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
 			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
 
 	if (split) {
-		unsigned keys = 0;
+		unsigned int keys = 0;
 
 		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
@@ -2300,7 +2300,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
 
 void bch_btree_set_root(struct btree *b)
 {
-	unsigned i;
+	unsigned int i;
 	struct closure cl;
 
 	closure_init_stack(&cl);
@@ -2412,7 +2412,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 
 struct refill {
 	struct btree_op	op;
-	unsigned	nr_found;
+	unsigned int	nr_found;
 	struct keybuf	*buf;
 	struct bkey	*end;
 	keybuf_pred_fn	*pred;
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 68e9d926134d..8cbc0fd27738 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -184,7 +184,7 @@ static inline struct bset *btree_bset_last(struct btree *b)
 	return bset_tree_last(&b->keys)->data;
 }
 
-static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
+static inline unsigned int bset_block_offset(struct btree *b, struct bset *i)
 {
 	return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
 }
@@ -213,7 +213,7 @@ struct btree_op {
 	/* Btree level at which we start taking write locks */
 	short			lock;
 
-	unsigned		insert_collision:1;
+	unsigned int		insert_collision:1;
 };
 
 static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 7c2c5bc7c88b..7f3594c0be14 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -159,7 +159,7 @@ struct closure {
 #define CLOSURE_MAGIC_DEAD	0xc054dead
 #define CLOSURE_MAGIC_ALIVE	0xc054a11e
 
-	unsigned		magic;
+	unsigned int		magic;
 	struct list_head	all;
 	unsigned long		ip;
 	unsigned long		waiting_on;
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 12034c07257b..0caad145902b 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -69,7 +69,7 @@ void bch_btree_verify(struct btree *b)
 		   sorted->start,
 		   (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
 		struct bset *i;
-		unsigned j;
+		unsigned int j;
 
 		console_lock();
 
@@ -80,7 +80,7 @@ void bch_btree_verify(struct btree *b)
 		bch_dump_bset(&v->keys, sorted, 0);
 
 		for_each_written_bset(b, ondisk, i) {
-			unsigned block = ((void *) i - (void *) ondisk) /
+			unsigned int block = ((void *) i - (void *) ondisk) /
 				block_bytes(b->c);
 
 			printk(KERN_ERR "*** on disk block %u:\n", block);
@@ -176,7 +176,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 
 	while (size) {
 		struct keybuf_key *w;
-		unsigned bytes = min(i->bytes, size);
+		unsigned int bytes = min(i->bytes, size);
 
 		int err = copy_to_user(buf, i->buf, bytes);
 		if (err)
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 1d096742eb41..e96ba928eeb6 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -46,7 +46,7 @@ static bool bch_key_sort_cmp(struct btree_iter_set l,
 
 static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i)) {
@@ -67,7 +67,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
 
 static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i)) {
@@ -96,7 +96,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
 
 void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
 {
-	unsigned i = 0;
+	unsigned int i = 0;
 	char *out = buf, *end = buf + size;
 
 #define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
@@ -126,7 +126,7 @@ void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
 static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
 {
 	struct btree *b = container_of(keys, struct btree, keys);
-	unsigned j;
+	unsigned int j;
 	char buf[80];
 
 	bch_extent_to_text(buf, sizeof(buf), k);
@@ -171,7 +171,7 @@ static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
 
 static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 	char buf[80];
 	struct bucket *g;
 
@@ -204,7 +204,7 @@ err:
 static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
 {
 	struct btree *b = container_of(bk, struct btree, keys);
-	unsigned i;
+	unsigned int i;
 
 	if (!bkey_cmp(k, &ZERO_KEY) ||
 	    !KEY_PTRS(k) ||
@@ -327,7 +327,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
 	struct cache_set *c = container_of(b, struct btree, keys)->c;
 
 	uint64_t old_offset;
-	unsigned old_size, sectors_found = 0;
+	unsigned int old_size, sectors_found = 0;
 
 	BUG_ON(!KEY_OFFSET(insert));
 	BUG_ON(!KEY_SIZE(insert));
@@ -363,7 +363,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
 			 * k might have been split since we inserted/found the
 			 * key we're replacing
 			 */
-			unsigned i;
+			unsigned int i;
 			uint64_t offset = KEY_START(k) -
 				KEY_START(replace_key);
 
@@ -502,7 +502,7 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
 }
 
 static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
-				     unsigned ptr)
+				     unsigned int ptr)
 {
 	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
 	char buf[80];
@@ -534,7 +534,7 @@ err:
 static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
 {
 	struct btree *b = container_of(bk, struct btree, keys);
-	unsigned i, stale;
+	unsigned int i, stale;
 
 	if (!KEY_PTRS(k) ||
 	    bch_extent_invalid(bk, k))
@@ -577,7 +577,7 @@ static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
 static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
 {
 	struct btree *b = container_of(bk, struct btree, keys);
-	unsigned i;
+	unsigned int i;
 
 	if (key_merging_disabled(b->c))
 		return false;
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9612873afee2..c6b41a09f550 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -42,7 +42,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
 }
 
 void bch_submit_bbio(struct bio *bio, struct cache_set *c,
-		     struct bkey *k, unsigned ptr)
+		     struct bkey *k, unsigned int ptr)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
 	bch_bkey_copy_single_ptr(&b->key, k, ptr);
@@ -52,7 +52,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 /* IO errors */
 void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
 {
-	unsigned errors;
+	unsigned int errors;
 
 	WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
 
@@ -75,12 +75,12 @@ void bch_count_io_errors(struct cache *ca,
 	 */
 
 	if (ca->set->error_decay) {
-		unsigned count = atomic_inc_return(&ca->io_count);
+		unsigned int count = atomic_inc_return(&ca->io_count);
 
 		while (count > ca->set->error_decay) {
-			unsigned errors;
-			unsigned old = count;
-			unsigned new = count - ca->set->error_decay;
+			unsigned int errors;
+			unsigned int old = count;
+			unsigned int new = count - ca->set->error_decay;
 
 			/*
 			 * First we subtract refresh from count; each time we
@@ -104,7 +104,7 @@ void bch_count_io_errors(struct cache *ca,
 	}
 
 	if (error) {
-		unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
+		unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
 						    &ca->io_errors);
 		errors >>= IO_ERROR_SHIFT;
 
@@ -126,12 +126,12 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 	struct cache *ca = PTR_CACHE(c, &b->key, 0);
 	int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
 
-	unsigned threshold = op_is_write(bio_op(bio))
+	unsigned int threshold = op_is_write(bio_op(bio))
 		? c->congested_write_threshold_us
 		: c->congested_read_threshold_us;
 
 	if (threshold) {
-		unsigned t = local_clock_us();
+		unsigned int t = local_clock_us();
 
 		int us = t - b->submit_time_us;
 		int congested = atomic_read(&c->congested);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 10748c626a1d..ee61062b58fc 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -32,7 +32,7 @@ static void journal_read_endio(struct bio *bio)
 }
 
 static int journal_read_bucket(struct cache *ca, struct list_head *list,
-			       unsigned bucket_index)
+			       unsigned int bucket_index)
 {
 	struct journal_device *ja = &ca->journal;
 	struct bio *bio = &ja->bio;
@@ -40,7 +40,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
 	struct journal_replay *i;
 	struct jset *j, *data = ca->set->journal.w[0].data;
 	struct closure cl;
-	unsigned len, left, offset = 0;
+	unsigned int len, left, offset = 0;
 	int ret = 0;
 	sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
 
@@ -50,7 +50,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
 
 	while (offset < ca->sb.bucket_size) {
 reread:		left = ca->sb.bucket_size - offset;
-		len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
+		len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
 
 		bio_reset(bio);
 		bio->bi_iter.bi_sector	= bucket + offset;
@@ -154,12 +154,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 	})
 
 	struct cache *ca;
-	unsigned iter;
+	unsigned int iter;
 
 	for_each_cache(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;
 		DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
-		unsigned i, l, r, m;
+		unsigned int i, l, r, m;
 		uint64_t seq;
 
 		bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
@@ -304,7 +304,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 		     k < bset_bkey_last(&i->j);
 		     k = bkey_next(k))
 			if (!__bch_extent_invalid(c, k)) {
-				unsigned j;
+				unsigned int j;
 
 				for (j = 0; j < KEY_PTRS(k); j++)
 					if (ptr_available(c, k, j))
@@ -492,7 +492,7 @@ static void journal_reclaim(struct cache_set *c)
 	struct bkey *k = &c->journal.key;
 	struct cache *ca;
 	uint64_t last_seq;
-	unsigned iter, n = 0;
+	unsigned int iter, n = 0;
 	atomic_t p __maybe_unused;
 
 	atomic_long_inc(&c->reclaim);
@@ -526,7 +526,7 @@ static void journal_reclaim(struct cache_set *c)
 
 	for_each_cache(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;
-		unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+		unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
 
 		/* No space available on this device */
 		if (next == ja->discard_idx)
@@ -609,7 +609,7 @@ static void journal_write_unlocked(struct closure *cl)
 	struct cache *ca;
 	struct journal_write *w = c->journal.cur;
 	struct bkey *k = &c->journal.key;
-	unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
+	unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
 		c->sb.block_size;
 
 	struct bio *bio;
@@ -705,7 +705,7 @@ static void journal_try_write(struct cache_set *c)
 }
 
 static struct journal_write *journal_wait_for_write(struct cache_set *c,
-						    unsigned nkeys)
+						    unsigned int nkeys)
 	__acquires(&c->journal.lock)
 {
 	size_t sectors;
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index b5788199188f..f0982731ae20 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -110,7 +110,7 @@ struct journal {
 	struct delayed_work	work;
 
 	/* Number of blocks free in the bucket(s) we're currently writing to */
-	unsigned		blocks_free;
+	unsigned int		blocks_free;
 	uint64_t		seq;
 	DECLARE_FIFO(atomic_t, pin);
 
@@ -131,13 +131,13 @@ struct journal_device {
 	uint64_t		seq[SB_JOURNAL_BUCKETS];
 
 	/* Journal bucket we're currently writing to */
-	unsigned		cur_idx;
+	unsigned int		cur_idx;
 
 	/* Last journal bucket that still contains an open journal entry */
-	unsigned		last_idx;
+	unsigned int		last_idx;
 
 	/* Next journal bucket to be discarded */
-	unsigned		discard_idx;
+	unsigned int		discard_idx;
 
 #define DISCARD_READY		0
 #define DISCARD_IN_FLIGHT	1
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index a24c3a95b2c0..0790d710f911 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -23,7 +23,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 {
 	struct cache_set *c = container_of(buf, struct cache_set,
 					   moving_gc_keys);
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i) &&
@@ -186,7 +186,7 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
 	return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
 }
 
-static unsigned bucket_heap_top(struct cache *ca)
+static unsigned int bucket_heap_top(struct cache *ca)
 {
 	struct bucket *b;
 	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
@@ -196,7 +196,7 @@ void bch_moving_gc(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned i;
+	unsigned int i;
 
 	if (!c->copy_gc_enabled)
 		return;
@@ -204,9 +204,9 @@ void bch_moving_gc(struct cache_set *c)
 	mutex_lock(&c->bucket_lock);
 
 	for_each_cache(ca, c, i) {
-		unsigned sectors_to_move = 0;
-		unsigned reserve_sectors = ca->sb.bucket_size *
-			fifo_used(&ca->free[RESERVE_MOVINGGC]);
+		unsigned int sectors_to_move = 0;
+		unsigned int reserve_sectors = ca->sb.bucket_size *
+			     fifo_used(&ca->free[RESERVE_MOVINGGC]);
 
 		ca->heap.used = 0;
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 7dbe8b6316a0..6e1a60dd1742 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -27,7 +27,7 @@ struct kmem_cache *bch_search_cache;
 
 static void bch_data_insert_start(struct closure *);
 
-static unsigned cache_mode(struct cached_dev *dc)
+static unsigned int cache_mode(struct cached_dev *dc)
 {
 	return BDEV_CACHE_MODE(&dc->sb);
 }
@@ -98,7 +98,7 @@ static void bch_data_insert_keys(struct closure *cl)
 	closure_return(cl);
 }
 
-static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
 			       struct cache_set *c)
 {
 	size_t oldsize = bch_keylist_nkeys(l);
@@ -125,7 +125,7 @@ static void bch_data_invalidate(struct closure *cl)
 		 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
 
 	while (bio_sectors(bio)) {
-		unsigned sectors = min(bio_sectors(bio),
+		unsigned int sectors = min(bio_sectors(bio),
 				       1U << (KEY_SIZE_BITS - 1));
 
 		if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
@@ -211,7 +211,7 @@ static void bch_data_insert_start(struct closure *cl)
 	bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);
 
 	do {
-		unsigned i;
+		unsigned int i;
 		struct bkey *k;
 		struct bio_set *split = &op->c->bio_split;
 
@@ -328,7 +328,7 @@ void bch_data_insert(struct closure *cl)
 
 /* Congested? */
 
-unsigned bch_get_congested(struct cache_set *c)
+unsigned int bch_get_congested(struct cache_set *c)
 {
 	int i;
 	long rand;
@@ -372,8 +372,8 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
 static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 {
 	struct cache_set *c = dc->disk.c;
-	unsigned mode = cache_mode(dc);
-	unsigned sectors, congested = bch_get_congested(c);
+	unsigned int mode = cache_mode(dc);
+	unsigned int sectors, congested = bch_get_congested(c);
 	struct task_struct *task = current;
 	struct io *i;
 
@@ -469,11 +469,11 @@ struct search {
 	struct bio		*cache_miss;
 	struct bcache_device	*d;
 
-	unsigned		insert_bio_sectors;
-	unsigned		recoverable:1;
-	unsigned		write:1;
-	unsigned		read_dirty_data:1;
-	unsigned		cache_missed:1;
+	unsigned int		insert_bio_sectors;
+	unsigned int		recoverable:1;
+	unsigned int		write:1;
+	unsigned int		read_dirty_data:1;
+	unsigned int		cache_missed:1;
 
 	unsigned long		start_time;
 
@@ -514,15 +514,15 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 	struct search *s = container_of(op, struct search, op);
 	struct bio *n, *bio = &s->bio.bio;
 	struct bkey *bio_key;
-	unsigned ptr;
+	unsigned int ptr;
 
 	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
 		return MAP_CONTINUE;
 
 	if (KEY_INODE(k) != s->iop.inode ||
 	    KEY_START(k) > bio->bi_iter.bi_sector) {
-		unsigned bio_sectors = bio_sectors(bio);
-		unsigned sectors = KEY_INODE(k) == s->iop.inode
+		unsigned int bio_sectors = bio_sectors(bio);
+		unsigned int sectors = KEY_INODE(k) == s->iop.inode
 			? min_t(uint64_t, INT_MAX,
 				KEY_START(k) - bio->bi_iter.bi_sector)
 			: INT_MAX;
@@ -856,10 +856,10 @@ static void cached_dev_read_done_bh(struct closure *cl)
 }
 
 static int cached_dev_cache_miss(struct btree *b, struct search *s,
-				 struct bio *bio, unsigned sectors)
+				 struct bio *bio, unsigned int sectors)
 {
 	int ret = MAP_CONTINUE;
-	unsigned reada = 0;
+	unsigned int reada = 0;
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 	struct bio *miss, *cache_bio;
 
@@ -1226,7 +1226,7 @@ static int cached_dev_congested(void *data, int bits)
 		return 1;
 
 	if (cached_dev_get(dc)) {
-		unsigned i;
+		unsigned int i;
 		struct cache *ca;
 
 		for_each_cache(ca, d->c, i) {
@@ -1253,9 +1253,9 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
 /* Flash backed devices */
 
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
-				struct bio *bio, unsigned sectors)
+				struct bio *bio, unsigned int sectors)
 {
-	unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
+	unsigned int bytes = min(sectors, bio_sectors(bio)) << 9;
 
 	swap(bio->bi_iter.bi_size, bytes);
 	zero_fill_bio(bio);
@@ -1338,7 +1338,7 @@ static int flash_dev_congested(void *data, int bits)
 	struct bcache_device *d = data;
 	struct request_queue *q;
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 	int ret = 0;
 
 	for_each_cache(ca, d->c, i) {
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index dea0886b81c1..8e8c1ce00d9d 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -8,7 +8,7 @@ struct data_insert_op {
 	struct bio		*bio;
 	struct workqueue_struct *wq;
 
-	unsigned		inode;
+	unsigned int		inode;
 	uint16_t		write_point;
 	uint16_t		write_prio;
 	blk_status_t		status;
@@ -17,15 +17,15 @@ struct data_insert_op {
 		uint16_t	flags;
 
 	struct {
-		unsigned	bypass:1;
-		unsigned	writeback:1;
-		unsigned	flush_journal:1;
-		unsigned	csum:1;
+		unsigned int	bypass:1;
+		unsigned int	writeback:1;
+		unsigned int	flush_journal:1;
+		unsigned int	csum:1;
 
-		unsigned	replace:1;
-		unsigned	replace_collision:1;
+		unsigned int	replace:1;
+		unsigned int	replace_collision:1;
 
-		unsigned	insert_data_done:1;
+		unsigned int	insert_data_done:1;
 	};
 	};
 
@@ -33,7 +33,7 @@ struct data_insert_op {
 	BKEY_PADDED(replace_key);
 };
 
-unsigned bch_get_congested(struct cache_set *);
+unsigned int bch_get_congested(struct cache_set *);
 void bch_data_insert(struct closure *cl);
 
 void bch_cached_dev_request_init(struct cached_dev *dc);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index be119326297b..2331a0d5aa28 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -33,11 +33,11 @@
  * stored left shifted by 16, and scaled back in the sysfs show() function.
  */
 
-static const unsigned DAY_RESCALE		= 288;
-static const unsigned HOUR_RESCALE		= 12;
-static const unsigned FIVE_MINUTE_RESCALE	= 1;
-static const unsigned accounting_delay		= (HZ * 300) / 22;
-static const unsigned accounting_weight		= 32;
+static const unsigned int DAY_RESCALE		= 288;
+static const unsigned int HOUR_RESCALE		= 12;
+static const unsigned int FIVE_MINUTE_RESCALE	= 1;
+static const unsigned int accounting_delay	= (HZ * 300) / 22;
+static const unsigned int accounting_weight	= 32;
 
 /* sysfs reading/writing */
 
@@ -152,7 +152,7 @@ static void scale_accounting(struct timer_list *t)
 	struct cache_accounting *acc = from_timer(acc, t, timer);
 
 #define move_stat(name) do {						\
-	unsigned t = atomic_xchg(&acc->collector.name, 0);		\
+	unsigned int t = atomic_xchg(&acc->collector.name, 0);		\
 	t <<= 16;							\
 	acc->five_minute.name += t;					\
 	acc->hour.name += t;						\
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index 0b70f9de0c03..77234a89dd69 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -23,7 +23,7 @@ struct cache_stats {
 	unsigned long cache_miss_collisions;
 	unsigned long sectors_bypassed;
 
-	unsigned		rescale;
+	unsigned int		rescale;
 };
 
 struct cache_accounting {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 55a37641aa95..4ab1b1968d9a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -61,7 +61,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
 	const char *err;
 	struct cache_sb *s;
 	struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
-	unsigned i;
+	unsigned int i;
 
 	if (!bh)
 		return "IO error";
@@ -202,7 +202,7 @@ static void write_bdev_super_endio(struct bio *bio)
 static void __write_super(struct cache_sb *sb, struct bio *bio)
 {
 	struct cache_sb *out = page_address(bio_first_page_all(bio));
-	unsigned i;
+	unsigned int i;
 
 	bio->bi_iter.bi_sector	= SB_SECTOR;
 	bio->bi_iter.bi_size	= SB_SIZE;
@@ -282,7 +282,7 @@ void bcache_write_super(struct cache_set *c)
 {
 	struct closure *cl = &c->sb_write;
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	down(&c->sb_write_mutex);
 	closure_init(cl, &c->cl);
@@ -334,7 +334,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
 {
 	struct closure *cl = &c->uuid_write;
 	struct uuid_entry *u;
-	unsigned i;
+	unsigned int i;
 	char buf[80];
 
 	BUG_ON(!parent);
@@ -587,7 +587,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
 	struct prio_set *p = ca->disk_buckets;
 	struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
 	struct bucket *b;
-	unsigned bucket_nr = 0;
+	unsigned int bucket_nr = 0;
 
 	for (b = ca->buckets;
 	     b < ca->buckets + ca->sb.nbuckets;
@@ -662,7 +662,7 @@ static void bcache_device_unlink(struct bcache_device *d)
 	lockdep_assert_held(&bch_register_lock);
 
 	if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
-		unsigned i;
+		unsigned int i;
 		struct cache *ca;
 
 		sysfs_remove_link(&d->c->kobj, d->name);
@@ -676,7 +676,7 @@ static void bcache_device_unlink(struct bcache_device *d)
 static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
 			       const char *name)
 {
-	unsigned i;
+	unsigned int i;
 	struct cache *ca;
 
 	for_each_cache(ca, d->c, i)
@@ -715,7 +715,7 @@ static void bcache_device_detach(struct bcache_device *d)
 }
 
 static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
-				 unsigned id)
+				 unsigned int id)
 {
 	d->id = id;
 	d->c = c;
@@ -762,7 +762,7 @@ static void bcache_device_free(struct bcache_device *d)
 	closure_debug_destroy(&d->cl);
 }
 
-static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
 			      sector_t sectors)
 {
 	struct request_queue *q;
@@ -778,7 +778,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
 
 	if (!d->nr_stripes || d->nr_stripes > max_stripes) {
 		pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
-			(unsigned)d->nr_stripes);
+			(unsigned int)d->nr_stripes);
 		return -ENOMEM;
 	}
 
@@ -1212,7 +1212,7 @@ static void cached_dev_flush(struct closure *cl)
 	continue_at(cl, cached_dev_free, system_wq);
 }
 
-static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
+static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
 {
 	int ret;
 	struct io *io;
@@ -1489,7 +1489,7 @@ static void cache_set_free(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, cl);
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	if (!IS_ERR_OR_NULL(c->debug))
 		debugfs_remove(c->debug);
@@ -1532,7 +1532,7 @@ static void cache_set_flush(struct closure *cl)
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
 	struct cache *ca;
 	struct btree *b;
-	unsigned i;
+	unsigned int i;
 
 	bch_cache_accounting_destroy(&c->accounting);
 
@@ -1762,7 +1762,7 @@ static void run_cache_set(struct cache_set *c)
 	struct cached_dev *dc, *t;
 	struct cache *ca;
 	struct closure cl;
-	unsigned i;
+	unsigned int i;
 
 	closure_init_stack(&cl);
 
@@ -1853,7 +1853,7 @@ static void run_cache_set(struct cache_set *c)
 		pr_notice("invalidating existing data");
 
 		for_each_cache(ca, c, i) {
-			unsigned j;
+			unsigned int j;
 
 			ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
 					      2, SB_JOURNAL_BUCKETS);
@@ -1998,7 +1998,7 @@ err:
 void bch_cache_release(struct kobject *kobj)
 {
 	struct cache *ca = container_of(kobj, struct cache, kobj);
-	unsigned i;
+	unsigned int i;
 
 	if (ca->set) {
 		BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
@@ -2150,7 +2150,7 @@ static bool bch_is_open_backing(struct block_device *bdev) {
 static bool bch_is_open_cache(struct block_device *bdev) {
 	struct cache_set *c, *tc;
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
 		for_each_cache(ca, c, i)
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 81d3520b0702..3f2b7964d6a9 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -307,7 +307,7 @@ STORE(__cached_dev)
 		if (v < 0)
 			return v;
 
-		if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) {
+		if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
 			SET_BDEV_CACHE_MODE(&dc->sb, v);
 			bch_write_bdev_super(dc, NULL);
 		}
@@ -533,9 +533,9 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
 			op.stats.floats, op.stats.failed);
 }
 
-static unsigned bch_root_usage(struct cache_set *c)
+static unsigned int bch_root_usage(struct cache_set *c)
 {
-	unsigned bytes = 0;
+	unsigned int bytes = 0;
 	struct bkey *k;
 	struct btree *b;
 	struct btree_iter iter;
@@ -570,9 +570,9 @@ static size_t bch_cache_size(struct cache_set *c)
 	return ret;
 }
 
-static unsigned bch_cache_max_chain(struct cache_set *c)
+static unsigned int bch_cache_max_chain(struct cache_set *c)
 {
-	unsigned ret = 0;
+	unsigned int ret = 0;
 	struct hlist_head *h;
 
 	mutex_lock(&c->bucket_lock);
@@ -580,7 +580,7 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
 	for (h = c->bucket_hash;
 	     h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
 	     h++) {
-		unsigned i = 0;
+		unsigned int i = 0;
 		struct hlist_node *p;
 
 		hlist_for_each(p, h)
@@ -593,13 +593,13 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
 	return ret;
 }
 
-static unsigned bch_btree_used(struct cache_set *c)
+static unsigned int bch_btree_used(struct cache_set *c)
 {
 	return div64_u64(c->gc_stats.key_bytes * 100,
 			 (c->gc_stats.nodes ?: 1) * btree_bytes(c));
 }
 
-static unsigned bch_average_key_size(struct cache_set *c)
+static unsigned int bch_average_key_size(struct cache_set *c)
 {
 	return c->gc_stats.nkeys
 		? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
@@ -996,7 +996,7 @@ STORE(__bch_cache)
 		if (v < 0)
 			return v;
 
-		if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) {
+		if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
 			mutex_lock(&ca->set->bucket_lock);
 			SET_CACHE_REPLACEMENT(&ca->sb, v);
 			mutex_unlock(&ca->set->bucket_lock);
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index f7b0133c9d2f..484044231f21 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -347,7 +347,7 @@ static inline int bch_strtoul_h(const char *cp, long *res)
 	snprintf(buf, size,						\
 		__builtin_types_compatible_p(typeof(var), int)		\
 		     ? "%i\n" :						\
-		__builtin_types_compatible_p(typeof(var), unsigned)	\
+		__builtin_types_compatible_p(typeof(var), unsigned int)	\
 		     ? "%u\n" :						\
 		__builtin_types_compatible_p(typeof(var), long)		\
 		     ? "%li\n" :					\
@@ -379,7 +379,7 @@ struct time_stats {
 
 void bch_time_stats_update(struct time_stats *stats, uint64_t time);
 
-static inline unsigned local_clock_us(void)
+static inline unsigned int local_clock_us(void)
 {
 	return local_clock() >> 10;
 }
@@ -543,9 +543,10 @@ dup:									\
 	container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member)
 
 /* Does linear interpolation between powers of two */
-static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
+static inline unsigned int fract_exp_two(unsigned int x,
+					 unsigned int fract_bits)
 {
-	unsigned fract = x & ~(~0 << fract_bits);
+	unsigned int fract = x & ~(~0 << fract_bits);
 
 	x >>= fract_bits;
 	x   = 1 << x;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 481d4cf38ac0..39ee38ffb2db 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -215,7 +215,8 @@ static void update_writeback_rate(struct work_struct *work)
 	smp_mb();
 }
 
-static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
+static unsigned int writeback_delay(struct cached_dev *dc,
+				    unsigned int sectors)
 {
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
 	    !dc->writeback_percent)
@@ -263,7 +264,7 @@ static void write_dirty_finish(struct closure *cl)
 	/* This is kind of a dumb way of signalling errors. */
 	if (KEY_DIRTY(&w->key)) {
 		int ret;
-		unsigned i;
+		unsigned int i;
 		struct keylist keys;
 
 		bch_keylist_init(&keys);
@@ -377,7 +378,7 @@ static void read_dirty_submit(struct closure *cl)
 
 static void read_dirty(struct cached_dev *dc)
 {
-	unsigned delay = 0;
+	unsigned int delay = 0;
 	struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
 	size_t size;
 	int nk, i;
@@ -498,11 +499,11 @@ err:
 
 /* Scan for dirty data */
 
-void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
 				  uint64_t offset, int nr_sectors)
 {
 	struct bcache_device *d = c->devices[inode];
-	unsigned stripe_offset, stripe, sectors_dirty;
+	unsigned int stripe_offset, stripe, sectors_dirty;
 
 	if (!d)
 		return;
@@ -514,7 +515,7 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
 	stripe_offset = offset & (d->stripe_size - 1);
 
 	while (nr_sectors) {
-		int s = min_t(unsigned, abs(nr_sectors),
+		int s = min_t(unsigned int, abs(nr_sectors),
 			      d->stripe_size - stripe_offset);
 
 		if (nr_sectors < 0)
@@ -548,7 +549,7 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
 static void refill_full_stripes(struct cached_dev *dc)
 {
 	struct keybuf *buf = &dc->writeback_keys;
-	unsigned start_stripe, stripe, next_stripe;
+	unsigned int start_stripe, stripe, next_stripe;
 	bool wrapped = false;
 
 	stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
@@ -688,7 +689,7 @@ static int bch_writeback_thread(void *arg)
 		read_dirty(dc);
 
 		if (searched_full_index) {
-			unsigned delay = dc->writeback_delay * HZ;
+			unsigned int delay = dc->writeback_delay * HZ;
 
 			while (delay &&
 			       !kthread_should_stop() &&
@@ -712,7 +713,7 @@ static int bch_writeback_thread(void *arg)
 
 struct sectors_dirty_init {
 	struct btree_op	op;
-	unsigned	inode;
+	unsigned int	inode;
 	size_t		count;
 	struct bkey	start;
 };
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3745d7004c47..76b691850c98 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -28,7 +28,7 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 	return ret;
 }
 
-static inline unsigned offset_to_stripe(struct bcache_device *d,
+static inline unsigned int offset_to_stripe(struct bcache_device *d,
 					uint64_t offset)
 {
 	do_div(offset, d->stripe_size);
@@ -37,9 +37,9 @@ static inline unsigned offset_to_stripe(struct bcache_device *d,
 
 static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
 					   uint64_t offset,
-					   unsigned nr_sectors)
+					   unsigned int nr_sectors)
 {
-	unsigned stripe = offset_to_stripe(&dc->disk, offset);
+	unsigned int stripe = offset_to_stripe(&dc->disk, offset);
 
 	while (1) {
 		if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
@@ -54,9 +54,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
 }
 
 static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
-				    unsigned cache_mode, bool would_skip)
+				    unsigned int cache_mode, bool would_skip)
 {
-	unsigned in_use = dc->disk.c->gc_stats.in_use;
+	unsigned int in_use = dc->disk.c->gc_stats.in_use;
 
 	if (cache_mode != CACHE_MODE_WRITEBACK ||
 	    test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -96,7 +96,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
 	}
 }
 
-void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned int, uint64_t, int);
 
 void bch_sectors_dirty_init(struct bcache_device *);
 void bch_cached_dev_writeback_init(struct cached_dev *);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 8d19e02d752a..6bdcb48ee8cf 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -30,10 +30,10 @@ struct bkey {
 	BITMASK(name, struct bkey, field, offset, size)
 
 #define PTR_FIELD(name, offset, size)					\
-static inline __u64 name(const struct bkey *k, unsigned i)		\
+static inline __u64 name(const struct bkey *k, unsigned int i)		\
 { return (k->ptr[i] >> offset) & ~(~0ULL << size); }			\
 									\
-static inline void SET_##name(struct bkey *k, unsigned i, __u64 v)	\
+static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v)	\
 {									\
 	k->ptr[i] &= ~(~(~0ULL << size) << offset);			\
 	k->ptr[i] |= (v & ~(~0ULL << size)) << offset;			\
@@ -120,7 +120,7 @@ static inline struct bkey *bkey_next(const struct bkey *k)
 	return (struct bkey *) (d + bkey_u64s(k));
 }
 
-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned nr_keys)
+static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
 {
 	__u64 *d = (void *) k;
 	return (struct bkey *) (d + nr_keys);
-- 
cgit v1.2.3


From 1fae7cf05293d3a2c9e59c1bc59372322386467c Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Sat, 11 Aug 2018 13:19:45 +0800
Subject: bcache: style fix to add a blank line after declarations

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Shenghui Wang <shhuiw@foxmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c     |  3 +++
 drivers/md/bcache/bcache.h    |  1 +
 drivers/md/bcache/bset.c      |  5 ++++-
 drivers/md/bcache/btree.c     |  7 +++++++
 drivers/md/bcache/closure.c   |  1 +
 drivers/md/bcache/debug.c     |  4 ++--
 drivers/md/bcache/extents.c   |  5 ++++-
 drivers/md/bcache/io.c        |  4 +++-
 drivers/md/bcache/journal.c   |  2 ++
 drivers/md/bcache/movinggc.c  |  2 ++
 drivers/md/bcache/request.c   |  5 ++++-
 drivers/md/bcache/stats.c     |  3 +++
 drivers/md/bcache/super.c     | 13 ++++++++++++-
 drivers/md/bcache/sysfs.c     |  5 +++++
 drivers/md/bcache/util.c      |  1 +
 drivers/md/bcache/writeback.c |  1 +
 include/uapi/linux/bcache.h   |  2 ++
 17 files changed, 57 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 89f663d22551..7a28232d868b 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -244,6 +244,7 @@ static void invalidate_buckets_random(struct cache *ca)
 
 	while (!fifo_full(&ca->free_inc)) {
 		size_t n;
+
 		get_random_bytes(&n, sizeof(n));
 
 		n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
@@ -514,6 +515,7 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
 			 struct bkey *k, int n, bool wait)
 {
 	int ret;
+
 	mutex_lock(&c->bucket_lock);
 	ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
 	mutex_unlock(&c->bucket_lock);
@@ -706,6 +708,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
 
 	for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
 		struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+
 		if (!b)
 			return -ENOMEM;
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 1ebd2d9d90d5..fd74dd075951 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -783,6 +783,7 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
 static inline uint8_t gen_after(uint8_t a, uint8_t b)
 {
 	uint8_t r = a - b;
+
 	return r > 128U ? 0 : r;
 }
 
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index dfda7e9efc3e..6fd5623b2e63 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -585,6 +585,7 @@ static inline unsigned int bfloat_mantissa(const struct bkey *k,
 				       struct bkey_float *f)
 {
 	const uint64_t *p = &k->low - (f->exponent >> 6);
+
 	return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
 }
 
@@ -964,6 +965,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 		 * but a branch instruction is avoided.
 		 */
 		unsigned int p = n << 4;
+
 		p &= ((int) (p - t->size)) >> 31;
 
 		prefetch(&t->tree[p]);
@@ -1114,6 +1116,7 @@ static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
 					  struct bset_tree *start)
 {
 	struct bkey *ret = NULL;
+
 	iter->size = ARRAY_SIZE(iter->data);
 	iter->used = 0;
 
@@ -1329,8 +1332,8 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
 			 struct bset_sort_state *state)
 {
 	uint64_t start_time = local_clock();
-
 	struct btree_iter iter;
+
 	bch_btree_iter_init(b, &iter, NULL);
 
 	btree_mergesort(b, new->set->data, &iter, false, true);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 96c39a8db895..4003f92f4d2c 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -287,6 +287,7 @@ err:
 static void btree_node_read_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
+
 	closure_put(cl);
 }
 
@@ -604,6 +605,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 				      struct bkey *k, gfp_t gfp)
 {
 	struct btree *b = kzalloc(sizeof(struct btree), gfp);
+
 	if (!b)
 		return NULL;
 
@@ -746,6 +748,7 @@ void bch_btree_cache_free(struct cache_set *c)
 {
 	struct btree *b;
 	struct closure cl;
+
 	closure_init_stack(&cl);
 
 	if (c->shrink.list.next)
@@ -1124,6 +1127,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
 						  struct btree_op *op)
 {
 	struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
 	if (!IS_ERR_OR_NULL(n)) {
 		mutex_lock(&n->write_lock);
 		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -2488,6 +2492,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
 	if (!RB_EMPTY_ROOT(&buf->keys)) {
 		struct keybuf_key *w;
+
 		w = RB_FIRST(&buf->keys, struct keybuf_key, node);
 		buf->start	= START_KEY(&w->key);
 
@@ -2519,6 +2524,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
 {
 	bool ret = false;
 	struct keybuf_key *p, *w, s;
+
 	s.key = *start;
 
 	if (bkey_cmp(end, &buf->start) <= 0 ||
@@ -2545,6 +2551,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
 struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 {
 	struct keybuf_key *w;
+
 	spin_lock(&buf->lock);
 
 	w = RB_FIRST(&buf->keys, struct keybuf_key, node);
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 618253683d40..8570fc426e31 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -162,6 +162,7 @@ static struct dentry *closure_debug;
 static int debug_seq_show(struct seq_file *f, void *data)
 {
 	struct closure *cl;
+
 	spin_lock_irq(&closure_list_lock);
 
 	list_for_each_entry(cl, &closure_list, all) {
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 0caad145902b..f0eb37a14dab 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -177,8 +177,8 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
 	while (size) {
 		struct keybuf_key *w;
 		unsigned int bytes = min(i->bytes, size);
-
 		int err = copy_to_user(buf, i->buf, bytes);
+
 		if (err)
 			return err;
 
@@ -237,8 +237,8 @@ void bch_debug_init_cache_set(struct cache_set *c)
 {
 	if (!IS_ERR_OR_NULL(bcache_debug)) {
 		char name[50];
-		snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
 
+		snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
 		c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
 					       &cache_set_debug_ops);
 	}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index e96ba928eeb6..8f5de61e1a90 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -134,8 +134,8 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
 
 	for (j = 0; j < KEY_PTRS(k); j++) {
 		size_t n = PTR_BUCKET_NR(b->c, k, j);
-		printk(" bucket %zu", n);
 
+		printk(" bucket %zu", n);
 		if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
 			printk(" prio %i",
 			       PTR_BUCKET(b->c, k, j)->prio);
@@ -166,6 +166,7 @@ bad:
 static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
 {
 	struct btree *b = container_of(bk, struct btree, keys);
+
 	return __bch_btree_ptr_invalid(b->c, k);
 }
 
@@ -334,6 +335,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
 
 	while (1) {
 		struct bkey *k = bch_btree_iter_next(iter);
+
 		if (!k)
 			break;
 
@@ -498,6 +500,7 @@ bad:
 static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
 {
 	struct btree *b = container_of(bk, struct btree, keys);
+
 	return __bch_extent_invalid(b->c, k);
 }
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index c6b41a09f550..cfc56add799a 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -17,6 +17,7 @@
 void bch_bbio_free(struct bio *bio, struct cache_set *c)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
+
 	mempool_free(b, &c->bio_meta);
 }
 
@@ -45,6 +46,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
 		     struct bkey *k, unsigned int ptr)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
+
 	bch_bkey_copy_single_ptr(&b->key, k, ptr);
 	__bch_submit_bbio(bio, c);
 }
@@ -132,12 +134,12 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
 
 	if (threshold) {
 		unsigned int t = local_clock_us();
-
 		int us = t - b->submit_time_us;
 		int congested = atomic_read(&c->congested);
 
 		if (us > (int) threshold) {
 			int ms = us / 1024;
+
 			c->congested_last_us = t;
 
 			ms = min(ms, CONGESTED_MAX + congested);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ee61062b58fc..301cbb43a78f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -28,6 +28,7 @@
 static void journal_read_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
+
 	closure_put(cl);
 }
 
@@ -614,6 +615,7 @@ static void journal_write_unlocked(struct closure *cl)
 
 	struct bio *bio;
 	struct bio_list list;
+
 	bio_list_init(&list);
 
 	if (!w->need_write) {
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 0790d710f911..7891fb512736 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -38,6 +38,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
 static void moving_io_destructor(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
+
 	kfree(io);
 }
 
@@ -189,6 +190,7 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
 static unsigned int bucket_heap_top(struct cache *ca)
 {
 	struct bucket *b;
+
 	return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
 }
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6e1a60dd1742..d15d8c5778ed 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -45,6 +45,7 @@ static void bio_csum(struct bio *bio, struct bkey *k)
 
 	bio_for_each_segment(bv, bio, iter) {
 		void *d = kmap(bv.bv_page) + bv.bv_offset;
+
 		csum = bch_crc64_update(csum, d, bv.bv_len);
 		kunmap(bv.bv_page);
 	}
@@ -526,8 +527,8 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
 			? min_t(uint64_t, INT_MAX,
 				KEY_START(k) - bio->bi_iter.bi_sector)
 			: INT_MAX;
-
 		int ret = s->d->cache_miss(b, s, bio, sectors);
+
 		if (ret != MAP_CONTINUE)
 			return ret;
 
@@ -623,6 +624,7 @@ static void request_endio(struct bio *bio)
 
 	if (bio->bi_status) {
 		struct search *s = container_of(cl, struct search, cl);
+
 		s->iop.status = bio->bi_status;
 		/* Only cache read errors are recoverable */
 		s->recoverable = false;
@@ -1212,6 +1214,7 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
 			    unsigned int cmd, unsigned long arg)
 {
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
 	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
 }
 
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 2331a0d5aa28..894410f3f829 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -200,6 +200,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
 			       bool hit, bool bypass)
 {
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
 	mark_cache_stats(&dc->accounting.collector, hit, bypass);
 	mark_cache_stats(&c->accounting.collector, hit, bypass);
 }
@@ -207,6 +208,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
 void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
 {
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
 	atomic_inc(&dc->accounting.collector.cache_readaheads);
 	atomic_inc(&c->accounting.collector.cache_readaheads);
 }
@@ -214,6 +216,7 @@ void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
 void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d)
 {
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
 	atomic_inc(&dc->accounting.collector.cache_miss_collisions);
 	atomic_inc(&c->accounting.collector.cache_miss_collisions);
 }
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4ab1b1968d9a..c11cf852715c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -415,8 +415,8 @@ static int __uuid_write(struct cache_set *c)
 {
 	BKEY_PADDED(key) k;
 	struct closure cl;
-	closure_init_stack(&cl);
 
+	closure_init_stack(&cl);
 	lockdep_assert_held(&bch_register_lock);
 
 	if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
@@ -456,6 +456,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
 static struct uuid_entry *uuid_find_empty(struct cache_set *c)
 {
 	static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
 	return uuid_find(c, zero_uuid);
 }
 
@@ -619,6 +620,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
 static int open_dev(struct block_device *b, fmode_t mode)
 {
 	struct bcache_device *d = b->bd_disk->private_data;
+
 	if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
 		return -ENXIO;
 
@@ -629,6 +631,7 @@ static int open_dev(struct block_device *b, fmode_t mode)
 static void release_dev(struct gendisk *b, fmode_t mode)
 {
 	struct bcache_device *d = b->private_data;
+
 	closure_put(&d->cl);
 }
 
@@ -919,6 +922,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
 	if (!d->c &&
 	    BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
 		struct closure cl;
+
 		closure_init_stack(&cl);
 
 		SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
@@ -976,6 +980,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
 {
 	struct cached_dev *dc = container_of(w, struct cached_dev, detach);
 	struct closure cl;
+
 	closure_init_stack(&cl);
 
 	BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
@@ -1103,6 +1108,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
 
 	if (bch_is_zero(u->uuid, 16)) {
 		struct closure cl;
+
 		closure_init_stack(&cl);
 
 		memcpy(u->uuid, dc->sb.uuid, 16);
@@ -1320,6 +1326,7 @@ void bch_flash_dev_release(struct kobject *kobj)
 static void flash_dev_free(struct closure *cl)
 {
 	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+
 	mutex_lock(&bch_register_lock);
 	atomic_long_sub(bcache_dev_sectors_dirty(d),
 			&d->c->flash_dev_dirty_sectors);
@@ -1481,6 +1488,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
 void bch_cache_set_release(struct kobject *kobj)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+
 	kfree(c);
 	module_put(THIS_MODULE);
 }
@@ -1671,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
 {
 	int iter_size;
 	struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
+
 	if (!c)
 		return NULL;
 
@@ -2216,6 +2225,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	err = "failed to register device";
 	if (SB_IS_BDEV(sb)) {
 		struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+
 		if (!dc)
 			goto err_close;
 
@@ -2224,6 +2234,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 		mutex_unlock(&bch_register_lock);
 	} else {
 		struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
 		if (!ca)
 			goto err_close;
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 3f2b7964d6a9..ba4cd7efca8e 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -459,6 +459,7 @@ STORE(__bch_flash_dev)
 
 	if (attr == &sysfs_size) {
 		uint64_t v;
+
 		strtoi_h_or_return(buf, v);
 
 		u->sectors = v >> 9;
@@ -703,6 +704,7 @@ STORE(__bch_cache_set)
 	if (attr == &sysfs_flash_vol_create) {
 		int r;
 		uint64_t v;
+
 		strtoi_h_or_return(buf, v);
 
 		r = bch_flash_dev_create(c, v);
@@ -736,6 +738,7 @@ STORE(__bch_cache_set)
 
 	if (attr == &sysfs_prune_cache) {
 		struct shrink_control sc;
+
 		sc.gfp_mask = GFP_KERNEL;
 		sc.nr_to_scan = strtoul_or_return(buf);
 		c->shrink.scan_objects(&c->shrink, &sc);
@@ -789,12 +792,14 @@ STORE_LOCKED(bch_cache_set)
 SHOW(bch_cache_set_internal)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
 	return bch_cache_set_show(&c->kobj, attr, buf);
 }
 
 STORE(bch_cache_set_internal)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
 	return bch_cache_set_store(&c->kobj, attr, buf, size);
 }
 
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index b15256bcf0e7..18016e7bb32c 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -133,6 +133,7 @@ bool bch_is_zero(const char *p, size_t n)
 int bch_parse_uuid(const char *s, char *uuid)
 {
 	size_t i, j, x;
+
 	memset(uuid, 0, 16);
 
 	for (i = 0, j = 0;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 39ee38ffb2db..44f1b0f1f4d9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -250,6 +250,7 @@ static void dirty_init(struct keybuf_key *w)
 static void dirty_io_destructor(struct closure *cl)
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+
 	kfree(io);
 }
 
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 6bdcb48ee8cf..5d4f58e059fd 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -117,12 +117,14 @@ static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
 static inline struct bkey *bkey_next(const struct bkey *k)
 {
 	__u64 *d = (void *) k;
+
 	return (struct bkey *) (d + bkey_u64s(k));
 }
 
 static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
 {
 	__u64 *d = (void *) k;
+
 	return (struct bkey *) (d + nr_keys);
 }
 /* Enough for a key with 6 pointers */
-- 
cgit v1.2.3


From 353c9cb360874e737fb000545f783df756c06f9a Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Sat, 11 Aug 2018 20:27:24 +0000
Subject: ip: add helpers to process in-order fragments faster.

This patch introduces several helper functions/macros that will be
used in the follow-up patch. No runtime changes yet.

The new logic (fully implemented in the second patch) is as follows:

* Nodes in the rb-tree will now contain not single fragments, but lists
  of consecutive fragments ("runs").

* At each point in time, the current "active" run at the tail is
  maintained/tracked. Fragments that arrive in-order, adjacent
  to the previous tail fragment, are added to this tail run without
  triggering the re-balancing of the rb-tree.

* If a fragment arrives out of order with the offset _before_ the tail run,
  it is inserted into the rb-tree as a single fragment.

* If a fragment arrives after the current tail fragment (with a gap),
  it starts a new "tail" run, as is inserted into the rb-tree
  at the end as the head of the new run.

skb->cb is used to store additional information
needed here (suggested by Eric Dumazet).

Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h |  6 ++++
 net/ipv4/ip_fragment.c  | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b86d14528188..1662cbc0b46b 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -57,7 +57,9 @@ struct frag_v6_compare_key {
  * @lock: spinlock protecting this frag
  * @refcnt: reference count of the queue
  * @fragments: received fragments head
+ * @rb_fragments: received fragments rb-tree root
  * @fragments_tail: received fragments tail
+ * @last_run_head: the head of the last "run". see ip_fragment.c
  * @stamp: timestamp of the last received fragment
  * @len: total length of the original datagram
  * @meat: length of received fragments so far
@@ -78,6 +80,7 @@ struct inet_frag_queue {
 	struct sk_buff		*fragments;  /* Used in IPv6. */
 	struct rb_root		rb_fragments; /* Used in IPv4. */
 	struct sk_buff		*fragments_tail;
+	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
 	int			len;
 	int			meat;
@@ -113,6 +116,9 @@ void inet_frag_kill(struct inet_frag_queue *q);
 void inet_frag_destroy(struct inet_frag_queue *q);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
 
+/* Free all skbs in the queue; return the sum of their truesizes. */
+unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+
 static inline void inet_frag_put(struct inet_frag_queue *q)
 {
 	if (refcount_dec_and_test(&q->refcnt))
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7cb7ed761d8c..26ace9d2d976 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,6 +57,57 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+	struct inet_skb_parm	h;
+	struct sk_buff		*next_frag;
+	int			frag_run_len;
+};
+
+#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+	FRAG_CB(skb)->next_frag = NULL;
+	FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+					struct sk_buff *skb)
+{
+	RB_CLEAR_NODE(&skb->rbnode);
+	FRAG_CB(skb)->next_frag = NULL;
+
+	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+	FRAG_CB(q->fragments_tail)->next_frag = skb;
+	q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+	if (q->last_run_head)
+		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+			     &q->last_run_head->rbnode.rb_right);
+	else
+		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+	rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+	ip4_frag_init_run(skb);
+	q->fragments_tail = skb;
+	q->last_run_head = skb;
+}
+
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -654,6 +705,28 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_check_defrag);
 
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
+
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		while (skb) {
+			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+			sum += skb->truesize;
+			kfree_skb(skb);
+			skb = next;
+		}
+	}
+	return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 #ifdef CONFIG_SYSCTL
 static int dist_min;
 
-- 
cgit v1.2.3


From 6249f2a479268702f7259aeee3671db2be3b922c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 6 Aug 2018 12:51:19 +0100
Subject: KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs

Although vgic-v3 now supports Group0 interrupts, it still doesn't
deal with Group0 SGIs. As usually with the GIC, nothing is simple:

- ICC_SGI1R can signal SGIs of both groups, since GICD_CTLR.DS==1
  with KVM (as per 8.1.10, Non-secure EL1 access)

- ICC_SGI0R can only generate Group0 SGIs

- ICC_ASGI1R sees its scope refocussed to generate only Group0
  SGIs (as per the note at the bottom of Table 8-14)

We only support Group1 SGIs so far, so no material change.

Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/coproc.c            |  2 +-
 arch/arm64/kvm/sys_regs.c        |  2 +-
 include/kvm/arm_vgic.h           |  2 +-
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 19 +++++++++++++++----
 4 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 3a02e76699a6..b17c52608a19 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -253,7 +253,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 	reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 	reg |= *vcpu_reg(vcpu, p->Rt1) ;
 
-	vgic_v3_dispatch_sgi(vcpu, reg);
+	vgic_v3_dispatch_sgi(vcpu, reg, true);
 
 	return true;
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index e04aacb2a24c..aba6755c816d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -255,7 +255,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
 	if (!p->is_write)
 		return read_from_write_only(vcpu, p, r);
 
-	vgic_v3_dispatch_sgi(vcpu, p->regval);
+	vgic_v3_dispatch_sgi(vcpu, p->regval, true);
 
 	return true;
 }
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index c134790be32c..4f31f96bbfab 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -373,7 +373,7 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
 
 /**
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 88e78b582139..a2a175b08b17 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -900,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
 /**
  * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
  * @vcpu: The VCPU requesting a SGI
- * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
+ * @allow_group1: Does the sysreg access allow generation of G1 SGIs
  *
  * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
  * This will trap in sys_regs.c and call this function.
@@ -910,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
  * check for matching ones. If this bit is set, we signal all, but not the
  * calling VCPU.
  */
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu *c_vcpu;
@@ -959,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
 		irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
 
 		spin_lock_irqsave(&irq->irq_lock, flags);
-		irq->pending_latch = true;
 
-		vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+		/*
+		 * An access targetting Group0 SGIs can only generate
+		 * those, while an access targetting Group1 SGIs can
+		 * generate interrupts of either group.
+		 */
+		if (!irq->group || allow_group1) {
+			irq->pending_latch = true;
+			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+		} else {
+			spin_unlock_irqrestore(&irq->irq_lock, flags);
+		}
+
 		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
-- 
cgit v1.2.3


From b5b1404d0815894de0690de8a1ab58269e56eae6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 12 Aug 2018 12:19:42 -0700
Subject: init: rename and re-order boot_cpu_state_init()

This is purely a preparatory patch for upcoming changes during the 4.19
merge window.

We have a function called "boot_cpu_state_init()" that isn't really
about the bootup cpu state: that is done much earlier by the similarly
named "boot_cpu_init()" (note lack of "state" in name).

This function initializes some hotplug CPU state, and needs to run after
the percpu data has been properly initialized.  It even has a comment to
that effect.

Except it _doesn't_ actually run after the percpu data has been properly
initialized.  On x86 it happens to do that, but on at least arm and
arm64, the percpu base pointers are initialized by the arch-specific
'smp_prepare_boot_cpu()' hook, which ran _after_ boot_cpu_state_init().

This had some unexpected results, and in particular we have a patch
pending for the merge window that did the obvious cleanup of using
'this_cpu_write()' in the cpu hotplug init code:

  -       per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
  +       this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);

which is obviously the right thing to do.  Except because of the
ordering issue, it actually failed miserably and unexpectedly on arm64.

So this just fixes the ordering, and changes the name of the function to
be 'boot_cpu_hotplug_init()' to make it obvious that it's about cpu
hotplug state, because the core CPU state was supposed to have already
been done earlier.

Marked for stable, since the (not yet merged) patch that will show this
problem is marked for stable.

Reported-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Mian Yousaf Kaukab <yousaf.kaukab@suse.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h | 2 +-
 init/main.c         | 2 +-
 kernel/cpu.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index a97a63eef59f..3233fbe23594 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -30,7 +30,7 @@ struct cpu {
 };
 
 extern void boot_cpu_init(void);
-extern void boot_cpu_state_init(void);
+extern void boot_cpu_hotplug_init(void);
 extern void cpu_init(void);
 extern void trap_init(void);
 
diff --git a/init/main.c b/init/main.c
index 3b4ada11ed52..5e13c544bbf4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -561,8 +561,8 @@ asmlinkage __visible void __init start_kernel(void)
 	setup_command_line(command_line);
 	setup_nr_cpu_ids();
 	setup_per_cpu_areas();
-	boot_cpu_state_init();
 	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
+	boot_cpu_hotplug_init();
 
 	build_all_zonelists(NULL);
 	page_alloc_init();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0db8938fbb23..2f8f338e77cf 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2010,7 +2010,7 @@ void __init boot_cpu_init(void)
 /*
  * Must be called _AFTER_ setting up the per_cpu areas
  */
-void __init boot_cpu_state_init(void)
+void __init boot_cpu_hotplug_init(void)
 {
 	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
 }
-- 
cgit v1.2.3


From e8d2bec0457962e8f348a9a3627b398f7fe5c5fc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 12 Aug 2018 01:59:17 +0200
Subject: bpf: decouple btf from seq bpf fs dump and enable more maps

Commit a26ca7c982cb ("bpf: btf: Add pretty print support to
the basic arraymap") and 699c86d6ec21 ("bpf: btf: add pretty
print for hash/lru_hash maps") enabled support for BTF and
dumping via BPF fs for array and hash/lru map. However, both
can be decoupled from each other such that regular BPF maps
can be supported for attaching BTF key/value information,
while not all maps necessarily need to dump via map_seq_show_elem()
callback.

The basic sanity check which is a prerequisite for all maps
is that key/value size has to match in any case, and some maps
can have extra checks via map_check_btf() callback, e.g.
probing certain types or indicating no support in general. With
that we can also enable retrieving BTF info for per-cpu map
types and lpm.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
---
 include/linux/bpf.h        | 13 +++++++++----
 kernel/bpf/arraymap.c      | 26 ++++++++++++--------------
 kernel/bpf/cpumap.c        |  1 +
 kernel/bpf/devmap.c        |  1 +
 kernel/bpf/hashtab.c       | 20 +-------------------
 kernel/bpf/inode.c         |  3 ++-
 kernel/bpf/local_storage.c |  1 +
 kernel/bpf/lpm_trie.c      | 12 ++++++++++++
 kernel/bpf/sockmap.c       |  2 ++
 kernel/bpf/stackmap.c      |  1 +
 kernel/bpf/syscall.c       | 36 ++++++++++++++++++++++++++++++++----
 kernel/bpf/xskmap.c        |  3 +--
 12 files changed, 75 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index db11662faea6..523481a3471b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -23,7 +23,7 @@ struct bpf_prog;
 struct bpf_map;
 struct sock;
 struct seq_file;
-struct btf;
+struct btf_type;
 
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
@@ -48,8 +48,9 @@ struct bpf_map_ops {
 	u32 (*map_fd_sys_lookup_elem)(void *ptr);
 	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
 				  struct seq_file *m);
-	int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf,
-			     u32 key_type_id, u32 value_type_id);
+	int (*map_check_btf)(const struct bpf_map *map,
+			     const struct btf_type *key_type,
+			     const struct btf_type *value_type);
 };
 
 struct bpf_map {
@@ -118,9 +119,13 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
 
 static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
 {
-	return map->ops->map_seq_show_elem && map->ops->map_check_btf;
+	return map->btf && map->ops->map_seq_show_elem;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type);
+
 extern const struct bpf_map_ops bpf_map_offload_ops;
 
 /* function argument constraints */
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index f6ca3e712831..0c17aab3ce5f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -358,27 +358,20 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf,
-			       u32 btf_key_id, u32 btf_value_id)
+static int array_map_check_btf(const struct bpf_map *map,
+			       const struct btf_type *key_type,
+			       const struct btf_type *value_type)
 {
-	const struct btf_type *key_type, *value_type;
-	u32 key_size, value_size;
 	u32 int_data;
 
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
+	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
 		return -EINVAL;
 
 	int_data = *(u32 *)(key_type + 1);
-	/* bpf array can only take a u32 key.  This check makes
-	 * sure that the btf matches the attr used during map_create.
+	/* bpf array can only take a u32 key. This check makes sure
+	 * that the btf matches the attr used during map_create.
 	 */
-	if (BTF_INT_BITS(int_data) != 32 || key_size != 4 ||
-	    BTF_INT_OFFSET(int_data))
-		return -EINVAL;
-
-	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
-	if (!value_type || value_size != map->value_size)
+	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
 		return -EINVAL;
 
 	return 0;
@@ -405,6 +398,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
+	.map_check_btf = array_map_check_btf,
 };
 
 static int fd_array_map_alloc_check(union bpf_attr *attr)
@@ -546,6 +540,7 @@ const struct bpf_map_ops prog_array_map_ops = {
 	.map_fd_put_ptr = prog_fd_array_put_ptr,
 	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
 	.map_release_uref = bpf_fd_array_map_clear,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
@@ -634,6 +629,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
 	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
 	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
 	.map_release = perf_event_fd_array_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 #ifdef CONFIG_CGROUPS
@@ -665,6 +661,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
 	.map_delete_elem = fd_array_map_delete_elem,
 	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
 	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
+	.map_check_btf = map_check_no_btf,
 };
 #endif
 
@@ -749,4 +746,5 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = array_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index e0918d180f08..3b494941a44a 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -555,6 +555,7 @@ const struct bpf_map_ops cpu_map_ops = {
 	.map_update_elem	= cpu_map_update_elem,
 	.map_lookup_elem	= cpu_map_lookup_elem,
 	.map_get_next_key	= cpu_map_get_next_key,
+	.map_check_btf		= map_check_no_btf,
 };
 
 static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d361fc1e3bf3..a7c6620d8389 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -484,6 +484,7 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_lookup_elem = dev_map_lookup_elem,
 	.map_update_elem = dev_map_update_elem,
 	.map_delete_elem = dev_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int dev_map_notification(struct notifier_block *notifier,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d6110042e0d9..04b8eda94e7d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1185,23 +1185,6 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
 	rcu_read_unlock();
 }
 
-static int htab_map_check_btf(const struct bpf_map *map, const struct btf *btf,
-			      u32 btf_key_id, u32 btf_value_id)
-{
-	const struct btf_type *key_type, *value_type;
-	u32 key_size, value_size;
-
-	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
-	if (!key_type || key_size != map->key_size)
-		return -EINVAL;
-
-	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
-	if (!value_type || value_size != map->value_size)
-		return -EINVAL;
-
-	return 0;
-}
-
 const struct bpf_map_ops htab_map_ops = {
 	.map_alloc_check = htab_map_alloc_check,
 	.map_alloc = htab_map_alloc,
@@ -1212,7 +1195,6 @@ const struct bpf_map_ops htab_map_ops = {
 	.map_delete_elem = htab_map_delete_elem,
 	.map_gen_lookup = htab_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
-	.map_check_btf = htab_map_check_btf,
 };
 
 const struct bpf_map_ops htab_lru_map_ops = {
@@ -1225,7 +1207,6 @@ const struct bpf_map_ops htab_lru_map_ops = {
 	.map_delete_elem = htab_lru_map_delete_elem,
 	.map_gen_lookup = htab_lru_map_gen_lookup,
 	.map_seq_show_elem = htab_map_seq_show_elem,
-	.map_check_btf = htab_map_check_btf,
 };
 
 /* Called from eBPF program */
@@ -1452,4 +1433,5 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
 	.map_gen_lookup = htab_of_map_gen_lookup,
+	.map_check_btf = map_check_no_btf,
 };
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index fc5b103512e7..2ada5e21dfa6 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -334,7 +334,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 	struct bpf_map *map = arg;
 
 	return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops,
-			     map->btf ? &bpffs_map_fops : &bpffs_obj_fops);
+			     bpf_map_support_seq_show(map) ?
+			     &bpffs_map_fops : &bpffs_obj_fops);
 }
 
 static struct dentry *
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index fc4e37f68f2a..22ad967d1e5f 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -246,6 +246,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 	.map_lookup_elem = cgroup_storage_lookup_elem,
 	.map_update_elem = cgroup_storage_update_elem,
 	.map_delete_elem = cgroup_storage_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1603492c9cc7..9058317ba9de 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -10,11 +10,13 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <net/ipv6.h>
+#include <uapi/linux/btf.h>
 
 /* Intermediate node */
 #define LPM_TREE_NODE_FLAG_IM BIT(0)
@@ -686,6 +688,15 @@ free_stack:
 	return err;
 }
 
+static int trie_check_btf(const struct bpf_map *map,
+			  const struct btf_type *key_type,
+			  const struct btf_type *value_type)
+{
+	/* Keys must have struct bpf_lpm_trie_key embedded. */
+	return BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT ?
+	       -EINVAL : 0;
+}
+
 const struct bpf_map_ops trie_map_ops = {
 	.map_alloc = trie_alloc,
 	.map_free = trie_free,
@@ -693,4 +704,5 @@ const struct bpf_map_ops trie_map_ops = {
 	.map_lookup_elem = trie_lookup_elem,
 	.map_update_elem = trie_update_elem,
 	.map_delete_elem = trie_delete_elem,
+	.map_check_btf = trie_check_btf,
 };
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 0b38be5a955c..e376ffffebce 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2495,6 +2495,7 @@ const struct bpf_map_ops sock_map_ops = {
 	.map_update_elem = sock_map_update_elem,
 	.map_delete_elem = sock_map_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 const struct bpf_map_ops sock_hash_ops = {
@@ -2505,6 +2506,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
 	.map_release_uref = sock_map_release,
+	.map_check_btf = map_check_no_btf,
 };
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b675a3f3d141..8061a439ef18 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -607,6 +607,7 @@ const struct bpf_map_ops stack_map_ops = {
 	.map_lookup_elem = stack_map_lookup_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
 
 static int __init stack_map_init(void)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 57f4d076141b..43727ed0d94a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -103,6 +103,7 @@ int bpf_check_uarg_tail_zero(void __user *uaddr,
 const struct bpf_map_ops bpf_map_offload_ops = {
 	.map_alloc = bpf_map_offload_map_alloc,
 	.map_free = bpf_map_offload_map_free,
+	.map_check_btf = map_check_no_btf,
 };
 
 static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
@@ -455,6 +456,34 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
 	return 0;
 }
 
+int map_check_no_btf(const struct bpf_map *map,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type)
+{
+	return -ENOTSUPP;
+}
+
+static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+			 u32 btf_key_id, u32 btf_value_id)
+{
+	const struct btf_type *key_type, *value_type;
+	u32 key_size, value_size;
+	int ret = 0;
+
+	key_type = btf_type_id_size(btf, &btf_key_id, &key_size);
+	if (!key_type || key_size != map->key_size)
+		return -EINVAL;
+
+	value_type = btf_type_id_size(btf, &btf_value_id, &value_size);
+	if (!value_type || value_size != map->value_size)
+		return -EINVAL;
+
+	if (map->ops->map_check_btf)
+		ret = map->ops->map_check_btf(map, key_type, value_type);
+
+	return ret;
+}
+
 #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
@@ -489,8 +518,7 @@ static int map_create(union bpf_attr *attr)
 	atomic_set(&map->refcnt, 1);
 	atomic_set(&map->usercnt, 1);
 
-	if (bpf_map_support_seq_show(map) &&
-	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
+	if (attr->btf_key_type_id || attr->btf_value_type_id) {
 		struct btf *btf;
 
 		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
@@ -504,8 +532,8 @@ static int map_create(union bpf_attr *attr)
 			goto free_map_nouncharge;
 		}
 
-		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
-					      attr->btf_value_type_id);
+		err = map_check_btf(map, btf, attr->btf_key_type_id,
+				    attr->btf_value_type_id);
 		if (err) {
 			btf_put(btf);
 			goto free_map_nouncharge;
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index b3c557476a8d..4ddf61e158f6 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -227,6 +227,5 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_lookup_elem = xsk_map_lookup_elem,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
 };
-
-
-- 
cgit v1.2.3


From 7723628101aaeb1d723786747529b4ea65c5b5c5 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Sun, 12 Aug 2018 10:49:27 -0700
Subject: bpf: Introduce bpf_skb_ancestor_cgroup_id helper

== Problem description ==

It's useful to be able to identify cgroup associated with skb in TC so
that a policy can be applied to this skb, and existing bpf_skb_cgroup_id
helper can help with this.

Though in real life cgroup hierarchy and hierarchy to apply a policy to
don't map 1:1.

It's often the case that there is a container and corresponding cgroup,
but there are many more sub-cgroups inside container, e.g. because it's
delegated to containerized application to control resources for its
subsystems, or to separate application inside container from infra that
belongs to containerization system (e.g. sshd).

At the same time it may be useful to apply a policy to container as a
whole.

If multiple containers like this are run on a host (what is often the
case) and many of them have sub-cgroups, it may not be possible to apply
per-container policy in TC with existing helpers such as
bpf_skb_under_cgroup or bpf_skb_cgroup_id:

* bpf_skb_cgroup_id will return id of immediate cgroup associated with
  skb, i.e. if it's a sub-cgroup inside container, it can't be used to
  identify container's cgroup;

* bpf_skb_under_cgroup can work only with one cgroup and doesn't scale,
  i.e. if there are N containers on a host and a policy has to be
  applied to M of them (0 <= M <= N), it'd require M calls to
  bpf_skb_under_cgroup, and, if M changes, it'd require to rebuild &
  load new BPF program.

== Solution ==

The patch introduces new helper bpf_skb_ancestor_cgroup_id that can be
used to get id of cgroup v2 that is an ancestor of cgroup associated
with skb at specified level of cgroup hierarchy.

That way admin can place all containers on one level of cgroup hierarchy
(what is a good practice in general and already used in many
configurations) and identify specific cgroup on this level no matter
what sub-cgroup skb is associated with.

E.g. if there is a cgroup hierarchy:
  root/
  root/container1/
  root/container1/app11/
  root/container1/app11/sub-app-a/
  root/container1/app12/
  root/container2/
  root/container2/app21/
  root/container2/app22/
  root/container2/app22/sub-app-b/

, then having skb associated with root/container1/app11/sub-app-a/ it's
possible to get ancestor at level 1, what is container1 and apply policy
for this container, or apply another policy if it's container2.

Policies can be kept e.g. in a hash map where key is a container cgroup
id and value is an action.

Levels where container cgroups are created are usually known in advance
whether cgroup hierarchy inside container may be hard to predict
especially in case when its creation is delegated to containerized
application.

== Implementation details ==

The helper gets ancestor by walking parents up to specified level.

Another option would be to get different kind of "id" from
cgroup->ancestor_ids[level] and use it with idr_find() to get struct
cgroup for ancestor. But that would require radix lookup what doesn't
seem to be better (at least it's not obviously better).

Format of return value of the new helper is same as that of
bpf_skb_cgroup_id.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/cgroup.h   | 30 ++++++++++++++++++++++++++++++
 include/uapi/linux/bpf.h | 21 ++++++++++++++++++++-
 net/core/filter.c        | 28 ++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c9fdf6f57913..32c553556bbd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -553,6 +553,36 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * cgroup_ancestor - find ancestor of cgroup
+ * @cgrp: cgroup to find ancestor of
+ * @ancestor_level: level of ancestor to find starting from root
+ *
+ * Find ancestor of cgroup at specified level starting from root if it exists
+ * and return pointer to it. Return NULL if @cgrp doesn't have ancestor at
+ * @ancestor_level.
+ *
+ * This function is safe to call as long as @cgrp is accessible.
+ */
+static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
+					     int ancestor_level)
+{
+	struct cgroup *ptr;
+
+	if (cgrp->level < ancestor_level)
+		return NULL;
+
+	for (ptr = cgrp;
+	     ptr && ptr->level > ancestor_level;
+	     ptr = cgroup_parent(ptr))
+		;
+
+	if (ptr && ptr->level == ancestor_level)
+		return ptr;
+
+	return NULL;
+}
+
 /**
  * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
  * @task: the task to be tested
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3102a2a23c31..66917a4eba27 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2093,6 +2093,24 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
  * u64 bpf_get_current_cgroup_id(void)
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
@@ -2207,7 +2225,8 @@ union bpf_attr {
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
 	FN(get_local_storage),		\
-	FN(sk_select_reuseport),
+	FN(sk_select_reuseport),	\
+	FN(skb_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 22906b31d43f..15b9d2df92ca 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3778,6 +3778,32 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.ret_type       = RET_INTEGER,
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
+
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+	   ancestor_level)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+	struct cgroup *ancestor;
+	struct cgroup *cgrp;
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	ancestor = cgroup_ancestor(cgrp, ancestor_level);
+	if (!ancestor)
+		return 0;
+
+	return ancestor->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
+	.func           = bpf_skb_ancestor_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+};
 #endif
 
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -4966,6 +4992,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
+	case BPF_FUNC_skb_ancestor_cgroup_id:
+		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
cgit v1.2.3


From f28cdf0430fc92acaa718e15598bdad6cb236a4d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 11 Jul 2018 14:02:22 -0700
Subject: 9p: Replace the fidlist with an IDR

The p9_idpool being used to allocate the IDs uses an IDR to allocate
the IDs ... which we then keep in a doubly-linked list, rather than in
the IDR which allocated them.  We can use an IDR directly which saves
two pointers per p9_fid, and a tiny memory allocation per p9_client.

Link: http://lkml.kernel.org/r/20180711210225.19730-4-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h |  9 +++------
 net/9p/client.c         | 44 ++++++++++++++++----------------------------
 2 files changed, 19 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 7af9d769b97d..e405729cd1c7 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -27,6 +27,7 @@
 #define NET_9P_CLIENT_H
 
 #include <linux/utsname.h>
+#include <linux/idr.h>
 
 /* Number of requests per row */
 #define P9_ROW_MAXTAG 255
@@ -128,8 +129,7 @@ struct p9_req_t {
  * @proto_version: 9P protocol version to use
  * @trans_mod: module API instantiated with this client
  * @trans: tranport instance state and API
- * @fidpool: fid handle accounting for session
- * @fidlist: List of active fid handles
+ * @fids: All active FID handles
  * @tagpool - transaction id accounting for session
  * @reqs - 2D array of requests
  * @max_tag - current maximum tag id allocated
@@ -169,8 +169,7 @@ struct p9_client {
 		} tcp;
 	} trans_opts;
 
-	struct p9_idpool *fidpool;
-	struct list_head fidlist;
+	struct idr fids;
 
 	struct p9_idpool *tagpool;
 	struct p9_req_t *reqs[P9_ROW_MAXTAG];
@@ -188,7 +187,6 @@ struct p9_client {
  * @iounit: the server reported maximum transaction size for this file
  * @uid: the numeric uid of the local user who owns this handle
  * @rdir: readdir accounting structure (allocated on demand)
- * @flist: per-client-instance fid tracking
  * @dlist: per-dentry fid tracking
  *
  * TODO: This needs lots of explanation.
@@ -204,7 +202,6 @@ struct p9_fid {
 
 	void *rdir;
 
-	struct list_head flist;
 	struct hlist_node dlist;	/* list of all fids attached to a dentry */
 };
 
diff --git a/net/9p/client.c b/net/9p/client.c
index 7c317d39bf62..cd33cf636c47 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -909,30 +909,29 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 {
 	int ret;
 	struct p9_fid *fid;
-	unsigned long flags;
 
 	p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
 	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
 	if (!fid)
 		return NULL;
 
-	ret = p9_idpool_get(clnt->fidpool);
-	if (ret < 0)
-		goto error;
-	fid->fid = ret;
-
 	memset(&fid->qid, 0, sizeof(struct p9_qid));
 	fid->mode = -1;
 	fid->uid = current_fsuid();
 	fid->clnt = clnt;
 	fid->rdir = NULL;
-	spin_lock_irqsave(&clnt->lock, flags);
-	list_add(&fid->flist, &clnt->fidlist);
-	spin_unlock_irqrestore(&clnt->lock, flags);
+	fid->fid = 0;
 
-	return fid;
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&clnt->lock);
+	ret = idr_alloc_u32(&clnt->fids, fid, &fid->fid, P9_NOFID - 1,
+			    GFP_NOWAIT);
+	spin_unlock_irq(&clnt->lock);
+	idr_preload_end();
+
+	if (!ret)
+		return fid;
 
-error:
 	kfree(fid);
 	return NULL;
 }
@@ -944,9 +943,8 @@ static void p9_fid_destroy(struct p9_fid *fid)
 
 	p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid);
 	clnt = fid->clnt;
-	p9_idpool_put(fid->fid, clnt->fidpool);
 	spin_lock_irqsave(&clnt->lock, flags);
-	list_del(&fid->flist);
+	idr_remove(&clnt->fids, fid->fid);
 	spin_unlock_irqrestore(&clnt->lock, flags);
 	kfree(fid->rdir);
 	kfree(fid);
@@ -1029,7 +1027,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 	memcpy(clnt->name, client_id, strlen(client_id) + 1);
 
 	spin_lock_init(&clnt->lock);
-	INIT_LIST_HEAD(&clnt->fidlist);
+	idr_init(&clnt->fids);
 
 	err = p9_tag_init(clnt);
 	if (err < 0)
@@ -1049,18 +1047,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 		goto destroy_tagpool;
 	}
 
-	clnt->fidpool = p9_idpool_create();
-	if (IS_ERR(clnt->fidpool)) {
-		err = PTR_ERR(clnt->fidpool);
-		goto put_trans;
-	}
-
 	p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n",
 		 clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);
 
 	err = clnt->trans_mod->create(clnt, dev_name, options);
 	if (err)
-		goto destroy_fidpool;
+		goto put_trans;
 
 	if (clnt->msize > clnt->trans_mod->maxsize)
 		clnt->msize = clnt->trans_mod->maxsize;
@@ -1073,8 +1065,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
 close_trans:
 	clnt->trans_mod->close(clnt);
-destroy_fidpool:
-	p9_idpool_destroy(clnt->fidpool);
 put_trans:
 	v9fs_put_trans(clnt->trans_mod);
 destroy_tagpool:
@@ -1087,7 +1077,8 @@ EXPORT_SYMBOL(p9_client_create);
 
 void p9_client_destroy(struct p9_client *clnt)
 {
-	struct p9_fid *fid, *fidptr;
+	struct p9_fid *fid;
+	int id;
 
 	p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt);
 
@@ -1096,14 +1087,11 @@ void p9_client_destroy(struct p9_client *clnt)
 
 	v9fs_put_trans(clnt->trans_mod);
 
-	list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) {
+	idr_for_each_entry(&clnt->fids, fid, id) {
 		pr_info("Found fid %d not clunked\n", fid->fid);
 		p9_fid_destroy(fid);
 	}
 
-	if (clnt->fidpool)
-		p9_idpool_destroy(clnt->fidpool);
-
 	p9_tag_cleanup(clnt);
 
 	kfree(clnt);
-- 
cgit v1.2.3


From 2557d0c57c0c11af915d0d4d97402527958c0c01 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 11 Jul 2018 14:02:23 -0700
Subject: 9p: Embed wait_queue_head into p9_req_t

On a 64-bit system, the wait_queue_head_t is 24 bytes while the pointer
to it is 8 bytes.  Growing the p9_req_t by 16 bytes is better than
performing a 24-byte memory allocation.

Link: http://lkml.kernel.org/r/20180711210225.19730-5-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Greg Kurz <groug@kaod.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h |  2 +-
 net/9p/client.c         | 19 +++++--------------
 net/9p/trans_virtio.c   |  2 +-
 3 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index e405729cd1c7..0fa0fbab33b0 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -113,7 +113,7 @@ enum p9_req_status_t {
 struct p9_req_t {
 	int status;
 	int t_err;
-	wait_queue_head_t *wq;
+	wait_queue_head_t wq;
 	struct p9_fcall *tc;
 	struct p9_fcall *rc;
 	void *aux;
diff --git a/net/9p/client.c b/net/9p/client.c
index cd33cf636c47..33717b1b84d8 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -283,8 +283,9 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 				return ERR_PTR(-ENOMEM);
 			}
 			for (col = 0; col < P9_ROW_MAXTAG; col++) {
-				c->reqs[row][col].status = REQ_STATUS_IDLE;
-				c->reqs[row][col].tc = NULL;
+				req = &c->reqs[row][col];
+				req->status = REQ_STATUS_IDLE;
+				init_waitqueue_head(&req->wq);
 			}
 			c->max_tag += P9_ROW_MAXTAG;
 		}
@@ -294,13 +295,6 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
 	col = tag % P9_ROW_MAXTAG;
 
 	req = &c->reqs[row][col];
-	if (!req->wq) {
-		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
-		if (!req->wq)
-			goto grow_failed;
-		init_waitqueue_head(req->wq);
-	}
-
 	if (!req->tc)
 		req->tc = p9_fcall_alloc(alloc_msize);
 	if (!req->rc)
@@ -320,9 +314,7 @@ grow_failed:
 	pr_err("Couldn't grow tag array\n");
 	kfree(req->tc);
 	kfree(req->rc);
-	kfree(req->wq);
 	req->tc = req->rc = NULL;
-	req->wq = NULL;
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -410,7 +402,6 @@ static void p9_tag_cleanup(struct p9_client *c)
 	/* free requests associated with tags */
 	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {
 		for (col = 0; col < P9_ROW_MAXTAG; col++) {
-			kfree(c->reqs[row][col].wq);
 			kfree(c->reqs[row][col].tc);
 			kfree(c->reqs[row][col].rc);
 		}
@@ -453,7 +444,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 	smp_wmb();
 	req->status = status;
 
-	wake_up(req->wq);
+	wake_up(&req->wq);
 	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
 }
 EXPORT_SYMBOL(p9_client_cb);
@@ -774,7 +765,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	}
 again:
 	/* Wait for the response */
-	err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
+	err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
 
 	/*
 	 * Make sure our req is coherent with regard to updates in other
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index eaacce086427..3f69c428ddf9 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -490,7 +490,7 @@ req_retry_pinned:
 	virtqueue_kick(chan->vq);
 	spin_unlock_irqrestore(&chan->lock, flags);
 	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
-	err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
+	err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
 	/*
 	 * Non kernel buffers are pinned, unpin them
 	 */
-- 
cgit v1.2.3


From 461bb2eee4e162617e790c74d9b4ab10056cad7f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:40 -0600
Subject: IB/uverbs: Add a simple allocator to uverbs_attr_bundle

This is similar in spirit to devm, it keeps track of any allocations
linked to this method call and ensures they are all freed when the method
exits. Further, if there is space in the internal/onstack buffer then the
allocator will hand out that memory and avoid an expensive call to
kalloc/kfree in the syscall path.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/core/uverbs_ioctl.c | 109 +++++++++++++++++++++++++++------
 include/rdma/uverbs_ioctl.h            |  24 ++++++++
 2 files changed, 113 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index f355e938a0b1..7b330cc5ff76 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,7 +35,18 @@
 #include "rdma_core.h"
 #include "uverbs.h"
 
+struct bundle_alloc_head {
+	struct bundle_alloc_head *next;
+	u8 data[];
+};
+
 struct bundle_priv {
+	/* Must be first */
+	struct bundle_alloc_head alloc_head;
+	struct bundle_alloc_head *allocated_mem;
+	size_t internal_avail;
+	size_t internal_used;
+
 	struct ib_uverbs_attr __user *user_attrs;
 	struct ib_uverbs_attr *uattrs;
 	struct uverbs_obj_attr *destroy_attr;
@@ -45,8 +56,53 @@ struct bundle_priv {
 	 * internal_buffer.
 	 */
 	struct uverbs_attr_bundle bundle;
+	u64 internal_buffer[32];
 };
 
+/**
+ * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * @bundle: The bundle
+ * @size: Number of bytes to allocate
+ * @flags: Allocator flags
+ *
+ * The bundle allocator is intended for allocations that are connected with
+ * processing the system call related to the bundle. The allocated memory is
+ * always freed once the system call completes, and cannot be freed any other
+ * way.
+ *
+ * This tries to use a small pool of pre-allocated memory for performance.
+ */
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+			     gfp_t flags)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	size_t new_used;
+	void *res;
+
+	if (check_add_overflow(size, pbundle->internal_used, &new_used))
+		return ERR_PTR(-EINVAL);
+
+	if (new_used > pbundle->internal_avail) {
+		struct bundle_alloc_head *buf;
+
+		buf = kvmalloc(struct_size(buf, data, size), flags);
+		if (!buf)
+			return ERR_PTR(-ENOMEM);
+		buf->next = pbundle->allocated_mem;
+		pbundle->allocated_mem = buf;
+		return buf->data;
+	}
+
+	res = (void *)pbundle->internal_buffer + pbundle->internal_used;
+	pbundle->internal_used =
+		ALIGN(new_used, sizeof(*pbundle->internal_buffer));
+	if (flags & __GFP_ZERO)
+		memset(res, 0, size);
+	return res;
+}
+EXPORT_SYMBOL(_uverbs_alloc);
+
 static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 				   u16 len)
 {
@@ -129,17 +185,15 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 		if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
 			void *p;
 
-			p = kvmalloc(uattr->len, GFP_KERNEL);
-			if (!p)
-				return -ENOMEM;
+			p = uverbs_alloc(&pbundle->bundle, uattr->len);
+			if (IS_ERR(p))
+				return PTR_ERR(p);
 
 			e->ptr_attr.ptr = p;
 
 			if (copy_from_user(p, u64_to_user_ptr(uattr->data),
-					   uattr->len)) {
-				kvfree(p);
+					   uattr->len))
 				return -EFAULT;
-			}
 		} else {
 			e->ptr_attr.data = uattr->data;
 		}
@@ -234,10 +288,6 @@ static int uverbs_finalize_attrs(struct bundle_priv *pbundle,
 					spec->u.obj.access, commit);
 				if (!ret)
 					ret = current_ret;
-			} else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN &&
-				   spec->alloc_and_copy &&
-				   !uverbs_attr_ptr_is_inline(attr)) {
-				kvfree(attr->ptr_attr.ptr);
 			}
 		}
 	}
@@ -372,7 +422,18 @@ cleanup:
 	return ret ? ret : finalize_ret;
 }
 
-#define UVERBS_OPTIMIZE_USING_STACK_SZ  256
+static void bundle_destroy(struct bundle_priv *pbundle)
+{
+	struct bundle_alloc_head *memblock;
+
+	for (memblock = pbundle->allocated_mem; memblock;) {
+		struct bundle_alloc_head *tmp = memblock;
+
+		memblock = memblock->next;
+		kvfree(tmp);
+	}
+}
+
 static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 				struct ib_uverbs_file *file,
 				struct ib_uverbs_ioctl_hdr *hdr,
@@ -382,11 +443,11 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 	const struct uverbs_method_spec *method_spec;
 	long err = 0;
 	unsigned int i;
+	struct bundle_priv onstack_pbundle;
 	struct bundle_priv *ctx;
 	struct uverbs_attr *curr_attr;
 	unsigned long *curr_bitmap;
 	size_t ctx_size;
-	uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
 
 	if (hdr->driver_id != ib_dev->driver_id)
 		return -EINVAL;
@@ -399,7 +460,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 	if (!method_spec)
 		return -EPROTONOSUPPORT;
 
-	ctx_size = sizeof(*ctx) +
+	ctx_size = sizeof(*ctx) - sizeof(ctx->internal_buffer) +
 		   sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
 		   sizeof(*ctx->uattrs) * hdr->num_attrs +
 		   sizeof(*ctx->bundle.hash[0].attrs) *
@@ -408,17 +469,26 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 			(method_spec->num_child_attrs / BITS_PER_LONG +
 			 method_spec->num_buckets);
 
-	if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
-		ctx = (void *)data;
-	if (!ctx)
+	if (ctx_size <= sizeof(onstack_pbundle)) {
+		ctx = &onstack_pbundle;
+		ctx->internal_avail =
+			sizeof(onstack_pbundle) -
+			offsetof(struct bundle_priv, internal_buffer);
+		ctx->allocated_mem = NULL;
+	} else {
 		ctx = kmalloc(ctx_size, GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
+		if (!ctx)
+			return -ENOMEM;
+		ctx->internal_avail = 0;
+		ctx->alloc_head.next = NULL;
+		ctx->allocated_mem = &ctx->alloc_head;
+	}
 
 	ctx->uattrs = (void *)(ctx + 1) +
 		      (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets);
 	curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
 	curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
+	ctx->internal_used = ALIGN(ctx_size, sizeof(*ctx->internal_buffer));
 
 	/*
 	 * We just fill the pointers and num_attrs here. The data itself will be
@@ -462,8 +532,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
 		err = -EINVAL;
 	}
 out:
-	if (ctx != (void *)data)
-		kfree(ctx);
+	bundle_destroy(ctx);
 	return err;
 }
 
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index ecf028446cdf..1dbf663f7f43 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -651,6 +651,20 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
 		       size_t idx, u64 allowed_bits);
 int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
 		   const void *from, size_t size);
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+			     gfp_t flags);
+
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
+{
+	return _uverbs_alloc(bundle, size, GFP_KERNEL);
+}
+
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
+{
+	return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
+}
 #else
 static inline int
 uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
@@ -669,6 +683,16 @@ static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
 {
 	return -EINVAL;
 }
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif
 
 /* =================================================
-- 
cgit v1.2.3


From 3a863577a7496278892360a69d90d8465733100c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:42 -0600
Subject: IB/uverbs: Use uverbs_api to unmarshal ioctl commands

Convert the ioctl method syscall path to use the uverbs_api data
structures. The new uapi structure includes all the same information, just
in a different and more optimal way.

 - Use attr_bkey instead of 2 level radix trees for everything related to
   attributes. This includes the attribute storage, presence, and
   detection of missing mandatory attributes.
 - Avoid iterating over all attribute storage at finish, instead use
   find_first_bit with the attr_bkey to locate only those attrs that need
   cleanup.
 - Organize things to always run, and always rely on, cleanup. This
   avoids a bunch of tricky error unwind cases.
 - Locate the method using the radix tree, and locate the attributes
   using a very efficient incremental radix tree lookup
 - Use the precomputed destroy_bkey to handle uobject destruction
 - Use the precomputed allocation sizes and precomputed 'need_stack'
   to avoid maths in the fast path. This is optimal if userspace
   does not pass (many) unsupported attributes.

Overall this results in much better codegen for the attribute accessors,
everything is now stored in bitmaps or linear arrays indexed by attr_bkey.
The compiler can compute attr_bkey values at compile time for all method
attributes, meaning things like uverbs_attr_is_valid() now compile into
single instruction bit tests.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.h    |   4 +
 drivers/infiniband/core/uverbs_ioctl.c | 472 ++++++++++++++-------------------
 drivers/infiniband/core/uverbs_uapi.c  |   3 +
 include/rdma/uverbs_ioctl.h            |  36 +--
 4 files changed, 217 insertions(+), 298 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index d89569d87b1c..aca279bfef08 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -133,6 +133,8 @@ struct uverbs_api_ioctl_method {
 	int (__rcu *handler)(struct ib_uverbs_file *ufile,
 			     struct uverbs_attr_bundle *ctx);
 	DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+	u16 bundle_size;
+	u8 use_stack:1;
 	u8 driver_method:1;
 	u8 key_bitmap_len;
 	u8 destroy_bkey;
@@ -162,5 +164,7 @@ struct uverbs_api *uverbs_alloc_api(
 void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
 void uverbs_disassociate_api(struct uverbs_api *uapi);
 void uverbs_destroy_api(struct uverbs_api *uapi);
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+			      unsigned int num_attrs);
 
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 7b330cc5ff76..3ca700f6d663 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -47,9 +47,16 @@ struct bundle_priv {
 	size_t internal_avail;
 	size_t internal_used;
 
+	struct radix_tree_root *radix;
+	const struct uverbs_api_ioctl_method *method_elm;
+	void __rcu **radix_slots;
+	unsigned long radix_slots_len;
+	u32 method_key;
+
 	struct ib_uverbs_attr __user *user_attrs;
 	struct ib_uverbs_attr *uattrs;
-	struct uverbs_obj_attr *destroy_attr;
+
+	DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
 
 	/*
 	 * Must be last. bundle ends in a flex array which overlaps
@@ -59,6 +66,28 @@ struct bundle_priv {
 	u64 internal_buffer[32];
 };
 
+/*
+ * Each method has an absolute minimum amount of memory it needs to allocate,
+ * precompute that amount and determine if the onstack memory can be used or
+ * if allocation is need.
+ */
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+			      unsigned int num_attrs)
+{
+	struct bundle_priv *pbundle;
+	size_t bundle_size =
+		offsetof(struct bundle_priv, internal_buffer) +
+		sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
+		sizeof(*pbundle->uattrs) * num_attrs;
+
+	method_elm->use_stack = bundle_size <= sizeof(*pbundle);
+	method_elm->bundle_size =
+		ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
+
+	/* Do not want order-2 allocations for this. */
+	WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
+}
+
 /**
  * uverbs_alloc() - Quickly allocate memory for use with a bundle
  * @bundle: The bundle
@@ -81,7 +110,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
 	void *res;
 
 	if (check_add_overflow(size, pbundle->internal_used, &new_used))
-		return ERR_PTR(-EINVAL);
+		return ERR_PTR(-EOVERFLOW);
 
 	if (new_used > pbundle->internal_avail) {
 		struct bundle_alloc_head *buf;
@@ -115,31 +144,13 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 }
 
 static int uverbs_process_attr(struct bundle_priv *pbundle,
-			       const struct ib_uverbs_attr *uattr,
-			       u16 attr_id,
-			       const struct uverbs_attr_spec_hash *attr_spec_bucket,
-			       struct uverbs_attr_bundle_hash *attr_bundle_h,
-			       struct ib_uverbs_attr __user *uattr_ptr)
+			       const struct uverbs_api_attr *attr_uapi,
+			       struct ib_uverbs_attr *uattr, u32 attr_bkey)
 {
-	const struct uverbs_attr_spec *spec;
-	const struct uverbs_attr_spec *val_spec;
-	struct uverbs_attr *e;
+	const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+	struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
+	const struct uverbs_attr_spec *val_spec = spec;
 	struct uverbs_obj_attr *o_attr;
-	struct uverbs_attr *elements = attr_bundle_h->attrs;
-
-	if (attr_id >= attr_spec_bucket->num_attrs) {
-		if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
-			return -EINVAL;
-		else
-			return 0;
-	}
-
-	if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
-		return -EINVAL;
-
-	spec = &attr_spec_bucket->attrs[attr_id];
-	val_spec = spec;
-	e = &elements[attr_id];
 
 	switch (spec->type) {
 	case UVERBS_ATTR_TYPE_ENUM_IN:
@@ -208,12 +219,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
-
-		/* specs are allowed to have only one destroy attribute */
-		WARN_ON(spec->u.obj.access == UVERBS_ACCESS_DESTROY &&
-			pbundle->destroy_attr);
-		if (spec->u.obj.access == UVERBS_ACCESS_DESTROY)
-			pbundle->destroy_attr = o_attr;
+		o_attr->attr_elm = attr_uapi;
 
 		/*
 		 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
@@ -226,20 +232,17 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 					pbundle->bundle.ufile,
 					spec->u.obj.access,
 					uattr->data_s64);
-
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
+		__set_bit(attr_bkey, pbundle->uobj_finalize);
 
 		if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
+			unsigned int uattr_idx = uattr - pbundle->uattrs;
 			s64 id = o_attr->uobject->id;
 
 			/* Copy the allocated id to the user-space */
-			if (put_user(id, &uattr_ptr->data)) {
-				uverbs_finalize_object(o_attr->uobject,
-						       UVERBS_ACCESS_NEW,
-						       false);
+			if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
 				return -EFAULT;
-			}
 		}
 
 		break;
@@ -247,184 +250,152 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 		return -EOPNOTSUPP;
 	}
 
-	set_bit(attr_id, attr_bundle_h->valid_bitmap);
 	return 0;
 }
 
-static int uverbs_finalize_attrs(struct bundle_priv *pbundle,
-				 struct uverbs_attr_spec_hash *const *spec_hash,
-				 size_t num, bool commit)
+/*
+ * We search the radix tree with the method prefix and now we want to fast
+ * search the suffix bits to get a particular attribute pointer. It is not
+ * totally clear to me if this breaks the radix tree encasulation or not, but
+ * it uses the iter data to determine if the method iter points at the same
+ * chunk that will store the attribute, if so it just derefs it directly. By
+ * construction in most kernel configs the method and attrs will all fit in a
+ * single radix chunk, so in most cases this will have no search. Other cases
+ * this falls back to a full search.
+ */
+static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
+					     u32 attr_key)
 {
-	struct uverbs_attr_bundle *attrs_bundle = &pbundle->bundle;
-	unsigned int i;
-	int ret = 0;
-
-	for (i = 0; i < num; i++) {
-		struct uverbs_attr_bundle_hash *curr_bundle =
-			&attrs_bundle->hash[i];
-		const struct uverbs_attr_spec_hash *curr_spec_bucket =
-			spec_hash[i];
-		unsigned int j;
-
-		if (!curr_spec_bucket)
-			continue;
-
-		for (j = 0; j < curr_bundle->num_attrs; j++) {
-			struct uverbs_attr *attr;
-			const struct uverbs_attr_spec *spec;
-
-			if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
-				continue;
-
-			attr = &curr_bundle->attrs[j];
-			spec = &curr_spec_bucket->attrs[j];
+	void __rcu **slot;
 
-			if (spec->type == UVERBS_ATTR_TYPE_IDR ||
-			    spec->type == UVERBS_ATTR_TYPE_FD) {
-				int current_ret;
+	if (likely(attr_key < pbundle->radix_slots_len)) {
+		void *entry;
 
-				current_ret = uverbs_finalize_object(
-					attr->obj_attr.uobject,
-					spec->u.obj.access, commit);
-				if (!ret)
-					ret = current_ret;
-			}
-		}
+		slot = pbundle->radix_slots + attr_key;
+		entry = rcu_dereference_raw(*slot);
+		if (likely(!radix_tree_is_internal_node(entry) && entry))
+			return slot;
 	}
-	return ret;
+
+	return radix_tree_lookup_slot(pbundle->radix,
+				      pbundle->method_key | attr_key);
 }
 
-static int uverbs_uattrs_process(size_t num_uattrs,
-				 const struct uverbs_method_spec *method,
-				 struct bundle_priv *pbundle)
+static int uverbs_set_attr(struct bundle_priv *pbundle,
+			   struct ib_uverbs_attr *uattr)
 {
-	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
-	struct ib_uverbs_attr __user *uattr_ptr = pbundle->user_attrs;
-	size_t i;
-	int ret = 0;
-	int num_given_buckets = 0;
-
-	for (i = 0; i < num_uattrs; i++) {
-		const struct ib_uverbs_attr *uattr = &pbundle->uattrs[i];
-		u16 attr_id = uattr->attr_id;
-		struct uverbs_attr_spec_hash *attr_spec_bucket;
-
-		ret = uverbs_ns_idx(&attr_id, method->num_buckets);
-		if (ret < 0 || !method->attr_buckets[ret]) {
-			if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
-				uverbs_finalize_attrs(pbundle,
-						      method->attr_buckets,
-						      num_given_buckets,
-						      false);
-				return ret;
-			}
-			continue;
-		}
+	u32 attr_key = uapi_key_attr(uattr->attr_id);
+	u32 attr_bkey = uapi_bkey_attr(attr_key);
+	const struct uverbs_api_attr *attr;
+	void __rcu **slot;
+	int ret;
 
+	slot = uapi_get_attr_for_method(pbundle, attr_key);
+	if (!slot) {
 		/*
-		 * ret is the found ns, so increase num_given_buckets if
-		 * necessary.
+		 * Kernel does not support the attribute but user-space says it
+		 * is mandatory
 		 */
-		if (ret >= num_given_buckets)
-			num_given_buckets = ret + 1;
-
-		attr_spec_bucket = method->attr_buckets[ret];
-		ret = uverbs_process_attr(pbundle,
-					  uattr, attr_id,
-					  attr_spec_bucket,
-					  &attr_bundle->hash[ret],
-					  uattr_ptr++);
-		if (ret) {
-			uverbs_finalize_attrs(pbundle,
-					      method->attr_buckets,
-					      num_given_buckets,
-					      false);
-			return ret;
-		}
+		if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+			return -EPROTONOSUPPORT;
+		return 0;
 	}
+	attr = srcu_dereference(
+		*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
 
-	return num_given_buckets;
-}
-
-static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec,
-					    struct bundle_priv *pbundle)
-{
-	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
-	unsigned int i;
-
-	for (i = 0; i < attr_bundle->num_buckets; i++) {
-		struct uverbs_attr_spec_hash *attr_spec_bucket =
-			method_spec->attr_buckets[i];
-
-		if (!attr_spec_bucket)
-			continue;
-
-		if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask,
-				   attr_bundle->hash[i].valid_bitmap,
-				   attr_spec_bucket->num_attrs))
-			return -EINVAL;
-	}
+	/* Reject duplicate attributes from user-space */
+	if (test_bit(attr_bkey, pbundle->bundle.attr_present))
+		return -EINVAL;
 
-	for (; i < method_spec->num_buckets; i++) {
-		struct uverbs_attr_spec_hash *attr_spec_bucket =
-			method_spec->attr_buckets[i];
+	ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
+	if (ret)
+		return ret;
 
-		if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask,
-				  attr_spec_bucket->num_attrs))
-			return -EINVAL;
-	}
+	__set_bit(attr_bkey, pbundle->bundle.attr_present);
 
 	return 0;
 }
 
-static int uverbs_handle_method(size_t num_uattrs,
-				const struct uverbs_method_spec *method_spec,
-				struct bundle_priv *pbundle)
+static int ib_uverbs_run_method(struct bundle_priv *pbundle,
+				unsigned int num_attrs)
 {
-	struct uverbs_attr_bundle *attr_bundle = &pbundle->bundle;
+	int (*handler)(struct ib_uverbs_file *ufile,
+		       struct uverbs_attr_bundle *ctx);
+	size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
+	unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
+	unsigned int i;
 	int ret;
-	int finalize_ret;
-	int num_given_buckets;
 
-	num_given_buckets =
-		uverbs_uattrs_process(num_uattrs, method_spec, pbundle);
-	if (num_given_buckets <= 0)
+	/* See uverbs_disassociate_api() */
+	handler = srcu_dereference(
+		pbundle->method_elm->handler,
+		&pbundle->bundle.ufile->device->disassociate_srcu);
+	if (!handler)
+		return -EIO;
+
+	pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
+	if (IS_ERR(pbundle->uattrs))
+		return PTR_ERR(pbundle->uattrs);
+	if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
+		return -EFAULT;
+
+	for (i = 0; i != num_attrs; i++) {
+		ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	/* User space did not provide all the mandatory attributes */
+	if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
+				    pbundle->bundle.attr_present,
+				    pbundle->method_elm->key_bitmap_len)))
 		return -EINVAL;
 
-	attr_bundle->num_buckets = num_given_buckets;
-	ret = uverbs_validate_kernel_mandatory(method_spec, pbundle);
-	if (ret)
-		goto cleanup;
+	if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
+		struct uverbs_obj_attr *destroy_attr =
+			&pbundle->bundle.attrs[destroy_bkey].obj_attr;
 
-	/*
-	 * We destroy the HW object before invoking the handler, handlers do
-	 * not get to manipulate the HW objects.
-	 */
-	if (pbundle->destroy_attr) {
-		ret = uobj_destroy(pbundle->destroy_attr->uobject);
+		ret = uobj_destroy(destroy_attr->uobject);
 		if (ret)
-			goto cleanup;
-	}
-
-	ret = method_spec->handler(pbundle->bundle.ufile, attr_bundle);
+			return ret;
+		__clear_bit(destroy_bkey, pbundle->uobj_finalize);
 
-	if (pbundle->destroy_attr) {
-		uobj_put_destroy(pbundle->destroy_attr->uobject);
-		pbundle->destroy_attr->uobject = NULL;
+		ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+		uobj_put_destroy(destroy_attr->uobject);
+	} else {
+		ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
 	}
 
-cleanup:
-	finalize_ret = uverbs_finalize_attrs(pbundle,
-					     method_spec->attr_buckets,
-					     attr_bundle->num_buckets,
-					     !ret);
+	/*
+	 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+	 * not invoke the method because the request is not supported.  No
+	 * other cases should return this code.
+	 */
+	if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
+		return -EINVAL;
 
-	return ret ? ret : finalize_ret;
+	return ret;
 }
 
-static void bundle_destroy(struct bundle_priv *pbundle)
+static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
 {
+	unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
 	struct bundle_alloc_head *memblock;
+	unsigned int i;
+	int ret = 0;
+
+	i = -1;
+	while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
+				  i + 1)) < key_bitmap_len) {
+		struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+		int current_ret;
+
+		current_ret = uverbs_finalize_object(
+			attr->obj_attr.uobject,
+			attr->obj_attr.attr_elm->spec.u.obj.access, commit);
+		if (!ret)
+			ret = current_ret;
+	}
 
 	for (memblock = pbundle->allocated_mem; memblock;) {
 		struct bundle_alloc_head *tmp = memblock;
@@ -432,108 +403,71 @@ static void bundle_destroy(struct bundle_priv *pbundle)
 		memblock = memblock->next;
 		kvfree(tmp);
 	}
+
+	return ret;
 }
 
-static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
-				struct ib_uverbs_file *file,
-				struct ib_uverbs_ioctl_hdr *hdr,
-				struct ib_uverbs_attr __user *user_attrs)
+static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
+			       struct ib_uverbs_ioctl_hdr *hdr,
+			       struct ib_uverbs_attr __user *user_attrs)
 {
-	const struct uverbs_object_spec *object_spec;
-	const struct uverbs_method_spec *method_spec;
-	long err = 0;
-	unsigned int i;
-	struct bundle_priv onstack_pbundle;
-	struct bundle_priv *ctx;
-	struct uverbs_attr *curr_attr;
-	unsigned long *curr_bitmap;
-	size_t ctx_size;
+	const struct uverbs_api_ioctl_method *method_elm;
+	struct uverbs_api *uapi = ufile->device->uapi;
+	struct radix_tree_iter attrs_iter;
+	struct bundle_priv *pbundle;
+	struct bundle_priv onstack;
+	void __rcu **slot;
+	int destroy_ret;
+	int ret;
 
-	if (hdr->driver_id != ib_dev->driver_id)
+	if (unlikely(hdr->driver_id != uapi->driver_id))
 		return -EINVAL;
 
-	object_spec = uverbs_get_object(file, hdr->object_id);
-	if (!object_spec)
-		return -EPROTONOSUPPORT;
-
-	method_spec = uverbs_get_method(object_spec, hdr->method_id);
-	if (!method_spec)
+	slot = radix_tree_iter_lookup(
+		&uapi->radix, &attrs_iter,
+		uapi_key_obj(hdr->object_id) |
+			uapi_key_ioctl_method(hdr->method_id));
+	if (unlikely(!slot))
 		return -EPROTONOSUPPORT;
+	method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
 
-	ctx_size = sizeof(*ctx) - sizeof(ctx->internal_buffer) +
-		   sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
-		   sizeof(*ctx->uattrs) * hdr->num_attrs +
-		   sizeof(*ctx->bundle.hash[0].attrs) *
-		   method_spec->num_child_attrs +
-		   sizeof(*ctx->bundle.hash[0].valid_bitmap) *
-			(method_spec->num_child_attrs / BITS_PER_LONG +
-			 method_spec->num_buckets);
-
-	if (ctx_size <= sizeof(onstack_pbundle)) {
-		ctx = &onstack_pbundle;
-		ctx->internal_avail =
-			sizeof(onstack_pbundle) -
+	if (!method_elm->use_stack) {
+		pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
+		if (!pbundle)
+			return -ENOMEM;
+		pbundle->internal_avail =
+			method_elm->bundle_size -
 			offsetof(struct bundle_priv, internal_buffer);
-		ctx->allocated_mem = NULL;
+		pbundle->alloc_head.next = NULL;
+		pbundle->allocated_mem = &pbundle->alloc_head;
 	} else {
-		ctx = kmalloc(ctx_size, GFP_KERNEL);
-		if (!ctx)
-			return -ENOMEM;
-		ctx->internal_avail = 0;
-		ctx->alloc_head.next = NULL;
-		ctx->allocated_mem = &ctx->alloc_head;
+		pbundle = &onstack;
+		pbundle->internal_avail = sizeof(pbundle->internal_buffer);
+		pbundle->allocated_mem = NULL;
 	}
 
-	ctx->uattrs = (void *)(ctx + 1) +
-		      (sizeof(ctx->bundle.hash[0]) * method_spec->num_buckets);
-	curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
-	curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
-	ctx->internal_used = ALIGN(ctx_size, sizeof(*ctx->internal_buffer));
-
-	/*
-	 * We just fill the pointers and num_attrs here. The data itself will be
-	 * filled at a later stage (uverbs_process_attr)
-	 */
-	for (i = 0; i < method_spec->num_buckets; i++) {
-		unsigned int curr_num_attrs;
-
-		if (!method_spec->attr_buckets[i])
-			continue;
-
-		curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
-
-		ctx->bundle.hash[i].attrs = curr_attr;
-		curr_attr += curr_num_attrs;
-		ctx->bundle.hash[i].num_attrs = curr_num_attrs;
-		ctx->bundle.hash[i].valid_bitmap = curr_bitmap;
-		bitmap_zero(curr_bitmap, curr_num_attrs);
-		curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
-	}
+	/* Space for the pbundle->bundle.attrs flex array */
+	pbundle->method_elm = method_elm;
+	pbundle->method_key = attrs_iter.index;
+	pbundle->bundle.ufile = ufile;
+	pbundle->radix = &uapi->radix;
+	pbundle->radix_slots = slot;
+	pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
+	pbundle->user_attrs = user_attrs;
+
+	pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
+					       sizeof(*pbundle->bundle.attrs),
+				       sizeof(*pbundle->internal_buffer));
+	memset(pbundle->bundle.attr_present, 0,
+	       sizeof(pbundle->bundle.attr_present));
+	memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+
+	ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
+	destroy_ret = bundle_destroy(pbundle, ret == 0);
+	if (unlikely(destroy_ret && !ret))
+		return destroy_ret;
 
-	err = copy_from_user(ctx->uattrs, user_attrs,
-			     sizeof(*ctx->uattrs) * hdr->num_attrs);
-	if (err) {
-		err = -EFAULT;
-		goto out;
-	}
-
-	ctx->destroy_attr = NULL;
-	ctx->bundle.ufile = file;
-	ctx->user_attrs = user_attrs;
-	err = uverbs_handle_method(hdr->num_attrs, method_spec, ctx);
-
-	/*
-	 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
-	 * not invoke the method because the request is not supported.  No
-	 * other cases should return this code.
-	*/
-	if (unlikely(err == -EPROTONOSUPPORT)) {
-		WARN_ON_ONCE(err == -EPROTONOSUPPORT);
-		err = -EINVAL;
-	}
-out:
-	bundle_destroy(ctx);
-	return err;
+	return ret;
 }
 
 #define IB_UVERBS_MAX_CMD_SZ 4096
@@ -570,7 +504,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			goto out;
 		}
 
-		err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr, user_hdr->attrs);
+		err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
 	} else {
 		err = -ENOIOCTLCMD;
 	}
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 21c0de034511..73ea6f0db88f 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -160,6 +160,7 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
 			   u32 method_key)
 {
 	struct radix_tree_iter iter;
+	unsigned int num_attrs = 0;
 	unsigned int max_bkey = 0;
 	bool single_uobj = false;
 	void __rcu **slot;
@@ -204,11 +205,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
 		}
 
 		max_bkey = max(max_bkey, attr_bkey);
+		num_attrs++;
 	}
 
 	method_elm->key_bitmap_len = max_bkey + 1;
 	WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
 
+	uapi_compute_bundle_size(method_elm, num_attrs);
 	return 0;
 }
 
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 1dbf663f7f43..24ef8d9ac631 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -451,6 +451,7 @@ struct uverbs_object_tree_def {
  * =================================================
  */
 
+
 struct uverbs_ptr_attr {
 	/*
 	 * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is
@@ -467,6 +468,7 @@ struct uverbs_ptr_attr {
 
 struct uverbs_obj_attr {
 	struct ib_uobject		*uobject;
+	const struct uverbs_api_attr	*attr_elm;
 };
 
 struct uverbs_attr {
@@ -476,39 +478,17 @@ struct uverbs_attr {
 	};
 };
 
-struct uverbs_attr_bundle_hash {
-	/* if bit i is set, it means attrs[i] contains valid information */
-	unsigned long *valid_bitmap;
-	size_t num_attrs;
-	/*
-	 * arrays of attributes, each element corresponds to the specification
-	 * of the attribute in the same index.
-	 */
-	struct uverbs_attr *attrs;
-};
-
 struct uverbs_attr_bundle {
 	struct ib_uverbs_file *ufile;
-	size_t				num_buckets;
-	struct uverbs_attr_bundle_hash  hash[];
+	DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+	struct uverbs_attr attrs[];
 };
 
-static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash,
-						unsigned int idx)
-{
-	return test_bit(idx, attrs_hash->valid_bitmap);
-}
-
 static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle,
 					unsigned int idx)
 {
-	u16 idx_bucket = idx >>	UVERBS_ID_NS_SHIFT;
-
-	if (attrs_bundle->num_buckets <= idx_bucket)
-		return false;
-
-	return uverbs_attr_is_valid_in_hash(&attrs_bundle->hash[idx_bucket],
-					    idx & ~UVERBS_ID_NS_MASK);
+	return test_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+			attrs_bundle->attr_present);
 }
 
 #define IS_UVERBS_COPY_ERR(_ret)		((_ret) && (_ret) != -ENOENT)
@@ -516,12 +496,10 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
 static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
 							u16 idx)
 {
-	u16 idx_bucket = idx >>	UVERBS_ID_NS_SHIFT;
-
 	if (!uverbs_attr_is_valid(attrs_bundle, idx))
 		return ERR_PTR(-ENOENT);
 
-	return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK];
+	return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))];
 }
 
 static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
-- 
cgit v1.2.3


From 51d0a2b4cfa9979fd8a59faf483b4e84587ab4ea Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Thu, 9 Aug 2018 20:14:43 -0600
Subject: IB/uverbs: Remove struct uverbs_root_spec and all supporting code

Everything now uses the uverbs_uapi data structure.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/Makefile             |   2 +-
 drivers/infiniband/core/rdma_core.c          |  45 --
 drivers/infiniband/core/rdma_core.h          |   6 -
 drivers/infiniband/core/uverbs.h             |   1 -
 drivers/infiniband/core/uverbs_ioctl_merge.c | 662 ---------------------------
 drivers/infiniband/core/uverbs_main.c        |  28 +-
 include/rdma/uverbs_ioctl.h                  |  91 ----
 7 files changed, 2 insertions(+), 833 deletions(-)
 delete mode 100644 drivers/infiniband/core/uverbs_ioctl_merge.c

(limited to 'include')

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d934cf617841..867cee5e27b2 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -35,7 +35,7 @@ ib_ucm-y :=			ucm.o
 
 ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
-				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+				uverbs_std_types_cq.o \
 				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
 				uverbs_std_types_mr.o uverbs_std_types_counters.o \
 				uverbs_uapi.o
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 2814228ead39..12e7c6c102c1 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -42,51 +42,6 @@
 #include "core_priv.h"
 #include "rdma_core.h"
 
-int uverbs_ns_idx(u16 *id, unsigned int ns_count)
-{
-	int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
-
-	if (ret >= ns_count)
-		return -EINVAL;
-
-	*id &= ~UVERBS_ID_NS_MASK;
-	return ret;
-}
-
-const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
-						   uint16_t object)
-{
-	const struct uverbs_root_spec *object_hash = ufile->device->specs_root;
-	const struct uverbs_object_spec_hash *objects;
-	int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
-
-	if (ret < 0)
-		return NULL;
-
-	objects = object_hash->object_buckets[ret];
-
-	if (object >= objects->num_objects)
-		return NULL;
-
-	return objects->objects[object];
-}
-
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
-						   uint16_t method)
-{
-	const struct uverbs_method_spec_hash *methods;
-	int ret = uverbs_ns_idx(&method, object->num_buckets);
-
-	if (ret < 0)
-		return NULL;
-
-	methods = object->method_buckets[ret];
-	if (method >= methods->num_methods)
-		return NULL;
-
-	return methods->methods[method];
-}
-
 void uverbs_uobject_get(struct ib_uobject *uobject)
 {
 	kref_get(&uobject->ref);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index aca279bfef08..f962f2a593ba 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -45,12 +45,6 @@
 
 struct ib_uverbs_device;
 
-int uverbs_ns_idx(u16 *id, unsigned int ns_count);
-const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
-						   uint16_t object);
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
-						   uint16_t method);
-
 void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
 			     enum rdma_remove_reason reason);
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 879be0d1fd99..5df8e548cc14 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -111,7 +111,6 @@ struct ib_uverbs_device {
 	struct mutex				lists_mutex; /* protect lists */
 	struct list_head			uverbs_file_list;
 	struct list_head			uverbs_events_file_list;
-	struct uverbs_root_spec			*specs_root;
 	struct uverbs_api			*uapi;
 };
 
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
deleted file mode 100644
index 16b575929915..000000000000
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ /dev/null
@@ -1,662 +0,0 @@
-/*
- * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/rdma_user_ioctl.h>
-#include <linux/bitops.h>
-#include "uverbs.h"
-
-#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT)
-#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT)
-#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK)
-
-#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,	       \
-			  buckets_offset)				       \
-	for (tmpj = 0,							       \
-	     elem = (*(const void ***)((hashes)[tmpi] +			       \
-				       (buckets_offset)))[0];	               \
-	     tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset));        \
-	     tmpj++)						               \
-		if ((elem = ((*(const void ***)(hashes[tmpi] +		       \
-						(buckets_offset)))[tmpj])))
-
-/*
- * Iterate all elements of a few @hashes. The number of given hashes is
- * indicated by @num_hashes. The offset of the number of buckets in the hash is
- * represented by @num_buckets_offset, while the offset of the buckets array in
- * the hash structure is represented by @buckets_offset. tmpi and tmpj are two
- * short (or int) based indices that are given by the user. tmpi iterates over
- * the different hashes. @elem points the current element in the hashes[tmpi]
- * bucket we are looping on. To be honest, @hashes representation isn't exactly
- * a hash, but more a collection of elements. These elements' ids are treated
- * in a hash like manner, where the first upper bits are the bucket number.
- * These elements are later mapped into a perfect-hash.
- */
-#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes,                 \
-			 num_buckets_offset, buckets_offset)		       \
-	for (tmpi = 0; tmpi < (num_hashes); tmpi++)		               \
-		_for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\
-				  buckets_offset)
-
-#define get_elements_iterators_entry_above(iters, num_elements, elements,     \
-					  num_objects_fld, objects_fld, bucket,\
-					  min_id)			       \
-	get_elements_above_id((const void **)iters, num_elements,       \
-				     (const void **)(elements),		       \
-				     offsetof(typeof(**elements),	       \
-					      num_objects_fld),		       \
-				     offsetof(typeof(**elements), objects_fld),\
-				     offsetof(typeof(***(*elements)->objects_fld), id),\
-				     bucket, min_id)
-
-#define get_objects_above_id(iters, num_trees, trees, bucket, min_id)	       \
-	get_elements_iterators_entry_above(iters, num_trees, trees,	       \
-					   num_objects, objects, bucket, min_id)
-
-#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\
-	get_elements_iterators_entry_above(method_iters, num_iters, iters,     \
-					   num_methods, methods, bucket, min_id)
-
-#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\
-	get_elements_iterators_entry_above(attrs_iters, num_iters, iters,      \
-					   num_attrs, attrs, bucket, min_id)
-
-/*
- * get_elements_above_id get a few hashes represented by @elements and
- * @num_elements. The hashes fields are described by @num_offset, @data_offset
- * and @id_offset in the same way as required by for_each_element. The function
- * returns an array of @iters, represents an array of elements in the hashes
- * buckets, which their ids are the smallest ids in all hashes but are all
- * larger than the id given by min_id. Elements are only added to the iters
- * array if their id belongs to the bucket @bucket. The number of elements in
- * the returned array is returned by the function. @min_id is also updated to
- * reflect the new min_id of all elements in iters.
- */
-static size_t get_elements_above_id(const void **iters,
-				    unsigned int num_elements,
-				    const void **elements,
-				    size_t num_offset,
-				    size_t data_offset,
-				    size_t id_offset,
-				    u16 bucket,
-				    short *min_id)
-{
-	size_t num_iters = 0;
-	short min = SHRT_MAX;
-	const void *elem;
-	int i, j, last_stored = -1;
-	unsigned int equal_min = 0;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) != bucket)
-			continue;
-
-		if (GET_ID(id) < *min_id ||
-		    (min != SHRT_MAX && GET_ID(id) > min))
-			continue;
-
-		/*
-		 * We first iterate all hashes represented by @elements. When
-		 * we do, we try to find an element @elem in the bucket @bucket
-		 * which its id is min. Since we can't ensure the user sorted
-		 * the elements in increasing order, we override this hash's
-		 * minimal id element we found, if a new element with a smaller
-		 * id was just found.
-		 */
-		iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
-		last_stored = i;
-		if (min == GET_ID(id))
-			equal_min++;
-		else
-			equal_min = 1;
-		min = GET_ID(id);
-	}
-
-	/*
-	 * We only insert to our iters array an element, if its id is smaller
-	 * than all previous ids. Therefore, the final iters array is sorted so
-	 * that smaller ids are in the end of the array.
-	 * Therefore, we need to clean the beginning of the array to make sure
-	 * all ids of final elements are equal to min.
-	 */
-	memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
-
-	*min_id = min;
-	return equal_min;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
-				  objects_fld, bucket)			   \
-	find_max_element_id(num_elements, (const void **)(elements),	   \
-			    offsetof(typeof(**elements), num_objects_fld),    \
-			    offsetof(typeof(**elements), objects_fld),	      \
-			    offsetof(typeof(***(*elements)->objects_fld), id),\
-			    bucket)
-
-static short find_max_element_ns_id(unsigned int num_elements,
-				    const void **elements,
-				    size_t num_offset,
-				    size_t data_offset,
-				    size_t id_offset)
-{
-	short max_ns = SHRT_MIN;
-	const void *elem;
-	int i, j;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) > max_ns)
-			max_ns = GET_NS_ID(id);
-	}
-
-	return max_ns;
-}
-
-static short find_max_element_id(unsigned int num_elements,
-				 const void **elements,
-				 size_t num_offset,
-				 size_t data_offset,
-				 size_t id_offset,
-				 u16 bucket)
-{
-	short max_id = SHRT_MIN;
-	const void *elem;
-	int i, j;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) == bucket &&
-		    GET_ID(id) > max_id)
-			max_id = GET_ID(id);
-	}
-	return max_id;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld,   \
-				  objects_fld, bucket)			      \
-	find_max_element_id(num_elements, (const void **)(elements),	      \
-			    offsetof(typeof(**elements), num_objects_fld),    \
-			    offsetof(typeof(**elements), objects_fld),	      \
-			    offsetof(typeof(***(*elements)->objects_fld), id),\
-			    bucket)
-
-#define find_max_element_ns_entry_id(num_elements, elements,		    \
-				     num_objects_fld, objects_fld)	    \
-	find_max_element_ns_id(num_elements, (const void **)(elements),	    \
-			      offsetof(typeof(**elements), num_objects_fld),\
-			      offsetof(typeof(**elements), objects_fld),    \
-			      offsetof(typeof(***(*elements)->objects_fld), id))
-
-/*
- * find_max_xxxx_ns_id gets a few elements. Each element is described by an id
- * which its upper bits represents a namespace. It finds the max namespace. This
- * could be used in order to know how many buckets do we need to allocate. If no
- * elements exist, SHRT_MIN is returned. Namespace represents here different
- * buckets. The common example is "common bucket" and "driver bucket".
- *
- * find_max_xxxx_id gets a few elements and a bucket. Each element is described
- * by an id which its upper bits represent a namespace. It returns the max id
- * which is contained in the same namespace defined in @bucket. This could be
- * used in order to know how many elements do we need to allocate in the bucket.
- * If no elements exist, SHRT_MIN is returned.
- */
-
-#define find_max_object_id(num_trees, trees, bucket)			\
-		find_max_element_entry_id(num_trees, trees, num_objects,\
-					  objects, bucket)
-#define find_max_object_ns_id(num_trees, trees)			\
-		find_max_element_ns_entry_id(num_trees, trees,		\
-					     num_objects, objects)
-
-#define find_max_method_id(num_iters, iters, bucket)			\
-		find_max_element_entry_id(num_iters, iters, num_methods,\
-					  methods, bucket)
-#define find_max_method_ns_id(num_iters, iters)			\
-		find_max_element_ns_entry_id(num_iters, iters,		\
-					     num_methods, methods)
-
-#define find_max_attr_id(num_iters, iters, bucket)			\
-		find_max_element_entry_id(num_iters, iters, num_attrs,  \
-					  attrs, bucket)
-#define find_max_attr_ns_id(num_iters, iters)				\
-		find_max_element_ns_entry_id(num_iters, iters,		\
-					     num_attrs, attrs)
-
-static void free_method(struct uverbs_method_spec *method)
-{
-	unsigned int i;
-
-	if (!method)
-		return;
-
-	for (i = 0; i < method->num_buckets; i++)
-		kfree(method->attr_buckets[i]);
-
-	kfree(method);
-}
-
-#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \
-			      (attr)->type == UVERBS_ATTR_TYPE_FD)
-
-/*
- * This function gets array of size @num_method_defs which contains pointers to
- * method definitions @method_defs. The function allocates an
- * uverbs_method_spec structure and initializes its number of buckets and the
- * elements in buckets to the correct attributes. While doing that, it
- * validates that there aren't conflicts between attributes of different
- * method_defs.
- */
-static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs,
-							  size_t num_method_defs)
-{
-	int bucket_idx;
-	int max_attr_buckets = 0;
-	size_t num_attr_buckets = 0;
-	int res = 0;
-	struct uverbs_method_spec *method = NULL;
-	const struct uverbs_attr_def **attr_defs;
-	unsigned int num_of_singularities = 0;
-
-	max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs);
-	if (max_attr_buckets >= 0)
-		num_attr_buckets = max_attr_buckets + 1;
-
-	method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets),
-			 GFP_KERNEL);
-	if (!method)
-		return ERR_PTR(-ENOMEM);
-
-	method->num_buckets = num_attr_buckets;
-	attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL);
-	if (!attr_defs) {
-		res = -ENOMEM;
-		goto free_method;
-	}
-	for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		int attr_max_bucket = 0;
-		struct uverbs_attr_spec_hash *hash = NULL;
-
-		attr_max_bucket = find_max_attr_id(num_method_defs, method_defs,
-						   bucket_idx);
-		if (attr_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(sizeof(*hash) +
-			       ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
-				     sizeof(long)) +
-			       BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-		hash->num_attrs = attr_max_bucket + 1;
-		method->num_child_attrs += hash->num_attrs;
-		hash->mandatory_attrs_bitmask = (void *)(hash + 1) +
-						 ALIGN(sizeof(*hash->attrs) *
-						       (attr_max_bucket + 1),
-						       sizeof(long));
-
-		method->attr_buckets[bucket_idx] = hash;
-
-		do {
-			size_t			 num_attr_defs;
-			struct uverbs_attr_spec	*attr;
-			bool attr_obj_with_special_access;
-
-			num_attr_defs =
-				get_attrs_above_id(attr_defs,
-						   num_method_defs,
-						   method_defs,
-						   bucket_idx,
-						   &min_id);
-			/* Last attr in bucket */
-			if (!num_attr_defs)
-				break;
-
-			if (num_attr_defs > 1) {
-				/*
-				 * We don't allow two attribute definitions for
-				 * the same attribute. This is usually a
-				 * programmer error. If required, it's better to
-				 * just add a new attribute to capture the new
-				 * semantics.
-				 */
-				res = -EEXIST;
-				goto free;
-			}
-
-			attr = &hash->attrs[min_id];
-			memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
-
-			attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
-				   (attr->u.obj.access == UVERBS_ACCESS_NEW ||
-				    attr->u.obj.access == UVERBS_ACCESS_DESTROY);
-			num_of_singularities +=  !!attr_obj_with_special_access;
-			if (WARN(num_of_singularities > 1,
-				 "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
-				 min_id) ||
-			    WARN(attr_obj_with_special_access &&
-				 !attr->mandatory,
-				 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
-				 min_id) ||
-			    WARN(IS_ATTR_OBJECT(attr) &&
-				 attr->zero_trailing,
-				 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
-				 min_id)) {
-				res = -EINVAL;
-				goto free;
-			}
-
-			if (attr->mandatory)
-				set_bit(min_id, hash->mandatory_attrs_bitmask);
-			min_id++;
-
-		} while (1);
-	}
-	kfree(attr_defs);
-	return method;
-
-free:
-	kfree(attr_defs);
-free_method:
-	free_method(method);
-	return ERR_PTR(res);
-}
-
-static void free_object(struct uverbs_object_spec *object)
-{
-	unsigned int i, j;
-
-	if (!object)
-		return;
-
-	for (i = 0; i < object->num_buckets; i++) {
-		struct uverbs_method_spec_hash	*method_buckets =
-			object->method_buckets[i];
-
-		if (!method_buckets)
-			continue;
-
-		for (j = 0; j < method_buckets->num_methods; j++)
-			free_method(method_buckets->methods[j]);
-
-		kfree(method_buckets);
-	}
-
-	kfree(object);
-}
-
-/*
- * This function gets array of size @num_object_defs which contains pointers to
- * object definitions @object_defs. The function allocated an
- * uverbs_object_spec structure and initialize its number of buckets and the
- * elements in buckets to the correct methods. While doing that, it
- * sorts out the correct relationship between conflicts in the same method.
- */
-static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs,
-							    size_t num_object_defs)
-{
-	u16 bucket_idx;
-	int max_method_buckets = 0;
-	u16 num_method_buckets = 0;
-	int res = 0;
-	struct uverbs_object_spec *object = NULL;
-	const struct uverbs_method_def **method_defs;
-
-	max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs);
-	if (max_method_buckets >= 0)
-		num_method_buckets = max_method_buckets + 1;
-
-	object = kzalloc(struct_size(object, method_buckets,
-				     num_method_buckets),
-			 GFP_KERNEL);
-	if (!object)
-		return ERR_PTR(-ENOMEM);
-
-	object->num_buckets = num_method_buckets;
-	method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL);
-	if (!method_defs) {
-		res = -ENOMEM;
-		goto free_object;
-	}
-
-	for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		int methods_max_bucket = 0;
-		struct uverbs_method_spec_hash *hash = NULL;
-
-		methods_max_bucket = find_max_method_id(num_object_defs, object_defs,
-							bucket_idx);
-		if (methods_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(struct_size(hash, methods,
-					   methods_max_bucket + 1),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-
-		hash->num_methods = methods_max_bucket + 1;
-		object->method_buckets[bucket_idx] = hash;
-
-		do {
-			size_t				num_method_defs;
-			struct uverbs_method_spec	*method;
-			int i;
-
-			num_method_defs =
-				get_methods_above_id(method_defs,
-						     num_object_defs,
-						     object_defs,
-						     bucket_idx,
-						     &min_id);
-			/* Last method in bucket */
-			if (!num_method_defs)
-				break;
-
-			method = build_method_with_attrs(method_defs,
-							 num_method_defs);
-			if (IS_ERR(method)) {
-				res = PTR_ERR(method);
-				goto free;
-			}
-
-			/*
-			 * The last tree which is given as an argument to the
-			 * merge overrides previous method handler.
-			 * Therefore, we iterate backwards and search for the
-			 * first handler which != NULL. This also defines the
-			 * set of flags used for this handler.
-			 */
-			for (i = num_method_defs - 1;
-			     i >= 0 && !method_defs[i]->handler; i--)
-				;
-			hash->methods[min_id++] = method;
-			/* NULL handler isn't allowed */
-			if (WARN(i < 0,
-				 "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n",
-				 min_id)) {
-				res = -EINVAL;
-				goto free;
-			}
-			method->handler = method_defs[i]->handler;
-			method->flags = method_defs[i]->flags;
-
-		} while (1);
-	}
-	kfree(method_defs);
-	return object;
-
-free:
-	kfree(method_defs);
-free_object:
-	free_object(object);
-	return ERR_PTR(res);
-}
-
-void uverbs_free_spec_tree(struct uverbs_root_spec *root)
-{
-	unsigned int i, j;
-
-	if (!root)
-		return;
-
-	for (i = 0; i < root->num_buckets; i++) {
-		struct uverbs_object_spec_hash *object_hash =
-			root->object_buckets[i];
-
-		if (!object_hash)
-			continue;
-
-		for (j = 0; j < object_hash->num_objects; j++)
-			free_object(object_hash->objects[j]);
-
-		kfree(object_hash);
-	}
-
-	kfree(root);
-}
-
-struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-						const struct uverbs_object_tree_def **trees)
-{
-	u16 bucket_idx;
-	short max_object_buckets = 0;
-	size_t num_objects_buckets = 0;
-	struct uverbs_root_spec *root_spec = NULL;
-	const struct uverbs_object_def **object_defs;
-	int i;
-	int res = 0;
-
-	max_object_buckets = find_max_object_ns_id(num_trees, trees);
-	/*
-	 * Devices which don't want to support ib_uverbs, should just allocate
-	 * an empty parsing tree. Every user-space command won't hit any valid
-	 * entry in the parsing tree and thus will fail.
-	 */
-	if (max_object_buckets >= 0)
-		num_objects_buckets = max_object_buckets + 1;
-
-	root_spec = kzalloc(struct_size(root_spec, object_buckets,
-					num_objects_buckets),
-			    GFP_KERNEL);
-	if (!root_spec)
-		return ERR_PTR(-ENOMEM);
-	root_spec->num_buckets = num_objects_buckets;
-
-	object_defs = kcalloc(num_trees, sizeof(*object_defs),
-			      GFP_KERNEL);
-	if (!object_defs) {
-		res = -ENOMEM;
-		goto free_root;
-	}
-
-	for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		short objects_max_bucket;
-		struct uverbs_object_spec_hash *hash = NULL;
-
-		objects_max_bucket = find_max_object_id(num_trees, trees,
-							bucket_idx);
-		if (objects_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(struct_size(hash, objects,
-					   objects_max_bucket + 1),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-		hash->num_objects = objects_max_bucket + 1;
-		root_spec->object_buckets[bucket_idx] = hash;
-
-		do {
-			size_t				num_object_defs;
-			struct uverbs_object_spec	*object;
-
-			num_object_defs = get_objects_above_id(object_defs,
-							       num_trees,
-							       trees,
-							       bucket_idx,
-							       &min_id);
-			/* Last object in bucket */
-			if (!num_object_defs)
-				break;
-
-			object = build_object_with_methods(object_defs,
-							   num_object_defs);
-			if (IS_ERR(object)) {
-				res = PTR_ERR(object);
-				goto free;
-			}
-
-			/*
-			 * The last tree which is given as an argument to the
-			 * merge overrides previous object's type_attrs.
-			 * Therefore, we iterate backwards and search for the
-			 * first type_attrs which != NULL.
-			 */
-			for (i = num_object_defs - 1;
-			     i >= 0 && !object_defs[i]->type_attrs; i--)
-				;
-			/*
-			 * NULL is a valid type_attrs. It means an object we
-			 * can't instantiate (like DEVICE).
-			 */
-			object->type_attrs = i < 0 ? NULL :
-				object_defs[i]->type_attrs;
-
-			hash->objects[min_id++] = object;
-		} while (1);
-	}
-
-	kfree(object_defs);
-	return root_spec;
-
-free:
-	kfree(object_defs);
-free_root:
-	uverbs_free_spec_tree(root_spec);
-	return ERR_PTR(res);
-}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 0fab083cafef..823beca448e1 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -176,7 +176,6 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
 
 	uverbs_destroy_api(dev->uapi);
 	cleanup_srcu_struct(&dev->disassociate_srcu);
-	uverbs_free_spec_tree(dev->specs_root);
 	kfree(dev);
 }
 
@@ -998,37 +997,12 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 static int ib_uverbs_create_uapi(struct ib_device *device,
 				 struct ib_uverbs_device *uverbs_dev)
 {
-	const struct uverbs_object_tree_def **specs;
-	struct uverbs_root_spec *specs_root;
-	unsigned int num_specs = 1;
 	struct uverbs_api *uapi;
-	unsigned int i;
-
-	if (device->driver_specs)
-		for (i = 0; device->driver_specs[i]; i++)
-			num_specs++;
-
-	specs = kmalloc_array(num_specs, sizeof(*specs), GFP_KERNEL);
-	if (!specs)
-		return -ENOMEM;
-
-	specs[0] = uverbs_default_get_objects();
-	if (device->driver_specs)
-		for (i = 0; device->driver_specs[i]; i++)
-			specs[i+1] = device->driver_specs[i];
-
-	specs_root = uverbs_alloc_spec_tree(num_specs, specs);
-	kfree(specs);
-	if (IS_ERR(specs_root))
-		return PTR_ERR(specs_root);
 
 	uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
-	if (IS_ERR(uapi)) {
-		uverbs_free_spec_tree(specs_root);
+	if (IS_ERR(uapi))
 		return PTR_ERR(uapi);
-	}
 
-	uverbs_dev->specs_root = specs_root;
 	uverbs_dev->uapi = uapi;
 	return 0;
 }
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 24ef8d9ac631..9e997c3c2f04 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -114,46 +114,6 @@ struct uverbs_attr_spec {
 	} u2;
 };
 
-struct uverbs_attr_spec_hash {
-	size_t				num_attrs;
-	unsigned long			*mandatory_attrs_bitmask;
-	struct uverbs_attr_spec		attrs[0];
-};
-
-struct uverbs_attr_bundle;
-struct ib_uverbs_file;
-
-struct uverbs_method_spec {
-	/* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
-	u32						flags;
-	size_t						num_buckets;
-	size_t						num_child_attrs;
-	int (*handler)(struct ib_uverbs_file *ufile,
-		       struct uverbs_attr_bundle *ctx);
-	struct uverbs_attr_spec_hash		*attr_buckets[0];
-};
-
-struct uverbs_method_spec_hash {
-	size_t					num_methods;
-	struct uverbs_method_spec		*methods[0];
-};
-
-struct uverbs_object_spec {
-	const struct uverbs_obj_type		*type_attrs;
-	size_t					num_buckets;
-	struct uverbs_method_spec_hash		*method_buckets[0];
-};
-
-struct uverbs_object_spec_hash {
-	size_t					num_objects;
-	struct uverbs_object_spec		*objects[0];
-};
-
-struct uverbs_root_spec {
-	size_t					num_buckets;
-	struct uverbs_object_spec_hash		*object_buckets[0];
-};
-
 /*
  * Information about the API is loaded into a radix tree. For IOCTL we start
  * with a tuple of:
@@ -673,55 +633,4 @@ static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
 }
 #endif
 
-/* =================================================
- *	 Definitions -> Specs infrastructure
- * =================================================
- */
-
-/*
- * uverbs_alloc_spec_tree - Merges different common and driver specific feature
- *	into one parsing tree that every uverbs command will be parsed upon.
- *
- * @num_trees: Number of trees in the array @trees.
- * @trees: Array of pointers to tree root definitions to merge. Each such tree
- *	   possibly contains objects, methods and attributes definitions.
- *
- * Returns:
- *	uverbs_root_spec *: The root of the merged parsing tree.
- *	On error, we return an error code. Error is checked via IS_ERR.
- *
- * The following merges could take place:
- * a. Two trees representing the same method with different handler
- *	-> We take the handler of the tree that its handler != NULL
- *	   and its index in the trees array is greater. The incentive for that
- *	   is that developers are expected to first merge common trees and then
- *	   merge trees that gives specialized the behaviour.
- * b. Two trees representing the same object with different
- *    type_attrs (struct uverbs_obj_type):
- *	-> We take the type_attrs of the tree that its type_attr != NULL
- *	   and its index in the trees array is greater. This could be used
- *	   in order to override the free function, allocation size, etc.
- * c. Two trees representing the same method attribute (same id but possibly
- *    different attributes):
- *	-> ERROR (-ENOENT), we believe that's not the programmer's intent.
- *
- * An object without any methods is considered invalid and will abort the
- * function with -ENOENT error.
- */
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-						const struct uverbs_object_tree_def **trees);
-void uverbs_free_spec_tree(struct uverbs_root_spec *root);
-#else
-static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-							      const struct uverbs_object_tree_def **trees)
-{
-	return NULL;
-}
-
-static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root)
-{
-}
-#endif
-
 #endif
-- 
cgit v1.2.3


From 62b0194368147def8c5a77ce604a125d620fc582 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@dabbelt.com>
Date: Sat, 4 Aug 2018 10:23:19 +0200
Subject: clocksource: new RISC-V SBI timer driver

The RISC-V ISA defines a per-hart real-time clock and timer, which is
present on all systems.  The clock is accessed via the 'rdtime'
pseudo-instruction (which reads a CSR), and the timer is set via an SBI
call.

Contains various improvements from Atish Patra <atish.patra@wdc.com>.

Signed-off-by: Dmitriy Cherkasov <dmitriy@oss-tech.org>
Signed-off-by: Palmer Dabbelt <palmer@dabbelt.com>
[hch: remove dead code, add SPDX tags, used riscv_of_processor_hart(),
 minor cleanups, merged  hotplug cpu support and other improvements
 from Atish]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/smp.h      |   3 --
 arch/riscv/kernel/irq.c           |   3 ++
 arch/riscv/kernel/smpboot.c       |   1 -
 arch/riscv/kernel/time.c          |   9 +---
 drivers/clocksource/Kconfig       |  11 ++++
 drivers/clocksource/Makefile      |   1 +
 drivers/clocksource/riscv_timer.c | 105 ++++++++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h        |   1 +
 8 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 drivers/clocksource/riscv_timer.c

(limited to 'include')

diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index c9395fff246f..36016845461d 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -24,9 +24,6 @@
 
 #ifdef CONFIG_SMP
 
-/* SMP initialization hook for setup_arch */
-void __init init_clockevent(void);
-
 /* SMP initialization hook for setup_arch */
 void __init setup_smp(void);
 
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index ab5f3e22c7cc..0cfac48a1272 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -30,6 +30,9 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs, unsigned long cause)
 
 	irq_enter();
 	switch (cause & ~INTERRUPT_CAUSE_FLAG) {
+	case INTERRUPT_CAUSE_TIMER:
+		riscv_timer_interrupt();
+		break;
 #ifdef CONFIG_SMP
 	case INTERRUPT_CAUSE_SOFTWARE:
 		/*
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index f741458c5a3f..56abab6a9812 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -104,7 +104,6 @@ asmlinkage void __init smp_callin(void)
 	current->active_mm = mm;
 
 	trap_init();
-	init_clockevent();
 	notify_cpu_starting(smp_processor_id());
 	set_cpu_online(smp_processor_id(), 1);
 	local_flush_tlb_all();
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 0df9b2cbd645..1911c8f6b8a6 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -18,12 +18,6 @@
 
 unsigned long riscv_timebase;
 
-void __init init_clockevent(void)
-{
-	timer_probe();
-	csr_set(sie, SIE_STIE);
-}
-
 void __init time_init(void)
 {
 	struct device_node *cpu;
@@ -35,6 +29,5 @@ void __init time_init(void)
 	riscv_timebase = prop;
 
 	lpj_fine = riscv_timebase / HZ;
-
-	init_clockevent();
+	timer_probe();
 }
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index dec0dd88ec15..a11f4ba98b05 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -609,4 +609,15 @@ config ATCPIT100_TIMER
 	help
 	  This option enables support for the Andestech ATCPIT100 timers.
 
+config RISCV_TIMER
+	bool "Timer for the RISC-V platform"
+	depends on RISCV
+	default y
+	select TIMER_PROBE
+	select TIMER_OF
+	help
+	  This enables the per-hart timer built into all RISC-V systems, which
+	  is accessed via both the SBI and the rdcycle instruction.  This is
+	  required for all RISC-V systems.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 00caf37e52f9..ded31f720bd9 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_H8300_TPU)			+= h8300_tpu.o
 obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
+obj-$(CONFIG_RISCV_TIMER)		+= riscv_timer.o
diff --git a/drivers/clocksource/riscv_timer.c b/drivers/clocksource/riscv_timer.c
new file mode 100644
index 000000000000..4e8b347e43e2
--- /dev/null
+++ b/drivers/clocksource/riscv_timer.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <asm/sbi.h>
+
+/*
+ * All RISC-V systems have a timer attached to every hart.  These timers can be
+ * read by the 'rdcycle' pseudo instruction, and can use the SBI to setup
+ * events.  In order to abstract the architecture-specific timer reading and
+ * setting functions away from the clock event insertion code, we provide
+ * function pointers to the clockevent subsystem that perform two basic
+ * operations: rdtime() reads the timer on the current CPU, and
+ * next_event(delta) sets the next timer event to 'delta' cycles in the future.
+ * As the timers are inherently a per-cpu resource, these callbacks perform
+ * operations on the current hart.  There is guaranteed to be exactly one timer
+ * per hart on all RISC-V systems.
+ */
+
+static int riscv_clock_next_event(unsigned long delta,
+		struct clock_event_device *ce)
+{
+	csr_set(sie, SIE_STIE);
+	sbi_set_timer(get_cycles64() + delta);
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
+	.name			= "riscv_timer_clockevent",
+	.features		= CLOCK_EVT_FEAT_ONESHOT,
+	.rating			= 100,
+	.set_next_event		= riscv_clock_next_event,
+};
+
+/*
+ * It is guaranteed that all the timers across all the harts are synchronized
+ * within one tick of each other, so while this could technically go
+ * backwards when hopping between CPUs, practically it won't happen.
+ */
+static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
+{
+	return get_cycles64();
+}
+
+static DEFINE_PER_CPU(struct clocksource, riscv_clocksource) = {
+	.name		= "riscv_clocksource",
+	.rating		= 300,
+	.mask		= CLOCKSOURCE_MASK(BITS_PER_LONG),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.read		= riscv_clocksource_rdtime,
+};
+
+static int riscv_timer_starting_cpu(unsigned int cpu)
+{
+	struct clock_event_device *ce = per_cpu_ptr(&riscv_clock_event, cpu);
+
+	ce->cpumask = cpumask_of(cpu);
+	clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
+
+	csr_set(sie, SIE_STIE);
+	return 0;
+}
+
+static int riscv_timer_dying_cpu(unsigned int cpu)
+{
+	csr_clear(sie, SIE_STIE);
+	return 0;
+}
+
+/* called directly from the low-level interrupt handler */
+void riscv_timer_interrupt(void)
+{
+	struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
+
+	csr_clear(sie, SIE_STIE);
+	evdev->event_handler(evdev);
+}
+
+static int __init riscv_timer_init_dt(struct device_node *n)
+{
+	int cpu_id = riscv_of_processor_hart(n), error;
+	struct clocksource *cs;
+
+	if (cpu_id != smp_processor_id())
+		return 0;
+
+	cs = per_cpu_ptr(&riscv_clocksource, cpu_id);
+	clocksource_register_hz(cs, riscv_timebase);
+
+	error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING,
+			 "clockevents/riscv/timer:starting",
+			 riscv_timer_starting_cpu, riscv_timer_dying_cpu);
+	if (error)
+		pr_err("RISCV timer register failed [%d] for cpu = [%d]\n",
+		       error, cpu_id);
+	return error;
+}
+
+TIMER_OF_DECLARE(riscv_timer, "riscv", riscv_timer_init_dt);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 8796ba387152..554c27f6cfbd 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -125,6 +125,7 @@ enum cpuhp_state {
 	CPUHP_AP_MARCO_TIMER_STARTING,
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
+	CPUHP_AP_RISCV_TIMER_STARTING,
 	CPUHP_AP_KVM_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
-- 
cgit v1.2.3


From 0b243d004ea640875115d1500ec429a3e9f9fae9 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Fri, 10 Aug 2018 20:46:41 +0530
Subject: net/tls: Combined memory allocation for decryption request

For preparing decryption request, several memory chunks are required
(aead_req, sgin, sgout, iv, aad). For submitting the decrypt request to
an accelerator, it is required that the buffers which are read by the
accelerator must be dma-able and not come from stack. The buffers for
aad and iv can be separately kmalloced each, but it is inefficient.
This patch does a combined allocation for preparing decryption request
and then segments into aead_req || sgin || sgout || iv || aad.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h |   4 -
 net/tls/tls_sw.c  | 238 ++++++++++++++++++++++++++++++++----------------------
 2 files changed, 142 insertions(+), 100 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index d8b3b6578c01..d5c683e8bb22 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -124,10 +124,6 @@ struct tls_sw_context_rx {
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
-
-	char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE];
-	char rx_aad_plaintext[TLS_AAD_SPACE_SIZE];
-
 };
 
 struct tls_record_info {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 83d67df33f0c..52fbe727d7c1 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -48,19 +48,13 @@ static int tls_do_decryption(struct sock *sk,
 			     struct scatterlist *sgout,
 			     char *iv_recv,
 			     size_t data_len,
-			     struct sk_buff *skb,
-			     gfp_t flags)
+			     struct aead_request *aead_req)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct aead_request *aead_req;
-
 	int ret;
 
-	aead_req = aead_request_alloc(ctx->aead_recv, flags);
-	if (!aead_req)
-		return -ENOMEM;
-
+	aead_request_set_tfm(aead_req, ctx->aead_recv);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
 	aead_request_set_crypt(aead_req, sgin, sgout,
 			       data_len + tls_ctx->rx.tag_size,
@@ -69,8 +63,6 @@ static int tls_do_decryption(struct sock *sk,
 				  crypto_req_done, &ctx->async_wait);
 
 	ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
-
-	aead_request_free(aead_req);
 	return ret;
 }
 
@@ -657,8 +649,132 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 	return skb;
 }
 
+/* This function decrypts the input skb into either out_iov or in out_sg
+ * or in skb buffers itself. The input parameter 'zc' indicates if
+ * zero-copy mode needs to be tried or not. With zero-copy mode, either
+ * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are
+ * NULL, then the decryption happens inside skb buffers itself, i.e.
+ * zero-copy gets disabled and 'zc' is updated.
+ */
+
+static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
+			    struct iov_iter *out_iov,
+			    struct scatterlist *out_sg,
+			    int *chunk, bool *zc)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
+	int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
+	struct aead_request *aead_req;
+	struct sk_buff *unused;
+	u8 *aad, *iv, *mem = NULL;
+	struct scatterlist *sgin = NULL;
+	struct scatterlist *sgout = NULL;
+	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size;
+
+	if (*zc && (out_iov || out_sg)) {
+		if (out_iov)
+			n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
+		else
+			n_sgout = sg_nents(out_sg);
+	} else {
+		n_sgout = 0;
+		*zc = false;
+	}
+
+	n_sgin = skb_cow_data(skb, 0, &unused);
+	if (n_sgin < 1)
+		return -EBADMSG;
+
+	/* Increment to accommodate AAD */
+	n_sgin = n_sgin + 1;
+
+	nsg = n_sgin + n_sgout;
+
+	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
+	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
+	mem_size = mem_size + TLS_AAD_SPACE_SIZE;
+	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
+
+	/* Allocate a single block of memory which contains
+	 * aead_req || sgin[] || sgout[] || aad || iv.
+	 * This order achieves correct alignment for aead_req, sgin, sgout.
+	 */
+	mem = kmalloc(mem_size, sk->sk_allocation);
+	if (!mem)
+		return -ENOMEM;
+
+	/* Segment the allocated memory */
+	aead_req = (struct aead_request *)mem;
+	sgin = (struct scatterlist *)(mem + aead_size);
+	sgout = sgin + n_sgin;
+	aad = (u8 *)(sgout + n_sgout);
+	iv = aad + TLS_AAD_SPACE_SIZE;
+
+	/* Prepare IV */
+	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
+			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+			    tls_ctx->rx.iv_size);
+	if (err < 0) {
+		kfree(mem);
+		return err;
+	}
+	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+
+	/* Prepare AAD */
+	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size,
+		     tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
+		     ctx->control);
+
+	/* Prepare sgin */
+	sg_init_table(sgin, n_sgin);
+	sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE);
+	err = skb_to_sgvec(skb, &sgin[1],
+			   rxm->offset + tls_ctx->rx.prepend_size,
+			   rxm->full_len - tls_ctx->rx.prepend_size);
+	if (err < 0) {
+		kfree(mem);
+		return err;
+	}
+
+	if (n_sgout) {
+		if (out_iov) {
+			sg_init_table(sgout, n_sgout);
+			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);
+
+			*chunk = 0;
+			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
+						 chunk, &sgout[1],
+						 (n_sgout - 1), false);
+			if (err < 0)
+				goto fallback_to_reg_recv;
+		} else if (out_sg) {
+			memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
+		} else {
+			goto fallback_to_reg_recv;
+		}
+	} else {
+fallback_to_reg_recv:
+		sgout = sgin;
+		pages = 0;
+		*chunk = 0;
+		*zc = false;
+	}
+
+	/* Prepare and submit AEAD request */
+	err = tls_do_decryption(sk, sgin, sgout, iv, data_len, aead_req);
+
+	/* Release the pages in case iov was mapped to pages */
+	for (; pages > 0; pages--)
+		put_page(sg_page(&sgout[pages]));
+
+	kfree(mem);
+	return err;
+}
+
 static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
-			      struct scatterlist *sgout, bool *zc)
+			      struct iov_iter *dest, int *chunk, bool *zc)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -671,7 +787,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 		return err;
 #endif
 	if (!ctx->decrypted) {
-		err = decrypt_skb(sk, skb, sgout);
+		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc);
 		if (err < 0)
 			return err;
 	} else {
@@ -690,54 +806,10 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout)
 {
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	char iv[TLS_CIPHER_AES_GCM_128_SALT_SIZE + MAX_IV_SIZE];
-	struct scatterlist sgin_arr[MAX_SKB_FRAGS + 2];
-	struct scatterlist *sgin = &sgin_arr[0];
-	struct strp_msg *rxm = strp_msg(skb);
-	int ret, nsg = ARRAY_SIZE(sgin_arr);
-	struct sk_buff *unused;
-
-	ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
-			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			    tls_ctx->rx.iv_size);
-	if (ret < 0)
-		return ret;
-
-	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
-	if (!sgout) {
-		nsg = skb_cow_data(skb, 0, &unused) + 1;
-		sgin = kmalloc_array(nsg, sizeof(*sgin), sk->sk_allocation);
-		sgout = sgin;
-	}
-
-	sg_init_table(sgin, nsg);
-	sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE);
-
-	nsg = skb_to_sgvec(skb, &sgin[1],
-			   rxm->offset + tls_ctx->rx.prepend_size,
-			   rxm->full_len - tls_ctx->rx.prepend_size);
-	if (nsg < 0) {
-		ret = nsg;
-		goto out;
-	}
-
-	tls_make_aad(ctx->rx_aad_ciphertext,
-		     rxm->full_len - tls_ctx->rx.overhead_size,
-		     tls_ctx->rx.rec_seq,
-		     tls_ctx->rx.rec_seq_size,
-		     ctx->control);
-
-	ret = tls_do_decryption(sk, sgin, sgout, iv,
-				rxm->full_len - tls_ctx->rx.overhead_size,
-				skb, sk->sk_allocation);
-
-out:
-	if (sgin != &sgin_arr[0])
-		kfree(sgin);
+	bool zc = true;
+	int chunk;
 
-	return ret;
+	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc);
 }
 
 static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
@@ -816,43 +888,17 @@ int tls_sw_recvmsg(struct sock *sk,
 		}
 
 		if (!ctx->decrypted) {
-			int page_count;
-			int to_copy;
-
-			page_count = iov_iter_npages(&msg->msg_iter,
-						     MAX_SKB_FRAGS);
-			to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
-			if (!is_kvec && to_copy <= len && page_count < MAX_SKB_FRAGS &&
-			    likely(!(flags & MSG_PEEK)))  {
-				struct scatterlist sgin[MAX_SKB_FRAGS + 1];
-				int pages = 0;
+			int to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
 
+			if (!is_kvec && to_copy <= len &&
+			    likely(!(flags & MSG_PEEK)))
 				zc = true;
-				sg_init_table(sgin, MAX_SKB_FRAGS + 1);
-				sg_set_buf(&sgin[0], ctx->rx_aad_plaintext,
-					   TLS_AAD_SPACE_SIZE);
-
-				err = zerocopy_from_iter(sk, &msg->msg_iter,
-							 to_copy, &pages,
-							 &chunk, &sgin[1],
-							 MAX_SKB_FRAGS,	false);
-				if (err < 0)
-					goto fallback_to_reg_recv;
-
-				err = decrypt_skb_update(sk, skb, sgin, &zc);
-				for (; pages > 0; pages--)
-					put_page(sg_page(&sgin[pages]));
-				if (err < 0) {
-					tls_err_abort(sk, EBADMSG);
-					goto recv_end;
-				}
-			} else {
-fallback_to_reg_recv:
-				err = decrypt_skb_update(sk, skb, NULL, &zc);
-				if (err < 0) {
-					tls_err_abort(sk, EBADMSG);
-					goto recv_end;
-				}
+
+			err = decrypt_skb_update(sk, skb, &msg->msg_iter,
+						 &chunk, &zc);
+			if (err < 0) {
+				tls_err_abort(sk, EBADMSG);
+				goto recv_end;
 			}
 			ctx->decrypted = true;
 		}
@@ -903,7 +949,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	int err = 0;
 	long timeo;
 	int chunk;
-	bool zc;
+	bool zc = false;
 
 	lock_sock(sk);
 
@@ -920,7 +966,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 	if (!ctx->decrypted) {
-		err = decrypt_skb_update(sk, skb, NULL, &zc);
+		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc);
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
-- 
cgit v1.2.3


From e6f86b0f7ae473969a3301b74bf98af9e42ecd0e Mon Sep 17 00:00:00 2001
From: Virgile Jarry <virgile@acceis.fr>
Date: Fri, 10 Aug 2018 17:48:15 +0200
Subject: ipv6: Add icmp_echo_ignore_all support for ICMPv6

Preventing the kernel from responding to ICMP Echo Requests messages
can be useful in several ways. The sysctl parameter
'icmp_echo_ignore_all' can be used to prevent the kernel from
responding to IPv4 ICMP echo requests. For IPv6 pings, such
a sysctl kernel parameter did not exist.

Add the ability to prevent the kernel from responding to IPv6
ICMP echo requests through the use of the following sysctl
parameter : /proc/sys/net/ipv6/icmp/echo_ignore_all.
Update the documentation to reflect this change.

Signed-off-by: Virgile Jarry <virgile@acceis.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  5 +++++
 include/net/netns/ipv6.h               |  1 +
 include/uapi/linux/sysctl.h            |  3 ++-
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/icmp.c                        | 16 +++++++++++++---
 5 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index e74515ecaa9c..8313a636dd53 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1882,6 +1882,11 @@ ratelimit - INTEGER
 	otherwise the minimal space between responses in milliseconds.
 	Default: 1000
 
+echo_ignore_all - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol.
+	Default: 0
+
 xfrm6_gc_thresh - INTEGER
 	The threshold at which we will start garbage collecting for IPv6
 	destination cache entries.  At twice this value the system will
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 762ac9931b62..f0e396ab9bec 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -32,6 +32,7 @@ struct netns_sysctl_ipv6 {
 	int flowlabel_consistency;
 	int auto_flowlabels;
 	int icmpv6_time;
+	int icmpv6_echo_ignore_all;
 	int anycast_src_echo_reply;
 	int ip_nonlocal_bind;
 	int fwmark_reflect;
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 6b58371b1f0d..d71013fffaf6 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -575,7 +575,8 @@ enum {
 
 /* /proc/sys/net/ipv6/icmp */
 enum {
-	NET_IPV6_ICMP_RATELIMIT=1
+	NET_IPV6_ICMP_RATELIMIT = 1,
+	NET_IPV6_ICMP_ECHO_IGNORE_ALL = 2
 };
 
 /* /proc/sys/net/<protocol>/neigh/<dev> */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 020f6e14a7af..673bba31eb18 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -832,6 +832,7 @@ static int __net_init inet6_net_init(struct net *net)
 
 	net->ipv6.sysctl.bindv6only = 0;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
+	net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
 	net->ipv6.sysctl.flowlabel_consistency = 1;
 	net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
 	net->ipv6.sysctl.idgen_retries = 3;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7f6b1f81c200..c9c53ade55c3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -794,6 +794,7 @@ out:
 
 static int icmpv6_rcv(struct sk_buff *skb)
 {
+	struct net *net = dev_net(skb->dev);
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	const struct in6_addr *saddr, *daddr;
@@ -843,7 +844,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 
 	switch (type) {
 	case ICMPV6_ECHO_REQUEST:
-		icmpv6_echo_reply(skb);
+		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
+			icmpv6_echo_reply(skb);
 		break;
 
 	case ICMPV6_ECHO_REPLY:
@@ -1104,6 +1106,13 @@ static struct ctl_table ipv6_icmp_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_ms_jiffies,
 	},
+	{
+		.procname	= "echo_ignore_all",
+		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler = proc_dointvec,
+	},
 	{ },
 };
 
@@ -1115,9 +1124,10 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
 			sizeof(ipv6_icmp_table_template),
 			GFP_KERNEL);
 
-	if (table)
+	if (table) {
 		table[0].data = &net->ipv6.sysctl.icmpv6_time;
-
+		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
+	}
 	return table;
 }
 #endif
-- 
cgit v1.2.3


From 9af18e56d43ca4864ce65c3542c513827c2697de Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sun, 12 Aug 2018 09:14:03 -0400
Subject: cpumask: make cpumask_next_wrap available without smp

The kbuild robot shows build failure on machines without CONFIG_SMP:

  drivers/net/virtio_net.c:1916:10: error:
    implicit declaration of function 'cpumask_next_wrap'

cpumask_next_wrap is exported from lib/cpumask.o, which has

    lib-$(CONFIG_SMP) += cpumask.o

same as other functions, also define it as static inline in the
NR_CPUS==1 branch in include/linux/cpumask.h.

If wrap is true and next == start, return nr_cpumask_bits, or 1.
Else wrap across the range of valid cpus, here [0].

Fixes: 2ca653d607ce ("virtio_net: Stripe queue affinities across cores.")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 57f20a0a7794..147bdec42215 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -159,6 +159,13 @@ static inline unsigned int cpumask_next_and(int n,
 	return n+1;
 }
 
+static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
+					     int start, bool wrap)
+{
+	/* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
+	return (wrap && n == 0);
+}
+
 /* cpu must be a valid cpu, ie 0, so there's no other choice. */
 static inline unsigned int cpumask_any_but(const struct cpumask *mask,
 					   unsigned int cpu)
-- 
cgit v1.2.3


From 0192e7d46c776f7f735560bfa4d40eac3fd700f9 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 13 Aug 2018 10:26:52 +0800
Subject: net: Change the layout of structure trace_event_raw_fib_table_lookup

There is an unalignment access about the structure
'trace_event_raw_fib_table_lookup'.

In include/trace/events/fib.h, there is a memory operation which casting
the 'src' data member to a pointer, and then store a value to this
pointer point to.

p32 = (__be32 *) __entry->src;
*p32 = flp->saddr;

The offset of 'src' in structure trace_event_raw_fib_table_lookup is not
four bytes alignment. On some architectures, they don't permit the
unalignment access, it need to pay the price to handle this situation in
exception handler.

Adjust the layout of structure to avoid this case.

Fixes: 9f323973c915 ("net/ipv4: Udate fib_table_lookup tracepoint")
Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 9763cddd0594..6271bab63bfb 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -22,6 +22,7 @@ TRACE_EVENT(fib_table_lookup,
 		__field(	int,	err		)
 		__field(	int,	oif		)
 		__field(	int,	iif		)
+		__field(	u8,	proto		)
 		__field(	__u8,	tos		)
 		__field(	__u8,	scope		)
 		__field(	__u8,	flags		)
@@ -31,7 +32,6 @@ TRACE_EVENT(fib_table_lookup,
 		__array(	__u8,	saddr,	4	)
 		__field(	u16,	sport		)
 		__field(	u16,	dport		)
-		__field(	u8,	proto		)
 		__dynamic_array(char,  name,   IFNAMSIZ )
 	),
 
-- 
cgit v1.2.3


From cf916ffbe0c628bb072ea829f300cff1874162ce Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 29 Apr 2018 09:17:34 -0500
Subject: net/mlx5: Improve argument name for add flow API

The last argument to mlx5_add_flow_rules passes the number of
destinations in the struct pointed to by the dest arg. Change the name
to better reflect this fact.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 ++++----
 include/linux/mlx5/fs.h                           | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index a21df24b695e..261cb6aacf12 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1876,7 +1876,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
 		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
-		    int dest_num)
+		    int num_dest)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_destination gen_dest = {};
@@ -1889,7 +1889,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!fwd_next_prio_supported(ft))
 			return ERR_PTR(-EOPNOTSUPP);
-		if (dest_num)
+		if (num_dest)
 			return ERR_PTR(-EINVAL);
 		mutex_lock(&root->chain_lock);
 		next_ft = find_next_chained_ft(prio);
@@ -1897,7 +1897,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 			gen_dest.ft = next_ft;
 			dest = &gen_dest;
-			dest_num = 1;
+			num_dest = 1;
 			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		} else {
 			mutex_unlock(&root->chain_lock);
@@ -1905,7 +1905,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		}
 	}
 
-	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);
+	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!IS_ERR_OR_NULL(handle) &&
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index c40f2fc68655..71fb503b2b52 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -177,7 +177,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		    struct mlx5_flow_spec *spec,
 		    struct mlx5_flow_act *flow_act,
 		    struct mlx5_flow_destination *dest,
-		    int dest_num);
+		    int num_dest);
 void mlx5_del_flow_rules(struct mlx5_flow_handle *fr);
 
 int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
-- 
cgit v1.2.3


From e4648aa4f98a87cf0a83f73a5864cede073053a0 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Mon, 13 Aug 2018 15:33:01 -0400
Subject: NFS recover from destination server reboot for copies

Mark the destination state to indicate a server-side copy is
happening. On detecting a reboot and recovering open state check
if any state is engaged in a server-side copy, if so, find the
copy and mark it and then signal the waiting thread. Upon wakeup,
if copy was marked then propage EAGAIN to the nfsd_copy_file_range
and restart the copy from scratch.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs42proc.c     | 16 +++++++++++++---
 fs/nfs/nfs4_fs.h       |  3 +++
 fs/nfs/nfs4file.c      |  9 +++++++--
 fs/nfs/nfs4state.c     | 16 ++++++++++++++++
 include/linux/nfs_fs.h |  2 ++
 5 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index d158526d1044..ac5b784a1de0 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -140,6 +140,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	struct nfs4_copy_state *copy;
 	int status = NFS4_OK;
 	bool found_pending = false;
+	struct nfs_open_context *ctx = nfs_file_open_context(dst);
 
 	spin_lock(&server->nfs_client->cl_lock);
 	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
@@ -163,6 +164,7 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
+	copy->parent_state = ctx->state;
 
 	list_add_tail(&copy->copies, &server->ss_copies);
 	spin_unlock(&server->nfs_client->cl_lock);
@@ -172,15 +174,20 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 	list_del_init(&copy->copies);
 	spin_unlock(&server->nfs_client->cl_lock);
 	if (status == -ERESTARTSYS) {
-		nfs42_do_offload_cancel_async(dst, &copy->stateid);
-		kfree(copy);
-		return status;
+		goto out_cancel;
+	} else if (copy->flags) {
+		status = -EAGAIN;
+		goto out_cancel;
 	}
 out:
 	res->write_res.count = copy->count;
 	memcpy(&res->write_res.verifier, &copy->verf, sizeof(copy->verf));
 	status = -copy->error;
 
+	kfree(copy);
+	return status;
+out_cancel:
+	nfs42_do_offload_cancel_async(dst, &copy->stateid);
 	kfree(copy);
 	return status;
 }
@@ -254,6 +261,9 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 		if (!res->commit_res.verf)
 			return -ENOMEM;
 	}
+	set_bit(NFS_CLNT_DST_SSC_COPY_STATE,
+		&dst_lock->open_context->state->flags);
+
 	status = nfs4_call_sync(server->client, server, &msg,
 				&args->seq_args, &res->seq_res, 0);
 	if (status == -ENOTSUPP)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e87d414c40d0..542b2fce0447 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -163,6 +163,9 @@ enum {
 	NFS_STATE_RECOVERY_FAILED,	/* OPEN stateid state recovery failed */
 	NFS_STATE_MAY_NOTIFY_LOCK,	/* server may CB_NOTIFY_LOCK */
 	NFS_STATE_CHANGE_WAIT,		/* A state changing operation is outstanding */
+#ifdef CONFIG_NFS_V4_2
+	NFS_CLNT_DST_SSC_COPY_STATE,    /* dst server open state on client*/
+#endif /* CONFIG_NFS_V4_2 */
 };
 
 struct nfs4_state {
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 6c03d6b570b2..4288a6ecaf75 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -133,10 +133,15 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
 				    struct file *file_out, loff_t pos_out,
 				    size_t count, unsigned int flags)
 {
+	ssize_t ret;
+
 	if (file_inode(file_in) == file_inode(file_out))
 		return -EINVAL;
-
-	return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+retry:
+	ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+	if (ret == -EAGAIN)
+		goto retry;
+	return ret;
 }
 
 static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f1b27e895a94..b6882e09d0f4 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1589,6 +1589,22 @@ restart:
 				}
 				clear_bit(NFS_STATE_RECLAIM_NOGRACE,
 					&state->flags);
+#ifdef CONFIG_NFS_V4_2
+				if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
+					struct nfs4_copy_state *copy;
+
+					spin_lock(&sp->so_server->nfs_client->cl_lock);
+					list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+						if (memcmp(&state->stateid.other, &copy->parent_state->stateid.other, NFS4_STATEID_SIZE))
+							continue;
+						copy->flags = 1;
+						complete(&copy->completion);
+						printk("AGLO: server rebooted waking up the copy\n");
+						break;
+					}
+					spin_unlock(&sp->so_server->nfs_client->cl_lock);
+				}
+#endif /* CONFIG_NFS_V4_2 */
 				nfs4_put_open_state(state);
 				spin_lock(&sp->so_lock);
 				goto restart;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 645ad8e342f6..a0831e9d19c9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -192,6 +192,8 @@ struct nfs4_copy_state {
 	uint64_t		count;
 	struct nfs_writeverf	verf;
 	int			error;
+	int			flags;
+	struct nfs4_state	*parent_state;
 };
 
 /*
-- 
cgit v1.2.3


From 6d43743e9079ac0531b60cde7eadd0f042873344 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Thu, 9 Aug 2018 09:48:52 +0530
Subject: Uprobe: Additional argument arch_uprobe to uprobe_write_opcode()

Add addition argument 'arch_uprobe' to uprobe_write_opcode().
We need this in later set of patches.

Link: http://lkml.kernel.org/r/20180809041856.1547-3-ravi.bangoria@linux.ibm.com

Reviewed-by: Song Liu <songliubraving@fb.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm/probes/uprobes/core.c | 2 +-
 arch/mips/kernel/uprobes.c     | 2 +-
 include/linux/uprobes.h        | 2 +-
 kernel/events/uprobes.c        | 9 +++++----
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c
index d1329f1ba4e4..bf992264060e 100644
--- a/arch/arm/probes/uprobes/core.c
+++ b/arch/arm/probes/uprobes/core.c
@@ -32,7 +32,7 @@ bool is_swbp_insn(uprobe_opcode_t *insn)
 int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	     unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr,
+	return uprobe_write_opcode(auprobe, mm, vaddr,
 		   __opcode_to_mem_arm(auprobe->bpinsn));
 }
 
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index f7a0645ccb82..4aaff3b3175c 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -224,7 +224,7 @@ unsigned long arch_uretprobe_hijack_return_addr(
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0a294e950df8..bb9d2084af03 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -121,7 +121,7 @@ extern bool is_swbp_insn(uprobe_opcode_t *insn);
 extern bool is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
-extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
+extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 471eac896635..c0418ba52ba8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -299,8 +299,8 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
  * Called with mm->mmap_sem held for write.
  * Return 0 (success) or a negative errno.
  */
-int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
-			uprobe_opcode_t opcode)
+int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct page *old_page, *new_page;
 	struct vm_area_struct *vma;
@@ -351,7 +351,7 @@ put_old:
  */
 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
+	return uprobe_write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
 }
 
 /**
@@ -366,7 +366,8 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned
 int __weak
 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	return uprobe_write_opcode(mm, vaddr, *(uprobe_opcode_t *)&auprobe->insn);
+	return uprobe_write_opcode(auprobe, mm, vaddr,
+			*(uprobe_opcode_t *)&auprobe->insn);
 }
 
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
-- 
cgit v1.2.3


From 96d18d8254dc5a3f0067a629866af4165b3afe32 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 1 Aug 2018 14:57:59 -0700
Subject: inet/connection_sock: prefer _THIS_IP_ to current_text_addr

As part of the effort to reduce the code duplication between _THIS_IP_
and current_text_addr(), let's consolidate callers of
current_text_addr() to use _THIS_IP_.

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fa43b82607d9..371b3b45fd5c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/poll.h>
+#include <linux/kernel.h>
 
 #include <net/inet_sock.h>
 #include <net/request_sock.h>
@@ -225,7 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 
 	if (when > max_when) {
 		pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
-			 sk, what, when, current_text_addr());
+			 sk, what, when, (void *)_THIS_IP_);
 		when = max_when;
 	}
 
-- 
cgit v1.2.3


From b72ae8cac0caff86fe414fceb940655c2d1371c9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sun, 29 Jul 2018 16:16:56 +0300
Subject: PCI: Add PCI_DEVICE_DATA() macro to fully describe device ID entry

There are a lot of examples in the kernel where PCI_VDEVICE() is used and
still looks not so convenient due to additional driver_data field attached.

Introduce PCI_DEVICE_DATA() macro to fully describe device ID entry in
shortest possible form. For example,

  before:

    { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
      (kernel_ulong_t) &dwc3_pci_mrfld_properties, },

  after:

    { PCI_DEVICE_DATA(INTEL, MRFLD, &dwc3_pci_mrfld_properties) },

Drivers can be converted later on in independent way.

While here, remove the unused macro with the same name from Ralink wireless
driver.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kalle Valo <kvalo@codeaurora.org>	# for rt2x00
---
 drivers/net/wireless/ralink/rt2x00/rt2x00pci.h |  6 ------
 include/linux/pci.h                            | 15 +++++++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h b/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
index bc0ca5f58f38..283e2e607bba 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00pci.h
@@ -27,12 +27,6 @@
 #include <linux/io.h>
 #include <linux/pci.h>
 
-/*
- * This variable should be used with the
- * pci_driver structure initialization.
- */
-#define PCI_DEVICE_DATA(__ops)	.driver_data = (kernel_ulong_t)(__ops)
-
 /*
  * PCI driver handlers.
  */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 340029b2fb38..bf3665c534c2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -819,6 +819,21 @@ struct pci_driver {
 	.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
 	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
 
+/**
+ * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form
+ * @vend: the vendor name (without PCI_VENDOR_ID_ prefix)
+ * @dev: the device name (without PCI_DEVICE_ID_<vend>_ prefix)
+ * @data: the driver data to be filled
+ *
+ * This macro is used to create a struct pci_device_id that matches a
+ * specific PCI device.  The subvendor, and subdevice fields will be set
+ * to PCI_ANY_ID.
+ */
+#define PCI_DEVICE_DATA(vend, dev, data) \
+	.vendor = PCI_VENDOR_ID_##vend, .device = PCI_DEVICE_ID_##vend##_##dev, \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0, \
+	.driver_data = (kernel_ulong_t)(data)
+
 enum {
 	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* Ignore firmware setup */
 	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* Reassign all bus numbers */
-- 
cgit v1.2.3


From d46b6537f0ceed303047da918ed951f074a99288 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 10 Aug 2018 14:42:54 -0700
Subject: ACPICA: AML Parser: ignore all exceptions resulting from incorrect
 AML during table load

Macros to classify different AML exception codes have been added in
order to ignore the exceptions,

Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
[ rjw: Fix damaged white space ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/psloop.c | 26 +++++++++++++-------------
 include/acpi/acexcep.h       |  6 ++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 44f35ab3347d..42f694f4481e 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -709,20 +709,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
 			} else
 			    if ((walk_state->
 				 parse_flags & ACPI_PARSE_MODULE_LEVEL)
-				&& status != AE_CTRL_TRANSFER
-				&& ACPI_FAILURE(status)) {
+				&& (ACPI_AML_EXCEPTION(status)
+				    || status == AE_ALREADY_EXISTS
+				    || status == AE_NOT_FOUND)) {
 				/*
-				 * ACPI_PARSE_MODULE_LEVEL flag means that we are currently
-				 * loading a table by executing it as a control method.
-				 * However, if we encounter an error while loading the table,
-				 * we need to keep trying to load the table rather than
-				 * aborting the table load (setting the status to AE_OK
-				 * continues the table load). If we get a failure at this
-				 * point, it means that the dispatcher got an error while
-				 * processing Op (most likely an AML operand error) or a
-				 * control method was called from module level and the
-				 * dispatcher returned AE_CTRL_TRANSFER. In the latter case,
-				 * leave the status alone, there's nothing wrong with it.
+				 * ACPI_PARSE_MODULE_LEVEL flag means that we
+				 * are currently loading a table by executing
+				 * it as a control method. However, if we
+				 * encounter an error while loading the table,
+				 * we need to keep trying to load the table
+				 * rather than aborting the table load (setting
+				 * the status to AE_OK continues the table
+				 * load). If we get a failure at this point, it
+				 * means that the dispatcher got an error while
+				 * trying to execute the Op.
 				 */
 				status = AE_OK;
 			}
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 226e5aeba6c2..856c56ef0143 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -59,6 +59,12 @@ struct acpi_exception_info {
 
 #define AE_OK                           (acpi_status) 0x0000
 
+#define ACPI_ENV_EXCEPTION(status)      (status & AE_CODE_ENVIRONMENTAL)
+#define ACPI_AML_EXCEPTION(status)      (status & AE_CODE_AML)
+#define ACPI_PROG_EXCEPTION(status)     (status & AE_CODE_PROGRAMMER)
+#define ACPI_TABLE_EXCEPTION(status)    (status & AE_CODE_ACPI_TABLES)
+#define ACPI_CNTL_EXCEPTION(status)     (status & AE_CODE_CONTROL)
+
 /*
  * Environmental exceptions
  */
-- 
cgit v1.2.3


From 8b23570ab001c1982c8a068cde468ff067255314 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 10 Aug 2018 14:43:02 -0700
Subject: ACPICA: Reference Counts: increase max to 0x4000 for large servers

Increase the reference count limit to 0x4000 as the current one is
not sufficient for some large server systems.

Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Tested-by: Russ Anderson <russ.anderson@hpe.com>
Reported-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acconfig.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 012c55cb22ba..e6964e97acdd 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -89,7 +89,7 @@
 
 /* Maximum object reference count (detects object deletion issues) */
 
-#define ACPI_MAX_REFERENCE_COUNT        0x1000
+#define ACPI_MAX_REFERENCE_COUNT        0x4000
 
 /* Default page size for use in mapping memory for operation regions */
 
-- 
cgit v1.2.3


From 4efa829d195172d630dcd1b2f9b239bcce69639e Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 10 Aug 2018 14:43:04 -0700
Subject: ACPICA: Update version to 20180810

Version 20180810.

Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 88072c92ace2..9566f99cc3c0 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20180629
+#define ACPI_CA_VERSION                 0x20180810
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 6855dc41b24619c3d1de3dbd27dd0546b0e45272 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 6 Jun 2018 15:54:11 +0300
Subject: x86: Add entry trampolines to kcore

Without program headers for PTI entry trampoline pages, the trampoline
virtual addresses do not map to anything.

Example before:

 sudo gdb --quiet vmlinux /proc/kcore
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0 root=UUID=a6096b83-b763-4101-807e-f33daff63233'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  Cannot access memory at address 0xfffffe0000006000
 (gdb) quit

After:

 sudo gdb --quiet vmlinux /proc/kcore
 [sudo] password for ahunter:
 Reading symbols from vmlinux...done.
 [New process 1]
 Core was generated by `BOOT_IMAGE=/boot/vmlinuz-4.16.0-fix-4-00005-gd6e65a8b4072 root=UUID=a6096b83-b7'.
 #0  0x0000000000000000 in irq_stack_union ()
 (gdb) x /21ib 0xfffffe0000006000
    0xfffffe0000006000:  swapgs
    0xfffffe0000006003:  mov    %rsp,-0x3e12(%rip)        # 0xfffffe00000021f8
    0xfffffe000000600a:  xchg   %ax,%ax
    0xfffffe000000600c:  mov    %cr3,%rsp
    0xfffffe000000600f:  bts    $0x3f,%rsp
    0xfffffe0000006014:  and    $0xffffffffffffe7ff,%rsp
    0xfffffe000000601b:  mov    %rsp,%cr3
    0xfffffe000000601e:  mov    -0x3019(%rip),%rsp        # 0xfffffe000000300c
    0xfffffe0000006025:  pushq  $0x2b
    0xfffffe0000006027:  pushq  -0x3e35(%rip)        # 0xfffffe00000021f8
    0xfffffe000000602d:  push   %r11
    0xfffffe000000602f:  pushq  $0x33
    0xfffffe0000006031:  push   %rcx
    0xfffffe0000006032:  push   %rdi
    0xfffffe0000006033:  mov    $0xffffffff91a00010,%rdi
    0xfffffe000000603a:  callq  0xfffffe0000006046
    0xfffffe000000603f:  pause
    0xfffffe0000006041:  lfence
    0xfffffe0000006044:  jmp    0xfffffe000000603f
    0xfffffe0000006046:  mov    %rdi,(%rsp)
    0xfffffe000000604a:  retq
 (gdb) quit

In addition, entry trampolines all map to the same page.  Represent that
by giving the corresponding program headers in kcore the same offset.

This has the benefit that, when perf tools uses /proc/kcore as a source
for kernel object code, samples from different CPU trampolines are
aggregated together.  Note, such aggregation is normal for profiling
i.e. people want to profile the object code, not every different virtual
address the object code might be mapped to (across different processes
for example).

Notes by PeterZ:

This also adds the KCORE_REMAP functionality.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/1528289651-4113-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/mm/cpu_entry_area.c | 10 ++++++++++
 fs/proc/kcore.c              |  7 +++++--
 include/linux/kcore.h        | 13 +++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index fab49fd5190f..076ebdce9bd4 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -3,6 +3,7 @@
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
 #include <linux/kallsyms.h>
+#include <linux/kcore.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
@@ -14,6 +15,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU(struct kcore_list, kcore_entry_trampoline);
 #endif
 
 struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -147,6 +149,14 @@ static void __init setup_cpu_entry_area(int cpu)
 
 	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
 		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+	/*
+	 * The cpu_entry_area alias addresses are not in the kernel binary
+	 * so they do not show up in /proc/kcore normally.  This adds entries
+	 * for them manually.
+	 */
+	kclist_add_remap(&per_cpu(kcore_entry_trampoline, cpu),
+			 _entry_trampoline,
+			 &get_cpu_entry_area(cpu)->entry_trampoline, PAGE_SIZE);
 #endif
 	percpu_setup_debug_store(cpu);
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e64ecb9f2720..00282f134336 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -383,8 +383,11 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 		phdr->p_type	= PT_LOAD;
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= kc_vaddr_to_offset(m->addr) + dataoff;
-		phdr->p_vaddr	= (size_t)m->addr;
-		if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+		if (m->type == KCORE_REMAP)
+			phdr->p_vaddr	= (size_t)m->vaddr;
+		else
+			phdr->p_vaddr	= (size_t)m->addr;
+		if (m->type == KCORE_RAM || m->type == KCORE_TEXT || m->type == KCORE_REMAP)
 			phdr->p_paddr	= __pa(m->addr);
 		else
 			phdr->p_paddr	= (elf_addr_t)-1;
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..bc088ef96358 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -12,11 +12,13 @@ enum kcore_type {
 	KCORE_VMEMMAP,
 	KCORE_USER,
 	KCORE_OTHER,
+	KCORE_REMAP,
 };
 
 struct kcore_list {
 	struct list_head list;
 	unsigned long addr;
+	unsigned long vaddr;
 	size_t size;
 	int type;
 };
@@ -36,11 +38,22 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+	m->vaddr = (unsigned long)vaddr;
+	kclist_add(m, addr, sz, KCORE_REMAP);
+}
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
+
+static inline
+void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
+{
+}
 #endif
 
 #endif /* _LINUX_KCORE_H */
-- 
cgit v1.2.3


From c355aa465fce5b446789348a2c50c3eb58ee6756 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@bootlin.com>
Date: Mon, 2 Jul 2018 11:43:51 +0200
Subject: ubi: expose the volume CRC check skip flag

Now that we have the logic for skipping CRC check for static UBI volumes
in the core, let's expose it to users.

This makes use of a padding byte in the volume description data
structure as a flag. This flag only tell for now whether we should skip
the CRC check of a volume.

This checks the UBI volume for which we are trying to skip the CRC check
is static.

Let's also make sure that the flags passed to verify_mkvol_req are
valid.

We voluntarily do not take into account the skip_check flag in
vol_cdev_write() as we want to make sure what we wrote was correctly
written.

Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Quentin Schulz <quentin.schulz@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/cdev.c      | 11 +++++++++++
 drivers/mtd/ubi/vmt.c       |  3 +++
 include/uapi/mtd/ubi-user.h | 18 ++++++++++++++++--
 3 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 45c329694a5e..22547d7a84ea 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -367,6 +367,10 @@ static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
 			return count;
 		}
 
+		/*
+		 * We voluntarily do not take into account the skip_check flag
+		 * as we want to make sure what we wrote was correctly written.
+		 */
 		err = ubi_check_volume(ubi, vol->vol_id);
 		if (err < 0)
 			return err;
@@ -622,6 +626,13 @@ static int verify_mkvol_req(const struct ubi_device *ubi,
 	    req->vol_type != UBI_STATIC_VOLUME)
 		goto bad;
 
+	if (req->flags & ~UBI_VOL_VALID_FLGS)
+		goto bad;
+
+	if (req->flags & UBI_VOL_SKIP_CRC_CHECK_FLG &&
+	    req->vol_type != UBI_STATIC_VOLUME)
+		goto bad;
+
 	if (req->alignment > ubi->leb_size)
 		goto bad;
 
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index e2606a4e1c9e..729588b94e41 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -174,6 +174,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
 	vol->dev.class = &ubi_class;
 	vol->dev.groups = volume_dev_groups;
 
+	if (req->flags & UBI_VOL_SKIP_CRC_CHECK_FLG)
+		vol->skip_check = 1;
+
 	spin_lock(&ubi->volumes_lock);
 	if (vol_id == UBI_VOL_NUM_AUTO) {
 		/* Find unused volume ID */
diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
index 5b04a494d139..aad3b6201fc0 100644
--- a/include/uapi/mtd/ubi-user.h
+++ b/include/uapi/mtd/ubi-user.h
@@ -285,6 +285,20 @@ struct ubi_attach_req {
 	__s8 padding[10];
 };
 
+/*
+ * UBI volume flags.
+ *
+ * @UBI_VOL_SKIP_CRC_CHECK_FLG: skip the CRC check done on a static volume at
+ *				open time. Only valid for static volumes and
+ *				should only be used if the volume user has a
+ *				way to verify data integrity
+ */
+enum {
+	UBI_VOL_SKIP_CRC_CHECK_FLG = 0x1,
+};
+
+#define UBI_VOL_VALID_FLGS	(UBI_VOL_SKIP_CRC_CHECK_FLG)
+
 /**
  * struct ubi_mkvol_req - volume description data structure used in
  *                        volume creation requests.
@@ -292,7 +306,7 @@ struct ubi_attach_req {
  * @alignment: volume alignment
  * @bytes: volume size in bytes
  * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME)
- * @padding1: reserved for future, not used, has to be zeroed
+ * @flags: volume flags (%UBI_VOL_SKIP_CRC_CHECK_FLG)
  * @name_len: volume name length
  * @padding2: reserved for future, not used, has to be zeroed
  * @name: volume name
@@ -321,7 +335,7 @@ struct ubi_mkvol_req {
 	__s32 alignment;
 	__s64 bytes;
 	__s8 vol_type;
-	__s8 padding1;
+	__u8 flags;
 	__s16 name_len;
 	__s8 padding2[4];
 	char name[UBI_MAX_VOLUME_NAME + 1];
-- 
cgit v1.2.3


From 40b173ddce0fc6653a859889d1a90b5f5817061b Mon Sep 17 00:00:00 2001
From: Yuval Bason <yuval.bason@cavium.com>
Date: Thu, 9 Aug 2018 17:29:38 +0300
Subject: qedr: Add user space support for SRQ

This patch adds support for SRQ's created in user space and update
qedr_affiliated_event to deal with general SRQ events.

Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: Yuval Bason <yuval.bason@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/qedr/main.c  |  98 +++++++++++++++++++-------
 drivers/infiniband/hw/qedr/verbs.c | 137 +++++++++++++++++++++++++++++++------
 include/uapi/rdma/qedr-abi.h       |  17 +++++
 3 files changed, 208 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 2642caf94b55..a0af6d424aed 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev)
 				     QEDR_UVERBS(MODIFY_QP) |
 				     QEDR_UVERBS(QUERY_QP) |
 				     QEDR_UVERBS(DESTROY_QP) |
+				     QEDR_UVERBS(CREATE_SRQ) |
+				     QEDR_UVERBS(DESTROY_SRQ) |
+				     QEDR_UVERBS(QUERY_SRQ) |
+				     QEDR_UVERBS(MODIFY_SRQ) |
+				     QEDR_UVERBS(POST_SRQ_RECV) |
 				     QEDR_UVERBS(REG_MR) |
 				     QEDR_UVERBS(DEREG_MR) |
 				     QEDR_UVERBS(POLL_CQ) |
@@ -658,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
 #define EVENT_TYPE_NOT_DEFINED	0
 #define EVENT_TYPE_CQ		1
 #define EVENT_TYPE_QP		2
+#define EVENT_TYPE_SRQ		3
 	struct qedr_dev *dev = (struct qedr_dev *)context;
 	struct regpair *async_handle = (struct regpair *)fw_handle;
 	u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
 	u8 event_type = EVENT_TYPE_NOT_DEFINED;
 	struct ib_event event;
+	struct ib_srq *ibsrq;
+	struct qedr_srq *srq;
+	unsigned long flags;
 	struct ib_cq *ibcq;
 	struct ib_qp *ibqp;
 	struct qedr_cq *cq;
 	struct qedr_qp *qp;
+	u16 srq_id;
 
-	switch (e_code) {
-	case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
-		event.event = IB_EVENT_CQ_ERR;
-		event_type = EVENT_TYPE_CQ;
-		break;
-	case ROCE_ASYNC_EVENT_SQ_DRAINED:
-		event.event = IB_EVENT_SQ_DRAINED;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
-		event.event = IB_EVENT_QP_FATAL;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
-		event.event = IB_EVENT_QP_REQ_ERR;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
-		event.event = IB_EVENT_QP_ACCESS_ERR;
-		event_type = EVENT_TYPE_QP;
-		break;
-	default:
+	if (IS_ROCE(dev)) {
+		switch (e_code) {
+		case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
+			event.event = IB_EVENT_CQ_ERR;
+			event_type = EVENT_TYPE_CQ;
+			break;
+		case ROCE_ASYNC_EVENT_SQ_DRAINED:
+			event.event = IB_EVENT_SQ_DRAINED;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
+			event.event = IB_EVENT_QP_FATAL;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
+			event.event = IB_EVENT_QP_REQ_ERR;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
+			event.event = IB_EVENT_QP_ACCESS_ERR;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_SRQ_LIMIT:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		case ROCE_ASYNC_EVENT_SRQ_EMPTY:
+			event.event = IB_EVENT_SRQ_ERR;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		default:
+			DP_ERR(dev, "unsupported event %d on handle=%llx\n",
+			       e_code, roce_handle64);
+		}
+	} else {
+		switch (e_code) {
+		case QED_IWARP_EVENT_SRQ_LIMIT:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		case QED_IWARP_EVENT_SRQ_EMPTY:
+			event.event = IB_EVENT_SRQ_ERR;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		default:
 		DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code,
 		       roce_handle64);
+		}
 	}
-
 	switch (event_type) {
 	case EVENT_TYPE_CQ:
 		cq = (struct qedr_cq *)(uintptr_t)roce_handle64;
@@ -727,6 +760,25 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
 		}
 		DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp);
 		break;
+	case EVENT_TYPE_SRQ:
+		srq_id = (u16)roce_handle64;
+		spin_lock_irqsave(&dev->srqidr.idr_lock, flags);
+		srq = idr_find(&dev->srqidr.idr, srq_id);
+		if (srq) {
+			ibsrq = &srq->ibsrq;
+			if (ibsrq->event_handler) {
+				event.device = ibsrq->device;
+				event.element.srq = ibsrq;
+				ibsrq->event_handler(&event,
+						     ibsrq->srq_context);
+			}
+		} else {
+			DP_NOTICE(dev,
+				  "SRQ event with NULL pointer ibsrq. Handle=%llx\n",
+				  roce_handle64);
+		}
+		spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags);
+		DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq);
 	default:
 		break;
 	}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 7c75fc36e5ec..3f46fc14ee38 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1199,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
 	return 0;
 }
 
+static int qedr_copy_srq_uresp(struct qedr_dev *dev,
+			       struct qedr_srq *srq, struct ib_udata *udata)
+{
+	struct qedr_create_srq_uresp uresp = {};
+	int rc;
+
+	uresp.srq_id = srq->srq_id;
+
+	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rc)
+		DP_ERR(dev, "create srq: problem copying data to user space\n");
+
+	return rc;
+}
+
 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
 			       struct qedr_create_qp_uresp *uresp,
 			       struct qedr_qp *qp)
@@ -1321,6 +1336,13 @@ static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
 	return 0;
 }
 
+static void qedr_free_srq_user_params(struct qedr_srq *srq)
+{
+	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+	ib_umem_release(srq->usrq.umem);
+	ib_umem_release(srq->prod_umem);
+}
+
 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
 {
 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
@@ -1333,6 +1355,37 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
 			  hw_srq->phy_prod_pair_addr);
 }
 
+static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
+				     struct qedr_srq *srq,
+				     struct qedr_create_srq_ureq *ureq,
+				     int access, int dmasync)
+{
+	struct scatterlist *sg;
+	int rc;
+
+	rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
+				  ureq->srq_len, access, dmasync, 1);
+	if (rc)
+		return rc;
+
+	srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
+				     sizeof(struct rdma_srq_producers),
+				     access, dmasync);
+	if (IS_ERR(srq->prod_umem)) {
+		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+		ib_umem_release(srq->usrq.umem);
+		DP_ERR(srq->dev,
+		       "create srq: failed ib_umem_get for producer, got %ld\n",
+		       PTR_ERR(srq->prod_umem));
+		return PTR_ERR(srq->prod_umem);
+	}
+
+	sg = srq->prod_umem->sg_head.sgl;
+	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
+
+	return 0;
+}
+
 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
 					struct qedr_dev *dev,
 					struct ib_srq_init_attr *init_attr)
@@ -1390,10 +1443,12 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
 	struct qed_rdma_create_srq_out_params out_params;
 	struct qedr_pd *pd = get_qedr_pd(ibpd);
+	struct qedr_create_srq_ureq ureq = {};
 	u64 pbl_base_addr, phy_prod_pair_addr;
+	struct ib_ucontext *ib_ctx = NULL;
 	struct qedr_srq_hwq_info *hw_srq;
+	struct qedr_ucontext *ctx = NULL;
 	u32 page_cnt, page_size;
-	struct qed_chain *pbl;
 	struct qedr_srq *srq;
 	int rc = 0;
 
@@ -1416,15 +1471,38 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 	hw_srq->max_wr = init_attr->attr.max_wr;
 	hw_srq->max_sges = init_attr->attr.max_sge;
 
-	rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
-	if (rc)
-		goto err0;
+	if (udata && ibpd->uobject && ibpd->uobject->context) {
+		ib_ctx = ibpd->uobject->context;
+		ctx = get_qedr_ucontext(ib_ctx);
+
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+			DP_ERR(dev,
+			       "create srq: problem copying data from user space\n");
+			goto err0;
+		}
+
+		rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
+		if (rc)
+			goto err0;
+
+		page_cnt = srq->usrq.pbl_info.num_pbes;
+		pbl_base_addr = srq->usrq.pbl_tbl->pa;
+		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+		page_size = BIT(srq->usrq.umem->page_shift);
+	} else {
+		struct qed_chain *pbl;
+
+		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
+		if (rc)
+			goto err0;
+
+		pbl = &hw_srq->pbl;
+		page_cnt = qed_chain_get_page_cnt(pbl);
+		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
+		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+		page_size = QED_CHAIN_PAGE_SIZE;
+	}
 
-	pbl = &hw_srq->pbl;
-	page_cnt = qed_chain_get_page_cnt(pbl);
-	pbl_base_addr = qed_chain_get_pbl_phys(pbl);
-	phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
-	page_size = QED_CHAIN_PAGE_SIZE;
 	in_params.pd_id = pd->pd_id;
 	in_params.pbl_base_addr = pbl_base_addr;
 	in_params.prod_pair_addr = phy_prod_pair_addr;
@@ -1437,6 +1515,12 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 
 	srq->srq_id = out_params.srq_id;
 
+	if (udata) {
+		rc = qedr_copy_srq_uresp(dev, srq, udata);
+		if (rc)
+			goto err2;
+	}
+
 	rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
 	if (rc)
 		goto err2;
@@ -1450,7 +1534,10 @@ err2:
 
 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
 err1:
-	qedr_free_srq_kernel_params(srq);
+	if (udata)
+		qedr_free_srq_user_params(srq);
+	else
+		qedr_free_srq_kernel_params(srq);
 err0:
 	kfree(srq);
 
@@ -1467,7 +1554,10 @@ int qedr_destroy_srq(struct ib_srq *ibsrq)
 	in_params.srq_id = srq->srq_id;
 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
 
-	qedr_free_srq_kernel_params(srq);
+	if (ibsrq->pd->uobject)
+		qedr_free_srq_user_params(srq);
+	else
+		qedr_free_srq_kernel_params(srq);
 
 	DP_DEBUG(dev, QEDR_MSG_SRQ,
 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
@@ -1593,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
 
 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
-
-	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
-	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
+	if (!qp->srq) {
+		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
+		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
+	}
 
 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
@@ -1641,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	if (rc)
 		return rc;
 
-	/* RQ - read access only (0), dma sync not required (0) */
-	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
-				  ureq.rq_len, 0, 0, alloc_and_init);
-	if (rc)
-		return rc;
+	if (!qp->srq) {
+		/* RQ - read access only (0), dma sync not required (0) */
+		rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
+					  ureq.rq_len, 0, 0, alloc_and_init);
+		if (rc)
+			return rc;
+	}
 
 	memset(&in_params, 0, sizeof(in_params));
 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
@@ -1653,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	in_params.qp_handle_hi = ureq.qp_handle_hi;
 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
-	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
-	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+	if (!qp->srq) {
+		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
+		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+	}
 
 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
 					      &in_params, &out_params);
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 24c658b3c790..7a10b3a325fa 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -111,4 +111,21 @@ struct qedr_create_qp_uresp {
 	__u32 reserved;
 };
 
+struct qedr_create_srq_ureq {
+	/* user space virtual address of producer pair */
+	__aligned_u64 prod_pair_addr;
+
+	/* user space virtual address of SRQ buffer */
+	__aligned_u64 srq_addr;
+
+	/* length of SRQ buffer */
+	__aligned_u64 srq_len;
+};
+
+struct qedr_create_srq_uresp {
+	__u16 srq_id;
+	__u16 reserved0;
+	__u32 reserved1;
+};
+
 #endif /* __QEDR_USER_H__ */
-- 
cgit v1.2.3


From 2538fb89b8f4ee9e1c1759cfb3c7989d9ef1f6e7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 14 Aug 2018 16:48:50 -0700
Subject: PCI: Limit config space size for Netronome NFP5000

Like the NFP4000 and NFP6000, the NFP5000 as an erratum where reading/
writing to PCI config space addresses above 0x600 can cause the NFP to
generate PCIe completion timeouts.

Limit the NFP5000's PF's config space size to 0x600 bytes as is already
done for the NFP4000 and NFP6000.

The NFP5000's VF is 0x6003 (PCI_DEVICE_ID_NETRONOME_NFP6000_VF), the same
device ID as the NFP6000's VF.  Thus, its config space is already limited
by the existing use of quirk_nfp6000().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Tony Egan <tony.egan@netronome.com>
---
 drivers/pci/quirks.c    | 1 +
 include/linux/pci_ids.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 502275c092ae..7743cd56b89a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -460,6 +460,7 @@ static void quirk_nfp6000(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP4000,	quirk_nfp6000);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000,	quirk_nfp6000);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP5000,	quirk_nfp6000);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NETRONOME,	PCI_DEVICE_ID_NETRONOME_NFP6000_VF,	quirk_nfp6000);
 
 /*  On IBM Crocodile ipr SAS adapters, expand BAR to system page size */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 29502238e510..381fc009551d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2541,6 +2541,7 @@
 #define PCI_DEVICE_ID_NETRONOME_NFP3200	0x3200
 #define PCI_DEVICE_ID_NETRONOME_NFP3240	0x3240
 #define PCI_DEVICE_ID_NETRONOME_NFP4000	0x4000
+#define PCI_DEVICE_ID_NETRONOME_NFP5000	0x5000
 #define PCI_DEVICE_ID_NETRONOME_NFP6000	0x6000
 #define PCI_DEVICE_ID_NETRONOME_NFP6000_VF	0x6003
 
-- 
cgit v1.2.3


From 30107fa6908b6c2747ee9100b40af813f99483c3 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 3 Aug 2018 14:08:14 +0300
Subject: mfd: bd71837: Core driver for ROHM BD71837 PMIC

ROHM BD71837 PMIC MFD driver providing interrupts and support
for three subsystems:
- clk
- Regulators
- input/power-key

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig              |  13 ++
 drivers/mfd/Makefile             |   1 +
 drivers/mfd/rohm-bd718x7.c       | 211 +++++++++++++++++++++++++
 include/linux/mfd/rohm-bd718x7.h | 332 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 557 insertions(+)
 create mode 100644 drivers/mfd/rohm-bd718x7.c
 create mode 100644 include/linux/mfd/rohm-bd718x7.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f3fa516011ec..11841f4b7b2b 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1817,6 +1817,19 @@ config MFD_STW481X
 	  in various ST Microelectronics and ST-Ericsson embedded
 	  Nomadik series.
 
+config MFD_ROHM_BD718XX
+	tristate "ROHM BD71837 Power Management IC"
+	depends on I2C=y
+	depends on OF
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select MFD_CORE
+	help
+	  Select this option to get support for the ROHM BD71837
+	  Power Management ICs. BD71837 is designed to power processors like
+	  NXP i.MX8. It contains 8 BUCK outputs and 7 LDOs, voltage monitoring
+	  and emergency shut down as well as 32,768KHz clock output.
+
 config MFD_STM32_LPTIMER
 	tristate "Support for STM32 Low-Power Timer"
 	depends on (ARCH_STM32 && OF) || COMPILE_TEST
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2852a6042ecf..5856a9489cbd 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -239,4 +239,5 @@ obj-$(CONFIG_MFD_STM32_TIMERS) 	+= stm32-timers.o
 obj-$(CONFIG_MFD_MXS_LRADC)     += mxs-lradc.o
 obj-$(CONFIG_MFD_SC27XX_PMIC)	+= sprd-sc27xx-spi.o
 obj-$(CONFIG_RAVE_SP_CORE)	+= rave-sp.o
+obj-$(CONFIG_MFD_ROHM_BD718XX)	+= rohm-bd718x7.o
 
diff --git a/drivers/mfd/rohm-bd718x7.c b/drivers/mfd/rohm-bd718x7.c
new file mode 100644
index 000000000000..75c8ec659547
--- /dev/null
+++ b/drivers/mfd/rohm-bd718x7.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+//
+// Copyright (C) 2018 ROHM Semiconductors
+//
+// ROHM BD71837MWV PMIC driver
+//
+// Datasheet available from
+// https://www.rohm.com/datasheet/BD71837MWV/bd71837mwv-e
+
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/rohm-bd718x7.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+/*
+ * gpio_keys.h requires definiton of bool. It is brought in
+ * by above includes. Keep this as last until gpio_keys.h gets fixed.
+ */
+#include <linux/gpio_keys.h>
+
+static const u8 supported_revisions[] = { 0xA2 /* BD71837 */ };
+
+static struct gpio_keys_button button = {
+	.code = KEY_POWER,
+	.gpio = -1,
+	.type = EV_KEY,
+};
+
+static struct gpio_keys_platform_data bd718xx_powerkey_data = {
+	.buttons = &button,
+	.nbuttons = 1,
+	.name = "bd718xx-pwrkey",
+};
+
+static struct mfd_cell bd71837_mfd_cells[] = {
+	{
+		.name = "gpio-keys",
+		.platform_data = &bd718xx_powerkey_data,
+		.pdata_size = sizeof(bd718xx_powerkey_data),
+	},
+	{ .name = "bd71837-clk", },
+	{ .name = "bd71837-pmic", },
+};
+
+static const struct regmap_irq bd71837_irqs[] = {
+	REGMAP_IRQ_REG(BD71837_INT_SWRST, 0, BD71837_INT_SWRST_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_S, 0, BD71837_INT_PWRBTN_S_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_L, 0, BD71837_INT_PWRBTN_L_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_PWRBTN, 0, BD71837_INT_PWRBTN_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_WDOG, 0, BD71837_INT_WDOG_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_ON_REQ, 0, BD71837_INT_ON_REQ_MASK),
+	REGMAP_IRQ_REG(BD71837_INT_STBY_REQ, 0, BD71837_INT_STBY_REQ_MASK),
+};
+
+static struct regmap_irq_chip bd71837_irq_chip = {
+	.name = "bd71837-irq",
+	.irqs = bd71837_irqs,
+	.num_irqs = ARRAY_SIZE(bd71837_irqs),
+	.num_regs = 1,
+	.irq_reg_stride = 1,
+	.status_base = BD71837_REG_IRQ,
+	.mask_base = BD71837_REG_MIRQ,
+	.ack_base = BD71837_REG_IRQ,
+	.init_ack_masked = true,
+	.mask_invert = false,
+};
+
+static const struct regmap_range pmic_status_range = {
+	.range_min = BD71837_REG_IRQ,
+	.range_max = BD71837_REG_POW_STATE,
+};
+
+static const struct regmap_access_table volatile_regs = {
+	.yes_ranges = &pmic_status_range,
+	.n_yes_ranges = 1,
+};
+
+static const struct regmap_config bd71837_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_table = &volatile_regs,
+	.max_register = BD71837_MAX_REGISTER - 1,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static int bd71837_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct bd71837 *bd71837;
+	int ret, i;
+	unsigned int val;
+
+	bd71837 = devm_kzalloc(&i2c->dev, sizeof(struct bd71837), GFP_KERNEL);
+
+	if (!bd71837)
+		return -ENOMEM;
+
+	bd71837->chip_irq = i2c->irq;
+
+	if (!bd71837->chip_irq) {
+		dev_err(&i2c->dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
+
+	bd71837->dev = &i2c->dev;
+	dev_set_drvdata(&i2c->dev, bd71837);
+
+	bd71837->regmap = devm_regmap_init_i2c(i2c, &bd71837_regmap_config);
+	if (IS_ERR(bd71837->regmap)) {
+		dev_err(&i2c->dev, "regmap initialization failed\n");
+		return PTR_ERR(bd71837->regmap);
+	}
+
+	ret = regmap_read(bd71837->regmap, BD71837_REG_REV, &val);
+	if (ret) {
+		dev_err(&i2c->dev, "Read BD71837_REG_DEVICE failed\n");
+		return ret;
+	}
+	for (i = 0; i < ARRAY_SIZE(supported_revisions); i++)
+		if (supported_revisions[i] == val)
+			break;
+
+	if (i == ARRAY_SIZE(supported_revisions)) {
+		dev_err(&i2c->dev, "Unsupported chip revision\n");
+		return -ENODEV;
+	}
+
+	ret = devm_regmap_add_irq_chip(&i2c->dev, bd71837->regmap,
+				       bd71837->chip_irq, IRQF_ONESHOT, 0,
+				       &bd71837_irq_chip, &bd71837->irq_data);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to add irq_chip\n");
+		return ret;
+	}
+
+	/* Configure short press to 10 milliseconds */
+	ret = regmap_update_bits(bd71837->regmap,
+				 BD71837_REG_PWRONCONFIG0,
+				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
+				 BD718XX_PWRBTN_SHORT_PRESS_10MS);
+	if (ret) {
+		dev_err(&i2c->dev,
+			"Failed to configure button short press timeout\n");
+		return ret;
+	}
+
+	/* Configure long press to 10 seconds */
+	ret = regmap_update_bits(bd71837->regmap,
+				 BD71837_REG_PWRONCONFIG1,
+				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
+				 BD718XX_PWRBTN_LONG_PRESS_10S);
+
+	if (ret) {
+		dev_err(&i2c->dev,
+			"Failed to configure button long press timeout\n");
+		return ret;
+	}
+
+	ret = regmap_irq_get_virq(bd71837->irq_data, BD71837_INT_PWRBTN_S);
+
+	if (ret < 0) {
+		dev_err(&i2c->dev, "Failed to get the IRQ\n");
+		return ret;
+	}
+
+	button.irq = ret;
+
+	ret = devm_mfd_add_devices(bd71837->dev, PLATFORM_DEVID_AUTO,
+				   bd71837_mfd_cells,
+				   ARRAY_SIZE(bd71837_mfd_cells), NULL, 0,
+				   regmap_irq_get_domain(bd71837->irq_data));
+	if (ret)
+		dev_err(&i2c->dev, "Failed to create subdevices\n");
+
+	return ret;
+}
+
+static const struct of_device_id bd71837_of_match[] = {
+	{ .compatible = "rohm,bd71837", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, bd71837_of_match);
+
+static struct i2c_driver bd71837_i2c_driver = {
+	.driver = {
+		.name = "rohm-bd718x7",
+		.of_match_table = bd71837_of_match,
+	},
+	.probe = bd71837_i2c_probe,
+};
+
+static int __init bd71837_i2c_init(void)
+{
+	return i2c_add_driver(&bd71837_i2c_driver);
+}
+
+/* Initialise early so consumer devices can complete system boot */
+subsys_initcall(bd71837_i2c_init);
+
+static void __exit bd71837_i2c_exit(void)
+{
+	i2c_del_driver(&bd71837_i2c_driver);
+}
+module_exit(bd71837_i2c_exit);
+
+MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
+MODULE_DESCRIPTION("ROHM BD71837 Power Management IC driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
new file mode 100644
index 000000000000..a528747f8aed
--- /dev/null
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (C) 2018 ROHM Semiconductors */
+
+#ifndef __LINUX_MFD_BD71837_H__
+#define __LINUX_MFD_BD71837_H__
+
+#include <linux/regmap.h>
+
+enum {
+	BD71837_BUCK1	=	0,
+	BD71837_BUCK2,
+	BD71837_BUCK3,
+	BD71837_BUCK4,
+	BD71837_BUCK5,
+	BD71837_BUCK6,
+	BD71837_BUCK7,
+	BD71837_BUCK8,
+	BD71837_LDO1,
+	BD71837_LDO2,
+	BD71837_LDO3,
+	BD71837_LDO4,
+	BD71837_LDO5,
+	BD71837_LDO6,
+	BD71837_LDO7,
+	BD71837_REGULATOR_CNT,
+};
+
+#define BD71837_BUCK1_VOLTAGE_NUM	0x40
+#define BD71837_BUCK2_VOLTAGE_NUM	0x40
+#define BD71837_BUCK3_VOLTAGE_NUM	0x40
+#define BD71837_BUCK4_VOLTAGE_NUM	0x40
+
+#define BD71837_BUCK5_VOLTAGE_NUM	0x08
+#define BD71837_BUCK6_VOLTAGE_NUM	0x04
+#define BD71837_BUCK7_VOLTAGE_NUM	0x08
+#define BD71837_BUCK8_VOLTAGE_NUM	0x40
+
+#define BD71837_LDO1_VOLTAGE_NUM	0x04
+#define BD71837_LDO2_VOLTAGE_NUM	0x02
+#define BD71837_LDO3_VOLTAGE_NUM	0x10
+#define BD71837_LDO4_VOLTAGE_NUM	0x10
+#define BD71837_LDO5_VOLTAGE_NUM	0x10
+#define BD71837_LDO6_VOLTAGE_NUM	0x10
+#define BD71837_LDO7_VOLTAGE_NUM	0x10
+
+enum {
+	BD71837_REG_REV                = 0x00,
+	BD71837_REG_SWRESET            = 0x01,
+	BD71837_REG_I2C_DEV            = 0x02,
+	BD71837_REG_PWRCTRL0           = 0x03,
+	BD71837_REG_PWRCTRL1           = 0x04,
+	BD71837_REG_BUCK1_CTRL         = 0x05,
+	BD71837_REG_BUCK2_CTRL         = 0x06,
+	BD71837_REG_BUCK3_CTRL         = 0x07,
+	BD71837_REG_BUCK4_CTRL         = 0x08,
+	BD71837_REG_BUCK5_CTRL         = 0x09,
+	BD71837_REG_BUCK6_CTRL         = 0x0A,
+	BD71837_REG_BUCK7_CTRL         = 0x0B,
+	BD71837_REG_BUCK8_CTRL         = 0x0C,
+	BD71837_REG_BUCK1_VOLT_RUN     = 0x0D,
+	BD71837_REG_BUCK1_VOLT_IDLE    = 0x0E,
+	BD71837_REG_BUCK1_VOLT_SUSP    = 0x0F,
+	BD71837_REG_BUCK2_VOLT_RUN     = 0x10,
+	BD71837_REG_BUCK2_VOLT_IDLE    = 0x11,
+	BD71837_REG_BUCK3_VOLT_RUN     = 0x12,
+	BD71837_REG_BUCK4_VOLT_RUN     = 0x13,
+	BD71837_REG_BUCK5_VOLT         = 0x14,
+	BD71837_REG_BUCK6_VOLT         = 0x15,
+	BD71837_REG_BUCK7_VOLT         = 0x16,
+	BD71837_REG_BUCK8_VOLT         = 0x17,
+	BD71837_REG_LDO1_VOLT          = 0x18,
+	BD71837_REG_LDO2_VOLT          = 0x19,
+	BD71837_REG_LDO3_VOLT          = 0x1A,
+	BD71837_REG_LDO4_VOLT          = 0x1B,
+	BD71837_REG_LDO5_VOLT          = 0x1C,
+	BD71837_REG_LDO6_VOLT          = 0x1D,
+	BD71837_REG_LDO7_VOLT          = 0x1E,
+	BD71837_REG_TRANS_COND0        = 0x1F,
+	BD71837_REG_TRANS_COND1        = 0x20,
+	BD71837_REG_VRFAULTEN          = 0x21,
+	BD71837_REG_MVRFLTMASK0        = 0x22,
+	BD71837_REG_MVRFLTMASK1        = 0x23,
+	BD71837_REG_MVRFLTMASK2        = 0x24,
+	BD71837_REG_RCVCFG             = 0x25,
+	BD71837_REG_RCVNUM             = 0x26,
+	BD71837_REG_PWRONCONFIG0       = 0x27,
+	BD71837_REG_PWRONCONFIG1       = 0x28,
+	BD71837_REG_RESETSRC           = 0x29,
+	BD71837_REG_MIRQ               = 0x2A,
+	BD71837_REG_IRQ                = 0x2B,
+	BD71837_REG_IN_MON             = 0x2C,
+	BD71837_REG_POW_STATE          = 0x2D,
+	BD71837_REG_OUT32K             = 0x2E,
+	BD71837_REG_REGLOCK            = 0x2F,
+	BD71837_REG_OTPVER             = 0xFF,
+	BD71837_MAX_REGISTER           = 0x100,
+};
+
+#define REGLOCK_PWRSEQ	0x1
+#define REGLOCK_VREG	0x10
+
+/* Generic BUCK control masks */
+#define BD71837_BUCK_SEL	0x02
+#define BD71837_BUCK_EN		0x01
+#define BD71837_BUCK_RUN_ON	0x04
+
+/* Generic LDO masks */
+#define BD71837_LDO_SEL		0x80
+#define BD71837_LDO_EN		0x40
+
+/* BD71837 BUCK ramp rate CTRL reg bits */
+#define BUCK_RAMPRATE_MASK	0xC0
+#define BUCK_RAMPRATE_10P00MV	0x0
+#define BUCK_RAMPRATE_5P00MV	0x1
+#define BUCK_RAMPRATE_2P50MV	0x2
+#define BUCK_RAMPRATE_1P25MV	0x3
+
+/* BD71837_REG_BUCK1_VOLT_RUN bits */
+#define BUCK1_RUN_MASK		0x3F
+#define BUCK1_RUN_DEFAULT	0x14
+
+/* BD71837_REG_BUCK1_VOLT_SUSP bits */
+#define BUCK1_SUSP_MASK		0x3F
+#define BUCK1_SUSP_DEFAULT	0x14
+
+/* BD71837_REG_BUCK1_VOLT_IDLE bits */
+#define BUCK1_IDLE_MASK		0x3F
+#define BUCK1_IDLE_DEFAULT	0x14
+
+/* BD71837_REG_BUCK2_VOLT_RUN bits */
+#define BUCK2_RUN_MASK		0x3F
+#define BUCK2_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK2_VOLT_IDLE bits */
+#define BUCK2_IDLE_MASK		0x3F
+#define BUCK2_IDLE_DEFAULT	0x14
+
+/* BD71837_REG_BUCK3_VOLT_RUN bits */
+#define BUCK3_RUN_MASK		0x3F
+#define BUCK3_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK4_VOLT_RUN bits */
+#define BUCK4_RUN_MASK		0x3F
+#define BUCK4_RUN_DEFAULT	0x1E
+
+/* BD71837_REG_BUCK5_VOLT bits */
+#define BUCK5_MASK		0x07
+#define BUCK5_DEFAULT		0x02
+
+/* BD71837_REG_BUCK6_VOLT bits */
+#define BUCK6_MASK		0x03
+#define BUCK6_DEFAULT		0x03
+
+/* BD71837_REG_BUCK7_VOLT bits */
+#define BUCK7_MASK		0x07
+#define BUCK7_DEFAULT		0x03
+
+/* BD71837_REG_BUCK8_VOLT bits */
+#define BUCK8_MASK		0x3F
+#define BUCK8_DEFAULT		0x1E
+
+/* BD71837_REG_IRQ bits */
+#define IRQ_SWRST		0x40
+#define IRQ_PWRON_S		0x20
+#define IRQ_PWRON_L		0x10
+#define IRQ_PWRON		0x08
+#define IRQ_WDOG		0x04
+#define IRQ_ON_REQ		0x02
+#define IRQ_STBY_REQ		0x01
+
+/* BD71837_REG_OUT32K bits */
+#define BD71837_OUT32K_EN	0x01
+
+/* BD71837 gated clock rate */
+#define BD71837_CLK_RATE 32768
+
+/* ROHM BD71837 irqs */
+enum {
+	BD71837_INT_STBY_REQ,
+	BD71837_INT_ON_REQ,
+	BD71837_INT_WDOG,
+	BD71837_INT_PWRBTN,
+	BD71837_INT_PWRBTN_L,
+	BD71837_INT_PWRBTN_S,
+	BD71837_INT_SWRST
+};
+
+/* ROHM BD71837 interrupt masks */
+#define BD71837_INT_SWRST_MASK		0x40
+#define BD71837_INT_PWRBTN_S_MASK	0x20
+#define BD71837_INT_PWRBTN_L_MASK	0x10
+#define BD71837_INT_PWRBTN_MASK		0x8
+#define BD71837_INT_WDOG_MASK		0x4
+#define BD71837_INT_ON_REQ_MASK		0x2
+#define BD71837_INT_STBY_REQ_MASK	0x1
+
+/* BD71837_REG_LDO1_VOLT bits */
+#define LDO1_MASK		0x03
+
+/* BD71837_REG_LDO1_VOLT bits */
+#define LDO2_MASK		0x20
+
+/* BD71837_REG_LDO3_VOLT bits */
+#define LDO3_MASK		0x0F
+
+/* BD71837_REG_LDO4_VOLT bits */
+#define LDO4_MASK		0x0F
+
+/* BD71837_REG_LDO5_VOLT bits */
+#define LDO5_MASK		0x0F
+
+/* BD71837_REG_LDO6_VOLT bits */
+#define LDO6_MASK		0x0F
+
+/* BD71837_REG_LDO7_VOLT bits */
+#define LDO7_MASK		0x0F
+
+/* Register write induced reset settings */
+
+/*
+ * Even though the bit zero is not SWRESET type we still want to write zero
+ * to it when changing type. Bit zero is 'SWRESET' trigger bit and if we
+ * write 1 to it we will trigger the action. So always write 0 to it when
+ * changning SWRESET action - no matter what we read from it.
+ */
+#define BD71837_SWRESET_TYPE_MASK	7
+#define BD71837_SWRESET_TYPE_DISABLED	0
+#define BD71837_SWRESET_TYPE_COLD	4
+#define BD71837_SWRESET_TYPE_WARM	6
+
+#define BD71837_SWRESET_RESET_MASK	1
+#define BD71837_SWRESET_RESET		1
+
+/* Poweroff state transition conditions */
+
+#define BD718XX_ON_REQ_POWEROFF_MASK	1
+#define BD718XX_SWRESET_POWEROFF_MASK	2
+#define BD718XX_WDOG_POWEROFF_MASK	4
+#define BD718XX_KEY_L_POWEROFF_MASK	8
+
+#define BD718XX_POWOFF_TO_SNVS	0
+#define BD718XX_POWOFF_TO_RDY	0xF
+
+#define BD718XX_POWOFF_TIME_MASK 0xF0
+enum {
+	BD718XX_POWOFF_TIME_5MS = 0,
+	BD718XX_POWOFF_TIME_10MS,
+	BD718XX_POWOFF_TIME_15MS,
+	BD718XX_POWOFF_TIME_20MS,
+	BD718XX_POWOFF_TIME_25MS,
+	BD718XX_POWOFF_TIME_30MS,
+	BD718XX_POWOFF_TIME_35MS,
+	BD718XX_POWOFF_TIME_40MS,
+	BD718XX_POWOFF_TIME_45MS,
+	BD718XX_POWOFF_TIME_50MS,
+	BD718XX_POWOFF_TIME_75MS,
+	BD718XX_POWOFF_TIME_100MS,
+	BD718XX_POWOFF_TIME_250MS,
+	BD718XX_POWOFF_TIME_500MS,
+	BD718XX_POWOFF_TIME_750MS,
+	BD718XX_POWOFF_TIME_1500MS
+};
+
+/* Poweron sequence state transition conditions */
+#define BD718XX_RDY_TO_SNVS_MASK 0xF
+#define BD718XX_SNVS_TO_RUN_MASK 0xF0
+
+#define BD718XX_PWR_TRIG_KEY_L		1
+#define BD718XX_PWR_TRIG_KEY_S		2
+#define BD718XX_PWR_TRIG_PMIC_ON	4
+#define BD718XX_PWR_TRIG_VSYS_UVLO	8
+#define BD718XX_RDY_TO_SNVS_SIFT	0
+#define BD718XX_SNVS_TO_RUN_SIFT	4
+
+#define BD718XX_PWRBTN_PRESS_DURATION_MASK 0xF
+
+/* Timeout value for detecting short press */
+enum {
+	BD718XX_PWRBTN_SHORT_PRESS_10MS = 0,
+	BD718XX_PWRBTN_SHORT_PRESS_500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_1000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_1500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_2000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_2500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_3000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_3500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_4000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_4500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_5000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_5500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_6000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_6500MS,
+	BD718XX_PWRBTN_SHORT_PRESS_7000MS,
+	BD718XX_PWRBTN_SHORT_PRESS_7500MS
+};
+
+/* Timeout value for detecting LONG press */
+enum {
+	BD718XX_PWRBTN_LONG_PRESS_10MS = 0,
+	BD718XX_PWRBTN_LONG_PRESS_1S,
+	BD718XX_PWRBTN_LONG_PRESS_2S,
+	BD718XX_PWRBTN_LONG_PRESS_3S,
+	BD718XX_PWRBTN_LONG_PRESS_4S,
+	BD718XX_PWRBTN_LONG_PRESS_5S,
+	BD718XX_PWRBTN_LONG_PRESS_6S,
+	BD718XX_PWRBTN_LONG_PRESS_7S,
+	BD718XX_PWRBTN_LONG_PRESS_8S,
+	BD718XX_PWRBTN_LONG_PRESS_9S,
+	BD718XX_PWRBTN_LONG_PRESS_10S,
+	BD718XX_PWRBTN_LONG_PRESS_11S,
+	BD718XX_PWRBTN_LONG_PRESS_12S,
+	BD718XX_PWRBTN_LONG_PRESS_13S,
+	BD718XX_PWRBTN_LONG_PRESS_14S,
+	BD718XX_PWRBTN_LONG_PRESS_15S
+};
+
+struct bd71837_pmic;
+struct bd71837_clk;
+
+struct bd71837 {
+	struct device *dev;
+	struct regmap *regmap;
+	unsigned long int id;
+
+	int chip_irq;
+	struct regmap_irq_chip_data *irq_data;
+
+	struct bd71837_pmic *pmic;
+	struct bd71837_clk *clk;
+};
+
+#endif /* __LINUX_MFD_BD71837_H__ */
-- 
cgit v1.2.3


From d2c9281c184bf2b768ac141a7a10586e0643695d Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 7 Aug 2018 10:08:12 +0100
Subject: mfd: madera: Add register definitions for accessory detect

Add some register definitions for accessory detection, used
by the extcon driver.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/registers.h | 51 ++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/madera/registers.h b/include/linux/mfd/madera/registers.h
index a6b7c4222c5e..977e06101711 100644
--- a/include/linux/mfd/madera/registers.h
+++ b/include/linux/mfd/madera/registers.h
@@ -1219,9 +1219,11 @@
 #define MADERA_IRQ1_STATUS_33				0x1820
 #define MADERA_IRQ1_MASK_1				0x1840
 #define MADERA_IRQ1_MASK_2				0x1841
+#define MADERA_IRQ1_MASK_6				0x1845
 #define MADERA_IRQ1_MASK_33				0x1860
 #define MADERA_IRQ1_RAW_STATUS_1			0x1880
 #define MADERA_IRQ1_RAW_STATUS_2			0x1881
+#define MADERA_IRQ1_RAW_STATUS_7			0x1886
 #define MADERA_IRQ1_RAW_STATUS_15			0x188E
 #define MADERA_IRQ1_RAW_STATUS_33			0x18A0
 #define MADERA_INTERRUPT_DEBOUNCE_7			0x1A06
@@ -1664,6 +1666,42 @@
 #define MADERA_MICB2A_ENA_SHIFT				     0
 #define MADERA_MICB2A_ENA_WIDTH				     1
 
+/* (0x0225) - HP Ctrl 1L */
+#define MADERA_RMV_SHRT_HP1L				0x4000
+#define MADERA_RMV_SHRT_HP1L_MASK			0x4000
+#define MADERA_RMV_SHRT_HP1L_SHIFT			    14
+#define MADERA_RMV_SHRT_HP1L_WIDTH			     1
+#define MADERA_HP1L_FLWR				0x0004
+#define MADERA_HP1L_FLWR_MASK				0x0004
+#define MADERA_HP1L_FLWR_SHIFT				     2
+#define MADERA_HP1L_FLWR_WIDTH				     1
+#define MADERA_HP1L_SHRTI				0x0002
+#define MADERA_HP1L_SHRTI_MASK				0x0002
+#define MADERA_HP1L_SHRTI_SHIFT				     1
+#define MADERA_HP1L_SHRTI_WIDTH				     1
+#define MADERA_HP1L_SHRTO				0x0001
+#define MADERA_HP1L_SHRTO_MASK				0x0001
+#define MADERA_HP1L_SHRTO_SHIFT				     0
+#define MADERA_HP1L_SHRTO_WIDTH				     1
+
+/* (0x0226) - HP Ctrl 1R */
+#define MADERA_RMV_SHRT_HP1R				0x4000
+#define MADERA_RMV_SHRT_HP1R_MASK			0x4000
+#define MADERA_RMV_SHRT_HP1R_SHIFT			    14
+#define MADERA_RMV_SHRT_HP1R_WIDTH			     1
+#define MADERA_HP1R_FLWR				0x0004
+#define MADERA_HP1R_FLWR_MASK				0x0004
+#define MADERA_HP1R_FLWR_SHIFT				     2
+#define MADERA_HP1R_FLWR_WIDTH				     1
+#define MADERA_HP1R_SHRTI				0x0002
+#define MADERA_HP1R_SHRTI_MASK				0x0002
+#define MADERA_HP1R_SHRTI_SHIFT				     1
+#define MADERA_HP1R_SHRTI_WIDTH				     1
+#define MADERA_HP1R_SHRTO				0x0001
+#define MADERA_HP1R_SHRTO_MASK				0x0001
+#define MADERA_HP1R_SHRTO_SHIFT				     0
+#define MADERA_HP1R_SHRTO_WIDTH				     1
+
 /* (0x0293)  Accessory_Detect_Mode_1 */
 #define MADERA_ACCDET_SRC				0x2000
 #define MADERA_ACCDET_SRC_MASK				0x2000
@@ -3766,6 +3804,19 @@
 #define MADERA_DSP1_BUS_ERR_EINT1_SHIFT			     0
 #define MADERA_DSP1_BUS_ERR_EINT1_WIDTH			     1
 
+/* (0x1845)  IRQ1_Mask_6 */
+#define MADERA_IM_MICDET2_EINT1				0x0200
+#define MADERA_IM_MICDET2_EINT1_MASK			0x0200
+#define MADERA_IM_MICDET2_EINT1_SHIFT			     9
+#define MADERA_IM_MICDET2_EINT1_WIDTH			     1
+#define MADERA_IM_MICDET1_EINT1				0x0100
+#define MADERA_IM_MICDET1_EINT1_MASK			0x0100
+#define MADERA_IM_MICDET1_EINT1_SHIFT			     8
+#define MADERA_IM_MICDET1_EINT1_WIDTH			     1
+#define MADERA_IM_HPDET_EINT1				0x0001
+#define MADERA_IM_HPDET_EINT1_MASK			0x0001
+#define MADERA_IM_HPDET_EINT1_SHIFT			     0
+#define MADERA_IM_HPDET_EINT1_WIDTH			     1
 /* (0x184E)  IRQ1_Mask_15 */
 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1		0x0004
 #define MADERA_IM_SPK_OVERHEAT_WARN_EINT1_MASK		0x0004
-- 
cgit v1.2.3


From 817aef260037f33ee0f44c17fe341323d3aebd6d Mon Sep 17 00:00:00 2001
From: Yannik Sembritzki <yannik@sembritzki.me>
Date: Thu, 16 Aug 2018 14:05:10 +0100
Subject: Replace magic for trusting the secondary keyring with #define

Replace the use of a magic number that indicates that verify_*_signature()
should use the secondary keyring with a symbol.

Signed-off-by: Yannik Sembritzki <yannik@sembritzki.me>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: keyrings@vger.kernel.org
Cc: linux-security-module@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 certs/system_keyring.c                  | 3 ++-
 crypto/asymmetric_keys/pkcs7_key_type.c | 2 +-
 include/linux/verification.h            | 6 ++++++
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 6251d1b27f0c..81728717523d 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -15,6 +15,7 @@
 #include <linux/cred.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/verification.h>
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
 #include <crypto/pkcs7.h>
@@ -230,7 +231,7 @@ int verify_pkcs7_signature(const void *data, size_t len,
 
 	if (!trusted_keys) {
 		trusted_keys = builtin_trusted_keys;
-	} else if (trusted_keys == (void *)1UL) {
+	} else if (trusted_keys == VERIFY_USE_SECONDARY_KEYRING) {
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 		trusted_keys = secondary_trusted_keys;
 #else
diff --git a/crypto/asymmetric_keys/pkcs7_key_type.c b/crypto/asymmetric_keys/pkcs7_key_type.c
index e284d9cb9237..5b2f6a2b5585 100644
--- a/crypto/asymmetric_keys/pkcs7_key_type.c
+++ b/crypto/asymmetric_keys/pkcs7_key_type.c
@@ -63,7 +63,7 @@ static int pkcs7_preparse(struct key_preparsed_payload *prep)
 
 	return verify_pkcs7_signature(NULL, 0,
 				      prep->data, prep->datalen,
-				      (void *)1UL, usage,
+				      VERIFY_USE_SECONDARY_KEYRING, usage,
 				      pkcs7_view_content, prep);
 }
 
diff --git a/include/linux/verification.h b/include/linux/verification.h
index a10549a6c7cd..cfa4730d607a 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -12,6 +12,12 @@
 #ifndef _LINUX_VERIFICATION_H
 #define _LINUX_VERIFICATION_H
 
+/*
+ * Indicate that both builtin trusted keys and secondary trusted keys
+ * should be used.
+ */
+#define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL)
+
 /*
  * The use to which an asymmetric key is being put.
  */
-- 
cgit v1.2.3


From 14d32b2525dd3a21c29b0d9edf1748f5dabc1bbc Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 31 Jul 2018 18:03:32 +0200
Subject: jiffies: add utility function to calculate delta in ms

add jiffies_delta_to_msecs() helper func to calculate the delta between
two times and eventually 0 if negative.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/jiffies.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index a27cf6652327..fa928242567d 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta)
 	return jiffies_to_clock_t(max(0L, delta));
 }
 
+static inline unsigned int jiffies_delta_to_msecs(long delta)
+{
+	return jiffies_to_msecs(max(0L, delta));
+}
+
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
-- 
cgit v1.2.3


From 4ef360dd6a65f6ef337645e1b65e744034754b19 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 26 Jul 2018 00:39:51 +0900
Subject: netfilter: nft_set: fix allocation size overflow in privsize
 callback.

In order to determine allocation size of set, ->privsize is invoked.
At this point, both desc->size and size of each data structure of set
are used. desc->size means number of element that is given by user.
desc->size is u32 type. so that upperlimit of set element is 4294967295.
but return type of ->privsize is also u32. hence overflow can occurred.

test commands:
   %nft add table ip filter
   %nft add set ip filter hash1 { type ipv4_addr \; size 4294967295 \; }
   %nft list ruleset

splat looks like:
[ 1239.202910] kasan: CONFIG_KASAN_INLINE enabled
[ 1239.208788] kasan: GPF could be caused by NULL-ptr deref or user memory access
[ 1239.217625] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[ 1239.219329] CPU: 0 PID: 1603 Comm: nft Not tainted 4.18.0-rc5+ #7
[ 1239.229091] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set]
[ 1239.229091] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16
[ 1239.229091] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246
[ 1239.229091] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001
[ 1239.229091] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410
[ 1239.229091] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030
[ 1239.229091] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0
[ 1239.229091] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000
[ 1239.229091] FS:  00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000
[ 1239.229091] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1239.229091] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0
[ 1239.229091] Call Trace:
[ 1239.229091]  ? nft_hash_remove+0xf0/0xf0 [nf_tables_set]
[ 1239.229091]  ? memset+0x1f/0x40
[ 1239.229091]  ? __nla_reserve+0x9f/0xb0
[ 1239.229091]  ? memcpy+0x34/0x50
[ 1239.229091]  nf_tables_dump_set+0x9a1/0xda0 [nf_tables]
[ 1239.229091]  ? __kmalloc_reserve.isra.29+0x2e/0xa0
[ 1239.229091]  ? nft_chain_hash_obj+0x630/0x630 [nf_tables]
[ 1239.229091]  ? nf_tables_commit+0x2c60/0x2c60 [nf_tables]
[ 1239.229091]  netlink_dump+0x470/0xa20
[ 1239.229091]  __netlink_dump_start+0x5ae/0x690
[ 1239.229091]  nft_netlink_dump_start_rcu+0xd1/0x160 [nf_tables]
[ 1239.229091]  nf_tables_getsetelem+0x2e5/0x4b0 [nf_tables]
[ 1239.229091]  ? nft_get_set_elem+0x440/0x440 [nf_tables]
[ 1239.229091]  ? nft_chain_hash_obj+0x630/0x630 [nf_tables]
[ 1239.229091]  ? nf_tables_dump_obj_done+0x70/0x70 [nf_tables]
[ 1239.229091]  ? nla_parse+0xab/0x230
[ 1239.229091]  ? nft_get_set_elem+0x440/0x440 [nf_tables]
[ 1239.229091]  nfnetlink_rcv_msg+0x7f0/0xab0 [nfnetlink]
[ 1239.229091]  ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink]
[ 1239.229091]  ? debug_show_all_locks+0x290/0x290
[ 1239.229091]  ? sched_clock_cpu+0x132/0x170
[ 1239.229091]  ? find_held_lock+0x39/0x1b0
[ 1239.229091]  ? sched_clock_local+0x10d/0x130
[ 1239.229091]  netlink_rcv_skb+0x211/0x320
[ 1239.229091]  ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink]
[ 1239.229091]  ? netlink_ack+0x7b0/0x7b0
[ 1239.229091]  ? ns_capable_common+0x6e/0x110
[ 1239.229091]  nfnetlink_rcv+0x2d1/0x310 [nfnetlink]
[ 1239.229091]  ? nfnetlink_rcv_batch+0x10f0/0x10f0 [nfnetlink]
[ 1239.229091]  ? netlink_deliver_tap+0x829/0x930
[ 1239.229091]  ? lock_acquire+0x265/0x2e0
[ 1239.229091]  netlink_unicast+0x406/0x520
[ 1239.509725]  ? netlink_attachskb+0x5b0/0x5b0
[ 1239.509725]  ? find_held_lock+0x39/0x1b0
[ 1239.509725]  netlink_sendmsg+0x987/0xa20
[ 1239.509725]  ? netlink_unicast+0x520/0x520
[ 1239.509725]  ? _copy_from_user+0xa9/0xc0
[ 1239.509725]  __sys_sendto+0x21a/0x2c0
[ 1239.509725]  ? __ia32_sys_getpeername+0xa0/0xa0
[ 1239.509725]  ? retint_kernel+0x10/0x10
[ 1239.509725]  ? sched_clock_cpu+0x132/0x170
[ 1239.509725]  ? find_held_lock+0x39/0x1b0
[ 1239.509725]  ? lock_downgrade+0x540/0x540
[ 1239.509725]  ? up_read+0x1c/0x100
[ 1239.509725]  ? __do_page_fault+0x763/0x970
[ 1239.509725]  ? retint_user+0x18/0x18
[ 1239.509725]  __x64_sys_sendto+0x177/0x180
[ 1239.509725]  do_syscall_64+0xaa/0x360
[ 1239.509725]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 1239.509725] RIP: 0033:0x7f5a8f468e03
[ 1239.509725] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb d0 0f 1f 84 00 00 00 00 00 83 3d 49 c9 2b 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8
[ 1239.509725] RSP: 002b:00007ffd78d0b778 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[ 1239.509725] RAX: ffffffffffffffda RBX: 00007ffd78d0c890 RCX: 00007f5a8f468e03
[ 1239.509725] RDX: 0000000000000034 RSI: 00007ffd78d0b7e0 RDI: 0000000000000003
[ 1239.509725] RBP: 00007ffd78d0b7d0 R08: 00007f5a8f15c160 R09: 000000000000000c
[ 1239.509725] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd78d0b7e0
[ 1239.509725] R13: 0000000000000034 R14: 00007f5a8f9aff60 R15: 00005648040094b0
[ 1239.509725] Modules linked in: nf_tables_set nf_tables nfnetlink ip_tables x_tables
[ 1239.670713] ---[ end trace 39375adcda140f11 ]---
[ 1239.676016] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set]
[ 1239.682834] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16
[ 1239.705108] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246
[ 1239.711115] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001
[ 1239.719269] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410
[ 1239.727401] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030
[ 1239.735530] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0
[ 1239.743658] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000
[ 1239.751785] FS:  00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000
[ 1239.760993] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1239.767560] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0
[ 1239.775679] Kernel panic - not syncing: Fatal exception
[ 1239.776630] Kernel Offset: 0x1f000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 1239.776630] Rebooting in 5 seconds..

Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++--
 net/netfilter/nf_tables_api.c     | 2 +-
 net/netfilter/nft_set_bitmap.c    | 6 +++---
 net/netfilter/nft_set_hash.c      | 8 ++++----
 net/netfilter/nft_set_rbtree.c    | 4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index dc417ef0a0c5..552bfbef1bf1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -274,7 +274,7 @@ enum nft_set_class {
  *	@space: memory class
  */
 struct nft_set_estimate {
-	unsigned int		size;
+	u64			size;
 	enum nft_set_class	lookup;
 	enum nft_set_class	space;
 };
@@ -336,7 +336,7 @@ struct nft_set_ops {
 					       const struct nft_set_elem *elem,
 					       unsigned int flags);
 
-	unsigned int			(*privsize)(const struct nlattr * const nla[],
+	u64				(*privsize)(const struct nlattr * const nla[],
 						    const struct nft_set_desc *desc);
 	bool				(*estimate)(const struct nft_set_desc *desc,
 						    u32 features,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 67cdd5c4f4f5..3008f93469c4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	char *name;
-	unsigned int size;
+	u64 size;
 	u64 timeout;
 	u32 ktype, dtype, flags, policy, gc_int, objtype;
 	struct nft_set_desc desc;
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 128bc16f52dd..f866bd41e5d2 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -248,13 +248,13 @@ static inline u32 nft_bitmap_size(u32 klen)
 	return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1;
 }
 
-static inline u32 nft_bitmap_total_size(u32 klen)
+static inline u64 nft_bitmap_total_size(u32 klen)
 {
 	return sizeof(struct nft_bitmap) + nft_bitmap_size(klen);
 }
 
-static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[],
-					const struct nft_set_desc *desc)
+static u64 nft_bitmap_privsize(const struct nlattr * const nla[],
+			       const struct nft_set_desc *desc)
 {
 	u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
 
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 90c3e7e6cacb..015124e649cb 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -341,8 +341,8 @@ schedule:
 			   nft_set_gc_interval(set));
 }
 
-static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
-				       const struct nft_set_desc *desc)
+static u64 nft_rhash_privsize(const struct nlattr * const nla[],
+			      const struct nft_set_desc *desc)
 {
 	return sizeof(struct nft_rhash);
 }
@@ -585,8 +585,8 @@ cont:
 	}
 }
 
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[],
-				      const struct nft_set_desc *desc)
+static u64 nft_hash_privsize(const struct nlattr * const nla[],
+			     const struct nft_set_desc *desc)
 {
 	return sizeof(struct nft_hash) +
 	       nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9873d734b494..55e2d9215c0d 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -411,8 +411,8 @@ static void nft_rbtree_gc(struct work_struct *work)
 			   nft_set_gc_interval(set));
 }
 
-static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[],
-					const struct nft_set_desc *desc)
+static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
+			       const struct nft_set_desc *desc)
 {
 	return sizeof(struct nft_rbtree);
 }
-- 
cgit v1.2.3


From d209df3e7f7002d9099fdb0f6df0f972b4386a63 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 2 Aug 2018 21:44:40 +0200
Subject: netfilter: nf_tables: fix register ordering

We must register nfnetlink ops last, as that exposes nf_tables to
userspace.  Without this, we could theoretically get nfnetlink request
before net->nft state has been initialized.

Fixes: 99633ab29b213 ("netfilter: nf_tables: complete net namespace support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c     | 29 ++++++++++++++++++++++-------
 net/netfilter/nft_chain_filter.c  |  2 +-
 3 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 552bfbef1bf1..0f39ac487012 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1374,6 +1374,6 @@ struct nft_trans_flowtable {
 	(((struct nft_trans_flowtable *)trans->data)->flowtable)
 
 int __init nft_chain_filter_init(void);
-void __exit nft_chain_filter_fini(void);
+void nft_chain_filter_fini(void);
 
 #endif /* _NET_NF_TABLES_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3008f93469c4..80636cc59686 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7273,21 +7273,36 @@ static int __init nf_tables_module_init(void)
 {
 	int err;
 
-	nft_chain_filter_init();
+	err = register_pernet_subsys(&nf_tables_net_ops);
+	if (err < 0)
+		return err;
+
+	err = nft_chain_filter_init();
+	if (err < 0)
+		goto err1;
 
 	err = nf_tables_core_module_init();
 	if (err < 0)
-		return err;
+		goto err2;
 
-	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
 	if (err < 0)
-		goto err;
+		goto err3;
 
-	register_netdevice_notifier(&nf_tables_flowtable_notifier);
+	/* must be last */
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err4;
 
-	return register_pernet_subsys(&nf_tables_net_ops);
-err:
+	return err;
+err4:
+	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+err3:
 	nf_tables_core_module_exit();
+err2:
+	nft_chain_filter_fini();
+err1:
+	unregister_pernet_subsys(&nf_tables_net_ops);
 	return err;
 }
 
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index ea5b7c4944f6..9d07b277b9ee 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -392,7 +392,7 @@ int __init nft_chain_filter_init(void)
 	return 0;
 }
 
-void __exit nft_chain_filter_fini(void)
+void nft_chain_filter_fini(void)
 {
 	nft_chain_filter_bridge_fini();
 	nft_chain_filter_inet_fini();
-- 
cgit v1.2.3


From cdb2f401246ef70b37eeedd9c6f9c28d611d8255 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Mon, 13 Aug 2018 18:43:36 +0300
Subject: netfilter: uapi: fix linux/netfilter/nf_osf.h userspace compilation
 errors

Move inclusion of <linux/ip.h> and <linux/tcp.h> from
linux/netfilter/xt_osf.h to linux/netfilter/nf_osf.h to fix
the following linux/netfilter/nf_osf.h userspace compilation errors:

/usr/include/linux/netfilter/nf_osf.h:59:24: error: 'MAX_IPOPTLEN' undeclared here (not in a function)
  struct nf_osf_opt opt[MAX_IPOPTLEN];
/usr/include/linux/netfilter/nf_osf.h:64:17: error: field 'ip' has incomplete type
  struct iphdr   ip;
/usr/include/linux/netfilter/nf_osf.h:65:18: error: field 'tcp' has incomplete type
  struct tcphdr   tcp;

Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf")
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_osf.h | 2 ++
 include/uapi/linux/netfilter/xt_osf.h        | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h
index 76a3527df5dd..272bc3195f2d 100644
--- a/include/uapi/linux/netfilter/nfnetlink_osf.h
+++ b/include/uapi/linux/netfilter/nfnetlink_osf.h
@@ -2,6 +2,8 @@
 #define _NF_OSF_H
 
 #include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
 
 #define MAXGENRELEN	32
 
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index 24102b5286ec..6e466236ca4b 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -21,8 +21,6 @@
 #define _XT_OSF_H
 
 #include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
 #include <linux/netfilter/nfnetlink_osf.h>
 
 #define XT_OSF_GENRE		NF_OSF_GENRE
-- 
cgit v1.2.3


From ff93bca769925a2d8fd7f910cdf543d992e17f07 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 14 Aug 2018 15:21:31 -0700
Subject: ila: make lockdep happy again

Previously, alloc_ila_locks() and bucket_table_alloc() call
spin_lock_init() separately, therefore they have two different
lock names and lock class keys. However, after commit b893281715ab
("ila: Call library function alloc_bucket_locks") they both call
helper alloc_bucket_spinlocks() which now only has one lock
name and lock class key. This causes a few bogus lockdep warnings
as reported by syzbot.

Fix this by making alloc_bucket_locks() a macro and pass declaration
name as lock name and a static lock class key inside the macro.

Fixes: b893281715ab ("ila: Call library function alloc_bucket_locks")
Reported-by: <syzbot+b66a5a554991a8ed027c@syzkaller.appspotmail.com>
Cc: Tom Herbert <tom@quantonium.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/spinlock.h | 17 ++++++++++++++---
 lib/bucket_locks.c       | 11 +++++++----
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 3190997df9ca..e089157dcf97 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -451,9 +451,20 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
 		__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
-			   size_t max_size, unsigned int cpu_mult,
-			   gfp_t gfp);
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
+			     size_t max_size, unsigned int cpu_mult,
+			     gfp_t gfp, const char *name,
+			     struct lock_class_key *key);
+
+#define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp)    \
+	({								     \
+		static struct lock_class_key key;			     \
+		int ret;						     \
+									     \
+		ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size,   \
+					       cpu_mult, gfp, #locks, &key); \
+		ret;							     \
+	})
 
 void free_bucket_spinlocks(spinlock_t *locks);
 
diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
index ade3ce6c4af6..64b92e1dbace 100644
--- a/lib/bucket_locks.c
+++ b/lib/bucket_locks.c
@@ -11,8 +11,9 @@
  * to a power of 2 to be suitable as a hash table.
  */
 
-int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
-			   size_t max_size, unsigned int cpu_mult, gfp_t gfp)
+int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
+			     size_t max_size, unsigned int cpu_mult, gfp_t gfp,
+			     const char *name, struct lock_class_key *key)
 {
 	spinlock_t *tlocks = NULL;
 	unsigned int i, size;
@@ -33,8 +34,10 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 		tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp);
 		if (!tlocks)
 			return -ENOMEM;
-		for (i = 0; i < size; i++)
+		for (i = 0; i < size; i++) {
 			spin_lock_init(&tlocks[i]);
+			lockdep_init_map(&tlocks[i].dep_map, name, key, 0);
+		}
 	}
 
 	*locks = tlocks;
@@ -42,7 +45,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
 
 	return 0;
 }
-EXPORT_SYMBOL(alloc_bucket_spinlocks);
+EXPORT_SYMBOL(__alloc_bucket_spinlocks);
 
 void free_bucket_spinlocks(spinlock_t *locks)
 {
-- 
cgit v1.2.3


From 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 15 Aug 2018 16:52:58 -0600
Subject: Revert "net/smc: Replace ib_query_gid with rdma_get_gid_attr"

This reverts commit ddb457c6993babbcdd41fca638b870d2a2fc3941.

The include rdma/ib_cache.h is kept, and we have to add a memset
to the compat wrapper to avoid compiler warnings in gcc-7

This revert is done to avoid extensive merge conflicts with SMC
changes in netdev during the 4.19 merge window.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_cache.h |  1 +
 net/smc/smc_core.c      | 19 ++++++++++---------
 net/smc/smc_ib.c        | 24 ++++++++++--------------
 3 files changed, 21 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index a4ce441f36f0..3e11e7cc60b7 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -143,6 +143,7 @@ static inline __deprecated int ib_query_gid(struct ib_device *device,
 {
 	const struct ib_gid_attr *attr;
 
+	memset(attr_out, 0, sizeof(*attr_out));
 	attr = rdma_get_gid_attr(device, port_num, index);
 	if (IS_ERR(attr))
 		return PTR_ERR(attr);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d99a75f75e42..15bad268f37d 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -451,7 +451,8 @@ out:
 static int smc_link_determine_gid(struct smc_link_group *lgr)
 {
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	const struct ib_gid_attr *gattr;
+	struct ib_gid_attr gattr;
+	union ib_gid gid;
 	int i;
 
 	if (!lgr->vlan_id) {
@@ -461,18 +462,18 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 
 	for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
 	     i++) {
-		gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, i);
-		if (IS_ERR(gattr))
+		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
+				 &gattr))
 			continue;
-		if (gattr->ndev) {
-			if (is_vlan_dev(gattr->ndev) &&
-			    vlan_dev_vlan_id(gattr->ndev) == lgr->vlan_id) {
-				lnk->gid = gattr->gid;
-				rdma_put_gid_attr(gattr);
+		if (gattr.ndev) {
+			if (is_vlan_dev(gattr.ndev) &&
+			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
+				lnk->gid = gid;
+				dev_put(gattr.ndev);
 				return 0;
 			}
+			dev_put(gattr.ndev);
 		}
-		rdma_put_gid_attr(gattr);
 	}
 	return -ENODEV;
 }
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 74f29f814ec1..117b05f1a494 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -373,21 +373,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
 
 static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
 {
-	const struct ib_gid_attr *gattr;
-	int rc = 0;
+	struct ib_gid_attr gattr;
+	int rc;
 
-	gattr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
-	if (IS_ERR(gattr))
-		return PTR_ERR(gattr);
-	if (!gattr->ndev) {
-		rc = -ENODEV;
-		goto done;
-	}
-	smcibdev->gid[ibport - 1] = gattr->gid;
-	memcpy(smcibdev->mac[ibport - 1], gattr->ndev->dev_addr, ETH_ALEN);
-done:
-	rdma_put_gid_attr(gattr);
-	return rc;
+	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
+			  &smcibdev->gid[ibport - 1], &gattr);
+	if (rc || !gattr.ndev)
+		return -ENODEV;
+
+	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+	dev_put(gattr.ndev);
+	return 0;
 }
 
 /* Create an identifier unique for this instance of SMC-R.
-- 
cgit v1.2.3


From 037b0b86ecf5646f8eae777d8b52ff8b401692ec Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 16 Aug 2018 21:49:06 +0200
Subject: tcp, ulp: add alias for all ulp modules

Lets not turn the TCP ULP lookup into an arbitrary module loader as
we only intend to load ULP modules through this mechanism, not other
unrelated kernel modules:

  [root@bar]# cat foo.c
  #include <sys/types.h>
  #include <sys/socket.h>
  #include <linux/tcp.h>
  #include <linux/in.h>

  int main(void)
  {
      int sock = socket(PF_INET, SOCK_STREAM, 0);
      setsockopt(sock, IPPROTO_TCP, TCP_ULP, "sctp", sizeof("sctp"));
      return 0;
  }

  [root@bar]# gcc foo.c -O2 -Wall
  [root@bar]# lsmod | grep sctp
  [root@bar]# ./a.out
  [root@bar]# lsmod | grep sctp
  sctp                 1077248  4
  libcrc32c              16384  3 nf_conntrack,nf_nat,sctp
  [root@bar]#

Fix it by adding module alias to TCP ULP modules, so probing module
via request_module() will be limited to tcp-ulp-[name]. The existing
modules like kTLS will load fine given tcp-ulp-tls alias, but others
will fail to load:

  [root@bar]# lsmod | grep sctp
  [root@bar]# ./a.out
  [root@bar]# lsmod | grep sctp
  [root@bar]#

Sockmap is not affected from this since it's either built-in or not.

Fixes: 734942cc4ea6 ("tcp: ULP infrastructure")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h  | 4 ++++
 net/ipv4/tcp_ulp.c | 2 +-
 net/tls/tls_main.c | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d196901c9dba..770917d0caa7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
+#define MODULE_ALIAS_TCP_ULP(name)				\
+	__MODULE_INFO(alias, alias_userspace, name);		\
+	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 622caa4039e0..7dd44b6156c7 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
 #ifdef CONFIG_MODULES
 	if (!ulp && capable(CAP_NET_ADMIN)) {
 		rcu_read_unlock();
-		request_module("%s", name);
+		request_module("tcp-ulp-%s", name);
 		rcu_read_lock();
 		ulp = tcp_ulp_find(name);
 	}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b09867c8b817..93c0c225ab34 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -45,6 +45,7 @@
 MODULE_AUTHOR("Mellanox Technologies");
 MODULE_DESCRIPTION("Transport Layer Security Support");
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS_TCP_ULP("tls");
 
 enum {
 	TLSV4,
-- 
cgit v1.2.3


From b4c296f9c96420b8e7e92466ea5960f10ee20aae Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Fri, 17 Aug 2018 16:45:51 -0600
Subject: RDMA/smc: Replace ib_query_gid with rdma_get_gid_attr

All RDMA ULPs should be using rdma_get_gid_attr instead of
ib_query_gid. Convert SMC to use the new API.

In the process correct some confusion with gid_type - if attr->ndev is
!NULL then gid_type can never be IB_GID_TYPE_IB by
definition. IB_GID_TYPE_ROCE shares the same enum value and is probably
what was intended here.

Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_cache.h | 24 ------------------------
 net/smc/smc_ib.c        | 48 +++++++++++++++++++++++++-----------------------
 2 files changed, 25 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 3e11e7cc60b7..62e990b620aa 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -133,28 +133,4 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
 void rdma_put_gid_attr(const struct ib_gid_attr *attr);
 void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
 
-/*
- * This is to be removed. It only exists to make merging rdma and smc simpler.
- */
-static inline __deprecated int ib_query_gid(struct ib_device *device,
-					    u8 port_num, int index,
-					    union ib_gid *gid,
-					    struct ib_gid_attr *attr_out)
-{
-	const struct ib_gid_attr *attr;
-
-	memset(attr_out, 0, sizeof(*attr_out));
-	attr = rdma_get_gid_attr(device, port_num, index);
-	if (IS_ERR(attr))
-		return PTR_ERR(attr);
-
-	if (attr->ndev)
-		dev_hold(attr->ndev);
-	*attr_out = *attr;
-
-	rdma_put_gid_attr(attr);
-
-	return 0;
-}
-
 #endif /* _IB_CACHE_H */
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 9bb5274a244e..e519ef29c0ff 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -145,17 +145,21 @@ out:
 
 static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
 {
-	struct ib_gid_attr gattr;
-	union ib_gid gid;
-	int rc;
+	const struct ib_gid_attr *attr;
+	int rc = 0;
 
-	rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr);
-	if (rc || !gattr.ndev)
+	attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
+	if (IS_ERR(attr))
 		return -ENODEV;
 
-	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
-	dev_put(gattr.ndev);
-	return 0;
+	if (attr->ndev)
+		memcpy(smcibdev->mac[ibport - 1], attr->ndev->dev_addr,
+		       ETH_ALEN);
+	else
+		rc = -ENODEV;
+
+	rdma_put_gid_attr(attr);
+	return rc;
 }
 
 /* Create an identifier unique for this instance of SMC-R.
@@ -180,29 +184,27 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
 int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 			 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
 {
-	struct ib_gid_attr gattr;
-	union ib_gid _gid;
+	const struct ib_gid_attr *attr;
 	int i;
 
 	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
-		memset(&_gid, 0, SMC_GID_SIZE);
-		memset(&gattr, 0, sizeof(gattr));
-		if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr))
+		attr = rdma_get_gid_attr(smcibdev->ibdev, ibport, i);
+		if (IS_ERR(attr))
 			continue;
-		if (!gattr.ndev)
-			continue;
-		if (((!vlan_id && !is_vlan_dev(gattr.ndev)) ||
-		     (vlan_id && is_vlan_dev(gattr.ndev) &&
-		      vlan_dev_vlan_id(gattr.ndev) == vlan_id)) &&
-		    gattr.gid_type == IB_GID_TYPE_IB) {
+
+		if (attr->ndev &&
+		    ((!vlan_id && !is_vlan_dev(attr->ndev)) ||
+		     (vlan_id && is_vlan_dev(attr->ndev) &&
+		      vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
+		    attr->gid_type == IB_GID_TYPE_ROCE) {
 			if (gid)
-				memcpy(gid, &_gid, SMC_GID_SIZE);
+				memcpy(gid, &attr->gid, SMC_GID_SIZE);
 			if (sgid_index)
-				*sgid_index = i;
-			dev_put(gattr.ndev);
+				*sgid_index = attr->index;
+			rdma_put_gid_attr(attr);
 			return 0;
 		}
-		dev_put(gattr.ndev);
+		rdma_put_gid_attr(attr);
 	}
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From f6069b9aa9934ede26f41ac0781fce241279ad43 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 17 Aug 2018 23:26:14 +0200
Subject: bpf: fix redirect to map under tail calls

Commits 109980b894e9 ("bpf: don't select potentially stale ri->map
from buggy xdp progs") and 7c3001313396 ("bpf: fix ri->map_owner
pointer on bpf_prog_realloc") tried to mitigate that buggy programs
using bpf_redirect_map() helper call do not leave stale maps behind.
Idea was to add a map_owner cookie into the per CPU struct redirect_info
which was set to prog->aux by the prog making the helper call as a
proof that the map is not stale since the prog is implicitly holding
a reference to it. This owner cookie could later on get compared with
the program calling into BPF whether they match and therefore the
redirect could proceed with processing the map safely.

In (obvious) hindsight, this approach breaks down when tail calls are
involved since the original caller's prog->aux pointer does not have
to match the one from one of the progs out of the tail call chain,
and therefore the xdp buffer will be dropped instead of redirected.
A way around that would be to fix the issue differently (which also
allows to remove related work in fast path at the same time): once
the life-time of a redirect map has come to its end we use it's map
free callback where we need to wait on synchronize_rcu() for current
outstanding xdp buffers and remove such a map pointer from the
redirect info if found to be present. At that time no program is
using this map anymore so we simply invalidate the map pointers to
NULL iff they previously pointed to that instance while making sure
that the redirect path only reads out the map once.

Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine")
Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs")
Reported-by: Sebastiano Miano <sebastiano.miano@polito.it>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h     |  3 +-
 include/trace/events/xdp.h |  5 ++--
 kernel/bpf/cpumap.c        |  2 ++
 kernel/bpf/devmap.c        |  1 +
 kernel/bpf/verifier.c      | 21 --------------
 kernel/bpf/xskmap.c        |  1 +
 net/core/filter.c          | 68 ++++++++++++++++++++--------------------------
 7 files changed, 38 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5d565c50bcb2..6791a0ac0139 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -543,7 +543,6 @@ struct bpf_redirect_info {
 	u32 flags;
 	struct bpf_map *map;
 	struct bpf_map *map_to_flush;
-	unsigned long   map_owner;
 	u32 kern_flags;
 };
 
@@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
 
+void bpf_clear_redirect_map(struct bpf_map *map);
+
 static inline bool xdp_return_frame_no_direct(void)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 1ecf4c67fcf7..e95cb86b65cf 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -147,9 +147,8 @@ struct _bpf_dtab_netdev {
 
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
-	 (!map ? 0 :						\
-	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
+	 ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	  ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
 	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 620bc5024d7d..24aac0d0f412 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map)
 	 * It does __not__ ensure pending flush operations (if any) are
 	 * complete.
 	 */
+
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index ac1df79f3788..141710b82a6c 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map)
 	list_del_rcu(&dtab->list);
 	spin_unlock(&dev_map_lock);
 
+	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
 	/* To ensure all pending flush operations have completed wait for flush
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ca90679a7fe5..92246117d2b0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			goto patch_call_imm;
 		}
 
-		if (insn->imm == BPF_FUNC_redirect_map) {
-			/* Note, we cannot use prog directly as imm as subsequent
-			 * rewrites would still change the prog pointer. The only
-			 * stable address we can use is aux, which also works with
-			 * prog clones during blinding.
-			 */
-			u64 addr = (unsigned long)prog->aux;
-			struct bpf_insn r4_ld[] = {
-				BPF_LD_IMM64(BPF_REG_4, addr),
-				*insn,
-			};
-			cnt = ARRAY_SIZE(r4_ld);
-
-			new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
-			if (!new_prog)
-				return -ENOMEM;
-
-			delta    += cnt - 1;
-			env->prog = prog = new_prog;
-			insn      = new_prog->insnsi + i + delta;
-		}
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 4ddf61e158f6..9f8463afda9c 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map)
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
 	int i;
 
+	bpf_clear_redirect_map(map);
 	synchronize_net();
 
 	for (i = 0; i < map->max_entries; i++) {
diff --git a/net/core/filter.c b/net/core/filter.c
index fd423ce3da34..c25eb36f1320 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 	}
 }
 
-static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
-				   unsigned long aux)
+void bpf_clear_redirect_map(struct bpf_map *map)
 {
-	return (unsigned long)xdp_prog->aux != aux;
+	struct bpf_redirect_info *ri;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+		/* Avoid polluting remote cacheline due to writes if
+		 * not needed. Once we pass this test, we need the
+		 * cmpxchg() to make sure it hasn't been changed in
+		 * the meantime by remote CPU.
+		 */
+		if (unlikely(READ_ONCE(ri->map) == map))
+			cmpxchg(&ri->map, map, NULL);
+	}
 }
 
 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
-			       struct bpf_prog *xdp_prog)
+			       struct bpf_prog *xdp_prog, struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
-
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
+	WRITE_ONCE(ri->map, NULL);
 
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (!fwd) {
@@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		    struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	struct net_device *fwd;
 	u32 index = ri->ifindex;
 	int err;
 
-	if (ri->map)
-		return xdp_do_redirect_map(dev, xdp, xdp_prog);
+	if (map)
+		return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
 
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	ri->ifindex = 0;
@@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
-				       struct bpf_prog *xdp_prog)
+				       struct bpf_prog *xdp_prog,
+				       struct bpf_map *map)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	unsigned long map_owner = ri->map_owner;
-	struct bpf_map *map = ri->map;
 	u32 index = ri->ifindex;
 	void *fwd = NULL;
 	int err = 0;
 
 	ri->ifindex = 0;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
-	if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
-		err = -EFAULT;
-		map = NULL;
-		goto err;
-	}
 	fwd = __xdp_map_lookup_elem(map, index);
 	if (unlikely(!fwd)) {
 		err = -EINVAL;
@@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 			    struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	struct bpf_map *map = READ_ONCE(ri->map);
 	u32 index = ri->ifindex;
 	struct net_device *fwd;
 	int err = 0;
 
-	if (ri->map)
-		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
-
+	if (map)
+		return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
+						   map);
 	ri->ifindex = 0;
 	fwd = dev_get_by_index_rcu(dev_net(dev), index);
 	if (unlikely(!fwd)) {
@@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = NULL;
-	ri->map_owner = 0;
+	WRITE_ONCE(ri->map, NULL);
 
 	return XDP_REDIRECT;
 }
@@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
-	   unsigned long, map_owner)
+BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
+	   u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
@@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
 
 	ri->ifindex = ifindex;
 	ri->flags = flags;
-	ri->map = map;
-	ri->map_owner = map_owner;
+	WRITE_ONCE(ri->map, map);
 
 	return XDP_REDIRECT;
 }
 
-/* Note, arg4 is hidden from users and populated by the verifier
- * with the right pointer.
- */
 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
 	.func           = bpf_xdp_redirect_map,
 	.gpl_only       = false,
-- 
cgit v1.2.3


From e36488c83b6d871b35dd555afb2d434bd61687cf Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 17 Aug 2018 15:43:36 -0700
Subject: bitfield: avoid gcc-8 -Wint-in-bool-context warning

Passing an enum into FIELD_GET() produces a long but harmless warning on
newer compilers:

                   from include/linux/linkage.h:7,
                   from include/linux/kernel.h:7,
                   from include/linux/skbuff.h:17,
                   from include/linux/if_ether.h:23,
                   from include/linux/etherdevice.h:25,
                   from drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:63:
  drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c: In function 'iwl_mvm_rx_mpdu_mq':
  include/linux/bitfield.h:56:20: error: enum constant in boolean context [-Werror=int-in-bool-context]
     BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \
                      ^
  ...
  include/linux/bitfield.h:103:3: note: in expansion of macro '__BF_FIELD_CHECK'
     __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
     ^~~~~~~~~~~~~~~~
  drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c:1025:21: note: in expansion of macro 'FIELD_GET'
      le16_encode_bits(FIELD_GET(IWL_RX_HE_PHY_SIBG_SYM_OR_USER_NUM_MASK,

The problem here is that the caller has no idea how the macro gets
expanding, leading to a false-positive.  It can be trivially avoided by
doing a comparison against zero.

This only recently started appearing as the iwlwifi driver was patched
to use FIELD_GET.

Link: http://lkml.kernel.org/r/20180813220950.194841-1-arnd@arndb.de
Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitfield.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 65a6981eef7b..3f1ef4450a7c 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -53,7 +53,7 @@
 	({								\
 		BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),		\
 				 _pfx "mask is not constant");		\
-		BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");	\
+		BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");	\
 		BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?		\
 				 ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
 				 _pfx "value too large for the field"); \
-- 
cgit v1.2.3


From 4cdfffc8722e99be8d400d8fa1fcd615d078ad43 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 17 Aug 2018 15:44:37 -0700
Subject: vfs: discard ATTR_ATTR_FLAG

This flag was introduce in 2.1.37pre1 and the only place it was tested
was removed in 2.1.43pre1.  The flag was never set.

Let's discard it properly.

Link: http://lkml.kernel.org/r/877en0hewz.fsf@notabene.neil.brown.name
Signed-off-by: NeilBrown <neilb@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hostfs/hostfs.h | 2 +-
 include/linux/fs.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index cb8374af08a6..33b8423ef0c9 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -19,7 +19,7 @@
 #define HOSTFS_ATTR_ATIME_SET	128
 #define HOSTFS_ATTR_MTIME_SET	256
 
-/* These two are unused by hostfs. */
+/* This one is unused by hostfs. */
 #define HOSTFS_ATTR_FORCE	512	/* Not a change, but a change it */
 #define HOSTFS_ATTR_ATTR_FLAG	1024
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ec33fd0423f..9d319f1f66f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -179,7 +179,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define ATTR_ATIME_SET	(1 << 7)
 #define ATTR_MTIME_SET	(1 << 8)
 #define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG	(1 << 10)
 #define ATTR_KILL_SUID	(1 << 11)
 #define ATTR_KILL_SGID	(1 << 12)
 #define ATTR_FILE	(1 << 13)
-- 
cgit v1.2.3


From b3a2369692fedcc439dfaa4f215cf5323a886c88 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 17 Aug 2018 15:45:12 -0700
Subject: include/linux/page_ext.h: drop definition of unused
 PAGE_EXT_DEBUG_POISON

After commit bd33ef368135 ("mm: enable page poisoning early at boot")
PAGE_EXT_DEBUG_POISON is not longer used.  Remove it.

Link: http://lkml.kernel.org/r/20180531135457.20167-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ext.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index ca5461efae2f..bbec618a614b 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -16,18 +16,7 @@ struct page_ext_operations {
 
 #ifdef CONFIG_PAGE_EXTENSION
 
-/*
- * page_ext->flags bits:
- *
- * PAGE_EXT_DEBUG_POISON is set for poisoned pages. This is used to
- * implement generic debug pagealloc feature. The pages are filled with
- * poison patterns and set this flag after free_pages(). The poisoned
- * pages are verified whether the patterns are not corrupted and clear
- * the flag before alloc_pages().
- */
-
 enum page_ext_flags {
-	PAGE_EXT_DEBUG_POISON,		/* Page is poisoned */
 	PAGE_EXT_DEBUG_GUARD,
 	PAGE_EXT_OWNER,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
-- 
cgit v1.2.3


From 10ed63415223b16d7e80ba528556500231814232 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 17 Aug 2018 15:45:15 -0700
Subject: mm/page_ext.c: constify lookup_page_ext() argument

lookup_page_ext() finds 'struct page_ext' for a given page.  It requires
only read access to the given struct page.

Current implemnentation takes 'struct page *' as an argument.  It makes
compiler complain when 'const struct page *' passed.

Change the argument to 'const struct page *'.

Link: http://lkml.kernel.org/r/20180531135457.20167-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ext.h | 4 ++--
 mm/page_ext.c            | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index bbec618a614b..f84f167ec04c 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -50,7 +50,7 @@ static inline void page_ext_init(void)
 }
 #endif
 
-struct page_ext *lookup_page_ext(struct page *page);
+struct page_ext *lookup_page_ext(const struct page *page);
 
 #else /* !CONFIG_PAGE_EXTENSION */
 struct page_ext;
@@ -59,7 +59,7 @@ static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
 {
 }
 
-static inline struct page_ext *lookup_page_ext(struct page *page)
+static inline struct page_ext *lookup_page_ext(const struct page *page)
 {
 	return NULL;
 }
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5295ef331165..a9826da84ccb 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -120,7 +120,7 @@ void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
 	pgdat->node_page_ext = NULL;
 }
 
-struct page_ext *lookup_page_ext(struct page *page)
+struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long index;
@@ -195,7 +195,7 @@ fail:
 
 #else /* CONFIG_FLAT_NODE_MEM_MAP */
 
-struct page_ext *lookup_page_ext(struct page *page)
+struct page_ext *lookup_page_ext(const struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
-- 
cgit v1.2.3


From 74c8164e1cdb1eb22f1d49d54e515e81821a8ad0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 17 Aug 2018 15:45:36 -0700
Subject: mpage: mpage_readpages() should submit IO as read-ahead

a_ops->readpages() is only ever used for read-ahead, yet we don't flag
the IO being submitted as such.  Fix that up.  Any file system that uses
mpage_readpages() as its ->readpages() implementation will now get this
right.

Since we're passing in whether the IO is read-ahead or not, we don't
need to pass in the 'gfp' separately, as it is dependent on the IO being
read-ahead.  Kill off that member.

Add some documentation notes on ->readpages() being purely for
read-ahead.

Link: http://lkml.kernel.org/r/20180621010725.17813-3-axboe@kernel.dk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Chris Mason <clm@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/f2fs/data.c     |  5 +++++
 fs/mpage.c         | 29 +++++++++++++++++++----------
 include/linux/fs.h |  4 ++++
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8f931d699287..b7c9b58acf3e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1421,6 +1421,11 @@ out:
 /*
  * This function was originally taken from fs/mpage.c, and customized for f2fs.
  * Major change was from block_size == page_size in f2fs by default.
+ *
+ * Note that the aops->readpages() function is ONLY used for read-ahead. If
+ * this function ever deviates from doing just read-ahead, it should either
+ * use ->readpage() or do the necessary surgery to decouple ->readpages()
+ * readom read-ahead.
  */
 static int f2fs_mpage_readpages(struct address_space *mapping,
 			struct list_head *pages, struct page *page,
diff --git a/fs/mpage.c b/fs/mpage.c
index 6dc90e456abf..c820dc9bebab 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -137,11 +137,11 @@ struct mpage_readpage_args {
 	struct bio *bio;
 	struct page *page;
 	unsigned int nr_pages;
+	bool is_readahead;
 	sector_t last_block_in_bio;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block;
 	get_block_t *get_block;
-	gfp_t gfp;
 };
 
 /*
@@ -170,8 +170,18 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	struct block_device *bdev = NULL;
 	int length;
 	int fully_mapped = 1;
+	int op_flags;
 	unsigned nblocks;
 	unsigned relative_block;
+	gfp_t gfp;
+
+	if (args->is_readahead) {
+		op_flags = REQ_RAHEAD;
+		gfp = readahead_gfp_mask(page->mapping);
+	} else {
+		op_flags = 0;
+		gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
+	}
 
 	if (page_has_buffers(page))
 		goto confused;
@@ -284,7 +294,7 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
 	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 
 alloc_new:
 	if (args->bio == NULL) {
@@ -296,14 +306,14 @@ alloc_new:
 		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 					min_t(int, args->nr_pages,
 					      BIO_MAX_PAGES),
-					args->gfp);
+					gfp);
 		if (args->bio == NULL)
 			goto confused;
 	}
 
 	length = first_hole << blkbits;
 	if (bio_add_page(args->bio, page, length, 0) < length) {
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 		goto alloc_new;
 	}
 
@@ -311,7 +321,7 @@ alloc_new:
 	nblocks = map_bh->b_size >> blkbits;
 	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
 	    (first_hole != blocks_per_page))
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 	else
 		args->last_block_in_bio = blocks[blocks_per_page - 1];
 out:
@@ -319,7 +329,7 @@ out:
 
 confused:
 	if (args->bio)
-		args->bio = mpage_bio_submit(REQ_OP_READ, 0, args->bio);
+		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
 	if (!PageUptodate(page))
 		block_read_full_page(page, args->get_block);
 	else
@@ -377,7 +387,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 {
 	struct mpage_readpage_args args = {
 		.get_block = get_block,
-		.gfp = readahead_gfp_mask(mapping),
+		.is_readahead = true,
 	};
 	unsigned page_idx;
 
@@ -388,7 +398,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
 					page->index,
-					args.gfp)) {
+					readahead_gfp_mask(mapping))) {
 			args.page = page;
 			args.nr_pages = nr_pages - page_idx;
 			args.bio = do_mpage_readpage(&args);
@@ -397,7 +407,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	}
 	BUG_ON(!list_empty(pages));
 	if (args.bio)
-		mpage_bio_submit(REQ_OP_READ, 0, args.bio);
+		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpages);
@@ -411,7 +421,6 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 		.page = page,
 		.nr_pages = 1,
 		.get_block = get_block,
-		.gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL),
 	};
 
 	args.bio = do_mpage_readpage(&args);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d319f1f66f6..a9242f336f02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -344,6 +344,10 @@ struct address_space_operations {
 	/* Set a page dirty.  Return true if this dirtied it */
 	int (*set_page_dirty)(struct page *page);
 
+	/*
+	 * Reads in the requested pages. Unlike ->readpage(), this is
+	 * PURELY used for read-ahead!.
+	 */
 	int (*readpages)(struct file *filp, struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages);
 
-- 
cgit v1.2.3


From c9f4cd71383576a916e7fca99c490fc92a289f5a Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 17 Aug 2018 15:45:49 -0700
Subject: mm, huge page: copy target sub-page last when copy huge page

Huge page helps to reduce TLB miss rate, but it has higher cache
footprint, sometimes this may cause some issue.  For example, when
copying huge page on x86_64 platform, the cache footprint is 4M.  But on
a Xeon E5 v3 2699 CPU, there are 18 cores, 36 threads, and only 45M LLC
(last level cache).  That is, in average, there are 2.5M LLC for each
core and 1.25M LLC for each thread.

If the cache contention is heavy when copying the huge page, and we copy
the huge page from the begin to the end, it is possible that the begin
of huge page is evicted from the cache after we finishing copying the
end of the huge page.  And it is possible for the application to access
the begin of the huge page after copying the huge page.

In c79b57e462b5d ("mm: hugetlb: clear target sub-page last when clearing
huge page"), to keep the cache lines of the target subpage hot, the
order to clear the subpages in the huge page in clear_huge_page() is
changed to clearing the subpage which is furthest from the target
subpage firstly, and the target subpage last.  The similar order
changing helps huge page copying too.  That is implemented in this
patch.  Because we have put the order algorithm into a separate
function, the implementation is quite simple.

The patch is a generic optimization which should benefit quite some
workloads, not for a specific use case.  To demonstrate the performance
benefit of the patch, we tested it with vm-scalability run on
transparent huge page.

With this patch, the throughput increases ~16.6% in vm-scalability
anon-cow-seq test case with 36 processes on a 2 socket Xeon E5 v3 2699
system (36 cores, 72 threads).  The test case set
/sys/kernel/mm/transparent_hugepage/enabled to be always, mmap() a big
anonymous memory area and populate it, then forked 36 child processes,
each writes to the anonymous memory area from the begin to the end, so
cause copy on write.  For each child process, other child processes
could be seen as other workloads which generate heavy cache pressure.
At the same time, the IPC (instruction per cycle) increased from 0.63 to
0.78, and the time spent in user space is reduced ~7.2%.

Link: http://lkml.kernel.org/r/20180524005851.4079-3-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Shaohua Li <shli@fb.com>
Cc: Christopher Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  3 ++-
 mm/huge_memory.c   |  3 ++-
 mm/memory.c        | 30 +++++++++++++++++++++++-------
 3 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 68a5121694ef..2fb32d1561eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2752,7 +2752,8 @@ extern void clear_huge_page(struct page *page,
 			    unsigned long addr_hint,
 			    unsigned int pages_per_huge_page);
 extern void copy_user_huge_page(struct page *dst, struct page *src,
-				unsigned long addr, struct vm_area_struct *vma,
+				unsigned long addr_hint,
+				struct vm_area_struct *vma,
 				unsigned int pages_per_huge_page);
 extern long copy_huge_page_from_user(struct page *dst_page,
 				const void __user *usr_src,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 064a9d78879d..78427af91de9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1328,7 +1328,8 @@ alloc:
 	if (!page)
 		clear_huge_page(new_page, vmf->address, HPAGE_PMD_NR);
 	else
-		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+		copy_user_huge_page(new_page, page, vmf->address,
+				    vma, HPAGE_PMD_NR);
 	__SetPageUptodate(new_page);
 
 	mmun_start = haddr;
diff --git a/mm/memory.c b/mm/memory.c
index 65bb59e031c9..175f344e1523 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4705,11 +4705,31 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
 	}
 }
 
+struct copy_subpage_arg {
+	struct page *dst;
+	struct page *src;
+	struct vm_area_struct *vma;
+};
+
+static void copy_subpage(unsigned long addr, int idx, void *arg)
+{
+	struct copy_subpage_arg *copy_arg = arg;
+
+	copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
+			   addr, copy_arg->vma);
+}
+
 void copy_user_huge_page(struct page *dst, struct page *src,
-			 unsigned long addr, struct vm_area_struct *vma,
+			 unsigned long addr_hint, struct vm_area_struct *vma,
 			 unsigned int pages_per_huge_page)
 {
-	int i;
+	unsigned long addr = addr_hint &
+		~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
+	struct copy_subpage_arg arg = {
+		.dst = dst,
+		.src = src,
+		.vma = vma,
+	};
 
 	if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
 		copy_user_gigantic_page(dst, src, addr, vma,
@@ -4717,11 +4737,7 @@ void copy_user_huge_page(struct page *dst, struct page *src,
 		return;
 	}
 
-	might_sleep();
-	for (i = 0; i < pages_per_huge_page; i++) {
-		cond_resched();
-		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
-	}
+	process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
 long copy_huge_page_from_user(struct page *dst_page,
-- 
cgit v1.2.3


From 4fbce633910ed80b135b84160a22b219080c8082 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Fri, 17 Aug 2018 15:46:22 -0700
Subject: mm/memory_hotplug.c: make register_mem_sect_under_node() a callback
 of walk_memory_range()

link_mem_sections() and walk_memory_range() share most of the code, so
we can use convert link_mem_sections() into a dummy function that calls
walk_memory_range() with a callback to register_mem_sect_under_node().

This patch converts register_mem_sect_under_node() in order to match a
walk_memory_range's callback, getting rid of the check_nid argument and
checking instead if the system is still boothing, since we only have to
check for the nid if the system is in such state.

Link: http://lkml.kernel.org/r/20180622111839.10071-4-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Suggested-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Reza Arbab <arbab@linux.vnet.ibm.com>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 44 ++++++--------------------------------------
 include/linux/node.h | 12 +++++++-----
 mm/memory_hotplug.c  |  5 +----
 3 files changed, 14 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index a5e821d09656..845d5523812b 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -399,10 +399,9 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 }
 
 /* register memory section under specified node if it spans that node */
-int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
-				 bool check_nid)
+int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
 {
-	int ret;
+	int ret, nid = *(int *)arg;
 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
 
 	if (!mem_blk)
@@ -433,7 +432,7 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
 		 * case, during hotplug we know that all pages in the memory
 		 * block belong to the same node.
 		 */
-		if (check_nid) {
+		if (system_state == SYSTEM_BOOTING) {
 			page_nid = get_nid_for_pfn(pfn);
 			if (page_nid < 0)
 				continue;
@@ -490,41 +489,10 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 	return 0;
 }
 
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages,
-		      bool check_nid)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
 {
-	unsigned long end_pfn = start_pfn + nr_pages;
-	unsigned long pfn;
-	struct memory_block *mem_blk = NULL;
-	int err = 0;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-		struct mem_section *mem_sect;
-		int ret;
-
-		if (!present_section_nr(section_nr))
-			continue;
-		mem_sect = __nr_to_section(section_nr);
-
-		/* same memblock ? */
-		if (mem_blk)
-			if ((section_nr >= mem_blk->start_section_nr) &&
-			    (section_nr <= mem_blk->end_section_nr))
-				continue;
-
-		mem_blk = find_memory_block_hinted(mem_sect, mem_blk);
-
-		ret = register_mem_sect_under_node(mem_blk, nid, check_nid);
-		if (!err)
-			err = ret;
-
-		/* discard ref obtained in find_memory_block() */
-	}
-
-	if (mem_blk)
-		kobject_put(&mem_blk->dev.kobj);
-	return err;
+	return walk_memory_range(start_pfn, end_pfn, (void *)&nid,
+					register_mem_sect_under_node);
 }
 
 #ifdef CONFIG_HUGETLBFS
diff --git a/include/linux/node.h b/include/linux/node.h
index 6d336e38d155..257bb3d6d014 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -33,10 +33,10 @@ typedef  void (*node_registration_func_t)(struct node *);
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
 extern int link_mem_sections(int nid, unsigned long start_pfn,
-			     unsigned long nr_pages, bool check_nid);
+			     unsigned long end_pfn);
 #else
 static inline int link_mem_sections(int nid, unsigned long start_pfn,
-				    unsigned long nr_pages, bool check_nid)
+				    unsigned long end_pfn)
 {
 	return 0;
 }
@@ -54,12 +54,14 @@ static inline int register_one_node(int nid)
 
 	if (node_online(nid)) {
 		struct pglist_data *pgdat = NODE_DATA(nid);
+		unsigned long start_pfn = pgdat->node_start_pfn;
+		unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
 
 		error = __register_one_node(nid);
 		if (error)
 			return error;
 		/* link memory sections under this node */
-		error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true);
+		error = link_mem_sections(nid, start_pfn, end_pfn);
 	}
 
 	return error;
@@ -69,7 +71,7 @@ extern void unregister_one_node(int nid);
 extern int register_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
-						int nid, bool check_nid);
+						void *arg);
 extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
 					   unsigned long phys_index);
 
@@ -99,7 +101,7 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	return 0;
 }
 static inline int register_mem_sect_under_node(struct memory_block *mem_blk,
-							int nid, bool check_nid)
+							void *arg)
 {
 	return 0;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e2ed64b994e5..4eb6e824a80c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1123,7 +1123,6 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	u64 start, size;
 	bool new_node = false;
 	int ret;
-	unsigned long start_pfn, nr_pages;
 
 	start = res->start;
 	size = resource_size(res);
@@ -1164,9 +1163,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	}
 
 	/* link memory sections under this node.*/
-	start_pfn = start >> PAGE_SHIFT;
-	nr_pages = size >> PAGE_SHIFT;
-	ret = link_mem_sections(nid, start_pfn, nr_pages, false);
+	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
 	BUG_ON(ret);
 
 	/* create new memmap entry */
-- 
cgit v1.2.3


From a3266bd49c721e2e0a71f352d83713fbd60caadb Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 17 Aug 2018 15:46:29 -0700
Subject: mm: provide a fallback for PAGE_KERNEL_RO for architectures

Some architectures do not define certain PAGE_KERNEL_* flags, this is
either because:

 a) The way to implement some of these flags is *not yet ported*, or
 b) The architecture *has no way* to describe them

Over time we have accumulated a few PAGE_KERNEL_* fallback workarounds
for architectures in the kernel which do not define them using
*relatively safe* equivalents.  Move these scattered fallback hacks into
asm-generic.

We start off with PAGE_KERNEL_RO using PAGE_KERNEL as a fallback.  This
has been in place on the firmware loader for years.  Move the fallback
into the respective asm-generic header.

Link: http://lkml.kernel.org/r/20180510185507.2439-2-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/firmware_loader/fallback.c |  5 -----
 include/asm-generic/pgtable.h           | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 202324291542..b5c865fe263b 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -219,11 +219,6 @@ static ssize_t firmware_loading_show(struct device *dev,
 	return sprintf(buf, "%d\n", loading);
 }
 
-/* Some architectures don't have PAGE_KERNEL_RO */
-#ifndef PAGE_KERNEL_RO
-#define PAGE_KERNEL_RO PAGE_KERNEL
-#endif
-
 /* one pages buffer should be mapped/unmapped only once */
 static int map_fw_priv_pages(struct fw_priv *fw_priv)
 {
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index a75cb371cd19..62bbd6f23c35 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1095,6 +1095,20 @@ static inline bool arch_has_pfn_modify_check(void)
 }
 #endif /* !_HAVE_ARCH_PFN_MODIFY_ALLOWED */
 
+/*
+ * Architecture PAGE_KERNEL_* fallbacks
+ *
+ * Some architectures don't define certain PAGE_KERNEL_* flags. This is either
+ * because they really don't support them, or the port needs to be updated to
+ * reflect the required functionality. Below are a set of relatively safe
+ * fallbacks, as best effort, which we can count on in lieu of the architectures
+ * not defining them on their own yet.
+ */
+
+#ifndef PAGE_KERNEL_RO
+# define PAGE_KERNEL_RO PAGE_KERNEL
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
-- 
cgit v1.2.3


From 1a9b4b3d75679fbe8c3bb8fb7e957ea693b6a89c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 17 Aug 2018 15:46:32 -0700
Subject: mm: provide a fallback for PAGE_KERNEL_EXEC for architectures

Some architectures just don't have PAGE_KERNEL_EXEC.  The mm/nommu.c and
mm/vmalloc.c code have been using PAGE_KERNEL as a fallback for years.
Move this fallback to asm-generic.

Link: http://lkml.kernel.org/r/20180510185507.2439-3-mcgrof@kernel.org
Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/pgtable.h | 4 ++++
 mm/nommu.c                    | 4 ----
 mm/vmalloc.c                  | 4 ----
 3 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 62bbd6f23c35..88ebc6102c7c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1109,6 +1109,10 @@ static inline bool arch_has_pfn_modify_check(void)
 # define PAGE_KERNEL_RO PAGE_KERNEL
 #endif
 
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
diff --git a/mm/nommu.c b/mm/nommu.c
index 9fc9e43335b6..e4aac33216ae 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -364,10 +364,6 @@ void *vzalloc_node(unsigned long size, int node)
 }
 EXPORT_SYMBOL(vzalloc_node);
 
-#ifndef PAGE_KERNEL_EXEC
-# define PAGE_KERNEL_EXEC PAGE_KERNEL
-#endif
-
 /**
  *	vmalloc_exec  -  allocate virtually contiguous, executable memory
  *	@size:		allocation size
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cfea25be7754..a728fc492557 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1907,10 +1907,6 @@ void *vzalloc_node(unsigned long size, int node)
 }
 EXPORT_SYMBOL(vzalloc_node);
 
-#ifndef PAGE_KERNEL_EXEC
-# define PAGE_KERNEL_EXEC PAGE_KERNEL
-#endif
-
 /**
  *	vmalloc_exec  -  allocate virtually contiguous, executable memory
  *	@size:		allocation size
-- 
cgit v1.2.3


From dc0b58643aff8b378086f25cce6789ccba68cbcb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 17 Aug 2018 15:46:36 -0700
Subject: mm: introduce mem_cgroup_put() helper

Introduce the mem_cgroup_put() helper, which helps to eliminate guarding
memcg css release with "#ifdef CONFIG_MEMCG" in multiple places.

Link: http://lkml.kernel.org/r/20180623000600.5818-2-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 680d3395fc83..42f4719def32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -378,6 +378,11 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+	css_put(&memcg->css);
+}
+
 #define mem_cgroup_from_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -850,6 +855,10 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
 	return true;
 }
 
+static inline void mem_cgroup_put(struct mem_cgroup *memcg)
+{
+}
+
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
 		struct mem_cgroup *prev,
-- 
cgit v1.2.3


From d46eb14b735b11927d4bdc2d1854c311af19de6d Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 17 Aug 2018 15:46:39 -0700
Subject: fs: fsnotify: account fsnotify metadata to kmemcg

Patch series "Directed kmem charging", v8.

The Linux kernel's memory cgroup allows limiting the memory usage of the
jobs running on the system to provide isolation between the jobs.  All
the kernel memory allocated in the context of the job and marked with
__GFP_ACCOUNT will also be included in the memory usage and be limited
by the job's limit.

The kernel memory can only be charged to the memcg of the process in
whose context kernel memory was allocated.  However there are cases
where the allocated kernel memory should be charged to the memcg
different from the current processes's memcg.  This patch series
contains two such concrete use-cases i.e.  fsnotify and buffer_head.

The fsnotify event objects can consume a lot of system memory for large
or unlimited queues if there is either no or slow listener.  The events
are allocated in the context of the event producer.  However they should
be charged to the event consumer.  Similarly the buffer_head objects can
be allocated in a memcg different from the memcg of the page for which
buffer_head objects are being allocated.

To solve this issue, this patch series introduces mechanism to charge
kernel memory to a given memcg.  In case of fsnotify events, the memcg
of the consumer can be used for charging and for buffer_head, the memcg
of the page can be charged.  For directed charging, the caller can use
the scope API memalloc_[un]use_memcg() to specify the memcg to charge
for all the __GFP_ACCOUNT allocations within the scope.

This patch (of 2):

A lot of memory can be consumed by the events generated for the huge or
unlimited queues if there is either no or slow listener.  This can cause
system level memory pressure or OOMs.  So, it's better to account the
fsnotify kmem caches to the memcg of the listener.

However the listener can be in a different memcg than the memcg of the
producer and these allocations happen in the context of the event
producer.  This patch introduces remote memcg charging API which the
producer can use to charge the allocations to the memcg of the listener.

There are seven fsnotify kmem caches and among them allocations from
dnotify_struct_cache, dnotify_mark_cache, fanotify_mark_cache and
inotify_inode_mark_cachep happens in the context of syscall from the
listener.  So, SLAB_ACCOUNT is enough for these caches.

The objects from fsnotify_mark_connector_cachep are not accounted as
they are small compared to the notification mark or events and it is
unclear whom to account connector to since it is shared by all events
attached to the inode.

The allocations from the event caches happen in the context of the event
producer.  For such caches we will need to remote charge the allocations
to the listener's memcg.  Thus we save the memcg reference in the
fsnotify_group structure of the listener.

This patch has also moved the members of fsnotify_group to keep the size
same, at least for 64 bit build, even with additional member by filling
the holes.

[shakeelb@google.com: use GFP_KERNEL_ACCOUNT rather than open-coding it]
  Link: http://lkml.kernel.org/r/20180702215439.211597-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/20180627191250.209150-2-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/notify/dnotify/dnotify.c          |  5 +++--
 fs/notify/fanotify/fanotify.c        | 14 ++++++++++----
 fs/notify/fanotify/fanotify_user.c   |  5 ++++-
 fs/notify/group.c                    |  3 +++
 fs/notify/inotify/inotify_fsnotify.c |  7 ++++++-
 fs/notify/inotify/inotify_user.c     |  5 ++++-
 include/linux/fsnotify_backend.h     | 12 ++++++++----
 include/linux/memcontrol.h           | 10 +++++++++-
 include/linux/sched.h                |  3 +++
 include/linux/sched/mm.h             | 37 ++++++++++++++++++++++++++++++++++++
 kernel/fork.c                        |  3 +++
 mm/memcontrol.c                      | 37 ++++++++++++++++++++++++++++++++----
 12 files changed, 123 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e2bea2ac5dfb..a6365e6bc047 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -384,8 +384,9 @@ out_err:
 
 static int __init dnotify_init(void)
 {
-	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct,
+					  SLAB_PANIC|SLAB_ACCOUNT);
+	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC|SLAB_ACCOUNT);
 
 	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f90842efea13..eb4e75175cfb 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/audit.h>
+#include <linux/sched/mm.h>
 
 #include "fanotify.h"
 
@@ -140,8 +141,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 						 struct inode *inode, u32 mask,
 						 const struct path *path)
 {
-	struct fanotify_event_info *event;
-	gfp_t gfp = GFP_KERNEL;
+	struct fanotify_event_info *event = NULL;
+	gfp_t gfp = GFP_KERNEL_ACCOUNT;
 
 	/*
 	 * For queues with unlimited length lost events are not expected and
@@ -151,19 +152,22 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 	if (group->max_events == UINT_MAX)
 		gfp |= __GFP_NOFAIL;
 
+	/* Whoever is interested in the event, pays for the allocation. */
+	memalloc_use_memcg(group->memcg);
+
 	if (fanotify_is_perm_event(mask)) {
 		struct fanotify_perm_event_info *pevent;
 
 		pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
 		if (!pevent)
-			return NULL;
+			goto out;
 		event = &pevent->fae;
 		pevent->response = 0;
 		goto init;
 	}
 	event = kmem_cache_alloc(fanotify_event_cachep, gfp);
 	if (!event)
-		return NULL;
+		goto out;
 init: __maybe_unused
 	fsnotify_init_event(&event->fse, inode, mask);
 	event->tgid = get_pid(task_tgid(current));
@@ -174,6 +178,8 @@ init: __maybe_unused
 		event->path.mnt = NULL;
 		event->path.dentry = NULL;
 	}
+out:
+	memalloc_unuse_memcg();
 	return event;
 }
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ec4d8c59d0e3..0cf45041dc32 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/compat.h>
 #include <linux/sched/signal.h>
+#include <linux/memcontrol.h>
 
 #include <asm/ioctls.h>
 
@@ -756,6 +757,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	group->fanotify_data.user = user;
 	atomic_inc(&user->fanotify_listeners);
+	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
 	if (unlikely(!oevent)) {
@@ -957,7 +959,8 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
+					 SLAB_PANIC|SLAB_ACCOUNT);
 	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS)) {
 		fanotify_perm_event_cachep =
diff --git a/fs/notify/group.c b/fs/notify/group.c
index aa5468f23e45..c03b83662876 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -22,6 +22,7 @@
 #include <linux/srcu.h>
 #include <linux/rculist.h>
 #include <linux/wait.h>
+#include <linux/memcontrol.h>
 
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
@@ -36,6 +37,8 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
 	if (group->ops->free_group_priv)
 		group->ops->free_group_priv(group);
 
+	mem_cgroup_put(group->memcg);
+
 	kfree(group);
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ab6dde38a14..f4184b4f3815 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -31,6 +31,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/sched/user.h>
+#include <linux/sched/mm.h>
 
 #include "inotify.h"
 
@@ -98,7 +99,11 @@ int inotify_handle_event(struct fsnotify_group *group,
 	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);
 
-	event = kmalloc(alloc_len, GFP_KERNEL);
+	/* Whoever is interested in the event, pays for the allocation. */
+	memalloc_use_memcg(group->memcg);
+	event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
+	memalloc_unuse_memcg();
+
 	if (unlikely(!event)) {
 		/*
 		 * Treat lost event due to ENOMEM the same way as queue
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1cf5b779d862..749c46ababa0 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
+#include <linux/memcontrol.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
@@ -636,6 +637,7 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 	oevent->name_len = 0;
 
 	group->max_events = max_events;
+	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	spin_lock_init(&group->inotify_data.idr_lock);
 	idr_init(&group->inotify_data.idr);
@@ -808,7 +810,8 @@ static int __init inotify_user_setup(void)
 
 	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
 
-	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
+					       SLAB_PANIC|SLAB_ACCOUNT);
 
 	inotify_max_queued_events = 16384;
 	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b38964a7a521..a0c4790c5302 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -84,6 +84,8 @@ struct fsnotify_event_private_data;
 struct fsnotify_fname;
 struct fsnotify_iter_info;
 
+struct mem_cgroup;
+
 /*
  * Each group much define these ops.  The fsnotify infrastructure will call
  * these operations for each relevant group.
@@ -127,6 +129,8 @@ struct fsnotify_event {
  * everything will be cleaned up.
  */
 struct fsnotify_group {
+	const struct fsnotify_ops *ops;	/* how this group handles things */
+
 	/*
 	 * How the refcnt is used is up to each group.  When the refcnt hits 0
 	 * fsnotify will clean up all of the resources associated with this group.
@@ -137,8 +141,6 @@ struct fsnotify_group {
 	 */
 	refcount_t refcnt;		/* things with interest in this group */
 
-	const struct fsnotify_ops *ops;	/* how this group handles things */
-
 	/* needed to send notification to userspace */
 	spinlock_t notification_lock;		/* protect the notification_list */
 	struct list_head notification_list;	/* list of event_holder this group needs to send to userspace */
@@ -160,6 +162,8 @@ struct fsnotify_group {
 	atomic_t num_marks;		/* 1 for each mark and 1 for not being
 					 * past the point of no return when freeing
 					 * a group */
+	atomic_t user_waits;		/* Number of tasks waiting for user
+					 * response */
 	struct list_head marks_list;	/* all inode marks for this group */
 
 	struct fasync_struct *fsn_fa;    /* async notification */
@@ -167,8 +171,8 @@ struct fsnotify_group {
 	struct fsnotify_event *overflow_event;	/* Event we queue when the
 						 * notification list is too
 						 * full */
-	atomic_t user_waits;		/* Number of tasks waiting for user
-					 * response */
+
+	struct mem_cgroup *memcg;	/* memcg to charge allocations */
 
 	/* groups can define private fields here or use the void *private */
 	union {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 42f4719def32..121e218d2a21 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -373,6 +373,8 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -380,7 +382,8 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
-	css_put(&memcg->css);
+	if (memcg)
+		css_put(&memcg->css);
 }
 
 #define mem_cgroup_from_counter(counter, member)	\
@@ -855,6 +858,11 @@ static inline bool task_in_mem_cgroup(struct task_struct *task,
 	return true;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95a5018c338e..1827f4a7a6de 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1152,6 +1152,9 @@ struct task_struct {
 
 	/* Number of pages to reclaim on returning to userland: */
 	unsigned int			memcg_nr_pages_over_high;
+
+	/* Used by memcontrol for targeted memcg charge: */
+	struct mem_cgroup		*active_memcg;
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44d356f5e47c..aebb370a0006 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -248,6 +248,43 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMCG
+/**
+ * memalloc_use_memcg - Starts the remote memcg charging scope.
+ * @memcg: memcg to charge.
+ *
+ * This function marks the beginning of the remote memcg charging scope. All the
+ * __GFP_ACCOUNT allocations till the end of the scope will be charged to the
+ * given memcg.
+ *
+ * NOTE: This function is not nesting safe.
+ */
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+	WARN_ON_ONCE(current->active_memcg);
+	current->active_memcg = memcg;
+}
+
+/**
+ * memalloc_unuse_memcg - Ends the remote memcg charging scope.
+ *
+ * This function marks the end of the remote memcg charging scope started by
+ * memalloc_use_memcg().
+ */
+static inline void memalloc_unuse_memcg(void)
+{
+	current->active_memcg = NULL;
+}
+#else
+static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+{
+}
+
+static inline void memalloc_unuse_memcg(void)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMBARRIER
 enum {
 	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
diff --git a/kernel/fork.c b/kernel/fork.c
index 33112315b5c0..5ee74c113381 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -871,6 +871,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->use_memdelay = 0;
 #endif
 
+#ifdef CONFIG_MEMCG
+	tsk->active_memcg = NULL;
+#endif
 	return tsk;
 
 free_stack:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b836e7f00309..bf9cf738c836 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -678,9 +678,20 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 }
 EXPORT_SYMBOL(mem_cgroup_from_task);
 
-static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
+/**
+ * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg.
+ * @mm: mm from which memcg should be extracted. It can be NULL.
+ *
+ * Obtain a reference on mm->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is
+ * returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
-	struct mem_cgroup *memcg = NULL;
+	struct mem_cgroup *memcg;
+
+	if (mem_cgroup_disabled())
+		return NULL;
 
 	rcu_read_lock();
 	do {
@@ -700,6 +711,24 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	rcu_read_unlock();
 	return memcg;
 }
+EXPORT_SYMBOL(get_mem_cgroup_from_mm);
+
+/**
+ * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
+ */
+static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+	if (unlikely(current->active_memcg)) {
+		struct mem_cgroup *memcg = root_mem_cgroup;
+
+		rcu_read_lock();
+		if (css_tryget_online(&current->active_memcg->css))
+			memcg = current->active_memcg;
+		rcu_read_unlock();
+		return memcg;
+	}
+	return get_mem_cgroup_from_mm(current->mm);
+}
 
 /**
  * mem_cgroup_iter - iterate over memory cgroup hierarchy
@@ -2261,7 +2290,7 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 	if (current->memcg_kmem_skip_account)
 		return cachep;
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
+	memcg = get_mem_cgroup_from_current();
 	kmemcg_id = READ_ONCE(memcg->kmemcg_id);
 	if (kmemcg_id < 0)
 		goto out;
@@ -2345,7 +2374,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	if (memcg_kmem_bypass())
 		return 0;
 
-	memcg = get_mem_cgroup_from_mm(current->mm);
+	memcg = get_mem_cgroup_from_current();
 	if (!mem_cgroup_is_root(memcg)) {
 		ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
 		if (!ret)
-- 
cgit v1.2.3


From f745c6f5fe75734f3b35d9d4e6ebe2a7d010ddda Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 17 Aug 2018 15:46:44 -0700
Subject: fs, mm: account buffer_head to kmemcg

The buffer_head can consume a significant amount of system memory and is
directly related to the amount of page cache.  In our production
environment we have observed that a lot of machines are spending a
significant amount of memory as buffer_head and can not be left as
system memory overhead.

Charging buffer_head is not as simple as adding __GFP_ACCOUNT to the
allocation.  The buffer_heads can be allocated in a memcg different from
the memcg of the page for which buffer_heads are being allocated.  One
concrete example is memory reclaim.  The reclaim can trigger I/O of
pages of any memcg on the system.  So, the right way to charge
buffer_head is to extract the memcg from the page for which buffer_heads
are being allocated and then use targeted memcg charging API.

[shakeelb@google.com: use __GFP_ACCOUNT for directed memcg charging]
  Link: http://lkml.kernel.org/r/20180702220208.213380-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/20180627191250.209150-3-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                | 12 ++++++++++--
 include/linux/memcontrol.h |  7 +++++++
 mm/memcontrol.c            | 22 ++++++++++++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/buffer.c b/fs/buffer.c
index c8c2b7d8b8d6..4cc679d5bf58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -45,6 +45,7 @@
 #include <linux/mpage.h>
 #include <linux/bit_spinlock.h>
 #include <linux/pagevec.h>
+#include <linux/sched/mm.h>
 #include <trace/events/block.h>
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -813,12 +814,16 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 		bool retry)
 {
 	struct buffer_head *bh, *head;
-	gfp_t gfp = GFP_NOFS;
+	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
 	long offset;
+	struct mem_cgroup *memcg;
 
 	if (retry)
 		gfp |= __GFP_NOFAIL;
 
+	memcg = get_mem_cgroup_from_page(page);
+	memalloc_use_memcg(memcg);
+
 	head = NULL;
 	offset = PAGE_SIZE;
 	while ((offset -= size) >= 0) {
@@ -835,6 +840,9 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
 		/* Link the buffer to its page */
 		set_bh_page(bh, page, offset);
 	}
+out:
+	memalloc_unuse_memcg();
+	mem_cgroup_put(memcg);
 	return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -848,7 +856,7 @@ no_grow:
 		} while (head);
 	}
 
-	return NULL;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(alloc_page_buffers);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 121e218d2a21..50e3e807b427 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -375,6 +375,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
+
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -863,6 +865,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 	return NULL;
 }
 
+static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+	return NULL;
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index bf9cf738c836..c071af193986 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -713,6 +713,28 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
+/**
+ * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
+ * @page: page from which memcg should be extracted.
+ *
+ * Obtain a reference on page->memcg and returns it if successful. Otherwise
+ * root_mem_cgroup is returned.
+ */
+struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
+{
+	struct mem_cgroup *memcg = page->mem_cgroup;
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
+	rcu_read_lock();
+	if (!memcg || !css_tryget_online(&memcg->css))
+		memcg = root_mem_cgroup;
+	rcu_read_unlock();
+	return memcg;
+}
+EXPORT_SYMBOL(get_mem_cgroup_from_page);
+
 /**
  * If current->active_memcg is non-NULL, do not fallback to current->mm->memcg.
  */
-- 
cgit v1.2.3


From 0207df4fa1a869281ddbf72db6203dbf036b3e1a Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:04 -0700
Subject: kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN

KASAN learns about hotadded memory via the memory hotplug notifier.
devm_memremap_pages() intentionally skips calling memory hotplug
notifiers.  So KASAN doesn't know anything about new memory added by
devm_memremap_pages().  This causes a crash when KASAN tries to access
non-existent shadow memory:

 BUG: unable to handle kernel paging request at ffffed0078000000
 RIP: 0010:check_memory_region+0x82/0x1e0
 Call Trace:
  memcpy+0x1f/0x50
  pmem_do_bvec+0x163/0x720
  pmem_make_request+0x305/0xac0
  generic_make_request+0x54f/0xcf0
  submit_bio+0x9c/0x370
  submit_bh_wbc+0x4c7/0x700
  block_read_full_page+0x5ef/0x870
  do_read_cache_page+0x2b8/0xb30
  read_dev_sector+0xbd/0x3f0
  read_lba.isra.0+0x277/0x670
  efi_partition+0x41a/0x18f0
  check_partition+0x30d/0x5e9
  rescan_partitions+0x18c/0x840
  __blkdev_get+0x859/0x1060
  blkdev_get+0x23f/0x810
  __device_add_disk+0x9c8/0xde0
  pmem_attach_disk+0x9a8/0xf50
  nvdimm_bus_probe+0xf3/0x3c0
  driver_probe_device+0x493/0xbd0
  bus_for_each_drv+0x118/0x1b0
  __device_attach+0x1cd/0x2b0
  bus_probe_device+0x1ac/0x260
  device_add+0x90d/0x1380
  nd_async_device_register+0xe/0x50
  async_run_entry_fn+0xc3/0x5d0
  process_one_work+0xa0a/0x1810
  worker_thread+0x87/0xe80
  kthread+0x2d7/0x390
  ret_from_fork+0x3a/0x50

Add kasan_add_zero_shadow()/kasan_remove_zero_shadow() - post mm_init()
interface to map/unmap kasan_zero_page at requested virtual addresses.
And use it to add/remove the shadow memory for hotplugged/unplugged
device memory.

Link: http://lkml.kernel.org/r/20180629164932.740-1-aryabinin@virtuozzo.com
Fixes: 41e94a851304 ("add devm_memremap_pages")
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h |  13 ++-
 kernel/memremap.c     |  10 ++
 mm/kasan/kasan_init.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 325 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index de784fd11d12..46aae129917c 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -20,7 +20,7 @@ extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
 extern pud_t kasan_zero_pud[PTRS_PER_PUD];
 extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
 
-void kasan_populate_zero_shadow(const void *shadow_start,
+int kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end);
 
 static inline void *kasan_mem_to_shadow(const void *addr)
@@ -71,6 +71,9 @@ struct kasan_cache {
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
+int kasan_add_zero_shadow(void *start, unsigned long size);
+void kasan_remove_zero_shadow(void *start, unsigned long size);
+
 size_t ksize(const void *);
 static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
 size_t kasan_metadata_size(struct kmem_cache *cache);
@@ -124,6 +127,14 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
+static inline int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+	return 0;
+}
+static inline void kasan_remove_zero_shadow(void *start,
+					unsigned long size)
+{}
+
 static inline void kasan_unpoison_slab(const void *ptr) { }
 static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 38283363da06..1f87ea6b6545 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/pfn_t.h>
 #include <linux/io.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/memory_hotplug.h>
 #include <linux/swap.h>
@@ -137,6 +138,7 @@ static void devm_memremap_pages_release(void *data)
 	mem_hotplug_begin();
 	arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
 			&pgmap->altmap : NULL);
+	kasan_remove_zero_shadow(__va(align_start), align_size);
 	mem_hotplug_done();
 
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -239,6 +241,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		goto err_pfn_remap;
 
 	mem_hotplug_begin();
+	error = kasan_add_zero_shadow(__va(align_start), align_size);
+	if (error) {
+		mem_hotplug_done();
+		goto err_kasan;
+	}
+
 	error = arch_add_memory(nid, align_start, align_size, altmap, false);
 	if (!error)
 		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
@@ -267,6 +275,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	return __va(res->start);
 
  err_add_memory:
+	kasan_remove_zero_shadow(__va(align_start), align_size);
+ err_kasan:
 	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index f436246ccc79..7a2a2f13f86f 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -17,10 +17,13 @@
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 
+#include "kasan.h"
+
 /*
  * This page serves two purposes:
  *   - It used as early shadow memory. The entire shadow region populated
@@ -32,22 +35,59 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
 
 #if CONFIG_PGTABLE_LEVELS > 4
 p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+	return pgd_page(pgd) == virt_to_page(lm_alias(kasan_zero_p4d));
+}
+#else
+static inline bool kasan_p4d_table(pgd_t pgd)
+{
+	return 0;
+}
 #endif
 #if CONFIG_PGTABLE_LEVELS > 3
 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+	return p4d_page(p4d) == virt_to_page(lm_alias(kasan_zero_pud));
+}
+#else
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+	return 0;
+}
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
 pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+static inline bool kasan_pmd_table(pud_t pud)
+{
+	return pud_page(pud) == virt_to_page(lm_alias(kasan_zero_pmd));
+}
+#else
+static inline bool kasan_pmd_table(pud_t pud)
+{
+	return 0;
+}
 #endif
 pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
 
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+	return pmd_page(pmd) == virt_to_page(lm_alias(kasan_zero_pte));
+}
+
+static inline bool kasan_zero_page_entry(pte_t pte)
+{
+	return pte_page(pte) == virt_to_page(lm_alias(kasan_zero_page));
+}
+
 static __init void *early_alloc(size_t size, int node)
 {
 	return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
 					BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
-static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __ref zero_pte_populate(pmd_t *pmd, unsigned long addr,
 				unsigned long end)
 {
 	pte_t *pte = pte_offset_kernel(pmd, addr);
@@ -63,7 +103,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
 	}
 }
 
-static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 				unsigned long end)
 {
 	pmd_t *pmd = pmd_offset(pud, addr);
@@ -78,14 +118,24 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
 		}
 
 		if (pmd_none(*pmd)) {
-			pmd_populate_kernel(&init_mm, pmd,
-					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pte_t *p;
+
+			if (slab_is_available())
+				p = pte_alloc_one_kernel(&init_mm, addr);
+			else
+				p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+			if (!p)
+				return -ENOMEM;
+
+			pmd_populate_kernel(&init_mm, pmd, p);
 		}
 		zero_pte_populate(pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
+
+	return 0;
 }
 
-static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
+static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
 				unsigned long end)
 {
 	pud_t *pud = pud_offset(p4d, addr);
@@ -103,14 +153,24 @@ static void __init zero_pud_populate(p4d_t *p4d, unsigned long addr,
 		}
 
 		if (pud_none(*pud)) {
-			pud_populate(&init_mm, pud,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pmd_t *p;
+
+			if (slab_is_available()) {
+				p = pmd_alloc(&init_mm, pud, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				pud_populate(&init_mm, pud,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_pmd_populate(pud, addr, next);
 	} while (pud++, addr = next, addr != end);
+
+	return 0;
 }
 
-static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
+static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 				unsigned long end)
 {
 	p4d_t *p4d = p4d_offset(pgd, addr);
@@ -132,11 +192,21 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 		}
 
 		if (p4d_none(*p4d)) {
-			p4d_populate(&init_mm, p4d,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			pud_t *p;
+
+			if (slab_is_available()) {
+				p = pud_alloc(&init_mm, p4d, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				p4d_populate(&init_mm, p4d,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_pud_populate(p4d, addr, next);
 	} while (p4d++, addr = next, addr != end);
+
+	return 0;
 }
 
 /**
@@ -145,7 +215,7 @@ static void __init zero_p4d_populate(pgd_t *pgd, unsigned long addr,
  * @shadow_start - start of the memory range to populate
  * @shadow_end   - end of the memory range to populate
  */
-void __init kasan_populate_zero_shadow(const void *shadow_start,
+int __ref kasan_populate_zero_shadow(const void *shadow_start,
 				const void *shadow_end)
 {
 	unsigned long addr = (unsigned long)shadow_start;
@@ -191,9 +261,229 @@ void __init kasan_populate_zero_shadow(const void *shadow_start,
 		}
 
 		if (pgd_none(*pgd)) {
-			pgd_populate(&init_mm, pgd,
-				early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			p4d_t *p;
+
+			if (slab_is_available()) {
+				p = p4d_alloc(&init_mm, pgd, addr);
+				if (!p)
+					return -ENOMEM;
+			} else {
+				pgd_populate(&init_mm, pgd,
+					early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+			}
 		}
 		zero_p4d_populate(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
+
+	return 0;
+}
+
+static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *pte;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (!pte_none(*pte))
+			return;
+	}
+
+	pte_free_kernel(&init_mm, (pte_t *)page_to_virt(pmd_page(*pmd)));
+	pmd_clear(pmd);
+}
+
+static void kasan_free_pmd(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *pmd;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (!pmd_none(*pmd))
+			return;
+	}
+
+	pmd_free(&init_mm, (pmd_t *)page_to_virt(pud_page(*pud)));
+	pud_clear(pud);
+}
+
+static void kasan_free_pud(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *pud;
+	int i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	pud_free(&init_mm, (pud_t *)page_to_virt(p4d_page(*p4d)));
+	p4d_clear(p4d);
+}
+
+static void kasan_free_p4d(p4d_t *p4d_start, pgd_t *pgd)
+{
+	p4d_t *p4d;
+	int i;
+
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		p4d = p4d_start + i;
+		if (!p4d_none(*p4d))
+			return;
+	}
+
+	p4d_free(&init_mm, (p4d_t *)page_to_virt(pgd_page(*pgd)));
+	pgd_clear(pgd);
+}
+
+static void kasan_remove_pte_table(pte_t *pte, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		if (WARN_ON(!kasan_zero_page_entry(*pte)))
+			continue;
+		pte_clear(&init_mm, addr, pte);
+	}
+}
+
+static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pmd++) {
+		pte_t *pte;
+
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		if (kasan_pte_table(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE))
+				pmd_clear(pmd);
+			continue;
+		}
+		pte = pte_offset_kernel(pmd, addr);
+		kasan_remove_pte_table(pte, addr, next);
+		kasan_free_pte(pte_offset_kernel(pmd, 0), pmd);
+	}
+}
+
+static void kasan_remove_pud_table(pud_t *pud, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, pud++) {
+		pmd_t *pmd, *pmd_base;
+
+		next = pud_addr_end(addr, end);
+
+		if (!pud_present(*pud))
+			continue;
+
+		if (kasan_pmd_table(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE))
+				pud_clear(pud);
+			continue;
+		}
+		pmd = pmd_offset(pud, addr);
+		pmd_base = pmd_offset(pud, 0);
+		kasan_remove_pmd_table(pmd, addr, next);
+		kasan_free_pmd(pmd_base, pud);
+	}
+}
+
+static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr,
+				unsigned long end)
+{
+	unsigned long next;
+
+	for (; addr < end; addr = next, p4d++) {
+		pud_t *pud;
+
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_present(*p4d))
+			continue;
+
+		if (kasan_pud_table(*p4d)) {
+			if (IS_ALIGNED(addr, P4D_SIZE) &&
+			    IS_ALIGNED(next, P4D_SIZE))
+				p4d_clear(p4d);
+			continue;
+		}
+		pud = pud_offset(p4d, addr);
+		kasan_remove_pud_table(pud, addr, next);
+		kasan_free_pud(pud_offset(p4d, 0), p4d);
+	}
+}
+
+void kasan_remove_zero_shadow(void *start, unsigned long size)
+{
+	unsigned long addr, end, next;
+	pgd_t *pgd;
+
+	addr = (unsigned long)kasan_mem_to_shadow(start);
+	end = addr + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+	if (WARN_ON((unsigned long)start %
+			(KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+	    WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+		return;
+
+	for (; addr < end; addr = next) {
+		p4d_t *p4d;
+
+		next = pgd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		if (!pgd_present(*pgd))
+			continue;
+
+		if (kasan_p4d_table(*pgd)) {
+			if (IS_ALIGNED(addr, PGDIR_SIZE) &&
+			    IS_ALIGNED(next, PGDIR_SIZE))
+				pgd_clear(pgd);
+			continue;
+		}
+
+		p4d = p4d_offset(pgd, addr);
+		kasan_remove_p4d_table(p4d, addr, next);
+		kasan_free_p4d(p4d_offset(pgd, 0), pgd);
+	}
+}
+
+int kasan_add_zero_shadow(void *start, unsigned long size)
+{
+	int ret;
+	void *shadow_start, *shadow_end;
+
+	shadow_start = kasan_mem_to_shadow(start);
+	shadow_end = shadow_start + (size >> KASAN_SHADOW_SCALE_SHIFT);
+
+	if (WARN_ON((unsigned long)start %
+			(KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)) ||
+	    WARN_ON(size % (KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE)))
+		return -EINVAL;
+
+	ret = kasan_populate_zero_shadow(shadow_start, shadow_end);
+	if (ret)
+		kasan_remove_zero_shadow(shadow_start,
+					size >> KASAN_SHADOW_SCALE_SHIFT);
+	return ret;
 }
-- 
cgit v1.2.3


From 29ef680ae7c21110af8e6416d84d8a72fc147b14 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 17 Aug 2018 15:47:11 -0700
Subject: memcg, oom: move out_of_memory back to the charge path

Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full
callstack on OOM") has changed the ENOMEM semantic of memcg charges.
Rather than invoking the oom killer from the charging context it delays
the oom killer to the page fault path (pagefault_out_of_memory).  This
in turn means that many users (e.g.  slab or g-u-p) will get ENOMEM when
the corresponding memcg hits the hard limit and the memcg is is OOM.
This is behavior is inconsistent with !memcg case where the oom killer
is invoked from the allocation context and the allocator keeps retrying
until it succeeds.

The difference in the behavior is user visible.  mmap(MAP_POPULATE)
might result in not fully populated ranges while the mmap return code
doesn't tell that to the userspace.  Random syscalls might fail with
ENOMEM etc.

The primary motivation of the different memcg oom semantic was the
deadlock avoidance.  Things have changed since then, though.  We have an
async oom teardown by the oom reaper now and so we do not have to rely
on the victim to tear down its memory anymore.  Therefore we can return
to the original semantic as long as the memcg oom killer is not handed
over to the users space.

There is still one thing to be careful about here though.  If the oom
killer is not able to make any forward progress - e.g.  because there is
no eligible task to kill - then we have to bail out of the charge path
to prevent from same class of deadlocks.  We have basically two options
here.  Either we fail the charge with ENOMEM or force the charge and
allow overcharge.  The first option has been considered more harmful
than useful because rare inconsistencies in the ENOMEM behavior is hard
to test for and error prone.  Basically the same reason why the page
allocator doesn't fail allocations under such conditions.  The later
might allow runaways but those should be really unlikely unless somebody
misconfigures the system.  E.g.  allowing to migrate tasks away from the
memcg to a different unlimited memcg with move_charge_at_immigrate
disabled.

Link: http://lkml.kernel.org/r/20180628151101.25307-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 16 +++++-----
 include/linux/sched.h      |  2 +-
 mm/memcontrol.c            | 75 ++++++++++++++++++++++++++++++++++++----------
 mm/memory.c                |  4 +--
 4 files changed, 71 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 50e3e807b427..57a202f31683 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -507,16 +507,16 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
 void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 				struct task_struct *p);
 
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
 {
-	WARN_ON(current->memcg_may_oom);
-	current->memcg_may_oom = 1;
+	WARN_ON(current->in_user_fault);
+	current->in_user_fault = 1;
 }
 
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
 {
-	WARN_ON(!current->memcg_may_oom);
-	current->memcg_may_oom = 0;
+	WARN_ON(!current->in_user_fault);
+	current->in_user_fault = 0;
 }
 
 static inline bool task_in_memcg_oom(struct task_struct *p)
@@ -961,11 +961,11 @@ static inline void mem_cgroup_handle_over_high(void)
 {
 }
 
-static inline void mem_cgroup_oom_enable(void)
+static inline void mem_cgroup_enter_user_fault(void)
 {
 }
 
-static inline void mem_cgroup_oom_disable(void)
+static inline void mem_cgroup_exit_user_fault(void)
 {
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1827f4a7a6de..066a2c328653 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -722,7 +722,7 @@ struct task_struct {
 	unsigned			restore_sigmask:1;
 #endif
 #ifdef CONFIG_MEMCG
-	unsigned			memcg_may_oom:1;
+	unsigned			in_user_fault:1;
 #ifndef CONFIG_SLOB
 	unsigned			memcg_kmem_skip_account:1;
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c071af193986..d6724bed57d8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1534,28 +1534,53 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 		__wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
 
-static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+enum oom_status {
+	OOM_SUCCESS,
+	OOM_FAILED,
+	OOM_ASYNC,
+	OOM_SKIPPED
+};
+
+static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
-		return;
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
+		return OOM_SKIPPED;
+
 	/*
 	 * We are in the middle of the charge context here, so we
 	 * don't want to block when potentially sitting on a callstack
 	 * that holds all kinds of filesystem and mm locks.
 	 *
-	 * Also, the caller may handle a failed allocation gracefully
-	 * (like optional page cache readahead) and so an OOM killer
-	 * invocation might not even be necessary.
+	 * cgroup1 allows disabling the OOM killer and waiting for outside
+	 * handling until the charge can succeed; remember the context and put
+	 * the task to sleep at the end of the page fault when all locks are
+	 * released.
+	 *
+	 * On the other hand, in-kernel OOM killer allows for an async victim
+	 * memory reclaim (oom_reaper) and that means that we are not solely
+	 * relying on the oom victim to make a forward progress and we can
+	 * invoke the oom killer here.
 	 *
-	 * That's why we don't do anything here except remember the
-	 * OOM context and then deal with it at the end of the page
-	 * fault when the stack is unwound, the locks are released,
-	 * and when we know whether the fault was overall successful.
+	 * Please note that mem_cgroup_out_of_memory might fail to find a
+	 * victim and then we have to bail out from the charge path.
 	 */
-	css_get(&memcg->css);
-	current->memcg_in_oom = memcg;
-	current->memcg_oom_gfp_mask = mask;
-	current->memcg_oom_order = order;
+	if (memcg->oom_kill_disable) {
+		if (!current->in_user_fault)
+			return OOM_SKIPPED;
+		css_get(&memcg->css);
+		current->memcg_in_oom = memcg;
+		current->memcg_oom_gfp_mask = mask;
+		current->memcg_oom_order = order;
+
+		return OOM_ASYNC;
+	}
+
+	if (mem_cgroup_out_of_memory(memcg, mask, order))
+		return OOM_SUCCESS;
+
+	WARN(1,"Memory cgroup charge failed because of no reclaimable memory! "
+		"This looks like a misconfiguration or a kernel bug.");
+	return OOM_FAILED;
 }
 
 /**
@@ -1950,6 +1975,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long nr_reclaimed;
 	bool may_swap = true;
 	bool drained = false;
+	bool oomed = false;
+	enum oom_status oom_status;
 
 	if (mem_cgroup_is_root(memcg))
 		return 0;
@@ -2037,6 +2064,9 @@ retry:
 	if (nr_retries--)
 		goto retry;
 
+	if (gfp_mask & __GFP_RETRY_MAYFAIL && oomed)
+		goto nomem;
+
 	if (gfp_mask & __GFP_NOFAIL)
 		goto force;
 
@@ -2045,8 +2075,23 @@ retry:
 
 	memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
-	mem_cgroup_oom(mem_over_limit, gfp_mask,
+	/*
+	 * keep retrying as long as the memcg oom killer is able to make
+	 * a forward progress or bypass the charge if the oom killer
+	 * couldn't make any progress.
+	 */
+	oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask,
 		       get_order(nr_pages * PAGE_SIZE));
+	switch (oom_status) {
+	case OOM_SUCCESS:
+		nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+		oomed = true;
+		goto retry;
+	case OOM_FAILED:
+		goto force;
+	default:
+		goto nomem;
+	}
 nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
diff --git a/mm/memory.c b/mm/memory.c
index 175f344e1523..ae2ec887508b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4153,7 +4153,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	 * space.  Kernel faults are handled more gracefully.
 	 */
 	if (flags & FAULT_FLAG_USER)
-		mem_cgroup_oom_enable();
+		mem_cgroup_enter_user_fault();
 
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
@@ -4161,7 +4161,7 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		ret = __handle_mm_fault(vma, address, flags);
 
 	if (flags & FAULT_FLAG_USER) {
-		mem_cgroup_oom_disable();
+		mem_cgroup_exit_user_fault();
 		/*
 		 * The task may have entered a memcg OOM situation but
 		 * if the allocation error was handled gracefully (no
-- 
cgit v1.2.3


From 84c07d11aa619c6d24c682f469b10f344f0c02aa Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:25 -0700
Subject: mm: introduce CONFIG_MEMCG_KMEM as combination of CONFIG_MEMCG &&
 !CONFIG_SLOB

Introduce new config option, which is used to replace repeating
CONFIG_MEMCG && !CONFIG_SLOB pattern.  Next patches add a little more
memcg+kmem related code, so let's keep the defines more clearly.

Link: http://lkml.kernel.org/r/153063053670.1818.15013136946600481138.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h   |  4 ++--
 include/linux/memcontrol.h |  6 +++---
 include/linux/sched.h      |  2 +-
 include/linux/slab.h       |  2 +-
 init/Kconfig               |  5 +++++
 mm/list_lru.c              |  8 ++++----
 mm/memcontrol.c            | 16 ++++++++--------
 mm/slab.h                  |  6 +++---
 mm/slab_common.c           |  8 ++++----
 9 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 96def9d15b1b..2d23b5b745be 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -42,7 +42,7 @@ struct list_lru_node {
 	spinlock_t		lock;
 	/* global list, used for the root cgroup in cgroup aware lrus */
 	struct list_lru_one	lru;
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 	/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
 	struct list_lru_memcg	__rcu *memcg_lrus;
 #endif
@@ -51,7 +51,7 @@ struct list_lru_node {
 
 struct list_lru {
 	struct list_lru_node	*node;
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 	struct list_head	list;
 #endif
 };
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 57a202f31683..f3c026df7443 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -271,7 +271,7 @@ struct mem_cgroup {
 	bool			tcpmem_active;
 	int			tcpmem_pressure;
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
 	enum memcg_kmem_state kmem_state;
@@ -1231,7 +1231,7 @@ int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
 void memcg_kmem_uncharge(struct page *page, int order);
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
 extern struct workqueue_struct *memcg_kmem_cache_wq;
 
@@ -1284,6 +1284,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 066a2c328653..789923fbee3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -723,7 +723,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG
 	unsigned			in_user_fault:1;
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	unsigned			memcg_kmem_skip_account:1;
 #endif
 #endif
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 14e3fe4bd6a1..ed9cbddeb4a6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -97,7 +97,7 @@
 # define SLAB_FAILSLAB		0
 #endif
 /* Account to memcg */
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 # define SLAB_ACCOUNT		((slab_flags_t __force)0x04000000U)
 #else
 # define SLAB_ACCOUNT		0
diff --git a/init/Kconfig b/init/Kconfig
index 4dc783023e43..9bd50ba8253f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -708,6 +708,11 @@ config MEMCG_SWAP_ENABLED
 	  select this option (if, for some reason, they need to disable it
 	  then swapaccount=0 does the trick).
 
+config MEMCG_KMEM
+	bool
+	depends on MEMCG && !SLOB
+	default y
+
 config BLK_CGROUP
 	bool "IO controller"
 	depends on BLOCK
diff --git a/mm/list_lru.c b/mm/list_lru.c
index b65e0b9b0646..c5217d84c6e1 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -12,7 +12,7 @@
 #include <linux/mutex.h>
 #include <linux/memcontrol.h>
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 static LIST_HEAD(list_lrus);
 static DEFINE_MUTEX(list_lrus_mutex);
 
@@ -103,7 +103,7 @@ list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
 {
 	return &nlru->lru;
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
@@ -284,7 +284,7 @@ static void init_one_lru(struct list_lru_one *l)
 	l->nr_items = 0;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
 					  int begin, int end)
 {
@@ -543,7 +543,7 @@ static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
 static void memcg_destroy_list_lru(struct list_lru *lru)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 		    struct lock_class_key *key)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d6724bed57d8..2f00b455080f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -251,7 +251,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
  * The main reason for not using cgroup id for this:
@@ -305,7 +305,7 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
 
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
@@ -2215,7 +2215,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 		unlock_page_lru(page, isolated);
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_alloc_cache_id(void)
 {
 	int id, size;
@@ -2480,7 +2480,7 @@ void memcg_kmem_uncharge(struct page *page, int order)
 
 	css_put_many(&memcg->css, nr_pages);
 }
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
@@ -2875,7 +2875,7 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
 	int memcg_id;
@@ -2975,7 +2975,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 static void memcg_free_kmem(struct mem_cgroup *memcg)
 {
 }
-#endif /* !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 static int memcg_update_kmem_max(struct mem_cgroup *memcg,
 				 unsigned long max)
@@ -4279,7 +4279,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	INIT_LIST_HEAD(&memcg->event_list);
 	spin_lock_init(&memcg->event_list_lock);
 	memcg->socket_pressure = jiffies;
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	memcg->kmemcg_id = -1;
 #endif
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -6119,7 +6119,7 @@ static int __init mem_cgroup_init(void)
 {
 	int cpu, node;
 
-#ifndef CONFIG_SLOB
+#ifdef CONFIG_MEMCG_KMEM
 	/*
 	 * Kmem cache creation is mostly done with the slab_mutex held,
 	 * so use a workqueue with limited concurrency to avoid stalling
diff --git a/mm/slab.h b/mm/slab.h
index 68bdf498da3b..58c6c1c2a78e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -203,7 +203,7 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 void __kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
 int __kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 
 /* List of all root caches. */
 extern struct list_head		slab_root_caches;
@@ -296,7 +296,7 @@ extern void memcg_link_cache(struct kmem_cache *s);
 extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
 				void (*deact_fn)(struct kmem_cache *));
 
-#else /* CONFIG_MEMCG && !CONFIG_SLOB */
+#else /* CONFIG_MEMCG_KMEM */
 
 /* If !memcg, all caches are root. */
 #define slab_root_caches	slab_caches
@@ -351,7 +351,7 @@ static inline void memcg_link_cache(struct kmem_cache *s)
 {
 }
 
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 {
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2296caf87bfb..fea3376f9816 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -127,7 +127,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
 	return i;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 
 LIST_HEAD(slab_root_caches);
 
@@ -256,7 +256,7 @@ static inline void destroy_memcg_params(struct kmem_cache *s)
 static inline void memcg_unlink_cache(struct kmem_cache *s)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 /*
  * Figure out what the alignment of the objects will be given a set of
@@ -584,7 +584,7 @@ static int shutdown_cache(struct kmem_cache *s)
 	return 0;
 }
 
-#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
+#ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
@@ -861,7 +861,7 @@ static inline int shutdown_memcg_caches(struct kmem_cache *s)
 static inline void flush_memcg_workqueue(struct kmem_cache *s)
 {
 }
-#endif /* CONFIG_MEMCG && !CONFIG_SLOB */
+#endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
-- 
cgit v1.2.3


From b4c2b231c3ba155623591fb6301ed97b95e1c039 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:29 -0700
Subject: mm: assign id to every memcg-aware shrinker

Introduce shrinker::id number, which is used to enumerate memcg-aware
shrinkers.  The number start from 0, and the code tries to maintain it
as small as possible.

This will be used to represent a memcg-aware shrinkers in memcg
shrinkers map.

Since all memcg-aware shrinkers are based on list_lru, which is
per-memcg in case of !CONFIG_MEMCG_KMEM only, the new functionality will
be under this config option.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112546435.4097.10607140323811756557.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/153063054586.1818.6041047871606697364.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h |  4 +++
 mm/vmscan.c              | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 6794490f25b2..7ca9c18cf130 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -66,6 +66,10 @@ struct shrinker {
 
 	/* These are for internal use */
 	struct list_head list;
+#ifdef CONFIG_MEMCG_KMEM
+	/* ID in shrinker_idr */
+	int id;
+#endif
 	/* objs pending delete, per node */
 	atomic_long_t *nr_deferred;
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a00d94530e57..5cb4f779ea4a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -169,6 +169,50 @@ unsigned long vm_total_pages;
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
+#ifdef CONFIG_MEMCG_KMEM
+static DEFINE_IDR(shrinker_idr);
+static int shrinker_nr_max;
+
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	int id, ret = -ENOMEM;
+
+	down_write(&shrinker_rwsem);
+	/* This may call shrinker, so it must use down_read_trylock() */
+	id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
+	if (id < 0)
+		goto unlock;
+
+	if (id >= shrinker_nr_max)
+		shrinker_nr_max = id + 1;
+	shrinker->id = id;
+	ret = 0;
+unlock:
+	up_write(&shrinker_rwsem);
+	return ret;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+	int id = shrinker->id;
+
+	BUG_ON(id < 0);
+
+	down_write(&shrinker_rwsem);
+	idr_remove(&shrinker_idr, id);
+	up_write(&shrinker_rwsem);
+}
+#else /* CONFIG_MEMCG_KMEM */
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+#endif /* CONFIG_MEMCG_KMEM */
+
 #ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -313,11 +357,28 @@ int prealloc_shrinker(struct shrinker *shrinker)
 	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
 	if (!shrinker->nr_deferred)
 		return -ENOMEM;
+
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+		if (prealloc_memcg_shrinker(shrinker))
+			goto free_deferred;
+	}
+
 	return 0;
+
+free_deferred:
+	kfree(shrinker->nr_deferred);
+	shrinker->nr_deferred = NULL;
+	return -ENOMEM;
 }
 
 void free_prealloced_shrinker(struct shrinker *shrinker)
 {
+	if (!shrinker->nr_deferred)
+		return;
+
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+		unregister_memcg_shrinker(shrinker);
+
 	kfree(shrinker->nr_deferred);
 	shrinker->nr_deferred = NULL;
 }
@@ -347,6 +408,8 @@ void unregister_shrinker(struct shrinker *shrinker)
 {
 	if (!shrinker->nr_deferred)
 		return;
+	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+		unregister_memcg_shrinker(shrinker);
 	down_write(&shrinker_rwsem);
 	list_del(&shrinker->list);
 	up_write(&shrinker_rwsem);
-- 
cgit v1.2.3


From 0a4465d340282f92719f4e3a56545a848e638d15 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:37 -0700
Subject: mm, memcg: assign memcg-aware shrinkers bitmap to memcg

Imagine a big node with many cpus, memory cgroups and containers.  Let
we have 200 containers, every container has 10 mounts, and 10 cgroups.
All container tasks don't touch foreign containers mounts.  If there is
intensive pages write, and global reclaim happens, a writing task has to
iterate over all memcgs to shrink slab, before it's able to go to
shrink_page_list().

Iteration over all the memcg slabs is very expensive: the task has to
visit 200 * 10 = 2000 shrinkers for every memcg, and since there are
2000 memcgs, the total calls are 2000 * 2000 = 4000000.

So, the shrinker makes 4 million do_shrink_slab() calls just to try to
isolate SWAP_CLUSTER_MAX pages in one of the actively writing memcg via
shrink_page_list().  I've observed a node spending almost 100% in
kernel, making useless iteration over already shrinked slab.

This patch adds bitmap of memcg-aware shrinkers to memcg.  The size of
the bitmap depends on bitmap_nr_ids, and during memcg life it's
maintained to be enough to fit bitmap_nr_ids shrinkers.  Every bit in
the map is related to corresponding shrinker id.

Next patches will maintain set bit only for really charged memcg.  This
will allow shrink_slab() to increase its performance in significant way.
See the last patch for the numbers.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112549031.4097.3576147070498769979.stgit@localhost.localdomain
[ktkhai@virtuozzo.com: add comment to mem_cgroup_css_online()]
  Link: http://lkml.kernel.org/r/521f9e5f-c436-b388-fe83-4dc870bfb489@virtuozzo.com
Link: http://lkml.kernel.org/r/153063056619.1818.12550500883688681076.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  14 +++++
 mm/memcontrol.c            | 124 +++++++++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c                |   8 ++-
 3 files changed, 145 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f3c026df7443..2cccbb9e1b3e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -111,6 +111,15 @@ struct lruvec_stat {
 	long count[NR_VM_NODE_STAT_ITEMS];
 };
 
+/*
+ * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
+ * which have elements charged to this memcg.
+ */
+struct memcg_shrinker_map {
+	struct rcu_head rcu;
+	unsigned long map[0];
+};
+
 /*
  * per-zone information in memory controller.
  */
@@ -124,6 +133,9 @@ struct mem_cgroup_per_node {
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
+#ifdef CONFIG_MEMCG_KMEM
+	struct memcg_shrinker_map __rcu	*shrinker_map;
+#endif
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
@@ -1262,6 +1274,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
+extern int memcg_expand_shrinker_maps(int new_id);
+
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 313355dddf66..827c9e87ca08 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -320,6 +320,119 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
 
+static int memcg_shrinker_map_size;
+static DEFINE_MUTEX(memcg_shrinker_map_mutex);
+
+static void memcg_free_shrinker_map_rcu(struct rcu_head *head)
+{
+	kvfree(container_of(head, struct memcg_shrinker_map, rcu));
+}
+
+static int memcg_expand_one_shrinker_map(struct mem_cgroup *memcg,
+					 int size, int old_size)
+{
+	struct memcg_shrinker_map *new, *old;
+	int nid;
+
+	lockdep_assert_held(&memcg_shrinker_map_mutex);
+
+	for_each_node(nid) {
+		old = rcu_dereference_protected(
+			mem_cgroup_nodeinfo(memcg, nid)->shrinker_map, true);
+		/* Not yet online memcg */
+		if (!old)
+			return 0;
+
+		new = kvmalloc(sizeof(*new) + size, GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		/* Set all old bits, clear all new bits */
+		memset(new->map, (int)0xff, old_size);
+		memset((void *)new->map + old_size, 0, size - old_size);
+
+		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_map, new);
+		call_rcu(&old->rcu, memcg_free_shrinker_map_rcu);
+	}
+
+	return 0;
+}
+
+static void memcg_free_shrinker_maps(struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_node *pn;
+	struct memcg_shrinker_map *map;
+	int nid;
+
+	if (mem_cgroup_is_root(memcg))
+		return;
+
+	for_each_node(nid) {
+		pn = mem_cgroup_nodeinfo(memcg, nid);
+		map = rcu_dereference_protected(pn->shrinker_map, true);
+		if (map)
+			kvfree(map);
+		rcu_assign_pointer(pn->shrinker_map, NULL);
+	}
+}
+
+static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
+{
+	struct memcg_shrinker_map *map;
+	int nid, size, ret = 0;
+
+	if (mem_cgroup_is_root(memcg))
+		return 0;
+
+	mutex_lock(&memcg_shrinker_map_mutex);
+	size = memcg_shrinker_map_size;
+	for_each_node(nid) {
+		map = kvzalloc(sizeof(*map) + size, GFP_KERNEL);
+		if (!map) {
+			memcg_free_shrinker_maps(memcg);
+			ret = -ENOMEM;
+			break;
+		}
+		rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_map, map);
+	}
+	mutex_unlock(&memcg_shrinker_map_mutex);
+
+	return ret;
+}
+
+int memcg_expand_shrinker_maps(int new_id)
+{
+	int size, old_size, ret = 0;
+	struct mem_cgroup *memcg;
+
+	size = DIV_ROUND_UP(new_id + 1, BITS_PER_LONG) * sizeof(unsigned long);
+	old_size = memcg_shrinker_map_size;
+	if (size <= old_size)
+		return 0;
+
+	mutex_lock(&memcg_shrinker_map_mutex);
+	if (!root_mem_cgroup)
+		goto unlock;
+
+	for_each_mem_cgroup(memcg) {
+		if (mem_cgroup_is_root(memcg))
+			continue;
+		ret = memcg_expand_one_shrinker_map(memcg, size, old_size);
+		if (ret)
+			goto unlock;
+	}
+unlock:
+	if (!ret)
+		memcg_shrinker_map_size = size;
+	mutex_unlock(&memcg_shrinker_map_mutex);
+	return ret;
+}
+#else /* CONFIG_MEMCG_KMEM */
+static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
+{
+	return 0;
+}
+static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 /**
@@ -4356,6 +4469,16 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
+	/*
+	 * A memcg must be visible for memcg_expand_shrinker_maps()
+	 * by the time the maps are allocated. So, we allocate maps
+	 * here, when for_each_mem_cgroup() can't skip it.
+	 */
+	if (memcg_alloc_shrinker_maps(memcg)) {
+		mem_cgroup_id_remove(memcg);
+		return -ENOMEM;
+	}
+
 	/* Online state pins memcg ID, memcg ID pins CSS */
 	atomic_set(&memcg->id.ref, 1);
 	css_get(css);
@@ -4408,6 +4531,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	vmpressure_cleanup(&memcg->vmpressure);
 	cancel_work_sync(&memcg->high_work);
 	mem_cgroup_remove_from_trees(memcg);
+	memcg_free_shrinker_maps(memcg);
 	memcg_free_kmem(memcg);
 	mem_cgroup_free(memcg);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5cb4f779ea4a..db0970ba340d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,8 +183,14 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
 	if (id < 0)
 		goto unlock;
 
-	if (id >= shrinker_nr_max)
+	if (id >= shrinker_nr_max) {
+		if (memcg_expand_shrinker_maps(id)) {
+			idr_remove(&shrinker_idr, id);
+			goto unlock;
+		}
+
 		shrinker_nr_max = id + 1;
+	}
 	shrinker->id = id;
 	ret = 0;
 unlock:
-- 
cgit v1.2.3


From c92e8e10cafeaaedc84f23fed1bfcf9cf07399c2 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:50 -0700
Subject: fs: propagate shrinker::id to list_lru

Add list_lru::shrinker_id field and populate it by registered shrinker
id.

This will be used to set correct bit in memcg shrinkers map by lru code
in next patches, after there appeared the first related to memcg element
in list_lru.

Link: http://lkml.kernel.org/r/153063059758.1818.14866596416857717800.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/super.c               |  4 ++--
 include/linux/list_lru.h | 14 +++++++++-----
 mm/list_lru.c            | 11 ++++++++++-
 mm/workingset.c          |  3 ++-
 4 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 78227c4ddb21..f5f96e52e0cd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -261,9 +261,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
 	if (prealloc_shrinker(&s->s_shrink))
 		goto fail;
-	if (list_lru_init_memcg(&s->s_dentry_lru))
+	if (list_lru_init_memcg(&s->s_dentry_lru, &s->s_shrink))
 		goto fail;
-	if (list_lru_init_memcg(&s->s_inode_lru))
+	if (list_lru_init_memcg(&s->s_inode_lru, &s->s_shrink))
 		goto fail;
 	return s;
 
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 2d23b5b745be..9e75bb33766b 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -53,16 +53,20 @@ struct list_lru {
 	struct list_lru_node	*node;
 #ifdef CONFIG_MEMCG_KMEM
 	struct list_head	list;
+	int			shrinker_id;
 #endif
 };
 
 void list_lru_destroy(struct list_lru *lru);
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
-		    struct lock_class_key *key);
-
-#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
-#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
-#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)
+		    struct lock_class_key *key, struct shrinker *shrinker);
+
+#define list_lru_init(lru)				\
+	__list_lru_init((lru), false, NULL, NULL)
+#define list_lru_init_key(lru, key)			\
+	__list_lru_init((lru), false, (key), NULL)
+#define list_lru_init_memcg(lru, shrinker)		\
+	__list_lru_init((lru), true, NULL, shrinker)
 
 int memcg_update_all_list_lrus(int num_memcgs);
 void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index c5217d84c6e1..5aebbb9b2f5b 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -546,12 +546,18 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
 #endif /* CONFIG_MEMCG_KMEM */
 
 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
-		    struct lock_class_key *key)
+		    struct lock_class_key *key, struct shrinker *shrinker)
 {
 	int i;
 	size_t size = sizeof(*lru->node) * nr_node_ids;
 	int err = -ENOMEM;
 
+#ifdef CONFIG_MEMCG_KMEM
+	if (shrinker)
+		lru->shrinker_id = shrinker->id;
+	else
+		lru->shrinker_id = -1;
+#endif
 	memcg_get_cache_ids();
 
 	lru->node = kzalloc(size, GFP_KERNEL);
@@ -594,6 +600,9 @@ void list_lru_destroy(struct list_lru *lru)
 	kfree(lru->node);
 	lru->node = NULL;
 
+#ifdef CONFIG_MEMCG_KMEM
+	lru->shrinker_id = -1;
+#endif
 	memcg_put_cache_ids();
 }
 EXPORT_SYMBOL_GPL(list_lru_destroy);
diff --git a/mm/workingset.c b/mm/workingset.c
index 4e0b2523aae2..cd0b2ae615e4 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -526,7 +526,8 @@ static int __init workingset_init(void)
 	ret = prealloc_shrinker(&workingset_shadow_shrinker);
 	if (ret)
 		goto err;
-	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
+	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key,
+			      &workingset_shadow_shrinker);
 	if (ret)
 		goto err_list_lru;
 	register_shrinker_prepared(&workingset_shadow_shrinker);
-- 
cgit v1.2.3


From 9bec5c35bfa3d41b046594b5890f772ed737f1fd Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:47:58 -0700
Subject: mm/list_lru: pass dst_memcg argument to memcg_drain_list_lru_node()

This is just refactoring to allow the next patches to have dst_memcg
pointer in memcg_drain_list_lru_node().

Link: http://lkml.kernel.org/r/153063062118.1818.2761273817739499749.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h |  2 +-
 mm/list_lru.c            | 11 ++++++-----
 mm/memcontrol.c          |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 9e75bb33766b..d9c16f2f2f00 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -69,7 +69,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 	__list_lru_init((lru), true, NULL, shrinker)
 
 int memcg_update_all_list_lrus(int num_memcgs);
-void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
+void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg);
 
 /**
  * list_lru_add: add an element to the lru list's tail
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 1fc5be746e69..5384cda08984 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -502,8 +502,9 @@ fail:
 }
 
 static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
-				      int src_idx, int dst_idx)
+				      int src_idx, struct mem_cgroup *dst_memcg)
 {
+	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *src, *dst;
 
 	/*
@@ -523,7 +524,7 @@ static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
 }
 
 static void memcg_drain_list_lru(struct list_lru *lru,
-				 int src_idx, int dst_idx)
+				 int src_idx, struct mem_cgroup *dst_memcg)
 {
 	int i;
 
@@ -531,16 +532,16 @@ static void memcg_drain_list_lru(struct list_lru *lru,
 		return;
 
 	for_each_node(i)
-		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
+		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_memcg);
 }
 
-void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
+void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
 {
 	struct list_lru *lru;
 
 	mutex_lock(&list_lrus_mutex);
 	list_for_each_entry(lru, &list_lrus, list)
-		memcg_drain_list_lru(lru, src_idx, dst_idx);
+		memcg_drain_list_lru(lru, src_idx, dst_memcg);
 	mutex_unlock(&list_lrus_mutex);
 }
 #else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 827c9e87ca08..a35dc901424d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3060,7 +3060,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	}
 	rcu_read_unlock();
 
-	memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
+	memcg_drain_all_list_lrus(kmemcg_id, parent);
 
 	memcg_free_cache_id(kmemcg_id);
 }
-- 
cgit v1.2.3


From dfd2f10ccfd7e6bd2a096eaf42e76a7229776322 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:06 -0700
Subject: mm/memcontrol.c: export mem_cgroup_is_root()

This will be used in next patch.

Link: http://lkml.kernel.org/r/153063064347.1818.1987011484100392706.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++++++++
 mm/memcontrol.c            |  5 -----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2cccbb9e1b3e..258c8a46959a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -318,6 +318,11 @@ struct mem_cgroup {
 
 extern struct mem_cgroup *root_mem_cgroup;
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
+{
+	return (memcg == root_mem_cgroup);
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
@@ -784,6 +789,11 @@ void mem_cgroup_split_huge_fixup(struct page *head);
 
 struct mem_cgroup;
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
+{
+	return true;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return true;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a35dc901424d..d8cd6c39eca5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -261,11 +261,6 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 	return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
 }
 
-static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
-{
-	return (memcg == root_mem_cgroup);
-}
-
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * This will be the memcg's index in each cache's ->memcg_params.memcg_caches.
-- 
cgit v1.2.3


From fae91d6d8be5e20c47e459dbeb3d43bd5f9486f4 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:10 -0700
Subject: mm/list_lru.c: set bit in memcg shrinker bitmap on first list_lru
 item appearance

Introduce set_shrinker_bit() function to set shrinker-related bit in
memcg shrinker bitmap, and set the bit after the first item is added and
in case of reparenting destroyed memcg's items.

This will allow next patch to make shrinkers be called only, in case of
they have charged objects at the moment, and to improve shrink_slab()
performance.

[ktkhai@virtuozzo.com: v9]
  Link: http://lkml.kernel.org/r/153112557572.4097.17315791419810749985.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/153063065671.1818.15914674956134687268.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++++
 mm/list_lru.c              | 22 ++++++++++++++++++++--
 mm/memcontrol.c            | 13 +++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 258c8a46959a..0e6c515fb698 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1286,6 +1286,8 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 
 extern int memcg_expand_shrinker_maps(int new_id);
 
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+				   int nid, int shrinker_id);
 #else
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
@@ -1308,6 +1310,8 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+					  int nid, int shrinker_id) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/mm/list_lru.c b/mm/list_lru.c
index c6131925ec76..c9bdde9c03d1 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -30,6 +30,11 @@ static void list_lru_unregister(struct list_lru *lru)
 	mutex_unlock(&list_lrus_mutex);
 }
 
+static int lru_shrinker_id(struct list_lru *lru)
+{
+	return lru->shrinker_id;
+}
+
 static inline bool list_lru_memcg_aware(struct list_lru *lru)
 {
 	/*
@@ -93,6 +98,11 @@ static void list_lru_unregister(struct list_lru *lru)
 {
 }
 
+static int lru_shrinker_id(struct list_lru *lru)
+{
+	return -1;
+}
+
 static inline bool list_lru_memcg_aware(struct list_lru *lru)
 {
 	return false;
@@ -118,13 +128,17 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
 {
 	int nid = page_to_nid(virt_to_page(item));
 	struct list_lru_node *nlru = &lru->node[nid];
+	struct mem_cgroup *memcg;
 	struct list_lru_one *l;
 
 	spin_lock(&nlru->lock);
 	if (list_empty(item)) {
-		l = list_lru_from_kmem(nlru, item, NULL);
+		l = list_lru_from_kmem(nlru, item, &memcg);
 		list_add_tail(item, &l->list);
-		l->nr_items++;
+		/* Set shrinker bit if the first element was added */
+		if (!l->nr_items++)
+			memcg_set_shrinker_bit(memcg, nid,
+					       lru_shrinker_id(lru));
 		nlru->nr_items++;
 		spin_unlock(&nlru->lock);
 		return true;
@@ -507,6 +521,7 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	struct list_lru_node *nlru = &lru->node[nid];
 	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *src, *dst;
+	bool set;
 
 	/*
 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
@@ -518,7 +533,10 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	dst = list_lru_from_memcg_idx(nlru, dst_idx);
 
 	list_splice_init(&src->list, &dst->list);
+	set = (!dst->nr_items && src->nr_items);
 	dst->nr_items += src->nr_items;
+	if (set)
+		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
 	src->nr_items = 0;
 
 	spin_unlock_irq(&nlru->lock);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d8cd6c39eca5..55c010a58535 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -422,6 +422,19 @@ unlock:
 	mutex_unlock(&memcg_shrinker_map_mutex);
 	return ret;
 }
+
+void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
+{
+	if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
+		struct memcg_shrinker_map *map;
+
+		rcu_read_lock();
+		map = rcu_dereference(memcg->nodeinfo[nid]->shrinker_map);
+		set_bit(shrinker_id, map->map);
+		rcu_read_unlock();
+	}
+}
+
 #else /* CONFIG_MEMCG_KMEM */
 static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
 {
-- 
cgit v1.2.3


From 9b996468cfdba09f688f52dba4287de596194613 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 17 Aug 2018 15:48:21 -0700
Subject: mm: add SHRINK_EMPTY shrinker methods return value

We need to distinguish the situations when shrinker has very small
amount of objects (see vfs_pressure_ratio() called from
super_cache_count()), and when it has no objects at all.  Currently, in
the both of these cases, shrinker::count_objects() returns 0.

The patch introduces new SHRINK_EMPTY return value, which will be used
for "no objects at all" case.  It's is a refactoring mostly, as
SHRINK_EMPTY is replaced by 0 by all callers of do_shrink_slab() in this
patch, and all the magic will happen in further.

Link: http://lkml.kernel.org/r/153063069574.1818.11037751256699341813.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Tested-by: Shakeel Butt <shakeelb@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Li RongQing <lirongqing@baidu.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Sahitya Tummala <stummala@codeaurora.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/super.c               |  3 +++
 include/linux/shrinker.h |  7 +++++--
 mm/vmscan.c              | 12 +++++++++---
 mm/workingset.c          |  3 +++
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index f5f96e52e0cd..7429588d6b49 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -144,6 +144,9 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
 	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
 
+	if (!total_objects)
+		return SHRINK_EMPTY;
+
 	total_objects = vfs_pressure_ratio(total_objects);
 	return total_objects;
 }
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 7ca9c18cf130..b154fd2b084c 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -34,12 +34,15 @@ struct shrink_control {
 };
 
 #define SHRINK_STOP (~0UL)
+#define SHRINK_EMPTY (~0UL - 1)
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
  * @count_objects should return the number of freeable items in the cache. If
- * there are no objects to free or the number of freeable items cannot be
- * determined, it should return 0. No deadlock checks should be done during the
+ * there are no objects to free, it should return SHRINK_EMPTY, while 0 is
+ * returned in cases of the number of freeable items cannot be determined
+ * or shrinker should skip this cache for this time (e.g., their number
+ * is below shrinkable limit). No deadlock checks should be done during the
  * count callback - the shrinker relies on aggregating scan counts that couldn't
  * be executed due to potential deadlocks to be run at a later call when the
  * deadlock condition is no longer pending.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2aa3cb760189..8199e1b9a204 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -456,8 +456,8 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	long scanned = 0, next_deferred;
 
 	freeable = shrinker->count_objects(shrinker, shrinkctl);
-	if (freeable == 0)
-		return 0;
+	if (freeable == 0 || freeable == SHRINK_EMPTY)
+		return freeable;
 
 	/*
 	 * copy the current shrinker scan count into a local variable
@@ -596,6 +596,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			continue;
 
 		ret = do_shrink_slab(&sc, shrinker, priority);
+		if (ret == SHRINK_EMPTY)
+			ret = 0;
 		freed += ret;
 
 		if (rwsem_is_contended(&shrinker_rwsem)) {
@@ -641,6 +643,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
+	int ret;
 
 	if (!mem_cgroup_is_root(memcg))
 		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
@@ -658,7 +661,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += do_shrink_slab(&sc, shrinker, priority);
+		ret = do_shrink_slab(&sc, shrinker, priority);
+		if (ret == SHRINK_EMPTY)
+			ret = 0;
+		freed += ret;
 		/*
 		 * Bail out if someone want to register a new shrinker to
 		 * prevent the regsitration from being stalled for long periods
diff --git a/mm/workingset.c b/mm/workingset.c
index cd0b2ae615e4..bc72ad029b3e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -399,6 +399,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	}
 	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
 
+	if (!nodes)
+		return SHRINK_EMPTY;
+
 	if (nodes <= max_nodes)
 		return 0;
 	return nodes - max_nodes;
-- 
cgit v1.2.3


From 6518202970c1052148daaef9a8096711775e43a2 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 17 Aug 2018 15:48:57 -0700
Subject: mm/cma: remove unsupported gfp_mask parameter from cma_alloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cma_alloc() doesn't really support gfp flags other than __GFP_NOWARN, so
convert gfp_mask parameter to boolean no_warn parameter.

This will help to avoid giving false feeling that this function supports
standard gfp flags and callers can pass __GFP_ZERO to get zeroed buffer,
what has already been an issue: see commit dd65a941f6ba ("arm64:
dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag").

Link: http://lkml.kernel.org/r/20180709122019eucas1p2340da484acfcc932537e6014f4fd2c29~-sqTPJKij2939229392eucas1p2j@eucas1p2.samsung.com
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michał Nazarewicz <mina86@mina86.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_builtin.c       | 2 +-
 drivers/s390/char/vmcp.c                   | 2 +-
 drivers/staging/android/ion/ion_cma_heap.c | 2 +-
 include/linux/cma.h                        | 2 +-
 kernel/dma/contiguous.c                    | 3 ++-
 mm/cma.c                                   | 8 ++++----
 mm/cma_debug.c                             | 2 +-
 7 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index d4a3f4da409b..fc6bb9630a9c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -77,7 +77,7 @@ struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
 	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
-			 GFP_KERNEL);
+			 false);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
 
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 948ce82a7725..0fa1b6b1491a 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -68,7 +68,7 @@ static void vmcp_response_alloc(struct vmcp_session *session)
 	 * anymore the system won't work anyway.
 	 */
 	if (order > 2)
-		page = cma_alloc(vmcp_cma, nr_pages, 0, GFP_KERNEL);
+		page = cma_alloc(vmcp_cma, nr_pages, 0, false);
 	if (page) {
 		session->response = (char *)page_to_phys(page);
 		session->cma_alloc = 1;
diff --git a/drivers/staging/android/ion/ion_cma_heap.c b/drivers/staging/android/ion/ion_cma_heap.c
index 49718c96bf9e..3fafd013d80a 100644
--- a/drivers/staging/android/ion/ion_cma_heap.c
+++ b/drivers/staging/android/ion/ion_cma_heap.c
@@ -39,7 +39,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
+	pages = cma_alloc(cma_heap->cma, nr_pages, align, false);
 	if (!pages)
 		return -ENOMEM;
 
diff --git a/include/linux/cma.h b/include/linux/cma.h
index bf90f0bb42bd..190184b5ff32 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -33,7 +33,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					const char *name,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
-			      gfp_t gfp_mask);
+			      bool no_warn);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index d987dcd1bd56..19ea5d70150c 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -191,7 +191,8 @@ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
+	return cma_alloc(dev_get_cma_area(dev), count, align,
+			 gfp_mask & __GFP_NOWARN);
 }
 
 /**
diff --git a/mm/cma.c b/mm/cma.c
index 5809bbe360d7..4cb76121a3ab 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -395,13 +395,13 @@ static inline void cma_debug_show_areas(struct cma *cma) { }
  * @cma:   Contiguous memory region for which the allocation is performed.
  * @count: Requested number of pages.
  * @align: Requested alignment of pages (in PAGE_SIZE order).
- * @gfp_mask:  GFP mask to use during compaction
+ * @no_warn: Avoid printing message about failed allocation
  *
  * This function allocates part of contiguous memory on specific
  * contiguous memory area.
  */
 struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
-		       gfp_t gfp_mask)
+		       bool no_warn)
 {
 	unsigned long mask, offset;
 	unsigned long pfn = -1;
@@ -447,7 +447,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
 		mutex_lock(&cma_mutex);
 		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
-					 gfp_mask);
+				     GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
 		mutex_unlock(&cma_mutex);
 		if (ret == 0) {
 			page = pfn_to_page(pfn);
@@ -466,7 +466,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 
 	trace_cma_alloc(pfn, page, count, align);
 
-	if (ret && !(gfp_mask & __GFP_NOWARN)) {
+	if (ret && !no_warn) {
 		pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n",
 			__func__, count, ret);
 		cma_debug_show_areas(cma);
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index f23467291cfb..ad6723e9d110 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -139,7 +139,7 @@ static int cma_alloc_mem(struct cma *cma, int count)
 	if (!mem)
 		return -ENOMEM;
 
-	p = cma_alloc(cma, count, 0, GFP_KERNEL);
+	p = cma_alloc(cma, count, 0, false);
 	if (!p) {
 		kfree(mem);
 		return -ENOMEM;
-- 
cgit v1.2.3


From d834c5ab83febf9624ad3b16c3c348aa1e02014c Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 17 Aug 2018 15:49:00 -0700
Subject: kernel/dma: remove unsupported gfp_mask parameter from
 dma_alloc_from_contiguous()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The CMA memory allocator doesn't support standard gfp flags for memory
allocation, so there is no point having it as a parameter for
dma_alloc_from_contiguous() function.  Replace it by a boolean no_warn
argument, which covers all the underlaying cma_alloc() function
supports.

This will help to avoid giving false feeling that this function supports
standard gfp flags and callers can pass __GFP_ZERO to get zeroed buffer,
what has already been an issue: see commit dd65a941f6ba ("arm64:
dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag").

Link: http://lkml.kernel.org/r/20180709122020eucas1p21a71b092975cb4a3b9954ffc63f699d1~-sqUFoa-h2939329393eucas1p2Y@eucas1p2.samsung.com
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Michał Nazarewicz <mina86@mina86.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/dma-mapping.c      | 5 +++--
 arch/arm64/mm/dma-mapping.c    | 4 ++--
 arch/xtensa/kernel/pci-dma.c   | 2 +-
 drivers/iommu/amd_iommu.c      | 2 +-
 drivers/iommu/intel-iommu.c    | 3 ++-
 include/linux/dma-contiguous.h | 4 ++--
 kernel/dma/contiguous.c        | 7 +++----
 kernel/dma/direct.c            | 3 ++-
 8 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ba0e786c952e..66566472c153 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -594,7 +594,7 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 	struct page *page;
 	void *ptr = NULL;
 
-	page = dma_alloc_from_contiguous(dev, count, order, gfp);
+	page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN);
 	if (!page)
 		return NULL;
 
@@ -1299,7 +1299,8 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 		unsigned long order = get_order(size);
 		struct page *page;
 
-		page = dma_alloc_from_contiguous(dev, count, order, gfp);
+		page = dma_alloc_from_contiguous(dev, count, order,
+						 gfp & __GFP_NOWARN);
 		if (!page)
 			goto error;
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 61e93f0b5482..072c51fb07d7 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -355,7 +355,7 @@ static int __init atomic_pool_init(void)
 
 	if (dev_get_cma_area(NULL))
 		page = dma_alloc_from_contiguous(NULL, nr_pages,
-						 pool_size_order, GFP_KERNEL);
+						 pool_size_order, false);
 	else
 		page = alloc_pages(GFP_DMA32, pool_size_order);
 
@@ -573,7 +573,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 		struct page *page;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), gfp);
+					get_order(size), gfp & __GFP_NOWARN);
 		if (!page)
 			return NULL;
 
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 392b4a80ebc2..a02dc563d290 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -137,7 +137,7 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size,
 
 	if (gfpflags_allow_blocking(flag))
 		page = dma_alloc_from_contiguous(dev, count, get_order(size),
-						 flag);
+						 flag & __GFP_NOWARN);
 
 	if (!page)
 		page = alloc_pages(flag, get_order(size));
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 596b95c50051..60b2eab29cd8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -2620,7 +2620,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 			return NULL;
 
 		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
-						 get_order(size), flag);
+					get_order(size), flag & __GFP_NOWARN);
 		if (!page)
 			return NULL;
 	}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 115ff26e9ced..6a237d18fabf 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3758,7 +3758,8 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 	if (gfpflags_allow_blocking(flags)) {
 		unsigned int count = size >> PAGE_SHIFT;
 
-		page = dma_alloc_from_contiguous(dev, count, order, flags);
+		page = dma_alloc_from_contiguous(dev, count, order,
+						 flags & __GFP_NOWARN);
 		if (page && iommu_no_mapping(dev) &&
 		    page_to_phys(page) + size > dev->coherent_dma_mask) {
 			dma_release_from_contiguous(dev, page, count);
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 3c5a4cb3eb95..f247e8aa5e3d 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 }
 
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order, gfp_t gfp_mask);
+				       unsigned int order, bool no_warn);
 bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 				 int count);
 
@@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 
 static inline
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int order, gfp_t gfp_mask)
+				       unsigned int order, bool no_warn)
 {
 	return NULL;
 }
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 19ea5d70150c..286d82329eb0 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -178,7 +178,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * @dev:   Pointer to device for which the allocation is performed.
  * @count: Requested number of pages.
  * @align: Requested alignment of pages (in PAGE_SIZE order).
- * @gfp_mask: GFP flags to use for this allocation.
+ * @no_warn: Avoid printing message about failed allocation.
  *
  * This function allocates memory buffer for specified device. It uses
  * device specific contiguous memory area if available or the default
@@ -186,13 +186,12 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
  * function.
  */
 struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-				       unsigned int align, gfp_t gfp_mask)
+				       unsigned int align, bool no_warn)
 {
 	if (align > CONFIG_CMA_ALIGNMENT)
 		align = CONFIG_CMA_ALIGNMENT;
 
-	return cma_alloc(dev_get_cma_area(dev), count, align,
-			 gfp_mask & __GFP_NOWARN);
+	return cma_alloc(dev_get_cma_area(dev), count, align, no_warn);
 }
 
 /**
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index c2860c5a9e96..1c35b7b945d0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -78,7 +78,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 again:
 	/* CMA can be used only in the context which permits sleeping */
 	if (gfpflags_allow_blocking(gfp)) {
-		page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
+		page = dma_alloc_from_contiguous(dev, count, page_order,
+						 gfp & __GFP_NOWARN);
 		if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 			dma_release_from_contiguous(dev, page, count);
 			page = NULL;
-- 
cgit v1.2.3


From 40d18ebffb3974272a920c41f2d74431152cae98 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 17 Aug 2018 15:49:07 -0700
Subject: mm/hugetlb: remove gigantic page support for HIGHMEM

This reverts ee8f248d266e ("hugetlb: add phys addr to struct
huge_bootmem_page").

At one time powerpc used this field and supporting code.  However that
was removed with commit 79cc38ded1e1 ("powerpc/mm/hugetlb: Add support
for reserving gigantic huge pages via kernel command line").

There are no users of this field and supporting code, so remove it.

Link: http://lkml.kernel.org/r/20180711195913.1294-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Cannon Matthews <cannonmatthews@google.com>
Cc: Becky Bruce <beckyb@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 ---
 mm/hugetlb.c            | 9 +--------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 36fa6a2a82e3..c39d9170a8a0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -348,9 +348,6 @@ struct hstate {
 struct huge_bootmem_page {
 	struct list_head list;
 	struct hstate *hstate;
-#ifdef CONFIG_HIGHMEM
-	phys_addr_t phys;
-#endif
 };
 
 struct page *alloc_huge_page(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bcaae0d73a..4cea30ac5033 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2139,16 +2139,9 @@ static void __init gather_bootmem_prealloc(void)
 	struct huge_bootmem_page *m;
 
 	list_for_each_entry(m, &huge_boot_pages, list) {
+		struct page *page = virt_to_page(m);
 		struct hstate *h = m->hstate;
-		struct page *page;
 
-#ifdef CONFIG_HIGHMEM
-		page = pfn_to_page(m->phys >> PAGE_SHIFT);
-		memblock_free_late(__pa(m),
-				   sizeof(struct huge_bootmem_page));
-#else
-		page = virt_to_page(m);
-#endif
 		WARN_ON(page_count(page) != 1);
 		prep_compound_huge_page(page, h->order);
 		WARN_ON(PageReserved(page));
-- 
cgit v1.2.3


From 35fd1eb1e8212c02f6eae24335a9e5b80f9519b4 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:21 -0700
Subject: mm/sparse: abstract sparse buffer allocations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "sparse_init rewrite", v6.

In sparse_init() we allocate two large buffers to temporary hold usemap
and memmap for the whole machine.  However, we can avoid doing that if
we changed sparse_init() to operated on per-node bases instead of doing
it on the whole machine beforehand.

As shown by Baoquan
  http://lkml.kernel.org/r/20180628062857.29658-1-bhe@redhat.com

The buffers are large enough to cause machine stop to boot on small
memory systems.

Another benefit of these changes is that they also obsolete
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER.

This patch (of 5):

When struct pages are allocated for sparse-vmemmap VA layout, we first try
to allocate one large buffer, and than if that fails allocate struct pages
for each section as we go.

The code that allocates buffer is uses global variables and is spread
across several call sites.

Cleanup the code by introducing three functions to handle the global
buffer:

sparse_buffer_init()	initialize the buffer
sparse_buffer_fini()	free the remaining part of the buffer
sparse_buffer_alloc()	alloc from the buffer, and if buffer is empty
return NULL

Define these functions in sparse.c instead of sparse-vmemmap.c because
later we will use them for non-vmemmap sparse allocations as well.

[akpm@linux-foundation.org: use PTR_ALIGN()]
[akpm@linux-foundation.org: s/BUG_ON/WARN_ON/]
Link: http://lkml.kernel.org/r/20180712203730.8703-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Tested-by: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  4 ++++
 mm/sparse-vmemmap.c | 40 ++++++----------------------------------
 mm/sparse.c         | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 54 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2fb32d1561eb..4ace5d50a892 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2671,6 +2671,10 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 				   unsigned long map_count,
 				   int nodeid);
 
+unsigned long __init section_map_size(void);
+void sparse_buffer_init(unsigned long size, int nid);
+void sparse_buffer_fini(void);
+void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 95e2c7638a5c..b05c7663c640 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -43,12 +43,9 @@ static void * __ref __earlyonly_bootmem_alloc(int node,
 				unsigned long goal)
 {
 	return memblock_virt_alloc_try_nid_raw(size, align, goal,
-					    BOOTMEM_ALLOC_ACCESSIBLE, node);
+					       BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
-static void *vmemmap_buf;
-static void *vmemmap_buf_end;
-
 void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 {
 	/* If the main allocator is up use that, fallback to bootmem. */
@@ -76,18 +73,10 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 /* need to make sure size is all the same during early stage */
 void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
 {
-	void *ptr;
-
-	if (!vmemmap_buf)
-		return vmemmap_alloc_block(size, node);
-
-	/* take the from buf */
-	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
-	if (ptr + size > vmemmap_buf_end)
-		return vmemmap_alloc_block(size, node);
-
-	vmemmap_buf = ptr + size;
+	void *ptr = sparse_buffer_alloc(size);
 
+	if (!ptr)
+		ptr = vmemmap_alloc_block(size, node);
 	return ptr;
 }
 
@@ -279,19 +268,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 					  unsigned long map_count, int nodeid)
 {
 	unsigned long pnum;
-	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
-	void *vmemmap_buf_start;
 	int nr_consumed_maps = 0;
 
-	size = ALIGN(size, PMD_SIZE);
-	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
-			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));
-
-	if (vmemmap_buf_start) {
-		vmemmap_buf = vmemmap_buf_start;
-		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
-	}
-
+	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -303,12 +282,5 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-
-	if (vmemmap_buf_start) {
-		/* need to free left buf */
-		memblock_free_early(__pa(vmemmap_buf),
-				    vmemmap_buf_end - vmemmap_buf);
-		vmemmap_buf = NULL;
-		vmemmap_buf_end = NULL;
-	}
+	sparse_buffer_fini();
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index 2ea8b3dbd0df..9a0a5f598469 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -400,7 +400,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 	}
 }
 
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+unsigned long __init section_map_size(void)
+
+{
+	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
+}
+
+#else
 struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap)
 {
@@ -457,6 +464,42 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
+static void *sparsemap_buf __meminitdata;
+static void *sparsemap_buf_end __meminitdata;
+
+void __init sparse_buffer_init(unsigned long size, int nid)
+{
+	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
+	sparsemap_buf =
+		memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
+						__pa(MAX_DMA_ADDRESS),
+						BOOTMEM_ALLOC_ACCESSIBLE, nid);
+	sparsemap_buf_end = sparsemap_buf + size;
+}
+
+void __init sparse_buffer_fini(void)
+{
+	unsigned long size = sparsemap_buf_end - sparsemap_buf;
+
+	if (sparsemap_buf && size > 0)
+		memblock_free_early(__pa(sparsemap_buf), size);
+	sparsemap_buf = NULL;
+}
+
+void * __meminit sparse_buffer_alloc(unsigned long size)
+{
+	void *ptr = NULL;
+
+	if (sparsemap_buf) {
+		ptr = PTR_ALIGN(sparsemap_buf, size);
+		if (ptr + size > sparsemap_buf_end)
+			ptr = NULL;
+		else
+			sparsemap_buf = ptr + size;
+	}
+	return ptr;
+}
+
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
 static void __init sparse_early_mem_maps_alloc_node(void *data,
 				 unsigned long pnum_begin,
-- 
cgit v1.2.3


From afda57bc13410459fc957e93341ade7bebca36e2 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:30 -0700
Subject: mm/sparse: move buffer init/fini to the common place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that both variants of sparse memory use the same buffers to populate
memory map, we can move sparse_buffer_init()/sparse_buffer_fini() to the
common place.

Link: http://lkml.kernel.org/r/20180712203730.8703-4-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Tested-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |  3 ---
 mm/sparse-vmemmap.c |  2 --
 mm/sparse.c         | 14 +++++++-------
 3 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4ace5d50a892..48040510df05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2671,9 +2671,6 @@ void sparse_mem_maps_populate_node(struct page **map_map,
 				   unsigned long map_count,
 				   int nodeid);
 
-unsigned long __init section_map_size(void);
-void sparse_buffer_init(unsigned long size, int nid);
-void sparse_buffer_fini(void);
 void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index b05c7663c640..cd15f3d252c3 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -270,7 +270,6 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 	unsigned long pnum;
 	int nr_consumed_maps = 0;
 
-	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -282,5 +281,4 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-	sparse_buffer_fini();
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index db4867b62fff..20ca292d8f11 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -401,14 +401,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-unsigned long __init section_map_size(void)
+static unsigned long __init section_map_size(void)
 
 {
 	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
 }
 
 #else
-unsigned long __init section_map_size(void)
+static unsigned long __init section_map_size(void)
 {
 	return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
 }
@@ -433,10 +433,8 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 					  unsigned long map_count, int nodeid)
 {
 	unsigned long pnum;
-	unsigned long size = section_map_size();
 	int nr_consumed_maps = 0;
 
-	sparse_buffer_init(size * map_count, nodeid);
 	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
 		if (!present_section_nr(pnum))
 			continue;
@@ -447,14 +445,13 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
 		       __func__);
 	}
-	sparse_buffer_fini();
 }
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void *sparsemap_buf __meminitdata;
 static void *sparsemap_buf_end __meminitdata;
 
-void __init sparse_buffer_init(unsigned long size, int nid)
+static void __init sparse_buffer_init(unsigned long size, int nid)
 {
 	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
 	sparsemap_buf =
@@ -464,7 +461,7 @@ void __init sparse_buffer_init(unsigned long size, int nid)
 	sparsemap_buf_end = sparsemap_buf + size;
 }
 
-void __init sparse_buffer_fini(void)
+static void __init sparse_buffer_fini(void)
 {
 	unsigned long size = sparsemap_buf_end - sparsemap_buf;
 
@@ -494,8 +491,11 @@ static void __init sparse_early_mem_maps_alloc_node(void *data,
 				 unsigned long map_count, int nodeid)
 {
 	struct page **map_map = (struct page **)data;
+
+	sparse_buffer_init(section_map_size() * map_count, nodeid);
 	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
 					 map_count, nodeid);
+	sparse_buffer_fini();
 }
 #else
 static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
-- 
cgit v1.2.3


From 2a3cb8baef71e4dad4a6ec17f5f0db9e05f46a01 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 17 Aug 2018 15:49:37 -0700
Subject: mm/sparse: delete old sparse_init and enable new one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename new_sparse_init() to sparse_init() which enables it.  Delete old
sparse_init() and all the code that became obsolete with.

[pasha.tatashin@oracle.com: remove unused sparse_mem_maps_populate_node()]
  Link: http://lkml.kernel.org/r/20180716174447.14529-6-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180712203730.8703-6-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Tested-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h  |   6 --
 mm/Kconfig          |   4 -
 mm/sparse-vmemmap.c |  21 -----
 mm/sparse.c         | 237 +---------------------------------------------------
 4 files changed, 1 insertion(+), 267 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 48040510df05..a3cae495f9ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2665,12 +2665,6 @@ extern int randomize_va_space;
 const char * arch_vma_name(struct vm_area_struct *vma);
 void print_vma_addr(char *prefix, unsigned long rip);
 
-void sparse_mem_maps_populate_node(struct page **map_map,
-				   unsigned long pnum_begin,
-				   unsigned long pnum_end,
-				   unsigned long map_count,
-				   int nodeid);
-
 void *sparse_buffer_alloc(unsigned long size);
 struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
 		struct vmem_altmap *altmap);
diff --git a/mm/Kconfig b/mm/Kconfig
index 08d8399bb93b..adfeae4decb4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -118,10 +118,6 @@ config SPARSEMEM_EXTREME
 config SPARSEMEM_VMEMMAP_ENABLE
 	bool
 
-config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	def_bool y
-	depends on SPARSEMEM && X86_64
-
 config SPARSEMEM_VMEMMAP
 	bool "Sparse Memory virtual memmap"
 	depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index cd15f3d252c3..8301293331a2 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -261,24 +261,3 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
 
 	return map;
 }
-
-void __init sparse_mem_maps_populate_node(struct page **map_map,
-					  unsigned long pnum_begin,
-					  unsigned long pnum_end,
-					  unsigned long map_count, int nodeid)
-{
-	unsigned long pnum;
-	int nr_consumed_maps = 0;
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-
-		map_map[nr_consumed_maps] =
-				sparse_mem_map_populate(pnum, nodeid, NULL);
-		if (map_map[nr_consumed_maps++])
-			continue;
-		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-		       __func__);
-	}
-}
diff --git a/mm/sparse.c b/mm/sparse.c
index 248d5d7bbf55..10b07eea9a6e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -205,12 +205,6 @@ static inline unsigned long first_present_section_nr(void)
 	return next_present_section_nr(-1);
 }
 
-/*
- * Record how many memory sections are marked as present
- * during system bootup.
- */
-static int __initdata nr_present_sections;
-
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
@@ -240,7 +234,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 			ms->section_mem_map = sparse_encode_early_nid(nid) |
 							SECTION_IS_ONLINE;
 			section_mark_present(ms);
-			nr_present_sections++;
 		}
 	}
 }
@@ -377,37 +370,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static void __init sparse_early_usemaps_alloc_node(void *data,
-				 unsigned long pnum_begin,
-				 unsigned long pnum_end,
-				 unsigned long usemap_count, int nodeid)
-{
-	void *usemap;
-	unsigned long pnum;
-	unsigned long **usemap_map = (unsigned long **)data;
-	int size = usemap_size();
-	int nr_consumed_maps = 0;
-
-	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
-							  size * usemap_count);
-	if (!usemap) {
-		pr_warn("%s: allocation failed\n", __func__);
-		return;
-	}
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-		usemap_map[nr_consumed_maps] = usemap;
-		usemap += size;
-		check_usemap_section_nr(nodeid, usemap_map[nr_consumed_maps]);
-		nr_consumed_maps++;
-	}
-}
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 static unsigned long __init section_map_size(void)
-
 {
 	return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
 }
@@ -432,25 +396,6 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
 					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	return map;
 }
-void __init sparse_mem_maps_populate_node(struct page **map_map,
-					  unsigned long pnum_begin,
-					  unsigned long pnum_end,
-					  unsigned long map_count, int nodeid)
-{
-	unsigned long pnum;
-	int nr_consumed_maps = 0;
-
-	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-		if (!present_section_nr(pnum))
-			continue;
-		map_map[nr_consumed_maps] =
-				sparse_mem_map_populate(pnum, nodeid, NULL);
-		if (map_map[nr_consumed_maps++])
-			continue;
-		pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-		       __func__);
-	}
-}
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 static void *sparsemap_buf __meminitdata;
@@ -489,190 +434,10 @@ void * __meminit sparse_buffer_alloc(unsigned long size)
 	return ptr;
 }
 
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-static void __init sparse_early_mem_maps_alloc_node(void *data,
-				 unsigned long pnum_begin,
-				 unsigned long pnum_end,
-				 unsigned long map_count, int nodeid)
-{
-	struct page **map_map = (struct page **)data;
-
-	sparse_buffer_init(section_map_size() * map_count, nodeid);
-	sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
-					 map_count, nodeid);
-	sparse_buffer_fini();
-}
-#else
-static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
-{
-	struct page *map;
-	struct mem_section *ms = __nr_to_section(pnum);
-	int nid = sparse_early_nid(ms);
-
-	map = sparse_mem_map_populate(pnum, nid, NULL);
-	if (map)
-		return map;
-
-	pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
-	       __func__);
-	return NULL;
-}
-#endif
-
 void __weak __meminit vmemmap_populate_print_last(void)
 {
 }
 
-/**
- *  alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
- *  @map: usemap_map for pageblock flags or mmap_map for vmemmap
- *  @unit_size: size of map unit
- */
-static void __init alloc_usemap_and_memmap(void (*alloc_func)
-					(void *, unsigned long, unsigned long,
-					unsigned long, int), void *data,
-					int data_unit_size)
-{
-	unsigned long pnum;
-	unsigned long map_count;
-	int nodeid_begin = 0;
-	unsigned long pnum_begin = 0;
-
-	for_each_present_section_nr(0, pnum) {
-		struct mem_section *ms;
-
-		ms = __nr_to_section(pnum);
-		nodeid_begin = sparse_early_nid(ms);
-		pnum_begin = pnum;
-		break;
-	}
-	map_count = 1;
-	for_each_present_section_nr(pnum_begin + 1, pnum) {
-		struct mem_section *ms;
-		int nodeid;
-
-		ms = __nr_to_section(pnum);
-		nodeid = sparse_early_nid(ms);
-		if (nodeid == nodeid_begin) {
-			map_count++;
-			continue;
-		}
-		/* ok, we need to take cake of from pnum_begin to pnum - 1*/
-		alloc_func(data, pnum_begin, pnum,
-						map_count, nodeid_begin);
-		/* new start, update count etc*/
-		nodeid_begin = nodeid;
-		pnum_begin = pnum;
-		data += map_count * data_unit_size;
-		map_count = 1;
-	}
-	/* ok, last chunk */
-	alloc_func(data, pnum_begin, __highest_present_section_nr+1,
-						map_count, nodeid_begin);
-}
-
-/*
- * Allocate the accumulated non-linear sections, allocate a mem_map
- * for each and record the physical to section mapping.
- */
-void __init sparse_init(void)
-{
-	unsigned long pnum;
-	struct page *map;
-	unsigned long *usemap;
-	unsigned long **usemap_map;
-	int size;
-	int nr_consumed_maps = 0;
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	int size2;
-	struct page **map_map;
-#endif
-
-	/* see include/linux/mmzone.h 'struct mem_section' definition */
-	BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
-
-	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
-	set_pageblock_order();
-
-	/*
-	 * map is using big page (aka 2M in x86 64 bit)
-	 * usemap is less one page (aka 24 bytes)
-	 * so alloc 2M (with 2M align) and 24 bytes in turn will
-	 * make next 2M slip to one more 2M later.
-	 * then in big system, the memory will have a lot of holes...
-	 * here try to allocate 2M pages continuously.
-	 *
-	 * powerpc need to call sparse_init_one_section right after each
-	 * sparse_early_mem_map_alloc, so allocate usemap_map at first.
-	 */
-	size = sizeof(unsigned long *) * nr_present_sections;
-	usemap_map = memblock_virt_alloc(size, 0);
-	if (!usemap_map)
-		panic("can not allocate usemap_map\n");
-	alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
-				(void *)usemap_map,
-				sizeof(usemap_map[0]));
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	size2 = sizeof(struct page *) * nr_present_sections;
-	map_map = memblock_virt_alloc(size2, 0);
-	if (!map_map)
-		panic("can not allocate map_map\n");
-	alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
-				(void *)map_map,
-				sizeof(map_map[0]));
-#endif
-
-	/*
-	 * The number of present sections stored in nr_present_sections
-	 * are kept the same since mem sections are marked as present in
-	 * memory_present(). In this for loop, we need check which sections
-	 * failed to allocate memmap or usemap, then clear its
-	 * ->section_mem_map accordingly. During this process, we need
-	 * increase 'nr_consumed_maps' whether its allocation of memmap
-	 * or usemap failed or not, so that after we handle the i-th
-	 * memory section, can get memmap and usemap of (i+1)-th section
-	 * correctly.
-	 */
-	for_each_present_section_nr(0, pnum) {
-		struct mem_section *ms;
-
-		if (nr_consumed_maps >= nr_present_sections) {
-			pr_err("nr_consumed_maps goes beyond nr_present_sections\n");
-			break;
-		}
-		ms = __nr_to_section(pnum);
-		usemap = usemap_map[nr_consumed_maps];
-		if (!usemap) {
-			ms->section_mem_map = 0;
-			nr_consumed_maps++;
-			continue;
-		}
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-		map = map_map[nr_consumed_maps];
-#else
-		map = sparse_early_mem_map_alloc(pnum);
-#endif
-		if (!map) {
-			ms->section_mem_map = 0;
-			nr_consumed_maps++;
-			continue;
-		}
-
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
-								usemap);
-		nr_consumed_maps++;
-	}
-
-	vmemmap_populate_print_last();
-
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-	memblock_free_early(__pa(map_map), size2);
-#endif
-	memblock_free_early(__pa(usemap_map), size);
-}
-
 /*
  * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
  * And number of present sections in this node is map_count.
@@ -726,7 +491,7 @@ failed:
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
  */
-void __init new_sparse_init(void)
+void __init sparse_init(void)
 {
 	unsigned long pnum_begin = first_present_section_nr();
 	int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
-- 
cgit v1.2.3


From 6b51e88199ca4f75ff647eff28efd30bfcb08dc4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 17 Aug 2018 15:49:55 -0700
Subject: mm/list_lru: introduce list_lru_shrink_walk_irq()

Provide list_lru_shrink_walk_irq() and let it behave like
list_lru_walk_one() except that it locks the spinlock with
spin_lock_irq().  This is used by scan_shadow_nodes() because its lock
nests within the i_pages lock which is acquired with IRQ.  This change
allows to use proper locking promitives instead hand crafted
lock_irq_disable() plus spin_lock().

There is no EXPORT_SYMBOL provided because the current user is in-kernel
only.

Add list_lru_shrink_walk_irq() which acquires the spinlock with the
proper locking primitives.

Link: http://lkml.kernel.org/r/20180716111921.5365-5-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list_lru.h | 25 +++++++++++++++++++++++++
 mm/list_lru.c            | 15 +++++++++++++++
 mm/workingset.c          |  8 ++------
 3 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index d9c16f2f2f00..aa5efd9351eb 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -166,6 +166,23 @@ unsigned long list_lru_walk_one(struct list_lru *lru,
 				int nid, struct mem_cgroup *memcg,
 				list_lru_walk_cb isolate, void *cb_arg,
 				unsigned long *nr_to_walk);
+/**
+ * list_lru_walk_one_irq: walk a list_lru, isolating and disposing freeable items.
+ * @lru: the lru pointer.
+ * @nid: the node id to scan from.
+ * @memcg: the cgroup to scan from.
+ * @isolate: callback function that is resposible for deciding what to do with
+ *  the item currently being scanned
+ * @cb_arg: opaque type that will be passed to @isolate
+ * @nr_to_walk: how many items to scan.
+ *
+ * Same as @list_lru_walk_one except that the spinlock is acquired with
+ * spin_lock_irq().
+ */
+unsigned long list_lru_walk_one_irq(struct list_lru *lru,
+				    int nid, struct mem_cgroup *memcg,
+				    list_lru_walk_cb isolate, void *cb_arg,
+				    unsigned long *nr_to_walk);
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk);
@@ -178,6 +195,14 @@ list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
 				 &sc->nr_to_scan);
 }
 
+static inline unsigned long
+list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc,
+			 list_lru_walk_cb isolate, void *cb_arg)
+{
+	return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate, cb_arg,
+				     &sc->nr_to_scan);
+}
+
 static inline unsigned long
 list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
 	      void *cb_arg, unsigned long nr_to_walk)
diff --git a/mm/list_lru.c b/mm/list_lru.c
index f5c6a2d1ea66..5b30625fd365 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -282,6 +282,21 @@ list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 }
 EXPORT_SYMBOL_GPL(list_lru_walk_one);
 
+unsigned long
+list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+		      list_lru_walk_cb isolate, void *cb_arg,
+		      unsigned long *nr_to_walk)
+{
+	struct list_lru_node *nlru = &lru->node[nid];
+	unsigned long ret;
+
+	spin_lock_irq(&nlru->lock);
+	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
+				  nr_to_walk);
+	spin_unlock_irq(&nlru->lock);
+	return ret;
+}
+
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk)
diff --git a/mm/workingset.c b/mm/workingset.c
index bc72ad029b3e..4516dd790129 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -483,13 +483,9 @@ out:
 static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 				       struct shrink_control *sc)
 {
-	unsigned long ret;
-
 	/* list_lru lock nests inside the IRQ-safe i_pages lock */
-	local_irq_disable();
-	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
-	local_irq_enable();
-	return ret;
+	return list_lru_shrink_walk_irq(&shadow_nodes, sc, shadow_lru_isolate,
+					NULL);
 }
 
 static struct shrinker workingset_shadow_shrinker = {
-- 
cgit v1.2.3


From ddbf369c0a33924f76d092985bd20d9310f43d7f Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 17 Aug 2018 15:49:58 -0700
Subject: mm, vmacache: hash addresses based on pmd

When perf profiling a wide variety of different workloads, it was found
that vmacache_find() had higher than expected cost: up to 0.08% of cpu
utilization in some cases.  This was found to rival other core VM
functions such as alloc_pages_vma() with thp enabled and default
mempolicy, and the conditionals in __get_vma_policy().

VMACACHE_HASH() determines which of the four per-task_struct slots a vma
is cached for a particular address.  This currently depends on the pfn,
so pfn 5212 occupies a different vmacache slot than its neighboring pfn
5213.

vmacache_find() iterates through all four of current's vmacache slots
when looking up an address.  Hashing based on pfn, an address has
~1/VMACACHE_SIZE chance of being cached in the first vmacache slot, or
about 25%, *if* the vma is cached.

This patch hashes an address by its pmd instead of pte to optimize for
workloads with good spatial locality.  This results in a higher
probability of vmas being cached in the first slot that is checked:
normally ~70% on the same workloads instead of 25%.

[rientjes@google.com: various updates]
  Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807231532290.109445@chino.kir.corp.google.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807091749150.114630@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmacache.h |  6 ------
 mm/vmacache.c            | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
index a5b3aa8d281f..3e9a963edd6a 100644
--- a/include/linux/vmacache.h
+++ b/include/linux/vmacache.h
@@ -5,12 +5,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 
-/*
- * Hash based on the page number. Provides a good hit rate for
- * workloads with good locality and those with random accesses as well.
- */
-#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
-
 static inline void vmacache_flush(struct task_struct *tsk)
 {
 	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
diff --git a/mm/vmacache.c b/mm/vmacache.c
index db7596eb6132..ea517bef7dc5 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -6,6 +6,18 @@
 #include <linux/sched/task.h>
 #include <linux/mm.h>
 #include <linux/vmacache.h>
+#include <asm/pgtable.h>
+
+/*
+ * Hash based on the pmd of addr if configured with MMU, which provides a good
+ * hit rate for workloads with spatial locality.  Otherwise, use pages.
+ */
+#ifdef CONFIG_MMU
+#define VMACACHE_SHIFT	PMD_SHIFT
+#else
+#define VMACACHE_SHIFT	PAGE_SHIFT
+#endif
+#define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK)
 
 /*
  * Flush vma caches for threads that share a given mm.
@@ -87,6 +99,7 @@ static bool vmacache_valid(struct mm_struct *mm)
 
 struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 {
+	int idx = VMACACHE_HASH(addr);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -95,16 +108,20 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
-		if (!vma)
-			continue;
-		if (WARN_ON_ONCE(vma->vm_mm != mm))
-			break;
-		if (vma->vm_start <= addr && vma->vm_end > addr) {
-			count_vm_vmacache_event(VMACACHE_FIND_HITS);
-			return vma;
+		if (vma) {
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+			if (WARN_ON_ONCE(vma->vm_mm != mm))
+				break;
+#endif
+			if (vma->vm_start <= addr && vma->vm_end > addr) {
+				count_vm_vmacache_event(VMACACHE_FIND_HITS);
+				return vma;
+			}
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
@@ -115,6 +132,7 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 					   unsigned long start,
 					   unsigned long end)
 {
+	int idx = VMACACHE_HASH(start);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -123,12 +141,14 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
 		if (vma && vma->vm_start == start && vma->vm_end == end) {
 			count_vm_vmacache_event(VMACACHE_FIND_HITS);
 			return vma;
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From 771c035372a036f83353eef46dbb829780330234 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 18 Aug 2018 12:19:56 -0700
Subject: deprecate the '__deprecated' attribute warnings entirely and for good

We haven't had lots of deprecation warnings lately, but the rdma use of
it made them flare up again.

They are not useful.  They annoy everybody, and nobody ever does
anything about them, because it's always "somebody elses problem".  And
when people start thinking that warnings are normal, they stop looking
at them, and the real warnings that mean something go unnoticed.

If you want to get rid of a function, just get rid of it.  Convert every
user to the new world order.

And if you can't do that, then don't annoy everybody else with your
marking that says "I couldn't be bothered to fix this, so I'll just spam
everybody elses build logs with warnings about my laziness".

Make a kernelnewbies wiki page about things that could be cleaned up,
write a blog post about it, or talk to people on the mailing lists.  But
don't add warnings to the kernel build about cleanup that you think
should happen but you aren't doing yourself.

Don't.  Just don't.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h   |  1 -
 include/linux/compiler_types.h | 21 ++-------------------
 lib/Kconfig.debug              |  8 --------
 3 files changed, 2 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 573f5a7d42d4..3e70b7d4e9ed 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -103,7 +103,6 @@
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 
-#define __deprecated	__attribute__((deprecated))
 #define __packed	__attribute__((packed))
 #define __weak		__attribute__((weak))
 #define __alias(symbol)	__attribute__((alias(#symbol)))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a8ba6b04152c..fbf337933fd8 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -111,21 +111,10 @@ struct ftrace_likely_data {
 #endif /* __ASSEMBLY__ */
 
 #ifdef __KERNEL__
-/*
- * Allow us to mark functions as 'deprecated' and have gcc emit a nice
- * warning for each use, in hopes of speeding the functions removal.
- * Usage is:
- * 		int __deprecated foo(void)
- */
-#ifndef __deprecated
-# define __deprecated		/* unimplemented */
-#endif
 
-#ifdef MODULE
-#define __deprecated_for_modules __deprecated
-#else
+/* Don't. Just don't. */
+#define __deprecated
 #define __deprecated_for_modules
-#endif
 
 #ifndef __must_check
 #define __must_check
@@ -135,12 +124,6 @@ struct ftrace_likely_data {
 #undef __must_check
 #define __must_check
 #endif
-#ifndef CONFIG_ENABLE_WARN_DEPRECATED
-#undef __deprecated
-#undef __deprecated_for_modules
-#define __deprecated
-#define __deprecated_for_modules
-#endif
 
 #ifndef __malloc
 #define __malloc
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c6e73904c5a5..ab1b599202bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -211,14 +211,6 @@ config GDB_SCRIPTS
 	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
 	  for further details.
 
-config ENABLE_WARN_DEPRECATED
-	bool "Enable __deprecated logic"
-	default y
-	help
-	  Enable the __deprecated logic in the kernel build.
-	  Disable this to suppress the "warning: 'foo' is deprecated
-	  (declared at kernel/power/somefile.c:1234)" messages.
-
 config ENABLE_MUST_CHECK
 	bool "Enable __must_check logic"
 	default y
-- 
cgit v1.2.3


From 307797159ac25fe5a2048bf5c6a5718298edca57 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 18 Aug 2018 12:30:42 -0700
Subject: pcmcia: remove long deprecated pcmcia_request_exclusive_irq()
 function

This function was created as a deprecated fallback case back in 2010 by
commit eb14120f743d ("pcmcia: re-work pcmcia_request_irq()") for legacy
cases.

Actual in-kernel users haven't been around for a long while.  The last
in-kernel user was apparently removed four years ago by commit
5f5316fcd08e ("am2150: Update nmclan_cs.c to use update PCMCIA API").

Just remove it entirely.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/pcmcia/driver-changes.txt |  3 ---
 drivers/pcmcia/pcmcia_resource.c        | 36 ---------------------------------
 include/pcmcia/ds.h                     | 10 ---------
 3 files changed, 49 deletions(-)

(limited to 'include')

diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
index dd04361dd361..78355c4c268a 100644
--- a/Documentation/pcmcia/driver-changes.txt
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -46,9 +46,6 @@ This file details changes in 2.6 which affect PCMCIA card driver authors:
    - use pcmcia_request_irq(p_dev, handler_t); the PCMCIA core will
      clean up automatically on calls to pcmcia_disable_device() or
      device ejection.
-   - drivers still not capable of IRQF_SHARED (or not telling us so) may
-     use the deprecated pcmcia_request_exclusive_irq() for the time
-     being; they might receive a shared IRQ nonetheless.
 
 * no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
    Instead of the cs_error() callback or the CS_CHECK() macro, please use
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 34aad895a239..18802096148e 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -712,42 +712,6 @@ int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev,
 EXPORT_SYMBOL(pcmcia_request_irq);
 
 
-/**
- * pcmcia_request_exclusive_irq() - attempt to request an exclusive IRQ first
- * @p_dev: the associated PCMCIA device
- * @handler: IRQ handler to register
- *
- * pcmcia_request_exclusive_irq() is a wrapper around request_irq() which
- * attempts first to request an exclusive IRQ. If it fails, it also accepts
- * a shared IRQ, but prints out a warning. PCMCIA drivers should allow for
- * IRQ sharing and either use request_irq directly (then they need to call
- * free_irq() themselves, too), or the pcmcia_request_irq() function.
- */
-int __must_check
-__pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev,
-			irq_handler_t handler)
-{
-	int ret;
-
-	if (!p_dev->irq)
-		return -EINVAL;
-
-	ret = request_irq(p_dev->irq, handler, 0, p_dev->devname, p_dev->priv);
-	if (ret) {
-		ret = pcmcia_request_irq(p_dev, handler);
-		dev_warn(&p_dev->dev, "pcmcia: request for exclusive IRQ could not be fulfilled\n");
-		dev_warn(&p_dev->dev, "pcmcia: the driver needs updating to supported shared IRQ lines\n");
-	}
-	if (ret)
-		dev_info(&p_dev->dev, "request_irq() failed\n");
-	else
-		p_dev->_irq = 1;
-
-	return ret;
-} /* pcmcia_request_exclusive_irq */
-EXPORT_SYMBOL(__pcmcia_request_exclusive_irq);
-
-
 #ifdef CONFIG_PCMCIA_PROBE
 
 /* mask of IRQs already reserved by other cards, we should avoid using them */
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 2d56e428506c..3037157855f0 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -206,16 +206,6 @@ int pcmcia_write_config_byte(struct pcmcia_device *p_dev, off_t where, u8 val);
 /* device configuration */
 int pcmcia_request_io(struct pcmcia_device *p_dev);
 
-int __must_check
-__pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev,
-				irq_handler_t handler);
-static inline __must_check __deprecated int
-pcmcia_request_exclusive_irq(struct pcmcia_device *p_dev,
-				irq_handler_t handler)
-{
-	return __pcmcia_request_exclusive_irq(p_dev, handler);
-}
-
 int __must_check pcmcia_request_irq(struct pcmcia_device *p_dev,
 				irq_handler_t handler);
 
-- 
cgit v1.2.3


From 284ce4011ba60d6c487b668eea729b6294930806 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 13 Jul 2018 21:50:32 -0700
Subject: x86/memory_failure: Introduce {set, clear}_mce_nospec()

Currently memory_failure() returns zero if the error was handled. On
that result mce_unmap_kpfn() is called to zap the page out of the kernel
linear mapping to prevent speculative fetches of potentially poisoned
memory. However, in the case of dax mapped devmap pages the page may be
in active permanent use by the device driver, so it cannot be unmapped
from the kernel.

Instead of marking the page not present, marking the page UC should
be sufficient for preventing poison from being pre-fetched into the
cache. Convert mce_unmap_pfn() to set_mce_nospec() remapping the page as
UC, to hide it from speculative accesses.

Given that that persistent memory errors can be cleared by the driver,
include a facility to restore the page to cacheable operation,
clear_mce_nospec().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: <linux-edac@vger.kernel.org>
Cc: <x86@kernel.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 arch/x86/include/asm/set_memory.h         | 42 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mcheck/mce-internal.h | 15 -----------
 arch/x86/kernel/cpu/mcheck/mce.c          | 38 +++-------------------------
 include/linux/set_memory.h                | 14 +++++++++++
 4 files changed, 59 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index bd090367236c..cf5e9124b45e 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -88,4 +88,46 @@ extern int kernel_set_to_readonly;
 void set_kernel_text_rw(void);
 void set_kernel_text_ro(void);
 
+#ifdef CONFIG_X86_64
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	unsigned long decoy_addr;
+	int rc;
+
+	/*
+	 * Mark the linear address as UC to make sure we don't log more
+	 * errors because of speculative access to the page.
+	 * We would like to just call:
+	 *      set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1);
+	 * but doing that would radically increase the odds of a
+	 * speculative access to the poison page because we'd have
+	 * the virtual address of the kernel 1:1 mapping sitting
+	 * around in registers.
+	 * Instead we get tricky.  We create a non-canonical address
+	 * that looks just like the one we want, but has bit 63 flipped.
+	 * This relies on set_memory_uc() properly sanitizing any __pa()
+	 * results with __PHYSICAL_MASK or PTE_PFN_MASK.
+	 */
+	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
+
+	rc = set_memory_uc(decoy_addr, 1);
+	if (rc)
+		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
+	return rc;
+}
+#define set_mce_nospec set_mce_nospec
+
+/* Restore full speculative operation to the pfn. */
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
+}
+#define clear_mce_nospec clear_mce_nospec
+#else
+/*
+ * Few people would run a 32-bit kernel on a machine that supports
+ * recoverable errors because they have too much memory to boot 32-bit.
+ */
+#endif
+
 #endif /* _ASM_X86_SET_MEMORY_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 374d1aa66952..ceb67cd5918f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,21 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
 static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
 
-#ifndef CONFIG_X86_64
-/*
- * On 32-bit systems it would be difficult to safely unmap a poison page
- * from the kernel 1:1 map because there are no non-canonical addresses that
- * we can use to refer to the address without risking a speculative access.
- * However, this isn't much of an issue because:
- * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM which
- *    are only mapped into the kernel as needed
- * 2) Few people would run a 32-bit kernel on a machine that supports
- *    recoverable errors because they have too much memory to boot 32-bit.
- */
-static inline void mce_unmap_kpfn(unsigned long pfn) {}
-#define mce_unmap_kpfn mce_unmap_kpfn
-#endif
-
 struct mca_config {
 	bool dont_log_ce;
 	bool cmci_disabled;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c102ad51025e..42a061ce1f5d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -42,6 +42,7 @@
 #include <linux/irq_work.h>
 #include <linux/export.h>
 #include <linux/jump_label.h>
+#include <linux/set_memory.h>
 
 #include <asm/intel-family.h>
 #include <asm/processor.h>
@@ -50,7 +51,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/reboot.h>
-#include <asm/set_memory.h>
 
 #include "mce-internal.h"
 
@@ -108,10 +108,6 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn);
-#endif
-
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -602,7 +598,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
 		pfn = mce->addr >> PAGE_SHIFT;
 		if (!memory_failure(pfn, 0))
-			mce_unmap_kpfn(pfn);
+			set_mce_nospec(pfn);
 	}
 
 	return NOTIFY_OK;
@@ -1072,38 +1068,10 @@ static int do_memory_failure(struct mce *m)
 	if (ret)
 		pr_err("Memory error not recovered");
 	else
-		mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
+		set_mce_nospec(m->addr >> PAGE_SHIFT);
 	return ret;
 }
 
-#ifndef mce_unmap_kpfn
-static void mce_unmap_kpfn(unsigned long pfn)
-{
-	unsigned long decoy_addr;
-
-	/*
-	 * Unmap this page from the kernel 1:1 mappings to make sure
-	 * we don't log more errors because of speculative access to
-	 * the page.
-	 * We would like to just call:
-	 *	set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
-	 * but doing that would radically increase the odds of a
-	 * speculative access to the poison page because we'd have
-	 * the virtual address of the kernel 1:1 mapping sitting
-	 * around in registers.
-	 * Instead we get tricky.  We create a non-canonical address
-	 * that looks just like the one we want, but has bit 63 flipped.
-	 * This relies on set_memory_np() not checking whether we passed
-	 * a legal address.
-	 */
-
-	decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-
-	if (set_memory_np(decoy_addr, 1))
-		pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-}
-#endif
-
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index da5178216da5..2a986d282a97 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,6 +17,20 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef set_mce_nospec
+static inline int set_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
+#ifndef clear_mce_nospec
+static inline int clear_mce_nospec(unsigned long pfn)
+{
+	return 0;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
 static inline int set_memory_encrypted(unsigned long addr, int numpages)
 {
-- 
cgit v1.2.3


From cafa0010cd51fb711fdcb50fc55f394c5f167a0a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 20 Aug 2018 13:15:26 -0700
Subject: Raise the minimum required gcc version to 4.6

Various architectures fail to build properly with older versions of the
gcc compiler.

An example from Guenter Roeck in thread [1]:
>
>   In file included from ./include/linux/mm.h:17:0,
>                    from ./include/linux/pid_namespace.h:7,
>                    from ./include/linux/ptrace.h:10,
>                    from arch/openrisc/kernel/asm-offsets.c:32:
>   ./include/linux/mm_types.h:497:16: error: flexible array member in otherwise empty struct
>
> This is just an example with gcc 4.5.1 for or32. I have seen the problem
> with gcc 4.4 (for unicore32) as well.

So update the minimum required version of gcc to 4.6.

[1] https://lore.kernel.org/lkml/20180814170904.GA12768@roeck-us.net/

Miscellanea:

 - Update Documentation/process/changes.rst

 - Remove and consolidate version test blocks in compiler-gcc.h for
   versions lower than 4.6

Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/process/changes.rst |  2 +-
 include/linux/compiler-gcc.h      | 86 ++++++++-------------------------------
 2 files changed, 19 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 7a92a06f90de..61f918b10a0c 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -29,7 +29,7 @@ you probably needn't concern yourself with isdn4k-utils.
 ====================== ===============  ========================================
         Program        Minimal version       Command to check the version
 ====================== ===============  ========================================
-GNU C                  3.2              gcc --version
+GNU C                  4.6              gcc --version
 GNU make               3.81             make --version
 binutils               2.20             ld -v
 flex                   2.5.35           flex --version
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3e70b7d4e9ed..250b9b7cfd60 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -10,6 +10,10 @@
 		     + __GNUC_MINOR__ * 100	\
 		     + __GNUC_PATCHLEVEL__)
 
+#if GCC_VERSION < 40600
+# error Sorry, your compiler is too old - please upgrade it.
+#endif
+
 /* Optimization barrier */
 
 /* The "volatile" is due to gcc bugs */
@@ -58,6 +62,12 @@
 #define OPTIMIZER_HIDE_VAR(var)						\
 	__asm__ ("" : "=r" (var) : "0" (var))
 
+/*
+ * A trick to suppress uninitialized variable warning without generating any
+ * code
+ */
+#define uninitialized_var(x) x = x
+
 #ifdef __CHECKER__
 #define __must_be_array(a)	0
 #else
@@ -91,7 +101,7 @@
  * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
-    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
+    !defined(CONFIG_OPTIMIZE_INLINING)
 #define inline \
 	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
@@ -147,47 +157,13 @@
 #define __always_unused		__attribute__((unused))
 #define __mode(x)               __attribute__((mode(x)))
 
-/* gcc version specific checks */
-
-#if GCC_VERSION < 30200
-# error Sorry, your compiler is too old - please upgrade it.
-#endif
-
-#if GCC_VERSION < 30300
-# define __used			__attribute__((__unused__))
-#else
-# define __used			__attribute__((__used__))
-#endif
-
-#ifdef CONFIG_GCOV_KERNEL
-# if GCC_VERSION < 30400
-#   error "GCOV profiling support for gcc versions below 3.4 not included"
-# endif /* __GNUC_MINOR__ */
-#endif /* CONFIG_GCOV_KERNEL */
-
-#if GCC_VERSION >= 30400
 #define __must_check		__attribute__((warn_unused_result))
 #define __malloc		__attribute__((__malloc__))
-#endif
-
-#if GCC_VERSION >= 40000
-
-/* GCC 4.1.[01] miscompiles __weak */
-#ifdef __KERNEL__
-# if GCC_VERSION >= 40100 &&  GCC_VERSION <= 40101
-#  error Your version of gcc miscompiles the __weak directive
-# endif
-#endif
 
 #define __used			__attribute__((__used__))
 #define __compiler_offsetof(a, b)					\
 	__builtin_offsetof(a, b)
 
-#if GCC_VERSION >= 40100
-# define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-#endif
-
-#if GCC_VERSION >= 40300
 /* Mark functions as cold. gcc will assume any path leading to a call
  * to them will be unlikely.  This means a lot of manual unlikely()s
  * are unnecessary now for any paths leading to the usual suspects
@@ -206,24 +182,19 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#ifndef __CHECKER__
-# define __compiletime_warning(message) __attribute__((warning(message)))
-# define __compiletime_error(message) __attribute__((error(message)))
-#endif /* __CHECKER__ */
-#endif /* GCC_VERSION >= 40300 */
-
-#if GCC_VERSION >= 40400
 #define __optimize(level)	__attribute__((__optimize__(level)))
 #define __nostackprotector	__optimize("no-stack-protector")
-#endif /* GCC_VERSION >= 40400 */
 
-#if GCC_VERSION >= 40500
+#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
+#define __compiletime_warning(message) __attribute__((warning(message)))
+#define __compiletime_error(message) __attribute__((error(message)))
+
 #ifdef LATENT_ENTROPY_PLUGIN
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
-#endif
+#endif /* __CHECKER__ */
 
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
@@ -261,10 +232,6 @@
 #define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
-#endif /* GCC_VERSION >= 40500 */
-
-#if GCC_VERSION >= 40600
-
 /*
  * When used with Link Time Optimization, gcc can optimize away C functions or
  * variables which are referenced only from assembly code.  __visible tells the
@@ -273,8 +240,7 @@
  */
 #define __visible	__attribute__((externally_visible))
 
-#endif /* GCC_VERSION >= 40600 */
-
+/* gcc version specific checks */
 
 #if GCC_VERSION >= 40900 && !defined(__CHECKER__)
 /*
@@ -308,10 +274,8 @@
  * folding in __builtin_bswap*() (yet), so don't set these for it.
  */
 #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
-#if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
-#endif
 #if GCC_VERSION >= 40800
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
@@ -340,10 +304,9 @@
  * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
  */
 #define __designated_init __attribute__((designated_init))
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
-#endif	/* gcc version >= 40000 specific checks */
-
 #if !defined(__noclone)
 #define __noclone	/* not needed */
 #endif
@@ -352,16 +315,6 @@
 #define __no_sanitize_address
 #endif
 
-/*
- * A trick to suppress uninitialized variable warning without generating any
- * code
- */
-#define uninitialized_var(x) x = x
-
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
@@ -374,12 +327,9 @@
 #define __diag_GCC_warn		warning
 #define __diag_GCC_error	error
 
-/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
-#if GCC_VERSION >= 40600
 #define __diag_str1(s)		#s
 #define __diag_str(s)		__diag_str1(s)
 #define __diag(s)		_Pragma(__diag_str(GCC diagnostic s))
-#endif
 
 #if GCC_VERSION >= 80000
 #define __diag_GCC_8(s)		__diag(s)
-- 
cgit v1.2.3


From 04f264d3a8b0eb25d378127bd78c3c9a0261c828 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Mon, 20 Aug 2018 15:36:17 -0700
Subject: compiler.h: Allow arch-specific asm/compiler.h

We have a need to override the definition of
barrier_before_unreachable() for MIPS, which means we either need to add
architecture-specific code into linux/compiler-gcc.h or we need to allow
the architecture to provide a header that can define the macro before
the generic definition. The latter seems like the better approach.

A straightforward approach to the per-arch header is to make use of
asm-generic to provide a default empty header & adjust architectures
which don't need anything specific to make use of that by adding the
header to generic-y. Unfortunately this doesn't work so well due to
commit 28128c61e08e ("kconfig.h: Include compiler types to avoid missed
struct attributes") which caused linux/compiler_types.h to be included
in the compilation of every C file via the -include linux/kconfig.h flag
in c_flags.

Because the -include flag is present for all C files we compile, we need
the architecture-provided header to be present before any C files are
compiled. If any C files can be compiled prior to the asm-generic header
wrappers being generated then we hit a build failure due to missing
header. Such cases do exist - one pointed out by the kbuild test robot
is the compilation of arch/ia64/kernel/nr-irqs.c, which occurs as part
of the archprepare target [1].

This leaves us with a few options:

  1) Use generic-y & fix any build failures we find by enforcing
     ordering such that the asm-generic target occurs before any C
     compilation, such that linux/compiler_types.h can always include
     the generated asm-generic wrapper which in turn includes the empty
     asm-generic header. This would rely on us finding all the
     problematic cases - I don't know for sure that the ia64 issue is
     the only one.

  2) Add an actual empty header to each architecture, so that we don't
     need the generated asm-generic wrapper. This seems messy.

  3) Give up & add #ifdef CONFIG_MIPS or similar to
     linux/compiler_types.h. This seems messy too.

  4) Include the arch header only when it's actually needed, removing
     the need for the asm-generic wrapper for all other architectures.

This patch allows us to use approach 4, by including an asm/compiler.h
header from linux/compiler_types.h after the inclusion of the
compiler-specific linux/compiler-*.h header(s). We do this
conditionally, only when CONFIG_HAVE_ARCH_COMPILER_H is selected, in
order to avoid the need for asm-generic wrappers & the associated build
ordering issue described above. The asm/compiler.h header is included
after the generic linux/compiler-*.h header(s) for consistency with the
way linux/compiler-intel.h & linux/compiler-clang.h are included after
the linux/compiler-gcc.h header that they override.

[1] https://lists.01.org/pipermail/kbuild-all/2018-August/051175.html

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Patchwork: https://patchwork.linux-mips.org/patch/20269/
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: James Hogan <jhogan@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: linux-mips@linux-mips.org
---
 arch/Kconfig                   |  8 ++++++++
 include/linux/compiler_types.h | 12 ++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index 1aa59063f1fd..5c7c48e7b727 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -971,4 +971,12 @@ config REFCOUNT_FULL
 	  against various use-after-free conditions that can be used in
 	  security flaw exploits.
 
+config HAVE_ARCH_COMPILER_H
+	bool
+	help
+	  An architecture can select this if it provides an
+	  asm/compiler.h header that should be included after
+	  linux/compiler-*.h in order to override macro definitions that those
+	  headers generally provide.
+
 source "kernel/gcov/Kconfig"
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 6b79a9bba9a7..4be464a07612 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -78,6 +78,18 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <linux/compiler-clang.h>
 #endif
 
+/*
+ * Some architectures need to provide custom definitions of macros provided
+ * by linux/compiler-*.h, and can do so using asm/compiler.h. We include that
+ * conditionally rather than using an asm-generic wrapper in order to avoid
+ * build failures if any C compilation, which will include this file via an
+ * -include argument in c_flags, occurs prior to the asm-generic wrappers being
+ * generated.
+ */
+#ifdef CONFIG_HAVE_ARCH_COMPILER_H
+#include <asm/compiler.h>
+#endif
+
 /*
  * Generic compiler-dependent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
-- 
cgit v1.2.3


From 5ade60dda43c8906d4554374226c2eb11cc2ffba Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 20 Mar 2018 17:07:11 -0400
Subject: ida: Add new API

Add ida_alloc(), ida_alloc_min(), ida_alloc_max(), ida_alloc_range()
and ida_free().  The ida_alloc_max() and ida_alloc_range() functions
differ from ida_simple_get() in that they take an inclusive 'max'
parameter instead of an exclusive 'end' parameter.  Callers are about
evenly split whether they'd like inclusive or exclusive parameters and
'max' is easier to document than 'end'.

Change the IDA allocation to first attempt to allocate a bit using
existing memory, and only allocate memory afterwards.  Also change the
behaviour of 'min' > INT_MAX from being a BUG() to returning -ENOSPC.

Leave compatibility wrappers in place for ida_simple_get() and
ida_simple_remove() to avoid changing all callers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 59 +++++++++++++++++++++++++++++++++++++++++---
 lib/idr.c           | 70 ++++++++++++++++++++++++-----------------------------
 2 files changed, 87 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..cd339da0b1aa 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -230,15 +230,68 @@ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
 
-int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
-		   gfp_t gfp_mask);
-void ida_simple_remove(struct ida *ida, unsigned int id);
+int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
+void ida_free(struct ida *, unsigned int id);
+
+/**
+ * ida_alloc() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc(struct ida *ida, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, ~0, gfp);
+}
+
+/**
+ * ida_alloc_min() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and %INT_MAX, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
+{
+	return ida_alloc_range(ida, min, ~0, gfp);
+}
+
+/**
+ * ida_alloc_max() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between 0 and @max, inclusive.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp)
+{
+	return ida_alloc_range(ida, 0, max, gfp);
+}
 
 static inline void ida_init(struct ida *ida)
 {
 	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
 }
 
+#define ida_simple_get(ida, start, end, gfp)	\
+			ida_alloc_range(ida, start, (end) - 1, gfp)
+#define ida_simple_remove(ida, id)	ida_free(ida, id)
+
 /**
  * ida_get_new - allocate new ID
  * @ida:	idr handle
diff --git a/lib/idr.c b/lib/idr.c
index 352c6160e2e0..760fce92f1fb 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -317,7 +317,8 @@ EXPORT_SYMBOL(idr_replace);
  * bit per ID, and so is more space efficient than an IDR.  To use an IDA,
  * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
  * then initialise it using ida_init()).  To allocate a new ID, call
- * ida_simple_get().  To free an ID, call ida_simple_remove().
+ * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range().
+ * To free an ID, call ida_free().
  *
  * If you have more complex locking requirements, use a loop around
  * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
@@ -378,7 +379,7 @@ EXPORT_SYMBOL(idr_replace);
  * Allocate new ID above or equal to @start.  It should be called
  * with any required locks to ensure that concurrent calls to
  * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
- * Consider using ida_simple_get() if you do not have complex locking
+ * Consider using ida_alloc_range() if you do not have complex locking
  * requirements.
  *
  * If memory is required, it will return %-EAGAIN, you should unlock
@@ -551,43 +552,34 @@ void ida_destroy(struct ida *ida)
 EXPORT_SYMBOL(ida_destroy);
 
 /**
- * ida_simple_get - get a new id.
- * @ida: the (initialized) ida.
- * @start: the minimum id (inclusive, < 0x8000000)
- * @end: the maximum id (exclusive, < 0x8000000 or 0)
- * @gfp_mask: memory allocation flags
- *
- * Allocates an id in the range start <= id < end, or returns -ENOSPC.
- * On memory allocation failure, returns -ENOMEM.
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
  *
- * Compared to ida_get_new_above() this function does its own locking, and
- * should be used unless there are special requirements.
+ * Allocate an ID between @min and @max, inclusive.  The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
  *
- * Use ida_simple_remove() to get rid of an id.
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
  */
-int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
-		   gfp_t gfp_mask)
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+			gfp_t gfp)
 {
 	int ret, id;
-	unsigned int max;
 	unsigned long flags;
 
-	BUG_ON((int)start < 0);
-	BUG_ON((int)end < 0);
+	if ((int)min < 0)
+		return -ENOSPC;
 
-	if (end == 0)
-		max = 0x80000000;
-	else {
-		BUG_ON(end < start);
-		max = end - 1;
-	}
+	if ((int)max < 0)
+		max = INT_MAX;
 
 again:
-	if (!ida_pre_get(ida, gfp_mask))
-		return -ENOMEM;
-
 	xa_lock_irqsave(&ida->ida_rt, flags);
-	ret = ida_get_new_above(ida, start, &id);
+	ret = ida_get_new_above(ida, min, &id);
 	if (!ret) {
 		if (id > max) {
 			ida_remove(ida, id);
@@ -598,24 +590,24 @@ again:
 	}
 	xa_unlock_irqrestore(&ida->ida_rt, flags);
 
-	if (unlikely(ret == -EAGAIN))
+	if (unlikely(ret == -EAGAIN)) {
+		if (!ida_pre_get(ida, gfp))
+			return -ENOMEM;
 		goto again;
+	}
 
 	return ret;
 }
-EXPORT_SYMBOL(ida_simple_get);
+EXPORT_SYMBOL(ida_alloc_range);
 
 /**
- * ida_simple_remove - remove an allocated id.
- * @ida: the (initialized) ida.
- * @id: the id returned by ida_simple_get.
- *
- * Use to release an id allocated with ida_simple_get().
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
  *
- * Compared to ida_remove() this function does its own locking, and should be
- * used unless there are special requirements.
+ * Context: Any context.
  */
-void ida_simple_remove(struct ida *ida, unsigned int id)
+void ida_free(struct ida *ida, unsigned int id)
 {
 	unsigned long flags;
 
@@ -624,4 +616,4 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
 	ida_remove(ida, id);
 	xa_unlock_irqrestore(&ida->ida_rt, flags);
 }
-EXPORT_SYMBOL(ida_simple_remove);
+EXPORT_SYMBOL(ida_free);
-- 
cgit v1.2.3


From b03f8e43c9261878bf29d8cc1c3ba458cc98287e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 18 Jun 2018 19:02:48 -0400
Subject: ida: Remove old API

Delete ida_pre_get(), ida_get_new(), ida_get_new_above() and ida_remove()
from the public API.  Some of these functions still exist as internal
helpers, but they should not be called by consumers.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h | 21 ++++-----------------
 lib/idr.c           | 48 +++++++-----------------------------------------
 lib/radix-tree.c    |  9 ---------
 3 files changed, 11 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index cd339da0b1aa..2e1db0e36486 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -225,13 +225,9 @@ struct ida {
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
-int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
-void ida_remove(struct ida *ida, int id);
-void ida_destroy(struct ida *ida);
-
 int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t);
 void ida_free(struct ida *, unsigned int id);
+void ida_destroy(struct ida *ida);
 
 /**
  * ida_alloc() - Allocate an unused ID.
@@ -292,20 +288,11 @@ static inline void ida_init(struct ida *ida)
 			ida_alloc_range(ida, start, (end) - 1, gfp)
 #define ida_simple_remove(ida, id)	ida_free(ida, id)
 
-/**
- * ida_get_new - allocate new ID
- * @ida:	idr handle
- * @p_id:	pointer to the allocated handle
- *
- * Simple wrapper around ida_get_new_above() w/ @starting_id of zero.
- */
-static inline int ida_get_new(struct ida *ida, int *p_id)
-{
-	return ida_get_new_above(ida, 0, p_id);
-}
-
 static inline bool ida_is_empty(const struct ida *ida)
 {
 	return radix_tree_empty(&ida->ida_rt);
 }
+
+/* in lib/radix-tree.c */
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 #endif /* __IDR_H__ */
diff --git a/lib/idr.c b/lib/idr.c
index 760fce92f1fb..1dc52191acb6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -320,16 +320,9 @@ EXPORT_SYMBOL(idr_replace);
  * ida_alloc(), ida_alloc_min(), ida_alloc_max() or ida_alloc_range().
  * To free an ID, call ida_free().
  *
- * If you have more complex locking requirements, use a loop around
- * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
- * ida_remove() to free an ID.  You must make sure that ida_get_new() and
- * ida_remove() cannot be called at the same time as each other for the
- * same IDA.
- *
- * You can also use ida_get_new_above() if you need an ID to be allocated
- * above a particular number.  ida_destroy() can be used to dispose of an
- * IDA without needing to free the individual IDs in it.  You can use
- * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
+ * ida_destroy() can be used to dispose of an IDA without needing to
+ * free the individual IDs in it.  You can use ida_is_empty() to find
+ * out whether the IDA has any IDs currently allocated.
  *
  * IDs are currently limited to the range [0-INT_MAX].  If this is an awkward
  * limitation, it should be quite straightforward to raise the maximum.
@@ -370,25 +363,7 @@ EXPORT_SYMBOL(idr_replace);
 
 #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
 
-/**
- * ida_get_new_above - allocate new ID above or equal to a start id
- * @ida: ida handle
- * @start: id to start search at
- * @id: pointer to the allocated handle
- *
- * Allocate new ID above or equal to @start.  It should be called
- * with any required locks to ensure that concurrent calls to
- * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
- * Consider using ida_alloc_range() if you do not have complex locking
- * requirements.
- *
- * If memory is required, it will return %-EAGAIN, you should unlock
- * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return %-ENOSPC.  On success, it will return 0.
- *
- * @id returns a value in the range @start ... %0x7fffffff.
- */
-int ida_get_new_above(struct ida *ida, int start, int *id)
+static int ida_get_new_above(struct ida *ida, int start, int *id)
 {
 	struct radix_tree_root *root = &ida->ida_rt;
 	void __rcu **slot;
@@ -473,16 +448,8 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 		return 0;
 	}
 }
-EXPORT_SYMBOL(ida_get_new_above);
 
-/**
- * ida_remove - Free the given ID
- * @ida: ida handle
- * @id: ID to free
- *
- * This function should not be called at the same time as ida_get_new_above().
- */
-void ida_remove(struct ida *ida, int id)
+static void ida_remove(struct ida *ida, int id)
 {
 	unsigned long index = id / IDA_BITMAP_BITS;
 	unsigned offset = id % IDA_BITMAP_BITS;
@@ -519,9 +486,8 @@ void ida_remove(struct ida *ida, int id)
 	}
 	return;
  err:
-	WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
+	WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
 }
-EXPORT_SYMBOL(ida_remove);
 
 /**
  * ida_destroy() - Free all IDs.
@@ -568,7 +534,7 @@ EXPORT_SYMBOL(ida_destroy);
 int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
 			gfp_t gfp)
 {
-	int ret, id;
+	int ret, id = 0;
 	unsigned long flags;
 
 	if ((int)min < 0)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index cc6096b97afb..bc03ecc4dfd2 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2106,14 +2106,6 @@ void idr_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(idr_preload);
 
-/**
- * ida_pre_get - reserve resources for ida allocation
- * @ida: ida handle
- * @gfp: memory allocation flags
- *
- * This function should be called before calling ida_get_new_above().  If it
- * is unable to allocate memory, it will return %0.  On success, it returns %1.
- */
 int ida_pre_get(struct ida *ida, gfp_t gfp)
 {
 	/*
@@ -2134,7 +2126,6 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 
 	return 1;
 }
-EXPORT_SYMBOL(ida_pre_get);
 
 void __rcu **idr_get_free(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
-- 
cgit v1.2.3


From fd991a23c8f6ef30692f77409602ccf3614353b2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 21 Aug 2018 22:33:00 +0200
Subject: y2038: Provide aliases for compat helpers

As part of the system call rework for 64-bit time_t, we are restructuring
the way that compat syscalls deal with 32-bit time_t, reusing the
implementation for 32-bit architectures. Christoph Hellwig suggested a
rename of the associated types and interfaces to avoid the confusing usage
of the 'compat' prefix for 32-bit architectures.

To prepare for doing that in linux-4.20, add a set of macros that allows to
convert subsystems separately to the new names and avoids some of the
nastier merge conflicts.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: y2038@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Link: https://lkml.kernel.org/r/20180821203329.2089473-1-arnd@arndb.de
---
 include/linux/time32.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/time32.h b/include/linux/time32.h
index 0b14f936100a..d1ae43c13e25 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -207,4 +207,19 @@ static inline s64 timeval_to_ns(const struct timeval *tv)
 extern struct timeval ns_to_timeval(const s64 nsec);
 extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
+/*
+ * New aliases for compat time functions. These will be used to replace
+ * the compat code so it can be shared between 32-bit and 64-bit builds
+ * both of which provide compatibility with old 32-bit tasks.
+ */
+#define old_time32_t		compat_time_t
+#define old_timeval32		compat_timeval
+#define old_timespec32		compat_timespec
+#define old_itimerspec32	compat_itimerspec
+#define ns_to_old_timeval32	ns_to_compat_timeval
+#define get_old_itimerspec32	get_compat_itimerspec64
+#define put_old_itimerspec32	put_compat_itimerspec64
+#define get_old_timespec32	compat_get_timespec64
+#define put_old_timespec32	compat_put_timespec64
+
 #endif
-- 
cgit v1.2.3


From c4df32c80d04987023844c1fb13734a872c8f2e2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 19 Aug 2018 07:34:47 +0900
Subject: export.h: remove VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR()

With the special case handling for Blackfin and Metag was removed by
commit 94e58e0ac312 ("export.h: remove code for prefixing symbols with
underscore"), VMLINUX_SYMBOL() is no-op.

Replace the remaining usages, then remove the definition of
VMLINUX_SYMBOL() and VMLINUX_SYMBOL_STR().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 certs/system_certificates.S | 16 ++++++++--------
 include/linux/export.h      |  7 -------
 usr/initramfs_data.S        |  4 ++--
 3 files changed, 10 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/certs/system_certificates.S b/certs/system_certificates.S
index 3918ff7235ed..8f29058adf93 100644
--- a/certs/system_certificates.S
+++ b/certs/system_certificates.S
@@ -5,8 +5,8 @@
 	__INITRODATA
 
 	.align 8
-	.globl VMLINUX_SYMBOL(system_certificate_list)
-VMLINUX_SYMBOL(system_certificate_list):
+	.globl system_certificate_list
+system_certificate_list:
 __cert_list_start:
 #ifdef CONFIG_MODULE_SIG
 	.incbin "certs/signing_key.x509"
@@ -15,21 +15,21 @@ __cert_list_start:
 __cert_list_end:
 
 #ifdef CONFIG_SYSTEM_EXTRA_CERTIFICATE
-	.globl VMLINUX_SYMBOL(system_extra_cert)
+	.globl system_extra_cert
 	.size system_extra_cert, CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE
-VMLINUX_SYMBOL(system_extra_cert):
+system_extra_cert:
 	.fill CONFIG_SYSTEM_EXTRA_CERTIFICATE_SIZE, 1, 0
 
 	.align 4
-	.globl VMLINUX_SYMBOL(system_extra_cert_used)
-VMLINUX_SYMBOL(system_extra_cert_used):
+	.globl system_extra_cert_used
+system_extra_cert_used:
 	.int 0
 
 #endif /* CONFIG_SYSTEM_EXTRA_CERTIFICATE */
 
 	.align 8
-	.globl VMLINUX_SYMBOL(system_certificate_list_size)
-VMLINUX_SYMBOL(system_certificate_list_size):
+	.globl system_certificate_list_size
+system_certificate_list_size:
 #ifdef CONFIG_64BIT
 	.quad __cert_list_end - __cert_list_start
 #else
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..c363bde21bbe 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -10,13 +10,6 @@
  * hackers place grumpy comments in header files.
  */
 
-#define __VMLINUX_SYMBOL(x) x
-#define __VMLINUX_SYMBOL_STR(x) #x
-
-/* Indirect, so macros are expanded before pasting. */
-#define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x)
-#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
-
 #ifndef __ASSEMBLY__
 struct kernel_symbol
 {
diff --git a/usr/initramfs_data.S b/usr/initramfs_data.S
index b28da799f6a6..d07648f05bbf 100644
--- a/usr/initramfs_data.S
+++ b/usr/initramfs_data.S
@@ -30,8 +30,8 @@ __irf_start:
 .incbin __stringify(INITRAMFS_IMAGE)
 __irf_end:
 .section .init.ramfs.info,"a"
-.globl VMLINUX_SYMBOL(__initramfs_size)
-VMLINUX_SYMBOL(__initramfs_size):
+.globl __initramfs_size
+__initramfs_size:
 #ifdef CONFIG_64BIT
 	.quad __irf_end - __irf_start
 #else
-- 
cgit v1.2.3


From 7953002a7c6561c93defd19c81737012ef5a10dc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 21 Aug 2018 00:06:24 +0900
Subject: vmlinux.lds.h: remove stale <linux/export.h> include

This is unneeded since commit a62143850053 ("vmlinux.lds.h: remove
no-op macro VMLINUX_SYMBOL()").

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/asm-generic/vmlinux.lds.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f173b5f30dbe..7b75ff6e2fce 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -54,8 +54,6 @@
 #define LOAD_OFFSET 0
 #endif
 
-#include <linux/export.h>
-
 /* Align . to a 8 byte boundary equals to maximum function alignment. */
 #define ALIGN_FUNCTION()  . = ALIGN(8)
 
-- 
cgit v1.2.3


From 16af2d65842d343c2f95733c3993a0b5baab08f9 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:01 +0900
Subject: ata: add an extra argument to ahci_platform_get_resources()

Add an extra argument to ahci_platform_get_resources(), that is
for the bitmap representing the resource to get in this function.

Currently there is no resources to be defined, so all the callers set
'0' to the argument.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_brcm.c        | 2 +-
 drivers/ata/ahci_ceva.c        | 2 +-
 drivers/ata/ahci_da850.c       | 2 +-
 drivers/ata/ahci_dm816.c       | 2 +-
 drivers/ata/ahci_imx.c         | 2 +-
 drivers/ata/ahci_mtk.c         | 2 +-
 drivers/ata/ahci_mvebu.c       | 2 +-
 drivers/ata/ahci_platform.c    | 2 +-
 drivers/ata/ahci_qoriq.c       | 2 +-
 drivers/ata/ahci_seattle.c     | 2 +-
 drivers/ata/ahci_st.c          | 2 +-
 drivers/ata/ahci_sunxi.c       | 2 +-
 drivers/ata/ahci_tegra.c       | 2 +-
 drivers/ata/ahci_xgene.c       | 2 +-
 drivers/ata/libahci_platform.c | 4 +++-
 include/linux/ahci_platform.h  | 2 +-
 16 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index ea430819c80b..f3d557777d82 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -425,7 +425,7 @@ static int brcm_ahci_probe(struct platform_device *pdev)
 
 	brcm_sata_phys_enable(priv);
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 	hpriv->plat_data = priv;
diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c
index 5ecc9d46cb54..dc78c98cb9f1 100644
--- a/drivers/ata/ahci_ceva.c
+++ b/drivers/ata/ahci_ceva.c
@@ -213,7 +213,7 @@ static int ceva_ahci_probe(struct platform_device *pdev)
 
 	cevapriv->ahci_pdev = pdev;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c
index 9b34dff64536..ebaa657f28c4 100644
--- a/drivers/ata/ahci_da850.c
+++ b/drivers/ata/ahci_da850.c
@@ -171,7 +171,7 @@ static int ahci_da850_probe(struct platform_device *pdev)
 	u32 mpy;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c
index fbd827c3a75c..89509c3efb01 100644
--- a/drivers/ata/ahci_dm816.c
+++ b/drivers/ata/ahci_dm816.c
@@ -148,7 +148,7 @@ static int ahci_dm816_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 6822e2f33f7e..b00799d208f5 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -1127,7 +1127,7 @@ static int imx_ahci_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c
index 0ae6971c2a4c..8bc1a26ffc31 100644
--- a/drivers/ata/ahci_mtk.c
+++ b/drivers/ata/ahci_mtk.c
@@ -142,7 +142,7 @@ static int mtk_ahci_probe(struct platform_device *pdev)
 	if (!plat)
 		return -ENOMEM;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 0045dacd814b..adbe38fb9c35 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -158,7 +158,7 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
 	const struct mbus_dram_target_info *dram;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 99f9a895a459..570927316962 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -43,7 +43,7 @@ static int ahci_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c
index cfdef4d44ae9..ce59253ec158 100644
--- a/drivers/ata/ahci_qoriq.c
+++ b/drivers/ata/ahci_qoriq.c
@@ -250,7 +250,7 @@ static int ahci_qoriq_probe(struct platform_device *pdev)
 	struct resource *res;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c
index 1d31c0c0fc20..e57b6f92c288 100644
--- a/drivers/ata/ahci_seattle.c
+++ b/drivers/ata/ahci_seattle.c
@@ -164,7 +164,7 @@ static int ahci_seattle_probe(struct platform_device *pdev)
 	int rc;
 	struct ahci_host_priv *hpriv;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index bc345f249555..21c5c44832ef 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -156,7 +156,7 @@ static int st_ahci_probe(struct platform_device *pdev)
 	if (!drv_data)
 		return -ENOMEM;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 	hpriv->plat_data = drv_data;
diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index b26437430163..631610b72aa5 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c
@@ -181,7 +181,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c
index 64d848409fe2..004f2608818e 100644
--- a/drivers/ata/ahci_tegra.c
+++ b/drivers/ata/ahci_tegra.c
@@ -494,7 +494,7 @@ static int tegra_ahci_probe(struct platform_device *pdev)
 	int ret;
 	unsigned int i;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index ad58da7c9aff..7e157e1bf65e 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -759,7 +759,7 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 					      &xgene_ahci_v2_port_info };
 	int rc;
 
-	hpriv = ahci_platform_get_resources(pdev);
+	hpriv = ahci_platform_get_resources(pdev, 0);
 	if (IS_ERR(hpriv))
 		return PTR_ERR(hpriv);
 
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 8fbb532b62dd..679f763410c0 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -332,6 +332,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
 /**
  * ahci_platform_get_resources - Get platform resources
  * @pdev: platform device to get resources for
+ * @flags: bitmap representing the resource to get
  *
  * This function allocates an ahci_host_priv struct, and gets the following
  * resources, storing a reference to them inside the returned struct:
@@ -345,7 +346,8 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
  * RETURNS:
  * The allocated ahci_host_priv on success, otherwise an ERR_PTR value
  */
-struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev)
+struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
+						   unsigned int flags)
 {
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1b0a17b22cd3..6490be1f8a16 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -30,7 +30,7 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv);
 int ahci_platform_enable_resources(struct ahci_host_priv *hpriv);
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv);
 struct ahci_host_priv *ahci_platform_get_resources(
-	struct platform_device *pdev);
+	struct platform_device *pdev, unsigned int flags);
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
-- 
cgit v1.2.3


From 9d2ab99573970838108add835442a03e23f8577b Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Aug 2018 21:13:02 +0900
Subject: ata: libahci_platform: add reset control support

Add support to get and control a list of resets for the device
as optional and shared. These resets must be kept de-asserted until
the device is enabled.

This is specified as shared because some SoCs like UniPhier series
have common reset controls with all ahci controller instances.

However, according to Thierry's view,
https://www.spinics.net/lists/linux-ide/msg55357.html
some hardware-specific drivers already use their own resets,
and the common reset make a path to occur double controls of resets.

The ahci_platform_get_resources() can get and control the reset
only when the second argument includes AHCI_PLATFORM_GET_RESETS bit.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 .../devicetree/bindings/ata/ahci-platform.txt      |  1 +
 drivers/ata/ahci.h                                 |  1 +
 drivers/ata/libahci_platform.c                     | 31 ++++++++++++++++++----
 include/linux/ahci_platform.h                      |  2 ++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index c760ecb81381..f4006d3c9fdf 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -30,6 +30,7 @@ compatible:
 Optional properties:
 - dma-coherent      : Present if dma operations are coherent
 - clocks            : a list of phandle + clock specifier pairs
+- resets            : a list of phandle + reset specifier pairs
 - target-supply     : regulator for SATA target power
 - phys              : reference to the SATA PHY node
 - phy-names         : must be "sata-phy"
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 1609ebab4e23..6a1515f0da40 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -350,6 +350,7 @@ struct ahci_host_priv {
 	u32			em_msg_type;	/* EM message type */
 	bool			got_runtime_pm; /* Did we do pm_runtime_get? */
 	struct clk		*clks[AHCI_MAX_CLKS]; /* Optional */
+	struct reset_control	*rsts;		/* Optional */
 	struct regulator	**target_pwrs;	/* Optional */
 	/*
 	 * If platform uses PHYs. There is a 1:1 relation between the port number and
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 679f763410c0..c92c10d55374 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -25,6 +25,7 @@
 #include <linux/phy/phy.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_platform.h>
+#include <linux/reset.h>
 #include "ahci.h"
 
 static void ahci_host_stop(struct ata_host *host);
@@ -195,7 +196,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
  * following order:
  * 1) Regulator
  * 2) Clocks (through ahci_platform_enable_clks)
- * 3) Phys
+ * 3) Resets
+ * 4) Phys
  *
  * If resource enabling fails at any point the previous enabled resources
  * are disabled in reverse order.
@@ -215,12 +217,19 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv)
 	if (rc)
 		goto disable_regulator;
 
-	rc = ahci_platform_enable_phys(hpriv);
+	rc = reset_control_deassert(hpriv->rsts);
 	if (rc)
 		goto disable_clks;
 
+	rc = ahci_platform_enable_phys(hpriv);
+	if (rc)
+		goto disable_resets;
+
 	return 0;
 
+disable_resets:
+	reset_control_assert(hpriv->rsts);
+
 disable_clks:
 	ahci_platform_disable_clks(hpriv);
 
@@ -238,13 +247,16 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources);
  * This function disables all ahci_platform managed resources in the
  * following order:
  * 1) Phys
- * 2) Clocks (through ahci_platform_disable_clks)
- * 3) Regulator
+ * 2) Resets
+ * 3) Clocks (through ahci_platform_disable_clks)
+ * 4) Regulator
  */
 void ahci_platform_disable_resources(struct ahci_host_priv *hpriv)
 {
 	ahci_platform_disable_phys(hpriv);
 
+	reset_control_assert(hpriv->rsts);
+
 	ahci_platform_disable_clks(hpriv);
 
 	ahci_platform_disable_regulators(hpriv);
@@ -341,7 +353,8 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
  * 2) regulator for controlling the targets power (optional)
  * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node,
  *    or for non devicetree enabled platforms a single clock
- * 4) phys (optional)
+ * 4) resets, if flags has AHCI_PLATFORM_GET_RESETS (optional)
+ * 5) phys (optional)
  *
  * RETURNS:
  * The allocated ahci_host_priv on success, otherwise an ERR_PTR value
@@ -395,6 +408,14 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
 		hpriv->clks[i] = clk;
 	}
 
+	if (flags & AHCI_PLATFORM_GET_RESETS) {
+		hpriv->rsts = devm_reset_control_array_get_optional_shared(dev);
+		if (IS_ERR(hpriv->rsts)) {
+			rc = PTR_ERR(hpriv->rsts);
+			goto err_out;
+		}
+	}
+
 	hpriv->nports = child_nodes = of_get_child_count(dev->of_node);
 
 	/*
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6490be1f8a16..eaedca5fe6fc 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -43,4 +43,6 @@ int ahci_platform_resume_host(struct device *dev);
 int ahci_platform_suspend(struct device *dev);
 int ahci_platform_resume(struct device *dev);
 
+#define AHCI_PLATFORM_GET_RESETS	0x01
+
 #endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 92be775a3db343889ab159c44d55315c5ee0be4e Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:53 -0700
Subject: mm: struct shrink_control: keep int fields together

Patch series "Reorderings in struct shrinker and struct shrink_control".

These structures are intensively used during reclaim and, displace other
data in cache, so there is no a reason they have int fields not grouped
together.

This patch (of 2):

gfp_t is of unsigned type, so let's move nid to keep them together.

Link: http://lkml.kernel.org/r/153199747930.21131.861043607301997810.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index b154fd2b084c..d58aaaed34a4 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -12,6 +12,9 @@
 struct shrink_control {
 	gfp_t gfp_mask;
 
+	/* current node being shrunk (for NUMA aware shrinkers) */
+	int nid;
+
 	/*
 	 * How many objects scan_objects should scan and try to reclaim.
 	 * This is reset before every call, so it is safe for callees
@@ -26,9 +29,6 @@ struct shrink_control {
 	 */
 	unsigned long nr_scanned;
 
-	/* current node being shrunk (for NUMA aware shrinkers) */
-	int nid;
-
 	/* current memcg being shrunk (for memcg aware shrinkers) */
 	struct mem_cgroup *memcg;
 };
-- 
cgit v1.2.3


From e50ef89b0f58a2cb0e7d496a97b851e0dced62a6 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 21 Aug 2018 21:51:57 -0700
Subject: mm: struct shrinker: make flags of unsigned type

Currently, there are two flags only, so unsigned is more then enough.
Also, move int seeks to keep these fields together.

Link: http://lkml.kernel.org/r/153199748720.21131.6476256940113102483.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shrinker.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index d58aaaed34a4..9443cafd1969 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -63,9 +63,9 @@ struct shrinker {
 	unsigned long (*scan_objects)(struct shrinker *,
 				      struct shrink_control *sc);
 
-	int seeks;	/* seeks to recreate an obj */
 	long batch;	/* reclaim batch size, 0 = default */
-	unsigned long flags;
+	int seeks;	/* seeks to recreate an obj */
+	unsigned flags;
 
 	/* These are for internal use */
 	struct list_head list;
-- 
cgit v1.2.3


From 5d5e8f19544a35ae1609bd96ae6b28c9fcd1baf6 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Tue, 21 Aug 2018 21:52:20 -0700
Subject: mm, swap, get_swap_pages: use entry_size instead of cluster in
 parameter

As suggested by Matthew Wilcox, it is better to use "int entry_size"
instead of "bool cluster" as parameter to specify whether to operate for
huge or normal swap entries.  Because this improve the flexibility to
support other swap entry size.  And Dave Hansen thinks that this
improves code readability too.

So in this patch, the "bool cluster" parameter of get_swap_pages() is
replaced by "int entry_size".

And nr_swap_entries() trick is used to reduce the binary size when
!CONFIG_TRANSPARENT_HUGE_PAGE.

       text	   data	    bss	    dec	    hex	filename
base  24215	   2028	    340	  26583	   67d7	mm/swapfile.o
head  24123	   2004	    340	  26467	   6763	mm/swapfile.o

Link: http://lkml.kernel.org/r/20180720071845.17920-7-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  2 +-
 mm/swap_slots.c      |  8 ++++----
 mm/swapfile.c        | 16 ++++++++--------
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1a8bd05a335e..8e2c11e692ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -447,7 +447,7 @@ extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(struct page *page);
 extern void put_swap_page(struct page *page, swp_entry_t entry);
 extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
 extern int swap_duplicate(swp_entry_t);
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 008ccb22fee6..63a7b4563a57 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -269,8 +269,8 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
 
 	cache->cur = 0;
 	if (swap_slot_cache_active)
-		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, false,
-					   cache->slots);
+		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
+					   cache->slots, 1);
 
 	return cache->nr;
 }
@@ -316,7 +316,7 @@ swp_entry_t get_swap_page(struct page *page)
 
 	if (PageTransHuge(page)) {
 		if (IS_ENABLED(CONFIG_THP_SWAP))
-			get_swap_pages(1, true, &entry);
+			get_swap_pages(1, &entry, HPAGE_PMD_NR);
 		goto out;
 	}
 
@@ -350,7 +350,7 @@ repeat:
 			goto out;
 	}
 
-	get_swap_pages(1, false, &entry);
+	get_swap_pages(1, &entry, 1);
 out:
 	if (mem_cgroup_try_charge_swap(page, entry)) {
 		put_swap_page(page, entry);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 043645e7f0b5..b30dd0642ccf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -940,18 +940,18 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 }
 
-int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
 {
-	unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
+	unsigned long size = swap_entry_size(entry_size);
 	struct swap_info_struct *si, *next;
 	long avail_pgs;
 	int n_ret = 0;
 	int node;
 
 	/* Only single cluster request supported */
-	WARN_ON_ONCE(n_goal > 1 && cluster);
+	WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
 
-	avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
+	avail_pgs = atomic_long_read(&nr_swap_pages) / size;
 	if (avail_pgs <= 0)
 		goto noswap;
 
@@ -961,7 +961,7 @@ int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
 	if (n_goal > avail_pgs)
 		n_goal = avail_pgs;
 
-	atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);
+	atomic_long_sub(n_goal * size, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -988,14 +988,14 @@ start_over:
 			spin_unlock(&si->lock);
 			goto nextsi;
 		}
-		if (cluster) {
+		if (size == SWAPFILE_CLUSTER) {
 			if (!(si->flags & SWP_FILE))
 				n_ret = swap_alloc_cluster(si, swp_entries);
 		} else
 			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
 						    n_goal, swp_entries);
 		spin_unlock(&si->lock);
-		if (n_ret || cluster)
+		if (n_ret || size == SWAPFILE_CLUSTER)
 			goto check_out;
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 			si->type);
@@ -1021,7 +1021,7 @@ nextsi:
 
 check_out:
 	if (n_ret < n_goal)
-		atomic_long_add((long)(n_goal - n_ret) * nr_pages,
+		atomic_long_add((long)(n_goal - n_ret) * size,
 				&nr_swap_pages);
 noswap:
 	return n_ret;
-- 
cgit v1.2.3


From 93065ac753e4443840a057bfef4be71ec766fde9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 21 Aug 2018 21:52:33 -0700
Subject: mm, oom: distinguish blockable mode for mmu notifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are several blockable mmu notifiers which might sleep in
mmu_notifier_invalidate_range_start and that is a problem for the
oom_reaper because it needs to guarantee a forward progress so it cannot
depend on any sleepable locks.

Currently we simply back off and mark an oom victim with blockable mmu
notifiers as done after a short sleep.  That can result in selecting a new
oom victim prematurely because the previous one still hasn't torn its
memory down yet.

We can do much better though.  Even if mmu notifiers use sleepable locks
there is no reason to automatically assume those locks are held.  Moreover
majority of notifiers only care about a portion of the address space and
there is absolutely zero reason to fail when we are unmapping an unrelated
range.  Many notifiers do really block and wait for HW which is harder to
handle and we have to bail out though.

This patch handles the low hanging fruit.
__mmu_notifier_invalidate_range_start gets a blockable flag and callbacks
are not allowed to sleep if the flag is set to false.  This is achieved by
using trylock instead of the sleepable lock for most callbacks and
continue as long as we do not block down the call chain.

I think we can improve that even further because there is a common pattern
to do a range lookup first and then do something about that.  The first
part can be done without a sleeping lock in most cases AFAICS.

The oom_reaper end then simply retries if there is at least one notifier
which couldn't make any progress in !blockable mode.  A retry loop is
already implemented to wait for the mmap_sem and this is basically the
same thing.

The simplest way for driver developers to test this code path is to wrap
userspace code which uses these notifiers into a memcg and set the hard
limit to hit the oom.  This can be done e.g.  after the test faults in all
the mmu notifier managed memory and set the hard limit to something really
small.  Then we are looking for a proper process tear down.

[akpm@linux-foundation.org: coding style fixes]
[akpm@linux-foundation.org: minor code simplification]
Link: http://lkml.kernel.org/r/20180716115058.5559-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Christian König <christian.koenig@amd.com> # AMD notifiers
Acked-by: Leon Romanovsky <leonro@mellanox.com> # mlx and umem_odp
Reported-by: David Rientjes <rientjes@google.com>
Cc: "David (ChunMing) Zhou" <David1.Zhou@amd.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Mike Marciniszyn <mike.marciniszyn@intel.com>
Cc: Dennis Dalessandro <dennis.dalessandro@intel.com>
Cc: Sudeep Dutt <sudeep.dutt@intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kvm/x86.c                      |  7 ++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  | 43 ++++++++++++++++++++++++++------
 drivers/gpu/drm/i915/i915_gem_userptr.c | 13 +++++++---
 drivers/gpu/drm/radeon/radeon_mn.c      | 22 ++++++++++++++---
 drivers/infiniband/core/umem_odp.c      | 33 +++++++++++++++++++------
 drivers/infiniband/hw/hfi1/mmu_rb.c     | 11 ++++++---
 drivers/infiniband/hw/mlx5/odp.c        |  2 +-
 drivers/misc/mic/scif/scif_dma.c        |  7 ++++--
 drivers/misc/sgi-gru/grutlbpurge.c      |  7 ++++--
 drivers/xen/gntdev.c                    | 44 ++++++++++++++++++++++++++-------
 include/linux/kvm_host.h                |  4 +--
 include/linux/mmu_notifier.h            | 33 +++++++++++++++++++------
 include/linux/oom.h                     |  2 +-
 include/rdma/ib_umem_odp.h              |  3 ++-
 mm/hmm.c                                |  7 ++++--
 mm/mmap.c                               |  2 +-
 mm/mmu_notifier.c                       | 19 +++++++++++---
 mm/oom_kill.c                           | 29 +++++++++++-----------
 virt/kvm/kvm_main.c                     | 15 +++++++----
 19 files changed, 223 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f7dff0457846..4a74a7cf0a8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7305,8 +7305,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end,
+		bool blockable)
 {
 	unsigned long apic_address;
 
@@ -7317,6 +7318,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 	apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 	if (start <= apic_address && apic_address < end)
 		kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+	return 0;
 }
 
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index a365ea2383d1..e55508b39496 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -178,12 +178,18 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
  *
  * @amn: our notifier
  */
-static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 {
-	mutex_lock(&amn->read_lock);
+	if (blockable)
+		mutex_lock(&amn->read_lock);
+	else if (!mutex_trylock(&amn->read_lock))
+		return -EAGAIN;
+
 	if (atomic_inc_return(&amn->recursion) == 1)
 		down_read_non_owner(&amn->lock);
 	mutex_unlock(&amn->read_lock);
+
+	return 0;
 }
 
 /**
@@ -239,10 +245,11 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -250,17 +257,28 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	/* TODO we should be able to split locking for interval tree and
+	 * amdgpu_mn_invalidate_node
+	 */
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
+
+	return 0;
 }
 
 /**
@@ -275,10 +293,11 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -286,13 +305,19 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
@@ -304,6 +329,8 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 				amdgpu_amdkfd_evict_userptr(mem, mm);
 		}
 	}
+
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index dcd6e230d16a..2c9b284036d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -112,10 +112,11 @@ static void del_object(struct i915_mmu_object *mo)
 	mo->attached = false;
 }
 
-static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 						       struct mm_struct *mm,
 						       unsigned long start,
-						       unsigned long end)
+						       unsigned long end,
+						       bool blockable)
 {
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
@@ -124,7 +125,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	LIST_HEAD(cancelled);
 
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
-		return;
+		return 0;
 
 	/* interval ranges are inclusive, but invalidate range is exclusive */
 	end--;
@@ -132,6 +133,10 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	spin_lock(&mn->lock);
 	it = interval_tree_iter_first(&mn->objects, start, end);
 	while (it) {
+		if (!blockable) {
+			spin_unlock(&mn->lock);
+			return -EAGAIN;
+		}
 		/* The mmu_object is released late when destroying the
 		 * GEM object so it is entirely possible to gain a
 		 * reference on an object in the process of being freed
@@ -154,6 +159,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 
 	if (!list_empty(&cancelled))
 		flush_workqueue(mn->wq);
+
+	return 0;
 }
 
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index abd24975c9b1..f8b35df44c60 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -118,19 +118,27 @@ static void radeon_mn_release(struct mmu_notifier *mn,
  * We block for all BOs between start and end to be idle and
  * unmap them by move them into system domain again.
  */
-static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 					     struct mm_struct *mm,
 					     unsigned long start,
-					     unsigned long end)
+					     unsigned long end,
+					     bool blockable)
 {
 	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
 	struct ttm_operation_ctx ctx = { false, false };
 	struct interval_tree_node *it;
+	int ret = 0;
 
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	mutex_lock(&rmn->lock);
+	/* TODO we should be able to split locking for interval tree and
+	 * the tear down.
+	 */
+	if (blockable)
+		mutex_lock(&rmn->lock);
+	else if (!mutex_trylock(&rmn->lock))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
@@ -138,6 +146,11 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 		struct radeon_bo *bo;
 		long r;
 
+		if (!blockable) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+
 		node = container_of(it, struct radeon_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
@@ -166,7 +179,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 		}
 	}
 	
+out_unlock:
 	mutex_unlock(&rmn->lock);
+
+	return ret;
 }
 
 static const struct mmu_notifier_ops radeon_mn_ops = {
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 182436b92ba9..6ec748eccff7 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 	rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
 				      ULLONG_MAX,
 				      ib_umem_notifier_release_trampoline,
+				      true,
 				      NULL);
 	up_read(&context->umem_rwsem);
 }
@@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
 	return 0;
 }
 
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    struct mm_struct *mm,
 						    unsigned long start,
-						    unsigned long end)
+						    unsigned long end,
+						    bool blockable)
 {
 	struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+	int ret;
 
 	if (!context->invalidate_range)
-		return;
+		return 0;
+
+	if (blockable)
+		down_read(&context->umem_rwsem);
+	else if (!down_read_trylock(&context->umem_rwsem))
+		return -EAGAIN;
 
 	ib_ucontext_notifier_start_account(context);
-	down_read(&context->umem_rwsem);
-	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+	ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
 				      end,
-				      invalidate_range_start_trampoline, NULL);
+				      invalidate_range_start_trampoline,
+				      blockable, NULL);
 	up_read(&context->umem_rwsem);
+
+	return ret;
 }
 
 static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
@@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	if (!context->invalidate_range)
 		return;
 
+	/*
+	 * TODO: we currently bail out if there is any sleepable work to be done
+	 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
+	 * here. But this is ugly and fragile.
+	 */
 	down_read(&context->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
 				      end,
-				      invalidate_range_end_trampoline, NULL);
+				      invalidate_range_end_trampoline, true, NULL);
 	up_read(&context->umem_rwsem);
 	ib_ucontext_notifier_end_account(context);
 }
@@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
 int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 				  u64 start, u64 last,
 				  umem_call_back cb,
+				  bool blockable,
 				  void *cookie)
 {
 	int ret_val = 0;
@@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 
 	for (node = rbt_ib_umem_iter_first(root, start, last - 1);
 			node; node = next) {
+		/* TODO move the blockable decision up to the callback */
+		if (!blockable)
+			return -EAGAIN;
 		next = rbt_ib_umem_iter_next(node, start, last - 1);
 		umem = container_of(node, struct ib_umem_odp, interval_tree);
 		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 70aceefe14d5..e1c7996c018e 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,9 +67,9 @@ struct mmu_rb_handler {
 
 static unsigned long mmu_node_start(struct mmu_rb_node *);
 static unsigned long mmu_node_last(struct mmu_rb_node *);
-static void mmu_notifier_range_start(struct mmu_notifier *,
+static int mmu_notifier_range_start(struct mmu_notifier *,
 				     struct mm_struct *,
-				     unsigned long, unsigned long);
+				     unsigned long, unsigned long, bool);
 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
 					   unsigned long, unsigned long);
 static void do_remove(struct mmu_rb_handler *handler,
@@ -284,10 +284,11 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
 	handler->ops->remove(handler->ops_arg, node);
 }
 
-static void mmu_notifier_range_start(struct mmu_notifier *mn,
+static int mmu_notifier_range_start(struct mmu_notifier *mn,
 				     struct mm_struct *mm,
 				     unsigned long start,
-				     unsigned long end)
+				     unsigned long end,
+				     bool blockable)
 {
 	struct mmu_rb_handler *handler =
 		container_of(mn, struct mmu_rb_handler, mn);
@@ -313,6 +314,8 @@ static void mmu_notifier_range_start(struct mmu_notifier *mn,
 
 	if (added)
 		queue_work(handler->wq, &handler->del_work);
+
+	return 0;
 }
 
 /*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index f1a87a690a4c..d216e0d2921d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -488,7 +488,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 
 	down_read(&ctx->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
-				      mr_leaf_free, imr);
+				      mr_leaf_free, true, imr);
 	up_read(&ctx->umem_rwsem);
 
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 63d6246d6dff..6369aeaa7056 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,15 +200,18 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
 	schedule_work(&scif_info.misc_work);
 }
 
-static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						     struct mm_struct *mm,
 						     unsigned long start,
-						     unsigned long end)
+						     unsigned long end,
+						     bool blockable)
 {
 	struct scif_mmu_notif	*mmn;
 
 	mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
 	scif_rma_destroy_tcw(mmn, start, end - start);
+
+	return 0;
 }
 
 static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index a3454eb56fbf..be28f05bfafa 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -219,9 +219,10 @@ void gru_flush_all_tlb(struct gru_state *gru)
 /*
  * MMUOPS notifier callout functions
  */
-static void gru_invalidate_range_start(struct mmu_notifier *mn,
+static int gru_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
-				       unsigned long start, unsigned long end)
+				       unsigned long start, unsigned long end,
+				       bool blockable)
 {
 	struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
 						 ms_notifier);
@@ -231,6 +232,8 @@ static void gru_invalidate_range_start(struct mmu_notifier *mn,
 	gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
 		start, end, atomic_read(&gms->ms_range_active));
 	gru_flush_tlb_range(gms, start, end - start);
+
+	return 0;
 }
 
 static void gru_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index c866a62f766d..57390c7666e5 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -479,18 +479,25 @@ static const struct vm_operations_struct gntdev_vmops = {
 
 /* ------------------------------------------------------------------ */
 
+static bool in_range(struct gntdev_grant_map *map,
+			      unsigned long start, unsigned long end)
+{
+	if (!map->vma)
+		return false;
+	if (map->vma->vm_start >= end)
+		return false;
+	if (map->vma->vm_end <= start)
+		return false;
+
+	return true;
+}
+
 static void unmap_if_in_range(struct gntdev_grant_map *map,
 			      unsigned long start, unsigned long end)
 {
 	unsigned long mstart, mend;
 	int err;
 
-	if (!map->vma)
-		return;
-	if (map->vma->vm_start >= end)
-		return;
-	if (map->vma->vm_end <= start)
-		return;
 	mstart = max(start, map->vma->vm_start);
 	mend   = min(end,   map->vma->vm_end);
 	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
@@ -503,21 +510,40 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
 	WARN_ON(err);
 }
 
-static void mn_invl_range_start(struct mmu_notifier *mn,
+static int mn_invl_range_start(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long start, unsigned long end)
+				unsigned long start, unsigned long end,
+				bool blockable)
 {
 	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 	struct gntdev_grant_map *map;
+	int ret = 0;
+
+	/* TODO do we really need a mutex here? */
+	if (blockable)
+		mutex_lock(&priv->lock);
+	else if (!mutex_trylock(&priv->lock))
+		return -EAGAIN;
 
-	mutex_lock(&priv->lock);
 	list_for_each_entry(map, &priv->maps, next) {
+		if (in_range(map, start, end)) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
 		unmap_if_in_range(map, start, end);
 	}
 	list_for_each_entry(map, &priv->freeable_maps, next) {
+		if (in_range(map, start, end)) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
 		unmap_if_in_range(map, start, end);
 	}
+
+out_unlock:
 	mutex_unlock(&priv->lock);
+
+	return ret;
 }
 
 static void mn_release(struct mmu_notifier *mn,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7362dd2faa..0205aee44ded 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1289,8 +1289,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
 
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end);
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end, bool blockable);
 
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 392e6af82701..133ba78820ee 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,13 +151,15 @@ struct mmu_notifier_ops {
 	 * address space but may still be referenced by sptes until
 	 * the last refcount is dropped.
 	 *
-	 * If both of these callbacks cannot block, and invalidate_range
-	 * cannot block, mmu_notifier_ops.flags should have
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK set.
+	 * If blockable argument is set to false then the callback cannot
+	 * sleep and has to return with -EAGAIN. 0 should be returned
+	 * otherwise.
+	 *
 	 */
-	void (*invalidate_range_start)(struct mmu_notifier *mn,
+	int (*invalidate_range_start)(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
-				       unsigned long start, unsigned long end);
+				       unsigned long start, unsigned long end,
+				       bool blockable);
 	void (*invalidate_range_end)(struct mmu_notifier *mn,
 				     struct mm_struct *mm,
 				     unsigned long start, unsigned long end);
@@ -229,8 +231,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
 				      unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
-				  unsigned long start, unsigned long end);
+extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  bool blockable);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end,
 				  bool only_end);
@@ -281,7 +284,15 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
 	if (mm_has_notifiers(mm))
-		__mmu_notifier_invalidate_range_start(mm, start, end);
+		__mmu_notifier_invalidate_range_start(mm, start, end, true);
+}
+
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	if (mm_has_notifiers(mm))
+		return __mmu_notifier_invalidate_range_start(mm, start, end, false);
+	return 0;
 }
 
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -461,6 +472,12 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
 {
 }
 
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+				  unsigned long start, unsigned long end)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  unsigned long start, unsigned long end)
 {
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6adac113e96d..92f70e4c6252 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,7 +95,7 @@ static inline int check_stable_address_space(struct mm_struct *mm)
 	return 0;
 }
 
-void __oom_reap_task_mm(struct mm_struct *mm);
+bool __oom_reap_task_mm(struct mm_struct *mm);
 
 extern unsigned long oom_badness(struct task_struct *p,
 		struct mem_cgroup *memcg, const nodemask_t *nodemask,
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 6a17f856f841..381cdf5a9bd1 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -119,7 +119,8 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
  */
 int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 				  u64 start, u64 end,
-				  umem_call_back cb, void *cookie);
+				  umem_call_back cb,
+				  bool blockable, void *cookie);
 
 /*
  * Find first region intersecting with address range.
diff --git a/mm/hmm.c b/mm/hmm.c
index 76e7a058b32f..0b0554591610 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -177,16 +177,19 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	up_write(&hmm->mirrors_sem);
 }
 
-static void hmm_invalidate_range_start(struct mmu_notifier *mn,
+static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
 				       unsigned long start,
-				       unsigned long end)
+				       unsigned long end,
+				       bool blockable)
 {
 	struct hmm *hmm = mm->hmm;
 
 	VM_BUG_ON(!hmm);
 
 	atomic_inc(&hmm->sequence);
+
+	return 0;
 }
 
 static void hmm_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d6449e74431..bb2a7e097c7d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3064,7 +3064,7 @@ void exit_mmap(struct mm_struct *mm)
 		 * reliably test it.
 		 */
 		mutex_lock(&oom_lock);
-		__oom_reap_task_mm(mm);
+		(void)__oom_reap_task_mm(mm);
 		mutex_unlock(&oom_lock);
 
 		set_bit(MMF_OOM_SKIP, &mm->flags);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index eff6b88a993f..82bb1a939c0e 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,18 +174,29 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
 	srcu_read_unlock(&srcu, id);
 }
 
-void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
-				  unsigned long start, unsigned long end)
+int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  bool blockable)
 {
 	struct mmu_notifier *mn;
+	int ret = 0;
 	int id;
 
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
-		if (mn->ops->invalidate_range_start)
-			mn->ops->invalidate_range_start(mn, mm, start, end);
+		if (mn->ops->invalidate_range_start) {
+			int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable);
+			if (_ret) {
+				pr_info("%pS callback failed with %d in %sblockable context.\n",
+						mn->ops->invalidate_range_start, _ret,
+						!blockable ? "non-" : "");
+				ret = _ret;
+			}
+		}
 	}
 	srcu_read_unlock(&srcu, id);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 412f43453a68..be31a1e0fe78 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -487,9 +487,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
 static struct task_struct *oom_reaper_list;
 static DEFINE_SPINLOCK(oom_reaper_lock);
 
-void __oom_reap_task_mm(struct mm_struct *mm)
+bool __oom_reap_task_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	bool ret = true;
 
 	/*
 	 * Tell all users of get_user/copy_from_user etc... that the content
@@ -519,12 +520,17 @@ void __oom_reap_task_mm(struct mm_struct *mm)
 			struct mmu_gather tlb;
 
 			tlb_gather_mmu(&tlb, mm, start, end);
-			mmu_notifier_invalidate_range_start(mm, start, end);
+			if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+				ret = false;
+				continue;
+			}
 			unmap_page_range(&tlb, vma, start, end, NULL);
 			mmu_notifier_invalidate_range_end(mm, start, end);
 			tlb_finish_mmu(&tlb, start, end);
 		}
 	}
+
+	return ret;
 }
 
 static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
@@ -553,18 +559,6 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 		goto unlock_oom;
 	}
 
-	/*
-	 * If the mm has invalidate_{start,end}() notifiers that could block,
-	 * sleep to give the oom victim some more time.
-	 * TODO: we really want to get rid of this ugly hack and make sure that
-	 * notifiers cannot block for unbounded amount of time
-	 */
-	if (mm_has_blockable_invalidate_notifiers(mm)) {
-		up_read(&mm->mmap_sem);
-		schedule_timeout_idle(HZ);
-		goto unlock_oom;
-	}
-
 	/*
 	 * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
 	 * work on the mm anymore. The check for MMF_OOM_SKIP must run
@@ -579,7 +573,12 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	trace_start_task_reaping(tsk->pid);
 
-	__oom_reap_task_mm(mm);
+	/* failed to reap part of the address space. Try again later */
+	if (!__oom_reap_task_mm(mm)) {
+		up_read(&mm->mmap_sem);
+		ret = false;
+		goto unlock_oom;
+	}
 
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
 			task_pid_nr(tsk), tsk->comm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9263ead9fd32..0116b449b993 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,9 +140,10 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
 static unsigned long long kvm_createvm_count;
 static unsigned long long kvm_active_vms;
 
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-		unsigned long start, unsigned long end)
+__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+		unsigned long start, unsigned long end, bool blockable)
 {
+	return 0;
 }
 
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
@@ -360,13 +361,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
-static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    struct mm_struct *mm,
 						    unsigned long start,
-						    unsigned long end)
+						    unsigned long end,
+						    bool blockable)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int need_tlb_flush = 0, idx;
+	int ret;
 
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
@@ -384,9 +387,11 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 
 	spin_unlock(&kvm->mmu_lock);
 
-	kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+	ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
 
 	srcu_read_unlock(&kvm->srcu, idx);
+
+	return ret;
 }
 
 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
-- 
cgit v1.2.3


From a670468f5e0b5fad4db6e4d195f15915dc2a35c1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 21 Aug 2018 21:53:06 -0700
Subject: mm: zero out the vma in vma_init()

Rather than in vm_area_alloc().  To ensure that the various oddball
stack-based vmas are in a good state.  Some of the callers were zeroing
them out, others were not.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/process.c | 9 ++++-----
 fs/hugetlbfs/inode.c      | 2 --
 include/linux/mm.h        | 1 +
 kernel/fork.c             | 3 ++-
 mm/mempolicy.c            | 1 -
 mm/shmem.c                | 1 -
 6 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d9c299133111..82ab015bf42b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  * atomic helpers. Insert it into the gate_vma so that it is visible
  * through ptrace and /proc/<pid>/mem.
  */
-static struct vm_area_struct gate_vma = {
-	.vm_start	= 0xffff0000,
-	.vm_end		= 0xffff0000 + PAGE_SIZE,
-	.vm_flags	= VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
 
 static int __init gate_vma_init(void)
 {
 	vma_init(&gate_vma, NULL);
 	gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+	gate_vma.vm_start = 0xffff0000;
+	gate_vma.vm_end	= 0xffff0000 + PAGE_SIZE;
+	gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
 	return 0;
 }
 arch_initcall(gate_vma_init);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 346a146c7617..32920a10100e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -410,7 +410,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
-	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
 	vma_init(&pseudo_vma, current->mm);
 	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pagevec_init(&pvec);
@@ -595,7 +594,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	 * allocation routines.  If NUMA is configured, use page index
 	 * as input to create an allocation policy.
 	 */
-	memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
 	vma_init(&pseudo_vma, mm);
 	pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pseudo_vma.vm_file = file;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3cae495f9ce..3a4b87d1a59a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -456,6 +456,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	static const struct vm_operations_struct dummy_vm_ops = {};
 
+	memset(vma, 0, sizeof(*vma));
 	vma->vm_mm = mm;
 	vma->vm_ops = &dummy_vm_ops;
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5ee74c113381..8c760effa42e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep;
 
 struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	struct vm_area_struct *vma;
 
+	vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (vma)
 		vma_init(vma, mm);
 	return vma;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01f1a14facc4..4861ba738d6f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2504,7 +2504,6 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
 			goto put_new;
 
 		/* Create pseudo-vma that contains just the policy */
-		memset(&pvma, 0, sizeof(struct vm_area_struct));
 		vma_init(&pvma, NULL);
 		pvma.vm_end = TASK_SIZE;	/* policy covers entire file */
 		mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
diff --git a/mm/shmem.c b/mm/shmem.c
index c48c79018a7c..fb04baacc9fa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1421,7 +1421,6 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
 		struct shmem_inode_info *info, pgoff_t index)
 {
 	/* Create a pseudo vma that just contains the policy */
-	memset(vma, 0, sizeof(*vma));
 	vma_init(vma, NULL);
 	/* Bias interleave by inode number to distribute better across nodes */
 	vma->vm_pgoff = index + info->vfs_inode.i_ino;
-- 
cgit v1.2.3


From 89696701ea847786f88ccc1c580fb4c3c20c4a3a Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:24 -0700
Subject: mm: remove zone_id() and make use of zone_idx() in is_dev_zone()

is_dev_zone() is using zone_id() to check if the zone is ZONE_DEVICE.
zone_id() looks pretty much the same as zone_idx(), and while the use of
zone_idx() is quite spread in the kernel, zone_id() is only being used by
is_dev_zone().

This patch removes zone_id() and makes is_dev_zone() use zone_idx() to
check the zone, so we do not have two things with the same functionality
around.

Link: http://lkml.kernel.org/r/20180730133718.28683-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2dc52a..3d4577a3ae7e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,25 +755,6 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
 	return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
-static inline int zone_id(const struct zone *zone)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	return zone - pgdat->node_zones;
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return zone_id(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
-	return false;
-}
-#endif
-
 #include <linux/memory_hotplug.h>
 
 void build_all_zonelists(pg_data_t *pgdat);
@@ -824,6 +805,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+	return false;
+}
+#endif
+
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
-- 
cgit v1.2.3


From c1093b746c0576ed81c4d568d1e39cab651d37e6 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Tue, 21 Aug 2018 21:53:32 -0700
Subject: mm: access zone->node via zone_to_nid() and zone_set_nid()

zone->node is configured only when CONFIG_NUMA=y, so it is a good idea to
have inline functions to access this field in order to avoid ifdef's in c
files.

Link: http://lkml.kernel.org/r/20180730101757.28058-3-osalvador@techadventures.net
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h     |  9 ---------
 include/linux/mmzone.h | 26 ++++++++++++++++++++------
 mm/mempolicy.c         |  4 ++--
 mm/mm_init.c           |  9 ++-------
 mm/page_alloc.c        | 10 ++++------
 5 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3a4b87d1a59a..a55f5389c491 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -960,15 +960,6 @@ static inline int page_zone_id(struct page *page)
 	return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
 }
 
-static inline int zone_to_nid(struct zone *zone)
-{
-#ifdef CONFIG_NUMA
-	return zone->node;
-#else
-	return 0;
-#endif
-}
-
 #ifdef NODE_NOT_IN_PAGE_FLAGS
 extern int page_to_nid(const struct page *page);
 #else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3d4577a3ae7e..1e22d96734e0 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -834,6 +834,25 @@ static inline bool populated_zone(struct zone *zone)
 	return zone->present_pages;
 }
 
+#ifdef CONFIG_NUMA
+static inline int zone_to_nid(struct zone *zone)
+{
+	return zone->node;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid)
+{
+	zone->node = nid;
+}
+#else
+static inline int zone_to_nid(struct zone *zone)
+{
+	return 0;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid) {}
+#endif
+
 extern int movable_zone;
 
 #ifdef CONFIG_HIGHMEM
@@ -949,12 +968,7 @@ static inline int zonelist_zone_idx(struct zoneref *zoneref)
 
 static inline int zonelist_node_idx(struct zoneref *zoneref)
 {
-#ifdef CONFIG_NUMA
-	/* zone_to_nid not available in this context */
-	return zoneref->zone->node;
-#else
-	return 0;
-#endif /* CONFIG_NUMA */
+	return zone_to_nid(zoneref->zone);
 }
 
 struct zoneref *__next_zones_zonelist(struct zoneref *z,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4861ba738d6f..da858f794eb6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1784,7 +1784,7 @@ unsigned int mempolicy_slab_node(void)
 		zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
 		z = first_zones_zonelist(zonelist, highest_zoneidx,
 							&policy->v.nodes);
-		return z->zone ? z->zone->node : node;
+		return z->zone ? zone_to_nid(z->zone) : node;
 	}
 
 	default:
@@ -2326,7 +2326,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 				node_zonelist(numa_node_id(), GFP_HIGHUSER),
 				gfp_zone(GFP_HIGHUSER),
 				&pol->v.nodes);
-		polnid = z->zone->node;
+		polnid = zone_to_nid(z->zone);
 		break;
 
 	default:
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5b72266b4b03..6838a530789b 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -53,13 +53,8 @@ void __init mminit_verify_zonelist(void)
 				zone->name);
 
 			/* Iterate the zonelist */
-			for_each_zone_zonelist(zone, z, zonelist, zoneid) {
-#ifdef CONFIG_NUMA
-				pr_cont("%d:%s ", zone->node, zone->name);
-#else
-				pr_cont("0:%s ", zone->name);
-#endif /* CONFIG_NUMA */
-			}
+			for_each_zone_zonelist(zone, z, zonelist, zoneid)
+				pr_cont("%d:%s ", zone_to_nid(zone), zone->name);
 			pr_cont("\n");
 		}
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 455cc3bfae99..6b2324bc61db 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2909,10 +2909,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 	if (!static_branch_likely(&vm_numa_stat_key))
 		return;
 
-	if (z->node != numa_node_id())
+	if (zone_to_nid(z) != numa_node_id())
 		local_stat = NUMA_OTHER;
 
-	if (z->node == preferred_zone->node)
+	if (zone_to_nid(z) == zone_to_nid(preferred_zone))
 		__inc_numa_state(z, NUMA_HIT);
 	else {
 		__inc_numa_state(z, NUMA_MISS);
@@ -5278,7 +5278,7 @@ int local_memory_node(int node)
 	z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
 				   gfp_zone(GFP_KERNEL),
 				   NULL);
-	return z->zone->node;
+	return zone_to_nid(z->zone);
 }
 #endif
 
@@ -6299,9 +6299,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		 * And all highmem pages will be managed by the buddy system.
 		 */
 		zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
-		zone->node = nid;
-#endif
+		zone_set_nid(zone, nid);
 		zone->name = zone_names[j];
 		zone->zone_pgdat = pgdat;
 		spin_lock_init(&zone->lock);
-- 
cgit v1.2.3


From 03e85f9d5f1f8c74f127c5f7a87575d74a78d248 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:53:43 -0700
Subject: mm/page_alloc: Introduce free_area_init_core_hotplug

Currently, whenever a new node is created/re-used from the memhotplug
path, we call free_area_init_node()->free_area_init_core().  But there is
some code that we do not really need to run when we are coming from such
path.

free_area_init_core() performs the following actions:

1) Initializes pgdat internals, such as spinlock, waitqueues and more.
2) Account # nr_all_pages and # nr_kernel_pages. These values are used later on
   when creating hash tables.
3) Account number of managed_pages per zone, substracting dma_reserved and
   memmap pages.
4) Initializes some fields of the zone structure data
5) Calls init_currently_empty_zone to initialize all the freelists
6) Calls memmap_init to initialize all pages belonging to certain zone

When called from memhotplug path, free_area_init_core() only performs
actions #1 and #4.

Action #2 is pointless as the zones do not have any pages since either the
node was freed, or we are re-using it, eitherway all zones belonging to
this node should have 0 pages.  For the same reason, action #3 results
always in manages_pages being 0.

Action #5 and #6 are performed later on when onlining the pages:
 online_pages()->move_pfn_range_to_zone()->init_currently_empty_zone()
 online_pages()->move_pfn_range_to_zone()->memmap_init_zone()

This patch does two things:

First, moves the node/zone initializtion to their own function, so it
allows us to create a small version of free_area_init_core, where we only
perform:

1) Initialization of pgdat internals, such as spinlock, waitqueues and more
4) Initialization of some fields of the zone structure data

These two functions are: pgdat_init_internals() and zone_init_internals().

The second thing this patch does, is to introduce
free_area_init_core_hotplug(), the memhotplug version of
free_area_init_core():

Currently, we call free_area_init_node() from the memhotplug path.  In
there, we set some pgdat's fields, and call calculate_node_totalpages().
calculate_node_totalpages() calculates the # of pages the node has.

Since the node is either new, or we are re-using it, the zones belonging
to this node should not have any pages, so there is no point to calculate
this now.

Actually, we re-set these values to 0 later on with the calls to:

reset_node_managed_pages()
reset_node_present_pages()

The # of pages per node and the # of pages per zone will be calculated when
onlining the pages:

online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_zone_range()
online_pages()->move_pfn_range()->move_pfn_range_to_zone()->resize_pgdat_range()

Also, since free_area_init_core/free_area_init_node will now only get called during early init, let us replace
__paginginit with __init, so their code gets freed up.

[osalvador@techadventures.net: fix section usage]
  Link: http://lkml.kernel.org/r/20180731101752.GA473@techadventures.net
[osalvador@suse.de: v6]
  Link: http://lkml.kernel.org/r/20180801122348.21588-6-osalvador@techadventures.net
Link: http://lkml.kernel.org/r/20180730101757.28058-5-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  1 +
 include/linux/mm.h             |  2 +-
 mm/memory_hotplug.c            | 16 +++------
 mm/page_alloc.c                | 78 +++++++++++++++++++++++++++++-------------
 4 files changed, 61 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4e9828cda7a2..34a28227068d 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 static inline void remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+extern void __ref free_area_init_core_hotplug(int nid);
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a55f5389c491..a9e733b5fb76 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2015,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
 
 extern void __init pagecache_init(void);
 extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
 extern void free_initmem(void);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4eb6e824a80c..9eea6e809a4e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
 static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
 	struct pglist_data *pgdat;
-	unsigned long zones_size[MAX_NR_ZONES] = {0};
-	unsigned long zholes_size[MAX_NR_ZONES] = {0};
 	unsigned long start_pfn = PFN_DOWN(start);
 
 	pgdat = NODE_DATA(nid);
@@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 
 	/* we can use NODE_DATA(nid) from here */
 
+	pgdat->node_id = nid;
+	pgdat->node_start_pfn = start_pfn;
+
 	/* init node's zones as empty zones, we don't have any present pages.*/
-	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+	free_area_init_core_hotplug(nid);
 	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
 	/*
@@ -1016,19 +1017,12 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	 */
 	build_all_zonelists(pgdat);
 
-	/*
-	 * zone->managed_pages is set to an approximate value in
-	 * free_area_init_core(), which will cause
-	 * /sys/device/system/node/nodeX/meminfo has wrong data.
-	 * So reset it to 0 before any memory is onlined.
-	 */
-	reset_node_managed_pages(pgdat);
-
 	/*
 	 * When memory is hot-added, all the memory is in offline state. So
 	 * clear all zones' present_pages because they will be updated in
 	 * online_pages() and offline_pages().
 	 */
+	reset_node_managed_pages(pgdat);
 	reset_node_present_pages(pgdat);
 
 	return pgdat;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b939bd1bff9..c677c1506d73 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 	unsigned int order;
 
@@ -6168,13 +6168,13 @@ void __meminit set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __meminit set_pageblock_order(void)
+void __init set_pageblock_order(void)
 {
 }
 
 #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
 
-static unsigned long __meminit calc_memmap_size(unsigned long spanned_pages,
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 						unsigned long present_pages)
 {
 	unsigned long pages = spanned_pages;
@@ -6225,19 +6225,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
 static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 #endif
 
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- */
-static void __meminit free_area_init_core(struct pglist_data *pgdat)
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
-	enum zone_type j;
-	int nid = pgdat->node_id;
-
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_numabalancing(pgdat);
@@ -6250,7 +6239,54 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 	pgdat_page_ext_init(pgdat);
 	spin_lock_init(&pgdat->lru_lock);
 	lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+							unsigned long remaining_pages)
+{
+	zone->managed_pages = remaining_pages;
+	zone_set_nid(zone, nid);
+	zone->name = zone_names[idx];
+	zone->zone_pgdat = NODE_DATA(nid);
+	spin_lock_init(&zone->lock);
+	zone_seqlock_init(zone);
+	zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+	enum zone_type z;
+	pg_data_t *pgdat = NODE_DATA(nid);
+
+	pgdat_init_internals(pgdat);
+	for (z = 0; z < MAX_NR_ZONES; z++)
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ *
+ * NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat)
+{
+	enum zone_type j;
+	int nid = pgdat->node_id;
 
+	pgdat_init_internals(pgdat);
 	pgdat->per_cpu_nodestats = &boot_nodestats;
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6298,13 +6334,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone->managed_pages = freesize;
-		zone_set_nid(zone, nid);
-		zone->name = zone_names[j];
-		zone->zone_pgdat = pgdat;
-		spin_lock_init(&zone->lock);
-		zone_seqlock_init(zone);
-		zone_pcp_init(zone);
+		zone_init_internals(zone, j, nid, freesize);
 
 		if (!size)
 			continue;
@@ -6379,7 +6409,7 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
 static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
 #endif
 
-void __meminit free_area_init_node(int nid, unsigned long *zones_size,
+void __init free_area_init_node(int nid, unsigned long *zones_size,
 				   unsigned long node_start_pfn,
 				   unsigned long *zholes_size)
 {
@@ -6418,7 +6448,7 @@ void __meminit free_area_init_node(int nid, unsigned long *zones_size,
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
  */
-void __meminit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;
-- 
cgit v1.2.3


From 3d8b38eb81cac81395f6a823f6bf401b327268e6 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 21 Aug 2018 21:53:54 -0700
Subject: mm, oom: introduce memory.oom.group

For some workloads an intervention from the OOM killer can be painful.
Killing a random task can bring the workload into an inconsistent state.

Historically, there are two common solutions for this
problem:
1) enabling panic_on_oom,
2) using a userspace daemon to monitor OOMs and kill
   all outstanding processes.

Both approaches have their downsides: rebooting on each OOM is an obvious
waste of capacity, and handling all in userspace is tricky and requires a
userspace agent, which will monitor all cgroups for OOMs.

In most cases an in-kernel after-OOM cleaning-up mechanism can eliminate
the necessity of enabling panic_on_oom.  Also, it can simplify the cgroup
management for userspace applications.

This commit introduces a new knob for cgroup v2 memory controller:
memory.oom.group.  The knob determines whether the cgroup should be
treated as an indivisible workload by the OOM killer.  If set, all tasks
belonging to the cgroup or to its descendants (if the memory cgroup is not
a leaf cgroup) are killed together or not at all.

To determine which cgroup has to be killed, we do traverse the cgroup
hierarchy from the victim task's cgroup up to the OOMing cgroup (or root)
and looking for the highest-level cgroup with memory.oom.group set.

Tasks with the OOM protection (oom_score_adj set to -1000) are treated as
an exception and are never killed.

This patch doesn't change the OOM victim selection algorithm.

Link: http://lkml.kernel.org/r/20180802003201.817-4-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 18 +++++++
 include/linux/memcontrol.h              | 18 +++++++
 mm/memcontrol.c                         | 93 +++++++++++++++++++++++++++++++++
 mm/oom_kill.c                           | 30 +++++++++++
 4 files changed, 159 insertions(+)

(limited to 'include')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 1746131bc9cb..184193bcb262 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1072,6 +1072,24 @@ PAGE_SIZE multiple when read back.
 	high limit is used and monitored properly, this limit's
 	utility is limited to providing the final safety net.
 
+  memory.oom.group
+	A read-write single value file which exists on non-root
+	cgroups.  The default value is "0".
+
+	Determines whether the cgroup should be treated as
+	an indivisible workload by the OOM killer. If set,
+	all tasks belonging to the cgroup or to its descendants
+	(if the memory cgroup is not a leaf cgroup) are killed
+	together or not at all. This can be used to avoid
+	partial kills to guarantee workload integrity.
+
+	Tasks with the OOM protection (oom_score_adj set to -1000)
+	are treated as an exception and are never killed.
+
+	If the OOM killer is invoked in a cgroup, it's not going
+	to kill any tasks outside of this cgroup, regardless
+	memory.oom.group values of ancestor cgroups.
+
   memory.events
 	A read-only flat-keyed file which exists on non-root cgroups.
 	The following entries are defined.  Unless specified
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0e6c515fb698..652f602167df 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -225,6 +225,11 @@ struct mem_cgroup {
 	 */
 	bool use_hierarchy;
 
+	/*
+	 * Should the OOM killer kill all belonging tasks, had it kill one?
+	 */
+	bool oom_group;
+
 	/* protected by memcg_oom_lock */
 	bool		oom_lock;
 	int		under_oom;
@@ -542,6 +547,9 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
 }
 
 bool mem_cgroup_oom_synchronize(bool wait);
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+					    struct mem_cgroup *oom_domain);
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
@@ -1001,6 +1009,16 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
 	return false;
 }
 
+static inline struct mem_cgroup *mem_cgroup_get_oom_group(
+	struct task_struct *victim, struct mem_cgroup *oom_domain)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+}
+
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 59c14c988143..4ead5a4817de 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1776,6 +1776,62 @@ cleanup:
 	return true;
 }
 
+/**
+ * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
+ * @victim: task to be killed by the OOM killer
+ * @oom_domain: memcg in case of memcg OOM, NULL in case of system-wide OOM
+ *
+ * Returns a pointer to a memory cgroup, which has to be cleaned up
+ * by killing all belonging OOM-killable tasks.
+ *
+ * Caller has to call mem_cgroup_put() on the returned non-NULL memcg.
+ */
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+					    struct mem_cgroup *oom_domain)
+{
+	struct mem_cgroup *oom_group = NULL;
+	struct mem_cgroup *memcg;
+
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return NULL;
+
+	if (!oom_domain)
+		oom_domain = root_mem_cgroup;
+
+	rcu_read_lock();
+
+	memcg = mem_cgroup_from_task(victim);
+	if (memcg == root_mem_cgroup)
+		goto out;
+
+	/*
+	 * Traverse the memory cgroup hierarchy from the victim task's
+	 * cgroup up to the OOMing cgroup (or root) to find the
+	 * highest-level memory cgroup with oom.group set.
+	 */
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+		if (memcg->oom_group)
+			oom_group = memcg;
+
+		if (memcg == oom_domain)
+			break;
+	}
+
+	if (oom_group)
+		css_get(&oom_group->css);
+out:
+	rcu_read_unlock();
+
+	return oom_group;
+}
+
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+	pr_info("Tasks in ");
+	pr_cont_cgroup_path(memcg->css.cgroup);
+	pr_cont(" are going to be killed due to memory.oom.group set\n");
+}
+
 /**
  * lock_page_memcg - lock a page->mem_cgroup binding
  * @page: the page
@@ -5561,6 +5617,37 @@ static int memory_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int memory_oom_group_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "%d\n", memcg->oom_group);
+
+	return 0;
+}
+
+static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	int ret, oom_group;
+
+	buf = strstrip(buf);
+	if (!buf)
+		return -EINVAL;
+
+	ret = kstrtoint(buf, 0, &oom_group);
+	if (ret)
+		return ret;
+
+	if (oom_group != 0 && oom_group != 1)
+		return -EINVAL;
+
+	memcg->oom_group = oom_group;
+
+	return nbytes;
+}
+
 static struct cftype memory_files[] = {
 	{
 		.name = "current",
@@ -5602,6 +5689,12 @@ static struct cftype memory_files[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = memory_stat_show,
 	},
+	{
+		.name = "oom.group",
+		.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+		.seq_show = memory_oom_group_show,
+		.write = memory_oom_group_write,
+	},
 	{ }	/* terminate */
 };
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 330416c67ce5..0e10b864e074 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -908,6 +908,19 @@ static void __oom_kill_process(struct task_struct *victim)
 }
 #undef K
 
+/*
+ * Kill provided task unless it's secured by setting
+ * oom_score_adj to OOM_SCORE_ADJ_MIN.
+ */
+static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+{
+	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+		get_task_struct(task);
+		__oom_kill_process(task);
+	}
+	return 0;
+}
+
 static void oom_kill_process(struct oom_control *oc, const char *message)
 {
 	struct task_struct *p = oc->chosen;
@@ -915,6 +928,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	struct task_struct *victim = p;
 	struct task_struct *child;
 	struct task_struct *t;
+	struct mem_cgroup *oom_group;
 	unsigned int victim_points = 0;
 	static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
 					      DEFAULT_RATELIMIT_BURST);
@@ -968,7 +982,23 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 	}
 	read_unlock(&tasklist_lock);
 
+	/*
+	 * Do we need to kill the entire memory cgroup?
+	 * Or even one of the ancestor memory cgroups?
+	 * Check this out before killing the victim task.
+	 */
+	oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+
 	__oom_kill_process(victim);
+
+	/*
+	 * If necessary, kill all tasks in the selected memory cgroup.
+	 */
+	if (oom_group) {
+		mem_cgroup_print_oom_group(oom_group);
+		mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+		mem_cgroup_put(oom_group);
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 7e8a6304d5419cbf056a59de92939e5eef039c57 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 21 Aug 2018 21:53:58 -0700
Subject: /proc/meminfo: add percpu populated pages count

Currently, percpu memory only exposes allocation and utilization
information via debugfs.  This more or less is only really useful for
understanding the fragmentation and allocation information at a per-chunk
level with a few global counters.  This is also gated behind a config.
BPF and cgroup, for example, have seen an increase in use causing
increased use of percpu memory.  Let's make it easier for someone to
identify how much memory is being used.

This patch adds the "Percpu" stat to meminfo to more easily look up how
much percpu memory is in use.  This number includes the cost for all
allocated backing pages and not just insight at the per a unit, per chunk
level.  Metadata is excluded.  I think excluding metadata is fair because
the backing memory scales with the numbere of cpus and can quickly
outweigh the metadata.  It also makes this calculation light.

Link: http://lkml.kernel.org/r/20180807184723.74919-1-dennisszhou@gmail.com
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  3 +++
 fs/proc/meminfo.c                  |  2 ++
 include/linux/percpu.h             |  2 ++
 mm/percpu.c                        | 29 +++++++++++++++++++++++++++++
 4 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 1605acbb23b6..22b4b00dee31 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -870,6 +870,7 @@ Committed_AS:   100056 kB
 VmallocTotal:   112216 kB
 VmallocUsed:       428 kB
 VmallocChunk:   111088 kB
+Percpu:          62080 kB
 HardwareCorrupted:   0 kB
 AnonHugePages:   49152 kB
 ShmemHugePages:      0 kB
@@ -962,6 +963,8 @@ Committed_AS: The amount of memory presently allocated on the system.
 VmallocTotal: total size of vmalloc memory area
  VmallocUsed: amount of vmalloc area which is used
 VmallocChunk: largest contiguous block of vmalloc area which is free
+      Percpu: Memory allocated to the percpu allocator used to back percpu
+              allocations. This stat excludes the cost of metadata.
 
 ..............................................................................
 
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 2fb04846ed11..edda898714eb 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -7,6 +7,7 @@
 #include <linux/mman.h>
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
+#include <linux/percpu.h>
 #include <linux/quicklist.h>
 #include <linux/seq_file.h>
 #include <linux/swap.h>
@@ -121,6 +122,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		   (unsigned long)VMALLOC_TOTAL >> 10);
 	show_val_kb(m, "VmallocUsed:    ", 0ul);
 	show_val_kb(m, "VmallocChunk:   ", 0ul);
+	show_val_kb(m, "Percpu:         ", pcpu_nr_pages());
 
 #ifdef CONFIG_MEMORY_FAILURE
 	seq_printf(m, "HardwareCorrupted: %5lu kB\n",
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 296bbe49d5d1..70b7123f38c7 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -149,4 +149,6 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 	(typeof(type) __percpu *)__alloc_percpu(sizeof(type),		\
 						__alignof__(type))
 
+extern unsigned long pcpu_nr_pages(void);
+
 #endif /* __LINUX_PERCPU_H */
diff --git a/mm/percpu.c b/mm/percpu.c
index 0b6480979ac7..a749d4d96e3e 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -169,6 +169,14 @@ static LIST_HEAD(pcpu_map_extend_chunks);
  */
 int pcpu_nr_empty_pop_pages;
 
+/*
+ * The number of populated pages in use by the allocator, protected by
+ * pcpu_lock.  This number is kept per a unit per chunk (i.e. when a page gets
+ * allocated/deallocated, it is allocated/deallocated in all units of a chunk
+ * and increments/decrements this count by 1).
+ */
+static unsigned long pcpu_nr_populated;
+
 /*
  * Balance work is used to populate or destroy chunks asynchronously.  We
  * try to keep the number of populated free pages between
@@ -1232,6 +1240,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
 
 	bitmap_set(chunk->populated, page_start, nr);
 	chunk->nr_populated += nr;
+	pcpu_nr_populated += nr;
 
 	if (!for_alloc) {
 		chunk->nr_empty_pop_pages += nr;
@@ -1260,6 +1269,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
 	chunk->nr_populated -= nr;
 	chunk->nr_empty_pop_pages -= nr;
 	pcpu_nr_empty_pop_pages -= nr;
+	pcpu_nr_populated -= nr;
 }
 
 /*
@@ -2176,6 +2186,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
+	/* include all regions of the first chunk */
+	pcpu_nr_populated += PFN_DOWN(size_sum);
+
 	pcpu_stats_chunk_alloc();
 	trace_percpu_create_chunk(base_addr);
 
@@ -2745,6 +2758,22 @@ void __init setup_per_cpu_areas(void)
 
 #endif	/* CONFIG_SMP */
 
+/*
+ * pcpu_nr_pages - calculate total number of populated backing pages
+ *
+ * This reflects the number of pages populated to back chunks.  Metadata is
+ * excluded in the number exposed in meminfo as the number of backing pages
+ * scales with the number of cpus and can quickly outweigh the memory used for
+ * metadata.  It also keeps this calculation nice and simple.
+ *
+ * RETURNS:
+ * Total number of populated backing pages in use by the allocator.
+ */
+unsigned long pcpu_nr_pages(void)
+{
+	return pcpu_nr_populated * pcpu_nr_units;
+}
+
 /*
  * Percpu allocator is initialized early during boot when neither slab or
  * workqueue is available.  Plug async management until everything is up
-- 
cgit v1.2.3


From 5df66d306ec9b1952d3d183fe4e2ced1a7b2bddb Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 21 Aug 2018 21:54:06 -0700
Subject: mm: fix comment for NODEMASK_ALLOC

Currently, NODEMASK_ALLOC allocates a nodemask_t with kmalloc when
NODES_SHIFT is higher than 8, otherwise it declares it within the stack.

The comment says that the reasoning behind this, is that nodemask_t will
be 256 bytes when NODES_SHIFT is higher than 8, but this is not true.  For
example, NODES_SHIFT = 9 will give us a 64 bytes nodemask_t.  Let us fix
up the comment for that.

Another thing is that it might make sense to let values lower than
128bytes be allocated in the stack.  Although this all depends on the
depth of the stack (and this changes from function to function), I think
that 64 bytes is something we can easily afford.  So we could even bump
the limit by 1 (from > 8 to > 9).

Link: http://lkml.kernel.org/r/20180820085516.9687-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 1fbde8a880d9..5a30ad594ccc 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -518,7 +518,7 @@ static inline int node_random(const nodemask_t *mask)
  * NODEMASK_ALLOC(type, name) allocates an object with a specified type and
  * name.
  */
-#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */
 #define NODEMASK_ALLOC(type, name, gfp_flags)	\
 			type *name = kmalloc(sizeof(*name), gfp_flags)
 #define NODEMASK_FREE(m)			kfree(m)
-- 
cgit v1.2.3


From 891ae71dc4fbd2454a3fa569e115a7ca86630949 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 21 Aug 2018 21:54:37 -0700
Subject: proc: spread "const" a bit

Link: http://lkml.kernel.org/r/20180627200614.GB18434@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h      | 4 ++--
 include/linux/proc_fs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c9d97818a421..5185d7f6a51e 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -113,12 +113,12 @@ static inline void *__PDE_DATA(const struct inode *inode)
 	return PDE(inode)->data;
 }
 
-static inline struct pid *proc_pid(struct inode *inode)
+static inline struct pid *proc_pid(const struct inode *inode)
 {
 	return PROC_I(inode)->pid;
 }
 
-static inline struct task_struct *get_proc_task(struct inode *inode)
+static inline struct task_struct *get_proc_task(const struct inode *inode)
 {
 	return get_pid_task(proc_pid(inode), PIDTYPE_PID);
 }
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 626fc65c4336..d0e1f1522a78 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -129,7 +129,7 @@ int open_related_ns(struct ns_common *ns,
 		   struct ns_common *(*get_ns)(struct ns_common *ns));
 
 /* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
 {
 	return inode->i_sb->s_fs_info;
 }
-- 
cgit v1.2.3


From a8dd9c4df18edc873d244790d163564a5d17626b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:54:51 -0700
Subject: proc/kcore: don't grab lock for kclist_add()

Patch series "/proc/kcore improvements", v4.

This series makes a few improvements to /proc/kcore.  It fixes a couple of
small issues in v3 but is otherwise the same.  Patches 1, 2, and 3 are
prep patches.  Patch 4 is a fix/cleanup.  Patch 5 is another prep patch.
Patches 6 and 7 are optimizations to ->read().  Patch 8 makes it possible
to enable CRASH_CORE on any architecture, which is needed for patch 9.
Patch 9 adds vmcoreinfo to /proc/kcore.

This patch (of 9):

kclist_add() is only called at init time, so there's no point in grabbing
any locks.  We're also going to replace the rwlock with a rwsem, which we
don't want to try grabbing during early boot.

While we're here, mark kclist_add() with __init so that we'll get a
warning if it's called from non-init code.

Link: http://lkml.kernel.org/r/98208db1faf167aa8b08eebfa968d95c70527739.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c       | 7 +++----
 include/linux/kcore.h | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 66c373230e60..b0b9a76f28d6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -62,16 +62,15 @@ static LIST_HEAD(kclist_head);
 static DEFINE_RWLOCK(kclist_lock);
 static int kcore_need_update = 1;
 
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+		       int type)
 {
 	new->addr = (unsigned long)addr;
 	new->size = size;
 	new->type = type;
 
-	write_lock(&kclist_lock);
 	list_add_tail(&new->list, &kclist_head);
-	write_unlock(&kclist_lock);
 }
 
 static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4b5ee9..c20f296438fb 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -35,7 +35,7 @@ struct vmcoredd_node {
 };
 
 #ifdef CONFIG_PROC_KCORE
-extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+void __init kclist_add(struct kcore_list *, void *, size_t, int type);
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
-- 
cgit v1.2.3


From 23c85094fe1895caefdd19ef624ee687ec5f4507 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Tue, 21 Aug 2018 21:55:20 -0700
Subject: proc/kcore: add vmcoreinfo note to /proc/kcore

The vmcoreinfo information is useful for runtime debugging tools, not just
for crash dumps.  A lot of this information can be determined by other
means, but this is much more convenient, and it only adds a page at most
to the file.

Link: http://lkml.kernel.org/r/fddbcd08eed76344863303878b12de1c1e2a04b6.1531953780.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/Kconfig            |  1 +
 fs/proc/kcore.c            | 18 ++++++++++++++++--
 include/linux/crash_core.h |  2 ++
 kernel/crash_core.c        |  4 ++--
 4 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 0eaeb41453f5..817c02b13b1d 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,6 +31,7 @@ config PROC_FS
 config PROC_KCORE
 	bool "/proc/kcore support" if !ARM
 	depends on PROC_FS && MMU
+	select CRASH_CORE
 	help
 	  Provides a virtual ELF core file of the live kernel.  This can
 	  be read with gdb and other ELF tools.  No modifications can be
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 758c14e46a44..80464432dfe6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -10,6 +10,7 @@
  *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
  */
 
+#include <linux/crash_core.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/kcore.h>
@@ -81,10 +82,13 @@ static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
 	}
 
 	*phdrs_len = *nphdr * sizeof(struct elf_phdr);
-	*notes_len = (3 * (sizeof(struct elf_note) + ALIGN(sizeof(CORE_STR), 4)) +
+	*notes_len = (4 * sizeof(struct elf_note) +
+		      3 * ALIGN(sizeof(CORE_STR), 4) +
+		      VMCOREINFO_NOTE_NAME_BYTES +
 		      ALIGN(sizeof(struct elf_prstatus), 4) +
 		      ALIGN(sizeof(struct elf_prpsinfo), 4) +
-		      ALIGN(arch_task_struct_size, 4));
+		      ALIGN(arch_task_struct_size, 4) +
+		      ALIGN(vmcoreinfo_size, 4));
 	*data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
 				  *notes_len);
 	return *data_offset + size;
@@ -406,6 +410,16 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				  sizeof(prpsinfo));
 		append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
 				  arch_task_struct_size);
+		/*
+		 * vmcoreinfo_size is mostly constant after init time, but it
+		 * can be changed by crash_save_vmcoreinfo(). Racing here with a
+		 * panic on another CPU before the machine goes down is insanely
+		 * unlikely, but it's better to not leave potential buffer
+		 * overflows lying around, regardless.
+		 */
+		append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+				  vmcoreinfo_data,
+				  min(vmcoreinfo_size, notes_len - i));
 
 		tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
 		if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index b511f6d24b42..525510a9f965 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -60,6 +60,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
 #define VMCOREINFO_CONFIG(name) \
 	vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
 
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
 extern u32 *vmcoreinfo_note;
 
 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index e7b4025c7b24..a683bfb0530f 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,8 +14,8 @@
 #include <asm/sections.h>
 
 /* vmcoreinfo stuff */
-static unsigned char *vmcoreinfo_data;
-static size_t vmcoreinfo_size;
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
 u32 *vmcoreinfo_note;
 
 /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
-- 
cgit v1.2.3


From 96c6a32ccb55a366054fd82cc63523bb7f7493d3 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 21 Aug 2018 21:55:24 -0700
Subject: include/asm-generic/bug.h: clarify valid uses of WARN()

Explicitly state that WARN*() should be used only for recoverable kernel
issues/bugs and that it should not be used for any kind of invalid
external inputs or transient conditions.

Motivation: it's a very useful capability to be able to understand if a
particular kernel splat means a kernel bug or simply an invalid user-space
program.  For the former one wants to notify kernel developers, while
notifying kernel developers for the latter is annoying.  Even a kernel
developer may not know what to do with a WARNING in an unfamiliar
subsystem.  This is especially critical for any automated testing systems
that may use panic_on_warn and mail kernel developers.

The clear separation also serves as an additional documentation: is it a
condition that must never occur because of additional checks/logic
elsewhere?  or is it simply a check for invalid inputs or unfortunate
conditions?

Use of pr_err() for user messages also leads to better error messages.
"Something is wrong in file foo on line X" is not particularly useful
message for end user.  pr_err() forces developers to write more meaningful
error messages for user.

As of now we are almost there.  We are doing systematic kernel testing
with panic_on_warn and are not seeing massive amounts of false positives.
But every now and then another WARN on ENOMEM or invalid inputs pops up
and leads to a lengthy argument each time.  The goal of this change is to
officially document the rules.

Link: http://lkml.kernel.org/r/20180620103716.61636-1-dvyukov@gmail.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a7613e1b0c87..20561a60db9c 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -75,9 +75,19 @@ struct bug_entry {
 
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
- * significant issues that need prompt attention if they should ever
- * appear at runtime.  Use the versions with printk format strings
- * to provide better diagnostics.
+ * significant kernel issues that need prompt attention if they should ever
+ * appear at runtime.
+ *
+ * Do not use these macros when checking for invalid external inputs
+ * (e.g. invalid system call arguments, or invalid data coming from
+ * network/devices), and on transient conditions like ENOMEM or EAGAIN.
+ * These macros should be used for recoverable kernel issues only.
+ * For invalid external inputs, transient conditions, etc use
+ * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
+ * Do not include "BUG"/"WARNING" in format strings manually to make these
+ * conditions distinguishable from kernel issues.
+ *
+ * Use the versions with printk format strings to provide better diagnostics.
  */
 #ifndef __WARN_TAINT
 extern __printf(3, 4)
-- 
cgit v1.2.3


From cedc5b6aab493f6b1b1d381dccc0cc082da7d3d8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 21 Aug 2018 21:55:28 -0700
Subject: kernel.h: documentation for roundup() vs round_up()

Things like 3619dec5103d ("dh key: fix rounding up KDF output length")
expose the lack of explicit documentation for roundup() vs round_up().  At
least we can try to document it better if anyone goes looking.

Link: http://lkml.kernel.org/r/20180703041950.GA43464@beast
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 941dc0a5a877..d6aac75b51ba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,23 @@
  * arguments just once each.
  */
 #define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/**
+ * round_up - round up to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/**
+ * round_down - round down to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
 /**
@@ -110,13 +126,30 @@
 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
 #endif
 
-/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ *
+ * The `const' here prevents gcc-3.3 from calling __divdi3
+ */
 #define roundup(x, y) (					\
 {							\
 	const typeof(y) __y = y;			\
 	(((x) + (__y - 1)) / __y) * __y;		\
 }							\
 )
+/**
+ * rounddown - round down to next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
 #define rounddown(x, y) (				\
 {							\
 	typeof(x) __x = (x);				\
-- 
cgit v1.2.3


From e58dd0de5eadf145895b13451a1fef8ef03946eb Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:31 -0700
Subject: bdi: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
when the variable is used as a reference counter.  This permits avoiding
accidental refcounter overflows that might lead to use-after-free
situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-4-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h |  3 ++-
 include/linux/backing-dev.h      |  4 ++--
 mm/backing-dev.c                 | 12 ++++++------
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 24251762c20c..9a6bc0951cfa 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/kref.h>
+#include <linux/refcount.h>
 
 struct page;
 struct device;
@@ -75,7 +76,7 @@ enum wb_reason {
  */
 struct bdi_writeback_congested {
 	unsigned long state;		/* WB_[a]sync_congested flags */
-	atomic_t refcnt;		/* nr of attached wb's and blkg */
+	refcount_t refcnt;		/* nr of attached wb's and blkg */
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct backing_dev_info *__bdi;	/* the associated bdi, set to NULL
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 72ca0f3d39f3..c28a47cbe355 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -404,13 +404,13 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
 static inline struct bdi_writeback_congested *
 wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
 {
-	atomic_inc(&bdi->wb_congested->refcnt);
+	refcount_inc(&bdi->wb_congested->refcnt);
 	return bdi->wb_congested;
 }
 
 static inline void wb_congested_put(struct bdi_writeback_congested *congested)
 {
-	if (atomic_dec_and_test(&congested->refcnt))
+	if (refcount_dec_and_test(&congested->refcnt))
 		kfree(congested);
 }
 
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 2e5d3df0853d..55a233d75f39 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -438,10 +438,10 @@ retry:
 	if (new_congested) {
 		/* !found and storage for new one already allocated, insert */
 		congested = new_congested;
-		new_congested = NULL;
 		rb_link_node(&congested->rb_node, parent, node);
 		rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
-		goto found;
+		spin_unlock_irqrestore(&cgwb_lock, flags);
+		return congested;
 	}
 
 	spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -451,13 +451,13 @@ retry:
 	if (!new_congested)
 		return NULL;
 
-	atomic_set(&new_congested->refcnt, 0);
+	refcount_set(&new_congested->refcnt, 1);
 	new_congested->__bdi = bdi;
 	new_congested->blkcg_id = blkcg_id;
 	goto retry;
 
 found:
-	atomic_inc(&congested->refcnt);
+	refcount_inc(&congested->refcnt);
 	spin_unlock_irqrestore(&cgwb_lock, flags);
 	kfree(new_congested);
 	return congested;
@@ -474,7 +474,7 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
+	if (!refcount_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
 		local_irq_restore(flags);
 		return;
 	}
@@ -804,7 +804,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 	if (!bdi->wb_congested)
 		return -ENOMEM;
 
-	atomic_set(&bdi->wb_congested->refcnt, 1);
+	refcount_set(&bdi->wb_congested->refcnt, 1);
 
 	err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (err) {
-- 
cgit v1.2.3


From fc37191272a972d449bab3d8247cf52cadf5d4e6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Aug 2018 21:55:38 -0700
Subject: userns: use refcount_t for reference counting instead atomic_t

refcount_t type and corresponding API should be used instead of atomic_t
wh en the variable is used as a reference counter.  This avoids accidental
refcounter overflows that might lead to use-after-free situations.

Link: http://lkml.kernel.org/r/20180703200141.28415-6-bigeasy@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/user.h | 5 +++--
 kernel/user.c              | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 96fe289c4c6e..39ad98c09c58 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
 
 #include <linux/uidgid.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/ratelimit.h>
 
 struct key;
@@ -12,7 +13,7 @@ struct key;
  * Some day this will be a full-fledged user tracking system..
  */
 struct user_struct {
-	atomic_t __count;	/* reference count */
+	refcount_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
 extern struct user_struct * alloc_uid(kuid_t);
 static inline struct user_struct *get_uid(struct user_struct *u)
 {
-	atomic_inc(&u->__count);
+	refcount_inc(&u->__count);
 	return u;
 }
 extern void free_uid(struct user_struct *);
diff --git a/kernel/user.c b/kernel/user.c
index 36288d840675..5f65ef195259 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
 
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
-	.__count	= ATOMIC_INIT(1),
+	.__count	= REFCOUNT_INIT(1),
 	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
 
 	hlist_for_each_entry(user, hashent, uidhash_node) {
 		if (uid_eq(user->uid, uid)) {
-			atomic_inc(&user->__count);
+			refcount_inc(&user->__count);
 			return user;
 		}
 	}
@@ -170,7 +170,7 @@ void free_uid(struct user_struct *up)
 		return;
 
 	local_irq_save(flags);
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+	if (refcount_dec_and_lock(&up->__count, &uidhash_lock))
 		free_user(up, flags);
 	else
 		local_irq_restore(flags);
@@ -191,7 +191,7 @@ struct user_struct *alloc_uid(kuid_t uid)
 			goto out_unlock;
 
 		new->uid = uid;
-		atomic_set(&new->__count, 1);
+		refcount_set(&new->__count, 1);
 		ratelimit_state_init(&new->ratelimit, HZ, 100);
 		ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
 
-- 
cgit v1.2.3


From 203583990cb675aeddff6d7623f68268068c078c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 21 Aug 2018 21:55:45 -0700
Subject: linux/compiler.h: don't use bool

Appararently, it's possible to have a non-trivial TU include a few
headers, including linux/build_bug.h, without ending up with
linux/types.h.  So the 0day bot sent me

config: um-x86_64_defconfig (attached as .config)

>> include/linux/compiler.h:316:3: error: unknown type name 'bool'; did you mean '_Bool'?
      bool __cond = !(condition);    \

for something I'm working on.

Rather than contributing to the #include madness and including
linux/types.h from compiler.h, just use int.

Link: http://lkml.kernel.org/r/20180817101036.20969-1-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Christopher Li <sparse@chrisli.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 42506e4d1f53..c8eab637a2a7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr)
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
-		bool __cond = !(condition);				\
+		int __cond = !(condition);				\
 		extern void prefix ## suffix(void) __compiletime_error(msg); \
 		if (__cond)						\
 			prefix ## suffix();				\
-- 
cgit v1.2.3


From a2e514453861dd39b53b7a50b6771bd3f9852078 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Tue, 21 Aug 2018 21:55:52 -0700
Subject: kernel/hung_task.c: allow to set checking interval separately from
 timeout

Currently task hung checking interval is equal to timeout, as the result
hung is detected anywhere between timeout and 2*timeout.  This is fine for
most interactive environments, but this hurts automated testing setups
(syzbot).  In an automated setup we need to strictly order CPU lockup <
RCU stall < workqueue lockup < task hung < silent loss, so that RCU stall
is not detected as task hung and task hung is not detected as silent
machine loss.  The large variance in task hung detection timeout requires
setting silent machine loss timeout to a very large value (e.g.  if task
hung is 3 mins, then silent loss need to be set to ~7 mins).  The
additional 3 minutes significantly reduce testing efficiency because
usually we crash kernel within a minute, and this can add hours to bug
localization process as it needs to do dozens of tests.

Allow setting checking interval separately from timeout.  This allows to
set timeout to, say, 3 minutes, but checking interval to 10 secs.

The interval is controlled via a new hung_task_check_interval_secs sysctl,
similar to the existing hung_task_timeout_secs sysctl.  The default value
of 0 results in the current behavior: checking interval is equal to
timeout.

[akpm@linux-foundation.org: update hung_task_timeout_max's comment]
Link: http://lkml.kernel.org/r/20180611111004.203513-1-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/kernel.txt | 15 ++++++++++++++-
 include/linux/sched.h           |  1 +
 include/linux/sched/sysctl.h    |  1 +
 kernel/fork.c                   |  1 +
 kernel/hung_task.c              | 15 ++++++++++++++-
 kernel/sysctl.c                 | 13 ++++++++++++-
 6 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 59585030cbaf..9d38e75b19a0 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -38,6 +38,7 @@ show up in /proc/sys/kernel:
 - hung_task_panic
 - hung_task_check_count
 - hung_task_timeout_secs
+- hung_task_check_interval_secs
 - hung_task_warnings
 - hyperv_record_panic_msg
 - kexec_load_disabled
@@ -355,7 +356,7 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
 
 hung_task_timeout_secs:
 
-Check interval. When a task in D state did not get scheduled
+When a task in D state did not get scheduled
 for more than this value report a warning.
 This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
 
@@ -364,6 +365,18 @@ Possible values to set are in range {0..LONG_MAX/HZ}.
 
 ==============================================================
 
+hung_task_check_interval_secs:
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): means use hung_task_timeout_secs as checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+==============================================================
+
 hung_task_warnings:
 
 The maximum number of warnings to report. During a check interval
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 789923fbee3a..58eb3a2bc695 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -853,6 +853,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_DETECT_HUNG_TASK
 	unsigned long			last_switch_count;
+	unsigned long			last_switch_time;
 #endif
 	/* Filesystem information: */
 	struct fs_struct		*fs;
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 913488d828cb..a9c32daeb9d8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -10,6 +10,7 @@ struct ctl_table;
 extern int	     sysctl_hung_task_check_count;
 extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
 					 void __user *buffer,
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c760effa42e..8bcef2413f1b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1302,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 	tsk->nvcsw = tsk->nivcsw = 0;
 #ifdef CONFIG_DETECT_HUNG_TASK
 	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+	tsk->last_switch_time = 0;
 #endif
 
 	tsk->mm = NULL;
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 32b479468e4d..b9132d1269ef 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  */
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
 
+/*
+ * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+
 int __read_mostly sysctl_hung_task_warnings = 10;
 
 static int __read_mostly did_panic;
@@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 
 	if (switch_count != t->last_switch_count) {
 		t->last_switch_count = switch_count;
+		t->last_switch_time = jiffies;
 		return;
 	}
+	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+		return;
 
 	trace_sched_process_hang(t);
 
@@ -245,8 +253,13 @@ static int watchdog(void *dummy)
 
 	for ( ; ; ) {
 		unsigned long timeout = sysctl_hung_task_timeout_secs;
-		long t = hung_timeout_jiffies(hung_last_checked, timeout);
+		unsigned long interval = sysctl_hung_task_check_interval_secs;
+		long t;
 
+		if (interval == 0)
+			interval = timeout;
+		interval = min_t(unsigned long, interval, timeout);
+		t = hung_timeout_jiffies(hung_last_checked, interval);
 		if (t <= 0) {
 			if (!atomic_xchg(&reset_hung_task, 0))
 				check_hung_uninterruptible_tasks(timeout);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f22f76b7a138..dbdcb6673b27 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -145,7 +145,10 @@ static int minolduid;
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
 
-/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
 #ifdef CONFIG_DETECT_HUNG_TASK
 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #endif
@@ -1090,6 +1093,14 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dohung_task_timeout_secs,
 		.extra2		= &hung_task_timeout_max,
 	},
+	{
+		.procname	= "hung_task_check_interval_secs",
+		.data		= &sysctl_hung_task_check_interval_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
 	{
 		.procname	= "hung_task_warnings",
 		.data		= &sysctl_hung_task_warnings,
-- 
cgit v1.2.3


From f922c4abdf7648523589abee9460c87f51630d2f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:04 -0700
Subject: module: allow symbol exports to be disabled

To allow existing C code to be incorporated into the decompressor or the
UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports are
undesirable.  Note that this gets rid of a rather dodgy redefine of
linux/export.h's header guard.

Link: http://lkml.kernel.org/r/20180704083651.24360-3-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/boot/compressed/kaslr.c      |  5 +----
 drivers/firmware/efi/libstub/Makefile |  1 +
 include/linux/export.h                | 11 ++++++++++-
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 302517929932..d1e19f358b6e 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 88c322d7c71e..14c40a7750d1 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -24,6 +24,7 @@ KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
 				   -D__NO_FORTIFY \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector) \
+				   -D__DISABLE_EXPORTS
 
 GCOV_PROFILE			:= n
 KASAN_SANITIZE			:= n
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6dd3c90..ea7df303d68d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -66,7 +66,16 @@ extern struct module __this_module;
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
 	= { (unsigned long)&sym, __kstrtab_##sym }
 
-#if defined(__KSYM_DEPS__)
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
+#elif defined(__KSYM_DEPS__)
 
 /*
  * For fine grained build dependencies, we want to tell the build system
-- 
cgit v1.2.3


From 7290d58095712a89f845e1bca05334796dd49ed2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:09 -0700
Subject: module: use relative references for __ksymtab entries

An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries,
each consisting of two 64-bit fields containing absolute references, to
the symbol itself and to a char array containing its name, respectively.

When we build the same configuration with KASLR enabled, we end up with an
additional ~192 KB of relocations in the .init section, i.e., one 24 byte
entry for each absolute reference, which all need to be processed at boot
time.

Given how the struct kernel_symbol that describes each entry is completely
local to module.c (except for the references emitted by EXPORT_SYMBOL()
itself), we can easily modify it to contain two 32-bit relative references
instead.  This reduces the size of the __ksymtab section by 50% for all
64-bit architectures, and gets rid of the runtime relocations entirely for
architectures implementing KASLR, either via standard PIE linking (arm64)
or using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies on each
section being sorted by symbol name.  This is implemented based on the
input section names, not the names in the ksymtab entries, so this patch
does not interfere with that.

Given that the use of place-relative relocations requires support both in
the toolchain and in the module loader, we cannot enable this feature for
all architectures.  So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Link: http://lkml.kernel.org/r/20180704083651.24360-4-ard.biesheuvel@linaro.org
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/Kbuild   |  1 +
 arch/x86/include/asm/export.h |  5 -----
 include/asm-generic/export.h  | 12 +++++++++--
 include/linux/compiler.h      | 19 ++++++++++++++++++
 include/linux/export.h        | 46 ++++++++++++++++++++++++++++++++-----------
 kernel/module.c               | 32 ++++++++++++++++++++++++------
 6 files changed, 91 insertions(+), 24 deletions(-)
 delete mode 100644 arch/x86/include/asm/export.h

(limited to 'include')

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2d2e33..a0ab9ab61c75 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@ generated-y += xen-hypercalls.h
 
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..000000000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 68efb950a918..4d73e6e3c66c 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
 #define KSYM_FUNC(x) x
 #endif
 #ifdef CONFIG_64BIT
-#define __put .quad
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
 #else
-#define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
@@ -19,6 +17,16 @@
 #define KCRC_ALIGN 4
 #endif
 
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	.long	\val - ., \name - .
+#elif defined(CONFIG_64BIT)
+	.quad	\val, \name
+#else
+	.long	\val, \name
+#endif
+.endm
+
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c8eab637a2a7..681d866efb1e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr)
 
 #endif /* __KERNEL__ */
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+	static void * __attribute__((section(".discard.addressable"), used)) \
+		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off:	the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+	return (void *)((unsigned long)off + *off);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
diff --git a/include/linux/export.h b/include/linux/export.h
index ea7df303d68d..ae072bc5aacf 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,12 +18,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-	unsigned long value;
-	const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -54,17 +48,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit architectures, and eliminates the need for
+ * absolute relocations that require runtime processing on relocatable
+ * kernels.
+ */
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	__ADDRESSABLE(sym)						\
+	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.balign	8					\n"	\
+	    "__ksymtab_" #sym ":				\n"	\
+	    "	.long	" #sym "- .				\n"	\
+	    "	.long	__kstrtab_" #sym "- .			\n"	\
+	    "	.previous					\n")
+
+struct kernel_symbol {
+	int value_offset;
+	int name_offset;
+};
+#else
+#define __KSYMTAB_ENTRY(sym, sec)					\
+	static const struct kernel_symbol __ksymtab_##sym		\
+	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
+	= { (unsigned long)&sym, __kstrtab_##sym }
+
+struct kernel_symbol {
+	unsigned long value;
+	const char *name;
+};
+#endif
+
 /* For every exported symbol, place a struct in the __ksymtab section */
 #define ___EXPORT_SYMBOL(sym, sec)					\
 	extern typeof(sym) sym;						\
 	__CRC_SYMBOL(sym, sec)						\
 	static const char __kstrtab_##sym[]				\
-	__attribute__((section("__ksymtab_strings"), aligned(1)))	\
+	__attribute__((section("__ksymtab_strings"), used, aligned(1)))	\
 	= #sym;								\
-	static const struct kernel_symbol __ksymtab_##sym		\
-	__used								\
-	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
-	= { (unsigned long)&sym, __kstrtab_##sym }
+	__KSYMTAB_ENTRY(sym, sec)
 
 #if defined(__DISABLE_EXPORTS)
 
diff --git a/kernel/module.c b/kernel/module.c
index b046a32520d8..6746c85511fe 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms,
 	return true;
 }
 
+static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	return (unsigned long)offset_to_ptr(&sym->value_offset);
+#else
+	return sym->value;
+#endif
+}
+
+static const char *kernel_symbol_name(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	return offset_to_ptr(&sym->name_offset);
+#else
+	return sym->name;
+#endif
+}
+
 static int cmp_name(const void *va, const void *vb)
 {
 	const char *a;
 	const struct kernel_symbol *b;
 	a = va; b = vb;
-	return strcmp(a, b->name);
+	return strcmp(a, kernel_symbol_name(b));
 }
 
 static bool find_symbol_in_section(const struct symsearch *syms,
@@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol)
 		sym = NULL;
 	preempt_enable();
 
-	return sym ? (void *)sym->value : NULL;
+	return sym ? (void *)kernel_symbol_value(sym) : NULL;
 }
 EXPORT_SYMBOL_GPL(__symbol_get);
 
@@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod)
 
 	for (i = 0; i < ARRAY_SIZE(arr); i++) {
 		for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
-			if (find_symbol(s->name, &owner, NULL, true, false)) {
+			if (find_symbol(kernel_symbol_name(s), &owner, NULL,
+					true, false)) {
 				pr_err("%s: exports duplicate symbol %s"
 				       " (owned by %s)\n",
-				       mod->name, s->name, module_name(owner));
+				       mod->name, kernel_symbol_name(s),
+				       module_name(owner));
 				return -ENOEXEC;
 			}
 		}
@@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 			ksym = resolve_symbol_wait(mod, info, name);
 			/* Ok if resolved.  */
 			if (ksym && !IS_ERR(ksym)) {
-				sym[i].st_value = ksym->value;
+				sym[i].st_value = kernel_symbol_value(ksym);
 				break;
 			}
 
@@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value,
 		ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
 	else
 		ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
-	return ks != NULL && ks->value == value;
+	return ks != NULL && kernel_symbol_value(ks) == value;
 }
 
 /* As per nm */
-- 
cgit v1.2.3


From 1b1eeca7e4c19fa76d409d4c7b338dba21f2df45 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:13 -0700
Subject: init: allow initcall tables to be emitted using relative references

Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.

Link: http://lkml.kernel.org/r/20180704083651.24360-5-ard.biesheuvel@linaro.org
Acked-by: James Morris <james.morris@microsoft.com>
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/init.h   | 44 +++++++++++++++++++++++++++++++++-----------
 init/main.c            | 32 ++++++++++++++++----------------
 kernel/printk/printk.c | 16 +++++++++-------
 security/security.c    | 17 ++++++++++-------
 4 files changed, 68 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/init.h b/include/linux/init.h
index bc27cf03c41e..2538d176dd1f 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -116,8 +116,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+	return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -167,9 +183,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)			\
+	__ADDRESSABLE(fn)					\
+	asm(".section	\"" #__sec ".init\", \"a\"	\n"	\
+	"__initcall_" #fn #id ":			\n"	\
+	    ".long	" #fn " - .			\n"	\
+	    ".previous					\n");
+#else
+#define ___define_initcall(fn, id, __sec) \
 	static initcall_t __initcall_##fn##id __used \
-	__attribute__((__section__(".initcall" #id ".init"))) = fn;
+		__attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -208,13 +235,8 @@ extern bool initcall_debug;
 #define __exitcall(fn)						\
 	static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)					\
-	static initcall_t __initcall_##fn			\
-	__used __section(.security_initcall.init) = fn
+#define console_initcall(fn)	___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)	___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
 	const char *str;
diff --git a/init/main.c b/init/main.c
index b729e1f22838..3a6ce89e128f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -902,18 +902,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
 }
 
 
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
 	__initcall0_start,
 	__initcall1_start,
 	__initcall2_start,
@@ -939,7 +939,7 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-	initcall_t *fn;
+	initcall_entry_t *fn;
 
 	strcpy(initcall_command_line, saved_command_line);
 	parse_args(initcall_level_names[level],
@@ -950,7 +950,7 @@ static void __init do_initcall_level(int level)
 
 	trace_initcall_level(initcall_level_names[level]);
 	for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
-		do_one_initcall(*fn);
+		do_one_initcall(initcall_from_entry(fn));
 }
 
 static void __init do_initcalls(void)
@@ -981,11 +981,11 @@ static void __init do_basic_setup(void)
 
 static void __init do_pre_smp_initcalls(void)
 {
-	initcall_t *fn;
+	initcall_entry_t *fn;
 
 	trace_initcall_level("early");
 	for (fn = __initcall_start; fn < __initcall0_start; fn++)
-		do_one_initcall(*fn);
+		do_one_initcall(initcall_from_entry(fn));
 }
 
 /*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 90b6ab01db59..918f386b2f6e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2788,7 +2788,8 @@ EXPORT_SYMBOL(unregister_console);
 void __init console_init(void)
 {
 	int ret;
-	initcall_t *call;
+	initcall_t call;
+	initcall_entry_t *ce;
 
 	/* Setup the default TTY line discipline. */
 	n_tty_init();
@@ -2797,13 +2798,14 @@ void __init console_init(void)
 	 * set up the console device so that later boot sequences can
 	 * inform about problems etc..
 	 */
-	call = __con_initcall_start;
+	ce = __con_initcall_start;
 	trace_initcall_level("console");
-	while (call < __con_initcall_end) {
-		trace_initcall_start((*call));
-		ret = (*call)();
-		trace_initcall_finish((*call), ret);
-		call++;
+	while (ce < __con_initcall_end) {
+		call = initcall_from_entry(ce);
+		trace_initcall_start(call);
+		ret = call();
+		trace_initcall_finish(call, ret);
+		ce++;
 	}
 }
 
diff --git a/security/security.c b/security/security.c
index 47cfff01d7ec..736e78da1ab9 100644
--- a/security/security.c
+++ b/security/security.c
@@ -48,14 +48,17 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 static void __init do_security_initcalls(void)
 {
 	int ret;
-	initcall_t *call;
-	call = __security_initcall_start;
+	initcall_t call;
+	initcall_entry_t *ce;
+
+	ce = __security_initcall_start;
 	trace_initcall_level("security");
-	while (call < __security_initcall_end) {
-		trace_initcall_start((*call));
-		ret = (*call) ();
-		trace_initcall_finish((*call), ret);
-		call++;
+	while (ce < __security_initcall_end) {
+		call = initcall_from_entry(ce);
+		trace_initcall_start(call);
+		ret = call();
+		trace_initcall_finish(call, ret);
+		ce++;
 	}
 }
 
-- 
cgit v1.2.3


From c9d8b55fa0191623fccb9ed67d2ff8f9159e9a89 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:18 -0700
Subject: PCI: Add support for relative addressing in quirk tables

Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR)

Link: http://lkml.kernel.org/r/20180704083651.24360-6-ard.biesheuvel@linaro.org
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/quirks.c | 12 +++++++++---
 include/linux/pci.h  | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 46f58a9771d7..ef7143a274e0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -66,9 +66,15 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 		     f->vendor == (u16) PCI_ANY_ID) &&
 		    (f->device == dev->device ||
 		     f->device == (u16) PCI_ANY_ID)) {
-			calltime = fixup_debug_start(dev, f->hook);
-			f->hook(dev);
-			fixup_debug_report(dev, calltime, f->hook);
+			void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+			hook = offset_to_ptr(&f->hook_offset);
+#else
+			hook = f->hook;
+#endif
+			calltime = fixup_debug_start(dev, hook);
+			hook(dev);
+			fixup_debug_report(dev, calltime, hook);
 		}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9b87f1936906..e72ca8dd6241 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1809,7 +1809,11 @@ struct pci_fixup {
 	u16 device;			/* Or PCI_ANY_ID */
 	u32 class;			/* Or PCI_ANY_ID */
 	unsigned int class_shift;	/* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	int hook_offset;
+#else
 	void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1823,12 +1827,28 @@ enum pci_fixup_pass {
 	pci_fixup_suspend_late,	/* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				    class_shift, hook)			\
+	__ADDRESSABLE(hook)						\
+	asm(".section "	#sec ", \"a\"				\n"	\
+	    ".balign	16					\n"	\
+	    ".short "	#vendor ", " #device "			\n"	\
+	    ".long "	#class ", " #class_shift "		\n"	\
+	    ".long "	#hook " - .				\n"	\
+	    ".previous						\n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)			\
+	__DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,	\
+				  class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,	\
 				  class_shift, hook)			\
 	static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used	\
 	__attribute__((__section__(#section), aligned((sizeof(void *)))))    \
 		= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,		\
 					 class_shift, hook)		\
-- 
cgit v1.2.3


From 46e0c9be206fa7b11aca75da2d6b8535d0139752 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 21 Aug 2018 21:56:22 -0700
Subject: kernel: tracepoints: add support for relative references

To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.

Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/tracepoint.h | 19 ++++++++++++++----
 kernel/tracepoint.c        | 49 ++++++++++++++++++++++++----------------------
 2 files changed, 41 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d9a084c72541..7f2e16e76ac4 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -249,6 +249,19 @@ extern void syscall_unregfunc(void);
 		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)					\
+	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
+	    "	.balign 4					\n"	\
+	    "	.long 	__tracepoint_" #name " - .		\n"	\
+	    "	.previous					\n")
+#else
+#define __TRACEPOINT_ENTRY(name)					 \
+	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
+		&__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
@@ -258,11 +271,9 @@ extern void syscall_unregfunc(void);
 	static const char __tpstrtab_##name[]				 \
 	__attribute__((section("__tracepoints_strings"))) = #name;	 \
 	struct tracepoint __tracepoint_##name				 \
-	__attribute__((section("__tracepoints"))) =			 \
+	__attribute__((section("__tracepoints"), used)) =		 \
 		{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
-	__attribute__((section("__tracepoints_ptrs"))) =		 \
-		&__tracepoint_##name;
+	__TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name)						\
 	DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 96db841bf0fc..bf2c06ef9afc 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+		struct tracepoint * const *end,
+		void (*fct)(struct tracepoint *tp, void *priv),
+		void *priv)
+{
+	if (!begin)
+		return;
+
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+		const int *iter;
+
+		for (iter = (const int *)begin; iter < (const int *)end; iter++)
+			fct(offset_to_ptr(iter), priv);
+	} else {
+		struct tracepoint * const *iter;
+
+		for (iter = begin; iter < end; iter++)
+			fct(*iter, priv);
+	}
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-		struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		WARN_ON_ONCE((*iter)->funcs);
+	WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod)
 			 * Called the going notifier before checking for
 			 * quiescence.
 			 */
-			tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-				mod->tracepoints_ptrs + mod->num_tracepoints);
+			for_each_tracepoint_range(mod->tracepoints_ptrs,
+				mod->tracepoints_ptrs + mod->num_tracepoints,
+				tp_module_going_check_quiescent, NULL);
 			break;
 		}
 	}
@@ -547,19 +563,6 @@ static __init int init_tracepoints(void)
 __initcall(init_tracepoints);
 #endif /* CONFIG_MODULES */
 
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
-		struct tracepoint * const *end,
-		void (*fct)(struct tracepoint *tp, void *priv),
-		void *priv)
-{
-	struct tracepoint * const *iter;
-
-	if (!begin)
-		return;
-	for (iter = begin; iter < end; iter++)
-		fct(*iter, priv);
-}
-
 /**
  * for_each_kernel_tracepoint - iteration on all kernel tracepoints
  * @fct: callback
-- 
cgit v1.2.3


From 9144d75e22cad3c89e6b2ccab551db9ee28d250a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 21 Aug 2018 21:57:03 -0700
Subject: include/linux/bitops.h: introduce BITS_PER_TYPE

net_dim.h has a rather useful extension to BITS_PER_BYTE to compute the
number of bits in a type (BITS_PER_BYTE * sizeof(T)), so promote the macro
to bitops.h, alongside BITS_PER_BYTE, for wider usage.

Link: http://lkml.kernel.org/r/20180706094458.14116-1-chris@chris-wilson.co.uk
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andy Gospodarek <gospo@broadcom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h  | 3 ++-
 include/linux/net_dim.h | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index af419012d77d..7ddb1349394d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,7 +4,8 @@
 #include <asm/types.h>
 #include <linux/bits.h>
 
-#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 
 extern unsigned int __sw_hweight8(unsigned int w);
 extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index db99240d00bd..c79e859408e6 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -363,7 +363,6 @@ static inline void net_dim_sample(u16 event_ctr,
 }
 
 #define NET_DIM_NEVENTS 64
-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
 
 static inline void net_dim_calc_stats(struct net_dim_sample *start,
-- 
cgit v1.2.3


From feba04fd2cf8f6a74865338df2e3e1e94d6cfd13 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 21 Aug 2018 21:57:11 -0700
Subject: lib: add crc64 calculation routines

Patch series "add crc64 calculation as kernel library", v5.

This patchset adds basic implementation of crc64 calculation as a Linux
kernel library.  Since bcache already does crc64 by itself, this patchset
also modifies bcache code to use the new crc64 library routine.

Currently bcache is the only user of crc64 calculation, another potential
user is bcachefs which is on the way to be in mainline kernel.  Therefore
it makes sense to make crc64 calculation to be a public library.

bcache uses crc64 as storage checksum, if a change of crc lib routines
results an inconsistent result, the unmatched checksum may make bcache
'think' the on-disk is corrupted, such a change should be avoided or
detected as early as possible.  Therefore a patch is being prepared which
adds a crc test framework, to check consistency of different calculations.

This patch (of 2):

Add the re-write crc64 calculation routines for Linux kernel.  The CRC64
polynomical arithmetic follows ECMA-182 specification, inspired by CRC
paper of Dr.  Ross N.  Williams (see
http://www.ross.net/crc/download/crc_v3.txt) and other public domain
implementations.

All the changes work in this way,
- When Linux kernel is built, host program lib/gen_crc64table.c will be
  compiled to lib/gen_crc64table and executed.
- The output of gen_crc64table execution is an array called as lookup
  table (a.k.a POLY 0x42f0e1eba9ea369) which contain 256 64-bit long
  numbers, this table is dumped into header file lib/crc64table.h.
- Then the header file is included by lib/crc64.c for normal 64bit crc
  calculation.
- Function declaration of the crc64 calculation routines is placed in
  include/linux/crc64.h

Currently bcache is the only user of crc64_be(), another potential user is
bcachefs which is on the way to be in mainline kernel.  Therefore it makes
sense to move crc64 calculation into lib/crc64.c as public code.

[colyli@suse.de: fix review comments from v4]
  Link: http://lkml.kernel.org/r/20180726053352.2781-2-colyli@suse.de
Link: http://lkml.kernel.org/r/20180718165545.1622-2-colyli@suse.de
Signed-off-by: Coly Li <colyli@suse.de>
Co-developed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Eric Biggers <ebiggers3@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Noah Massey <noah.massey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crc64.h | 11 +++++++++
 lib/.gitignore        |  2 ++
 lib/Kconfig           |  8 ++++++
 lib/Makefile          | 11 +++++++++
 lib/crc64.c           | 56 ++++++++++++++++++++++++++++++++++++++++++
 lib/gen_crc64table.c  | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 156 insertions(+)
 create mode 100644 include/linux/crc64.h
 create mode 100644 lib/crc64.c
 create mode 100644 lib/gen_crc64table.c

(limited to 'include')

diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 000000000000..c756e65a1b58
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/lib/.gitignore b/lib/.gitignore
index 09aae85418ab..f2a39c9e5485 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -2,5 +2,7 @@
 # Generated files
 #
 gen_crc32table
+gen_crc64table
 crc32table.h
+crc64table.h
 oid_registry_data.c
diff --git a/lib/Kconfig b/lib/Kconfig
index 706836ec314d..9c10b9852563 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -170,6 +170,14 @@ config CRC32_BIT
 
 endchoice
 
+config CRC64
+	tristate "CRC64 functions"
+	help
+	  This option is provided for the case where no in-kernel-tree
+	  modules require CRC64 functions, but a module built outside
+	  the kernel tree does. Such modules that use library CRC64
+	  functions require M here.
+
 config CRC4
 	tristate "CRC4 functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index d95bb2525101..9baefb6cb1a1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_CRC16)	+= crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
 obj-$(CONFIG_CRC_ITU_T)	+= crc-itu-t.o
 obj-$(CONFIG_CRC32)	+= crc32.o
+obj-$(CONFIG_CRC64)     += crc64.o
 obj-$(CONFIG_CRC32_SELFTEST)	+= crc32test.o
 obj-$(CONFIG_CRC4)	+= crc4.o
 obj-$(CONFIG_CRC7)	+= crc7.o
@@ -217,7 +218,9 @@ obj-$(CONFIG_FONT_SUPPORT) += fonts/
 obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
 
 hostprogs-y	:= gen_crc32table
+hostprogs-y	+= gen_crc64table
 clean-files	:= crc32table.h
+clean-files	+= crc64table.h
 
 $(obj)/crc32.o: $(obj)/crc32table.h
 
@@ -227,6 +230,14 @@ quiet_cmd_crc32 = GEN     $@
 $(obj)/crc32table.h: $(obj)/gen_crc32table
 	$(call cmd,crc32)
 
+$(obj)/crc64.o: $(obj)/crc64table.h
+
+quiet_cmd_crc64 = GEN     $@
+      cmd_crc64 = $< > $@
+
+$(obj)/crc64table.h: $(obj)/gen_crc64table
+	$(call cmd,crc64)
+
 #
 # Build a fast OID lookip registry from include/linux/oid_registry.h
 #
diff --git a/lib/crc64.c b/lib/crc64.c
new file mode 100644
index 000000000000..0ef8ae6ac047
--- /dev/null
+++ b/lib/crc64.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+	or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+	size_t i, t;
+
+	const unsigned char *_p = p;
+
+	for (i = 0; i < len; i++) {
+		t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+		crc = crc64table[t] ^ (crc << 8);
+	}
+
+	return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
new file mode 100644
index 000000000000..9011926e4162
--- /dev/null
+++ b/lib/gen_crc64table.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate lookup table for the table-driven CRC64 calculation.
+ *
+ * gen_crc64table is executed in kernel build time and generates
+ * lib/crc64table.h. This header is included by lib/crc64.c for
+ * the table-driven CRC64 calculation.
+ *
+ * See lib/crc64.c for more information about which specification
+ * and polynomial arithmetic that gen_crc64table.c follows to
+ * generate the lookup table.
+ *
+ * Copyright 2018 SUSE Linux.
+ *   Author: Coly Li <colyli@suse.de>
+ */
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <linux/swab.h>
+
+#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
+
+static uint64_t crc64_table[256] = {0};
+
+static void generate_crc64_table(void)
+{
+	uint64_t i, j, c, crc;
+
+	for (i = 0; i < 256; i++) {
+		crc = 0;
+		c = i << 56;
+
+		for (j = 0; j < 8; j++) {
+			if ((crc ^ c) & 0x8000000000000000ULL)
+				crc = (crc << 1) ^ CRC64_ECMA182_POLY;
+			else
+				crc <<= 1;
+			c <<= 1;
+		}
+
+		crc64_table[i] = crc;
+	}
+}
+
+static void print_crc64_table(void)
+{
+	int i;
+
+	printf("/* this file is generated - do not edit */\n\n");
+	printf("#include <linux/types.h>\n");
+	printf("#include <linux/cache.h>\n\n");
+	printf("static const u64 ____cacheline_aligned crc64table[256] = {\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t0x%016" PRIx64 "ULL", crc64_table[i]);
+		if (i & 0x1)
+			printf(",\n");
+		else
+			printf(", ");
+	}
+	printf("};\n");
+}
+
+int main(int argc, char *argv[])
+{
+	generate_crc64_table();
+	print_crc64_table();
+	return 0;
+}
-- 
cgit v1.2.3


From cbf6898fd69455092c43cd573b38d42c86ddb1e0 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 21 Aug 2018 21:59:01 -0700
Subject: autofs: add AUTOFS_EXP_FORCED flag

The userspace automount(8) daemon is meant to perform a forced expire when
sent a SIGUSR2.

But since the expiration is routed through the kernel and the kernel
doesn't send an expire request if the mount is busy this hasn't worked at
least since autofs version 5.

Add an AUTOFS_EXP_FORCED flag to allow implemention of the feature and
bump the protocol version so user space can check if it's implemented if
needed.

Link: http://lkml.kernel.org/r/152937734715.21213.6594007182776598970.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs/expire.c           | 62 +++++++++++++++++++++++++++++++++++---------
 include/uapi/linux/auto_fs.h |  8 +++---
 2 files changed, 55 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index dfb666c5b8a2..d441244b79df 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -29,7 +29,8 @@ static inline int autofs_can_expire(struct dentry *dentry,
 }
 
 /* Check a mount point for busyness */
-static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
+static int autofs_mount_busy(struct vfsmount *mnt,
+			     struct dentry *dentry, unsigned int how)
 {
 	struct dentry *top = dentry;
 	struct path path = {.mnt = mnt, .dentry = dentry};
@@ -50,6 +51,12 @@ static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 			goto done;
 	}
 
+	/* Not a submount, has a forced expire been requested */
+	if (how & AUTOFS_EXP_FORCED) {
+		status = 0;
+		goto done;
+	}
+
 	/* Update the expiry counter if fs is busy */
 	if (!may_umount_tree(path.mnt)) {
 		struct autofs_info *ino;
@@ -189,6 +196,10 @@ static int autofs_direct_busy(struct vfsmount *mnt,
 {
 	pr_debug("top %p %pd\n", top, top);
 
+	/* Forced expire, user space handles busy mounts */
+	if (how & AUTOFS_EXP_FORCED)
+		return 0;
+
 	/* If it's busy update the expiry counters */
 	if (!may_umount_tree(mnt)) {
 		struct autofs_info *ino;
@@ -235,7 +246,7 @@ static int autofs_tree_busy(struct vfsmount *mnt,
 		 * If the fs is busy update the expiry counter.
 		 */
 		if (d_mountpoint(p)) {
-			if (autofs_mount_busy(mnt, p)) {
+			if (autofs_mount_busy(mnt, p, how)) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
@@ -258,6 +269,10 @@ static int autofs_tree_busy(struct vfsmount *mnt,
 		}
 	}
 
+	/* Forced expire, user space handles busy mounts */
+	if (how & AUTOFS_EXP_FORCED)
+		return 0;
+
 	/* Timeout of a tree mount is ultimately determined by its top dentry */
 	if (!autofs_can_expire(top, timeout, how))
 		return 1;
@@ -280,9 +295,15 @@ static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
 
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
-			if (autofs_mount_busy(mnt, p))
+			if (autofs_mount_busy(mnt, p, how))
 				continue;
 
+			/* This isn't a submount so if a forced expire
+			 * has been requested, user space handles busy
+			 * mounts */
+			if (how & AUTOFS_EXP_FORCED)
+				return p;
+
 			/* Can we expire this guy */
 			if (autofs_can_expire(p, timeout, how))
 				return p;
@@ -361,9 +382,15 @@ static struct dentry *should_expire(struct dentry *dentry,
 		pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
 
 		/* Can we umount this guy */
-		if (autofs_mount_busy(mnt, dentry))
+		if (autofs_mount_busy(mnt, dentry, how))
 			return NULL;
 
+		/* This isn't a submount so if a forced expire
+		 * has been requested, user space handles busy
+		 * mounts */
+		if (how & AUTOFS_EXP_FORCED)
+			return dentry;
+
 		/* Can we expire this guy */
 		if (autofs_can_expire(dentry, timeout, how))
 			return dentry;
@@ -372,6 +399,11 @@ static struct dentry *should_expire(struct dentry *dentry,
 
 	if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
 		pr_debug("checking symlink %p %pd\n", dentry, dentry);
+
+		/* Forced expire, user space handles busy mounts */
+		if (how & AUTOFS_EXP_FORCED)
+			return dentry;
+
 		/*
 		 * A symlink can't be "busy" in the usual sense so
 		 * just check last used for expire timeout.
@@ -386,10 +418,13 @@ static struct dentry *should_expire(struct dentry *dentry,
 
 	/* Case 2: tree mount, expire iff entire tree is not busy */
 	if (!(how & AUTOFS_EXP_LEAVES)) {
-		/* Path walk currently on this dentry? */
-		ino_count = atomic_read(&ino->count) + 1;
-		if (d_count(dentry) > ino_count)
-			return NULL;
+		/* Not a forced expire? */
+		if (!(how & AUTOFS_EXP_FORCED)) {
+			/* ref-walk currently on this dentry? */
+			ino_count = atomic_read(&ino->count) + 1;
+			if (d_count(dentry) > ino_count)
+				return NULL;
+		}
 
 		if (!autofs_tree_busy(mnt, dentry, timeout, how))
 			return dentry;
@@ -398,12 +433,15 @@ static struct dentry *should_expire(struct dentry *dentry,
 	 *	   (autofs-4.1).
 	 */
 	} else {
-		/* Path walk currently on this dentry? */
 		struct dentry *expired;
 
-		ino_count = atomic_read(&ino->count) + 1;
-		if (d_count(dentry) > ino_count)
-			return NULL;
+		/* Not a forced expire? */
+		if (!(how & AUTOFS_EXP_FORCED)) {
+			/* ref-walk currently on this dentry? */
+			ino_count = atomic_read(&ino->count) + 1;
+			if (d_count(dentry) > ino_count)
+				return NULL;
+		}
 
 		expired = autofs_check_leaves(mnt, dentry, timeout, how);
 		if (expired) {
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index e13eec3dfb2f..df31aa9c9a8c 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	5
 
-#define AUTOFS_PROTO_SUBVERSION		2
+#define AUTOFS_PROTO_SUBVERSION		3
 
 /*
  * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
@@ -90,8 +90,10 @@ enum {
 /* autofs version 4 and later definitions */
 
 /* Mask for expire behaviour */
-#define AUTOFS_EXP_IMMEDIATE		1
-#define AUTOFS_EXP_LEAVES		2
+#define AUTOFS_EXP_NORMAL		0x00
+#define AUTOFS_EXP_IMMEDIATE		0x01
+#define AUTOFS_EXP_LEAVES		0x02
+#define AUTOFS_EXP_FORCED		0x04
 
 #define AUTOFS_TYPE_ANY			0U
 #define AUTOFS_TYPE_INDIRECT		1U
-- 
cgit v1.2.3


From 52cba1a274818c3ee6299c1a1b476e7bc5037a2f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 21:59:51 -0700
Subject: signal: make force_sigsegv() void

Patch series "signal: refactor some functions", v3.

This series refactors a bunch of functions in signal.c to simplify parts
of the code.

The greatest single change is declaring the static do_sigpending() helper
as void which makes it possible to remove a bunch of unnecessary checks in
the syscalls later on.

This patch (of 17):

force_sigsegv() returned 0 unconditionally so it doesn't make sense to have
it return at all. In addition, there are no callers that check
force_sigsegv()'s return value.

Link: http://lkml.kernel.org/r/20180602103653.18181-2-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/signal.h | 2 +-
 kernel/signal.c              | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 113d1ad1ced7..e138ac16c650 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -314,7 +314,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
 extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
+extern void force_sigsegv(int sig, struct task_struct *p);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
diff --git a/kernel/signal.c b/kernel/signal.c
index 8d8a940422a8..8a828baa0f93 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1458,8 +1458,7 @@ send_sig(int sig, struct task_struct *p, int priv)
 	return send_sig_info(sig, __si_special(priv), p);
 }
 
-void
-force_sig(int sig, struct task_struct *p)
+void force_sig(int sig, struct task_struct *p)
 {
 	force_sig_info(sig, SEND_SIG_PRIV, p);
 }
@@ -1470,8 +1469,7 @@ force_sig(int sig, struct task_struct *p)
  * the problem was already a SIGSEGV, we'll want to
  * make sure we don't even try to deliver the signal..
  */
-int
-force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig, struct task_struct *p)
 {
 	if (sig == SIGSEGV) {
 		unsigned long flags;
@@ -1480,7 +1478,6 @@ force_sigsegv(int sig, struct task_struct *p)
 		spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	}
 	force_sig(SIGSEGV, p);
-	return 0;
 }
 
 int force_sig_fault(int sig, int code, void __user *addr
-- 
cgit v1.2.3


From 67a48a24478841525ba73ec6d64caaf079cf3b7c Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:34 -0700
Subject: signal: make unhandled_signal() return bool

unhandled_signal() already behaves like a boolean function.  Let's
actually declare it as such too.  All callers treat it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-13-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 kernel/signal.c        | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3c5200137b24..fa23cae66758 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -287,7 +287,7 @@ static inline void disallow_signal(int sig)
 
 extern struct kmem_cache *sighand_cachep;
 
-int unhandled_signal(struct task_struct *tsk, int sig);
+extern bool unhandled_signal(struct task_struct *tsk, int sig);
 
 /*
  * In POSIX a signal is sent either to a specific thread (Linux task)
diff --git a/kernel/signal.c b/kernel/signal.c
index 7eec2dba597e..c10c09fc4ec3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -505,13 +505,15 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 	}
 }
 
-int unhandled_signal(struct task_struct *tsk, int sig)
+bool unhandled_signal(struct task_struct *tsk, int sig)
 {
 	void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
 	if (is_global_init(tsk))
-		return 1;
+		return true;
+
 	if (handler != SIG_IGN && handler != SIG_DFL)
-		return 0;
+		return false;
+
 	/* if ptraced, let the tracer determine */
 	return !tsk->ptrace;
 }
-- 
cgit v1.2.3


From 20ab7218d2507b2dbec9c053c2f1b96054e266b6 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 21 Aug 2018 22:00:54 -0700
Subject: signal: make get_signal() return bool

make get_signal() already behaves like a boolean function.  Let's actually
declare it as such too.

Link: http://lkml.kernel.org/r/20180602103653.18181-18-christian@brauner.io
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morris <james.morris@microsoft.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 2 +-
 kernel/signal.c        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index fa23cae66758..e3d9e0640e6e 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -265,7 +265,7 @@ extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 
-extern int get_signal(struct ksignal *ksig);
+extern bool get_signal(struct ksignal *ksig);
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f9b4183178e..786bacc60649 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2287,7 +2287,7 @@ static int ptrace_signal(int signr, siginfo_t *info)
 	return signr;
 }
 
-int get_signal(struct ksignal *ksig)
+bool get_signal(struct ksignal *ksig)
 {
 	struct sighand_struct *sighand = current->sighand;
 	struct signal_struct *signal = current->signal;
@@ -2297,7 +2297,7 @@ int get_signal(struct ksignal *ksig)
 		task_work_run();
 
 	if (unlikely(uprobe_deny_signal()))
-		return 0;
+		return false;
 
 	/*
 	 * Do this once, we can't return to user-mode if freezing() == T.
-- 
cgit v1.2.3


From dc2c8c84def6ce450c63529e08c1db100020994e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 21 Aug 2018 22:01:52 -0700
Subject: ipc: get rid of ids->tables_initialized hack

In sysvipc we have an ids->tables_initialized regarding the rhashtable,
introduced in 0cfb6aee70bd ("ipc: optimize semget/shmget/msgget for lots
of keys")

It's there, specifically, to prevent nil pointer dereferences, from using
an uninitialized api.  Considering how rhashtable_init() can fail
(probably due to ENOMEM, if anything), this made the overall ipc
initialization capable of failure as well.  That alone is ugly, but fine,
however I've spotted a few issues regarding the semantics of
tables_initialized (however unlikely they may be):

- There is inconsistency in what we return to userspace: ipc_addid()
  returns ENOSPC which is certainly _wrong_, while ipc_obtain_object_idr()
  returns EINVAL.

- After we started using rhashtables, ipc_findkey() can return nil upon
  !tables_initialized, but the caller expects nil for when the ipc
  structure isn't found, and can therefore call into ipcget() callbacks.

Now that rhashtable initialization cannot fail, we can properly get rid of
the hack altogether.

[manfred@colorfullife.com: commit id extended to 12 digits]
Link: http://lkml.kernel.org/r/20180712185241.4017-10-manfred@colorfullife.com
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  1 -
 ipc/util.c                    | 23 ++++++++---------------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6cea726612b7..e9ccdfdb1928 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,7 +16,6 @@ struct user_namespace;
 struct ipc_ids {
 	int in_use;
 	unsigned short seq;
-	bool tables_initialized;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
 	int max_id;
diff --git a/ipc/util.c b/ipc/util.c
index bd1863b6ed39..7c1387c9510d 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -126,7 +126,6 @@ int ipc_init_ids(struct ipc_ids *ids)
 	if (err)
 		return err;
 	idr_init(&ids->ipcs_idr);
-	ids->tables_initialized = true;
 	ids->max_id = -1;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	ids->next_id = -1;
@@ -179,19 +178,16 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  */
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	struct kern_ipc_perm *ipcp = NULL;
+	struct kern_ipc_perm *ipcp;
 
-	if (likely(ids->tables_initialized))
-		ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+	ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
 					      ipc_kht_params);
+	if (!ipcp)
+		return NULL;
 
-	if (ipcp) {
-		rcu_read_lock();
-		ipc_lock_object(ipcp);
-		return ipcp;
-	}
-
-	return NULL;
+	rcu_read_lock();
+	ipc_lock_object(ipcp);
+	return ipcp;
 }
 
 /*
@@ -269,7 +265,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
 	if (limit > IPCMNI)
 		limit = IPCMNI;
 
-	if (!ids->tables_initialized || ids->in_use >= limit)
+	if (ids->in_use >= limit)
 		return -ENOSPC;
 
 	idr_preload(GFP_KERNEL);
@@ -578,9 +574,6 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
 	struct kern_ipc_perm *out;
 	int lid = ipcid_to_idx(id);
 
-	if (unlikely(!ids->tables_initialized))
-		return ERR_PTR(-EINVAL);
-
 	out = idr_find(&ids->ipcs_idr, lid);
 	if (!out)
 		return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From 27c331a174614208d0b539019583990967ad9479 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Tue, 21 Aug 2018 22:02:00 -0700
Subject: ipc/util.c: further variable name cleanups

The varable names got a mess, thus standardize them again:

id: user space id. Called semid, shmid, msgid if the type is known.
    Most functions use "id" already.
idx: "index" for the idr lookup
    Right now, some functions use lid, ipc_addid() already uses idx as
    the variable name.
seq: sequence number, to avoid quick collisions of the user space id
key: user space key, used for the rhash tree

Link: http://lkml.kernel.org/r/20180712185241.4017-12-manfred@colorfullife.com
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Kees Cook <keescook@chromium.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  2 +-
 ipc/msg.c                     |  6 +++---
 ipc/sem.c                     |  6 +++---
 ipc/shm.c                     |  4 ++--
 ipc/util.c                    | 26 +++++++++++++-------------
 ipc/util.h                    | 10 +++++-----
 6 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e9ccdfdb1928..6ab8c1bada3f 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -18,7 +18,7 @@ struct ipc_ids {
 	unsigned short seq;
 	struct rw_semaphore rwsem;
 	struct idr ipcs_idr;
-	int max_id;
+	int max_idx;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	int next_id;
 #endif
diff --git a/ipc/msg.c b/ipc/msg.c
index ee78dad90460..883642cf2b27 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -456,7 +456,7 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 			 int cmd, struct msginfo *msginfo)
 {
 	int err;
-	int max_id;
+	int max_idx;
 
 	/*
 	 * We must not return kernel stack data.
@@ -483,9 +483,9 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_id = ipc_get_maxid(&msg_ids(ns));
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
 	up_read(&msg_ids(ns).rwsem);
-	return (max_id < 0) ? 0 : max_id;
+	return (max_idx < 0) ? 0 : max_idx;
 }
 
 static int msgctl_stat(struct ipc_namespace *ns, int msqid,
diff --git a/ipc/sem.c b/ipc/sem.c
index e4df102f3404..26f8e37fcdcb 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1294,7 +1294,7 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
 			 int cmd, void __user *p)
 {
 	struct seminfo seminfo;
-	int max_id;
+	int max_idx;
 	int err;
 
 	err = security_sem_semctl(NULL, cmd);
@@ -1318,11 +1318,11 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
 		seminfo.semusz = SEMUSZ;
 		seminfo.semaem = SEMAEM;
 	}
-	max_id = ipc_get_maxid(&sem_ids(ns));
+	max_idx = ipc_get_maxidx(&sem_ids(ns));
 	up_read(&sem_ids(ns).rwsem);
 	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
 		return -EFAULT;
-	return (max_id < 0) ? 0 : max_id;
+	return (max_idx < 0) ? 0 : max_idx;
 }
 
 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
diff --git a/ipc/shm.c b/ipc/shm.c
index c4fb359f1e53..b0eb3757ab89 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -948,7 +948,7 @@ static int shmctl_ipc_info(struct ipc_namespace *ns,
 		shminfo->shmall = ns->shm_ctlall;
 		shminfo->shmmin = SHMMIN;
 		down_read(&shm_ids(ns).rwsem);
-		err = ipc_get_maxid(&shm_ids(ns));
+		err = ipc_get_maxidx(&shm_ids(ns));
 		up_read(&shm_ids(ns).rwsem);
 		if (err < 0)
 			err = 0;
@@ -968,7 +968,7 @@ static int shmctl_shm_info(struct ipc_namespace *ns,
 		shm_info->shm_tot = ns->shm_tot;
 		shm_info->swap_attempts = 0;
 		shm_info->swap_successes = 0;
-		err = ipc_get_maxid(&shm_ids(ns));
+		err = ipc_get_maxidx(&shm_ids(ns));
 		up_read(&shm_ids(ns).rwsem);
 		if (err < 0)
 			err = 0;
diff --git a/ipc/util.c b/ipc/util.c
index a2aae8c1410d..6f30ba80ca15 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -119,7 +119,7 @@ void ipc_init_ids(struct ipc_ids *ids)
 	init_rwsem(&ids->rwsem);
 	rhashtable_init(&ids->key_ht, &ipc_kht_params);
 	idr_init(&ids->ipcs_idr);
-	ids->max_id = -1;
+	ids->max_idx = -1;
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	ids->next_id = -1;
 #endif
@@ -237,7 +237,7 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
  * @limit: limit for the number of used ids
  *
  * Add an entry 'new' to the ipc ids idr. The permissions object is
- * initialised and the first free entry is set up and the id assigned
+ * initialised and the first free entry is set up and the index assigned
  * is returned. The 'new' entry is returned in a locked state on success.
  *
  * On failure the entry is not locked and a negative err-code is returned.
@@ -291,8 +291,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
 	}
 
 	ids->in_use++;
-	if (idx > ids->max_id)
-		ids->max_id = idx;
+	if (idx > ids->max_idx)
+		ids->max_idx = idx;
 	return idx;
 }
 
@@ -431,20 +431,20 @@ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
  */
 void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-	int lid = ipcid_to_idx(ipcp->id);
+	int idx = ipcid_to_idx(ipcp->id);
 
-	idr_remove(&ids->ipcs_idr, lid);
+	idr_remove(&ids->ipcs_idr, idx);
 	ipc_kht_remove(ids, ipcp);
 	ids->in_use--;
 	ipcp->deleted = true;
 
-	if (unlikely(lid == ids->max_id)) {
+	if (unlikely(idx == ids->max_idx)) {
 		do {
-			lid--;
-			if (lid == -1)
+			idx--;
+			if (idx == -1)
 				break;
-		} while (!idr_find(&ids->ipcs_idr, lid));
-		ids->max_id = lid;
+		} while (!idr_find(&ids->ipcs_idr, idx));
+		ids->max_idx = idx;
 	}
 }
 
@@ -564,9 +564,9 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
 struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
 {
 	struct kern_ipc_perm *out;
-	int lid = ipcid_to_idx(id);
+	int idx = ipcid_to_idx(id);
 
-	out = idr_find(&ids->ipcs_idr, lid);
+	out = idr_find(&ids->ipcs_idr, idx);
 	if (!out)
 		return ERR_PTR(-EINVAL);
 
diff --git a/ipc/util.h b/ipc/util.h
index 6c5c77c61f85..e74564fe3375 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -113,12 +113,12 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
 int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
 
 /**
- * ipc_get_maxid - get the last assigned id
+ * ipc_get_maxidx - get the highest assigned index
  * @ids: ipc identifier set
  *
  * Called with ipc_ids.rwsem held for reading.
  */
-static inline int ipc_get_maxid(struct ipc_ids *ids)
+static inline int ipc_get_maxidx(struct ipc_ids *ids)
 {
 	if (ids->in_use == 0)
 		return -1;
@@ -126,7 +126,7 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
 	if (ids->in_use == IPCMNI)
 		return IPCMNI - 1;
 
-	return ids->max_id;
+	return ids->max_idx;
 }
 
 /*
@@ -172,9 +172,9 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len);
 extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
 extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
 
-static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
 {
-	return uid / SEQ_MULTIPLIER != ipcp->seq;
+	return ipcid_to_seqx(id) != ipcp->seq;
 }
 
 static inline void ipc_lock_object(struct kern_ipc_perm *perm)
-- 
cgit v1.2.3


From 9abdda5ddab8a899ca8c4b859ef0a7710f40e0dd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 16 Aug 2018 12:05:59 -0400
Subject: sunrpc: Extract target name into svc_cred

NFSv4.0 callback needs to know the GSS target name the client used
when it established its lease. That information is available from
the GSS context created by gssproxy. Make it available in each
svc_cred.

Note this will also give us access to the real target service
principal name (which is typically "nfs", but spec does not require
that).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4state.c                  |  7 ++--
 include/linux/sunrpc/svcauth.h       |  3 ++
 net/sunrpc/auth_gss/gss_rpc_upcall.c | 70 +++++++++++++++++++++++-------------
 3 files changed, 53 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c8e4a8f42bbe..3322fe35a8fe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1979,8 +1979,10 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 	target->cr_principal = kstrdup(source->cr_principal, GFP_KERNEL);
 	target->cr_raw_principal = kstrdup(source->cr_raw_principal,
 								GFP_KERNEL);
-	if ((source->cr_principal && ! target->cr_principal) ||
-	    (source->cr_raw_principal && ! target->cr_raw_principal))
+	target->cr_targ_princ = kstrdup(source->cr_targ_princ, GFP_KERNEL);
+	if ((source->cr_principal && !target->cr_principal) ||
+	    (source->cr_raw_principal && !target->cr_raw_principal) ||
+	    (source->cr_targ_princ && !target->cr_targ_princ))
 		return -ENOMEM;
 
 	target->cr_flavor = source->cr_flavor;
@@ -2057,6 +2059,7 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
 		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
 		return false;
+	/* XXX: check that cr_targ_princ fields match ? */
 	if (cr1->cr_principal == cr2->cr_principal)
 		return true;
 	if (!cr1->cr_principal || !cr2->cr_principal)
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 7c3656505847..04e404a07882 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -31,6 +31,7 @@ struct svc_cred {
 	/* name of form servicetype@hostname, passed down by
 	 * rpc.svcgssd, or computed from the above: */
 	char			*cr_principal;
+	char			*cr_targ_princ;
 	struct gss_api_mech	*cr_gss_mech;
 };
 
@@ -39,6 +40,7 @@ static inline void init_svc_cred(struct svc_cred *cred)
 	cred->cr_group_info = NULL;
 	cred->cr_raw_principal = NULL;
 	cred->cr_principal = NULL;
+	cred->cr_targ_princ = NULL;
 	cred->cr_gss_mech = NULL;
 }
 
@@ -48,6 +50,7 @@ static inline void free_svc_cred(struct svc_cred *cred)
 		put_group_info(cred->cr_group_info);
 	kfree(cred->cr_raw_principal);
 	kfree(cred->cr_principal);
+	kfree(cred->cr_targ_princ);
 	gss_mech_put(cred->cr_gss_mech);
 	init_svc_cred(cred);
 }
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 1c7c49dbf8ba..73dcda060335 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -234,6 +234,35 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
 	return 0;
 }
 
+static char *gssp_stringify(struct xdr_netobj *netobj)
+{
+	return kstrndup(netobj->data, netobj->len, GFP_KERNEL);
+}
+
+static void gssp_hostbased_service(char **principal)
+{
+	char *c;
+
+	if (!*principal)
+		return;
+
+	/* terminate and remove realm part */
+	c = strchr(*principal, '@');
+	if (c) {
+		*c = '\0';
+
+		/* change service-hostname delimiter */
+		c = strchr(*principal, '/');
+		if (c)
+			*c = '@';
+	}
+	if (!c) {
+		/* not a service principal */
+		kfree(*principal);
+		*principal = NULL;
+	}
+}
+
 /*
  * Public functions
  */
@@ -262,6 +291,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 		 */
 		.exported_context_token.len = GSSX_max_output_handle_sz,
 		.mech.len = GSS_OID_MAX_LEN,
+		.targ_name.display_name.len = GSSX_max_princ_sz,
 		.src_name.display_name.len = GSSX_max_princ_sz
 	};
 	struct gssx_res_accept_sec_context res = {
@@ -275,6 +305,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 		.rpc_cred = NULL, /* FIXME ? */
 	};
 	struct xdr_netobj client_name = { 0 , NULL };
+	struct xdr_netobj target_name = { 0, NULL };
 	int ret;
 
 	if (data->in_handle.len != 0)
@@ -285,8 +316,6 @@ int gssp_accept_sec_context_upcall(struct net *net,
 	if (ret)
 		return ret;
 
-	/* use nfs/ for targ_name ? */
-
 	ret = gssp_call(net, &msg);
 
 	gssp_free_receive_pages(&arg);
@@ -304,6 +333,7 @@ int gssp_accept_sec_context_upcall(struct net *net,
 			kfree(rctxh.mech.data);
 		}
 		client_name = rctxh.src_name.display_name;
+		target_name = rctxh.targ_name.display_name;
 	}
 
 	if (res.options.count == 1) {
@@ -325,32 +355,22 @@ int gssp_accept_sec_context_upcall(struct net *net,
 	}
 
 	/* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
-	if (data->found_creds && client_name.data != NULL) {
-		char *c;
-
-		data->creds.cr_raw_principal = kstrndup(client_name.data,
-						client_name.len, GFP_KERNEL);
-
-		data->creds.cr_principal = kstrndup(client_name.data,
-						client_name.len, GFP_KERNEL);
-		if (data->creds.cr_principal) {
-			/* terminate and remove realm part */
-			c = strchr(data->creds.cr_principal, '@');
-			if (c) {
-				*c = '\0';
-
-				/* change service-hostname delimiter */
-				c = strchr(data->creds.cr_principal, '/');
-				if (c) *c = '@';
-			}
-			if (!c) {
-				/* not a service principal */
-				kfree(data->creds.cr_principal);
-				data->creds.cr_principal = NULL;
-			}
+	if (data->found_creds) {
+		if (client_name.data) {
+			data->creds.cr_raw_principal =
+					gssp_stringify(&client_name);
+			data->creds.cr_principal =
+					gssp_stringify(&client_name);
+			gssp_hostbased_service(&data->creds.cr_principal);
+		}
+		if (target_name.data) {
+			data->creds.cr_targ_princ =
+					gssp_stringify(&target_name);
+			gssp_hostbased_service(&data->creds.cr_targ_princ);
 		}
 	}
 	kfree(client_name.data);
+	kfree(target_name.data);
 
 	return ret;
 }
-- 
cgit v1.2.3


From 815f0ddb346c196018d4d8f8f55c12b83da1de3f Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 22 Aug 2018 16:37:24 -0700
Subject: include/linux/compiler*.h: make compiler-*.h mutually exclusive

Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
recently exposed a brittle part of the build for supporting non-gcc
compilers.

Both Clang and ICC define __GNUC__, __GNUC_MINOR__, and
__GNUC_PATCHLEVEL__ for quick compatibility with code bases that haven't
added compiler specific checks for __clang__ or __INTEL_COMPILER.

This is brittle, as they happened to get compatibility by posing as a
certain version of GCC.  This broke when upgrading the minimal version
of GCC required to build the kernel, to a version above what ICC and
Clang claim to be.

Rather than always including compiler-gcc.h then undefining or
redefining macros in compiler-intel.h or compiler-clang.h, let's
separate out the compiler specific macro definitions into mutually
exclusive headers, do more proper compiler detection, and keep shared
definitions in compiler_types.h.

Fixes: cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
Reported-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Eli Friedman <efriedma@codeaurora.org>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/asm-offsets.c     |  13 +--
 drivers/gpu/drm/i915/i915_utils.h |   2 +-
 drivers/watchdog/kempld_wdt.c     |   5 -
 include/linux/compiler-clang.h    |  20 ++--
 include/linux/compiler-gcc.h      |  88 --------------
 include/linux/compiler-intel.h    |  13 +--
 include/linux/compiler_types.h    | 238 ++++++++++++++++++--------------------
 mm/ksm.c                          |   4 +-
 mm/migrate.c                      |   3 +-
 9 files changed, 133 insertions(+), 253 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 974d8d7d1bcd..3968d6c22455 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -38,25 +38,14 @@
 #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
 #endif
 /*
- * GCC 3.0, 3.1: general bad code generation.
- * GCC 3.2.0: incorrect function argument offset calculation.
- * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
- *            (http://gcc.gnu.org/PR8896) and incorrect structure
- *	      initialisation in fs/jffs2/erase.c
  * GCC 4.8.0-4.8.2: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58854
  *	      miscompiles find_get_entry(), and can result in EXT3 and EXT4
  *	      filesystem corruption (possibly other FS too).
  */
-#ifdef __GNUC__
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-#error Your compiler is too buggy; it is known to miscompile kernels.
-#error    Known good compilers: 3.3, 4.x
-#endif
-#if GCC_VERSION >= 40800 && GCC_VERSION < 40803
+#if defined(GCC_VERSION) && GCC_VERSION >= 40800 && GCC_VERSION < 40803
 #error Your compiler is too buggy; it is known to miscompile kernels
 #error and result in filesystem corruption and oopses.
 #endif
-#endif
 
 int main(void)
 {
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 00165ad55fb3..395dd2511568 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -43,7 +43,7 @@
 #define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
 			     __stringify(x), (long)(x))
 
-#if GCC_VERSION >= 70000
+#if defined(GCC_VERSION) && GCC_VERSION >= 70000
 #define add_overflows(A, B) \
 	__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
 #else
diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c
index 2f3b049ea301..e268add43010 100644
--- a/drivers/watchdog/kempld_wdt.c
+++ b/drivers/watchdog/kempld_wdt.c
@@ -146,12 +146,7 @@ static int kempld_wdt_set_stage_timeout(struct kempld_wdt_data *wdt_data,
 	u32 remainder;
 	u8 stage_cfg;
 
-#if GCC_VERSION < 40400
-	/* work around a bug compiling do_div() */
-	prescaler = READ_ONCE(kempld_prescaler[PRESCALER_21]);
-#else
 	prescaler = kempld_prescaler[PRESCALER_21];
-#endif
 
 	if (!stage)
 		return -EINVAL;
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 7087446c24c8..b1ce500fe8b3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -6,11 +6,7 @@
 /* Some compiler specific definitions are overwritten here
  * for Clang compiler
  */
-
-#ifdef uninitialized_var
-#undef uninitialized_var
 #define uninitialized_var(x) x = *(&(x))
-#endif
 
 /* same as gcc, this was present in clang-2.6 so we can assume it works
  * with any version that can compile the kernel
@@ -25,14 +21,8 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#undef __no_sanitize_address
 #define __no_sanitize_address __attribute__((no_sanitize("address")))
 
-/* Clang doesn't have a way to turn it off per-function, yet. */
-#ifdef __noretpoline
-#undef __noretpoline
-#endif
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -40,9 +30,17 @@
  * checks. Unfortunately, we don't know which version of gcc clang
  * pretends to be, so the macro may or may not be defined.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
 #if __has_builtin(__builtin_mul_overflow) && \
     __has_builtin(__builtin_add_overflow) && \
     __has_builtin(__builtin_sub_overflow)
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
+
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like ICC.
+ */
+#define barrier() __asm__ __volatile__("" : : : "memory")
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#define __assume_aligned(a, ...)	\
+	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 250b9b7cfd60..763bbad1e258 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -75,48 +75,6 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((gnu_inline))
-#else
-# define __gnu_inline
-#endif
-
-/*
- * Force always-inline if the user requests it so via the .config,
- * or if gcc is too old.
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well by using "unused"
- * function attribute, which is redundant but not harmful for gcc.
- * Prefer gnu_inline, so that extern inline functions do not emit an
- * externally visible function. This makes extern inline behave as per gnu89
- * semantics rather than c99. This prevents multiple symbol definition errors
- * of extern inline functions at link time.
- * A lot of inline functions can cause havoc with function tracing.
- */
-#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
-    !defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-	inline __attribute__((always_inline, unused)) notrace __gnu_inline
-#else
-#define inline inline		__attribute__((unused)) notrace __gnu_inline
-#endif
-
-#define __inline__ inline
-#define __inline inline
-#define __always_inline	inline __attribute__((always_inline))
-#define  noinline	__attribute__((noinline))
-
-#define __packed	__attribute__((packed))
-#define __weak		__attribute__((weak))
-#define __alias(symbol)	__attribute__((alias(#symbol)))
-
 #ifdef RETPOLINE
 #define __noretpoline __attribute__((indirect_branch("keep")))
 #endif
@@ -135,55 +93,9 @@
  */
 #define __naked		__attribute__((naked)) noinline __noclone notrace
 
-#define __noreturn	__attribute__((noreturn))
-
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure			__attribute__((pure))
-#define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
-#define __printf(a, b)		__attribute__((format(printf, a, b)))
-#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
-#define __attribute_const__	__attribute__((__const__))
-#define __maybe_unused		__attribute__((unused))
-#define __always_unused		__attribute__((unused))
-#define __mode(x)               __attribute__((mode(x)))
-
-#define __must_check		__attribute__((warn_unused_result))
-#define __malloc		__attribute__((__malloc__))
-
-#define __used			__attribute__((__used__))
-#define __compiler_offsetof(a, b)					\
-	__builtin_offsetof(a, b)
-
-/* Mark functions as cold. gcc will assume any path leading to a call
- * to them will be unlikely.  This means a lot of manual unlikely()s
- * are unnecessary now for any paths leading to the usual suspects
- * like BUG(), printk(), panic() etc. [but let's keep them for now for
- * older compilers]
- *
- * Early snapshots of gcc 4.3 don't support this and we can't detect this
- * in the preprocessor, but we can live with this because they're unreleased.
- * Maketime probing would be overkill here.
- *
- * gcc also has a __attribute__((__hot__)) to move hot functions into
- * a special section, but I don't see any sense in this right now in
- * the kernel context
- */
-#define __cold			__attribute__((__cold__))
-
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 #define __optimize(level)	__attribute__((__optimize__(level)))
-#define __nostackprotector	__optimize("no-stack-protector")
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 547cdc920a3c..4c7f9befa9f6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -14,10 +14,6 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
-#undef barrier
-#undef barrier_data
-#undef RELOC_HIDE
-#undef OPTIMIZER_HIDE_VAR
 
 #define barrier() __memory_barrier()
 #define barrier_data(ptr) barrier()
@@ -38,13 +34,12 @@
 
 #endif
 
-#ifndef __HAVE_BUILTIN_BSWAP16__
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-#endif
 
-/*
- * icc defines __GNUC__, but does not implement the builtin overflow checkers.
+/* The following are for compatibility with GCC, from compiler-gcc.h,
+ * and may be redefined here because they should not be shared with other
+ * compilers, like clang.
  */
-#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+#define __visible	__attribute__((externally_visible))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index fbf337933fd8..90479a0f3986 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -54,32 +54,20 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
-#ifdef __GNUC__
-#include <linux/compiler-gcc.h>
-#endif
-
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
-#define notrace __attribute__((hotpatch(0,0)))
-#else
-#define notrace __attribute__((no_instrument_function))
-#endif
-
-/* Intel compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
-#ifdef __INTEL_COMPILER
-# include <linux/compiler-intel.h>
-#endif
-
-/* Clang compiler defines __GNUC__. So we will overwrite implementations
- * coming from above header files here
- */
+/* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
+#elif defined(__INTEL_COMPILER)
+#include <linux/compiler-intel.h>
+#elif defined(__GNUC__)
+/* The above compilers also define __GNUC__, so order is important here. */
+#include <linux/compiler-gcc.h>
+#else
+#error "Unknown compiler"
 #endif
 
 /*
- * Generic compiler-dependent macros required for kernel
+ * Generic compiler-independent macros required for kernel
  * build go below this comment. Actual compiler/compiler version
  * specific implementations come from the above header files
  */
@@ -106,93 +94,19 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-#endif /* __KERNEL__ */
-
-#endif /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
-
 /* Don't. Just don't. */
 #define __deprecated
 #define __deprecated_for_modules
 
-#ifndef __must_check
-#define __must_check
-#endif
-
-#ifndef CONFIG_ENABLE_MUST_CHECK
-#undef __must_check
-#define __must_check
-#endif
-
-#ifndef __malloc
-#define __malloc
-#endif
-
-/*
- * Allow us to avoid 'defined but not used' warnings on functions and data,
- * as well as force them to be emitted to the assembly file.
- *
- * As of gcc 3.4, static functions that are not marked with attribute((used))
- * may be elided from the assembly file.  As of gcc 3.4, static data not so
- * marked will not be elided, but this may change in a future gcc version.
- *
- * NOTE: Because distributions shipped with a backported unit-at-a-time
- * compiler in gcc 3.3, we must define __used to be __attribute__((used))
- * for gcc >=3.3 instead of 3.4.
- *
- * In prior versions of gcc, such functions and data would be emitted, but
- * would be warned about except with attribute((unused)).
- *
- * Mark functions that are referenced only in inline assembly as __used so
- * the code is emitted even though it appears to be unreferenced.
- */
-#ifndef __used
-# define __used			/* unimplemented */
-#endif
-
-#ifndef __maybe_unused
-# define __maybe_unused		/* unimplemented */
-#endif
-
-#ifndef __always_unused
-# define __always_unused	/* unimplemented */
-#endif
-
-#ifndef noinline
-#define noinline
-#endif
-
-/*
- * Rather then using noinline to prevent stack consumption, use
- * noinline_for_stack instead.  For documentation reasons.
- */
-#define noinline_for_stack noinline
-
-#ifndef __always_inline
-#define __always_inline inline
-#endif
-
 #endif /* __KERNEL__ */
 
+#endif /* __ASSEMBLY__ */
+
 /*
- * From the GCC manual:
- *
- * Many functions do not examine any values except their arguments,
- * and have no effects except the return value.  Basically this is
- * just slightly more strict class than the `pure' attribute above,
- * since function is not allowed to read global memory.
- *
- * Note that a function that has pointer arguments and examines the
- * data pointed to must _not_ be declared `const'.  Likewise, a
- * function that calls a non-`const' function usually must not be
- * `const'.  It does not make sense for a `const' function to return
- * `void'.
+ * The below symbols may be defined for one or more, but not ALL, of the above
+ * compilers. We don't consider that to be an error, so set them to nothing.
+ * For example, some of them are for compiler specific plugins.
  */
-#ifndef __attribute_const__
-# define __attribute_const__	/* unimplemented */
-#endif
-
 #ifndef __designated_init
 # define __designated_init
 #endif
@@ -214,28 +128,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-/*
- * Tell gcc if a function is cold. The compiler will assume any path
- * directly leading to the call is unlikely.
- */
-
-#ifndef __cold
-#define __cold
-#endif
-
-/* Simple shorthand for a section definition */
-#ifndef __section
-# define __section(S) __attribute__ ((__section__(#S)))
-#endif
-
 #ifndef __visible
 #define __visible
 #endif
 
-#ifndef __nostackprotector
-# define __nostackprotector
-#endif
-
 /*
  * Assume alignment of return value.
  */
@@ -243,17 +139,23 @@ struct ftrace_likely_data {
 #define __assume_aligned(a, ...)
 #endif
 
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
-#ifndef __same_type
-# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-#endif
+#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
 /* Is this type a native word size -- useful for atomic operations */
-#ifndef __native_word
-# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+#define __native_word(t) \
+	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
+	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+
+#ifndef __attribute_const__
+#define __attribute_const__	__attribute__((__const__))
 #endif
 
+#ifndef __noclone
+#define __noclone
+#endif
+
+/* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
 #endif
@@ -272,4 +174,92 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
 	__diag_ ## compiler(version, error, option)
 
+/*
+ * From the GCC manual:
+ *
+ * Many functions have no effects except the return value and their
+ * return value depends only on the parameters and/or global
+ * variables.  Such a function can be subject to common subexpression
+ * elimination and loop optimization just as an arithmetic operator
+ * would be.
+ * [...]
+ */
+#define __pure			__attribute__((pure))
+#define __aligned(x)		__attribute__((aligned(x)))
+#define __aligned_largest	__attribute__((aligned))
+#define __printf(a, b)		__attribute__((format(printf, a, b)))
+#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
+#define __maybe_unused		__attribute__((unused))
+#define __always_unused		__attribute__((unused))
+#define __mode(x)		__attribute__((mode(x)))
+#define __malloc		__attribute__((__malloc__))
+#define __used			__attribute__((__used__))
+#define __noreturn		__attribute__((noreturn))
+#define __packed		__attribute__((packed))
+#define __weak			__attribute__((weak))
+#define __alias(symbol)		__attribute__((alias(#symbol)))
+#define __cold			__attribute__((cold))
+#define __section(S)		__attribute__((__section__(#S)))
+
+
+#ifdef CONFIG_ENABLE_MUST_CHECK
+#define __must_check		__attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
+
+#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#define notrace			__attribute__((hotpatch(0, 0)))
+#else
+#define notrace			__attribute__((no_instrument_function))
+#endif
+
+#define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
+
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
+/*
+ * Force always-inline if the user requests it so via the .config.
+ * GCC does not warn about unused static inline functions for
+ * -Wunused-function.  This turns out to avoid the need for complex #ifdef
+ * directives.  Suppress the warning in clang as well by using "unused"
+ * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
+ */
+#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
+	!defined(CONFIG_OPTIMIZE_INLINING)
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
+#else
+#define inline inline	__attribute__((unused)) notrace __gnu_inline
+#endif
+
+#define __inline__ inline
+#define __inline inline
+#define noinline	__attribute__((noinline))
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+/*
+ * Rather then using noinline to prevent stack consumption, use
+ * noinline_for_stack instead.  For documentation reasons.
+ */
+#define noinline_for_stack noinline
+
 #endif /* __LINUX_COMPILER_TYPES_H */
diff --git a/mm/ksm.c b/mm/ksm.c
index 1bd514c9e5d0..5b0894b45ee5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -652,9 +652,9 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
 	 * list_head to stay clear from the rb_parent_color union
 	 * (aligned and different than any node) and also different
 	 * from &migrate_nodes. This will verify that future list.h changes
-	 * don't break STABLE_NODE_DUP_HEAD.
+	 * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it.
 	 */
-#if GCC_VERSION >= 40903 /* only recent gcc can handle it */
+#if defined(GCC_VERSION) && GCC_VERSION >= 40903
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes);
 	BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1);
 #endif
diff --git a/mm/migrate.c b/mm/migrate.c
index 4a83268e23c2..c27e97b5b69d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1131,7 +1131,8 @@ out:
  * gcc 4.7 and 4.8 on arm get an ICEs when inlining unmap_and_move().  Work
  * around it.
  */
-#if (GCC_VERSION >= 40700 && GCC_VERSION < 40900) && defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) && \
+	defined(GCC_VERSION) && GCC_VERSION < 40900 && GCC_VERSION >= 40700
 #define ICE_noinline noinline
 #else
 #define ICE_noinline
-- 
cgit v1.2.3


From 52a288c736669851f166544d4a0b93e1090d7e9b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 22 Aug 2018 17:30:13 +0200
Subject: x86/mm/tlb: Revert the recent lazy TLB patches

Revert commits:

  95b0e6357d3e x86/mm/tlb: Always use lazy TLB mode
  64482aafe55f x86/mm/tlb: Only send page table free TLB flush to lazy TLB CPUs
  ac0315896970 x86/mm/tlb: Make lazy TLB mode lazier
  61d0beb5796a x86/mm/tlb: Restructure switch_mm_irqs_off()
  2ff6ddf19c0e x86/mm/tlb: Leave lazy TLB mode at page table free time

In order to simplify the TLB invalidate fixes for x86 and unify the
parts that need backporting.  We'll try again later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/tlbflush.h |  21 +++-
 arch/x86/mm/tlb.c               | 205 +++++++++++-----------------------------
 include/asm-generic/tlb.h       |  10 --
 mm/memory.c                     |  22 ++---
 4 files changed, 77 insertions(+), 181 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 511bf5fae8b8..6690cd3fc8b1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,6 +148,22 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 #define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
 #endif
 
+static inline bool tlb_defer_switch_to_init_mm(void)
+{
+	/*
+	 * If we have PCID, then switching to init_mm is reasonably
+	 * fast.  If we don't have PCID, then switching to init_mm is
+	 * quite slow, so we try to defer it in the hopes that we can
+	 * avoid it entirely.  The latter approach runs the risk of
+	 * receiving otherwise unnecessary IPIs.
+	 *
+	 * This choice is just a heuristic.  The tlb code can handle this
+	 * function returning true or false regardless of whether we have
+	 * PCID.
+	 */
+	return !static_cpu_has(X86_FEATURE_PCID);
+}
+
 struct tlb_context {
 	u64 ctx_id;
 	u64 tlb_gen;
@@ -538,9 +554,4 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 	native_flush_tlb_others(mask, info)
 #endif
 
-extern void tlb_flush_remove_tables(struct mm_struct *mm);
-extern void tlb_flush_remove_tables_local(void *arg);
-
-#define HAVE_TLB_FLUSH_REMOVE_TABLES
-
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 752dbf4e0e50..9517d1b2a281 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,7 +7,6 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
-#include <linux/gfp.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -186,11 +185,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
-	bool need_flush;
-	u16 new_asid;
 
 	/*
 	 * NB: The scheduler will call us with prev == next when switching
@@ -244,41 +240,20 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			   next->context.ctx_id);
 
 		/*
-		 * Even in lazy TLB mode, the CPU should stay set in the
-		 * mm_cpumask. The TLB shootdown code can figure out from
-		 * from cpu_tlbstate.is_lazy whether or not to send an IPI.
+		 * We don't currently support having a real mm loaded without
+		 * our cpu set in mm_cpumask().  We have all the bookkeeping
+		 * in place to figure out whether we would need to flush
+		 * if our cpu were cleared in mm_cpumask(), but we don't
+		 * currently use it.
 		 */
 		if (WARN_ON_ONCE(real_prev != &init_mm &&
 				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
 			cpumask_set_cpu(cpu, mm_cpumask(next));
 
-		/*
-		 * If the CPU is not in lazy TLB mode, we are just switching
-		 * from one thread in a process to another thread in the same
-		 * process. No TLB flush required.
-		 */
-		if (!was_lazy)
-			return;
-
-		/*
-		 * Read the tlb_gen to check whether a flush is needed.
-		 * If the TLB is up to date, just use it.
-		 * The barrier synchronizes with the tlb_gen increment in
-		 * the TLB shootdown code.
-		 */
-		smp_mb();
-		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
-				next_tlb_gen)
-			return;
-
-		/*
-		 * TLB contents went out of date while we were in lazy
-		 * mode. Fall through to the TLB switching code below.
-		 */
-		new_asid = prev_asid;
-		need_flush = true;
+		return;
 	} else {
+		u16 new_asid;
+		bool need_flush;
 		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
 
 		/*
@@ -329,41 +304,41 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
-	}
 
-	if (need_flush) {
-		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
-		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		if (need_flush) {
+			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
+			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
+			load_new_mm_cr3(next->pgd, new_asid, true);
+
+			/*
+			 * NB: This gets called via leave_mm() in the idle path
+			 * where RCU functions differently.  Tracing normally
+			 * uses RCU, so we need to use the _rcuidle variant.
+			 *
+			 * (There is no good reason for this.  The idle code should
+			 *  be rearranged to call this before rcu_idle_enter().)
+			 */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		} else {
+			/* The new ASID is already up to date. */
+			load_new_mm_cr3(next->pgd, new_asid, false);
+
+			/* See above wrt _rcuidle. */
+			trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+		}
 
 		/*
-		 * NB: This gets called via leave_mm() in the idle path
-		 * where RCU functions differently.  Tracing normally
-		 * uses RCU, so we need to use the _rcuidle variant.
-		 *
-		 * (There is no good reason for this.  The idle code should
-		 *  be rearranged to call this before rcu_idle_enter().)
+		 * Record last user mm's context id, so we can avoid
+		 * flushing branch buffer with IBPB if we switch back
+		 * to the same user.
 		 */
-		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
-	} else {
-		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		if (next != &init_mm)
+			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
-		/* See above wrt _rcuidle. */
-		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
+		this_cpu_write(cpu_tlbstate.loaded_mm, next);
+		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 	}
 
-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
-	this_cpu_write(cpu_tlbstate.loaded_mm, next);
-	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
-
 	load_mm_cr4(next);
 	switch_ldt(real_prev, next);
 }
@@ -386,7 +361,20 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
 		return;
 
-	this_cpu_write(cpu_tlbstate.is_lazy, true);
+	if (tlb_defer_switch_to_init_mm()) {
+		/*
+		 * There's a significant optimization that may be possible
+		 * here.  We have accurate enough TLB flush tracking that we
+		 * don't need to maintain coherence of TLB per se when we're
+		 * lazy.  We do, however, need to maintain coherence of
+		 * paging-structure caches.  We could, in principle, leave our
+		 * old mm loaded and only switch to init_mm when
+		 * tlb_remove_page() happens.
+		 */
+		this_cpu_write(cpu_tlbstate.is_lazy, true);
+	} else {
+		switch_mm(NULL, &init_mm, NULL);
+	}
 }
 
 /*
@@ -473,9 +461,6 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		 * paging-structure cache to avoid speculatively reading
 		 * garbage into our TLB.  Since switching to init_mm is barely
 		 * slower than a minimal flush, just switch to init_mm.
-		 *
-		 * This should be rare, with native_flush_tlb_others skipping
-		 * IPIs to lazy TLB mode CPUs.
 		 */
 		switch_mm_irqs_off(NULL, &init_mm, NULL);
 		return;
@@ -582,9 +567,6 @@ static void flush_tlb_func_remote(void *info)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info)
 {
-	cpumask_var_t lazymask;
-	unsigned int cpu;
-
 	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
 	if (info->end == TLB_FLUSH_ALL)
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
@@ -608,6 +590,8 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 		 * that UV should be updated so that smp_call_function_many(),
 		 * etc, are optimal on UV.
 		 */
+		unsigned int cpu;
+
 		cpu = smp_processor_id();
 		cpumask = uv_flush_tlb_others(cpumask, info);
 		if (cpumask)
@@ -615,29 +599,8 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 					       (void *)info, 1);
 		return;
 	}
-
-	/*
-	 * A temporary cpumask is used in order to skip sending IPIs
-	 * to CPUs in lazy TLB state, while keeping them in mm_cpumask(mm).
-	 * If the allocation fails, simply IPI every CPU in mm_cpumask.
-	 */
-	if (!alloc_cpumask_var(&lazymask, GFP_ATOMIC)) {
-		smp_call_function_many(cpumask, flush_tlb_func_remote,
-			       (void *)info, 1);
-		return;
-	}
-
-	cpumask_copy(lazymask, cpumask);
-
-	for_each_cpu(cpu, lazymask) {
-		if (per_cpu(cpu_tlbstate.is_lazy, cpu))
-			cpumask_clear_cpu(cpu, lazymask);
-	}
-
-	smp_call_function_many(lazymask, flush_tlb_func_remote,
+	smp_call_function_many(cpumask, flush_tlb_func_remote,
 			       (void *)info, 1);
-
-	free_cpumask_var(lazymask);
 }
 
 /*
@@ -690,68 +653,6 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	put_cpu();
 }
 
-void tlb_flush_remove_tables_local(void *arg)
-{
-	struct mm_struct *mm = arg;
-
-	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm &&
-			this_cpu_read(cpu_tlbstate.is_lazy)) {
-		/*
-		 * We're in lazy mode.  We need to at least flush our
-		 * paging-structure cache to avoid speculatively reading
-		 * garbage into our TLB.  Since switching to init_mm is barely
-		 * slower than a minimal flush, just switch to init_mm.
-		 */
-		switch_mm_irqs_off(NULL, &init_mm, NULL);
-	}
-}
-
-static void mm_fill_lazy_tlb_cpu_mask(struct mm_struct *mm,
-				      struct cpumask *lazy_cpus)
-{
-	int cpu;
-
-	for_each_cpu(cpu, mm_cpumask(mm)) {
-		if (!per_cpu(cpu_tlbstate.is_lazy, cpu))
-			cpumask_set_cpu(cpu, lazy_cpus);
-	}
-}
-
-void tlb_flush_remove_tables(struct mm_struct *mm)
-{
-	int cpu = get_cpu();
-	cpumask_var_t lazy_cpus;
-
-	if (cpumask_any_but(mm_cpumask(mm), cpu) >= nr_cpu_ids) {
-		put_cpu();
-		return;
-	}
-
-	if (!zalloc_cpumask_var(&lazy_cpus, GFP_ATOMIC)) {
-		/*
-		 * If the cpumask allocation fails, do a brute force flush
-		 * on all the CPUs that have this mm loaded.
-		 */
-		smp_call_function_many(mm_cpumask(mm),
-				tlb_flush_remove_tables_local, (void *)mm, 1);
-		put_cpu();
-		return;
-	}
-
-	/*
-	 * CPUs with !is_lazy either received a TLB flush IPI while the user
-	 * pages in this address range were unmapped, or have context switched
-	 * and reloaded %CR3 since then.
-	 *
-	 * Shootdown IPIs at page table freeing time only need to be sent to
-	 * CPUs that may have out of date TLB contents.
-	 */
-	mm_fill_lazy_tlb_cpu_mask(mm, lazy_cpus);
-	smp_call_function_many(lazy_cpus,
-				tlb_flush_remove_tables_local, (void *)mm, 1);
-	free_cpumask_var(lazy_cpus);
-	put_cpu();
-}
 
 static void do_flush_tlb_all(void *info)
 {
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e811ef7b8350..3063125197ad 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -303,14 +303,4 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 
 #define tlb_migrate_finish(mm) do {} while (0)
 
-/*
- * Used to flush the TLB when page tables are removed, when lazy
- * TLB mode may cause a CPU to retain intermediate translations
- * pointing to about-to-be-freed page table memory.
- */
-#ifndef HAVE_TLB_FLUSH_REMOVE_TABLES
-#define tlb_flush_remove_tables(mm) do {} while (0)
-#define tlb_flush_remove_tables_local(mm) do {} while (0)
-#endif
-
 #endif /* _ASM_GENERIC__TLB_H */
diff --git a/mm/memory.c b/mm/memory.c
index 19f47d7b9b86..d1dd43f8c1ce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,20 +326,16 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 
+/*
+ * See the comment near struct mmu_table_batch.
+ */
+
 static void tlb_remove_table_smp_sync(void *arg)
 {
-	struct mm_struct __maybe_unused *mm = arg;
-	/*
-	 * On most architectures this does nothing. Simply delivering the
-	 * interrupt is enough to prevent races with software page table
-	 * walking like that done in get_user_pages_fast.
-	 *
-	 * See the comment near struct mmu_table_batch.
-	 */
-	tlb_flush_remove_tables_local(mm);
+	/* Simply deliver the interrupt */
 }
 
-static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
+static void tlb_remove_table_one(void *table)
 {
 	/*
 	 * This isn't an RCU grace period and hence the page-tables cannot be
@@ -348,7 +344,7 @@ static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
 	 * It is however sufficient for software page-table walkers that rely on
 	 * IRQ disabling. See the comment near struct mmu_table_batch.
 	 */
-	smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
 	__tlb_remove_table(table);
 }
 
@@ -369,8 +365,6 @@ void tlb_table_flush(struct mmu_gather *tlb)
 {
 	struct mmu_table_batch **batch = &tlb->batch;
 
-	tlb_flush_remove_tables(tlb->mm);
-
 	if (*batch) {
 		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
 		*batch = NULL;
@@ -393,7 +387,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	if (*batch == NULL) {
 		*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
 		if (*batch == NULL) {
-			tlb_remove_table_one(table, tlb);
+			tlb_remove_table_one(table);
 			return;
 		}
 		(*batch)->nr = 0;
-- 
cgit v1.2.3


From fd1102f0aadec7d18792b132e1d224290b2aecca Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 23 Aug 2018 18:47:09 +1000
Subject: mm: mmu_notifier fix for tlb_end_vma

The generic tlb_end_vma does not call invalidate_range mmu notifier, and
it resets resets the mmu_gather range, which means the notifier won't be
called on part of the range in case of an unmap that spans multiple
vmas.

ARM64 seems to be the only arch I could see that has notifiers and uses
the generic tlb_end_vma.  I have not actually tested it.

[ Catalin and Will point out that ARM64 currently only uses the
  notifiers for KVM, which doesn't use the ->invalidate_range()
  callback right now, so it's a bug, but one that happens to
  not affect them.  So not necessary for stable.  - Linus ]

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/tlb.h | 17 +++++++++++++----
 mm/memory.c               | 10 ----------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 3063125197ad..b3353e21f3b3 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -15,6 +15,7 @@
 #ifndef _ASM_GENERIC__TLB_H
 #define _ASM_GENERIC__TLB_H
 
+#include <linux/mmu_notifier.h>
 #include <linux/swap.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -138,6 +139,16 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
 	}
 }
 
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	if (!tlb->end)
+		return;
+
+	tlb_flush(tlb);
+	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
+	__tlb_reset_range(tlb);
+}
+
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 					struct page *page, int page_size)
 {
@@ -186,10 +197,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
 
 #define __tlb_end_vma(tlb, vma)					\
 	do {							\
-		if (!tlb->fullmm && tlb->end) {			\
-			tlb_flush(tlb);				\
-			__tlb_reset_range(tlb);			\
-		}						\
+		if (!tlb->fullmm)				\
+			tlb_flush_mmu_tlbonly(tlb);		\
 	} while (0)
 
 #ifndef tlb_end_vma
diff --git a/mm/memory.c b/mm/memory.c
index 8b3442979855..3ff4394a2e1b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -238,16 +238,6 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	__tlb_reset_range(tlb);
 }
 
-static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
-	if (!tlb->end)
-		return;
-
-	tlb_flush(tlb);
-	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
-	__tlb_reset_range(tlb);
-}
-
 static void tlb_flush_mmu_free(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
-- 
cgit v1.2.3


From 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files

Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag.  The purpose
is to make data spoofing attacks harder.  This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection.  This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.

This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:

CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489

This list is not meant to be complete.  It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector.  In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.

[s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter]
  Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
  Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com
[keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook@chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Solar Designer <solar@openwall.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/fs.txt | 36 ++++++++++++++++++++++++++++++
 fs/namei.c                  | 53 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h          |  2 ++
 kernel/sysctl.c             | 18 +++++++++++++++
 4 files changed, 106 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 6c00c1e2743f..819caf8ca05f 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -34,7 +34,9 @@ Currently, these files are in /proc/sys/fs:
 - overflowgid
 - pipe-user-pages-hard
 - pipe-user-pages-soft
+- protected_fifos
 - protected_hardlinks
+- protected_regular
 - protected_symlinks
 - suid_dumpable
 - super-max
@@ -182,6 +184,24 @@ applied.
 
 ==============================================================
 
+protected_fifos:
+
+The intent of this protection is to avoid unintentional writes to
+an attacker-controlled FIFO, where a program expected to create a regular
+file.
+
+When set to "0", writing to FIFOs is unrestricted.
+
+When set to "1" don't allow O_CREAT open on FIFOs that we don't own
+in world writable sticky directories, unless they are owned by the
+owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+This protection is based on the restrictions in Openwall.
+
+==============================================================
+
 protected_hardlinks:
 
 A long-standing class of security issues is the hardlink-based
@@ -202,6 +222,22 @@ This protection is based on the restrictions in Openwall and grsecurity.
 
 ==============================================================
 
+protected_regular:
+
+This protection is similar to protected_fifos, but it
+avoids writes to an attacker-controlled regular file, where a program
+expected to create one.
+
+When set to "0", writing to regular files is unrestricted.
+
+When set to "1" don't allow O_CREAT open on regular files that we
+don't own in world writable sticky directories, unless they are
+owned by the owner of the directory.
+
+When set to "2" it also applies to group writable sticky directories.
+
+==============================================================
+
 protected_symlinks:
 
 A long-standing class of security issues is the symlink-based
diff --git a/fs/namei.c b/fs/namei.c
index ae6aa9ae757c..0cab6494978c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -887,6 +887,8 @@ static inline void put_link(struct nameidata *nd)
 
 int sysctl_protected_symlinks __read_mostly = 0;
 int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
 
 /**
  * may_follow_link - Check symlink following for unsafe situations
@@ -1003,6 +1005,45 @@ static int may_linkat(struct path *link)
 	return -EPERM;
 }
 
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ *			  should be allowed, or not, on files that already
+ *			  exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ *   - the file already exists
+ *   - we are in a sticky directory
+ *   - we don't own the file
+ *   - the owner of the directory doesn't own the file
+ *   - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+				struct inode * const inode)
+{
+	if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+	    (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+	    likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+	    uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+	    uid_eq(current_fsuid(), inode->i_uid))
+		return 0;
+
+	if (likely(dir->d_inode->i_mode & 0002) ||
+	    (dir->d_inode->i_mode & 0020 &&
+	     ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+	      (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+		return -EACCES;
+	}
+	return 0;
+}
+
 static __always_inline
 const char *get_link(struct nameidata *nd)
 {
@@ -3348,9 +3389,15 @@ finish_open:
 	if (error)
 		return error;
 	audit_inode(nd->name, nd->path.dentry, 0);
-	error = -EISDIR;
-	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
-		goto out;
+	if (open_flag & O_CREAT) {
+		error = -EISDIR;
+		if (d_is_dir(nd->path.dentry))
+			goto out;
+		error = may_create_in_sticky(dir,
+					     d_backing_inode(nd->path.dentry));
+		if (unlikely(error))
+			goto out;
+	}
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
 		goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5710541183b..33322702c910 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -74,6 +74,8 @@ extern struct inodes_stat_t inodes_stat;
 extern int leases_enable, lease_break_time;
 extern int sysctl_protected_symlinks;
 extern int sysctl_protected_hardlinks;
+extern int sysctl_protected_fifos;
+extern int sysctl_protected_regular;
 
 typedef __kernel_rwf_t rwf_t;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71ceb6c13c1a..cc02050fd0c4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1807,6 +1807,24 @@ static struct ctl_table fs_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "protected_fifos",
+		.data		= &sysctl_protected_fifos,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "protected_regular",
+		.data		= &sysctl_protected_regular,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
 	{
 		.procname	= "suid_dumpable",
 		.data		= &suid_dumpable,
-- 
cgit v1.2.3


From d4ae9916ea2947341180d2b538f48875ff393a86 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Thu, 23 Aug 2018 17:00:42 -0700
Subject: mm: soft-offline: close the race against page allocation

A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed on the way of soft-offline.  This is
undesirable because soft-offline (which is about corrected error) is
less aggressive than hard-offline (which is about uncorrected error),
and we can make soft-offline fail and keep using the page for good
reason like "system is busy."

Two main changes of this patch are:

- setting migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes kernel bypass pcplist when
  freeing the page. So we can assume that the page is in freelist just
  after put_page() returns,

- setting PG_hwpoison on free page under zone->lock which protects
  freelists, so this allows us to avoid setting PG_hwpoison on a page
  that is decided to be allocated soon.

[akpm@linux-foundation.org: tweak set_hwpoison_free_buddy_page() comment]
Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <zy.zhengyi@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 mm/memory-failure.c        | 26 +++++++++++++++++++++-----
 mm/migrate.c               |  2 +-
 mm/page_alloc.c            | 30 ++++++++++++++++++++++++++++++
 5 files changed, 57 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 901943e4754b..74bee8cecf4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 1d3877c39a00..568a3553d918 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
@@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
 	return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 49dc32c61137..192d0bbfc9ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include <linux/mm_inline.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
 	int ret;
+	int mt;
 	struct page *hpage = compound_head(page);
 
 	if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
 		put_hwpoison_page(hpage);
 	}
 
+	/*
+	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
+	 * to free list immediately (not via pcplist) when released after
+	 * successful page migration. Otherwise we can't guarantee that the
+	 * page is really free after put_page() returns, so
+	 * set_hwpoison_free_buddy_page() highly likely fails.
+	 */
+	mt = get_pageblock_migratetype(page);
+	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	if (PageHuge(page))
 		ret = soft_offline_huge_page(page, flags);
 	else
 		ret = __soft_offline_page(page, flags);
-
+	set_pageblock_migratetype(page, mt);
 	return ret;
 }
 
-static void soft_offline_free_page(struct page *page)
+static int soft_offline_free_page(struct page *page)
 {
 	int rc = 0;
 	struct page *head = compound_head(page);
 
 	if (PageHuge(head))
 		rc = dissolve_free_huge_page(page);
-	if (!rc && !TestSetPageHWPoison(page))
-		num_poisoned_pages_inc();
+	if (!rc) {
+		if (set_hwpoison_free_buddy_page(page))
+			num_poisoned_pages_inc();
+		else
+			rc = -EBUSY;
+	}
+	return rc;
 }
 
 /**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
 	if (ret > 0)
 		ret = soft_offline_in_use_page(page, flags);
 	else if (ret == 0)
-		soft_offline_free_page(page);
+		ret = soft_offline_free_page(page);
 
 	return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 91a99457127c..d6a2e89b086a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1212,7 +1212,7 @@ out:
 			 * intentionally. Although it's rather weird,
 			 * it's how HWPoison flag works at the moment.
 			 */
-			if (!test_set_page_hwpoison(page))
+			if (set_hwpoison_free_buddy_page(page))
 				num_poisoned_pages_inc();
 		}
 	} else {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c677c1506d73..e75865d58ba7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8096,3 +8096,33 @@ bool is_free_buddy_page(struct page *page)
 
 	return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page.  This
+ * test is performed under the zone lock to prevent a race against page
+ * allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	unsigned int order;
+	bool hwpoisoned = false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order) {
+			if (!TestSetPageHWPoison(page))
+				hwpoisoned = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return hwpoisoned;
+}
+#endif
-- 
cgit v1.2.3


From 263fade51f7bdd5ad7ebbfe82113a44c5ea4c36c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Thu, 23 Aug 2018 17:01:15 -0700
Subject: docs/mm: make GFP flags descriptions usable as kernel-doc

This patch adds DOC: headings for GFP flag descriptions and adjusts the
formatting to fit sphinx expectations of paragraphs.

Link: http://lkml.kernel.org/r/1532626360-16650-7-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 291 +++++++++++++++++++++++++++-------------------------
 1 file changed, 154 insertions(+), 137 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index a6afcec53795..24bcc5eec6b4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -59,29 +59,32 @@ struct vm_area_struct;
 #define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* ZONE_MOVABLE allowed */
 #define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
 
-/*
+/**
+ * DOC: Page mobility and placement hints
+ *
  * Page mobility and placement hints
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
  * These flags provide hints about how mobile the page is. Pages with similar
  * mobility are placed within the same pageblocks to minimise problems due
  * to external fragmentation.
  *
- * __GFP_MOVABLE (also a zone modifier) indicates that the page can be
- *   moved by page migration during memory compaction or can be reclaimed.
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
  *
- * __GFP_RECLAIMABLE is used for slab allocations that specify
- *   SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
  *
- * __GFP_WRITE indicates the caller intends to dirty the page. Where possible,
- *   these pages will be spread between local zones to avoid all the dirty
- *   pages being in one zone (fair zone allocation policy).
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
  *
- * __GFP_HARDWALL enforces the cpuset memory allocation policy.
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
  *
- * __GFP_THISNODE forces the allocation to be satisified from the requested
- *   node with no fallbacks or placement policy enforcements.
+ * %__GFP_THISNODE forces the allocation to be satisified from the requested
+ * node with no fallbacks or placement policy enforcements.
  *
- * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
  */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)
@@ -89,54 +92,60 @@ struct vm_area_struct;
 #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)
 #define __GFP_ACCOUNT	((__force gfp_t)___GFP_ACCOUNT)
 
-/*
+/**
+ * DOC: Watermark modifiers
+ *
  * Watermark modifiers -- controls access to emergency reserves
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *
- * __GFP_HIGH indicates that the caller is high-priority and that granting
- *   the request is necessary before the system can make forward progress.
- *   For example, creating an IO context to clean pages.
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example, creating an IO context to clean pages.
  *
- * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
- *   high priority. Users are typically interrupt handlers. This may be
- *   used in conjunction with __GFP_HIGH
+ * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is
+ * high priority. Users are typically interrupt handlers. This may be
+ * used in conjunction with %__GFP_HIGH
  *
- * __GFP_MEMALLOC allows access to all memory. This should only be used when
- *   the caller guarantees the allocation will allow more memory to be freed
- *   very shortly e.g. process exiting or swapping. Users either should
- *   be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
  *
- * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
- *   This takes precedence over the __GFP_MEMALLOC flag if both are set.
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
  */
 #define __GFP_ATOMIC	((__force gfp_t)___GFP_ATOMIC)
 #define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)
 #define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)
 #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
 
-/*
+/**
+ * DOC: Reclaim modifiers
+ *
  * Reclaim modifiers
+ * ~~~~~~~~~~~~~~~~~
  *
- * __GFP_IO can start physical IO.
+ * %__GFP_IO can start physical IO.
  *
- * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the
- *   allocator recursing into the filesystem which might already be holding
- *   locks.
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
  *
- * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
- *   This flag can be cleared to avoid unnecessary delays when a fallback
- *   option is available.
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
  *
- * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
- *   the low watermark is reached and have it reclaim pages until the high
- *   watermark is reached. A caller may wish to clear this flag when fallback
- *   options are available and the reclaim is likely to disrupt the system. The
- *   canonical example is THP allocation where a fallback is cheap but
- *   reclaim/compaction may cause indirect stalls.
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
  *
- * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
  *
  * The default allocator behavior depends on the request size. We have a concept
- * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER).
+ * of so called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
  * !costly allocations are too essential to fail so they are implicitly
  * non-failing by default (with some exceptions like OOM victims might fail so
  * the caller still has to check for failures) while costly requests try to be
@@ -144,40 +153,40 @@ struct vm_area_struct;
  * The following three modifiers might be used to override some of these
  * implicit rules
  *
- * __GFP_NORETRY: The VM implementation will try only very lightweight
- *   memory direct reclaim to get some memory under memory pressure (thus
- *   it can sleep). It will avoid disruptive actions like OOM killer. The
- *   caller must handle the failure which is quite likely to happen under
- *   heavy memory pressure. The flag is suitable when failure can easily be
- *   handled at small cost, such as reduced throughput
- *
- * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
- *   procedures that have previously failed if there is some indication
- *   that progress has been made else where.  It can wait for other
- *   tasks to attempt high level approaches to freeing memory such as
- *   compaction (which removes fragmentation) and page-out.
- *   There is still a definite limit to the number of retries, but it is
- *   a larger limit than with __GFP_NORETRY.
- *   Allocations with this flag may fail, but only when there is
- *   genuinely little unused memory. While these allocations do not
- *   directly trigger the OOM killer, their failure indicates that
- *   the system is likely to need to use the OOM killer soon.  The
- *   caller must handle failure, but can reasonably do so by failing
- *   a higher-level request, or completing it only in a much less
- *   efficient manner.
- *   If the allocation does fail, and the caller is in a position to
- *   free some non-essential memory, doing so could benefit the system
- *   as a whole.
- *
- * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
- *   cannot handle allocation failures. The allocation could block
- *   indefinitely but will never return with failure. Testing for
- *   failure is pointless.
- *   New users should be evaluated carefully (and the flag should be
- *   used only when there is no reasonable failure policy) but it is
- *   definitely preferable to use the flag rather than opencode endless
- *   loop around allocator.
- *   Using this flag for costly allocations is _highly_ discouraged.
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made else where.  It can wait for other
+ * tasks to attempt high level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon.  The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than opencode endless
+ * loop around allocator.
+ * Using this flag for costly allocations is _highly_ discouraged.
  */
 #define __GFP_IO	((__force gfp_t)___GFP_IO)
 #define __GFP_FS	((__force gfp_t)___GFP_FS)
@@ -188,14 +197,17 @@ struct vm_area_struct;
 #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)
 #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY)
 
-/*
+/**
+ * DOC: Action modifiers
+ *
  * Action modifiers
+ * ~~~~~~~~~~~~~~~~
  *
- * __GFP_NOWARN suppresses allocation failure reports.
+ * %__GFP_NOWARN suppresses allocation failure reports.
  *
- * __GFP_COMP address compound page metadata.
+ * %__GFP_COMP address compound page metadata.
  *
- * __GFP_ZERO returns a zeroed page on success.
+ * %__GFP_ZERO returns a zeroed page on success.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
@@ -208,66 +220,71 @@ struct vm_area_struct;
 #define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/*
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
  * Useful GFP flag combinations that are commonly used. It is recommended
  * that subsystems start with one of these combinations and then set/clear
- * __GFP_FOO flags as necessary.
- *
- * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
- *   watermark is applied to allow access to "atomic reserves"
- *
- * GFP_KERNEL is typical for kernel-internal allocations. The caller requires
- *   ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
- *
- * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
- *   accounted to kmemcg.
- *
- * GFP_NOWAIT is for kernel allocations that should not stall for direct
- *   reclaim, start physical IO or use any filesystem callback.
- *
- * GFP_NOIO will use direct reclaim to discard clean pages or slab pages
- *   that do not require the starting of any physical IO.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_noio_{save,restore} to mark the whole scope which cannot
- *   perform any IO with a short explanation why. All allocation requests
- *   will inherit GFP_NOIO implicitly.
- *
- * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
- *   Please try to avoid using this flag directly and instead use
- *   memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
- *   recurse into the FS layer with a short explanation why. All allocation
- *   requests will inherit GFP_NOFS implicitly.
- *
- * GFP_USER is for userspace allocations that also need to be directly
- *   accessibly by the kernel or hardware. It is typically used by hardware
- *   for buffers that are mapped to userspace (e.g. graphics) that hardware
- *   still must DMA to. cpuset limits are enforced for these allocations.
- *
- * GFP_DMA exists for historical reasons and should be avoided where possible.
- *   The flags indicates that the caller requires that the lowest zone be
- *   used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
- *   it would require careful auditing as some users really require it and
- *   others use the flag to avoid lowmem reserves in ZONE_DMA and treat the
- *   lowest zone as a type of emergency reserve.
- *
- * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit
- *   address.
- *
- * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
- *   do not need to be directly accessible by the kernel but that cannot
- *   move once in use. An example may be a hardware allocation that maps
- *   data directly into userspace but has no addressing limitations.
- *
- * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
- *   need direct access to but can use kmap() when access is required. They
- *   are expected to be movable via page reclaim or page migration. Typically,
- *   pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
- *
- * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
- *   compound allocations that will generally fail quickly if memory is not
- *   available and will not wake kswapd/kcompactd on failure. The _LIGHT
- *   version does not attempt reclaim/compaction at all and is by default used
- *   in page fault path, while the non-light is used by khugepaged.
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves"
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessibly by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flags indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address.
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
-- 
cgit v1.2.3


From 2b7403035459c75e193c6b04a293e518a4212de0 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 23 Aug 2018 17:01:36 -0700
Subject: mm: Change return type int to vm_fault_t for fault handlers

Use new return type vm_fault_t for fault handler.  For now, this is just
documenting that the function returns a VM_FAULT value rather than an
errno.  Once all instances are converted, vm_fault_t will become a
distinct type.

Ref-> commit 1c8f422059ae ("mm: change return type to vm_fault_t")

The aim is to change the return type of finish_fault() and
handle_mm_fault() to vm_fault_t type.  As part of that clean up return
type of all other recursively called functions have been changed to
vm_fault_t type.

The places from where handle_mm_fault() is getting invoked will be
change to vm_fault_t type but in a separate patch.

vmf_error() is the newly introduce inline function in 4.17-rc6.

[akpm@linux-foundation.org: don't shadow outer local `ret' in __do_huge_pmd_anonymous_page()]
Link: http://lkml.kernel.org/r/20180604171727.GA20279@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c              |  6 +--
 include/linux/huge_mm.h       |  9 +++--
 include/linux/hugetlb.h       |  2 +-
 include/linux/mm.h            | 14 +++----
 include/linux/oom.h           |  2 +-
 include/linux/swapops.h       |  5 ++-
 include/linux/userfaultfd_k.h |  5 ++-
 kernel/memremap.c             |  2 +-
 mm/gup.c                      |  4 +-
 mm/huge_memory.c              | 31 +++++++--------
 mm/hugetlb.c                  | 29 +++++++-------
 mm/internal.h                 |  2 +-
 mm/khugepaged.c               |  3 +-
 mm/memory.c                   | 90 ++++++++++++++++++++++---------------------
 mm/shmem.c                    |  5 ++-
 15 files changed, 106 insertions(+), 103 deletions(-)

(limited to 'include')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index f649023b19b5..bfa0ec69f924 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -340,17 +340,15 @@ out:
  * fatal_signal_pending()s, and the mmap_sem must be released before
  * returning it.
  */
-int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
 	struct mm_struct *mm = vmf->vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
-	int ret;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
 	bool must_wait, return_to_userland;
 	long blocking_state;
 
-	ret = VM_FAULT_SIGBUS;
-
 	/*
 	 * We don't do userfault handling for the final child pid update.
 	 *
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..27e3e32135a8 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -6,7 +6,7 @@
 
 #include <linux/fs.h> /* only for vma_is_dax() */
 
-extern int do_huge_pmd_anonymous_page(struct vm_fault *vmf);
+extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
 extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
 			 struct vm_area_struct *vma);
@@ -23,7 +23,7 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 }
 #endif
 
-extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
 extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 					  unsigned long addr,
 					  pmd_t *pmd,
@@ -216,7 +216,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		pud_t *pud, int flags);
 
-extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
 
@@ -321,7 +321,8 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 	return NULL;
 }
 
-static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf,
+		pmd_t orig_pmd)
 {
 	return 0;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c39d9170a8a0..6b68e345f0ca 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -105,7 +105,7 @@ void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(int, char *);
 void hugetlb_show_meminfo(void);
 unsigned long hugetlb_total_pages(void);
-int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags);
 int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
 				struct vm_area_struct *dst_vma,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9e733b5fb76..8fcc36660de6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -728,10 +728,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page);
-int finish_fault(struct vm_fault *vmf);
-int finish_mkwrite_fault(struct vm_fault *vmf);
+vm_fault_t finish_fault(struct vm_fault *vmf);
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 #endif
 
 /*
@@ -1403,8 +1403,8 @@ int generic_error_remove_page(struct address_space *mapping, struct page *page);
 int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
-extern int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags);
+extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags);
 extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags,
 			    bool *unlocked);
@@ -1413,7 +1413,7 @@ void unmap_mapping_pages(struct address_space *mapping,
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 #else
-static inline int handle_mm_fault(struct vm_area_struct *vma,
+static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
 {
 	/* should never happen if there's no MMU */
@@ -2563,7 +2563,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 
-static inline int vm_fault_to_errno(int vm_fault, int foll_flags)
+static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
 {
 	if (vm_fault & VM_FAULT_OOM)
 		return -ENOMEM;
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 92f70e4c6252..69864a547663 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -88,7 +88,7 @@ static inline bool mm_is_oom_victim(struct mm_struct *mm)
  *
  * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise.
  */
-static inline int check_stable_address_space(struct mm_struct *mm)
+static inline vm_fault_t check_stable_address_space(struct mm_struct *mm)
 {
 	if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags)))
 		return VM_FAULT_SIGBUS;
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 568a3553d918..22af9d8a84ae 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -4,6 +4,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/bug.h>
+#include <linux/mm_types.h>
 
 /*
  * swapcache pages are stored in the swapper_space radix tree.  We want to
@@ -134,7 +135,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return pfn_to_page(swp_offset(entry));
 }
 
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
 		       swp_entry_t entry,
 		       unsigned int flags,
@@ -169,7 +170,7 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 	return NULL;
 }
 
-static inline int device_private_entry_fault(struct vm_area_struct *vma,
+static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 				     unsigned long addr,
 				     swp_entry_t entry,
 				     unsigned int flags,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e091f0a11b11..37c9eba75c98 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,7 +28,7 @@
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
-extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
+extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
@@ -77,7 +77,8 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm,
 #else /* CONFIG_USERFAULTFD */
 
 /* mm helpers */
-static inline int handle_userfault(struct vm_fault *vmf, unsigned long reason)
+static inline vm_fault_t handle_userfault(struct vm_fault *vmf,
+				unsigned long reason)
 {
 	return VM_FAULT_SIGBUS;
 }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1f87ea6b6545..d57d58f77409 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -43,7 +43,7 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff)
 			pgoff += 1UL << order, order = order_at((res), pgoff))
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-int device_private_entry_fault(struct vm_area_struct *vma,
+vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
 		       swp_entry_t entry,
 		       unsigned int flags,
diff --git a/mm/gup.c b/mm/gup.c
index fc5f98069f4e..1abc8b4afff6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -497,7 +497,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		unsigned long address, unsigned int *flags, int *nonblocking)
 {
 	unsigned int fault_flags = 0;
-	int ret;
+	vm_fault_t ret;
 
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
@@ -818,7 +818,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		     bool *unlocked)
 {
 	struct vm_area_struct *vma;
-	int ret, major = 0;
+	vm_fault_t ret, major = 0;
 
 	if (unlocked)
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78427af91de9..08b544383d74 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -541,14 +541,14 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
 
-static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
-		gfp_t gfp)
+static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
+			struct page *page, gfp_t gfp)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
@@ -584,15 +584,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 		/* Deliver the page fault to userland */
 		if (userfaultfd_missing(vma)) {
-			int ret;
+			vm_fault_t ret2;
 
 			spin_unlock(vmf->ptl);
 			mem_cgroup_cancel_charge(page, memcg, true);
 			put_page(page);
 			pte_free(vma->vm_mm, pgtable);
-			ret = handle_userfault(vmf, VM_UFFD_MISSING);
-			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
-			return ret;
+			ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
+			VM_BUG_ON(ret2 & VM_FAULT_FALLBACK);
+			return ret2;
 		}
 
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
@@ -663,7 +663,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
 	return true;
 }
 
-int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
+vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	gfp_t gfp;
@@ -682,7 +682,7 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pgtable_t pgtable;
 		struct page *zero_page;
 		bool set;
-		int ret;
+		vm_fault_t ret;
 		pgtable = pte_alloc_one(vma->vm_mm, haddr);
 		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
@@ -1118,15 +1118,16 @@ unlock:
 	spin_unlock(vmf->ptl);
 }
 
-static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
-		struct page *page)
+static vm_fault_t do_huge_pmd_wp_page_fallback(struct vm_fault *vmf,
+			pmd_t orig_pmd, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
 	pmd_t _pmd;
-	int ret = 0, i;
+	int i;
+	vm_fault_t ret = 0;
 	struct page **pages;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
@@ -1236,7 +1237,7 @@ out_free_pages:
 	goto out;
 }
 
-int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
+vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *new_page;
@@ -1245,7 +1246,7 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	gfp_t huge_gfp;			/* for allocation and charge */
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1457,7 +1458,7 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
+vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct anon_vma *anon_vma = NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9f1c853f67b5..3c21775f196b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3501,14 +3501,15 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * cannot race with other handlers or page migration.
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
-static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
+static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 		       unsigned long address, pte_t *ptep,
 		       struct page *pagecache_page, spinlock_t *ptl)
 {
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
 	struct page *old_page, *new_page;
-	int ret = 0, outside_reserve = 0;
+	int outside_reserve = 0;
+	vm_fault_t ret = 0;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	unsigned long haddr = address & huge_page_mask(h);
@@ -3572,8 +3573,7 @@ retry_avoidcopy:
 			return 0;
 		}
 
-		ret = (PTR_ERR(new_page) == -ENOMEM) ?
-			VM_FAULT_OOM : VM_FAULT_SIGBUS;
+		ret = vmf_error(PTR_ERR(new_page));
 		goto out_release_old;
 	}
 
@@ -3676,12 +3676,13 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 	return 0;
 }
 
-static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			   struct address_space *mapping, pgoff_t idx,
-			   unsigned long address, pte_t *ptep, unsigned int flags)
+static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			struct address_space *mapping, pgoff_t idx,
+			unsigned long address, pte_t *ptep, unsigned int flags)
 {
 	struct hstate *h = hstate_vma(vma);
-	int ret = VM_FAULT_SIGBUS;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
 	int anon_rmap = 0;
 	unsigned long size;
 	struct page *page;
@@ -3744,11 +3745,7 @@ retry:
 
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
-			if (ret == -ENOMEM)
-				ret = VM_FAULT_OOM;
-			else
-				ret = VM_FAULT_SIGBUS;
+			ret = vmf_error(PTR_ERR(page));
 			goto out;
 		}
 		clear_huge_page(page, address, pages_per_huge_page(h));
@@ -3872,12 +3869,12 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
 }
 #endif
 
-int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, unsigned int flags)
 {
 	pte_t *ptep, entry;
 	spinlock_t *ptl;
-	int ret;
+	vm_fault_t ret;
 	u32 hash;
 	pgoff_t idx;
 	struct page *page = NULL;
@@ -4207,7 +4204,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (absent || is_swap_pte(huge_ptep_get(pte)) ||
 		    ((flags & FOLL_WRITE) &&
 		      !huge_pte_write(huge_ptep_get(pte)))) {
-			int ret;
+			vm_fault_t ret;
 			unsigned int fault_flags = 0;
 
 			if (pte)
diff --git a/mm/internal.h b/mm/internal.h
index dab088cb6937..87256ae1bef8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -38,7 +38,7 @@
 
 void page_writeback_init(void);
 
-int do_swap_page(struct vm_fault *vmf);
+vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 961cbe9062a5..a31d740e6cd1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -880,7 +880,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					unsigned long address, pmd_t *pmd,
 					int referenced)
 {
-	int swapped_in = 0, ret = 0;
+	int swapped_in = 0;
+	vm_fault_t ret = 0;
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address,
diff --git a/mm/memory.c b/mm/memory.c
index 19f47d7b9b86..42ebdc33268e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2384,9 +2384,9 @@ static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
  *
  * We do this without the lock held, so that it can sleep if it needs to.
  */
-static int do_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 {
-	int ret;
+	vm_fault_t ret;
 	struct page *page = vmf->page;
 	unsigned int old_flags = vmf->flags;
 
@@ -2490,7 +2490,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
  *   held to the old page, as well as updating the rmap.
  * - In any case, unlock the PTL and drop the reference we took to the old page.
  */
-static int wp_page_copy(struct vm_fault *vmf)
+static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -2638,7 +2638,7 @@ oom:
  * The function expects the page to be locked or other protection against
  * concurrent faults / writeback (such as DAX radix tree locks).
  */
-int finish_mkwrite_fault(struct vm_fault *vmf)
+vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
 	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
@@ -2659,12 +2659,12 @@ int finish_mkwrite_fault(struct vm_fault *vmf)
  * Handle write page faults for VM_MIXEDMAP or VM_PFNMAP for a VM_SHARED
  * mapping
  */
-static int wp_pfn_shared(struct vm_fault *vmf)
+static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
 	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
-		int ret;
+		vm_fault_t ret;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		vmf->flags |= FAULT_FLAG_MKWRITE;
@@ -2677,7 +2677,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 	return VM_FAULT_WRITE;
 }
 
-static int wp_page_shared(struct vm_fault *vmf)
+static vm_fault_t wp_page_shared(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2685,7 +2685,7 @@ static int wp_page_shared(struct vm_fault *vmf)
 	get_page(vmf->page);
 
 	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
-		int tmp;
+		vm_fault_t tmp;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		tmp = do_page_mkwrite(vmf);
@@ -2728,7 +2728,7 @@ static int wp_page_shared(struct vm_fault *vmf)
  * but allow concurrent faults), with pte both mapped and locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_wp_page(struct vm_fault *vmf)
+static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -2904,7 +2904,7 @@ EXPORT_SYMBOL(unmap_mapping_range);
  * We return with the mmap_sem locked or unlocked in the same cases
  * as does filemap_fault().
  */
-int do_swap_page(struct vm_fault *vmf)
+vm_fault_t do_swap_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *swapcache;
@@ -2913,7 +2913,7 @@ int do_swap_page(struct vm_fault *vmf)
 	pte_t pte;
 	int locked;
 	int exclusive = 0;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
@@ -3124,12 +3124,12 @@ out_release:
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static int do_anonymous_page(struct vm_fault *vmf)
+static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mem_cgroup *memcg;
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 	pte_t entry;
 
 	/* File mapping without ->vm_ops ? */
@@ -3239,10 +3239,10 @@ oom:
  * released depending on flags and vma->vm_ops->fault() return value.
  * See filemap_fault() and __lock_page_retry().
  */
-static int __do_fault(struct vm_fault *vmf)
+static vm_fault_t __do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	ret = vma->vm_ops->fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
@@ -3276,7 +3276,7 @@ static int pmd_devmap_trans_unstable(pmd_t *pmd)
 	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
 }
 
-static int pte_alloc_one_map(struct vm_fault *vmf)
+static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
@@ -3352,13 +3352,14 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 	vmf->prealloc_pte = NULL;
 }
 
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	pmd_t entry;
-	int i, ret;
+	int i;
+	vm_fault_t ret;
 
 	if (!transhuge_vma_suitable(vma, haddr))
 		return VM_FAULT_FALLBACK;
@@ -3408,7 +3409,7 @@ out:
 	return ret;
 }
 #else
-static int do_set_pmd(struct vm_fault *vmf, struct page *page)
+static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 {
 	BUILD_BUG();
 	return 0;
@@ -3429,13 +3430,13 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
  * Target users are page handler itself and implementations of
  * vm_ops->map_pages.
  */
-int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 		struct page *page)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	pte_t entry;
-	int ret;
+	vm_fault_t ret;
 
 	if (pmd_none(*vmf->pmd) && PageTransCompound(page) &&
 			IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
@@ -3494,10 +3495,10 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
  * The function expects the page to be locked and on success it consumes a
  * reference of a page being mapped (for the PTE which maps it).
  */
-int finish_fault(struct vm_fault *vmf)
+vm_fault_t finish_fault(struct vm_fault *vmf)
 {
 	struct page *page;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/* Did we COW the page? */
 	if ((vmf->flags & FAULT_FLAG_WRITE) &&
@@ -3583,12 +3584,13 @@ late_initcall(fault_around_debugfs);
  * (and therefore to page order).  This way it's easier to guarantee
  * that we don't cross page table boundaries.
  */
-static int do_fault_around(struct vm_fault *vmf)
+static vm_fault_t do_fault_around(struct vm_fault *vmf)
 {
 	unsigned long address = vmf->address, nr_pages, mask;
 	pgoff_t start_pgoff = vmf->pgoff;
 	pgoff_t end_pgoff;
-	int off, ret = 0;
+	int off;
+	vm_fault_t ret = 0;
 
 	nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT;
 	mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
@@ -3638,10 +3640,10 @@ out:
 	return ret;
 }
 
-static int do_read_fault(struct vm_fault *vmf)
+static vm_fault_t do_read_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret = 0;
+	vm_fault_t ret = 0;
 
 	/*
 	 * Let's call ->map_pages() first and use ->fault() as fallback
@@ -3665,10 +3667,10 @@ static int do_read_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-static int do_cow_fault(struct vm_fault *vmf)
+static vm_fault_t do_cow_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
@@ -3704,10 +3706,10 @@ uncharge_out:
 	return ret;
 }
 
-static int do_shared_fault(struct vm_fault *vmf)
+static vm_fault_t do_shared_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret, tmp;
+	vm_fault_t ret, tmp;
 
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -3745,10 +3747,10 @@ static int do_shared_fault(struct vm_fault *vmf)
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int do_fault(struct vm_fault *vmf)
+static vm_fault_t do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	int ret;
+	vm_fault_t ret;
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
@@ -3783,7 +3785,7 @@ static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
 	return mpol_misplaced(page, vma, addr);
 }
 
-static int do_numa_page(struct vm_fault *vmf)
+static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
@@ -3873,7 +3875,7 @@ out:
 	return 0;
 }
 
-static inline int create_huge_pmd(struct vm_fault *vmf)
+static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_anonymous_page(vmf);
@@ -3883,7 +3885,7 @@ static inline int create_huge_pmd(struct vm_fault *vmf)
 }
 
 /* `inline' is required to avoid gcc 4.1.2 build error */
-static inline int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
+static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
 {
 	if (vma_is_anonymous(vmf->vma))
 		return do_huge_pmd_wp_page(vmf, orig_pmd);
@@ -3902,7 +3904,7 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
 	return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
 }
 
-static int create_huge_pud(struct vm_fault *vmf)
+static vm_fault_t create_huge_pud(struct vm_fault *vmf)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3914,7 +3916,7 @@ static int create_huge_pud(struct vm_fault *vmf)
 	return VM_FAULT_FALLBACK;
 }
 
-static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* No support for anonymous transparent PUD pages yet */
@@ -3941,7 +3943,7 @@ static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
  * The mmap_sem may have been released depending on flags and our return value.
  * See filemap_fault() and __lock_page_or_retry().
  */
-static int handle_pte_fault(struct vm_fault *vmf)
+static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
@@ -4029,8 +4031,8 @@ unlock:
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags)
+static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags)
 {
 	struct vm_fault vmf = {
 		.vma = vma,
@@ -4043,7 +4045,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	p4d_t *p4d;
-	int ret;
+	vm_fault_t ret;
 
 	pgd = pgd_offset(mm, address);
 	p4d = p4d_alloc(mm, pgd, address);
@@ -4118,10 +4120,10 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
-int handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
+vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		unsigned int flags)
 {
-	int ret;
+	vm_fault_t ret;
 
 	__set_current_state(TASK_RUNNING);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index fb04baacc9fa..0376c124b043 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -124,7 +124,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		struct page **pagep, enum sgp_type sgp,
 		gfp_t gfp, struct vm_area_struct *vma,
-		struct vm_fault *vmf, int *fault_type);
+		struct vm_fault *vmf, vm_fault_t *fault_type);
 
 int shmem_getpage(struct inode *inode, pgoff_t index,
 		struct page **pagep, enum sgp_type sgp)
@@ -1620,7 +1620,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
  */
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct page **pagep, enum sgp_type sgp, gfp_t gfp,
-	struct vm_area_struct *vma, struct vm_fault *vmf, int *fault_type)
+	struct vm_area_struct *vma, struct vm_fault *vmf,
+			vm_fault_t *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-- 
cgit v1.2.3


From 0c36dd37d5b80b43f4e3dc5a1dbfe6dbd86e8f2a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Tue, 21 Aug 2018 17:02:40 +0200
Subject: i2c: remove deprecated attach_adapter callback

There aren't any users left. Remove this callback from the 2.4 times.
Phew, finally, that took years to reach...

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 11 +----------
 include/linux/i2c.h         |  6 ------
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 5a937109a289..f15737763608 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -62,7 +62,7 @@
 
 /*
  * core_lock protects i2c_adapter_idr, and guarantees that device detection,
- * deletion of detected devices, and attach_adapter calls are serialized
+ * deletion of detected devices are serialized
  */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
@@ -1124,15 +1124,6 @@ static int i2c_do_add_adapter(struct i2c_driver *driver,
 	/* Detect supported devices on that bus, and instantiate them */
 	i2c_detect(adap, driver);
 
-	/* Let legacy drivers scan this bus for matching devices */
-	if (driver->attach_adapter) {
-		dev_warn(&adap->dev, "%s: attach_adapter method is deprecated\n",
-			 driver->driver.name);
-		dev_warn(&adap->dev,
-			 "Please use another way to instantiate your i2c_client\n");
-		/* We ignore the return code; if it fails, too bad */
-		driver->attach_adapter(adap);
-	}
 	return 0;
 }
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 36f357ecdf67..b79387fd57da 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -231,7 +231,6 @@ enum i2c_alert_protocol {
 /**
  * struct i2c_driver - represent an I2C device driver
  * @class: What kind of i2c device we instantiate (for detect)
- * @attach_adapter: Callback for bus addition (deprecated)
  * @probe: Callback for device binding - soon to be deprecated
  * @probe_new: New callback for device binding
  * @remove: Callback for device unbinding
@@ -268,11 +267,6 @@ enum i2c_alert_protocol {
 struct i2c_driver {
 	unsigned int class;
 
-	/* Notifies the driver that a new bus has appeared. You should avoid
-	 * using this, it will be removed in a near future.
-	 */
-	int (*attach_adapter)(struct i2c_adapter *) __deprecated;
-
 	/* Standard driver model interfaces */
 	int (*probe)(struct i2c_client *, const struct i2c_device_id *);
 	int (*remove)(struct i2c_client *);
-- 
cgit v1.2.3


From 5d3a01a228c703dad971cad11f14c0512c4c2713 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <peter@korsgaard.com>
Date: Wed, 22 Aug 2018 18:16:07 +0200
Subject: i2c: ocores: update my email address

The old @sunsite.dk address is no longer active, so update the references.

Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-ocores      | 2 +-
 MAINTAINERS                              | 2 +-
 drivers/i2c/busses/i2c-ocores.c          | 4 ++--
 include/linux/platform_data/i2c-ocores.h | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
index 4e713f4cdb2f..9caaf7df1b2f 100644
--- a/Documentation/i2c/busses/i2c-ocores
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -4,7 +4,7 @@ Supported adapters:
   * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
     https://opencores.org/project/i2c/overview
 
-Author: Peter Korsgaard <jacmet@sunsite.dk>
+Author: Peter Korsgaard <peter@korsgaard.com>
 
 Description
 -----------
diff --git a/MAINTAINERS b/MAINTAINERS
index 8cd5c1bd6441..61abdf319d20 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10787,7 +10787,7 @@ F:	arch/*/boot/dts/
 F:	include/dt-bindings/
 
 OPENCORES I2C BUS DRIVER
-M:	Peter Korsgaard <jacmet@sunsite.dk>
+M:	Peter Korsgaard <peter@korsgaard.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	Documentation/i2c/busses/i2c-ocores
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 88444ef74943..87f9caacba85 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -2,7 +2,7 @@
  * i2c-ocores.c: I2C bus driver for OpenCores I2C controller
  * (https://opencores.org/project/i2c/overview)
  *
- * Peter Korsgaard <jacmet@sunsite.dk>
+ * Peter Korsgaard <peter@korsgaard.com>
  *
  * Support for the GRLIB port of the controller by
  * Andreas Larsson <andreas@gaisler.com>
@@ -576,7 +576,7 @@ static struct platform_driver ocores_i2c_driver = {
 
 module_platform_driver(ocores_i2c_driver);
 
-MODULE_AUTHOR("Peter Korsgaard <jacmet@sunsite.dk>");
+MODULE_AUTHOR("Peter Korsgaard <peter@korsgaard.com>");
 MODULE_DESCRIPTION("OpenCores I2C bus driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:ocores-i2c");
diff --git a/include/linux/platform_data/i2c-ocores.h b/include/linux/platform_data/i2c-ocores.h
index 01edd96fe1f7..113d6b12f650 100644
--- a/include/linux/platform_data/i2c-ocores.h
+++ b/include/linux/platform_data/i2c-ocores.h
@@ -1,7 +1,7 @@
 /*
  * i2c-ocores.h - definitions for the i2c-ocores interface
  *
- * Peter Korsgaard <jacmet@sunsite.dk>
+ * Peter Korsgaard <peter@korsgaard.com>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2.  This program is licensed "as is" without any warranty of any
-- 
cgit v1.2.3